Compare commits

..

6 Commits

Author SHA1 Message Date
dgtlmoon
4fae34cd28 Fixes for blocking 2026-01-19 18:02:58 +01:00
dgtlmoon
280f423cb3 API validation 2026-01-19 17:56:43 +01:00
dgtlmoon
a9c19d062b More API error handling 2026-01-19 17:01:41 +01:00
dgtlmoon
bac4022047 API - Improving URL validation 2026-01-19 17:00:53 +01:00
dgtlmoon
9e2acadb7e 0.52.7
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
2026-01-19 09:37:01 +01:00
吾爱分享
48da93b4ec Fix zh PO duplicates and complete new translations. (#3773) 2026-01-19 09:35:52 +01:00
20 changed files with 1609 additions and 860 deletions

View File

@@ -132,15 +132,6 @@ ENV LOGGER_LEVEL="$LOGGER_LEVEL"
ENV LC_ALL=en_US.UTF-8
WORKDIR /app
# Copy and set up entrypoint script for installing extra packages
COPY docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh
# Set entrypoint to handle EXTRA_PACKAGES env var
ENTRYPOINT ["/docker-entrypoint.sh"]
# Default command (can be overridden in docker-compose.yml)
CMD ["python", "./changedetection.py", "-d", "/datastore"]

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
# Semver means never use .01, or 00. Should be .1.
__version__ = '0.52.6'
__version__ = '0.52.7'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -68,13 +68,17 @@ class Watch(Resource):
import time
from copy import deepcopy
watch = None
for _ in range(20):
# Retry up to 20 times if dict is being modified
# With sleep(0), this is fast: ~200µs best case, ~20ms worst case under heavy load
for attempt in range(20):
try:
watch = deepcopy(self.datastore.data['watching'].get(uuid))
break
except RuntimeError:
# Incase dict changed, try again
time.sleep(0.01)
# Dict changed during deepcopy, retry after yielding to scheduler
# sleep(0) releases GIL and yields - no fixed delay, just lets other threads run
if attempt < 19: # Don't yield on last attempt
time.sleep(0) # Yield to scheduler (microseconds, not milliseconds)
if not watch:
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -126,32 +130,86 @@ class Watch(Resource):
if request.json.get('proxy'):
plist = self.datastore.proxy_list
if not request.json.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
if not plist or request.json.get('proxy') not in plist:
proxy_list_str = ', '.join(plist) if plist else 'none configured'
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
# Validate time_between_check when not using defaults
validation_error = validate_time_between_check_required(request.json)
if validation_error:
return validation_error, 400
# XSS etc protection
if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
return "Invalid URL", 400
# XSS etc protection - validate URL if it's being updated
if 'url' in request.json:
new_url = request.json.get('url')
# URL must be a non-empty string
if new_url is None:
return "URL cannot be null", 400
if not isinstance(new_url, str):
return "URL must be a string", 400
if not new_url.strip():
return "URL cannot be empty or whitespace only", 400
if not is_safe_valid_url(new_url.strip()):
return "Invalid or unsupported URL format. URL must use http://, https://, or ftp:// protocol", 400
# Handle processor-config-* fields separately (save to JSON, not datastore)
from changedetectionio import processors
processor_config_data = {}
regular_data = {}
# Make a mutable copy of request.json for modification
json_data = dict(request.json)
# Extract and remove processor config fields from json_data
processor_config_data = processors.extract_processor_config_from_form_data(json_data)
for key, value in request.json.items():
if key.startswith('processor_config_'):
config_key = key.replace('processor_config_', '')
if value: # Only save non-empty values
processor_config_data[config_key] = value
else:
regular_data[key] = value
# Update watch with regular (non-processor-config) fields
watch.update(json_data)
watch.update(regular_data)
# Save processor config to JSON file
processors.save_processor_config(self.datastore, uuid, processor_config_data)
# Save processor config to JSON file if any config data exists
if processor_config_data:
try:
processor_name = request.json.get('processor', watch.get('processor'))
if processor_name:
# Create a processor instance to access config methods
from changedetectionio.processors import difference_detection_processor
processor_instance = difference_detection_processor(self.datastore, uuid)
# Use processor name as filename so each processor keeps its own config
config_filename = f'{processor_name}.json'
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
logger.debug(f"API: Saved processor config to {config_filename}: {processor_config_data}")
# Call optional edit_hook if processor has one
try:
import importlib
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
try:
edit_hook = importlib.import_module(edit_hook_module_name)
logger.debug(f"API: Found edit_hook module for {processor_name}")
if hasattr(edit_hook, 'on_config_save'):
logger.info(f"API: Calling edit_hook.on_config_save for {processor_name}")
# Call hook and get updated config
updated_config = edit_hook.on_config_save(watch, processor_config_data, self.datastore)
# Save updated config back to file
processor_instance.update_extra_watch_config(config_filename, updated_config)
logger.info(f"API: Edit hook updated config: {updated_config}")
else:
logger.debug(f"API: Edit hook module found but no on_config_save function")
except ModuleNotFoundError:
logger.debug(f"API: No edit_hook module for processor {processor_name} (this is normal)")
except Exception as hook_error:
logger.error(f"API: Edit hook error (non-fatal): {hook_error}", exc_info=True)
except Exception as e:
logger.error(f"API: Failed to save processor config: {e}")
return "OK", 200
@@ -192,6 +250,10 @@ class WatchSingleHistory(Resource):
if timestamp == 'latest':
timestamp = list(watch.history.keys())[-1]
# Validate that the timestamp exists in history
if timestamp not in watch.history:
abort(404, message=f"No history snapshot found for timestamp '{timestamp}'")
if request.args.get('html'):
content = watch.get_fetched_html(timestamp)
if content:
@@ -379,8 +441,9 @@ class CreateWatch(Resource):
if json_data.get('proxy'):
plist = self.datastore.proxy_list
if not json_data.get('proxy') in plist:
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
if not plist or json_data.get('proxy') not in plist:
proxy_list_str = ', '.join(plist) if plist else 'none configured'
return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
# Validate time_between_check when not using defaults
validation_error = validate_time_between_check_required(json_data)

View File

@@ -117,14 +117,11 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
processor = watch.get('processor', 'text_json_diff')
# Init a new 'difference_detection_processor'
# Use get_processor_module() to support both built-in and plugin processors
from changedetectionio.processors import get_processor_module
processor_module = get_processor_module(processor)
if not processor_module:
error_msg = f"Processor module '{processor}' not found."
logger.error(error_msg)
raise ModuleNotFoundError(error_msg)
try:
processor_module = importlib.import_module(f"changedetectionio.processors.{processor}.processor")
except ModuleNotFoundError as e:
print(f"Processor module '{processor}' not found.")
raise e
update_handler = processor_module.perform_site_check(datastore=datastore,
watch_uuid=uuid)

View File

@@ -101,21 +101,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's difference module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'difference')
try:
# Try to import the processor's difference module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
# Call the processor's render() function
if processor_module and hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
# Call the processor's render() function
if hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")
# Fallback: if processor doesn't have difference module, use text_json_diff as default
from changedetectionio.processors.text_json_diff.difference import render as default_render
@@ -155,21 +157,23 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's extract module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'extract')
try:
# Try to import the processor's extract module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
# Call the processor's render_form() function
if processor_module and hasattr(processor_module, 'render_form'):
return processor_module.render_form(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
# Call the processor's render_form() function
if hasattr(processor_module, 'render_form'):
return processor_module.render_form(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
# Fallback: if processor doesn't have extract module, use base processors.extract as default
from changedetectionio.processors.extract import render_form as default_render_form
@@ -209,22 +213,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's extract module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'extract')
try:
# Try to import the processor's extract module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')
# Call the processor's process_extraction() function
if processor_module and hasattr(processor_module, 'process_extraction'):
return processor_module.process_extraction(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
make_response=make_response,
send_from_directory=send_from_directory,
flash=flash,
redirect=redirect
)
# Call the processor's process_extraction() function
if hasattr(processor_module, 'process_extraction'):
return processor_module.process_extraction(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
make_response=make_response,
send_from_directory=send_from_directory,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")
# Fallback: if processor doesn't have extract module, use base processors.extract as default
from changedetectionio.processors.extract import process_extraction as default_process_extraction
@@ -274,33 +280,38 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's difference module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'difference')
try:
# Try to import the processor's difference module
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')
# Call the processor's get_asset() function
if processor_module and hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
# Call the processor's get_asset() function
if hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
if result is None:
if result is None:
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
abort(404, description=f"Processor '{processor_name}' does not support assets")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
from flask import abort
abort(404, description=f"Processor '{processor_name}' does not support assets")
abort(404, description=f"Processor '{processor_name}' not found")
return diff_blueprint

View File

@@ -72,13 +72,8 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
processor_name = datastore.data['watching'][uuid].get('processor', '')
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
if not processor_classes:
flash(gettext("Could not load '{}' processor, processor plugin might be missing. Please select a different processor.").format(processor_name), 'error')
# Fall back to default processor so user can still edit and change processor
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == 'text_json_diff'), None)
if not processor_classes:
# If even text_json_diff is missing, something is very wrong
flash(gettext("Could not load '{}' processor, processor plugin might be missing.").format(processor_name), 'error')
return redirect(url_for('watchlist.index'))
flash(gettext("Cannot load the edit form for processor/plugin '{}', plugin missing?").format(processor_classes[1]), 'error')
return redirect(url_for('watchlist.index'))
parent_module = processors.get_parent_module(processor_classes[0])
@@ -155,10 +150,58 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
extra_update_obj['time_between_check'] = form.time_between_check.data
# Handle processor-config-* fields separately (save to JSON, not datastore)
# IMPORTANT: These must NOT be saved to url-watches.json, only to the processor-specific JSON file
processor_config_data = processors.extract_processor_config_from_form_data(form.data)
processors.save_processor_config(datastore, uuid, processor_config_data)
# Handle processor-config-* fields separately (save to JSON, not datastore)
processor_config_data = {}
fields_to_remove = []
for field_name, field_value in form.data.items():
if field_name.startswith('processor_config_'):
config_key = field_name.replace('processor_config_', '')
if field_value: # Only save non-empty values
processor_config_data[config_key] = field_value
fields_to_remove.append(field_name)
# Save processor config to JSON file if any config data exists
if processor_config_data:
try:
processor_name = form.data.get('processor')
# Create a processor instance to access config methods
processor_instance = processors.difference_detection_processor(datastore, uuid)
# Use processor name as filename so each processor keeps its own config
config_filename = f'{processor_name}.json'
processor_instance.update_extra_watch_config(config_filename, processor_config_data)
logger.debug(f"Saved processor config to {config_filename}: {processor_config_data}")
# Call optional edit_hook if processor has one
try:
# Try to import the edit_hook module from the processor package
import importlib
edit_hook_module_name = f'changedetectionio.processors.{processor_name}.edit_hook'
try:
edit_hook = importlib.import_module(edit_hook_module_name)
logger.debug(f"Found edit_hook module for {processor_name}")
if hasattr(edit_hook, 'on_config_save'):
logger.info(f"Calling edit_hook.on_config_save for {processor_name}")
watch_obj = datastore.data['watching'][uuid]
# Call hook and get updated config
updated_config = edit_hook.on_config_save(watch_obj, processor_config_data, datastore)
# Save updated config back to file
processor_instance.update_extra_watch_config(config_filename, updated_config)
logger.info(f"Edit hook updated config: {updated_config}")
else:
logger.debug(f"Edit hook module found but no on_config_save function")
except ModuleNotFoundError:
logger.debug(f"No edit_hook module for processor {processor_name} (this is normal)")
except Exception as hook_error:
logger.error(f"Edit hook error (non-fatal): {hook_error}", exc_info=True)
except Exception as e:
logger.error(f"Failed to save processor config: {e}")
# Remove processor-config-* fields from form.data before updating datastore
for field_name in fields_to_remove:
form.data.pop(field_name, None)
# Ignore text
form_ignore_text = form.ignore_text.data

View File

@@ -39,21 +39,24 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's preview module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'preview')
try:
# Try to import the processor's preview module
import importlib
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
# Call the processor's render() function
if processor_module and hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
# Call the processor's render() function
if hasattr(processor_module, 'render'):
return processor_module.render(
watch=watch,
datastore=datastore,
request=request,
url_for=url_for,
render_template=render_template,
flash=flash,
redirect=redirect
)
except (ImportError, ModuleNotFoundError) as e:
logger.debug(f"Processor {processor_name} does not have a preview module, using default preview: {e}")
# Fallback: if processor doesn't have preview module, use default text preview
content = []
@@ -160,33 +163,39 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Get the processor type for this watch
processor_name = watch.get('processor', 'text_json_diff')
# Try to get the processor's preview module (works for both built-in and plugin processors)
from changedetectionio.processors import get_processor_submodule
processor_module = get_processor_submodule(processor_name, 'preview')
try:
# Try to import the processor's preview module
import importlib
processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.preview')
# Call the processor's get_asset() function
if processor_module and hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
# Call the processor's get_asset() function
if hasattr(processor_module, 'get_asset'):
result = processor_module.get_asset(
asset_name=asset_name,
watch=watch,
datastore=datastore,
request=request
)
if result is None:
if result is None:
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
from flask import abort
abort(404, description=f"Asset '{asset_name}' not found")
abort(404, description=f"Processor '{processor_name}' does not support assets")
binary_data, content_type, cache_control = result
response = make_response(binary_data)
response.headers['Content-Type'] = content_type
if cache_control:
response.headers['Cache-Control'] = cache_control
return response
else:
logger.warning(f"Processor {processor_name} does not implement get_asset()")
except (ImportError, ModuleNotFoundError) as e:
logger.warning(f"Processor {processor_name} does not have a preview module: {e}")
from flask import abort
abort(404, description=f"Processor '{processor_name}' does not support assets")
abort(404, description=f"Processor '{processor_name}' not found")
return preview_blueprint

View File

@@ -205,24 +205,23 @@ html[data-darkmode="true"] .watch-tag-list.tag-{{ class_name }} {
</div>
{% endif %}
<div>
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
{%- endif -%}
<span class="watch-title">
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
{{ watch.label }}
{% else %}
{{ watch.get('title') or watch.link }}
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<span class="watch-title">
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
{{ watch.label }}
{% else %}
{{ watch.get('title') or watch.link }}
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}">&nbsp;</a>
</span>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{%- endif -%}
{%- endif -%}
{%- if watch['processor'] and watch['processor'] in processor_badge_texts -%}
<span class="processor-badge processor-badge-{{ watch['processor'] }}" title="{{ processor_descriptions.get(watch['processor'], watch['processor']) }}">{{ processor_badge_texts[watch['processor']] }}</span>
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list tag-{{ watch_tag.title|sanitize_tag_class }}">{{ watch_tag.title }}</span>
{%- endfor -%}

View File

@@ -105,30 +105,6 @@ class ChangeDetectionSpec:
"""
pass
@hookspec
def register_processor(self):
"""Register an external processor plugin.
External packages can implement this hook to register custom processors
that will be discovered alongside built-in processors.
Returns:
dict or None: Dictionary with processor information:
{
'processor_name': str, # Machine name (e.g., 'osint_recon')
'processor_module': module, # Module containing processor.py
'processor_class': class, # The perform_site_check class
'metadata': { # Optional metadata
'name': str, # Display name
'description': str, # Description
'processor_weight': int,# Sort weight (lower = higher priority)
'list_badge_text': str, # Badge text for UI
}
}
Return None if this plugin doesn't provide a processor
"""
pass
# Set up Plugin Manager
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

View File

@@ -17,11 +17,9 @@ def find_sub_packages(package_name):
return [name for _, name, is_pkg in pkgutil.iter_modules(package.__path__) if is_pkg]
@lru_cache(maxsize=1)
def find_processors():
"""
Find all subclasses of DifferenceDetectionProcessor in the specified package.
Results are cached to avoid repeated discovery.
:param package_name: The name of the package to scan for processor modules.
:return: A list of (module, class) tuples.
@@ -48,22 +46,6 @@ def find_processors():
except (ModuleNotFoundError, ImportError) as e:
logger.warning(f"Failed to import module {module_name}: {e} (find_processors())")
# Discover plugin processors via pluggy
try:
from changedetectionio.pluggy_interface import plugin_manager
plugin_results = plugin_manager.hook.register_processor()
for result in plugin_results:
if result and isinstance(result, dict):
processor_module = result.get('processor_module')
processor_name = result.get('processor_name')
if processor_module and processor_name:
processors.append((processor_module, processor_name))
logger.info(f"Registered plugin processor: {processor_name}")
except Exception as e:
logger.warning(f"Error loading plugin processors: {e}")
return processors
@@ -115,137 +97,54 @@ def find_processor_module(processor_name):
return None
def get_processor_module(processor_name):
"""
Get the actual processor module (with perform_site_check class) by name.
Works for both built-in and plugin processors.
Args:
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
Returns:
module: The processor module containing perform_site_check, or None if not found
"""
processor_classes = find_processors()
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
if processor_tuple:
# Return the actual processor module (first element of tuple)
return processor_tuple[0]
return None
def get_processor_submodule(processor_name, submodule_name):
"""
Get an optional submodule from a processor (e.g., 'difference', 'extract', 'preview').
Works for both built-in and plugin processors.
Args:
processor_name: Processor machine name (e.g., 'text_json_diff', 'osint_recon')
submodule_name: Name of the submodule (e.g., 'difference', 'extract', 'preview')
Returns:
module: The submodule if it exists, or None if not found
"""
processor_classes = find_processors()
processor_tuple = next((tpl for tpl in processor_classes if tpl[1] == processor_name), None)
if not processor_tuple:
return None
processor_module = processor_tuple[0]
parent_module = get_parent_module(processor_module)
if not parent_module:
return None
# Try to import the submodule
try:
# For built-in processors: changedetectionio.processors.text_json_diff.difference
# For plugin processors: changedetectionio_osint.difference
parent_module_name = parent_module.__name__
submodule_full_name = f"{parent_module_name}.{submodule_name}"
return importlib.import_module(submodule_full_name)
except (ModuleNotFoundError, ImportError):
return None
@lru_cache(maxsize=1)
def get_plugin_processor_metadata():
"""Get metadata from plugin processors."""
metadata = {}
try:
from changedetectionio.pluggy_interface import plugin_manager
plugin_results = plugin_manager.hook.register_processor()
for result in plugin_results:
if result and isinstance(result, dict):
processor_name = result.get('processor_name')
meta = result.get('metadata', {})
if processor_name:
metadata[processor_name] = meta
except Exception as e:
logger.warning(f"Error getting plugin processor metadata: {e}")
return metadata
def available_processors():
"""
Get a list of processors by name and description for the UI elements.
Can be filtered via DISABLED_PROCESSORS environment variable (comma-separated list).
Can be filtered via ALLOWED_PROCESSORS environment variable (comma-separated list).
:return: A list :)
"""
processor_classes = find_processors()
# Check if DISABLED_PROCESSORS env var is set
disabled_processors_env = os.getenv('DISABLED_PROCESSORS', 'image_ssim_diff').strip()
disabled_processors = []
if disabled_processors_env:
# Check if ALLOWED_PROCESSORS env var is set
# For now we disable it, need to make a deploy with lots of new code and this will be an overload
allowed_processors_env = os.getenv('ALLOWED_PROCESSORS', 'text_json_diff, restock_diff').strip()
allowed_processors = None
if allowed_processors_env:
# Parse comma-separated list and strip whitespace
disabled_processors = [p.strip() for p in disabled_processors_env.split(',') if p.strip()]
logger.info(f"DISABLED_PROCESSORS set, disabling: {disabled_processors}")
allowed_processors = [p.strip() for p in allowed_processors_env.split(',') if p.strip()]
logger.info(f"ALLOWED_PROCESSORS set, filtering to: {allowed_processors}")
available = []
plugin_metadata = get_plugin_processor_metadata()
for module, sub_package_name in processor_classes:
# Skip disabled processors
if sub_package_name in disabled_processors:
logger.debug(f"Skipping processor '{sub_package_name}' (in DISABLED_PROCESSORS)")
# Filter by allowed processors if set
if allowed_processors and sub_package_name not in allowed_processors:
logger.debug(f"Skipping processor '{sub_package_name}' (not in ALLOWED_PROCESSORS)")
continue
# Check if this is a plugin processor
if sub_package_name in plugin_metadata:
meta = plugin_metadata[sub_package_name]
description = gettext(meta.get('name', sub_package_name))
# Plugin processors start from weight 10 to separate them from built-in processors
weight = 100 + meta.get('processor_weight', 0)
# Try to get the 'name' attribute from the processor module first
if hasattr(module, 'name'):
description = gettext(module.name)
else:
# Try to get the 'name' attribute from the processor module first
if hasattr(module, 'name'):
description = gettext(module.name)
# Fall back to processor_description from parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_description'):
description = gettext(parent_module.processor_description)
else:
# Fall back to processor_description from parent module's __init__.py
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_description'):
description = gettext(parent_module.processor_description)
else:
# Final fallback to a readable name
description = sub_package_name.replace('_', ' ').title()
# Final fallback to a readable name
description = sub_package_name.replace('_', ' ').title()
# Get weight for sorting (lower weight = higher in list)
weight = 0 # Default weight for processors without explicit weight
# Get weight for sorting (lower weight = higher in list)
weight = 0 # Default weight for processors without explicit weight
# Check processor module itself first
if hasattr(module, 'processor_weight'):
weight = module.processor_weight
else:
# Fall back to parent module (package __init__.py)
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_weight'):
weight = parent_module.processor_weight
# Check processor module itself first
if hasattr(module, 'processor_weight'):
weight = module.processor_weight
else:
# Fall back to parent module (package __init__.py)
parent_module = get_parent_module(module)
if parent_module and hasattr(parent_module, 'processor_weight'):
weight = parent_module.processor_weight
available.append((sub_package_name, description, weight))
@@ -380,76 +279,3 @@ def get_processor_badge_css():
return '\n\n'.join(css_rules)
def save_processor_config(datastore, watch_uuid, config_data):
    """
    Persist processor-specific configuration to a per-processor JSON file.

    Shared helper used by both the UI edit form and the API endpoints so that
    processor configuration is stored consistently in one place.

    Args:
        datastore: The application datastore instance
        watch_uuid: UUID of the watch
        config_data: Dictionary of configuration data to save (with processor_config_* prefix removed)

    Returns:
        bool: True if saved successfully (or nothing to save), False otherwise
    """
    if not config_data:
        # Nothing to persist - treat as a successful no-op.
        return True

    try:
        # Imported lazily to avoid an import cycle at module load time.
        from changedetectionio.processors.base import difference_detection_processor

        target_watch = datastore.data['watching'].get(watch_uuid)
        if not target_watch:
            logger.error(f"Cannot save processor config: watch {watch_uuid} not found")
            return False

        # Each processor keeps its own config file, named after the processor.
        config_filename = f"{target_watch.get('processor', 'text_json_diff')}.json"

        # A processor instance gives us access to the config-persistence helpers.
        handler = difference_detection_processor(datastore, watch_uuid)
        handler.update_extra_watch_config(config_filename, config_data)
        logger.debug(f"Saved processor config to {config_filename}: {config_data}")
        return True
    except Exception as e:
        # Best-effort save: report the failure to the caller instead of raising.
        logger.error(f"Failed to save processor config: {e}")
        return False
def extract_processor_config_from_form_data(form_data):
    """
    Extract processor_config_* fields from form data and return them separately.

    This is a shared helper function used by both the UI edit form and API endpoints
    to consistently handle processor configuration extraction.

    IMPORTANT: This function modifies form_data in-place by removing
    processor_config_* fields.

    Args:
        form_data: Dictionary of form data (will be modified in-place)

    Returns:
        dict: Dictionary of processor config data (with the leading
              processor_config_ prefix removed from each key)
    """
    prefix = 'processor_config_'
    processor_config_data = {}
    # Use list() to snapshot the keys since we delete from the dict while iterating
    for field_name in list(form_data.keys()):
        if field_name.startswith(prefix):
            # Strip only the leading prefix. (str.replace(prefix, '') would also
            # remove any later occurrence of the prefix inside the key itself.)
            config_key = field_name[len(prefix):]
            # Save all values (including empty strings) to allow explicit clearing
            # of settings, and remove the field so it never reaches the datastore.
            processor_config_data[config_key] = form_data.pop(field_name)
    return processor_config_data

View File

@@ -348,7 +348,8 @@ class ChangeDetectionStore:
r = requests.request(method="GET",
url=url,
# So we know to return the JSON instead of the human-friendly "help" page
headers={'App-Guid': self.__data['app_guid']})
headers={'App-Guid': self.__data['app_guid']},
timeout=5.0) # 5 second timeout to prevent blocking
res = r.json()
# List of permissible attributes we accept from the wild internet

View File

@@ -58,7 +58,7 @@ def is_valid_uuid(val):
def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
@@ -506,7 +506,7 @@ def test_api_import(client, live_server, measure_memory_usage, datastore_path):
def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path):
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
# Enable password check and diff page access bypass
@@ -548,3 +548,172 @@ def test_api_conflict_UI_password(client, live_server, measure_memory_usage, dat
assert len(res.json)
def test_api_url_validation(client, live_server, measure_memory_usage, datastore_path):
    """
    Test URL validation for edge cases in both CREATE and UPDATE endpoints.
    Addresses security issues where empty/null/invalid URLs could bypass validation.

    This test ensures that:
    - CREATE endpoint rejects null, empty, and invalid URLs
    - UPDATE endpoint rejects attempts to change URL to null, empty, or invalid
    - UPDATE endpoint allows updating other fields without touching URL
    - URL validation properly checks protocol, format, and safety

    NOTE(review): several asserts accept multiple error-message variants because
    rejection may come from either OpenAPI schema validation or the app's own
    URL validation - confirm which layer fires first if tightening these.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: CREATE with null URL should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": None}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 400, "Creating watch with null URL should fail"
    # Test 2: CREATE with empty string URL should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": ""}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 400, "Creating watch with empty string URL should fail"
    assert b'Invalid or unsupported URL' in res.data or b'required' in res.data.lower()
    # Test 3: CREATE with whitespace-only URL should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "   "}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 400, "Creating watch with whitespace-only URL should fail"
    # Test 4: CREATE with invalid protocol should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "javascript:alert(1)"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 400, "Creating watch with javascript: protocol should fail"
    # Test 5: CREATE with missing protocol should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "example.com"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 400, "Creating watch without protocol should fail"
    # Test 6: CREATE with valid URL should succeed (baseline)
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url, "title": "Valid URL test"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    assert res.status_code == 201, "Creating watch with valid URL should succeed"
    assert is_valid_uuid(res.json.get('uuid'))
    watch_uuid = res.json.get('uuid')
    wait_for_all_checks(client)
    # Test 7: UPDATE to null URL should fail
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": None}),
    )
    assert res.status_code == 400, "Updating watch URL to null should fail"
    # Accept either OpenAPI validation error or our custom validation error
    assert b'URL cannot be null' in res.data or b'OpenAPI validation failed' in res.data or b'validation error' in res.data.lower()
    # Test 8: UPDATE to empty string URL should fail
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": ""}),
    )
    assert res.status_code == 400, "Updating watch URL to empty string should fail"
    # Accept either our custom validation error or OpenAPI/schema validation error
    assert b'URL cannot be empty' in res.data or b'OpenAPI validation' in res.data or b'Invalid or unsupported URL' in res.data
    # Test 9: UPDATE to whitespace-only URL should fail
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": "   \t\n  "}),
    )
    assert res.status_code == 400, "Updating watch URL to whitespace should fail"
    # Accept either our custom validation error or generic validation error
    assert b'URL cannot be empty' in res.data or b'Invalid or unsupported URL' in res.data or b'validation' in res.data.lower()
    # Test 10: UPDATE to invalid protocol should fail (javascript:)
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": "javascript:alert(document.domain)"}),
    )
    assert res.status_code == 400, "Updating watch URL to XSS attempt should fail"
    assert b'Invalid or unsupported URL' in res.data or b'protocol' in res.data.lower()
    # Test 11: UPDATE to file:// protocol should fail (unless ALLOW_FILE_URI is set)
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": "file:///etc/passwd"}),
    )
    assert res.status_code == 400, "Updating watch URL to file:// should fail by default"
    # Test 12: UPDATE other fields without URL should succeed
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"title": "Updated title without URL change"}),
    )
    assert res.status_code == 200, "Updating other fields without URL should succeed"
    # Test 13: Verify URL is still valid after non-URL update
    res = client.get(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key}
    )
    assert res.json.get('url') == test_url, "URL should remain unchanged"
    assert res.json.get('title') == "Updated title without URL change"
    # Test 14: UPDATE to valid different URL should succeed
    new_valid_url = test_url + "?new=param"
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key, 'content-type': 'application/json'},
        data=json.dumps({"url": new_valid_url}),
    )
    assert res.status_code == 200, "Updating to valid different URL should succeed"
    # Test 15: Verify URL was actually updated
    res = client.get(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key}
    )
    assert res.json.get('url') == new_valid_url, "URL should be updated to new valid URL"
    # Test 16: CREATE with XSS in URL parameters should fail
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": "http://example.com?xss=<script>alert(1)</script>"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
        follow_redirects=True
    )
    # This should fail because of suspicious characters check
    assert res.status_code == 400, "Creating watch with XSS in URL params should fail"
    # Cleanup
    client.delete(
        url_for("watch", uuid=watch_uuid),
        headers={'x-api-key': api_key},
    )
    delete_all_watches(client)

View File

@@ -0,0 +1,805 @@
#!/usr/bin/env python3
"""
Comprehensive security and edge case tests for the API.
Tests critical areas that were identified as gaps in the existing test suite.
"""
import time
import json
import threading
import uuid as uuid_module
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
import os
def set_original_response(datastore_path):
    """Write the baseline HTML served by the live test endpoint into *datastore_path*."""
    baseline_html = """<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
</body>
</html>
"""
    target = os.path.join(datastore_path, "endpoint-content.txt")
    # The live test server serves this file back as the watched page content.
    with open(target, "w") as f:
        f.write(baseline_html)
    return None
def is_valid_uuid(val):
    """Return True when *val* (coerced to str) parses as a UUID, else False."""
    try:
        uuid_module.UUID(str(val))
    except ValueError:
        return False
    return True
# ============================================================================
# TIER 1: CRITICAL SECURITY TESTS
# ============================================================================
def test_api_path_traversal_in_uuids(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that path traversal attacks via UUID parameter are blocked.
    Addresses CVE-like vulnerabilities where ../../../ in UUID could access arbitrary files.

    Uses hard-coded /api/v1/... paths (not url_for) on purpose, so the raw
    traversal sequences reach the routing layer unmodified.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Create a valid watch first
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url, "title": "Valid watch"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201
    valid_uuid = res.json.get('uuid')
    # Test 1: Path traversal with ../../../
    res = client.get(
        f"/api/v1/watch/../../etc/passwd",
        headers={'x-api-key': api_key}
    )
    assert res.status_code in [400, 404], "Path traversal should be rejected"
    # Test 2: Encoded path traversal
    res = client.get(
        "/api/v1/watch/..%2F..%2F..%2Fetc%2Fpasswd",
        headers={'x-api-key': api_key}
    )
    assert res.status_code in [400, 404], "Encoded path traversal should be rejected"
    # Test 3: Double-encoded path traversal
    res = client.get(
        "/api/v1/watch/%2e%2e%2f%2e%2e%2f%2e%2e%2f",
        headers={'x-api-key': api_key}
    )
    assert res.status_code in [400, 404], "Double-encoded traversal should be rejected"
    # Test 4: Try to access datastore file
    res = client.get(
        "/api/v1/watch/../url-watches.json",
        headers={'x-api-key': api_key}
    )
    assert res.status_code in [400, 404], "Access to datastore should be blocked"
    # Test 5: Null byte injection
    res = client.get(
        f"/api/v1/watch/{valid_uuid}%00.json",
        headers={'x-api-key': api_key}
    )
    # Should either work (ignoring null byte) or reject - but not crash
    assert res.status_code in [200, 400, 404]
    # Test 6: DELETE with path traversal
    res = client.delete(
        "/api/v1/watch/../../datastore/url-watches.json",
        headers={'x-api-key': api_key}
    )
    assert res.status_code in [400, 404, 405], "DELETE with traversal should be blocked (405=method not allowed is also acceptable)"
    # Cleanup
    client.delete(url_for("watch", uuid=valid_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_injection_via_headers_and_proxy(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that injection attacks via headers and proxy fields are properly sanitized.
    Addresses XSS and injection vulnerabilities.

    NOTE(review): several asserts intentionally accept both 201 and 400 because
    the API may either store these values verbatim (they are request metadata,
    not rendered HTML) or reject them at validation time.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: XSS in headers
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "headers": {
                "User-Agent": "<script>alert(1)</script>",
                "X-Custom": "'; DROP TABLE watches; --"
            }
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Headers are metadata used for HTTP requests, not HTML rendering
    # Storing them as-is is expected behavior
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        # Verify headers are stored (API returns JSON, not HTML, so no XSS risk)
        res = client.get(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
        assert res.status_code == 200
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 2: Null bytes in headers
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "headers": {"X-Test": "value\x00null"}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should handle null bytes gracefully (reject or sanitize)
    assert res.status_code in [201, 400]
    # Test 3: Malformed proxy string
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "proxy": "http://evil.com:8080@victim.com"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should reject invalid proxy format
    assert res.status_code == 400
    # Test 4: Control characters in notification title
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "notification_title": "Test\r\nInjected-Header: value"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should accept but sanitize control characters
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_large_payload_dos(client, live_server, measure_memory_usage, datastore_path):
    """
    Test that excessively large payloads are rejected to prevent DoS.
    Addresses memory leak issues found in changelog.

    Each sub-test cleans up any watch it managed to create so later tests
    start from an empty watch list.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Huge ignore_text array
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "ignore_text": ["a" * 10000] * 100  # 1MB of data
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should either accept (with limits) or reject
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 2: Massive headers object
    huge_headers = {f"X-Header-{i}": "x" * 1000 for i in range(100)}
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "headers": huge_headers
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should reject or truncate (413 = Payload Too Large)
    assert res.status_code in [201, 400, 413]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 3: Huge browser_steps array
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "browser_steps": [
                {"operation": "click", "selector": "#test" * 1000, "optional_value": ""}
            ] * 100
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should reject or limit
    assert res.status_code in [201, 400, 413]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 4: Extremely long title
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "title": "x" * 100000  # 100KB title
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should reject (exceeds maxLength: 5000)
    assert res.status_code == 400
    delete_all_watches(client)
def test_api_utf8_encoding_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Test UTF-8 encoding edge cases that have caused bugs on Windows.
    Addresses 18+ encoding bugs from changelog.

    Covers: multi-script Unicode round-trip, Unicode in URL query strings,
    embedded null bytes, and a leading BOM in the title.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Unicode in title (should work)
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "title": "Test 中文 Ελληνικά 日本語 🔥"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201
    watch_uuid = res.json.get('uuid')
    # Verify it round-trips correctly
    res = client.get(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    assert res.status_code == 200
    assert "中文" in res.json.get('title')
    client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 2: Unicode in URL query parameters
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url + "?search=日本語"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should handle URL encoding properly
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 3: Null byte in title (should be rejected or sanitized)
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "title": "Test\x00Title"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should handle gracefully
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 4: BOM (Byte Order Mark) in title
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "title": "\ufeffTest with BOM"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_concurrency_race_conditions(client, live_server, measure_memory_usage, datastore_path):
    """
    Test concurrent API requests to detect race conditions.
    Addresses 20+ concurrency bugs from changelog.

    NOTE: the Flask test client is not thread-safe, so "concurrency" here is
    approximated with rapid sequential requests and update/delete-while-checking
    patterns; true parallel coverage belongs in integration tests over HTTP.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Create a watch
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url, "title": "Concurrency test"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201
    watch_uuid = res.json.get('uuid')
    wait_for_all_checks(client)
    # Test 1: Concurrent updates to same watch
    # Note: Flask test client is not thread-safe, so we test sequential updates instead
    # Real concurrency issues would be caught in integration tests with actual HTTP requests
    results = []
    for i in range(10):
        try:
            r = client.put(
                url_for("watch", uuid=watch_uuid),
                data=json.dumps({"title": f"Title {i}"}),
                headers={'content-type': 'application/json', 'x-api-key': api_key},
            )
            results.append(r.status_code)
        except Exception as e:
            results.append(str(e))
    # All updates should succeed (200) without crashes
    assert all(r == 200 for r in results), f"Some updates failed: {results}"
    # Test 2: Update while watch is being checked
    # Queue a recheck
    client.get(
        url_for("watch", uuid=watch_uuid, recheck=True),
        headers={'x-api-key': api_key}
    )
    # Immediately update it
    res = client.put(
        url_for("watch", uuid=watch_uuid),
        data=json.dumps({"title": "Updated during check"}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should succeed without error
    assert res.status_code == 200
    # Test 3: Delete watch that's being processed
    # Create another watch
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    watch_uuid2 = res.json.get('uuid')
    # Queue it for checking
    client.get(url_for("watch", uuid=watch_uuid2, recheck=True), headers={'x-api-key': api_key})
    # Immediately delete it
    res = client.delete(url_for("watch", uuid=watch_uuid2), headers={'x-api-key': api_key})
    # Should succeed or return appropriate error
    assert res.status_code in [204, 404, 400]
    # Cleanup
    client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
# ============================================================================
# TIER 2: IMPORTANT FUNCTIONALITY TESTS
# ============================================================================
def test_api_time_validation_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Test time_between_check validation edge cases.

    Covers zero, negative, all-null and overflow-sized intervals when the
    watch opts out of the global default schedule, plus a valid baseline.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Zero interval
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "time_between_check_use_default": False,
            "time_between_check": {"seconds": 0}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 400, "Zero interval should be rejected"
    # Test 2: Negative interval
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "time_between_check_use_default": False,
            "time_between_check": {"seconds": -100}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 400, "Negative interval should be rejected"
    # Test 3: All fields null with use_default=false
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "time_between_check_use_default": False,
            "time_between_check": {"weeks": None, "days": None, "hours": None, "minutes": None, "seconds": None}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 400, "All null intervals should be rejected when not using default"
    # Test 4: Extremely large interval (overflow risk)
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "time_between_check_use_default": False,
            "time_between_check": {"weeks": 999999999}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should either accept (with limits) or reject
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 5: Valid minimal interval (should work)
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "time_between_check_use_default": False,
            "time_between_check": {"seconds": 60}
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    assert res.status_code == 201
    watch_uuid = res.json.get('uuid')
    client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_browser_steps_validation(client, live_server, measure_memory_usage, datastore_path):
    """
    Test browser_steps validation for invalid operations and structures.

    Schema-level checks (missing fields, additionalProperties) must be rejected
    at create time; semantic checks (unknown operation names) may legitimately
    be deferred to runtime, hence the [201, 400] acceptances.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Empty browser step
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "browser_steps": [
                {"operation": "", "selector": "", "optional_value": ""}
            ]
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should accept (empty is valid as null)
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 2: Invalid operation type
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "browser_steps": [
                {"operation": "invalid_operation", "selector": "#test", "optional_value": ""}
            ]
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should accept (validation happens at runtime) or reject
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 3: Missing required fields in browser step
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "browser_steps": [
                {"operation": "click"}  # Missing selector and optional_value
            ]
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should be rejected due to schema validation
    assert res.status_code == 400
    # Test 4: Extra fields in browser step
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "browser_steps": [
                {"operation": "click", "selector": "#test", "optional_value": "", "extra_field": "value"}
            ]
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should be rejected due to additionalProperties: false
    assert res.status_code == 400
    delete_all_watches(client)
def test_api_queue_manipulation(client, live_server, measure_memory_usage, datastore_path):
    """
    Test queue behavior under stress and edge cases.

    Creates a modest batch of watches (well below the app's MAX_QUEUE_SIZE),
    triggers a recheck-all, and verifies everything drains without error.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Create many watches rapidly
    watch_uuids = []
    for i in range(20):
        res = client.post(
            url_for("createwatch"),
            data=json.dumps({"url": test_url, "title": f"Watch {i}"}),
            headers={'content-type': 'application/json', 'x-api-key': api_key},
        )
        if res.status_code == 201:
            watch_uuids.append(res.json.get('uuid'))
    assert len(watch_uuids) == 20, "Should be able to create 20 watches"
    # Test 2: Recheck all when watches exist
    res = client.get(
        url_for("createwatch", recheck_all='1'),
        headers={'x-api-key': api_key},
    )
    # Should return success (200 or 202 for background processing)
    assert res.status_code in [200, 202]
    # Test 3: Verify queue doesn't overflow with moderate load
    # The app has MAX_QUEUE_SIZE = 5000, we're well below that
    wait_for_all_checks(client)
    # Cleanup
    for uuid in watch_uuids:
        client.delete(url_for("watch", uuid=uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
# ============================================================================
# TIER 3: EDGE CASES & POLISH
# ============================================================================
def test_api_history_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Test history API with invalid timestamps and edge cases.

    NOTE(review): timestamps are sorted as strings here; that is fine while all
    snapshot timestamps have the same digit count, but would misorder mixed
    lengths - confirm if this test ever spans epochs of different widths.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Create watch and generate history
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({"url": test_url}),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    watch_uuid = res.json.get('uuid')
    wait_for_all_checks(client)
    # Test 1: Get history with invalid timestamp
    res = client.get(
        url_for("watchsinglehistory", uuid=watch_uuid, timestamp="invalid"),
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 404, "Invalid timestamp should return 404"
    # Test 2: Future timestamp
    res = client.get(
        url_for("watchsinglehistory", uuid=watch_uuid, timestamp="9999999999"),
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 404, "Future timestamp should return 404"
    # Test 3: Negative timestamp
    res = client.get(
        url_for("watchsinglehistory", uuid=watch_uuid, timestamp="-1"),
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 404, "Negative timestamp should return 404"
    # Test 4: Diff with reversed timestamps (from > to)
    # First get actual timestamps
    res = client.get(
        url_for("watchhistory", uuid=watch_uuid),
        headers={'x-api-key': api_key}
    )
    if len(res.json) >= 2:
        timestamps = sorted(res.json.keys())
        # Try reversed order
        res = client.get(
            url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp=timestamps[-1], to_timestamp=timestamps[0]),
            headers={'x-api-key': api_key}
        )
        # Should either work (show reverse diff) or return error
        assert res.status_code in [200, 400]
    # Cleanup
    client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_notification_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Test notification configuration edge cases.

    Covers: notification URLs with unsupported schemes (apprise may validate
    these only at send time), an out-of-enum notification_format, and an
    explicitly empty notification_urls list.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)
    test_url = url_for('test_endpoint', _external=True)
    # Test 1: Invalid notification URL
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "notification_urls": ["invalid://url", "ftp://test.com"]
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should accept (apprise validates at runtime) or reject
    assert res.status_code in [201, 400]
    if res.status_code == 201:
        watch_uuid = res.json.get('uuid')
        client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    # Test 2: Invalid notification format
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "notification_format": "invalid_format"
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should be rejected by schema
    assert res.status_code == 400
    # Test 3: Empty notification arrays
    res = client.post(
        url_for("createwatch"),
        data=json.dumps({
            "url": test_url,
            "notification_urls": []
        }),
        headers={'content-type': 'application/json', 'x-api-key': api_key},
    )
    # Should accept (empty is valid)
    assert res.status_code == 201
    watch_uuid = res.json.get('uuid')
    client.delete(url_for("watch", uuid=watch_uuid), headers={'x-api-key': api_key})
    delete_all_watches(client)
def test_api_tag_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Exercise tag/group API edge cases: empty titles, XSS payloads,
    path-traversal-looking titles, and oversized titles.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    json_headers = {'content-type': 'application/json', 'x-api-key': api_key}

    def create_tag(title):
        # Helper: POST a tag with the given title and return the raw response.
        return client.post(url_for("tag"), data=json.dumps({"title": title}), headers=json_headers)

    # Test 1: Empty tag title
    # Should be rejected (empty title)
    assert create_tag("").status_code == 400

    # Test 2: XSS in tag title
    response = create_tag("<script>alert(1)</script>")
    # Should accept but sanitize
    if response.status_code == 201:
        tag_uuid = response.json.get('uuid')
        # Verify title is stored safely
        response = client.get(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})
        # Should be escaped or sanitized
        client.delete(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})

    # Test 3: Path traversal in tag title
    response = create_tag("../../etc/passwd")
    # Should accept (it's just a string, not a path)
    if response.status_code == 201:
        tag_uuid = response.json.get('uuid')
        client.delete(url_for("tag", uuid=tag_uuid), headers={'x-api-key': api_key})

    # Test 4: Very long tag title
    # Should be rejected (exceeds maxLength)
    assert create_tag("x" * 10000).status_code == 400
def test_api_authentication_edge_cases(client, live_server, measure_memory_usage, datastore_path):
    """
    Test API authentication edge cases.

    Verifies that a missing, bogus, injection-style, oversized, or
    wrong-case API key is rejected with HTTP 403, and that the real
    key is accepted with HTTP 200.
    """
    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
    set_original_response(datastore_path=datastore_path)

    # Test 1: Missing API key
    res = client.get(url_for("createwatch"))
    assert res.status_code == 403, "Missing API key should be forbidden"

    # Tests 2-4: invalid keys — bogus value, injection attempt, oversized key
    for bad_key in ("invalid_key_12345",
                    "key<script>alert(1)</script>",
                    "x" * 10000):
        res = client.get(
            url_for("createwatch"),
            headers={'x-api-key': bad_key}
        )
        assert res.status_code == 403, "Invalid API key should be forbidden"

    # Test 5: Case sensitivity of API key.
    # swapcase() flips every letter; if the key contains no letters the
    # "wrong case" key would equal the real key and wrongly return 200,
    # so only run this check when the flip actually changes the key.
    wrong_case_key = api_key.swapcase()
    if wrong_case_key != api_key:
        res = client.get(
            url_for("createwatch"),
            headers={'x-api-key': wrong_case_key}
        )
        # Should be forbidden (keys are case-sensitive)
        assert res.status_code == 403, "Wrong case API key should be forbidden"

    # Test 6: Valid API key should work
    res = client.get(
        url_for("createwatch"),
        headers={'x-api-key': api_key}
    )
    assert res.status_code == 200, "Valid API key should work"

File diff suppressed because it is too large Load Diff

View File

@@ -64,6 +64,19 @@ def is_safe_valid_url(test_url):
import re
import validators
# Validate input type first - must be a non-empty string
if test_url is None:
logger.warning('URL validation failed: URL is None')
return False
if not isinstance(test_url, str):
logger.warning(f'URL validation failed: URL must be a string, got {type(test_url).__name__}')
return False
if not test_url.strip():
logger.warning('URL validation failed: URL is empty or whitespace only')
return False
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'

View File

@@ -16,13 +16,6 @@ services:
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=TRACE
#
# Install additional Python packages (processor plugins, etc.)
# Packages are installed at container startup and cached to avoid reinstalling on every restart
# Example: Install the OSINT reconnaissance processor plugin
# - EXTRA_PACKAGES=changedetection-osint-processor
# Multiple packages can be installed by separating with spaces:
# - EXTRA_PACKAGES=changedetection-osint-processor another-plugin
#
#
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000

View File

@@ -1,28 +0,0 @@
#!/bin/bash
# Container entrypoint: optionally installs extra pip packages listed in the
# EXTRA_PACKAGES environment variable, then execs the container's main command.
#
# A marker file in the datastore records which package set was last installed,
# so packages are not reinstalled on every container restart.
set -e

INSTALLED_MARKER="/datastore/.extra_packages_installed"
CURRENT_PACKAGES="$EXTRA_PACKAGES"

if [ -n "$EXTRA_PACKAGES" ]; then
    # (Re)install only when the marker is missing or the package list changed.
    if [ ! -f "$INSTALLED_MARKER" ] || [ "$(cat "$INSTALLED_MARKER" 2>/dev/null)" != "$CURRENT_PACKAGES" ]; then
        echo "Installing extra packages: $EXTRA_PACKAGES"
        # Under 'set -e', running pip3 and then checking $? never reaches the
        # failure branch (the script aborts first) — so test the command
        # directly in the 'if' instead.
        # $EXTRA_PACKAGES is deliberately unquoted so multiple space-separated
        # package names expand into separate pip arguments.
        if pip3 install --no-cache-dir $EXTRA_PACKAGES; then
            echo "$CURRENT_PACKAGES" > "$INSTALLED_MARKER"
            echo "Extra packages installed successfully"
        else
            echo "ERROR: Failed to install extra packages" >&2
            exit 1
        fi
    else
        echo "Extra packages already installed: $EXTRA_PACKAGES"
    fi
fi

# Execute the main command (the image CMD, or a docker-compose 'command:')
exec "$@"

View File

@@ -183,15 +183,30 @@ components:
properties:
weeks:
type: integer
minimum: 0
maximum: 52000
nullable: true
days:
type: integer
minimum: 0
maximum: 365000
nullable: true
hours:
type: integer
minimum: 0
maximum: 8760000
nullable: true
minutes:
type: integer
minimum: 0
maximum: 525600000
nullable: true
seconds:
type: integer
description: Time intervals between checks
minimum: 0
maximum: 31536000000
nullable: true
description: Time intervals between checks. All fields must be non-negative. At least one non-zero value required when not using default settings.
time_between_check_use_default:
type: boolean
default: true
@@ -200,7 +215,9 @@ components:
type: array
items:
type: string
description: Notification URLs for this web page change monitor (watch)
maxLength: 1000
maxItems: 100
description: Notification URLs for this web page change monitor (watch). Maximum 100 URLs.
notification_title:
type: string
description: Custom notification title
@@ -224,14 +241,19 @@ components:
operation:
type: string
maxLength: 5000
nullable: true
selector:
type: string
maxLength: 5000
nullable: true
optional_value:
type: string
maxLength: 5000
nullable: true
required: [operation, selector, optional_value]
description: Browser automation steps
additionalProperties: false
maxItems: 100
description: Browser automation steps. Maximum 100 steps allowed.
processor:
type: string
enum: [restock_diff, text_json_diff]

View File

@@ -51,9 +51,9 @@ linkify-it-py
# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels
# Pinned to 44.x for ARM compatibility and sslyze compatibility (sslyze requires <45)
# Pinned for ARM compatibility (some releases lack pre-built ARM wheels)
# Also pinned because dependabot wants specific versions
cryptography==44.0.0
cryptography==46.0.3
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814