from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
from . import auth, validate_openapi_request
from ..validate_url import is_safe_valid_url
import json

# Number of URLs at or above which import switches to background processing
IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD = 20


def default_content_type(content_type='text/plain'):
    """Decorator to set a default Content-Type header if none is provided.

    Args:
        content_type: Value written to the request environment's
            ``CONTENT_TYPE`` key when the incoming request has no content type.

    Returns:
        A decorator that wraps a view function; the wrapped function is
        called with its arguments unchanged.
    """
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            if not request.content_type:
                # Set default content type in the request environment
                request.environ['CONTENT_TYPE'] = content_type
            return f(*args, **kwargs)
        return wrapper
    return decorator


def convert_query_param_to_type(value, schema_property):
    """
    Convert a query parameter string to the appropriate type based on schema definition.

    Args:
        value: String value from query parameter
        schema_property: Schema property definition with 'type' or 'anyOf' field

    Returns:
        Converted value in the appropriate type

    Raises:
        ValueError: If the value cannot be converted to the declared type
            (invalid JSON object, non-numeric integer/number, or whatever
            ``strtobool`` raises for an unrecognised boolean string).

    Supports both OpenAPI 3.1 formats:
    - type: [string, 'null'] (array format)
    - anyOf: [{type: string}, {type: null}] (anyOf format)
    """
    prop_type = schema_property.get('type')

    if isinstance(prop_type, list):
        # OpenAPI 3.1 type arrays: use the first non-null type, if any
        prop_type = next((t for t in prop_type if t != 'null'), None)
    elif 'anyOf' in schema_property:
        # anyOf schemas (older format): use the first non-null type, if any
        prop_type = next(
            (
                option.get('type')
                for option in schema_property['anyOf']
                if option.get('type') and option.get('type') != 'null'
            ),
            None,
        )

    # Handle array type (e.g., notification_urls)
    if prop_type == 'array':
        # Support both comma-separated and JSON array format
        if value.startswith('['):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                # Not valid JSON after all - fall back to comma-separated parsing
                pass
        return [v.strip() for v in value.split(',')]

    # Handle object type (e.g., time_between_check, headers)
    if prop_type == 'object':
        try:
            return json.loads(value)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON object for field: {value}") from e

    # Handle boolean type
    if prop_type == 'boolean':
        return strtobool(value)

    # Handle integer type
    if prop_type == 'integer':
        return int(value)

    # Handle number type (float)
    if prop_type == 'number':
        return float(value)

    # Default: return as string
    return value


class Import(Resource):
    """API resource that creates watches from a newline-separated list of URLs."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @default_content_type('text/plain')  #3547 #3542
    @validate_openapi_request('importWatches')
    def post(self):
        """Import a list of watched URLs with optional watch configuration.

        The request body is read as UTF-8 text with one URL per line; blank
        lines are ignored. Query parameters other than ``tag``, ``tag_uuids``,
        ``dedupe`` and ``proxy`` are treated as watch configuration, converted
        according to the watch schema and passed to every created watch.

        Returns:
            ``(list_of_new_uuids, 200)`` when fewer than
            ``IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD`` URLs are imported,
            ``(status_dict, 202)`` when the import is handed to a background
            thread, or ``(error_message, 400)`` when any parameter, URL or the
            body encoding is invalid.
        """
        from . import get_watch_schema_properties

        # Special parameters that are NOT watch configuration
        special_params = {'tag', 'tag_uuids', 'dedupe', 'proxy'}

        extras = {}

        # Handle special 'proxy' parameter
        proxy = request.args.get('proxy')
        if proxy:
            plist = self.datastore.proxy_list
            # Guard against an empty/None proxy list before the membership test,
            # otherwise `proxy in None` would raise instead of returning a 400.
            if not plist or proxy not in plist:
                proxy_list_str = ', '.join(plist) if plist else 'none configured'
                return f"Invalid proxy choice, currently supported proxies are '{proxy_list_str}'", 400
            extras['proxy'] = proxy

        # Handle special 'dedupe' parameter
        try:
            dedupe = strtobool(request.args.get('dedupe', 'true'))
        except ValueError as e:
            # Same treatment as boolean watch parameters below: bad input is a 400
            return f"Invalid value for parameter 'dedupe': {str(e)}", 400

        # Handle special 'tag' and 'tag_uuids' parameters
        tags = request.args.get('tag')
        tag_uuids = request.args.get('tag_uuids')
        if tag_uuids:
            tag_uuids = tag_uuids.split(',')

        # Extract ALL other query parameters as watch configuration
        # Get schema from OpenAPI spec (replaces old schema_create_watch)
        schema_properties = get_watch_schema_properties()
        for param_name, param_value in request.args.items():
            # Skip special parameters
            if param_name in special_params:
                continue

            # Reject anything that is not in the schema (unknown parameter)
            if param_name not in schema_properties:
                return f"Unknown watch configuration parameter: {param_name}", 400

            # Convert to appropriate type based on schema
            try:
                extras[param_name] = convert_query_param_to_type(param_value, schema_properties[param_name])
            except (ValueError, json.JSONDecodeError) as e:
                return f"Invalid value for parameter '{param_name}': {str(e)}", 400

        # Validate processor if provided
        if 'processor' in extras:
            from changedetectionio.processors import available_processors
            available = [p[0] for p in available_processors()]
            if extras['processor'] not in available:
                return f"Invalid processor '{extras['processor']}'. Available processors: {', '.join(available)}", 400

        # Validate fetch_backend if provided
        if 'fetch_backend' in extras:
            from changedetectionio.content_fetchers import available_fetchers
            available = [f[0] for f in available_fetchers()]
            fetch_backend = extras['fetch_backend']
            # Also allow 'system' and extra_browser_* patterns
            is_valid = (
                fetch_backend == 'system'
                or fetch_backend in available
                or fetch_backend.startswith('extra_browser_')
            )
            if not is_valid:
                return f"Invalid fetch_backend '{fetch_backend}'. Available: system, {', '.join(available)}", 400

        # Validate notification_urls if provided
        if 'notification_urls' in extras:
            from wtforms import ValidationError
            from changedetectionio.api.Notifications import validate_notification_urls
            try:
                validate_notification_urls(extras['notification_urls'])
            except ValidationError as e:
                return f"Invalid notification_urls: {str(e)}", 400

        try:
            urls = request.get_data().decode('utf8').splitlines()
        except UnicodeDecodeError:
            # Malformed body bytes are a client error, not a server fault
            return "Invalid request body - expected UTF-8 encoded text", 400

        # Clean and validate URLs upfront
        urls_to_import = []
        seen = set()  # URLs already queued from this request (only used when dedupe is on)
        for raw_url in urls:
            url = raw_url.strip()
            if not url:
                continue

            # Validate URL
            if not is_safe_valid_url(url):
                return f"Invalid or unsupported URL - {url}", 400

            # Check for duplicates if dedupe is enabled. Nothing has been added
            # to the datastore yet, so repeats inside this same request have to
            # be tracked here as well as checked against existing watches.
            if dedupe:
                if url in seen or self.datastore.url_exists(url):
                    continue
                seen.add(url)

            urls_to_import.append(url)

        # For small imports, process synchronously for immediate feedback
        if len(urls_to_import) < IMPORT_SWITCH_TO_BACKGROUND_THRESHOLD:
            added = []
            for url in urls_to_import:
                new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
                added.append(new_uuid)
            return added, 200

        # For large imports (>= threshold), process in background thread
        import threading
        from loguru import logger

        def import_watches_background():
            """Background thread to import watches - discarded after completion."""
            try:
                added_count = 0
                for url in urls_to_import:
                    try:
                        self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
                        added_count += 1
                    except Exception as e:
                        # Best effort: one failing URL must not abort the rest of the batch
                        logger.error(f"Error importing URL {url}: {e}")

                logger.info(f"Background import complete: {added_count} watches created")
            except Exception as e:
                logger.error(f"Error in background import: {e}")

        # Start background thread and return immediately
        thread = threading.Thread(target=import_watches_background, daemon=True, name="ImportWatches-Background")
        thread.start()

        return {'status': f'Importing {len(urls_to_import)} URLs in background', 'count': len(urls_to_import)}, 202