changedetection.io/changedetectionio/is_safe_url.py

"""
URL redirect validation module for preventing open redirect vulnerabilities.

This module provides functionality to safely validate redirect URLs, ensuring they:
1. Point to internal routes only (no external redirects)
2. Are properly normalized (preventing browser parsing differences)
3. Match registered Flask routes (no fake/non-existent pages)
4. Are fully logged for security monitoring

References:
- https://flask-login.readthedocs.io/ (safe redirect patterns)
- https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-v-user-logins
- https://www.pythonkitchen.com/how-prevent-open-redirect-vulnerab-flask/
"""

from urllib.parse import urlparse, urljoin
from flask import request
from loguru import logger


def is_safe_url(target, app):
    """
    Validate that a redirect URL is safe to prevent open redirect vulnerabilities.

    This follows Flask/Werkzeug best practices by ensuring the redirect URL:
    1. Is a relative path starting with exactly one '/'
    2. Does not start with '//' (double-slash attack)
    3. Has no external protocol handlers
    4. Points to a valid registered route in the application
    5. Is properly normalized to prevent browser parsing differences

    Args:
        target: The URL to validate (e.g., '/settings', '/login#top')
        app: The Flask application instance (needed for route validation)

    Returns:
        bool: True if the URL is safe for redirection, False otherwise

    Examples:
        >>> is_safe_url('/settings', app)
        True
        >>> is_safe_url('//evil.com', app)
        False
        >>> is_safe_url('/settings#general', app)
        True
        >>> is_safe_url('/fake-page', app)
        False
    """
    if not target:
        return False

    # Normalize the URL to prevent browser parsing differences
    # Strip whitespace and replace backslashes (which some browsers interpret as forward slashes)
    target = target.strip()
    target = target.replace('\\', '/')

    # First, check if it starts with // or more (double-slash attack)
    if target.startswith('//'):
        logger.warning(f"Blocked redirect attempt with double-slash: {target}")
        return False

    # Parse the URL to check for scheme and netloc
    parsed = urlparse(target)

    # Block any URL with a scheme (http://, https://, javascript:, etc.)
    if parsed.scheme:
        logger.warning(f"Blocked redirect attempt with scheme: {target}")
        return False

    # Block any URL with a network location (netloc)
    # This catches patterns like //evil.com, user@host, etc.
    if parsed.netloc:
        logger.warning(f"Blocked redirect attempt with netloc: {target}")
        return False

    # At this point, we have a relative URL with no scheme or netloc
    # Use urljoin to resolve it and verify it points to the same host
    ref_url = urlparse(request.host_url)
    test_url = urlparse(urljoin(request.host_url, target))

    # Check: ensure the resolved URL has the same netloc as current host
    if not (test_url.scheme in ('http', 'https') and ref_url.netloc == test_url.netloc):
        logger.warning(f"Blocked redirect attempt with mismatched netloc: {target}")
        return False

    # Additional validation: Check if the URL matches a registered route
    # This prevents redirects to non-existent pages or unintended endpoints
    try:
        # Get the path without query string and fragment
        # Fragments (like #general) are automatically stripped by urlparse
        path = parsed.path

        # Create a URL adapter bound to the server name
        adapter = app.url_map.bind(ref_url.netloc)

        # Try to match the path to a registered route
        # This will raise NotFound if the route doesn't exist
        endpoint, values = adapter.match(path, return_rule=False)

        # Block redirects to static file endpoints - these are catch-all routes
        # that would match arbitrary paths, potentially allowing unintended redirects
        if endpoint in ('static_content', 'static', 'static_flags'):
            logger.warning(f"Blocked redirect to static endpoint: {target}")
            return False

        # Successfully matched a valid route
        logger.debug(f"Validated safe redirect to endpoint '{endpoint}': {target}")
        return True

    except Exception as e:
        # Route doesn't exist or can't be matched
        logger.warning(f"Blocked redirect to non-existent route: {target} (error: {e})")
        return False