# changedetection.io/changedetectionio/blueprint/ui/diff.py

from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
from flask_babel import gettext

import re
import importlib
from loguru import logger
from markupsafe import Markup

from changedetectionio.diff import (
    REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
    REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
    ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
    CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
    CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required


def construct_blueprint(datastore: ChangeDetectionStore):
    diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")

    @diff_blueprint.app_template_filter('diff_unescape_difference_spans')
    def diff_unescape_difference_spans(content):
        """Escape ``content`` for HTML, then re-enable only our own diff spans.

        Registered as the Jinja2 template filter ``diff_unescape_difference_spans``.

        The whole input is HTML-escaped first, so no markup from the input
        survives. Afterwards only escaped ``<span>`` opening tags that match the
        exact attribute layout and style constants imported from
        ``changedetectionio.diff`` are turned back into real tags, together with
        at most as many ``</span>`` closing tags as opening tags were restored.

        :param content: Text to render; converted with ``str()``. Any falsy
            value yields an empty ``Markup``.
        :return: ``Markup`` wrapping the escaped text with the diff spans restored.
        """
        from markupsafe import escape

        if not content:
            return Markup('')

        # Step 1: Escape everything like Jinja2 would (this makes it XSS-safe).
        # Converted to a plain str once, so the regex and str operations below
        # all work on ordinary text.
        escaped_content = str(escape(str(content)))

        # Step 2: Unescape only our exact diff spans.
        # Pattern matches the exact structure:
        # <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
        # The label/title groups exclude '&', so they can never swallow another
        # escaped entity (and therefore never another escaped tag or quote).
        result = re.sub(
            rf'&lt;span style=&#34;({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})&#34; '
            rf'role=&#34;(deletion|insertion|note)&#34; '
            rf'aria-label=&#34;([^&]+?)&#34; '
            rf'title=&#34;([^&]+?)&#34;&gt;',
            r'<span style="\1" role="\2" aria-label="\3" title="\4">',
            escaped_content,
            flags=re.IGNORECASE
        )

        # Unescape inner span opening tags (style attribute only, no other attributes)
        result = re.sub(
            rf'&lt;span style=&#34;({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})&#34;&gt;',
            r'<span style="\1">',
            result,
            flags=re.IGNORECASE
        )

        # Unescape closing tags, but only as many as we opened. After escaping,
        # every literal '<span style=' in ``result`` is one we restored above.
        open_count = result.count('<span style=')
        close_count = escaped_content.count('&lt;/span&gt;')

        # A single replace() with a count restores the first N closing tags in one
        # pass; replacing one tag per loop iteration rescanned the string each time.
        result = result.replace('&lt;/span&gt;', '</span>', min(open_count, close_count))

        return Markup(result)

    @diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def diff_history_page(uuid):
        """
        Render the history/diff page for a watch.

        This route is processor-aware: it delegates rendering to the processor's
        difference.py module, allowing different processor types to provide
        custom visualizations:
        - text_json_diff: Text/HTML diff with syntax highlighting
        - restock_diff: Could show price charts and stock history
        - image_diff: Could show image comparison slider/overlay

        Each processor implements processors/{type}/difference.py::render()
        If the processor's difference module cannot be imported, or it has no
        render() function, the text_json_diff render() is used instead.

        :param uuid: UUID of the watch, or the literal 'first' (mostly for
            testing) to use the last key of the watch list.
        :return: The response produced by the processor's render(), or a redirect
            to the watchlist (with a flashed error) when the watch is unknown or
            has fewer than two history snapshots.
        """

        # More for testing, possible to return the first/only
        if uuid == 'first':
            watch_uuids = list(datastore.data['watching'].keys())
            # With no watches there is nothing to pop(); keep 'first' so the lookup
            # below fails and the user gets the normal "bad link" redirect instead
            # of an unhandled IndexError.
            if watch_uuids:
                uuid = watch_uuids.pop()

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            flash(gettext("No history found for the specified link, bad link?"), "error")
            return redirect(url_for('watchlist.index'))

        # A diff needs two snapshots to compare
        dates = list(watch.history.keys())
        if len(dates) < 2:
            flash(gettext("Not enough history (2 snapshots required) to show difference page for this watch."), "error")
            return redirect(url_for('watchlist.index'))

        # Get the processor type for this watch
        processor_name = watch.get('processor', 'text_json_diff')

        try:
            # Try to import the processor's difference module
            processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

            # Call the processor's render() function
            if hasattr(processor_module, 'render'):
                return processor_module.render(
                    watch=watch,
                    datastore=datastore,
                    request=request,
                    url_for=url_for,
                    render_template=render_template,
                    flash=flash,
                    redirect=redirect
                )
        except ImportError as e:
            # ModuleNotFoundError is a subclass of ImportError, so this covers both.
            logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")

        # Fallback: if processor doesn't have difference module, use text_json_diff as default
        from changedetectionio.processors.text_json_diff.difference import render as default_render
        return default_render(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
    @login_optionally_required
    def diff_history_page_extract_GET(uuid):
        """
        Render the data extraction form for a watch.

        This route is processor-aware: it delegates to the processor's
        extract.py module, allowing different processor types to provide
        custom extraction interfaces.

        Each processor implements processors/{type}/extract.py::render_form()
        If the processor's extract module cannot be imported, or it has no
        render_form() function, the base changedetectionio.processors.extract
        render_form() is used instead.

        :param uuid: UUID of the watch, or the literal 'first' (mostly for
            testing) to use the last key of the watch list.
        :return: The response produced by render_form(), or a redirect to the
            watchlist (with a flashed error) when the watch is unknown.
        """
        # More for testing, possible to return the first/only
        if uuid == 'first':
            watch_uuids = list(datastore.data['watching'].keys())
            # With no watches there is nothing to pop(); keep 'first' so the lookup
            # below fails and the user gets the normal "bad link" redirect instead
            # of an unhandled IndexError.
            if watch_uuids:
                uuid = watch_uuids.pop()

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            flash(gettext("No history found for the specified link, bad link?"), "error")
            return redirect(url_for('watchlist.index'))

        # Get the processor type for this watch
        processor_name = watch.get('processor', 'text_json_diff')

        try:
            # Try to import the processor's extract module
            processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

            # Call the processor's render_form() function
            if hasattr(processor_module, 'render_form'):
                return processor_module.render_form(
                    watch=watch,
                    datastore=datastore,
                    request=request,
                    url_for=url_for,
                    render_template=render_template,
                    flash=flash,
                    redirect=redirect
                )
        except ImportError as e:
            # ModuleNotFoundError is a subclass of ImportError, so this covers both.
            logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

        # Fallback: if processor doesn't have extract module, use base processors.extract as default
        from changedetectionio.processors.extract import render_form as default_render_form
        return default_render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
    @login_optionally_required
    def diff_history_page_extract_POST(uuid):
        """
        Process the data extraction request.

        This route is processor-aware: it delegates to the processor's
        extract.py module, allowing different processor types to provide
        custom extraction logic.

        Each processor implements processors/{type}/extract.py::process_extraction()
        If the processor's extract module cannot be imported, or it has no
        process_extraction() function, the base changedetectionio.processors.extract
        process_extraction() is used instead.

        :param uuid: UUID of the watch, or the literal 'first' (mostly for
            testing) to use the last key of the watch list.
        :return: The response produced by process_extraction(), or a redirect to
            the watchlist (with a flashed error) when the watch is unknown.
        """
        # More for testing, possible to return the first/only
        if uuid == 'first':
            watch_uuids = list(datastore.data['watching'].keys())
            # With no watches there is nothing to pop(); keep 'first' so the lookup
            # below fails and the user gets the normal "bad link" redirect instead
            # of an unhandled IndexError.
            if watch_uuids:
                uuid = watch_uuids.pop()

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            flash(gettext("No history found for the specified link, bad link?"), "error")
            return redirect(url_for('watchlist.index'))

        # Get the processor type for this watch
        processor_name = watch.get('processor', 'text_json_diff')

        try:
            # Try to import the processor's extract module
            processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

            # Call the processor's process_extraction() function
            if hasattr(processor_module, 'process_extraction'):
                return processor_module.process_extraction(
                    watch=watch,
                    datastore=datastore,
                    request=request,
                    url_for=url_for,
                    make_response=make_response,
                    send_from_directory=send_from_directory,
                    flash=flash,
                    redirect=redirect
                )
        except ImportError as e:
            # ModuleNotFoundError is a subclass of ImportError, so this covers both.
            logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

        # Fallback: if processor doesn't have extract module, use base processors.extract as default
        from changedetectionio.processors.extract import process_extraction as default_process_extraction
        return default_process_extraction(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            make_response=make_response,
            send_from_directory=send_from_directory,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
    @login_optionally_required
    def processor_asset(uuid, asset_name):
        """
        Serve processor-specific binary assets (images, files, etc.).

        This route is processor-aware: it delegates to the processor's
        difference.py module, allowing different processor types to serve
        custom assets without embedding them as base64 in templates.

        This solves memory issues with large binary data (e.g., screenshots)
        by streaming them as separate HTTP responses instead of embedding
        in the HTML template.

        Each processor implements processors/{type}/difference.py::get_asset()
        which returns (binary_data, content_type, cache_control_header),
        or None when the asset does not exist.

        Example URLs:
        - /diff/{uuid}/processor-asset/before
        - /diff/{uuid}/processor-asset/after
        - /diff/{uuid}/processor-asset/rendered_diff

        :param uuid: UUID of the watch, or the literal 'first' (mostly for
            testing) to use the last key of the watch list.
        :param asset_name: Name of the asset, passed through to get_asset().
        :return: A response carrying the asset bytes with its Content-Type (and
            Cache-Control, when one is given), or a redirect to the watchlist
            (with a flashed error) when the watch is unknown.
        :raises: Aborts with HTTP 404 when the processor's difference module
            cannot be imported, when it has no get_asset(), or when get_asset()
            returns None.
        """
        # Imported once here rather than separately in every 404 branch below
        from flask import abort

        # More for testing, possible to return the first/only
        if uuid == 'first':
            watch_uuids = list(datastore.data['watching'].keys())
            # With no watches there is nothing to pop(); keep 'first' so the lookup
            # below fails and the user gets the normal "bad link" redirect instead
            # of an unhandled IndexError.
            if watch_uuids:
                uuid = watch_uuids.pop()

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            flash(gettext("No history found for the specified link, bad link?"), "error")
            return redirect(url_for('watchlist.index'))

        # Get the processor type for this watch
        processor_name = watch.get('processor', 'text_json_diff')

        try:
            # Try to import the processor's difference module
            processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

            # Call the processor's get_asset() function
            if hasattr(processor_module, 'get_asset'):
                result = processor_module.get_asset(
                    asset_name=asset_name,
                    watch=watch,
                    datastore=datastore,
                    request=request
                )

                if result is None:
                    abort(404, description=f"Asset '{asset_name}' not found")

                binary_data, content_type, cache_control = result

                response = make_response(binary_data)
                response.headers['Content-Type'] = content_type
                # Only set Cache-Control when the processor supplied a value
                if cache_control:
                    response.headers['Cache-Control'] = cache_control
                return response
            else:
                logger.warning(f"Processor {processor_name} does not implement get_asset()")
                abort(404, description=f"Processor '{processor_name}' does not support assets")

        except ImportError as e:
            # ModuleNotFoundError is a subclass of ImportError, so this covers both.
            logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
            abort(404, description=f"Processor '{processor_name}' not found")

    return diff_blueprint