# changedetection.io/changedetectionio/blueprint/ui/diff.py

from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import os
import time
import re
import importlib
from loguru import logger
from markupsafe import Markup

from changedetectionio.diff import (
    REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
    REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
    ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
    CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
    CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required


def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the ``ui_diff`` blueprint: a diff-span template filter plus the diff/extract routes.

    Args:
        datastore: Application datastore. Watches are read from
            ``datastore.data['watching']`` and the datastore itself is passed
            on to the processor handlers.

    Returns:
        The configured ``Blueprint`` instance.
    """
    diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")

    # Patterns are matched against *escaped* text, hence the &lt; / &#34; / &gt; entities.
    # They only depend on module-level constants, so compile them once per blueprint.

    # Outer span opening tag with full attributes:
    # <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
    outer_span_open_re = re.compile(
        rf'&lt;span style=&#34;({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})&#34; '
        rf'role=&#34;(deletion|insertion|note)&#34; '
        rf'aria-label=&#34;([^&]+?)&#34; '
        rf'title=&#34;([^&]+?)&#34;&gt;',
        flags=re.IGNORECASE
    )
    # Inner span opening tag (style only), used for the changed parts within a line.
    inner_span_open_re = re.compile(
        rf'&lt;span style=&#34;({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})&#34;&gt;',
        flags=re.IGNORECASE
    )
    # Either an opening tag we already unescaped, or a still-escaped closing tag.
    span_token_re = re.compile(r'<span style=|&lt;/span&gt;')

    def _find_watch(uuid):
        """Return the watch stored under *uuid*, or ``None`` when there is no such watch.

        The special value ``'first'`` (mostly for testing) resolves to the last
        key of ``datastore.data['watching']``. With no watches at all the
        lookup simply fails and ``None`` is returned instead of raising.
        """
        watches = datastore.data['watching']
        if uuid == 'first':
            known_uuids = list(watches.keys())
            if known_uuids:
                uuid = known_uuids[-1]

        try:
            return watches[uuid]
        except KeyError:
            return None

    def _watch_not_found():
        """Flash the 'bad link' error and redirect to the watch list."""
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    def _processor_handler(watch, submodule, handler_name, fallback_note):
        """Return ``handler_name`` from the watch processor's ``submodule``, or ``None``.

        ``None`` means the caller should use its default handler: either the
        module could not be imported (a warning is logged) or it does not
        define ``handler_name``.

        Only the import is guarded. An ImportError raised later, while the
        handler itself runs, is a real failure and is not mistaken for a
        missing module.
        """
        processor_name = watch.get('processor', 'text_json_diff')
        try:
            processor_module = importlib.import_module(
                f'changedetectionio.processors.{processor_name}.{submodule}'
            )
        except ImportError as e:  # ModuleNotFoundError is a subclass of ImportError
            logger.warning(f"Processor {processor_name} {fallback_note}: {e}")
            return None

        return getattr(processor_module, handler_name, None)

    @diff_blueprint.app_template_filter('diff_unescape_difference_spans')
    def diff_unescape_difference_spans(content):
        """Escape *content* like Jinja2 auto-escape, then unescape only our own diff spans.

        Args:
            content: Text that may contain the diff ``<span>`` markup.

        Returns:
            ``Markup`` in which everything is HTML-escaped except opening span
            tags that match the known diff styles exactly, and the closing
            tags that pair with them.
        """
        from markupsafe import escape

        if not content:
            return Markup('')

        # Step 1: Escape everything like Jinja2 would (this makes it XSS-safe)
        escaped_content = str(escape(str(content)))

        # Step 2: Unescape only the exact opening tags of our diff spans
        result = outer_span_open_re.sub(
            r'<span style="\1" role="\2" aria-label="\3" title="\4">',
            escaped_content
        )
        result = inner_span_open_re.sub(r'<span style="\1">', result)

        # Step 3: Unescape a closing tag only while one of our spans is open.
        # Walking the tokens in document order keeps the tags balanced: a stray
        # "</span>" in the watched content that precedes any diff span stays
        # escaped instead of using up the closing tag a real span needs.
        pieces = []
        cursor = 0
        depth = 0
        for token in span_token_re.finditer(result):
            pieces.append(result[cursor:token.start()])
            cursor = token.end()
            text = token.group()
            if text != '&lt;/span&gt;':
                depth += 1
            elif depth:
                depth -= 1
                text = '</span>'
            pieces.append(text)
        pieces.append(result[cursor:])

        return Markup(''.join(pieces))

    @diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def diff_history_page(uuid):
        """
        Render the history/diff page for a watch.

        This route is processor-aware: it delegates rendering to the processor's
        difference.py module, allowing different processor types to provide
        custom visualizations:
        - text_json_diff: Text/HTML diff with syntax highlighting
        - restock_diff: Could show price charts and stock history
        - image_diff: Could show image comparison slider/overlay

        Each processor implements processors/{type}/difference.py::render()
        If a processor doesn't have a difference module, falls back to text_json_diff.
        An unknown uuid flashes an error and redirects to the watch list.
        """
        watch = _find_watch(uuid)
        if watch is None:
            return _watch_not_found()

        render = _processor_handler(
            watch, 'difference', 'render',
            "does not have a difference module, falling back to text_json_diff"
        )
        if render is None:
            # Fallback: use text_json_diff as the default renderer
            from changedetectionio.processors.text_json_diff.difference import render as default_render
            render = default_render

        return render(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
    @login_optionally_required
    def diff_history_page_extract_GET(uuid):
        """
        Render the data extraction form for a watch.

        This route is processor-aware: it delegates to the processor's
        extract.py module, allowing different processor types to provide
        custom extraction interfaces.

        Each processor implements processors/{type}/extract.py::render_form()
        If a processor doesn't have an extract module, falls back to the base
        changedetectionio.processors.extract module.
        An unknown uuid flashes an error and redirects to the watch list.
        """
        watch = _find_watch(uuid)
        if watch is None:
            return _watch_not_found()

        render_form = _processor_handler(
            watch, 'extract', 'render_form',
            "does not have an extract module, falling back to base extractor"
        )
        if render_form is None:
            # Fallback: use base processors.extract as default
            from changedetectionio.processors.extract import render_form as default_render_form
            render_form = default_render_form

        return render_form(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            render_template=render_template,
            flash=flash,
            redirect=redirect
        )

    @diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
    @login_optionally_required
    def diff_history_page_extract_POST(uuid):
        """
        Process the data extraction request.

        This route is processor-aware: it delegates to the processor's
        extract.py module, allowing different processor types to provide
        custom extraction logic.

        Each processor implements processors/{type}/extract.py::process_extraction()
        If a processor doesn't have an extract module, falls back to the base
        changedetectionio.processors.extract module.
        An unknown uuid flashes an error and redirects to the watch list.
        """
        watch = _find_watch(uuid)
        if watch is None:
            return _watch_not_found()

        process_extraction = _processor_handler(
            watch, 'extract', 'process_extraction',
            "does not have an extract module, falling back to base extractor"
        )
        if process_extraction is None:
            # Fallback: use base processors.extract as default
            from changedetectionio.processors.extract import process_extraction as default_process_extraction
            process_extraction = default_process_extraction

        return process_extraction(
            watch=watch,
            datastore=datastore,
            request=request,
            url_for=url_for,
            make_response=make_response,
            send_from_directory=send_from_directory,
            flash=flash,
            redirect=redirect
        )

    return diff_blueprint