Files
changedetection.io/changedetectionio/blueprint/ui/diff.py
2025-12-19 11:38:52 +01:00

312 lines
13 KiB
Python

from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory
import re
import importlib
from loguru import logger
from markupsafe import Markup
from changedetectionio.diff import (
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
def construct_blueprint(datastore: ChangeDetectionStore):
diff_blueprint = Blueprint('ui_diff', __name__, template_folder="../ui/templates")
@diff_blueprint.app_template_filter('diff_unescape_difference_spans')
def diff_unescape_difference_spans(content):
    """Escape *content* for HTML, then restore only the known diff highlight spans.

    The whole input is HTML-escaped first (emulating Jinja2's auto-escape), so
    any markup contained in the watched content is rendered inert. Afterwards
    only the escaped forms of our own diff ``<span>`` tags are turned back
    into real tags:

    - outer spans: ``style`` equal to REMOVED_STYLE or ADDED_STYLE, followed by
      ``role`` (deletion/insertion/note), ``aria-label`` and ``title``;
    - inner spans: ``style`` equal to REMOVED_INNER_STYLE or ADDED_INNER_STYLE
      with no further attributes;
    - closing ``</span>`` tags, but never more of them than spans were opened.

    Args:
        content: Value to render; it is converted with ``str()``. Any falsy
            value yields an empty result.

    Returns:
        Markup: the escaped text with the recognised diff spans restored.
    """
    from markupsafe import escape

    if not content:
        return Markup('')

    # Step 1: escape everything like Jinja2 would (this makes it XSS-safe).
    # Convert to a plain str once so the regex/replace calls below work on
    # ordinary text rather than on a Markup instance.
    escaped_content = str(escape(str(content)))

    # Step 2: unescape only our exact diff spans generated by apply_html_color_to_body()
    # Pattern matches the exact structure:
    # <span style="{STYLE}" role="{ROLE}" aria-label="{LABEL}" title="{TITLE}">
    # The attribute values exclude '&', so a match can never run across
    # another escaped entity (quote, angle bracket, ...).
    result = re.sub(
        rf'&lt;span style=&#34;({re.escape(REMOVED_STYLE)}|{re.escape(ADDED_STYLE)})&#34; '
        rf'role=&#34;(deletion|insertion|note)&#34; '
        rf'aria-label=&#34;([^&]+?)&#34; '
        rf'title=&#34;([^&]+?)&#34;&gt;',
        r'<span style="\1" role="\2" aria-label="\3" title="\4">',
        escaped_content,
        flags=re.IGNORECASE
    )

    # Inner span opening tags carry only a style attribute (the darker
    # background used for the changed parts within a line).
    result = re.sub(
        rf'&lt;span style=&#34;({re.escape(REMOVED_INNER_STYLE)}|{re.escape(ADDED_INNER_STYLE)})&#34;&gt;',
        r'<span style="\1">',
        result,
        flags=re.IGNORECASE
    )

    # Every literal '<' of the input was escaped above, so each remaining
    # '<span style=' is one that we restored ourselves.
    open_count = result.count('<span style=')

    # Restore at most as many closing tags as spans were opened. A single
    # counted replace() is one left-to-right pass; it replaces the first
    # `open_count` occurrences (or all of them when fewer exist, none when
    # open_count is 0), matching a one-at-a-time replacement loop without
    # rescanning the string for every tag.
    result = result.replace('&lt;/span&gt;', '</span>', open_count)

    return Markup(result)
@diff_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
    """
    Render the history/diff page for a watch.

    This route is processor-aware: it delegates rendering to the processor's
    difference.py module, allowing different processor types to provide
    custom visualizations:
    - text_json_diff: Text/HTML diff with syntax highlighting
    - restock_diff: Could show price charts and stock history
    - image_diff: Could show image comparison slider/overlay

    Each processor implements processors/{type}/difference.py::render()
    If a processor doesn't have a difference module (or the module has no
    render() function), falls back to text_json_diff.

    Args:
        uuid: UUID of the watch, or the literal 'first' (mainly for testing)
            to pick a watch from the datastore automatically.

    Returns:
        Whatever the selected render() returns, or a redirect to the watch
        list (with a flashed error) when the watch cannot be found.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        watch_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, leave uuid untouched so the lookup below
        # fails and takes the normal "not found" redirect instead of raising
        # IndexError from pop() on an empty list.
        # NOTE(review): list.pop() returns the LAST key, not the first one —
        # identical when there is a single watch; confirm this is intended.
        if watch_uuids:
            uuid = watch_uuids.pop()

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's difference module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

        # Call the processor's render() function
        if hasattr(processor_module, 'render'):
            return processor_module.render(
                watch=watch,
                datastore=datastore,
                request=request,
                url_for=url_for,
                render_template=render_template,
                flash=flash,
                redirect=redirect
            )
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this one
        # clause covers both.
        logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}")

    # Fallback: if processor doesn't have difference module, use text_json_diff as default
    from changedetectionio.processors.text_json_diff.difference import render as default_render
    return default_render(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        render_template=render_template,
        flash=flash,
        redirect=redirect
    )
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['GET'])
@login_optionally_required
def diff_history_page_extract_GET(uuid):
    """
    Render the data extraction form for a watch.

    This route is processor-aware: it delegates to the processor's
    extract.py module, allowing different processor types to provide
    custom extraction interfaces.

    Each processor implements processors/{type}/extract.py::render_form()
    If a processor doesn't have an extract module (or the module has no
    render_form() function), falls back to the base implementation in
    changedetectionio.processors.extract.

    Args:
        uuid: UUID of the watch, or the literal 'first' (mainly for testing)
            to pick a watch from the datastore automatically.

    Returns:
        Whatever the selected render_form() returns, or a redirect to the
        watch list (with a flashed error) when the watch cannot be found.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        watch_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, leave uuid untouched so the lookup below
        # fails and takes the normal "not found" redirect instead of raising
        # IndexError from pop() on an empty list.
        # NOTE(review): list.pop() returns the LAST key, not the first one —
        # identical when there is a single watch; confirm this is intended.
        if watch_uuids:
            uuid = watch_uuids.pop()

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's extract module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

        # Call the processor's render_form() function
        if hasattr(processor_module, 'render_form'):
            return processor_module.render_form(
                watch=watch,
                datastore=datastore,
                request=request,
                url_for=url_for,
                render_template=render_template,
                flash=flash,
                redirect=redirect
            )
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this one
        # clause covers both.
        logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

    # Fallback: if processor doesn't have extract module, use base processors.extract as default
    from changedetectionio.processors.extract import render_form as default_render_form
    return default_render_form(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        render_template=render_template,
        flash=flash,
        redirect=redirect
    )
@diff_blueprint.route("/diff/<string:uuid>/extract", methods=['POST'])
@login_optionally_required
def diff_history_page_extract_POST(uuid):
    """
    Process the data extraction request.

    This route is processor-aware: it delegates to the processor's
    extract.py module, allowing different processor types to provide
    custom extraction logic.

    Each processor implements processors/{type}/extract.py::process_extraction()
    If a processor doesn't have an extract module (or the module has no
    process_extraction() function), falls back to the base implementation
    in changedetectionio.processors.extract.

    Args:
        uuid: UUID of the watch, or the literal 'first' (mainly for testing)
            to pick a watch from the datastore automatically.

    Returns:
        Whatever the selected process_extraction() returns, or a redirect to
        the watch list (with a flashed error) when the watch cannot be found.
    """
    # More for testing, possible to return the first/only
    if uuid == 'first':
        watch_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, leave uuid untouched so the lookup below
        # fails and takes the normal "not found" redirect instead of raising
        # IndexError from pop() on an empty list.
        # NOTE(review): list.pop() returns the LAST key, not the first one —
        # identical when there is a single watch; confirm this is intended.
        if watch_uuids:
            uuid = watch_uuids.pop()

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's extract module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.extract')

        # Call the processor's process_extraction() function
        if hasattr(processor_module, 'process_extraction'):
            return processor_module.process_extraction(
                watch=watch,
                datastore=datastore,
                request=request,
                url_for=url_for,
                make_response=make_response,
                send_from_directory=send_from_directory,
                flash=flash,
                redirect=redirect
            )
    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this one
        # clause covers both.
        logger.warning(f"Processor {processor_name} does not have an extract module, falling back to base extractor: {e}")

    # Fallback: if processor doesn't have extract module, use base processors.extract as default
    from changedetectionio.processors.extract import process_extraction as default_process_extraction
    return default_process_extraction(
        watch=watch,
        datastore=datastore,
        request=request,
        url_for=url_for,
        make_response=make_response,
        send_from_directory=send_from_directory,
        flash=flash,
        redirect=redirect
    )
@diff_blueprint.route("/diff/<string:uuid>/processor-asset/<string:asset_name>", methods=['GET'])
@login_optionally_required
def processor_asset(uuid, asset_name):
    """
    Serve processor-specific binary assets (images, files, etc.).

    This route is processor-aware: it delegates to the processor's
    difference.py module, allowing different processor types to serve
    custom assets without embedding them as base64 in templates.

    This solves memory issues with large binary data (e.g., screenshots)
    by streaming them as separate HTTP responses instead of embedding
    in the HTML template.

    Each processor implements processors/{type}/difference.py::get_asset()
    which returns (binary_data, content_type, cache_control_header), or
    None when the requested asset does not exist.

    Example URLs:
    - /diff/{uuid}/processor-asset/before
    - /diff/{uuid}/processor-asset/after
    - /diff/{uuid}/processor-asset/rendered_diff

    Args:
        uuid: UUID of the watch, or the literal 'first' (mainly for testing)
            to pick a watch from the datastore automatically.
        asset_name: Name of the asset, passed through to get_asset().

    Returns:
        A response carrying the asset bytes with its Content-Type (and
        Cache-Control when one is supplied), or a redirect to the watch list
        (with a flashed error) when the watch cannot be found.

    Aborts with HTTP 404 when the processor has no difference module, does
    not implement get_asset(), or get_asset() returns None.
    """
    # Single function-scope import instead of one per error branch.
    from flask import abort

    # More for testing, possible to return the first/only
    if uuid == 'first':
        watch_uuids = list(datastore.data['watching'].keys())
        # With no watches at all, leave uuid untouched so the lookup below
        # fails and takes the normal "not found" redirect instead of raising
        # IndexError from pop() on an empty list.
        # NOTE(review): list.pop() returns the LAST key, not the first one —
        # identical when there is a single watch; confirm this is intended.
        if watch_uuids:
            uuid = watch_uuids.pop()

    try:
        watch = datastore.data['watching'][uuid]
    except KeyError:
        flash("No history found for the specified link, bad link?", "error")
        return redirect(url_for('watchlist.index'))

    # Get the processor type for this watch
    processor_name = watch.get('processor', 'text_json_diff')

    try:
        # Try to import the processor's difference module
        processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference')

        # Call the processor's get_asset() function
        if hasattr(processor_module, 'get_asset'):
            result = processor_module.get_asset(
                asset_name=asset_name,
                watch=watch,
                datastore=datastore,
                request=request
            )

            if result is None:
                # abort() raises, so nothing below runs for a missing asset.
                abort(404, description=f"Asset '{asset_name}' not found")

            binary_data, content_type, cache_control = result

            response = make_response(binary_data)
            response.headers['Content-Type'] = content_type
            # Only set Cache-Control when the processor supplied a value.
            if cache_control:
                response.headers['Cache-Control'] = cache_control
            return response
        else:
            logger.warning(f"Processor {processor_name} does not implement get_asset()")
            abort(404, description=f"Processor '{processor_name}' does not support assets")

    except ImportError as e:
        # ModuleNotFoundError is a subclass of ImportError, so this one
        # clause covers both.
        logger.warning(f"Processor {processor_name} does not have a difference module: {e}")
        abort(404, description=f"Processor '{processor_name}' not found")
return diff_blueprint