diff --git a/changedetectionio/api/Watch.py b/changedetectionio/api/Watch.py index 81f8f9ac..77721a1a 100644 --- a/changedetectionio/api/Watch.py +++ b/changedetectionio/api/Watch.py @@ -184,6 +184,17 @@ class WatchSingleHistory(Resource): return response class WatchHistoryDiff(Resource): + """ + Generate diff between two historical snapshots. + + Note: This API endpoint currently returns text-based diffs and works best + with the text_json_diff processor. Future processor types (like image_diff, + restock_diff) may want to implement their own specialized API endpoints + for returning processor-specific data (e.g., price charts, image comparisons). + + The web UI diff page (/diff/) is processor-aware and delegates rendering + to processors/{type}/difference.py::render() for processor-specific visualizations. + """ def __init__(self, **kwargs): # datastore is a black box dependency self.datastore = kwargs['datastore'] diff --git a/changedetectionio/blueprint/ui/views.py b/changedetectionio/blueprint/ui/views.py index ab74fe87..58f970d6 100644 --- a/changedetectionio/blueprint/ui/views.py +++ b/changedetectionio/blueprint/ui/views.py @@ -229,6 +229,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe @views_blueprint.route("/diff/", methods=['POST']) @login_optionally_required def diff_history_page_build_report(uuid): + import importlib from changedetectionio import forms # More for testing, possible to return the first/only @@ -247,7 +248,35 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe ) if not extract_form.validate(): flash("An error occurred, please see below.", "error") - return _render_diff_template(uuid, extract_form) + # Use processor-specific render with the error form + processor_name = watch.get('processor', 'text_json_diff') + try: + processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference') + if hasattr(processor_module, 'render'): + return processor_module.render( + watch=watch, + datastore=datastore, + request=request, + url_for=url_for, + render_template=render_template, + flash=flash, + redirect=redirect, + extract_form=extract_form + ) + except (ImportError, ModuleNotFoundError): + pass + # Fallback to text_json_diff + from changedetectionio.processors.text_json_diff.difference import render as default_render + return default_render( + watch=watch, + datastore=datastore, + request=request, + url_for=url_for, + render_template=render_template, + flash=flash, + redirect=redirect, + extract_form=extract_form + ) else: extract_regex = request.form.get('extract_regex', '').strip() @@ -391,7 +420,63 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe @views_blueprint.route("/diff/", methods=['GET']) @login_optionally_required def diff_history_page(uuid): - return _render_diff_template(uuid) + """ + Render the history/diff page for a watch. + + This route is processor-aware: it delegates rendering to the processor's + difference.py module, allowing different processor types to provide + custom visualizations: + - text_json_diff: Text/HTML diff with syntax highlighting + - restock_diff: Could show price charts and stock history + - image_diff: Could show image comparison slider/overlay + + Each processor implements processors/{type}/difference.py::render() + If a processor doesn't have a difference module, falls back to text_json_diff. + """ + import importlib + + # More for testing, possible to return the first/only + if uuid == 'first': + uuid = list(datastore.data['watching'].keys()).pop() + + try: + watch = datastore.data['watching'][uuid] + except KeyError: + flash("No history found for the specified link, bad link?", "error") + return redirect(url_for('watchlist.index')) + + # Get the processor type for this watch + processor_name = watch.get('processor', 'text_json_diff') + + try: + # Try to import the processor's difference module + processor_module = importlib.import_module(f'changedetectionio.processors.{processor_name}.difference') + + # Call the processor's render() function + if hasattr(processor_module, 'render'): + return processor_module.render( + watch=watch, + datastore=datastore, + request=request, + url_for=url_for, + render_template=render_template, + flash=flash, + redirect=redirect + ) + except (ImportError, ModuleNotFoundError) as e: + logger.warning(f"Processor {processor_name} does not have a difference module, falling back to text_json_diff: {e}") + + # Fallback: if processor doesn't have difference module, use text_json_diff as default + from changedetectionio.processors.text_json_diff.difference import render as default_render + return default_render( + watch=watch, + datastore=datastore, + request=request, + url_for=url_for, + render_template=render_template, + flash=flash, + redirect=redirect + ) @views_blueprint.route("/form/add/quickwatch", methods=['POST']) diff --git a/changedetectionio/processors/text_json_diff/difference.py b/changedetectionio/processors/text_json_diff/difference.py new file mode 100644 index 00000000..33553e71 --- /dev/null +++ b/changedetectionio/processors/text_json_diff/difference.py @@ -0,0 +1,235 @@ +""" +History/diff rendering for text_json_diff processor. + +This module handles the visualization of text/HTML/JSON changes by rendering +a side-by-side or unified diff view with syntax highlighting and change markers. +""" + +import os +import time +from loguru import logger +from markupsafe import Markup + +from changedetectionio import diff, strtobool +from changedetectionio.diff import ( + REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE, + REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, + ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED, + CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED, + CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED +) +from changedetectionio.notification.handler import apply_html_color_to_body + + +# Diff display preferences configuration - single source of truth +DIFF_PREFERENCES_CONFIG = { + 'changesOnly': {'default': True, 'type': 'bool'}, + 'ignoreWhitespace': {'default': False, 'type': 'bool'}, + 'removed': {'default': True, 'type': 'bool'}, + 'added': {'default': True, 'type': 'bool'}, + 'replaced': {'default': True, 'type': 'bool'}, + 'type': {'default': 'diffLines', 'type': 'value'}, +} + + +def build_diff_cell_visualizer(content, resolution=100): + """ + Build a visual cell grid for the diff visualizer. + + Analyzes the content for placemarkers indicating changes and creates a + grid of cells representing the document, with each cell marked as: + - 'deletion' for removed content + - 'insertion' for added content + - 'mixed' for cells containing both deletions and insertions + - empty string for cells with no changes + + Args: + content: The diff content with placemarkers + resolution: Number of cells to create (default 100) + + Returns: + List of dicts with 'class' key for each cell's CSS class + """ + if not content: + return [{'class': ''} for _ in range(resolution)] + now = time.time() + # Work with character positions for better accuracy + content_length = len(content) + + if content_length == 0: + return [{'class': ''} for _ in range(resolution)] + + chars_per_cell = max(1, content_length / resolution) + + # Track change type for each cell + cell_data = {} + + # Placemarkers to detect + change_markers = { + REMOVED_PLACEMARKER_OPEN: 'deletion', + ADDED_PLACEMARKER_OPEN: 'insertion', + CHANGED_PLACEMARKER_OPEN: 'deletion', + CHANGED_INTO_PLACEMARKER_OPEN: 'insertion', + } + + # Find all occurrences of each marker + for marker, change_type in change_markers.items(): + pos = 0 + while True: + pos = content.find(marker, pos) + if pos == -1: + break + + # Calculate which cell this marker falls into + cell_index = min(int(pos / chars_per_cell), resolution - 1) + + if cell_index not in cell_data: + cell_data[cell_index] = change_type + elif cell_data[cell_index] != change_type: + # Mixed changes in this cell + cell_data[cell_index] = 'mixed' + + pos += len(marker) + + # Build the cell list + cells = [] + for i in range(resolution): + change_type = cell_data.get(i, '') + cells.append({'class': change_type}) + + logger.debug(f"Built diff cell visualizer: {len([c for c in cells if c['class']])} cells with changes out of {resolution} in {time.time() - now:.2f}s") + + return cells + + +def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None): + """ + Render the history/diff view for text/JSON/HTML changes. + + Args: + watch: The watch object + datastore: The ChangeDetectionStore instance + request: Flask request object + url_for: Flask url_for function + render_template: Flask render_template function + flash: Flask flash function + redirect: Flask redirect function + extract_form: Optional pre-built extract form (for error cases) + + Returns: + Rendered HTML response + """ + from changedetectionio import forms + + uuid = watch.get('uuid') + + extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] + + # Use provided form or create a new one + if extract_form is None: + extract_form = forms.extractDataForm(formdata=request.form, + data={'extract_regex': request.form.get('extract_regex', '')} + ) + history = watch.history + dates = list(history.keys()) + + # If a "from_version" was requested, then find it (or the closest one) + # Also set "from version" to be the closest version to the one that was last viewed. + + best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed + from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2] + from_version = request.args.get('from_version', from_version_timestamp ) + + # Use the current one if nothing was specified + to_version = request.args.get('to_version', str(dates[-1])) + + try: + to_version_file_contents = watch.get_history_snapshot(timestamp=to_version) + except Exception as e: + logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}") + to_version_file_contents = f"Unable to read to-version at {to_version}.\n" + + try: + from_version_file_contents = watch.get_history_snapshot(timestamp=from_version) + except Exception as e: + logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}") + from_version_file_contents = f"Unable to read to-version {from_version}.\n" + + screenshot_url = watch.get_screenshot() + + system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' + + is_html_webdriver = False + if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): + is_html_webdriver = True + + password_enabled_and_share_is_off = False + if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): + password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access') + + datastore.set_last_viewed(uuid, time.time()) + + # Parse diff preferences from request using config as single source of truth + # Check if this is a user submission (any diff pref param exists in query string) + user_submitted = any(key in request.args for key in DIFF_PREFERENCES_CONFIG.keys()) + + diff_prefs = {} + for key, config in DIFF_PREFERENCES_CONFIG.items(): + if user_submitted: + # User submitted form - missing checkboxes are explicitly OFF + if config['type'] == 'bool': + diff_prefs[key] = strtobool(request.args.get(key, 'off')) + else: + diff_prefs[key] = request.args.get(key, config['default']) + else: + # Initial load - use defaults from config + diff_prefs[key] = config['default'] + + content = diff.render_diff(previous_version_file_contents=from_version_file_contents, + newest_version_file_contents=to_version_file_contents, + include_replaced=diff_prefs['replaced'], + include_added=diff_prefs['added'], + include_removed=diff_prefs['removed'], + include_equal=diff_prefs['changesOnly'], + ignore_junk=diff_prefs['ignoreWhitespace'], + word_diff=diff_prefs['type'] == 'diffWords', + ) + + # Build cell grid visualizer before applying HTML color (so we can detect placemarkers) + diff_cell_grid = build_diff_cell_visualizer(content) + + content = apply_html_color_to_body(n_body=content) + offscreen_content = render_template("diff-offscreen-options.html") + + note = '' + if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]): + note = 'Note: You are not viewing the latest changes.' + + output = render_template("diff.html", + #initial_scroll_line_number=100, + bottom_horizontal_offscreen_contents=offscreen_content, + content=content, + current_diff_url=watch['url'], + diff_cell_grid=diff_cell_grid, + diff_prefs=diff_prefs, + extra_classes='difference-page', + extra_stylesheets=extra_stylesheets, + extra_title=f" - {watch.label} - History", + extract_form=extract_form, + from_version=str(from_version), + is_html_webdriver=is_html_webdriver, + last_error=watch['last_error'], + last_error_screenshot=watch.get_error_snapshot(), + last_error_text=watch.get_error_text(), + newest=to_version_file_contents, + newest_version_timestamp=dates[-1], + note=note, + password_enabled_and_share_is_off=password_enabled_and_share_is_off, + pure_menu_fixed=False, + screenshot=screenshot_url, + to_version=str(to_version), + uuid=uuid, + versions=dates, # All except current/last + watch_a=watch, + ) + return output