Files
changedetection.io/changedetectionio/processors/text_json_diff/difference.py
dgtlmoon a748a43224
Some checks failed
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled
ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
"History" page - Use faster server side "difference" rendering, show ignored/triggered rows (#3442)
2025-12-15 15:39:07 +01:00

233 lines
9.7 KiB
Python

"""
History/diff rendering for text_json_diff processor.
This module handles the visualization of text/HTML/JSON changes by rendering
a side-by-side or unified diff view with syntax highlighting and change markers.
"""
import os
import time
from loguru import logger
from changedetectionio import diff, strtobool
from changedetectionio.diff import (
REMOVED_STYLE, ADDED_STYLE, REMOVED_INNER_STYLE, ADDED_INNER_STYLE,
REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED,
ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED,
CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED,
CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED
)
from changedetectionio.notification.handler import apply_html_color_to_body
def build_diff_cell_visualizer(content, resolution=100):
"""
Build a visual cell grid for the diff visualizer.
Analyzes the content for placemarkers indicating changes and creates a
grid of cells representing the document, with each cell marked as:
- 'deletion' for removed content
- 'insertion' for added content
- 'mixed' for cells containing both deletions and insertions
- empty string for cells with no changes
Args:
content: The diff content with placemarkers
resolution: Number of cells to create (default 100)
Returns:
List of dicts with 'class' key for each cell's CSS class
"""
if not content:
return [{'class': ''} for _ in range(resolution)]
now = time.time()
# Work with character positions for better accuracy
content_length = len(content)
if content_length == 0:
return [{'class': ''} for _ in range(resolution)]
chars_per_cell = max(1, content_length / resolution)
# Track change type for each cell
cell_data = {}
# Placemarkers to detect
change_markers = {
REMOVED_PLACEMARKER_OPEN: 'deletion',
ADDED_PLACEMARKER_OPEN: 'insertion',
CHANGED_PLACEMARKER_OPEN: 'deletion',
CHANGED_INTO_PLACEMARKER_OPEN: 'insertion',
}
# Find all occurrences of each marker
for marker, change_type in change_markers.items():
pos = 0
while True:
pos = content.find(marker, pos)
if pos == -1:
break
# Calculate which cell this marker falls into
cell_index = min(int(pos / chars_per_cell), resolution - 1)
if cell_index not in cell_data:
cell_data[cell_index] = change_type
elif cell_data[cell_index] != change_type:
# Mixed changes in this cell
cell_data[cell_index] = 'mixed'
pos += len(marker)
# Build the cell list
cells = []
for i in range(resolution):
change_type = cell_data.get(i, '')
cells.append({'class': change_type})
logger.debug(f"Built diff cell visualizer: {len([c for c in cells if c['class']])} cells with changes out of {resolution} in {time.time() - now:.2f}s")
return cells
# Diff display preferences configuration - single source of truth
DIFF_PREFERENCES_CONFIG = {
'changesOnly': {'default': True, 'type': 'bool'},
'ignoreWhitespace': {'default': False, 'type': 'bool'},
'removed': {'default': True, 'type': 'bool'},
'added': {'default': True, 'type': 'bool'},
'replaced': {'default': True, 'type': 'bool'},
'type': {'default': 'diffLines', 'type': 'value'},
}
def render(watch, datastore, request, url_for, render_template, flash, redirect, extract_form=None):
"""
Render the history/diff view for text/JSON/HTML changes.
Args:
watch: The watch object
datastore: The ChangeDetectionStore instance
request: Flask request object
url_for: Flask url_for function
render_template: Flask render_template function
flash: Flask flash function
redirect: Flask redirect function
extract_form: Optional pre-built extract form (for error cases)
Returns:
Rendered HTML response
"""
from changedetectionio import forms
uuid = watch.get('uuid')
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
history = watch.history
dates = list(history.keys())
# If a "from_version" was requested, then find it (or the closest one)
# Also set "from version" to be the closest version to the one that was last viewed.
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
from_version = request.args.get('from_version', from_version_timestamp )
# Use the current one if nothing was specified
to_version = request.args.get('to_version', str(dates[-1]))
try:
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
except Exception as e:
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
try:
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
except Exception as e:
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
screenshot_url = watch.get_screenshot()
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
datastore.set_last_viewed(uuid, time.time())
# Parse diff preferences from request using config as single source of truth
# Check if this is a user submission (any diff pref param exists in query string)
user_submitted = any(key in request.args for key in DIFF_PREFERENCES_CONFIG.keys())
diff_prefs = {}
for key, config in DIFF_PREFERENCES_CONFIG.items():
if user_submitted:
# User submitted form - missing checkboxes are explicitly OFF
if config['type'] == 'bool':
diff_prefs[key] = strtobool(request.args.get(key, 'off'))
else:
diff_prefs[key] = request.args.get(key, config['default'])
else:
# Initial load - use defaults from config
diff_prefs[key] = config['default']
content = diff.render_diff(previous_version_file_contents=from_version_file_contents,
newest_version_file_contents=to_version_file_contents,
include_replaced=diff_prefs['replaced'],
include_added=diff_prefs['added'],
include_removed=diff_prefs['removed'],
include_equal=diff_prefs['changesOnly'],
ignore_junk=diff_prefs['ignoreWhitespace'],
word_diff=diff_prefs['type'] == 'diffWords',
)
# Build cell grid visualizer before applying HTML color (so we can detect placemarkers)
diff_cell_grid = build_diff_cell_visualizer(content)
content = apply_html_color_to_body(n_body=content)
offscreen_content = render_template("diff-offscreen-options.html")
note = ''
if str(from_version) != str(dates[-2]) or str(to_version) != str(dates[-1]):
note = 'Note: You are not viewing the latest changes.'
output = render_template("diff.html",
#initial_scroll_line_number=100,
bottom_horizontal_offscreen_contents=offscreen_content,
content=content,
current_diff_url=watch['url'],
diff_cell_grid=diff_cell_grid,
diff_prefs=diff_prefs,
extra_classes='difference-page',
extra_stylesheets=extra_stylesheets,
extra_title=f" - {watch.label} - History",
extract_form=extract_form,
from_version=str(from_version),
is_html_webdriver=is_html_webdriver,
last_error=watch['last_error'],
last_error_screenshot=watch.get_error_snapshot(),
last_error_text=watch.get_error_text(),
newest=to_version_file_contents,
newest_version_timestamp=dates[-1],
note=note,
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
pure_menu_fixed=False,
screenshot=screenshot_url,
to_version=str(to_version),
uuid=uuid,
versions=dates, # All except current/last
watch_a=watch,
)
return output