diff --git a/changedetectionio/blueprint/ui/views.py b/changedetectionio/blueprint/ui/views.py index 97b7d1ba..04187b55 100644 --- a/changedetectionio/blueprint/ui/views.py +++ b/changedetectionio/blueprint/ui/views.py @@ -5,7 +5,7 @@ import re from loguru import logger from markupsafe import Markup -from changedetectionio.diff import REMOVED_STYLE, ADDED_STYLE +from changedetectionio.diff import REMOVED_STYLE, ADDED_STYLE, DIFF_HTML_LABEL_REMOVED, DIFF_HTML_LABEL_ADDED from changedetectionio.store import ChangeDetectionStore from changedetectionio.auth_decorator import login_optionally_required from changedetectionio import html_tools, diff @@ -27,6 +27,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe # Step 2: Simple regex to unescape only our exact diff spans # Unescape opening tags with exact styles + # This matches the styles used in DIFF_HTML_LABEL_REMOVED, DIFF_HTML_LABEL_ADDED, etc. result = re.sub( rf'<span style="({REMOVED_STYLE}|{ADDED_STYLE})" title="([A-Za-z0-9]+)">', r'', diff --git a/changedetectionio/diff.py b/changedetectionio/diff.py index 1324cb99..b6ace2f4 100644 --- a/changedetectionio/diff.py +++ b/changedetectionio/diff.py @@ -14,6 +14,12 @@ DIFF_LABEL_TEXT_REMOVED = '(removed) {content}' DIFF_LABEL_TEXT_CHANGED = '(changed) {content}' DIFF_LABEL_TEXT_INTO = '(into) {content}' +# Diff HTML label formats (use {content} as placeholder) +DIFF_HTML_LABEL_REMOVED = f'{{content}}' +DIFF_HTML_LABEL_ADDED = f'{{content}}' +DIFF_HTML_LABEL_REPLACED = f'{{content}}' +DIFF_HTML_LABEL_INSERTED = f'{{content}}' + # Compiled regex patterns for performance WHITESPACE_NORMALIZE_RE = re.compile(r'\s+') @@ -78,13 +84,13 @@ def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool content = m.group(1).rstrip() trailing = m.group(1)[len(content):] if len(m.group(1)) > len(content) else '' line_break = '\n' if whole_line_replaced else '' - return f'{content}{trailing}{line_break}' + return f'{DIFF_HTML_LABEL_REMOVED.format(content=content)}{trailing}{line_break}' def replace_added(m): content = m.group(1).rstrip() trailing = m.group(1)[len(content):] if len(m.group(1)) > len(content) else '' line_break = '\n' if whole_line_replaced else '' - return f'{content}{trailing}{line_break}' + return f'{DIFF_HTML_LABEL_ADDED.format(content=content)}{trailing}{line_break}' diff_output = REDLINES_REMOVED_RE.sub(replace_removed, diff_output) diff_output = REDLINES_ADDED_RE.sub(replace_added, diff_output) @@ -209,7 +215,7 @@ def customSequenceMatcher( yield context_lines_to_include elif include_removed and tag == 'delete': if html_colour: - yield [f'{line}' for line in same_slicer(before, alo, ahi)] + yield [DIFF_HTML_LABEL_REMOVED.format(content=line) for line in same_slicer(before, alo, ahi)] else: yield [DIFF_LABEL_TEXT_REMOVED.format(content=line) for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi) elif include_replaced and tag == 'replace': @@ -227,14 +233,14 @@ def customSequenceMatcher( else: # Fall back to line-level diff for multi-line changes or when word_diff disabled if html_colour: - yield [f'{line}' for line in before_lines] + \ - [f'{line}' for line in after_lines] + yield [DIFF_HTML_LABEL_REMOVED.format(content=line) for line in before_lines] + \ + [DIFF_HTML_LABEL_REPLACED.format(content=line) for line in after_lines] else: yield [DIFF_LABEL_TEXT_CHANGED.format(content=line) for line in before_lines] + \ [DIFF_LABEL_TEXT_INTO.format(content=line) for line in after_lines] if include_change_type_prefix else before_lines + after_lines elif include_added and tag == 'insert': if html_colour: - yield [f'{line}' for line in same_slicer(after, blo, bhi)] + yield [DIFF_HTML_LABEL_INSERTED.format(content=line) for line in same_slicer(after, blo, bhi)] else: yield [DIFF_LABEL_TEXT_ADDED.format(content=line) for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi) diff --git a/changedetectionio/tests/test_notification.py b/changedetectionio/tests/test_notification.py index f0fde27e..d14f95cf 100644 --- a/changedetectionio/tests/test_notification.py +++ b/changedetectionio/tests/test_notification.py @@ -459,7 +459,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage def _test_color_notifications(client, notification_body_token): - from changedetectionio.diff import ADDED_STYLE, REMOVED_STYLE + from changedetectionio.diff import ADDED_STYLE, REMOVED_STYLE, DIFF_HTML_LABEL_REMOVED set_original_response() @@ -507,7 +507,7 @@ def _test_color_notifications(client, notification_body_token): with open("test-datastore/notification.txt", 'r') as f: x = f.read() - assert f'Which is across multiple lines' in x + assert DIFF_HTML_LABEL_REMOVED.format(content='Which is across multiple lines') in x client.get( diff --git a/changedetectionio/tests/test_source.py b/changedetectionio/tests/test_source.py index 8a6a1152..771462ca 100644 --- a/changedetectionio/tests/test_source.py +++ b/changedetectionio/tests/test_source.py @@ -4,7 +4,7 @@ import time from flask import url_for from urllib.request import urlopen from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks -from ..diff import ADDED_STYLE +from ..diff import ADDED_STYLE, DIFF_HTML_LABEL_ADDED sleep_time_for_fetch_thread = 3 @@ -53,7 +53,7 @@ def test_check_basic_change_detection_functionality_source(client, live_server, follow_redirects=True ) - assert f'modified head title</title></head>'.encode('utf-8') in res.data + assert f'{DIFF_HTML_LABEL_ADDED.format(content="modified ")}head title</title></head>'.encode('utf-8') in res.data diff --git a/changedetectionio/tests/unit/test_jinja2_security.py b/changedetectionio/tests/unit/test_jinja2_security.py index e385dab3..96822ecd 100644 --- a/changedetectionio/tests/unit/test_jinja2_security.py +++ b/changedetectionio/tests/unit/test_jinja2_security.py @@ -61,7 +61,7 @@ class TestJinja2SSTI(unittest.TestCase): from markupsafe import Markup, escape # Import the constants from diff module - from changedetectionio.diff import REMOVED_STYLE, ADDED_STYLE + from changedetectionio.diff import REMOVED_STYLE, ADDED_STYLE, DIFF_HTML_LABEL_REMOVED, DIFF_HTML_LABEL_ADDED, DIFF_HTML_LABEL_INSERTED # Recreate the filter logic for testing def diff_unescape_difference_spans(content): @@ -89,7 +89,7 @@ class TestJinja2SSTI(unittest.TestCase): return Markup(result) # Test 1: Valid diff spans should be unescaped - valid_diff_content = f'old text\nnew text' + valid_diff_content = f'{DIFF_HTML_LABEL_REMOVED.format(content="old text")}\n{DIFF_HTML_LABEL_INSERTED.format(content="new text")}' result = diff_unescape_difference_spans(valid_diff_content) self.assertIn('bad', # Wrong style - f'bad', # Invalid title chars + f'bad', # Invalid title chars (space not allowed) f'no title', # Missing title ] @@ -132,7 +132,7 @@ class TestJinja2SSTI(unittest.TestCase): self.assertIn('<span', str(result), f"Invalid span remained escaped: {invalid_span}") # Test 4: Mixed content - valid diffs + XSS should only unescape valid parts - mixed_content = f'safe' + mixed_content = f'{DIFF_HTML_LABEL_REMOVED.format(content="safe")}' result = diff_unescape_difference_spans(mixed_content) self.assertIn(' 111) - self.assertIn('110', output) - self.assertIn('111', output) + self.assertIn(DIFF_HTML_LABEL_REMOVED.format(content='110'), output) + self.assertIn(DIFF_HTML_LABEL_ADDED.format(content='111'), output) # Unchanged text should not be wrapped in spans self.assertIn('points by user', output)