diff --git a/changedetectionio/blueprint/ui/views.py b/changedetectionio/blueprint/ui/views.py
index f6afb748..8191c4f7 100644
--- a/changedetectionio/blueprint/ui/views.py
+++ b/changedetectionio/blueprint/ui/views.py
@@ -224,7 +224,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
to_version_file_contents,
include_equal=True,
html_colour=True,
- case_insensitive=datastore.data['settings']['application'].get('ignore_whitespace', False)
+ case_insensitive=datastore.data['settings']['application'].get('ignore_whitespace', False),
)
return render_template("diff.html",
diff --git a/changedetectionio/diff.py b/changedetectionio/diff.py
index 75395fdc..e6e98680 100644
--- a/changedetectionio/diff.py
+++ b/changedetectionio/diff.py
@@ -6,7 +6,7 @@ from typing import List, Iterator, Union
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
-def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool = False) -> str:
+def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool = False, ignore_junk: bool = False) -> str:
"""
Render word-level differences between two lines inline.
@@ -14,6 +14,7 @@ def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool
before_line: Original line text
after_line: Modified line text
html_colour: Use HTML background colors for differences
+ ignore_junk: Ignore whitespace-only changes
Returns:
str: Single line with inline word-level highlighting
@@ -30,7 +31,9 @@ def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool
after_tokens = tokenize(after_line)
# Use SequenceMatcher to find word-level differences
- matcher = difflib.SequenceMatcher(None, before_tokens, after_tokens)
+ # If ignore_junk is True, treat whitespace tokens as junk
+ isjunk = (lambda x: x.strip() == '') if ignore_junk else None
+ matcher = difflib.SequenceMatcher(isjunk, before_tokens, after_tokens)
if html_colour:
result = []
@@ -39,15 +42,45 @@ def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool
result.append(''.join(before_tokens[i1:i2]))
elif tag == 'delete':
deleted = ''.join(before_tokens[i1:i2])
+ # If only whitespace and ignore_junk is enabled, preserve whitespace without marking
+ if ignore_junk and deleted.strip() == '':
+ result.append(deleted)
+ continue
result.append(f'{deleted}')
elif tag == 'insert':
inserted = ''.join(after_tokens[j1:j2])
+ # If only whitespace and ignore_junk is enabled, preserve whitespace without marking
+ if ignore_junk and inserted.strip() == '':
+ result.append(inserted)
+ continue
result.append(f'{inserted}')
elif tag == 'replace':
deleted = ''.join(before_tokens[i1:i2])
inserted = ''.join(after_tokens[j1:j2])
- result.append(f'{deleted}')
- result.append(f'{inserted}')
+ # If both are only whitespace and ignore_junk is enabled, use the after version
+ if ignore_junk and deleted.strip() == '' and inserted.strip() == '':
+ result.append(inserted)
+ continue
+ # When ignore_junk is enabled, filter out whitespace-only tokens from replace operations
+ if ignore_junk:
+ deleted_parts = []
+ inserted_parts = []
+ for token in before_tokens[i1:i2]:
+ if token.strip() != '':
+ deleted_parts.append(token)
+ for token in after_tokens[j1:j2]:
+ if token.strip() != '':
+ inserted_parts.append(token)
+ # Emit one separating space ahead of the change markers (normalized whitespace); the non-whitespace tokens themselves are joined with no separator
+ if deleted_parts or inserted_parts:
+ result.append(' ')
+ if deleted_parts:
+ result.append(f'{"".join(deleted_parts)}')
+ if inserted_parts:
+ result.append(f'{"".join(inserted_parts)}')
+ else:
+ result.append(f'{deleted}')
+ result.append(f'{inserted}')
return ''.join(result)
else:
# Plain text format with markers
@@ -57,14 +90,45 @@ def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool
result.append(''.join(before_tokens[i1:i2]))
elif tag == 'delete':
deleted = ''.join(before_tokens[i1:i2])
+ # If only whitespace and ignore_junk is enabled, preserve whitespace without marking
+ if ignore_junk and deleted.strip() == '':
+ result.append(deleted)
+ continue
result.append(f'[-{deleted}-]')
elif tag == 'insert':
inserted = ''.join(after_tokens[j1:j2])
+ # If only whitespace and ignore_junk is enabled, preserve whitespace without marking
+ if ignore_junk and inserted.strip() == '':
+ result.append(inserted)
+ continue
result.append(f'[+{inserted}+]')
elif tag == 'replace':
deleted = ''.join(before_tokens[i1:i2])
inserted = ''.join(after_tokens[j1:j2])
- result.append(f'[-{deleted}-][+{inserted}+]')
+ # If both are only whitespace and ignore_junk is enabled, use the after version
+ if ignore_junk and deleted.strip() == '' and inserted.strip() == '':
+ result.append(inserted)
+ continue
+ # When ignore_junk is enabled, filter out whitespace-only tokens from replace operations
+ if ignore_junk:
+ deleted_parts = []
+ inserted_parts = []
+ for token in before_tokens[i1:i2]:
+ if token.strip() != '':
+ deleted_parts.append(token)
+ for token in after_tokens[j1:j2]:
+ if token.strip() != '':
+ inserted_parts.append(token)
+ # Emit one separating space ahead of the change markers (normalized whitespace); the non-whitespace tokens themselves are joined with no separator
+ if deleted_parts or inserted_parts:
+ result.append(' ')
+ if deleted_parts:
+ result.append(f'[-{"".join(deleted_parts)}-]')
+ if inserted_parts:
+ result.append(f'[+{"".join(inserted_parts)}+]')
+ else:
+ result.append(f'[-{deleted}-]')
+ result.append(f'[+{inserted}+]')
return ''.join(result)
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
@@ -82,7 +146,8 @@ def customSequenceMatcher(
html_colour: bool = False,
word_diff: bool = False,
context_lines: int = 0,
- case_insensitive: bool = False
+ case_insensitive: bool = False,
+ ignore_junk: bool = False
) -> Iterator[List[str]]:
"""
Compare two sequences and yield differences based on specified parameters.
@@ -99,13 +164,23 @@ def customSequenceMatcher(
word_diff (bool): Use word-level diffing for replaced lines
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
case_insensitive (bool): Perform case-insensitive comparison
+ ignore_junk (bool): Ignore whitespace-only changes
Yields:
List[str]: Differences between sequences
"""
- # Prepare sequences for comparison (lowercase if case-insensitive)
- compare_before = [line.lower() for line in before] if case_insensitive else before
- compare_after = [line.lower() for line in after] if case_insensitive else after
+ # Prepare sequences for comparison (lowercase if case-insensitive, normalize whitespace if ignore_junk)
+ import re
+ def prepare_line(line):
+ if case_insensitive:
+ line = line.lower()
+ if ignore_junk:
+ # Normalize whitespace: collapse every run of whitespace (spaces, tabs, newlines - anything \s matches) into a single space
+ line = re.sub(r'\s+', ' ', line)
+ return line
+
+ compare_before = [prepare_line(line) for line in before]
+ compare_after = [prepare_line(line) for line in after]
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=compare_before, b=compare_after)
@@ -157,7 +232,17 @@ def customSequenceMatcher(
# Use word-level diff for single line replacements when enabled
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
- inline_diff = render_inline_word_diff(before_lines[0], after_lines[0], html_colour)
+ inline_diff = render_inline_word_diff(before_lines[0], after_lines[0], html_colour, ignore_junk)
+ # Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
+ if ignore_junk:
+ # Check if the output contains any change markers
+ if html_colour:
+ has_changes = ' str:
"""
Render the difference between two file contents.
@@ -204,7 +290,8 @@ def render_diff(
html_colour (bool): Use HTML background colors for differences
word_diff (bool): Use word-level diffing for replaced lines
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
- case_insensitive (bool): Perform case-insensitive comparison
+ case_insensitive (bool): Perform case-insensitive comparison. By default test_json_diff/process.py is case-sensitive, so this follows the same logic
+ ignore_junk (bool): Ignore whitespace-only changes
Returns:
str: Rendered difference
@@ -227,7 +314,8 @@ def render_diff(
html_colour=html_colour,
word_diff=word_diff,
context_lines=context_lines,
- case_insensitive=case_insensitive
+ case_insensitive=case_insensitive,
+ ignore_junk=ignore_junk
)
def flatten(lst: List[Union[str, List[str]]]) -> str:
diff --git a/changedetectionio/notification_service.py b/changedetectionio/notification_service.py
index eb6bc721..cd4216eb 100644
--- a/changedetectionio/notification_service.py
+++ b/changedetectionio/notification_service.py
@@ -76,15 +76,15 @@ class NotificationService:
prev_snapshot = watch.get_history_snapshot(dates[-2])
current_snapshot = watch.get_history_snapshot(dates[-1])
- case_insensitive=self. datastore.data['settings']['application'].get('ignore_whitespace', False)
+ ignore_junk = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
n_object.update({
'current_snapshot': snapshot_contents,
- 'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable, case_insensitive=case_insensitive),
- 'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep, case_insensitive=case_insensitive),
- 'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable, case_insensitive=case_insensitive),
- 'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True, case_insensitive=case_insensitive),
- 'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep, case_insensitive=case_insensitive),
+ 'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable, ignore_junk=ignore_junk),
+ 'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep, ignore_junk=ignore_junk),
+ 'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable, ignore_junk=ignore_junk),
+ 'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True, ignore_junk=ignore_junk),
+ 'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep, ignore_junk=ignore_junk),
'notification_timestamp': now,
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
diff --git a/changedetectionio/tests/unit/test_notification_diff.py b/changedetectionio/tests/unit/test_notification_diff.py
index 477fcd57..bfd7ee1f 100755
--- a/changedetectionio/tests/unit/test_notification_diff.py
+++ b/changedetectionio/tests/unit/test_notification_diff.py
@@ -228,5 +228,124 @@ Line 4"""
self.assertIn('200', output)
self.assertIn('background-color', output)
+ def test_ignore_junk_word_diff_enabled(self):
+ """Test ignore_junk with word_diff=True"""
+ before = "The quick brown fox"
+ after = "The quick brown fox"
+
+ # Without ignore_junk, should detect whitespace changes
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=False)
+ # Should show some difference (whitespace changes)
+ self.assertTrue(len(output.strip()) > 0, "Should detect whitespace changes when ignore_junk=False")
+
+ # With ignore_junk, should ignore whitespace-only changes
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=True)
+ lines = [l for l in output.split("\n") if l.strip()]
+ self.assertEqual(len(lines), 0, "Should ignore whitespace-only changes when ignore_junk=True")
+
+ def test_ignore_junk_word_diff_disabled(self):
+ """Test ignore_junk with word_diff=False"""
+ before = "Hello World"
+ after = "Hello World"
+
+ # Without ignore_junk, should detect line change
+ output = diff.render_diff(before, after, include_equal=False, word_diff=False, ignore_junk=False)
+ self.assertIn('(changed)', output)
+ self.assertIn('(into)', output)
+
+ # With ignore_junk enabled and word_diff disabled
+ # When ignore_junk is enabled, whitespace is normalized at line level so lines match
+ output = diff.render_diff(before, after, include_equal=False, word_diff=False, ignore_junk=True)
+ # Lines should be treated as equal
+ lines = [l for l in output.split("\n") if l.strip()]
+ self.assertEqual(len(lines), 0, "Should ignore whitespace differences at line level")
+
+ def test_ignore_junk_with_real_changes(self):
+ """Test ignore_junk doesn't ignore actual word changes"""
+ before = "The quick brown fox"
+ after = "The quick brown cat"
+
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=True)
+
+ # Should still detect the word change (fox -> cat)
+ self.assertIn('[-fox-]', output)
+ self.assertIn('[+cat+]', output)
+ # But shouldn't highlight whitespace differences
+
+ def test_ignore_junk_tabs_vs_spaces(self):
+ """Test ignore_junk treats tabs and spaces as equivalent"""
+ before = "Column1\tColumn2\tColumn3"
+ after = "Column1 Column2 Column3"
+
+ # Without ignore_junk, should detect difference
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=False)
+ self.assertTrue(len(output.strip()) > 0, "Should detect tab vs space differences")
+
+ # With ignore_junk, should ignore tab/space differences
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=True)
+ lines = [l for l in output.split("\n") if l.strip()]
+ self.assertEqual(len(lines), 0, "Should ignore tab vs space differences when ignore_junk=True")
+
+ def test_ignore_junk_html_output(self):
+ """Test ignore_junk with HTML coloring"""
+ before = "Value: 100 points"
+ after = "Value: 200 points"
+
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, html_colour=True, ignore_junk=True)
+
+ # Should only highlight the actual value change
+ self.assertIn('100', output)
+ self.assertIn('200', output)
+ self.assertIn('background-color', output)
+ # Should not create separate spans for whitespace changes
+
+ def test_ignore_junk_case_insensitive_combination(self):
+ """Test ignore_junk combined with case_insensitive"""
+ before = "The QUICK Brown Fox"
+ after = "The quick brown FOX"
+
+ # Both enabled: should ignore case and whitespace
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True,
+ case_insensitive=True, ignore_junk=True)
+ lines = [l for l in output.split("\n") if l.strip()]
+ self.assertEqual(len(lines), 0, "Should ignore both case and whitespace differences")
+
+ # Only case_insensitive: should detect whitespace changes
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True,
+ case_insensitive=True, ignore_junk=False)
+ self.assertTrue(len(output.strip()) > 0, "Should detect whitespace changes")
+
+ # Only ignore_junk: should detect case changes
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True,
+ case_insensitive=False, ignore_junk=True)
+ # Should detect case differences
+ self.assertIn('QUICK', output)
+ self.assertIn('quick', output)
+ self.assertIn('Brown', output)
+ self.assertIn('brown', output)
+ # Should show changes (though may be grouped together)
+ self.assertTrue('[-' in output and '-]' in output, "Should show removed text")
+ self.assertTrue('[+' in output and '+]' in output, "Should show added text")
+
+ def test_ignore_junk_multiline(self):
+ """Test ignore_junk with multiple lines"""
+ before = """Line 1 with spaces
+Line 2 unchanged
+Line 3 with tabs and spaces"""
+
+ after = """Line 1 with spaces
+Line 2 unchanged
+Line 3 with tabs and spaces"""
+
+ # With ignore_junk and include_equal=False nothing should be shown: only whitespace differs, and unchanged lines are omitted
+ output = diff.render_diff(before, after, include_equal=False, word_diff=True, ignore_junk=True)
+ lines = [l for l in output.split("\n") if l.strip()]
+ # Should be empty since only whitespace changed
+ self.assertEqual(len(lines), 0, "Should ignore whitespace changes across multiple lines")
+
+ # Verify Line 2 is not shown as changed
+ self.assertNotIn('[-Line 2-]', output)
+ self.assertNotIn('[+Line 2+]', output)
+
if __name__ == '__main__':
unittest.main()