mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-29 06:17:11 +00:00
Improving diff
This commit is contained in:
+121
-10
@@ -6,6 +6,67 @@ from typing import List, Iterator, Union
|
||||
# Inline CSS applied to removed / added text in HTML output.
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
|
||||
|
||||
def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool = False) -> str:
    """
    Render word-level differences between two lines inline.

    Both lines are split into alternating runs of non-whitespace and
    whitespace, the two token lists are compared with
    difflib.SequenceMatcher, and the opcodes are rendered back into a single
    string. Unchanged tokens are emitted verbatim; removed tokens are emitted
    before added tokens when a run is replaced.

    Args:
        before_line: Original line text
        after_line: Modified line text
        html_colour: Use HTML background colors for differences; when False,
            removed text is wrapped as [-text-] and added text as [+text+]

    Returns:
        str: Single line with inline word-level highlighting
    """
    import re

    # Whitespace runs are kept as tokens so that joining the tokens of an
    # unchanged region reproduces the original spacing exactly.
    token_pattern = r'\S+|\s+'
    before_tokens = re.findall(token_pattern, before_line)
    after_tokens = re.findall(token_pattern, after_line)

    # Choose the two marker wrappers once, so a single opcode loop serves
    # both the HTML and the plain-text output formats.
    if html_colour:
        def mark_removed(text):
            return f'<span style="{REMOVED_STYLE}" title="Removed">{text}</span>'

        def mark_added(text):
            return f'<span style="{ADDED_STYLE}" title="Added">{text}</span>'
    else:
        def mark_removed(text):
            return f'[-{text}-]'

        def mark_added(text):
            return f'[+{text}+]'

    matcher = difflib.SequenceMatcher(None, before_tokens, after_tokens)

    result = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            result.append(''.join(before_tokens[i1:i2]))
            continue
        # 'replace' is rendered as a removal immediately followed by an addition.
        if tag in ('delete', 'replace'):
            result.append(mark_removed(''.join(before_tokens[i1:i2])))
        if tag in ('insert', 'replace'):
            result.append(mark_added(''.join(after_tokens[j1:j2])))

    return ''.join(result)
|
||||
|
||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
    """Return a slice of the list, or a single element if start == end.

    Args:
        lst: List to slice
        start: Start index of the slice
        end: End index of the slice (exclusive)

    Returns:
        List[str]: lst[start:end], or [lst[start]] when start == end

    Raises:
        IndexError: If start == end and start is not a valid index of lst
    """
    if start == end:
        # An empty range is widened to the single element at that position.
        return [lst[start]]
    return lst[start:end]
|
||||
@@ -18,7 +79,9 @@ def customSequenceMatcher(
|
||||
include_added: bool = True,
|
||||
include_replaced: bool = True,
|
||||
include_change_type_prefix: bool = True,
|
||||
html_colour: bool = False
|
||||
html_colour: bool = False,
|
||||
word_diff: bool = False,
|
||||
context_lines: int = 0
|
||||
) -> Iterator[List[str]]:
|
||||
"""
|
||||
Compare two sequences and yield differences based on specified parameters.
|
||||
@@ -32,30 +95,72 @@ def customSequenceMatcher(
|
||||
include_replaced (bool): Include replaced parts
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
html_colour (bool): Use HTML background colors for differences
|
||||
word_diff (bool): Use word-level diffing for replaced lines
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
|
||||
Yields:
|
||||
List[str]: Differences between sequences
|
||||
"""
|
||||
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
|
||||
|
||||
# When context_lines is set and include_equal is False, we need to track which equal lines to include
|
||||
if context_lines > 0 and not include_equal:
|
||||
opcodes = list(cruncher.get_opcodes())
|
||||
# Mark equal ranges that should be included based on context
|
||||
included_equal_ranges = set()
|
||||
|
||||
for i, (tag, alo, ahi, blo, bhi) in enumerate(opcodes):
|
||||
if tag != 'equal':
|
||||
# Include context lines before this change
|
||||
for j in range(max(0, i - 1), i):
|
||||
if opcodes[j][0] == 'equal':
|
||||
prev_alo, prev_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include last N lines of the previous equal block
|
||||
context_start = max(prev_alo, prev_ahi - context_lines)
|
||||
for line_num in range(context_start, prev_ahi):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Include context lines after this change
|
||||
for j in range(i + 1, min(len(opcodes), i + 2)):
|
||||
if opcodes[j][0] == 'equal':
|
||||
next_alo, next_ahi = opcodes[j][1], opcodes[j][2]
|
||||
# Include first N lines of the next equal block
|
||||
context_end = min(next_ahi, next_alo + context_lines)
|
||||
for line_num in range(next_alo, context_end):
|
||||
included_equal_ranges.add(line_num)
|
||||
|
||||
# Remember: Gmail, Outlook, etc. don't support <style>, so styles must be inline.
|
||||
# Gmail: strips <ins> and <del> tags entirely.
|
||||
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
|
||||
if include_equal and tag == 'equal':
|
||||
yield before[alo:ahi]
|
||||
if tag == 'equal':
|
||||
if include_equal:
|
||||
yield before[alo:ahi]
|
||||
elif context_lines > 0:
|
||||
# Only include equal lines that are in the context range
|
||||
context_lines_to_include = [before[i] for i in range(alo, ahi) if i in included_equal_ranges]
|
||||
if context_lines_to_include:
|
||||
yield context_lines_to_include
|
||||
elif include_removed and tag == 'delete':
|
||||
if html_colour:
|
||||
yield [f'<span style="{REMOVED_STYLE}" title="Removed">{line}</span>' for line in same_slicer(before, alo, ahi)]
|
||||
else:
|
||||
yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
|
||||
elif include_replaced and tag == 'replace':
|
||||
if html_colour:
|
||||
yield [f'<span style="{REMOVED_STYLE}" title="Removed">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
|
||||
[f'<span style="{ADDED_STYLE}" title="Replaced">{line}</span>' for line in same_slicer(after, blo, bhi)]
|
||||
before_lines = same_slicer(before, alo, ahi)
|
||||
after_lines = same_slicer(after, blo, bhi)
|
||||
|
||||
# Use word-level diff for single line replacements when enabled
|
||||
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
|
||||
inline_diff = render_inline_word_diff(before_lines[0], after_lines[0], html_colour)
|
||||
yield [inline_diff]
|
||||
else:
|
||||
yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
|
||||
[f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
|
||||
# Fall back to line-level diff for multi-line changes or when word_diff disabled
|
||||
if html_colour:
|
||||
yield [f'<span style="{REMOVED_STYLE}" title="Removed">{line}</span>' for line in before_lines] + \
|
||||
[f'<span style="{ADDED_STYLE}" title="Replaced">{line}</span>' for line in after_lines]
|
||||
else:
|
||||
yield [f"(changed) {line}" for line in before_lines] + \
|
||||
[f"(into) {line}" for line in after_lines] if include_change_type_prefix else before_lines + after_lines
|
||||
elif include_added and tag == 'insert':
|
||||
if html_colour:
|
||||
yield [f'<span style="{ADDED_STYLE}" title="Inserted">{line}</span>' for line in same_slicer(after, blo, bhi)]
|
||||
@@ -72,7 +177,9 @@ def render_diff(
|
||||
line_feed_sep: str = "\n",
|
||||
include_change_type_prefix: bool = True,
|
||||
patch_format: bool = False,
|
||||
html_colour: bool = False
|
||||
html_colour: bool = False,
|
||||
word_diff: bool = True,
|
||||
context_lines: int = 0
|
||||
) -> str:
|
||||
"""
|
||||
Render the difference between two file contents.
|
||||
@@ -88,6 +195,8 @@ def render_diff(
|
||||
include_change_type_prefix (bool): Add prefixes to indicate change types
|
||||
patch_format (bool): Use patch format for output
|
||||
html_colour (bool): Use HTML background colors for differences
|
||||
word_diff (bool): Use word-level diffing for replaced lines
|
||||
context_lines (int): Number of unchanged lines to show around changes (like grep -C)
|
||||
|
||||
Returns:
|
||||
str: Rendered difference
|
||||
@@ -107,7 +216,9 @@ def render_diff(
|
||||
include_added=include_added,
|
||||
include_replaced=include_replaced,
|
||||
include_change_type_prefix=include_change_type_prefix,
|
||||
html_colour=html_colour
|
||||
html_colour=html_colour,
|
||||
word_diff=word_diff,
|
||||
context_lines=context_lines
|
||||
)
|
||||
|
||||
def flatten(lst: List[Union[str, List[str]]]) -> str:
|
||||
|
||||
@@ -77,5 +77,112 @@ class TestDiffBuilder(unittest.TestCase):
|
||||
|
||||
# @todo test blocks of changed, blocks of added, blocks of removed
|
||||
|
||||
def test_word_level_diff(self):
    """Check word-level markers with word_diff on, and line-level prefixes with it off."""
    before = "The quick brown fox jumps over the lazy dog"
    after = "The fast brown cat jumps over the lazy dog"

    # word_diff enabled: only the changed words carry markers, while
    # unchanged words still appear in the output.
    word_output = diff.render_diff(before, after, include_equal=False, word_diff=True)
    for expected in ('[-quick-]', '[+fast+]', '[-fox-]', '[+cat+]', 'brown', 'jumps'):
        self.assertIn(expected, word_output)

    # word_diff disabled: the whole line is reported with line-level prefixes.
    line_output = diff.render_diff(before, after, include_equal=False, word_diff=False)
    for expected in ('(changed)', '(into)'):
        self.assertIn(expected, line_output)
|
||||
|
||||
def test_word_level_diff_html(self):
    """Check that HTML word-level output wraps only the changed word in spans."""
    rendered = diff.render_diff(
        "110 points by user",
        "111 points by user",
        include_equal=False,
        word_diff=True,
        html_colour=True,
    )

    # Only the changed word (110 -> 111) should be highlighted.
    removed_span = '<span style="background-color: #fadad7; color: #b30000;" title="Removed">110</span>'
    added_span = '<span style="background-color: #eaf2c2; color: #406619;" title="Added">111</span>'
    self.assertIn(removed_span, rendered)
    self.assertIn(added_span, rendered)

    # The unchanged tail must appear as plain text, uninterrupted by markup.
    self.assertIn('points by user', rendered)
|
||||
|
||||
def test_context_lines(self):
    """Check how many unchanged lines surround each change for context_lines 0, 1 and 2."""
    before = """Line 1
Line 2
Line 3
Old line
Line 5
Line 6
Line 7
Another old
Line 9
Line 10"""

    after = """Line 1
Line 2
Line 3
New line
Line 5
Line 6
Line 7
Another new
Line 9
Line 10"""

    def non_blank(text):
        # Output lines that contain something other than whitespace.
        return [line for line in text.split("\n") if line.strip()]

    # No context: only the two changed lines are rendered.
    output = diff.render_diff(before, after, include_equal=False, context_lines=0, word_diff=True)
    self.assertEqual(len(non_blank(output)), 2)
    self.assertIn('[-Old-]', output)
    self.assertIn('[+New+]', output)

    # One line of context: the neighbours on each side of both changes appear.
    output = diff.render_diff(before, after, include_equal=False, context_lines=1, word_diff=True)
    for expected in ('Line 3', 'Line 5', 'Line 7', 'Line 9'):
        self.assertIn(expected, output)
    self.assertGreater(len(non_blank(output)), 2)

    # Two lines of context: lines two away from the first change appear as well.
    output = diff.render_diff(before, after, include_equal=False, context_lines=2, word_diff=True)
    for expected in ('Line 2', 'Line 6'):
        self.assertIn(expected, output)
    self.assertGreater(len(non_blank(output)), 6)
|
||||
|
||||
def test_context_lines_with_include_equal(self):
    """Test that context_lines is ignored when include_equal=True"""
    before = """Line 1
Line 2
Changed line
Line 4"""

    after = """Line 1
Line 2
Modified line
Line 4"""

    # With include_equal=True, context_lines should be ignored
    output_with_context = diff.render_diff(before, after, include_equal=True, context_lines=1)
    output_without_context = diff.render_diff(before, after, include_equal=True, context_lines=0)

    # Both should show all lines, including ones outside a 1-line context window
    for output in (output_with_context, output_without_context):
        self.assertIn('Line 1', output)
        self.assertIn('Line 4', output)

    # "Ignored" means the parameter must not alter the output in any way
    self.assertEqual(output_with_context, output_without_context)
|
||||
|
||||
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
|
||||
|
||||
@@ -65,6 +65,8 @@ elementpath==4.1.5
|
||||
|
||||
selenium~=4.31.0
|
||||
|
||||
diff-match-patch
|
||||
|
||||
# https://github.com/pallets/werkzeug/issues/2985
|
||||
# Maybe related to pytest?
|
||||
werkzeug==3.0.6
|
||||
|
||||
Reference in New Issue
Block a user