Compare commits

...

2 Commits

Author SHA1 Message Date
dgtlmoon
388b280219 Re #4037 - adding test 2026-04-10 11:50:47 +02:00
chaoliang yan
4294b461c7 fix: pass include_change_type_prefix to word-level diff (#4037) 2026-04-10 11:46:55 +02:00
2 changed files with 84 additions and 16 deletions

View File

@@ -76,7 +76,7 @@ def extract_changed_to(raw_diff: str) -> str:
return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html', include_change_type_prefix: bool = True) -> tuple[str, bool]:
"""
Render word-level differences between two lines inline using diff-match-patch library.
@@ -163,14 +163,20 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if removed_tokens:
removed_full = ''.join(removed_tokens).rstrip()
trailing_removed = ''.join(removed_tokens)[len(removed_full):] if len(''.join(removed_tokens)) > len(removed_full) else ''
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
if include_change_type_prefix:
result_parts.append(f'{CHANGED_PLACEMARKER_OPEN}{removed_full}{CHANGED_PLACEMARKER_CLOSED}{trailing_removed}')
else:
result_parts.append(f'{removed_full}{trailing_removed}')
if added_tokens:
if result_parts: # Add newline between removed and added
result_parts.append('\n')
added_full = ''.join(added_tokens).rstrip()
trailing_added = ''.join(added_tokens)[len(added_full):] if len(''.join(added_tokens)) > len(added_full) else ''
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
if include_change_type_prefix:
result_parts.append(f'{CHANGED_INTO_PLACEMARKER_OPEN}{added_full}{CHANGED_INTO_PLACEMARKER_CLOSED}{trailing_added}')
else:
result_parts.append(f'{added_full}{trailing_added}')
return ''.join(result_parts), has_changes
else:
@@ -180,21 +186,27 @@ def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool
if op == 0: # Equal
result_parts.append(text)
elif op == 1: # Insertion
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
if not include_change_type_prefix:
result_parts.append(text)
else:
result_parts.append(trailing)
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{ADDED_PLACEMARKER_OPEN}{content}{ADDED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
elif op == -1: # Deletion
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
if not include_change_type_prefix:
result_parts.append(text)
else:
result_parts.append(trailing)
# Don't wrap empty content (e.g., whitespace-only tokens after rstrip)
content = text.rstrip()
trailing = text[len(content):] if len(text) > len(content) else ''
if content:
result_parts.append(f'{REMOVED_PLACEMARKER_OPEN}{content}{REMOVED_PLACEMARKER_CLOSED}{trailing}')
else:
result_parts.append(trailing)
return ''.join(result_parts), has_changes
@@ -390,7 +402,7 @@ def customSequenceMatcher(
# Use inline word-level diff for single line replacements when word_diff is enabled
if word_diff and len(before_lines) == 1 and len(after_lines) == 1:
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer)
inline_diff, has_changes = render_inline_word_diff(before_lines[0], after_lines[0], ignore_junk=ignore_junk, tokenizer=tokenizer, include_change_type_prefix=include_change_type_prefix)
# Check if there are any actual changes (not just whitespace when ignore_junk is enabled)
if ignore_junk and not has_changes:
# No real changes, skip this line

View File

@@ -462,5 +462,61 @@ Line 3 with tabs and spaces"""
self.assertEqual(extract_changed_to(raw), "")
def test_word_diff_no_prefix_whole_line_replaced(self):
"""When include_change_type_prefix=False, word-level diffs for whole-line
replacements must not include placemarkers (issue #3816)."""
before = "73"
after = "100"
raw = diff.render_diff(before, after, word_diff=True, include_change_type_prefix=False)
self.assertNotIn('PLACEMARKER', raw)
# Should contain just the raw values separated by newline
self.assertIn('73', raw)
self.assertIn('100', raw)
def test_word_diff_no_prefix_inline_changes(self):
"""When include_change_type_prefix=False, inline word-level diffs
must not include placemarkers (issue #3816)."""
before = "the price is 50 dollars"
after = "the price is 75 dollars"
raw = diff.render_diff(before, after, word_diff=True, include_change_type_prefix=False)
self.assertNotIn('PLACEMARKER', raw)
self.assertIn('50', raw)
self.assertIn('75', raw)
def test_word_diff_with_prefix_still_wraps(self):
"""Default include_change_type_prefix=True must still wrap tokens."""
before = "73"
after = "100"
raw = diff.render_diff(before, after, word_diff=True, include_change_type_prefix=True)
self.assertIn('PLACEMARKER', raw)
def test_word_diff_no_prefix_exact_output(self):
"""Pin exact output for include_change_type_prefix=False to catch regressions.
Whole-line replacement: old and new values separated by newline, no markers.
Inline partial replacement: equal tokens kept, changed tokens (both old and new)
appended without markers — this means old+new are concatenated in place.
"""
# Whole-line replaced: both values on separate lines, clean
raw = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=False)
self.assertEqual(raw, '73\n100')
# Inline word replacement: equal context preserved, old+new token concatenated in-place
raw = diff.render_diff('the price is 50 dollars', 'the price is 75 dollars',
word_diff=True, include_change_type_prefix=False)
self.assertEqual(raw, 'the price is 5075 dollars')
# Sanity: with prefix the whole-line case is fully wrapped
raw = diff.render_diff('73', '100', word_diff=True, include_change_type_prefix=True)
self.assertEqual(raw, '@changed_PLACEMARKER_OPEN73@changed_PLACEMARKER_CLOSED\n'
'@changed_into_PLACEMARKER_OPEN100@changed_into_PLACEMARKER_CLOSED')
if __name__ == '__main__':
unittest.main()