# Inline CSS is used (rather than classes) so the colours travel with the markup.
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"


def render_inline_word_diff(before_line: str, after_line: str, html_colour: bool = False) -> str:
    """
    Render word-level differences between two lines inline.

    Both lines are split into alternating runs of non-whitespace and
    whitespace, the token lists are compared with difflib.SequenceMatcher,
    and the result is stitched back into a single string in which removed
    and added runs are marked.

    Args:
        before_line: Original line text
        after_line: Modified line text
        html_colour: When True, wrap removed runs in a <span> styled with
            REMOVED_STYLE and added runs in a <span> styled with ADDED_STYLE.
            When False, use plain-text markers: [-removed-] and [+added+].

    Returns:
        str: Single line with inline word-level highlighting
    """
    import re

    def tokenize(text):
        # Keep whitespace runs as tokens so joining the tokens reproduces
        # the input exactly.
        return re.findall(r'\S+|\s+', text)

    # Pick the pair of markers once so a single opcode loop serves both modes.
    # NOTE(review): tokens are inserted verbatim (no HTML escaping is done
    # here) - presumably the caller escapes the text; confirm.
    if html_colour:
        def mark_removed(text):
            return f'<span style="{REMOVED_STYLE}">{text}</span>'

        def mark_added(text):
            return f'<span style="{ADDED_STYLE}">{text}</span>'
    else:
        def mark_removed(text):
            return f'[-{text}-]'

        def mark_added(text):
            return f'[+{text}+]'

    before_tokens = tokenize(before_line)
    after_tokens = tokenize(after_line)

    # Use SequenceMatcher to find word-level differences
    matcher = difflib.SequenceMatcher(None, before_tokens, after_tokens)

    result = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            result.append(''.join(before_tokens[i1:i2]))
            continue
        # 'replace' is rendered as a removal immediately followed by an addition.
        if tag in ('delete', 'replace'):
            result.append(mark_removed(''.join(before_tokens[i1:i2])))
        if tag in ('insert', 'replace'):
            result.append(mark_added(''.join(after_tokens[j1:j2])))
    return ''.join(result)


def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
    """Return a slice of the list, or a single element if start == end."""
    return lst[start:end] if start != end else [lst[start]]
range(context_start, prev_ahi): + included_equal_ranges.add(line_num) + + # Include context lines after this change + for j in range(i + 1, min(len(opcodes), i + 2)): + if opcodes[j][0] == 'equal': + next_alo, next_ahi = opcodes[j][1], opcodes[j][2] + # Include first N lines of the next equal block + context_end = min(next_ahi, next_alo + context_lines) + for line_num in range(next_alo, context_end): + included_equal_ranges.add(line_num) # Remember! gmail, outlook etc dont support