diff --git a/changedetectionio/processors/text_json_diff/__init__.py b/changedetectionio/processors/text_json_diff/__init__.py index 59fa0dd2..41f32550 100644 --- a/changedetectionio/processors/text_json_diff/__init__.py +++ b/changedetectionio/processors/text_json_diff/__init__.py @@ -35,6 +35,50 @@ def _task(watch, update_handler): return text_after_filter +def _compute_ignore_line_numbers_for_preview(text_pre_extract, ignore_patterns, extract_patterns): + """1-indexed output line numbers in the post-extract display that correspond + to input lines matching ignore_text patterns. + + Needed because extract_text (#4138) transforms line content — e.g. "0.54.10" + becomes ".54.10" — so a substring match for "0.54.10" against the post-extract + text fails and the preview UI can no longer mark the line as ignored. We find + the ignored line numbers in the pre-extract text and replay extract_by_regex + line-by-line to map them forward. + """ + from changedetectionio import html_tools + from changedetectionio.processors.text_json_diff.processor import ContentTransformer + + if not text_pre_extract or not ignore_patterns: + return [] + + ignored_input_lines = set( + html_tools.strip_ignore_text( + content=text_pre_extract, + wordlist=ignore_patterns, + mode='line numbers' + ) + ) + if not ignored_input_lines: + return [] + + if not extract_patterns: + return sorted(ignored_input_lines) + + # Replay extract_by_regex per-line. Each emitted match ends with exactly one + # '\n', so counting newlines tells us how many output lines this input produced. + output_line_counter = 0 + result = [] + for input_idx, line in enumerate(text_pre_extract.splitlines()): + is_ignored = (input_idx + 1) in ignored_input_lines + matches_in_line = ContentTransformer.extract_by_regex(line, extract_patterns).count('\n') + for _ in range(matches_in_line): + output_line_counter += 1 + if is_ignored: + result.append(output_line_counter) + + return result + + def prepare_filter_prevew(datastore, watch_uuid, form_data): '''Used by @app.route("/edit//preview-rendered", methods=['POST'])''' from changedetectionio import forms, html_tools @@ -50,6 +94,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): text_after_filter = '' text_before_filter = '' + text_pre_extract = '' trigger_line_numbers = [] ignore_line_numbers = [] blocked_line_numbers = [] @@ -89,15 +134,22 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') - # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk + # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk. + # The third task runs with extract_text cleared so we can compute ignore_line_numbers + # against the pre-extract text (extract_text transforms lines so post-extract substring + # matching for ignore patterns would otherwise fail — see #4138 follow-up). # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects + tmp_watch_no_extract = deepcopy(tmp_watch) + tmp_watch_no_extract['extract_text'] = [] try: - with ThreadPoolExecutor(max_workers=2) as executor: + with ThreadPoolExecutor(max_workers=3) as executor: future1 = executor.submit(_task, tmp_watch, update_handler) future2 = executor.submit(_task, blank_watch_no_filters, update_handler) + future3 = executor.submit(_task, tmp_watch_no_extract, update_handler) text_after_filter = future1.result() text_before_filter = future2.result() + text_pre_extract = future3.result() except Exception as e: x=1 @@ -111,10 +163,11 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): try: text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', []) - ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, - wordlist=text_to_ignore, - mode='line numbers' - ) + ignore_line_numbers = _compute_ignore_line_numbers_for_preview( + text_pre_extract=text_pre_extract, + ignore_patterns=text_to_ignore, + extract_patterns=tmp_watch.get('extract_text', []) + ) except Exception as e: text_before_filter = f"Error: {str(e)}" diff --git a/changedetectionio/static/js/watch-settings.js b/changedetectionio/static/js/watch-settings.js index 5610f1ca..e02d3586 100644 --- a/changedetectionio/static/js/watch-settings.js +++ b/changedetectionio/static/js/watch-settings.js @@ -9,6 +9,10 @@ function request_textpreview_update() { $('textarea:visible, input:visible').each(function () { const $element = $(this); // Cache the jQuery object for the current element const name = $element.attr('name'); // Get the name attribute of the element + // Radios share a name across multiple inputs; .val() returns the value + // attribute regardless of checked state, so iterating would let the last + // unchecked radio overwrite the user's actual selection. Skip unchecked. + if ($element.is(':radio') && !$element.is(':checked')) return; data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val(); }); diff --git a/changedetectionio/tests/test_live_preview.py b/changedetectionio/tests/test_live_preview.py index 89629912..945ae701 100644 --- a/changedetectionio/tests/test_live_preview.py +++ b/changedetectionio/tests/test_live_preview.py @@ -77,3 +77,82 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage, assert reply.get('trigger_line_numbers') == [1] # Triggers "Awesome" in line 1 delete_all_watches(client) + + +def _setup_version_list_preview(datastore_path, client): + """Shared HTML fixture for #4138 preview regressions (version tag list).""" + import time + + data = """ +0.55.5
+0.55.4
+0.55.3
+0.54.10
+0.54.9
+""" + with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: + f.write(data) + + test_url = url_for('test_endpoint', _external=True) + uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) + client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) + time.sleep(0.5) + wait_for_all_checks(client) + return test_url, uuid + + +def test_preview_ignore_highlight_with_extract_text(client, live_server, measure_memory_usage, datastore_path): + """Regression for #4138 follow-up: when extract_text rewrites a line (e.g. "0.54.10" → ".54.10"), + the preview must still highlight that row as 'ignored' even though substring matching against the + post-extract text fails.""" + import json + + test_url, uuid = _setup_version_list_preview(datastore_path, client) + + res = client.post( + url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid), + data={ + "include_filters": "", + "fetch_backend": 'html_requests', + "ignore_text": "0.54.10", + "extract_text": r"/(.\d+\.\d+)/", + "url": test_url, + }, + ) + reply = json.loads(res.data.decode('utf-8')) + # The regex strips the leading "0", so the post-extract line for the ignored input is ".54.10". + # The preview should still mark its position (line 4) as ignored. + assert reply.get('ignore_line_numbers') == [4], \ + f"Expected line 4 to be highlighted as ignored, got {reply.get('ignore_line_numbers')!r}" + + delete_all_watches(client) + + +def test_preview_strip_ignored_lines_with_extract_text(client, live_server, measure_memory_usage, datastore_path): + """Regression for #4138 follow-up: with strip_ignored_lines enabled, an ignored line must be + removed from the preview output even when extract_text would otherwise rewrite it (0.54.10 → .54.10).""" + import json + + test_url, uuid = _setup_version_list_preview(datastore_path, client) + + res = client.post( + url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid), + data={ + "include_filters": "", + "fetch_backend": 'html_requests', + "ignore_text": "0.54.10", + "extract_text": r"/(.\d+\.\d+)/", + "strip_ignored_lines": "true", + "url": test_url, + }, + ) + reply = json.loads(res.data.decode('utf-8')) + after_filter = reply.get('after_filter', '') + + assert '.54.10' not in after_filter, \ + f"Stripped ignored line should not appear in preview output, got:\n{after_filter!r}" + assert '0.54.10' not in after_filter + assert reply.get('ignore_line_numbers') == [], \ + f"Stripped lines need no highlight, got {reply.get('ignore_line_numbers')!r}" + + delete_all_watches(client)