UI - Preview problem fix for extract_text/ignore_text #4138 (#4169)
Build and push containers / metadata (push) Has been cancelled
Build and push containers / build-push-containers (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled
ChangeDetection.io App Test / lint-code (push) Has been cancelled
ChangeDetection.io App Test / lint-translations (push) Has been cancelled
ChangeDetection.io App Test / lint-template-i18n (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled
Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled
ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled

This commit is contained in:
dgtlmoon
2026-05-20 13:57:17 +02:00
committed by GitHub
parent d04862d2fa
commit 43bb196aa4
3 changed files with 142 additions and 6 deletions
@@ -35,6 +35,50 @@ def _task(watch, update_handler):
return text_after_filter
def _compute_ignore_line_numbers_for_preview(text_pre_extract, ignore_patterns, extract_patterns):
"""1-indexed output line numbers in the post-extract display that correspond
to input lines matching ignore_text patterns.
Needed because extract_text (#4138) transforms line content — e.g. "0.54.10"
becomes ".54.10" — so a substring match for "0.54.10" against the post-extract
text fails and the preview UI can no longer mark the line as ignored. We find
the ignored line numbers in the pre-extract text and replay extract_by_regex
line-by-line to map them forward.
"""
from changedetectionio import html_tools
from changedetectionio.processors.text_json_diff.processor import ContentTransformer
if not text_pre_extract or not ignore_patterns:
return []
ignored_input_lines = set(
html_tools.strip_ignore_text(
content=text_pre_extract,
wordlist=ignore_patterns,
mode='line numbers'
)
)
if not ignored_input_lines:
return []
if not extract_patterns:
return sorted(ignored_input_lines)
# Replay extract_by_regex per-line. Each emitted match ends with exactly one
# '\n', so counting newlines tells us how many output lines this input produced.
output_line_counter = 0
result = []
for input_idx, line in enumerate(text_pre_extract.splitlines()):
is_ignored = (input_idx + 1) in ignored_input_lines
matches_in_line = ContentTransformer.extract_by_regex(line, extract_patterns).count('\n')
for _ in range(matches_in_line):
output_line_counter += 1
if is_ignored:
result.append(output_line_counter)
return result
def prepare_filter_prevew(datastore, watch_uuid, form_data):
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
from changedetectionio import forms, html_tools
@@ -50,6 +94,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
text_after_filter = ''
text_before_filter = ''
text_pre_extract = ''
trigger_line_numbers = []
ignore_line_numbers = []
blocked_line_numbers = []
@@ -89,15 +134,22 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk.
# The third task runs with extract_text cleared so we can compute ignore_line_numbers
# against the pre-extract text (extract_text transforms lines so post-extract substring
# matching for ignore patterns would otherwise fail — see #4138 follow-up).
# Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
tmp_watch_no_extract = deepcopy(tmp_watch)
tmp_watch_no_extract['extract_text'] = []
try:
with ThreadPoolExecutor(max_workers=2) as executor:
with ThreadPoolExecutor(max_workers=3) as executor:
future1 = executor.submit(_task, tmp_watch, update_handler)
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
future3 = executor.submit(_task, tmp_watch_no_extract, update_handler)
text_after_filter = future1.result()
text_before_filter = future2.result()
text_pre_extract = future3.result()
except Exception as e:
x=1
@@ -111,10 +163,11 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
try:
text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
wordlist=text_to_ignore,
mode='line numbers'
)
ignore_line_numbers = _compute_ignore_line_numbers_for_preview(
text_pre_extract=text_pre_extract,
ignore_patterns=text_to_ignore,
extract_patterns=tmp_watch.get('extract_text', [])
)
except Exception as e:
text_before_filter = f"Error: {str(e)}"
@@ -9,6 +9,10 @@ function request_textpreview_update() {
$('textarea:visible, input:visible').each(function () {
const $element = $(this); // Cache the jQuery object for the current element
const name = $element.attr('name'); // Get the name attribute of the element
// Radios share a name across multiple inputs; .val() returns the value
// attribute regardless of checked state, so iterating would let the last
// unchecked radio overwrite the user's actual selection. Skip unchecked.
if ($element.is(':radio') && !$element.is(':checked')) return;
data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
});
@@ -77,3 +77,82 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage,
assert reply.get('trigger_line_numbers') == [1] # Triggers "Awesome" in line 1
delete_all_watches(client)
def _setup_version_list_preview(datastore_path, client):
    """Create the version-tag-list fixture used by the #4138 preview regressions.

    Writes the fixture HTML to ``endpoint-content.txt`` under ``datastore_path``,
    adds a watch pointing at the test endpoint, requests a check and waits for
    it, then returns ``(test_url, uuid)`` for the new watch.
    """
    import time

    fixture_html = """<html><body>
0.55.5<br>
0.55.4<br>
0.55.3<br>
0.54.10<br>
0.54.9<br>
</body></html>"""

    fixture_path = os.path.join(datastore_path, "endpoint-content.txt")
    with open(fixture_path, "w") as fixture_file:
        fixture_file.write(fixture_html)

    test_url = url_for('test_endpoint', _external=True)
    datastore = client.application.config.get('DATASTORE')
    uuid = datastore.add_watch(url=test_url)

    # Hit the check-now endpoint, pause briefly, then wait via the shared helper.
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    time.sleep(0.5)
    wait_for_all_checks(client)

    return test_url, uuid
def test_preview_ignore_highlight_with_extract_text(client, live_server, measure_memory_usage, datastore_path):
    """Regression for the #4138 follow-up (ignore highlighting).

    When extract_text rewrites a line (e.g. "0.54.10" -> ".54.10"), the preview
    must still report that row as ignored, even though a substring match of the
    ignore pattern against the post-extract text fails.
    """
    import json

    test_url, uuid = _setup_version_list_preview(datastore_path, client)

    preview_form = {
        "include_filters": "",
        "fetch_backend": 'html_requests',
        "ignore_text": "0.54.10",
        "extract_text": r"/(.\d+\.\d+)/",
        "url": test_url,
    }
    response = client.post(
        url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid),
        data=preview_form,
    )
    payload = json.loads(response.data.decode('utf-8'))

    # The regex drops the leading "0", so the ignored input shows up as ".54.10"
    # after extraction. Its position (line 4) must still be flagged as ignored.
    ignored = payload.get('ignore_line_numbers')
    assert ignored == [4], \
        f"Expected line 4 to be highlighted as ignored, got {ignored!r}"

    delete_all_watches(client)
def test_preview_strip_ignored_lines_with_extract_text(client, live_server, measure_memory_usage, datastore_path):
    """Regression for the #4138 follow-up (strip_ignored_lines).

    With strip_ignored_lines enabled, an ignored line must be absent from the
    preview output even when extract_text would otherwise rewrite it
    (0.54.10 -> .54.10), and no line should be reported for highlighting.
    """
    import json

    test_url, uuid = _setup_version_list_preview(datastore_path, client)

    preview_form = {
        "include_filters": "",
        "fetch_backend": 'html_requests',
        "ignore_text": "0.54.10",
        "extract_text": r"/(.\d+\.\d+)/",
        "strip_ignored_lines": "true",
        "url": test_url,
    }
    response = client.post(
        url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid),
        data=preview_form,
    )
    payload = json.loads(response.data.decode('utf-8'))

    # Neither the rewritten nor the original form of the ignored line may remain.
    after_filter = payload.get('after_filter', '')
    assert '.54.10' not in after_filter, \
        f"Stripped ignored line should not appear in preview output, got:\n{after_filter!r}"
    assert '0.54.10' not in after_filter

    ignored = payload.get('ignore_line_numbers')
    assert ignored == [], \
        f"Stripped lines need no highlight, got {ignored!r}"

    delete_all_watches(client)