mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-07 09:21:23 +00:00
Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cba745e205 | |||
| d04862d2fa | |||
| 9d9a58e763 | |||
| 649c153bf4 | |||
| be3ba3bca3 |
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.55.4'
|
||||
__version__ = '0.55.5'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -364,6 +364,10 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
# Should always be false for 'text' mode or its too hard to read
|
||||
# But otherwise, this could be some setting
|
||||
word_diff=False if requested_output_format_original == 'text' else True,
|
||||
# HTML-format notifications must escape diff content (GHSA-q8xq-qg4x-wphg).
|
||||
# FormattableDiff/Extract escape internally so {{ diff(...) }} stays callable —
|
||||
# the post-Jinja escape loop below would otherwise convert them to plain str.
|
||||
escape_output='html' in requested_output_format,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -394,10 +398,19 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
# so they survive escape and are still replaced with <span> tags later.
|
||||
if 'html' in requested_output_format:
|
||||
from markupsafe import escape as html_escape
|
||||
from changedetectionio.notification_service import FormattableDiff, FormattableExtract
|
||||
_page_content_keys = {'raw_diff', 'current_snapshot', 'prev_snapshot', 'triggered_text'}
|
||||
for key in [k for k in notification_parameters if k.startswith('diff') or k in _page_content_keys]:
|
||||
if notification_parameters.get(key):
|
||||
notification_parameters[key] = str(html_escape(str(notification_parameters[key])))
|
||||
value = notification_parameters.get(key)
|
||||
if not value:
|
||||
continue
|
||||
# FormattableDiff / FormattableExtract are callable str subclasses — {{ diff(lines=5) }}
|
||||
# etc. relies on __call__. Wrapping them with str(html_escape(...)) here would lose
|
||||
# __call__ and break those tokens. They escape internally via escape_output=True
|
||||
# (set by add_rendered_diff_to_notification_vars above) for both __str__ and __call__.
|
||||
if isinstance(value, (FormattableDiff, FormattableExtract)):
|
||||
continue
|
||||
notification_parameters[key] = str(html_escape(str(value)))
|
||||
|
||||
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
|
||||
for url in n_object['notification_urls']:
|
||||
|
||||
@@ -99,7 +99,7 @@ class FormattableExtract(str):
|
||||
Multiple changed fragments are joined with newlines.
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
|
||||
def __new__(cls, prev_snapshot, current_snapshot, extract_fn, escape_output=False):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
# word_diff=True is required — placemarker extraction regexes only exist in word-diff output
|
||||
@@ -107,6 +107,12 @@ class FormattableExtract(str):
|
||||
extracted = extract_fn(raw)
|
||||
else:
|
||||
extracted = ''
|
||||
if escape_output and extracted:
|
||||
# Placemarkers (@removed_PLACEMARKER_OPEN etc) contain no HTML chars,
|
||||
# so html_escape leaves them intact — they still get swapped to <span>
|
||||
# tags later by apply_service_tweaks. See GHSA-q8xq-qg4x-wphg.
|
||||
from markupsafe import escape as html_escape
|
||||
extracted = str(html_escape(extracted))
|
||||
instance = super().__new__(cls, extracted)
|
||||
return instance
|
||||
|
||||
@@ -128,16 +134,23 @@ class FormattableDiff(str):
|
||||
|
||||
Being a str subclass means it is natively JSON serializable.
|
||||
"""
|
||||
def __new__(cls, prev_snapshot, current_snapshot, **base_kwargs):
|
||||
def __new__(cls, prev_snapshot, current_snapshot, escape_output=False, **base_kwargs):
|
||||
if prev_snapshot or current_snapshot:
|
||||
from changedetectionio import diff as diff_module
|
||||
rendered = diff_module.render_diff(prev_snapshot, current_snapshot, **base_kwargs)
|
||||
else:
|
||||
rendered = ''
|
||||
if escape_output and rendered:
|
||||
# Placemarkers (@removed_PLACEMARKER_OPEN etc) contain no HTML chars,
|
||||
# so html_escape leaves them intact — they still get swapped to <span>
|
||||
# tags later by apply_service_tweaks. See GHSA-q8xq-qg4x-wphg.
|
||||
from markupsafe import escape as html_escape
|
||||
rendered = str(html_escape(rendered))
|
||||
instance = super().__new__(cls, rendered)
|
||||
instance._prev = prev_snapshot
|
||||
instance._current = current_snapshot
|
||||
instance._base_kwargs = base_kwargs
|
||||
instance._escape_output = escape_output
|
||||
return instance
|
||||
|
||||
def __call__(self, lines=None, added_only=False, removed_only=False, context=0,
|
||||
@@ -163,6 +176,10 @@ class FormattableDiff(str):
|
||||
if lines is not None:
|
||||
result = '\n'.join(result.splitlines()[:int(lines)])
|
||||
|
||||
if self._escape_output and result:
|
||||
from markupsafe import escape as html_escape
|
||||
result = str(html_escape(result))
|
||||
|
||||
return result
|
||||
|
||||
|
||||
@@ -236,7 +253,7 @@ class NotificationContextData(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool):
|
||||
def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snapshot:str, current_snapshot:str, word_diff:bool, escape_output:bool=False):
|
||||
"""
|
||||
Efficiently renders only the diff placeholders that are actually used in the notification text.
|
||||
|
||||
@@ -249,6 +266,9 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
prev_snapshot: Previous version of content for diff comparison
|
||||
current_snapshot: Current version of content for diff comparison
|
||||
word_diff: Whether to use word-level (True) or line-level (False) diffing
|
||||
escape_output: If True, the rendered diff output is HTML-escaped. Used for HTML-format
|
||||
notifications so attacker-controlled page content can't inject live markup.
|
||||
Both the cached str representation and the result of {{ diff(...) }} calls are escaped.
|
||||
|
||||
Returns:
|
||||
dict: Only the diff placeholders that were found in notification_scan_text, with rendered content
|
||||
@@ -287,10 +307,10 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
|
||||
if not re.search(pattern, notification_scan_text, re.IGNORECASE):
|
||||
continue
|
||||
if key in diff_specs:
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
|
||||
ret[key] = FormattableDiff(prev_snapshot, current_snapshot, escape_output=escape_output, **diff_specs[key])
|
||||
rendered_count += 1
|
||||
elif key in extract_specs:
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
|
||||
ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key], escape_output=escape_output)
|
||||
rendered_count += 1
|
||||
|
||||
if rendered_count:
|
||||
|
||||
@@ -35,6 +35,50 @@ def _task(watch, update_handler):
|
||||
return text_after_filter
|
||||
|
||||
|
||||
def _compute_ignore_line_numbers_for_preview(text_pre_extract, ignore_patterns, extract_patterns):
|
||||
"""1-indexed output line numbers in the post-extract display that correspond
|
||||
to input lines matching ignore_text patterns.
|
||||
|
||||
Needed because extract_text (#4138) transforms line content — e.g. "0.54.10"
|
||||
becomes ".54.10" — so a substring match for "0.54.10" against the post-extract
|
||||
text fails and the preview UI can no longer mark the line as ignored. We find
|
||||
the ignored line numbers in the pre-extract text and replay extract_by_regex
|
||||
line-by-line to map them forward.
|
||||
"""
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.processors.text_json_diff.processor import ContentTransformer
|
||||
|
||||
if not text_pre_extract or not ignore_patterns:
|
||||
return []
|
||||
|
||||
ignored_input_lines = set(
|
||||
html_tools.strip_ignore_text(
|
||||
content=text_pre_extract,
|
||||
wordlist=ignore_patterns,
|
||||
mode='line numbers'
|
||||
)
|
||||
)
|
||||
if not ignored_input_lines:
|
||||
return []
|
||||
|
||||
if not extract_patterns:
|
||||
return sorted(ignored_input_lines)
|
||||
|
||||
# Replay extract_by_regex per-line. Each emitted match ends with exactly one
|
||||
# '\n', so counting newlines tells us how many output lines this input produced.
|
||||
output_line_counter = 0
|
||||
result = []
|
||||
for input_idx, line in enumerate(text_pre_extract.splitlines()):
|
||||
is_ignored = (input_idx + 1) in ignored_input_lines
|
||||
matches_in_line = ContentTransformer.extract_by_regex(line, extract_patterns).count('\n')
|
||||
for _ in range(matches_in_line):
|
||||
output_line_counter += 1
|
||||
if is_ignored:
|
||||
result.append(output_line_counter)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
'''Used by @app.route("/edit/<uuid_str:uuid>/preview-rendered", methods=['POST'])'''
|
||||
from changedetectionio import forms, html_tools
|
||||
@@ -50,6 +94,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
|
||||
text_after_filter = ''
|
||||
text_before_filter = ''
|
||||
text_pre_extract = ''
|
||||
trigger_line_numbers = []
|
||||
ignore_line_numbers = []
|
||||
blocked_line_numbers = []
|
||||
@@ -89,15 +134,22 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
update_handler.fetcher.content = str(decompressed_data) # str() because playwright/puppeteer/requests return string
|
||||
update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
|
||||
|
||||
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
|
||||
# Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk.
|
||||
# The third task runs with extract_text cleared so we can compute ignore_line_numbers
|
||||
# against the pre-extract text (extract_text transforms lines so post-extract substring
|
||||
# matching for ignore patterns would otherwise fail — see #4138 follow-up).
|
||||
# Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
|
||||
tmp_watch_no_extract = deepcopy(tmp_watch)
|
||||
tmp_watch_no_extract['extract_text'] = []
|
||||
try:
|
||||
with ThreadPoolExecutor(max_workers=2) as executor:
|
||||
with ThreadPoolExecutor(max_workers=3) as executor:
|
||||
future1 = executor.submit(_task, tmp_watch, update_handler)
|
||||
future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
|
||||
future3 = executor.submit(_task, tmp_watch_no_extract, update_handler)
|
||||
|
||||
text_after_filter = future1.result()
|
||||
text_before_filter = future2.result()
|
||||
text_pre_extract = future3.result()
|
||||
except Exception as e:
|
||||
x=1
|
||||
|
||||
@@ -111,10 +163,11 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||
|
||||
try:
|
||||
text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||
ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
|
||||
wordlist=text_to_ignore,
|
||||
mode='line numbers'
|
||||
)
|
||||
ignore_line_numbers = _compute_ignore_line_numbers_for_preview(
|
||||
text_pre_extract=text_pre_extract,
|
||||
ignore_patterns=text_to_ignore,
|
||||
extract_patterns=tmp_watch.get('extract_text', [])
|
||||
)
|
||||
except Exception as e:
|
||||
text_before_filter = f"Error: {str(e)}"
|
||||
|
||||
|
||||
@@ -9,6 +9,10 @@ function request_textpreview_update() {
|
||||
$('textarea:visible, input:visible').each(function () {
|
||||
const $element = $(this); // Cache the jQuery object for the current element
|
||||
const name = $element.attr('name'); // Get the name attribute of the element
|
||||
// Radios share a name across multiple inputs; .val() returns the value
|
||||
// attribute regardless of checked state, so iterating would let the last
|
||||
// unchecked radio overwrite the user's actual selection. Skip unchecked.
|
||||
if ($element.is(':radio') && !$element.is(':checked')) return;
|
||||
data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
|
||||
});
|
||||
|
||||
|
||||
@@ -77,3 +77,82 @@ def test_content_filter_live_preview(client, live_server, measure_memory_usage,
|
||||
assert reply.get('trigger_line_numbers') == [1] # Triggers "Awesome" in line 1
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def _setup_version_list_preview(datastore_path, client):
|
||||
"""Shared HTML fixture for #4138 preview regressions (version tag list)."""
|
||||
import time
|
||||
|
||||
data = """<html><body>
|
||||
0.55.5<br>
|
||||
0.55.4<br>
|
||||
0.55.3<br>
|
||||
0.54.10<br>
|
||||
0.54.9<br>
|
||||
</body></html>"""
|
||||
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
|
||||
f.write(data)
|
||||
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
|
||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(0.5)
|
||||
wait_for_all_checks(client)
|
||||
return test_url, uuid
|
||||
|
||||
|
||||
def test_preview_ignore_highlight_with_extract_text(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Regression for #4138 follow-up: when extract_text rewrites a line (e.g. "0.54.10" → ".54.10"),
|
||||
the preview must still highlight that row as 'ignored' even though substring matching against the
|
||||
post-extract text fails."""
|
||||
import json
|
||||
|
||||
test_url, uuid = _setup_version_list_preview(datastore_path, client)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid),
|
||||
data={
|
||||
"include_filters": "",
|
||||
"fetch_backend": 'html_requests',
|
||||
"ignore_text": "0.54.10",
|
||||
"extract_text": r"/(.\d+\.\d+)/",
|
||||
"url": test_url,
|
||||
},
|
||||
)
|
||||
reply = json.loads(res.data.decode('utf-8'))
|
||||
# The regex strips the leading "0", so the post-extract line for the ignored input is ".54.10".
|
||||
# The preview should still mark its position (line 4) as ignored.
|
||||
assert reply.get('ignore_line_numbers') == [4], \
|
||||
f"Expected line 4 to be highlighted as ignored, got {reply.get('ignore_line_numbers')!r}"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
def test_preview_strip_ignored_lines_with_extract_text(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""Regression for #4138 follow-up: with strip_ignored_lines enabled, an ignored line must be
|
||||
removed from the preview output even when extract_text would otherwise rewrite it (0.54.10 → .54.10)."""
|
||||
import json
|
||||
|
||||
test_url, uuid = _setup_version_list_preview(datastore_path, client)
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_edit.watch_get_preview_rendered", uuid=uuid),
|
||||
data={
|
||||
"include_filters": "",
|
||||
"fetch_backend": 'html_requests',
|
||||
"ignore_text": "0.54.10",
|
||||
"extract_text": r"/(.\d+\.\d+)/",
|
||||
"strip_ignored_lines": "true",
|
||||
"url": test_url,
|
||||
},
|
||||
)
|
||||
reply = json.loads(res.data.decode('utf-8'))
|
||||
after_filter = reply.get('after_filter', '')
|
||||
|
||||
assert '.54.10' not in after_filter, \
|
||||
f"Stripped ignored line should not appear in preview output, got:\n{after_filter!r}"
|
||||
assert '0.54.10' not in after_filter
|
||||
assert reply.get('ignore_line_numbers') == [], \
|
||||
f"Stripped lines need no highlight, got {reply.get('ignore_line_numbers')!r}"
|
||||
|
||||
delete_all_watches(client)
|
||||
|
||||
@@ -634,6 +634,12 @@ def _test_color_notifications(client, notification_body_token, datastore_path):
|
||||
def test_html_color_notifications(client, live_server, measure_memory_usage, datastore_path):
|
||||
_test_color_notifications(client, '{{diff}}',datastore_path=datastore_path)
|
||||
_test_color_notifications(client, '{{diff_full}}',datastore_path=datastore_path)
|
||||
# Regression: the html-output escape pass in handler.py used to convert
|
||||
# FormattableDiff into a plain str, stripping its __call__ and breaking any
|
||||
# {{ diff(...) }} / {{ diff_added(...) }} token on htmlcolor/html notifications
|
||||
# with 'str' object is not callable (see commit 08d30c6 + #3923).
|
||||
# word_diff=false reproduces the exact form the user-reported failure used.
|
||||
_test_color_notifications(client, '{{diff(word_diff=false)}}', datastore_path=datastore_path)
|
||||
|
||||
|
||||
def _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type=None):
|
||||
|
||||
Binary file not shown.
@@ -2318,11 +2318,11 @@ msgstr "Último Comprobado"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Changed"
|
||||
msgstr "Cambiadp"
|
||||
msgstr "Cambiado"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "Last Changed"
|
||||
msgstr "Último Cambiadp"
|
||||
msgstr "Último Cambiado"
|
||||
|
||||
#: changedetectionio/blueprint/watchlist/templates/watch-overview.html
|
||||
msgid "No web page change detection watches configured, please add a URL in the box above, or"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
#, fuzzy
|
||||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: changedetection.io 0.55.4\n"
|
||||
"Project-Id-Version: changedetection.io 0.55.5\n"
|
||||
"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
|
||||
"POT-Creation-Date: 2026-05-19 18:05+0200\n"
|
||||
"POT-Creation-Date: 2026-05-19 19:05+0200\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
|
||||
Reference in New Issue
Block a user