diff --git a/changedetectionio/notification/handler.py b/changedetectionio/notification/handler.py index 951ed1e6..59a14710 100644 --- a/changedetectionio/notification/handler.py +++ b/changedetectionio/notification/handler.py @@ -382,14 +382,17 @@ def process_notification(n_object: NotificationContextData, datastore): n_object['llm_summary'] = _llm_change_summary or (n_object.get('_llm_result') or {}).get('summary', '') n_object['llm_intent'] = n_object.get('_llm_intent', '') - # Re #3529: diff content from text/plain pages may contain raw '<' chars that break HTML emails. - # Escape only the diff variables before Jinja2 renders them into the template, so the user's - # own HTML in the notification body (e.g. ) is never touched. - # Diff placemarkers (e.g. @removed_PLACEMARKER_OPEN) contain no HTML chars so they survive - # html_escape and are still replaced with tags by apply_service_tweaks later. - watch_mime_type = n_object.get('watch_mime_type') - if (watch_mime_type and 'text/' in watch_mime_type.lower() and 'html' not in watch_mime_type.lower() - and 'html' in requested_output_format): + # Escape diff/snapshot variables before Jinja renders them into an HTML notification. + # GHSA-q8xq-qg4x-wphg: inscriptis decodes HTML entities when converting text/html + # pages to snapshot text, so a page that visibly displays "<a href...>" yields + # literal "" in the snapshot — which would otherwise render as live + # markup in HTML emails / Telegram (parse_mode=html) / Discord embeds, letting a + # watched page inject phishing links into the operator's notification channel. + # Also covers #3529 — raw '<' chars from text/plain pages breaking HTML email layout. + # The operator's own template HTML (e.g. ) is outside the + # variable values so it stays untouched. Diff placemarkers contain no HTML chars, + # so they survive escape and are still replaced with tags later. + if 'html' in requested_output_format: from markupsafe import escape as html_escape _page_content_keys = {'raw_diff', 'current_snapshot', 'prev_snapshot', 'triggered_text'} for key in [k for k in notification_parameters if k.startswith('diff') or k in _page_content_keys]: diff --git a/changedetectionio/tests/test_notification.py b/changedetectionio/tests/test_notification.py index bcdacd57..1d666793 100644 --- a/changedetectionio/tests/test_notification.py +++ b/changedetectionio/tests/test_notification.py @@ -638,9 +638,11 @@ def test_html_color_notifications(client, live_server, measure_memory_usage, dat def _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type=None): """ - #4121 - Custom HTML in the notification body (e.g. ) must NOT be - HTML-escaped regardless of the watched page's content-type. Only raw diff content from - text/plain pages needs escaping (to prevent raw '<' chars breaking HTML email rendering). + #4121 - The operator's own HTML in the notification body template (e.g. + ) must survive unescaped regardless of the watched page's + Content-Type. The escape pass in handler.py only touches the variable *values* + (diff/snapshot content from the page — see GHSA-q8xq-qg4x-wphg) — it leaves the + surrounding template HTML alone. """ set_original_response(datastore_path=datastore_path) @@ -693,10 +695,98 @@ def _test_custom_html_in_notification_body_not_escaped(client, datastore_path, c def test_plaintext_watch_custom_html_in_notification_body_not_escaped(client, live_server, measure_memory_usage, datastore_path): - # text/plain: diff content may contain raw '<' chars — those must be escaped, but NOT the user's template HTML + # Diff/snapshot values are escaped for HTML notifications (covered by + # test_html_watch_diff_content_escaped_in_html_notification). What this test + # locks in is that the *surrounding* template HTML is left alone in every case. _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type="text/plain") - # text/html: HTML processor strips tags before diffing, no escaping needed, user's template HTML must be preserved _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type="text/html") - # no MIME type (None): same as HTML case, user's template HTML must be preserved _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type=None) + +def test_html_watch_diff_content_escaped_in_html_notification(client, live_server, measure_memory_usage, datastore_path): + """ + GHSA-q8xq-qg4x-wphg — diff/snapshot content from the watched page must be + HTML-escaped before it is rendered into an HTML-format notification, regardless + of the watched page's Content-Type. + + Inscriptis (used to convert text/html pages to snapshot text) decodes HTML + entities — so a page that visibly displays "<a href=...>" produces snapshot + text containing literal "". The previous gate at handler.py:391 + only escaped when watch_mime_type matched 'text/' and not 'html', which let + that decoded markup through to HTML emails / Telegram (parse_mode=html) / + Discord embeds, where it renders as a real clickable link — i.e. an attacker + who controls a watched page can inject phishing links into the operator's + trusted notification channel. + """ + from .util import write_test_file_and_sync + + if os.path.isfile(os.path.join(datastore_path, "notification.txt")): + os.unlink(os.path.join(datastore_path, "notification.txt")) + + # Baseline: an innocuous text/html page. + baseline_html = "

nothing to see here

" + write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), baseline_html) + + test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://') + # Pass content_type=text/html so the watch records 'text/html' as its content-type + # — this is the branch the previous gate skipped escaping for. + test_url = url_for('test_endpoint', _external=True, content_type='text/html') + + # HTML-format notification body that embeds the snapshot directly. Operators do this + # when they want the full changed content in the alert (e.g. an email digest). + res = client.post( + url_for("settings.settings_page"), + data={ + "application-fetch_backend": "html_requests", + "application-minutes_between_check": 180, + "application-notification_body": 'Watch had changes:\n{{current_snapshot}}', + "application-notification_format": "html", + "application-notification_urls": test_notification_url, + "application-notification_title": "Change detected", + }, + follow_redirects=True + ) + assert b'Settings updated' in res.data + + res = client.post( + url_for("ui.ui_views.form_quick_watch_add"), + data={"url": test_url, "tags": ''}, + follow_redirects=True + ) + assert b"Watch added" in res.data + + wait_for_all_checks(client) + + # Now flip the page to something whose *visible* text contains entity-encoded + # angle brackets — exactly the pattern a forum / pastebin / code-sample site uses + # to display literal HTML on the page. Inscriptis will decode </> back to + # literal < / > in the stored snapshot. + attacker_html = ( + '
'
+        '<a href="https://attacker.example/payment">ACTION REQUIRED</a>'
+        '<img src="https://attacker.example/track" width="1" height="1">'
+        '
' + ) + write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), attacker_html) + + res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) + assert b'Queued 1 watch for rechecking.' in res.data + + wait_for_all_checks(client) + wait_for_notification_endpoint_output(datastore_path=datastore_path) + + with open(os.path.join(datastore_path, "notification.txt"), 'r') as f: + body = f.read() + + # Sanity: the snapshot really did contain the decoded markup (otherwise the test + # would pass for the wrong reason). The escaped form must appear somewhere. + assert '<a href=' in body or '&lt;a href=' in body, \ + f"Expected escaped attacker markup in notification body, got: {body!r}" + + # The bug: a live
ends up in the HTML notification. + assert '