mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-07 09:21:23 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 52b51374da | |||
| 6922b160ee | |||
| 028e62f91c |
@@ -3,16 +3,6 @@ from functools import wraps
|
||||
from flask import current_app, redirect, request
|
||||
from loguru import logger
|
||||
|
||||
# Endpoints exempt from auth when `shared_diff_access` is enabled.
|
||||
# Must be exact endpoint names — substring matching (GHSA-vwgh-2hvh-4xm5)
|
||||
# let the state-changing `/diff/<uuid>/extract` endpoints slip through
|
||||
# because their names share the `diff_history_page` prefix.
|
||||
SHARED_DIFF_READ_ONLY_ENDPOINTS = frozenset({
|
||||
'ui.ui_diff.diff_history_page',
|
||||
'ui.ui_diff.processor_asset',
|
||||
'ui.ui_diff.download_patch',
|
||||
})
|
||||
|
||||
def login_optionally_required(func):
|
||||
"""
|
||||
If password authentication is enabled, verify the user is logged in.
|
||||
@@ -30,7 +20,7 @@ def login_optionally_required(func):
|
||||
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
||||
|
||||
# Permitted
|
||||
if request.endpoint in SHARED_DIFF_READ_ONLY_ENDPOINTS and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||
if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||
return func(*args, **kwargs)
|
||||
elif request.method in flask_login.config.EXEMPT_METHODS:
|
||||
return func(*args, **kwargs)
|
||||
|
||||
@@ -414,7 +414,7 @@ def _jinja2_filter_sanitize_tag_class(tag_title):
|
||||
return sanitized if sanitized else 'tag'
|
||||
|
||||
# Import login_optionally_required from auth_decorator
|
||||
from changedetectionio.auth_decorator import SHARED_DIFF_READ_ONLY_ENDPOINTS, login_optionally_required
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
# When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object.
|
||||
class User(flask_login.UserMixin):
|
||||
@@ -541,7 +541,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Permitted
|
||||
elif request.endpoint and 'login' in request.endpoint:
|
||||
return None
|
||||
elif request.endpoint in SHARED_DIFF_READ_ONLY_ENDPOINTS and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||
elif request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||
return None
|
||||
elif request.method in flask_login.config.EXEMPT_METHODS:
|
||||
return None
|
||||
|
||||
@@ -382,17 +382,14 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
n_object['llm_summary'] = _llm_change_summary or (n_object.get('_llm_result') or {}).get('summary', '')
|
||||
n_object['llm_intent'] = n_object.get('_llm_intent', '')
|
||||
|
||||
# Escape diff/snapshot variables before Jinja renders them into an HTML notification.
|
||||
# GHSA-q8xq-qg4x-wphg: inscriptis decodes HTML entities when converting text/html
|
||||
# pages to snapshot text, so a page that visibly displays "<a href...>" yields
|
||||
# literal "<a href...>" in the snapshot — which would otherwise render as live
|
||||
# markup in HTML emails / Telegram (parse_mode=html) / Discord embeds, letting a
|
||||
# watched page inject phishing links into the operator's notification channel.
|
||||
# Also covers #3529 — raw '<' chars from text/plain pages breaking HTML email layout.
|
||||
# The operator's own template HTML (e.g. <a href="{{watch_url}}">) is outside the
|
||||
# variable values so it stays untouched. Diff placemarkers contain no HTML chars,
|
||||
# so they survive escape and are still replaced with <span> tags later.
|
||||
if 'html' in requested_output_format:
|
||||
# Re #3529: diff content from text/plain pages may contain raw '<' chars that break HTML emails.
|
||||
# Escape only the diff variables before Jinja2 renders them into the template, so the user's
|
||||
# own HTML in the notification body (e.g. <a href="{{watch_url}}">) is never touched.
|
||||
# Diff placemarkers (e.g. @removed_PLACEMARKER_OPEN) contain no HTML chars so they survive
|
||||
# html_escape and are still replaced with <span> tags by apply_service_tweaks later.
|
||||
watch_mime_type = n_object.get('watch_mime_type')
|
||||
if (watch_mime_type and 'text/' in watch_mime_type.lower() and 'html' not in watch_mime_type.lower()
|
||||
and 'html' in requested_output_format):
|
||||
from markupsafe import escape as html_escape
|
||||
_page_content_keys = {'raw_diff', 'current_snapshot', 'prev_snapshot', 'triggered_text'}
|
||||
for key in [k for k in notification_parameters if k.startswith('diff') or k in _page_content_keys]:
|
||||
|
||||
@@ -108,9 +108,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
|
||||
html_content = html_part.get_content()
|
||||
assert 'some text<br>' in html_content # We converted \n from the notification body
|
||||
assert 'fallback-body<br>' in html_content # kept the original <br>
|
||||
# GHSA-q8xq-qg4x-wphg: apostrophes in diff content are escaped (') for HTML notifications.
|
||||
# Renders as ' in the recipient's email client; only the byte-source differs.
|
||||
assert '(added) So let's see what happens.<br>' in html_content # the html part
|
||||
assert '(added) So let\'s see what happens.<br>' in html_content # the html part
|
||||
delete_all_watches(client)
|
||||
|
||||
|
||||
@@ -454,8 +452,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
|
||||
html_part = parts[1]
|
||||
assert html_part.get_content_type() == 'text/html'
|
||||
html_content = html_part.get_content()
|
||||
# GHSA-q8xq-qg4x-wphg: apostrophes in diff content are escaped (') for HTML notifications.
|
||||
assert '(removed) So let's see what happens.' in html_content # the html part
|
||||
assert '(removed) So let\'s see what happens.' in html_content # the html part
|
||||
assert '<!DOCTYPE html' not in html_content
|
||||
assert '<!DOCTYPE html' in html_content # Our original template is working correctly
|
||||
|
||||
@@ -795,6 +792,5 @@ def test_check_html_notification_with_apprise_format_is_html(client, live_server
|
||||
html_content = html_part.get_content()
|
||||
assert 'some text<br>' in html_content # We converted \n from the notification body
|
||||
assert 'fallback-body<br>' in html_content # kept the original <br>
|
||||
# GHSA-q8xq-qg4x-wphg: apostrophes in diff content are escaped (') for HTML notifications.
|
||||
assert '(added) So let's see what happens.<br>' in html_content # the html part
|
||||
assert '(added) So let\'s see what happens.<br>' in html_content # the html part
|
||||
delete_all_watches(client)
|
||||
@@ -48,32 +48,6 @@ def test_check_access_control(app, client, live_server, measure_memory_usage, da
|
||||
res = c.get(url_for("ui.ui_diff.diff_history_page", uuid="first"))
|
||||
assert b'Random content' in res.data
|
||||
|
||||
# GHSA-vwgh-2hvh-4xm5: shared_diff_access only covers the read-only
|
||||
# diff page — the extract endpoints (which run an attacker-supplied
|
||||
# regex against history and write a CSV to disk) must still require
|
||||
# auth even when the share flag is enabled.
|
||||
res = c.get(url_for("ui.ui_diff.diff_history_page_extract_GET", uuid="first"))
|
||||
assert res.status_code == 302, "Extract form GET must redirect to login for anonymous users"
|
||||
assert b'/login' in res.data or b'login' in res.headers.get('Location', '').encode()
|
||||
|
||||
res = c.post(
|
||||
url_for("ui.ui_diff.diff_history_page_extract_POST", uuid="first"),
|
||||
data={"extract_regex": ".*", "extract_submit_button": "Extract as CSV"},
|
||||
)
|
||||
assert res.status_code == 302, "Extract POST must redirect to login for anonymous users"
|
||||
assert b'login' in res.headers.get('Location', '').encode()
|
||||
|
||||
# But sub-resources the diff page legitimately loads should still pass the gate.
|
||||
# download_patch is linked from diff.html — anonymous viewers must be able to fetch it.
|
||||
# (We don't care about the body here, just that auth doesn't block it.)
|
||||
res = c.get(url_for("ui.ui_diff.download_patch", uuid="first"))
|
||||
assert res.status_code != 302, "download_patch must be reachable for shared diff viewers"
|
||||
|
||||
# processor_asset (used for screenshots embedded in image_ssim_diff watches) must also be reachable.
|
||||
# For a text watch the processor has no such asset so 404 is fine — what matters is no auth redirect.
|
||||
res = c.get(url_for("ui.ui_diff.processor_asset", uuid="first", asset_name="before"))
|
||||
assert res.status_code != 302, "processor_asset must be reachable for shared diff viewers"
|
||||
|
||||
# access to assets should work (check_authentication)
|
||||
res = c.get(url_for('static_content', group='js', filename='jquery-3.6.0.min.js'))
|
||||
assert res.status_code == 200
|
||||
|
||||
@@ -638,11 +638,9 @@ def test_html_color_notifications(client, live_server, measure_memory_usage, dat
|
||||
|
||||
def _test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type=None):
|
||||
"""
|
||||
#4121 - The operator's own HTML in the notification body template (e.g.
|
||||
<a href="{{watch_url}}">) must survive unescaped regardless of the watched page's
|
||||
Content-Type. The escape pass in handler.py only touches the variable *values*
|
||||
(diff/snapshot content from the page — see GHSA-q8xq-qg4x-wphg) — it leaves the
|
||||
surrounding template HTML alone.
|
||||
#4121 - Custom HTML in the notification body (e.g. <a href="{{watch_url}}">) must NOT be
|
||||
HTML-escaped regardless of the watched page's content-type. Only raw diff content from
|
||||
text/plain pages needs escaping (to prevent raw '<' chars breaking HTML email rendering).
|
||||
"""
|
||||
set_original_response(datastore_path=datastore_path)
|
||||
|
||||
@@ -695,174 +693,10 @@ def _test_custom_html_in_notification_body_not_escaped(client, datastore_path, c
|
||||
|
||||
|
||||
def test_plaintext_watch_custom_html_in_notification_body_not_escaped(client, live_server, measure_memory_usage, datastore_path):
|
||||
# Diff/snapshot values are escaped for HTML notifications (covered by
|
||||
# test_html_watch_diff_content_escaped_in_html_notification). What this test
|
||||
# locks in is that the *surrounding* template HTML is left alone in every case.
|
||||
# text/plain: diff content may contain raw '<' chars — those must be escaped, but NOT the user's template HTML
|
||||
_test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type="text/plain")
|
||||
# text/html: HTML processor strips tags before diffing, no escaping needed, user's template HTML must be preserved
|
||||
_test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type="text/html")
|
||||
# no MIME type (None): same as HTML case, user's template HTML must be preserved
|
||||
_test_custom_html_in_notification_body_not_escaped(client, datastore_path, content_type=None)
|
||||
|
||||
|
||||
def test_html_watch_diff_content_escaped_in_html_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
GHSA-q8xq-qg4x-wphg — diff/snapshot content from the watched page must be
|
||||
HTML-escaped before it is rendered into an HTML-format notification, regardless
|
||||
of the watched page's Content-Type.
|
||||
|
||||
Inscriptis (used to convert text/html pages to snapshot text) decodes HTML
|
||||
entities — so a page that visibly displays "<a href=...>" produces snapshot
|
||||
text containing literal "<a href=...>". The previous gate at handler.py:391
|
||||
only escaped when watch_mime_type matched 'text/' and not 'html', which let
|
||||
that decoded markup through to HTML emails / Telegram (parse_mode=html) /
|
||||
Discord embeds, where it renders as a real clickable link — i.e. an attacker
|
||||
who controls a watched page can inject phishing links into the operator's
|
||||
trusted notification channel.
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
|
||||
if os.path.isfile(os.path.join(datastore_path, "notification.txt")):
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
|
||||
# Baseline: an innocuous text/html page.
|
||||
baseline_html = "<html><body><p>nothing to see here</p></body></html>"
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), baseline_html)
|
||||
|
||||
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')
|
||||
# Pass content_type=text/html so the watch records 'text/html' as its content-type
|
||||
# — this is the branch the previous gate skipped escaping for.
|
||||
test_url = url_for('test_endpoint', _external=True, content_type='text/html')
|
||||
|
||||
# HTML-format notification body that embeds the snapshot directly. Operators do this
|
||||
# when they want the full changed content in the alert (e.g. an email digest).
|
||||
res = client.post(
|
||||
url_for("settings.settings_page"),
|
||||
data={
|
||||
"application-fetch_backend": "html_requests",
|
||||
"application-minutes_between_check": 180,
|
||||
"application-notification_body": 'Watch had changes:\n{{current_snapshot}}',
|
||||
"application-notification_format": "html",
|
||||
"application-notification_urls": test_notification_url,
|
||||
"application-notification_title": "Change detected",
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Now flip the page to something whose *visible* text contains entity-encoded
|
||||
# angle brackets — exactly the pattern a forum / pastebin / code-sample site uses
|
||||
# to display literal HTML on the page. Inscriptis will decode </> back to
|
||||
# literal < / > in the stored snapshot.
|
||||
attacker_html = (
|
||||
'<html><body><pre>'
|
||||
'<a href="https://attacker.example/payment">ACTION REQUIRED</a>'
|
||||
'<img src="https://attacker.example/track" width="1" height="1">'
|
||||
'</pre></body></html>'
|
||||
)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), attacker_html)
|
||||
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
assert b'Queued 1 watch for rechecking.' in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
wait_for_notification_endpoint_output(datastore_path=datastore_path)
|
||||
|
||||
with open(os.path.join(datastore_path, "notification.txt"), 'r') as f:
|
||||
body = f.read()
|
||||
|
||||
# Sanity: the snapshot really did contain the decoded markup (otherwise the test
|
||||
# would pass for the wrong reason). The escaped form must appear somewhere.
|
||||
assert '<a href=' in body or '&lt;a href=' in body, \
|
||||
f"Expected escaped attacker markup in notification body, got: {body!r}"
|
||||
|
||||
# The bug: a live <a href="https://attacker..."> ends up in the HTML notification.
|
||||
assert '<a href="https://attacker.example/payment"' not in body, \
|
||||
f"Diff content from text/html page was NOT escaped — phishing link reached HTML notification: {body!r}"
|
||||
assert '<img src="https://attacker.example/track"' not in body, \
|
||||
f"Diff content from text/html page was NOT escaped — tracking pixel reached HTML notification: {body!r}"
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
|
||||
def test_source_url_diff_content_escaped_in_html_notification(client, live_server, measure_memory_usage, datastore_path):
|
||||
"""
|
||||
GHSA-q8xq-qg4x-wphg — companion to the inscriptis test. `source:`-prefixed
|
||||
URLs short-circuit the HTML→text step (processor.py:509-511) and store the
|
||||
raw HTML body verbatim as the snapshot. That gives an attacker who controls
|
||||
a watched page a *direct* injection path — no entity-encoding tricks needed,
|
||||
any live `<a>` / `<img>` / `<script>` on the page lands straight into
|
||||
current_snapshot / raw_diff. The escape pass must catch this too.
|
||||
"""
|
||||
from .util import write_test_file_and_sync
|
||||
|
||||
if os.path.isfile(os.path.join(datastore_path, "notification.txt")):
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
|
||||
# Baseline: innocuous raw HTML.
|
||||
baseline_html = "<html><body><p>nothing to see here</p></body></html>"
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), baseline_html)
|
||||
|
||||
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')
|
||||
# `source:` prefix → raw HTML body is stored as-is in the snapshot (no inscriptis).
|
||||
test_url = 'source:' + url_for('test_endpoint', _external=True, content_type='text/html')
|
||||
|
||||
res = client.post(
|
||||
url_for("settings.settings_page"),
|
||||
data={
|
||||
"application-fetch_backend": "html_requests",
|
||||
"application-minutes_between_check": 180,
|
||||
"application-notification_body": 'Watch had changes:\n{{current_snapshot}}',
|
||||
"application-notification_format": "html",
|
||||
"application-notification_urls": test_notification_url,
|
||||
"application-notification_title": "Change detected",
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'Settings updated' in res.data
|
||||
|
||||
res = client.post(
|
||||
url_for("ui.ui_views.form_quick_watch_add"),
|
||||
data={"url": test_url, "tags": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Modified page contains LIVE HTML directly — no entity encoding. With source:
|
||||
# this lands in the snapshot verbatim.
|
||||
attacker_html = (
|
||||
'<html><body>'
|
||||
'<a href="https://attacker.example/payment">ACTION REQUIRED</a>'
|
||||
'<img src="https://attacker.example/track" width="1" height="1">'
|
||||
'</body></html>'
|
||||
)
|
||||
write_test_file_and_sync(os.path.join(datastore_path, "endpoint-content.txt"), attacker_html)
|
||||
|
||||
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
||||
assert b'Queued 1 watch for rechecking.' in res.data
|
||||
|
||||
wait_for_all_checks(client)
|
||||
wait_for_notification_endpoint_output(datastore_path=datastore_path)
|
||||
|
||||
with open(os.path.join(datastore_path, "notification.txt"), 'r') as f:
|
||||
body = f.read()
|
||||
|
||||
# Sanity: snapshot really did carry the markup through. Escaped form must show up.
|
||||
assert '<a href=' in body or '&lt;a href=' in body, \
|
||||
f"Expected escaped attacker markup in notification body, got: {body!r}"
|
||||
|
||||
assert '<a href="https://attacker.example/payment"' not in body, \
|
||||
f"source: URL raw HTML was NOT escaped — phishing link reached HTML notification: {body!r}"
|
||||
assert '<img src="https://attacker.example/track"' not in body, \
|
||||
f"source: URL raw HTML was NOT escaped — tracking pixel reached HTML notification: {body!r}"
|
||||
|
||||
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||
|
||||
Reference in New Issue
Block a user