Security - Hardening XML parser against XXE

This commit is contained in:
dgtlmoon
2026-04-27 16:36:57 +10:00
parent 866b442576
commit 52b189fc7c
2 changed files with 37 additions and 2 deletions
+2 -2
View File
@@ -282,7 +282,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
try:
if is_xml:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
else:
@@ -346,7 +346,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
try:
if is_xml:
# So that we can keep CDATA for cdata_in_document_to_text() to process
parser = etree.XMLParser(strip_cdata=False)
parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
else:
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m pytest changedetectionio/tests/unit/test_xml_security.py
import os
from pathlib import Path

import pytest

from changedetectionio import html_tools
def _xxe_payload(file_path: "str | os.PathLike[str]") -> str:
    """Build an XML document whose ``xxe`` external entity targets *file_path*.

    Args:
        file_path: Local file the ``xxe`` entity should reference, given as a
            string or a path-like object. A relative path is anchored to the
            current working directory.

    Returns:
        XML text with a DOCTYPE that declares ``xxe`` as a SYSTEM entity and a
        single ``<item>`` element that references it.
    """
    # Path.as_uri() produces a well-formed, percent-encoded file URI on every
    # platform. Gluing "file://" onto the raw path is only valid for plain
    # absolute POSIX paths, and a malformed URI would let the negative
    # assertions in these tests pass without the entity ever being loadable.
    file_uri = Path(file_path).absolute().as_uri()
    return f"""<?xml version="1.0"?>
<!DOCTYPE root [
<!ENTITY xxe SYSTEM "{file_uri}">
]>
<root><item>&xxe;</item></root>"""
def test_xxe_not_expanded_xpath_filter(tmp_path):
    """Check that xpath_filter leaves external entities unexpanded (CVE-2026-41895).

    A marker string is written to a temporary file, an XML payload declares an
    external entity pointing at that file, and the filtered output must not
    contain the marker.
    """
    marker = "xxe_sentinel_should_never_appear_in_output"
    target = tmp_path / "sentinel.txt"
    target.write_text(marker)

    xml_doc = _xxe_payload(target)
    filtered = html_tools.xpath_filter("//item", xml_doc, is_xml=True)

    # If the entity had been resolved, the file's content would be in the output.
    assert marker not in filtered
def test_xxe_not_expanded_xpath1_filter(tmp_path):
    """Check that xpath1_filter leaves external entities unexpanded (CVE-2026-41895).

    A marker string is written to a temporary file, an XML payload declares an
    external entity pointing at that file, and the filtered output must not
    contain the marker.
    """
    marker = "xxe_sentinel_should_never_appear_in_output"
    target = tmp_path / "sentinel.txt"
    target.write_text(marker)

    xml_doc = _xxe_payload(target)
    filtered = html_tools.xpath1_filter("//item", xml_doc, is_xml=True)

    # If the entity had been resolved, the file's content would be in the output.
    assert marker not in filtered