mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-05-01 15:20:33 +00:00
Security - Hardening XML parser against XXE
This commit is contained in:
@@ -282,7 +282,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
@@ -346,7 +346,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
|
||||
try:
|
||||
if is_xml:
|
||||
# So that we can keep CDATA for cdata_in_document_to_text() to process
|
||||
parser = etree.XMLParser(strip_cdata=False)
|
||||
parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
|
||||
# For XML/RSS content, use etree.fromstring to properly handle XML declarations
|
||||
tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
|
||||
else:
|
||||
|
||||
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# run from dir above changedetectionio/ dir
|
||||
# python3 -m pytest changedetectionio/tests/unit/test_xml_security.py
|
||||
|
||||
import pytest
|
||||
from changedetectionio import html_tools
|
||||
|
||||
|
||||
def _xxe_payload(file_path: str) -> str:
|
||||
return f"""<?xml version="1.0"?>
|
||||
<!DOCTYPE root [
|
||||
<!ENTITY xxe SYSTEM "file://{file_path}">
|
||||
]>
|
||||
<root><item>&xxe;</item></root>"""
|
||||
|
||||
|
||||
def test_xxe_not_expanded_xpath_filter(tmp_path):
    """xpath_filter must not expand external entities (CVE-2026-41895).

    Writes a sentinel string to a temporary file, feeds xpath_filter an XML
    document whose external entity points at that file, and checks that the
    sentinel does not show up in the filter output.
    """
    sentinel = "xxe_sentinel_should_never_appear_in_output"
    sentinel_file = tmp_path / "sentinel.txt"
    # Explicit encoding so the fixture does not depend on the platform locale.
    sentinel_file.write_text(sentinel, encoding="utf-8")

    # _xxe_payload() is annotated to take a str, so convert the Path explicitly.
    xml_document = _xxe_payload(str(sentinel_file))
    result = html_tools.xpath_filter("//item", xml_document, is_xml=True)

    # Had the entity been resolved, the file contents would be in the output.
    assert sentinel not in result
|
||||
|
||||
|
||||
def test_xxe_not_expanded_xpath1_filter(tmp_path):
    """xpath1_filter must not expand external entities (CVE-2026-41895).

    Writes a sentinel string to a temporary file, feeds xpath1_filter an XML
    document whose external entity points at that file, and checks that the
    sentinel does not show up in the filter output.
    """
    sentinel = "xxe_sentinel_should_never_appear_in_output"
    sentinel_file = tmp_path / "sentinel.txt"
    # Explicit encoding so the fixture does not depend on the platform locale.
    sentinel_file.write_text(sentinel, encoding="utf-8")

    # _xxe_payload() is annotated to take a str, so convert the Path explicitly.
    xml_document = _xxe_payload(str(sentinel_file))
    result = html_tools.xpath1_filter("//item", xml_document, is_xml=True)

    # Had the entity been resolved, the file contents would be in the output.
    assert sentinel not in result
|
||||
Reference in New Issue
Block a user