diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py
index f029ad7a..8e8f12ea 100644
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -282,7 +282,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
     try:
         if is_xml:
             # So that we can keep CDATA for cdata_in_document_to_text() to process
-            parser = etree.XMLParser(strip_cdata=False)
+            parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
             # For XML/RSS content, use etree.fromstring to properly handle XML declarations
             tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
         else:
@@ -346,7 +346,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
     try:
         if is_xml:
             # So that we can keep CDATA for cdata_in_document_to_text() to process
-            parser = etree.XMLParser(strip_cdata=False)
+            parser = etree.XMLParser(strip_cdata=False, resolve_entities=False, no_network=True)
             # For XML/RSS content, use etree.fromstring to properly handle XML declarations
             tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser)
         else:
diff --git a/changedetectionio/tests/unit/test_xml_security.py b/changedetectionio/tests/unit/test_xml_security.py
new file mode 100644
index 00000000..e359683a
--- /dev/null
+++ b/changedetectionio/tests/unit/test_xml_security.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+
+# run from dir above changedetectionio/ dir
+# python3 -m pytest changedetectionio/tests/unit/test_xml_security.py
+
+import pytest
+from changedetectionio import html_tools
+
+
+def _xxe_payload(file_path: str) -> str:
+    """Return an XML document whose external entity 'xxe' points at file_path and is referenced inside <item>."""
+    return f"""<?xml version="1.0"?>
+<!DOCTYPE root [
+  <!ENTITY xxe SYSTEM "file://{file_path}">
+]>
+<root><item>&xxe;</item></root>"""
+
+
+def test_xxe_not_expanded_xpath_filter(tmp_path):
+    """xpath_filter must not expand external entities (CVE-2026-41895)."""
+    sentinel_file = tmp_path / "sentinel.txt"
+    sentinel = "xxe_sentinel_should_never_appear_in_output"
+    sentinel_file.write_text(sentinel)
+
+    result = html_tools.xpath_filter("//item", _xxe_payload(sentinel_file), is_xml=True)
+    assert sentinel not in result
+
+
+def test_xxe_not_expanded_xpath1_filter(tmp_path):
+    """xpath1_filter must not expand external entities (CVE-2026-41895)."""
+    sentinel_file = tmp_path / "sentinel.txt"
+    sentinel = "xxe_sentinel_should_never_appear_in_output"
+    sentinel_file.write_text(sentinel)
+
+    result = html_tools.xpath1_filter("//item", _xxe_payload(sentinel_file), is_xml=True)
+    assert sentinel not in result