#!/usr/bin/env python3
"""
Unit tests for XPath default namespace handling in RSS/Atom feeds.
Tests the fix for issue where //title/text() returns empty on feeds with default namespaces.
Real-world test data from https://github.com/microsoft/PowerToys/releases.atom
"""
import sys
import os
import pytest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import html_tools
# Real-world Atom feed with default namespace from GitHub PowerToys releases
# This is the actual format that was failing before the fix
atom_feed_with_default_ns = """
tag:github.com,2008:https://github.com/microsoft/PowerToys/releasesRelease notes from PowerToys2025-10-23T08:53:12Ztag:github.com,2008:Repository/184456251/v0.95.12025-10-24T14:20:14ZRelease 0.95.1<p>This patch release fixes several important stability issues.</p>Jaylyn-Barbeetag:github.com,2008:Repository/184456251/v0.95.02025-10-17T12:51:21ZRelease v0.95.0<p>New features, stability, optimization improvements.</p>Jaylyn-Barbee"""
# RSS feed without default namespace
rss_feed_no_default_ns = """
Channel TitleChannel DescriptionItem 1 TitleItem 1 DescriptionItem 2 TitleItem 2 Description"""
# RSS 2.0 feed with namespace prefix (not default)
rss_feed_with_ns_prefix = """
Channel TitleItem TitleAuthor Name"""
class TestXPathDefaultNamespace:
"""Test XPath queries on feeds with and without default namespaces."""
def test_atom_feed_simple_xpath_with_xpath_filter(self):
"""Test that //title/text() works on Atom feed with default namespace using xpath_filter."""
result = html_tools.xpath_filter('//title/text()', atom_feed_with_default_ns, is_rss=True)
assert 'Release notes from PowerToys' in result
assert 'Release 0.95.1' in result
assert 'Release v0.95.0' in result
def test_atom_feed_nested_xpath_with_xpath_filter(self):
"""Test nested XPath like //entry/title/text() on Atom feed."""
result = html_tools.xpath_filter('//entry/title/text()', atom_feed_with_default_ns, is_rss=True)
assert 'Release 0.95.1' in result
assert 'Release v0.95.0' in result
# Should NOT include the feed title
assert 'Release notes from PowerToys' not in result
def test_atom_feed_other_elements_with_xpath_filter(self):
"""Test that other elements like //updated/text() work on Atom feed."""
result = html_tools.xpath_filter('//updated/text()', atom_feed_with_default_ns, is_rss=True)
assert '2025-10-23T08:53:12Z' in result
assert '2025-10-24T14:20:14Z' in result
def test_rss_feed_without_namespace(self):
"""Test that //title/text() works on RSS feed without default namespace."""
result = html_tools.xpath_filter('//title/text()', rss_feed_no_default_ns, is_rss=True)
assert 'Channel Title' in result
assert 'Item 1 Title' in result
assert 'Item 2 Title' in result
def test_rss_feed_nested_xpath(self):
"""Test nested XPath on RSS feed without default namespace."""
result = html_tools.xpath_filter('//item/title/text()', rss_feed_no_default_ns, is_rss=True)
assert 'Item 1 Title' in result
assert 'Item 2 Title' in result
# Should NOT include channel title
assert 'Channel Title' not in result
def test_rss_feed_with_prefixed_namespaces(self):
"""Test that feeds with namespace prefixes (not default) still work."""
result = html_tools.xpath_filter('//title/text()', rss_feed_with_ns_prefix, is_rss=True)
assert 'Channel Title' in result
assert 'Item Title' in result
def test_local_name_workaround_still_works(self):
"""Test that local-name() workaround still works for Atom feeds."""
result = html_tools.xpath_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_rss=True)
assert 'Release notes from PowerToys' in result
assert 'Release 0.95.1' in result
def test_xpath1_filter_without_default_namespace(self):
"""Test xpath1_filter works on RSS without default namespace."""
result = html_tools.xpath1_filter('//title/text()', rss_feed_no_default_ns, is_rss=True)
assert 'Channel Title' in result
assert 'Item 1 Title' in result
def test_xpath1_filter_with_default_namespace_returns_empty(self):
"""Test that xpath1_filter returns empty on Atom with default namespace (known limitation)."""
result = html_tools.xpath1_filter('//title/text()', atom_feed_with_default_ns, is_rss=True)
# xpath1_filter (lxml) doesn't support default namespaces, so this returns empty
assert result == ''
def test_xpath1_filter_local_name_workaround(self):
"""Test that xpath1_filter works with local-name() workaround on Atom feeds."""
result = html_tools.xpath1_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_rss=True)
assert 'Release notes from PowerToys' in result
assert 'Release 0.95.1' in result
if __name__ == '__main__':
pytest.main([__file__, '-v'])