diff --git a/changedetectionio/html_tools.py b/changedetectionio/html_tools.py index 0a3cd108..cda37cc0 100644 --- a/changedetectionio/html_tools.py +++ b/changedetectionio/html_tools.py @@ -172,99 +172,131 @@ def elementpath_tostring(obj): return str(obj) # Return str Utf-8 of matched rules -def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False): +def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False): + """ + + :param xpath_filter: + :param html_content: + :param append_pretty_line_formatting: + :param is_xml: set to true if is XML or is RSS (RSS is XML) + :return: + """ from lxml import etree, html import elementpath # xpath 2.0-3.1 from elementpath.xpath3 import XPath3Parser parser = etree.HTMLParser() - if is_rss: - # So that we can keep CDATA for cdata_in_document_to_text() to process - parser = etree.XMLParser(strip_cdata=False) - - tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) - html_block = "" - - # Build namespace map for XPath queries - namespaces = {'re': 'http://exslt.org/regular-expressions'} - - # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML) - # XPath spec: unprefixed element names have no namespace, not the default namespace - # Solution: Register the default namespace with empty string prefix in elementpath - # This is primarily for RSS/Atom feeds but works for any XML with default namespace - if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap: - # Register the default namespace with empty string prefix for elementpath - # This allows //title to match elements in the default namespace - namespaces[''] = tree.nsmap[None] - - r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) - #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) - #@note: //title/text() wont work where CDATA.. 
(use cdata_in_document_to_text first) - - if type(r) != list: - r = [r] - - for element in r: - # When there's more than 1 match, then add the suffix to separate each line - # And where the matched result doesn't include something that will cause Inscriptis to add a newline - # (This way each 'match' reliably has a new-line in the diff) - # Divs are converted to 4 whitespaces by inscriptis - if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])): - html_block += TEXT_FILTER_LIST_LINE_SUFFIX - - if type(element) == str: - html_block += element - elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree): - html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + tree = None + try: + if is_xml: + # So that we can keep CDATA for cdata_in_document_to_text() to process + parser = etree.XMLParser(strip_cdata=False) + # For XML/RSS content, use etree.fromstring to properly handle XML declarations + tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser) else: - html_block += elementpath_tostring(element) + tree = html.fromstring(html_content, parser=parser) + html_block = "" - return html_block + # Build namespace map for XPath queries + namespaces = {'re': 'http://exslt.org/regular-expressions'} + + # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML) + # XPath spec: unprefixed element names have no namespace, not the default namespace + # Solution: Register the default namespace with empty string prefix in elementpath + # This is primarily for RSS/Atom feeds but works for any XML with default namespace + if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap: + # Register the default namespace with empty string prefix for elementpath + # This allows //title to match elements in the default namespace + namespaces[''] = 
tree.nsmap[None] + + r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) + #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) + #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) + + if type(r) != list: + r = [r] + + for element in r: + # When there's more than 1 match, then add the suffix to separate each line + # And where the matched result doesn't include something that will cause Inscriptis to add a newline + # (This way each 'match' reliably has a new-line in the diff) + # Divs are converted to 4 whitespaces by inscriptis + if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])): + html_block += TEXT_FILTER_LIST_LINE_SUFFIX + + if type(element) == str: + html_block += element + elif issubclass(type(element), etree._Element) or issubclass(type(element), etree._ElementTree): + # Use 'xml' method for RSS/XML content, 'html' for HTML content + # parser will be XMLParser if we detected XML content + method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html' + html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode') + else: + html_block += elementpath_tostring(element) + + return html_block + finally: + # Explicitly clear the tree to free memory + # lxml trees can hold significant memory, especially with large documents + if tree is not None: + tree.clear() # Return str Utf-8 of matched rules # 'xpath1:' -def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False): +def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_xml=False): from lxml import etree, html parser = None - if is_rss: - # So that we can keep CDATA for cdata_in_document_to_text() to process - parser = etree.XMLParser(strip_cdata=False) - - tree = 
html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) - html_block = "" - - # Build namespace map for XPath queries - namespaces = {'re': 'http://exslt.org/regular-expressions'} - - # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace - # For documents with default namespace (RSS/Atom feeds), users must use: - # - local-name(): //*[local-name()='title']/text() - # - Or use xpath_filter (not xpath1_filter) which supports default namespaces - # XPath spec: unprefixed element names have no namespace, not the default namespace - - r = tree.xpath(xpath_filter.strip(), namespaces=namespaces) - #@note: xpath1 (lxml) does NOT automatically handle default namespaces - #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support - #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) - - for element in r: - # When there's more than 1 match, then add the suffix to separate each line - # And where the matched result doesn't include something that will cause Inscriptis to add a newline - # (This way each 'match' reliably has a new-line in the diff) - # Divs are converted to 4 whitespaces by inscriptis - if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])): - html_block += TEXT_FILTER_LIST_LINE_SUFFIX - - # Some kind of text, UTF-8 or other - if isinstance(element, (str, bytes)): - html_block += element + tree = None + try: + if is_xml: + # So that we can keep CDATA for cdata_in_document_to_text() to process + parser = etree.XMLParser(strip_cdata=False) + # For XML/RSS content, use etree.fromstring to properly handle XML declarations + tree = etree.fromstring(html_content.encode('utf-8') if isinstance(html_content, str) else html_content, parser=parser) else: - # Return the HTML which will get parsed as text - html_block += etree.tostring(element, 
pretty_print=True).decode('utf-8') + tree = html.fromstring(html_content, parser=parser) + html_block = "" - return html_block + # Build namespace map for XPath queries + namespaces = {'re': 'http://exslt.org/regular-expressions'} + + # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace + # For documents with default namespace (RSS/Atom feeds), users must use: + # - local-name(): //*[local-name()='title']/text() + # - Or use xpath_filter (not xpath1_filter) which supports default namespaces + # XPath spec: unprefixed element names have no namespace, not the default namespace + + r = tree.xpath(xpath_filter.strip(), namespaces=namespaces) + #@note: xpath1 (lxml) does NOT automatically handle default namespaces + #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support + #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) + + for element in r: + # When there's more than 1 match, then add the suffix to separate each line + # And where the matched result doesn't include something that will cause Inscriptis to add a newline + # (This way each 'match' reliably has a new-line in the diff) + # Divs are converted to 4 whitespaces by inscriptis + if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])): + html_block += TEXT_FILTER_LIST_LINE_SUFFIX + + # Some kind of text, UTF-8 or other + if isinstance(element, (str, bytes)): + html_block += element + else: + # Return the HTML/XML which will get parsed as text + # Use 'xml' method for RSS/XML content, 'html' for HTML content + # parser will be XMLParser if we detected XML content + method = 'xml' if (is_xml or isinstance(parser, etree.XMLParser)) else 'html' + html_block += etree.tostring(element, pretty_print=True, method=method, encoding='unicode') + + return html_block + finally: + # Explicitly clear the tree to free memory + # 
lxml trees can hold significant memory, especially with large documents + if tree is not None: + tree.clear() # Extract/find element def extract_element(find='title', html_content=''): diff --git a/changedetectionio/processors/magic.py b/changedetectionio/processors/magic.py index 2a0ef68f..9d9018d7 100644 --- a/changedetectionio/processors/magic.py +++ b/changedetectionio/processors/magic.py @@ -103,15 +103,15 @@ class guess_stream_type(): self.is_json = True elif 'pdf' in magic_content_header: self.is_pdf = True - elif has_html_patterns or http_content_header == 'text/html': - self.is_html = True - elif any(s in magic_content_header for s in JSON_CONTENT_TYPES): - self.is_json = True # magic will call a rss document 'xml' # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized: self.is_rss = True + elif has_html_patterns or http_content_header == 'text/html': + self.is_html = True + elif any(s in magic_content_header for s in JSON_CONTENT_TYPES): + self.is_json = True elif any(s in http_content_header for s in XML_CONTENT_TYPES): # Only mark as generic XML if not already detected as RSS if not self.is_rss: diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index 62d749f7..2f15a55e 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -298,7 +298,7 @@ class ContentProcessor: xpath_filter=filter_rule.replace('xpath:', ''), html_content=content, append_pretty_line_formatting=not self.watch.is_source_type_url, - is_rss=stream_content_type.is_rss + is_xml=stream_content_type.is_rss or 
stream_content_type.is_xml ) # XPath1 filters (first match only) @@ -307,7 +307,7 @@ class ContentProcessor: xpath_filter=filter_rule.replace('xpath1:', ''), html_content=content, append_pretty_line_formatting=not self.watch.is_source_type_url, - is_rss=stream_content_type.is_rss + is_xml=stream_content_type.is_rss or stream_content_type.is_xml ) # JSON filters diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index 0fa74094..6023732b 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -405,7 +405,10 @@ def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server follow_redirects=True ) - assert b'<string name="feed_update_receiver_name"' in res.data + # Check that the string element with the correct name attribute is present + # Note: namespace declarations may be included when extracting elements, which is correct XML behavior + assert b'feed_update_receiver_name' in res.data + assert b'Abonnementen bijwerken' in res.data assert b'<foobar' not in res.data res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) diff --git a/changedetectionio/tests/test_xpath_default_namespace.py b/changedetectionio/tests/test_xpath_default_namespace.py index 10eda4ca..d22e1ac5 100644 --- a/changedetectionio/tests/test_xpath_default_namespace.py +++ b/changedetectionio/tests/test_xpath_default_namespace.py @@ -84,14 +84,14 @@ class TestXPathDefaultNamespace: def test_atom_feed_simple_xpath_with_xpath_filter(self): """Test that //title/text() works on Atom feed with default namespace using xpath_filter.""" - result = html_tools.xpath_filter('//title/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath_filter('//title/text()', atom_feed_with_default_ns, is_xml=True) assert 'Release notes from PowerToys' in result assert 'Release 0.95.1' in result assert 'Release v0.95.0' in result def 
test_atom_feed_nested_xpath_with_xpath_filter(self): """Test nested XPath like //entry/title/text() on Atom feed.""" - result = html_tools.xpath_filter('//entry/title/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath_filter('//entry/title/text()', atom_feed_with_default_ns, is_xml=True) assert 'Release 0.95.1' in result assert 'Release v0.95.0' in result # Should NOT include the feed title @@ -99,20 +99,20 @@ class TestXPathDefaultNamespace: def test_atom_feed_other_elements_with_xpath_filter(self): """Test that other elements like //updated/text() work on Atom feed.""" - result = html_tools.xpath_filter('//updated/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath_filter('//updated/text()', atom_feed_with_default_ns, is_xml=True) assert '2025-10-23T08:53:12Z' in result assert '2025-10-24T14:20:14Z' in result def test_rss_feed_without_namespace(self): """Test that //title/text() works on RSS feed without default namespace.""" - result = html_tools.xpath_filter('//title/text()', rss_feed_no_default_ns, is_rss=True) + result = html_tools.xpath_filter('//title/text()', rss_feed_no_default_ns, is_xml=True) assert 'Channel Title' in result assert 'Item 1 Title' in result assert 'Item 2 Title' in result def test_rss_feed_nested_xpath(self): """Test nested XPath on RSS feed without default namespace.""" - result = html_tools.xpath_filter('//item/title/text()', rss_feed_no_default_ns, is_rss=True) + result = html_tools.xpath_filter('//item/title/text()', rss_feed_no_default_ns, is_xml=True) assert 'Item 1 Title' in result assert 'Item 2 Title' in result # Should NOT include channel title @@ -120,31 +120,31 @@ class TestXPathDefaultNamespace: def test_rss_feed_with_prefixed_namespaces(self): """Test that feeds with namespace prefixes (not default) still work.""" - result = html_tools.xpath_filter('//title/text()', rss_feed_with_ns_prefix, is_rss=True) + result = html_tools.xpath_filter('//title/text()', 
rss_feed_with_ns_prefix, is_xml=True) assert 'Channel Title' in result assert 'Item Title' in result def test_local_name_workaround_still_works(self): """Test that local-name() workaround still works for Atom feeds.""" - result = html_tools.xpath_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_xml=True) assert 'Release notes from PowerToys' in result assert 'Release 0.95.1' in result def test_xpath1_filter_without_default_namespace(self): """Test xpath1_filter works on RSS without default namespace.""" - result = html_tools.xpath1_filter('//title/text()', rss_feed_no_default_ns, is_rss=True) + result = html_tools.xpath1_filter('//title/text()', rss_feed_no_default_ns, is_xml=True) assert 'Channel Title' in result assert 'Item 1 Title' in result def test_xpath1_filter_with_default_namespace_returns_empty(self): """Test that xpath1_filter returns empty on Atom with default namespace (known limitation).""" - result = html_tools.xpath1_filter('//title/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath1_filter('//title/text()', atom_feed_with_default_ns, is_xml=True) # xpath1_filter (lxml) doesn't support default namespaces, so this returns empty assert result == '' def test_xpath1_filter_local_name_workaround(self): """Test that xpath1_filter works with local-name() workaround on Atom feeds.""" - result = html_tools.xpath1_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_rss=True) + result = html_tools.xpath1_filter('//*[local-name()="title"]/text()', atom_feed_with_default_ns, is_xml=True) assert 'Release notes from PowerToys' in result assert 'Release 0.95.1' in result diff --git a/changedetectionio/tests/test_xpath_selector_unit.py b/changedetectionio/tests/test_xpath_selector_unit.py index b4dda080..a09a6539 100644 --- a/changedetectionio/tests/test_xpath_selector_unit.py +++ 
b/changedetectionio/tests/test_xpath_selector_unit.py @@ -201,3 +201,120 @@ def test_trips(html_content, xpath, answer): html_content = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=True) assert type(html_content) == str assert answer in html_content + + +# Test for UTF-8 encoding bug fix (issue #3658) +# Polish and other UTF-8 characters should be preserved correctly +polish_html = """<!DOCTYPE html> +<html> +<head><meta charset="utf-8"></head> +<body> +<div class="index--s-headline-link"> + <a class="index--s-headline-link" href="#"> + Naukowcy potwierdzają: oglądanie krótkich filmików prowadzi do "zgnilizny mózgu" + </a> +</div> +<div> + <a class="other-class" href="#"> + Test with Polish chars: żółć ąę śń + </a> +</div> +<div> + <p class="unicode-test">Cyrillic: Привет мир</p> + <p class="unicode-test">Greek: Γειά σου κόσμε</p> + <p class="unicode-test">Arabic: مرحبا بالعالم</p> + <p class="unicode-test">Chinese: 你好世界</p> + <p class="unicode-test">Japanese: こんにちは世界</p> + <p class="unicode-test">Emoji: 🌍🎉✨</p> +</div> +</body> +</html> +""" + + +@pytest.mark.parametrize("html_content", [polish_html]) +@pytest.mark.parametrize("xpath, expected_text", [ + # Test Polish characters in xpath_filter + ('//a[(contains(@class,"index--s-headline-link"))]', 'Naukowcy potwierdzają'), + ('//a[(contains(@class,"index--s-headline-link"))]', 'oglądanie krótkich filmików'), + ('//a[(contains(@class,"index--s-headline-link"))]', 'zgnilizny mózgu'), + ('//a[@class="other-class"]', 'żółć ąę śń'), + + # Test various Unicode scripts + ('//p[@class="unicode-test"]', 'Привет мир'), + ('//p[@class="unicode-test"]', 'Γειά σου κόσμε'), + ('//p[@class="unicode-test"]', 'مرحبا بالعالم'), + ('//p[@class="unicode-test"]', '你好世界'), + ('//p[@class="unicode-test"]', 'こんにちは世界'), + ('//p[@class="unicode-test"]', '🌍🎉✨'), + + # Test with text() extraction + ('//a[@class="other-class"]/text()', 'żółć'), +]) +def test_xpath_utf8_encoding(html_content, xpath, 
expected_text): + """Test that XPath filters preserve UTF-8 characters correctly (issue #3658)""" + result = html_tools.xpath_filter(xpath, html_content, append_pretty_line_formatting=False) + assert type(result) == str + assert expected_text in result + # Ensure characters are NOT HTML-entity encoded + # For example, 'ą' should NOT become 'ą' + assert '&#' not in result or expected_text in result + + +@pytest.mark.parametrize("html_content", [polish_html]) +@pytest.mark.parametrize("xpath, expected_text", [ + # Test Polish characters in xpath1_filter + ('//a[(contains(@class,"index--s-headline-link"))]', 'Naukowcy potwierdzają'), + ('//a[(contains(@class,"index--s-headline-link"))]', 'mózgu'), + ('//a[@class="other-class"]', 'żółć ąę śń'), + + # Test various Unicode scripts with xpath1 + ('//p[@class="unicode-test" and contains(text(), "Cyrillic")]', 'Привет мир'), + ('//p[@class="unicode-test" and contains(text(), "Greek")]', 'Γειά σου'), + ('//p[@class="unicode-test" and contains(text(), "Chinese")]', '你好世界'), +]) +def test_xpath1_utf8_encoding(html_content, xpath, expected_text): + """Test that XPath1 filters preserve UTF-8 characters correctly""" + result = html_tools.xpath1_filter(xpath, html_content, append_pretty_line_formatting=False) + assert type(result) == str + assert expected_text in result + # Ensure characters are NOT HTML-entity encoded + assert '&#' not in result or expected_text in result + + +# Test with real-world example from wyborcza.pl (issue #3658) +wyborcza_style_html = """<!DOCTYPE html> +<html lang="pl"> +<head><meta charset="utf-8"></head> +<body> +<div class="article-list"> + <a class="index--s-headline-link" href="/article1"> + Naukowcy potwierdzają: oglądanie krótkich filmików prowadzi do "zgnilizny mózgu" + </a> + <a class="index--s-headline-link" href="/article2"> + Zmiany klimatyczne wpływają na życie w miastach + </a> + <a class="index--s-headline-link" href="/article3"> + Łódź: Nowe inwestycje w infrastrukturę miejską + </a> 
+</div>
+</body>
+</html>
+"""
+
+
+def test_wyborcza_real_world_example():
+    """Test real-world case from wyborcza.pl that was failing (issue #3658)"""
+    xpath = '//a[(contains(@class,"index--s-headline-link"))]'
+    result = html_tools.xpath_filter(xpath, wyborcza_style_html, append_pretty_line_formatting=False)
+
+    # These exact strings should appear in the result
+    assert 'Naukowcy potwierdzają' in result
+    assert 'oglądanie krótkich filmików' in result
+    assert 'zgnilizny mózgu' in result
+    assert 'Łódź' in result
+
+    # Make sure they're NOT corrupted to mojibake like "potwierdzajÄ"
+    assert 'potwierdzajÄ' not in result
+    assert 'oglÄ…danie' not in result
+    assert 'mÃ³zgu' not in result