No need to reformat/reprocess content in the case that no filters were found (#3484, #3483)

2025-12-15 04:26:14 +00:00 · 2025-10-10 13:44:49 +02:00
parent 93b4f79006
commit 80be1a30f2
2 changed files with 13 additions and 3 deletions
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -1,5 +1,4 @@
 from loguru import logger
-from lxml import etree
 from typing import List
 import html
 import json
@@ -58,13 +57,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting

    return html_block

-def subtractive_css_selector(css_selector, html_content):
+def subtractive_css_selector(css_selector, content):
    from bs4 import BeautifulSoup
-    soup = BeautifulSoup(html_content, "html.parser")
+    soup = BeautifulSoup(content, "html.parser")

    # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
    elements_to_remove = soup.select(css_selector)

+    if not elements_to_remove:
+        # Better to return the original than rebuild with BeautifulSoup
+        return content
+
    # Then, remove them in a separate loop
    for item in elements_to_remove:
        item.decompose()
@@ -72,6 +75,7 @@ def subtractive_css_selector(css_selector, html_content):
    return str(soup)

 def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
+    from lxml import etree
    # Parse the HTML content using lxml
    html_tree = etree.HTML(html_content)

@@ -83,6 +87,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
        # Collect elements for each selector
        elements_to_remove.extend(html_tree.xpath(selector))

+    # If no elements were found, return the original HTML content
+    if not elements_to_remove:
+        return html_content
+
    # Then, remove them in a separate loop
    for element in elements_to_remove:
        if element.getparent() is not None:  # Ensure the element has a parent before removing