Merge branch 'master' into enhanced-RSS-and-register-all-namespaces

Testing - locale fix for test (#2623 )
repair error handling
2026-07-06 23:41:08 +00:00 · 2024-09-11 11:32:00 +02:00 · 2024-09-11 11:31:07 +02:00 · 2024-09-11 10:14:31 +02:00 · 2024-09-10 19:10:47 +02:00 · 2024-09-10 14:31:09 +02:00
22 changed files with 193 additions and 80 deletions
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.46.03'
+__version__ = '0.46.04'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
-
-                    <ul>
+                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
+                    <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
+                    <ul id="advanced-help-selectors">
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
                            <ul>
@@ -75,6 +75,7 @@ function isItemInStock() {
        'vergriffen',
        'vorbestellen',
        'vorbestellung ist bald möglich',
+        'we don\'t currently have any',
        'we couldn\'t find any products that match',
        'we do not currently have an estimate of when this product will be back in stock.',
        'we don\'t know when or if this item will be back in stock.',
@@ -173,7 +174,8 @@ function isItemInStock() {
        const element = elementsToScan[i];
        // outside the 'fold' or some weird text in the heading area
        // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-        if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+        // Note: there's also an automated test that places the 'out of stock' text fairly low down
+        if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
            continue
        }
        elementText = "";
@@ -187,7 +189,7 @@ function isItemInStock() {
            // and these mean its out of stock
            for (const outOfStockText of outOfStockTexts) {
                if (elementText.includes(outOfStockText)) {
-                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
+                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
                    return outOfStockText; // item is out of stock
                }
            }
@@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
        }
    }

+    let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
+
+    let text = element.textContent.trim().slice(0, 30).trim();
+    while (/\n{2,}|\t{2,}/.test(text)) {
+        text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
+    }
+
+    // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
+    const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;

    size_pos.push({
        xpath: xpath_result,
@@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
        height: Math.round(bbox['height']),
        left: Math.floor(bbox['left']),
        top: Math.floor(bbox['top']) + scroll_y,
+        // tagName used by Browser Steps
        tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
+        // tagtype used by Browser Steps
        tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor == "pointer"
+        isClickable: window.getComputedStyle(element).cursor === "pointer",
+        // Used by the keras trainer
+        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
+        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
+        hasDigitCurrency: hasDigitCurrency,
+        label: label,
    });

 });
@@ -729,6 +729,12 @@ def changedetection_app(config=None, datastore_o=None):
            for p in datastore.proxy_list:
                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

+        # Add some HTML to be used for form validation
+        if datastore.data['watching'][uuid].history.keys():
+            timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
+            form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
+        else:
+            form.last_html_for_form_validation = "<html><body></body></html>"

        if request.method == 'POST' and form.validate():

@@ -1,6 +1,9 @@
 import os
 import re

+import elementpath
+
+from changedetectionio.html_tools import xpath_filter, xpath1_filter
 from changedetectionio.strtobool import strtobool

 from wtforms import (
@@ -322,52 +325,39 @@ class ValidateCSSJSONXPATHInput(object):
        self.allow_json = allow_json

    def __call__(self, form, field):
-
+        from lxml.etree import XPathEvalError
        if isinstance(field.data, str):
            data = [field.data]
        else:
            data = field.data

        for line in data:
-        # Nothing to see here
-            if not len(line.strip()):
-                return
+            line = line.strip()

-            # Does it look like XPath?
-            if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
+            if not line:
+                continue
+
+            if line.startswith('xpath') or line.startswith('/'):
                if not self.allow_xpath:
                    raise ValidationError("XPath not permitted in this field!")
-                from lxml import etree, html
-                import elementpath
-                # xpath 2.0-3.1
-                from elementpath.xpath3 import XPath3Parser
-                tree = html.fromstring("<html></html>")
-                line = line.replace('xpath:', '')
+
+                if line.startswith('xpath1:'):
+                    filter_function = xpath1_filter
+                else:
+                    line = line.replace('xpath:', '')
+                    filter_function = xpath_filter

                try:
-                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
-                except elementpath.ElementPathError as e:
+                    # Call the determined function
+                    res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation)
+                    # It's OK if this is an empty result, we just want to check that it doesn't crash the parser
+                except (elementpath.ElementPathError,XPathEvalError) as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
-                except:
+                except Exception as e:
                    raise ValidationError("A system-error occurred when validating your XPath expression")

-            if line.strip().startswith('xpath1:'):
-                if not self.allow_xpath:
-                    raise ValidationError("XPath not permitted in this field!")
-                from lxml import etree, html
-                tree = html.fromstring("<html></html>")
-                line = re.sub(r'^xpath1:', '', line)
-
-                try:
-                    tree.xpath(line.strip())
-                except etree.XPathEvalError as e:
-                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
-                    raise ValidationError(message % (line, str(e)))
-                except:
-                    raise ValidationError("A system-error occurred when validating your XPath expression")
-
-            if 'json:' in line:
+            elif 'json:' in line:
                if not self.allow_json:
                    raise ValidationError("JSONPath not permitted in this field!")

@@ -392,7 +382,7 @@ class ValidateCSSJSONXPATHInput(object):
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")

-            if 'jq:' in line:
+            elif line.startswith('jq:'):
                try:
                    import jq
                except ModuleNotFoundError:
@@ -8,6 +8,7 @@ from xml.sax.saxutils import escape as xml_escape
 import json
 import re

+from loguru import logger

 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
@@ -108,6 +109,20 @@ def elementpath_tostring(obj):

    return str(obj)

+def extract_namespaces(xml_content):
+    """
+    Extracts all namespaces from the XML content.
+    """
+    from lxml import etree
+    from io import BytesIO
+
+    it = etree.iterparse(BytesIO(xml_content), events=('start-ns',))
+    namespaces = {}
+    for _, ns in it:
+        prefix, uri = ns
+        namespaces[prefix] = uri
+    return namespaces
+
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
    from lxml import etree, html
@@ -123,7 +138,14 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
    tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
    html_block = ""

-    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
+    # Automatically extract all namespaces from the XML content
+    namespaces = {'re': 'http://exslt.org/regular-expressions'}
+    try:
+        namespaces.update(extract_namespaces(html_content.encode('utf-8')))
+    except Exception as e:
+        logger.warning(f"Problem extracting namespaces from HTMl/XML content {str(e)}")
+
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
    #@note: //title/text() wont work where <title>CDATA..

    if type(r) != list:
@@ -40,13 +40,16 @@ def get_itemprop_availability(html_content) -> Restock:
    import extruct
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

-    value = {}
    now = time.time()
+
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
-
    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
+    try:
+        data = extruct.extract(html_content, syntaxes=syntaxes)
+    except Exception as e:
+        logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
+        return Restock()

-    data = extruct.extract(html_content, syntaxes=syntaxes)
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

    # First phase, dead simple scanning of anything that looks useful
@@ -77,11 +77,12 @@ class perform_site_check(difference_detection_processor):

        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
        # Go into RSS preprocess for converting CDATA/comment to usable text
-        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
-            if '<rss' in self.fetcher.content[:100].lower():
+        # Ctype_header could be unset if we are just reprocessing the existing content
+        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']) or not ctype_header:
+            top_text = self.fetcher.content[:200].lower().strip()
+            if '<rss' in top_text or 'search.yahoo.com/mrss/' in top_text:
                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
                is_rss = True
-
        # source: support, basically treat it as plaintext
        if watch.is_source_type_url:
            is_html = False
@@ -18,9 +18,11 @@ $(document).ready(function () {

    });

-    $("#notification-token-toggle").click(function (e) {
+    $(".toggle-show").click(function (e) {
        e.preventDefault();
-        $('#notification-tokens-info').toggle();
+        let target = $(this).data('target');
+        $(target).toggle();
    });
+
 });

@@ -11,8 +11,11 @@
    class="notification-urls" )
                            }}
                            <div class="pure-form-message-inline">
-                              <ul>
-                                <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
+                                <p>
+                                <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
+</p>
+                                <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
+                              <ul style="display: none" id="advanced-help-notifications">
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -40,7 +43,7 @@

                            </div>
                            <div class="pure-controls">
-                                <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
+                                <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
                            </div>
                            <div class="pure-controls" style="display: none;" id="notification-tokens-info">
                                <table class="pure-table" id="token-table">
@@ -4,6 +4,7 @@
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
+<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
 <script>
    const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
    const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -275,9 +276,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
-
-                    <ul>
+                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
+<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
+                    <ul id="advanced-help-selectors" style="display: none;">
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
                            <ul>
@@ -297,9 +298,12 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                                <li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
                            </ul>
                            </li>
-                    </ul>
-                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
+                    <li>
+                        Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
+                    </li>
+                    </ul>
+
                </span>
                    </div>
                <fieldset class="pure-control-group">
@@ -2,7 +2,7 @@
 import os
 import time
 from flask import url_for
-from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 from changedetectionio.notification import (
    default_notification_body,
    default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    assert b'not-in-stock' not in res.data

    # We should have a notification
-    time.sleep(2)
+    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    os.unlink("test-datastore/notification.txt")

@@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    set_original_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
+    time.sleep(5)
    assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"

    # BUT we should see that it correctly shows "not in stock"
@@ -2,7 +2,7 @@
 import os.path
 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
 from changedetectionio import html_tools


@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
    assert b'unviewed' in res.data

    # Takes a moment for apprise to fire
-    time.sleep(3)
+    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
    with open("test-datastore/notification.txt", 'rb') as f:
        response = f.read()
@@ -4,7 +4,7 @@
 import os
 import time
 from flask import url_for
-from .util import set_original_response, live_server_setup
+from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
 from changedetectionio.model import App


@@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
-    time.sleep(3)
+    wait_for_notification_endpoint_output()

    # Shouldn't exist, shouldn't have fired
    assert not os.path.isfile("test-datastore/notification.txt")
    # Now the filter should exist
    set_response_with_filter()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(3)
+
+    wait_for_notification_endpoint_output()

    assert os.path.isfile("test-datastore/notification.txt")

@@ -1,7 +1,8 @@
 import os
 import time
 from flask import url_for
-from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
+from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
+    wait_for_notification_endpoint_output
 from changedetectionio.model import App


@@ -107,7 +108,8 @@ def run_filter_test(client, live_server, content_filter):
    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
-    time.sleep(2)  # delay for apprise to fire
+
+    wait_for_notification_endpoint_output()
    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")

@@ -127,6 +129,7 @@ def run_filter_test(client, live_server, content_filter):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

+    wait_for_notification_endpoint_output()
    # It should have sent a notification, but..
    assert os.path.isfile("test-datastore/notification.txt")
    # but it should not contain the info about a failed filter (because there was none in this case)
@@ -2,6 +2,8 @@

 from flask import url_for
 from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
+import time
+

 def set_nonrenderable_response():
    test_return_data = """<html>
@@ -11,17 +13,16 @@ def set_nonrenderable_response():
     </body>
     </html>
    """
-
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
+    time.sleep(1)

    return None

 def set_zero_byte_response():
-
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("")
-
+    time.sleep(1)
    return None

 def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
+from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
 from ..notification import default_notification_format

 instock_props = [
@@ -182,7 +182,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    # price changed to something LESS than min (900), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='890.45')
    # let previous runs wait
-    time.sleep(1)
+    time.sleep(2)
+    
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
    wait_for_all_checks(client)
@@ -197,7 +198,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
-    assert b'1,890.45' or b'1890.45' in res.data
+    # Depending on the LOCALE it may be either of these (generally for US/default/etc)
+    assert b'1,890.45' in res.data or b'1890.45' in res.data
    assert b'unviewed' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -362,7 +364,7 @@ def test_change_with_notification_values(client, live_server):
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
-    time.sleep(3)
+    wait_for_notification_endpoint_output()
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
@@ -164,3 +164,46 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    assert b'Some other description' not in res.data  # Should NOT be selected by the xpath

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+
+def test_namespace_selectors(live_server, client):
+    set_original_cdata_xml()
+    #live_server_setup(live_server)
+
+    test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
+
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
+
+    wait_for_all_checks(client)
+
+    uuid = extract_UUID_from_client(client)
+    # because it will look for the namespaced stuff during form validation, but on the first check it won't exist..
+    res = client.post(
+        url_for("edit_page", uuid=uuid),
+        data={
+            "include_filters": "//media:thumbnail/@url",
+            "fetch_backend": "html_requests",
+            "headers": "",
+            "proxy": "no-proxy",
+            "tags": "",
+            "url": test_url,
+        },
+        follow_redirects=True
+    )
+
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+    assert b'CDATA' not in res.data
+    assert b'<![' not in res.data
+    assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
+
+    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -76,6 +76,17 @@ def set_more_modified_response():
    return None


+def wait_for_notification_endpoint_output():
+    '''Apprise can take a few seconds to fire'''
+    from os.path import isfile
+    for i in range(1, 20):
+        time.sleep(1)
+        if isfile("test-datastore/notification.txt"):
+            return True
+
+    return False
+
+
 # kinda funky, but works for now
 def extract_api_key_from_UI(client):
    import re
@@ -18,7 +18,7 @@ services:
  #
  #        Log levels are in descending order. (TRACE is the most detailed one)
  #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
-  #      - LOGGER_LEVEL=DEBUG
+  #      - LOGGER_LEVEL=TRACE
  #
  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,8 +29,9 @@ services:
  #
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
  #
-  #       Alternative Playwright URL, do not use "'s or 's!
-  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
+  #       Alternative target "Chrome" Playwright URL, do not use "'s or 's!
+  #       "Playwright" is a driver/library that allows changedetection to talk to a Chrome or similar browser.
+  #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
  #
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
  #
@@ -73,10 +74,10 @@ services:
 #              condition: service_started


-     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
+     # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
     # RECOMMENDED FOR FETCHING PAGES WITH CHROME
-#    playwright-chrome:
-#        hostname: playwright-chrome
+#    sockpuppetbrowser:
+#        hostname: sockpuppetbrowser
 #        image: dgtlmoon/sockpuppetbrowser:latest
 #        cap_add:
 #            - SYS_ADMIN
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
 pytest ~=7.2
 pytest-flask ~=1.2

-# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
-jsonschema==4.17.3
+# Anything 4.0 and up but not 5.0
+jsonschema ~= 4.0
+

 loguru
Author	SHA1	Message	Date
dgtlmoon	365df6cf81	Merge branch 'master' into enhanced-RSS-and-register-all-namespaces	2024-09-11 11:32:00 +02:00
dgtlmoon	e16814e40b	Testing - locale fix for test (#2623 )	2024-09-11 11:31:07 +02:00
dgtlmoon	76729f2106	repair error handling	2024-09-11 10:14:31 +02:00
dgtlmoon	591dd5b570	Adding validation	2024-09-10 19:10:47 +02:00
dgtlmoon	919812bf8b	Automatically apply any XML/RSS namespaces	2024-09-10 14:31:09 +02:00
dgtlmoon	337fcab3f1	Testing/Code - Improving test reliability (#2617 )	2024-09-09 16:50:00 +02:00
dgtlmoon	eaccd6026c	UI - Hiding noisy info under 'show advanced help' button (#2609 )	2024-09-06 14:33:06 +02:00
dgtlmoon	5b70625eaa	0.46.04	2024-09-04 13:55:18 +02:00
dgtlmoon	60d292107d	Fixing restock monitor tests and tweaking docker default config example,	2024-09-02 15:11:31 +02:00
dgtlmoon	1cb38347da	Container name should be 'sockpuppetbrowser' because its not just playwright that uses it	2024-09-02 13:21:38 +02:00
dgtlmoon	55fe2abf42	Restock/Price detection - Better catching of errors when parsing metadata documents for restock/price check (#2602 )	2024-09-01 13:07:06 +02:00
dgtlmoon	4225900ec3	Restock - updating texts and text offsets	2024-09-01 12:47:21 +02:00
dgtlmoon	1fb4342488	Build - Unpin jsonschema for faster builds (#2583 )	2024-08-22 15:02:00 +02:00
dgtlmoon	7071df061a	Price detection/scraping - Adding extra element training data (#2582 )	2024-08-22 15:01:36 +02:00