Solve circular import

Bump field text
Re #2528 - handle zero-byte responses with "Empty pages are a change" the same as when the HTML doesn't render any useful text
2025-12-17 13:35:50 +00:00 · 2024-07-29 12:50:43 +02:00 · 2024-07-29 11:45:44 +02:00 · 2024-07-29 11:41:05 +02:00
26 changed files with 102 additions and 222 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.46.04'
+__version__ = '0.46.02'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@@ -85,8 +85,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
            playwright_browser=browsersteps_start_session['browser'],
            proxy=proxy,
-            start_url=datastore.data['watching'][watch_uuid].get('url'),
-            headers=datastore.data['watching'][watch_uuid].get('headers')
+            start_url=datastore.data['watching'][watch_uuid].get('url')
        )

        # For test
--- a/changedetectionio/blueprint/tags/templates/edit-tag.html
+++ b/changedetectionio/blueprint/tags/templates/edit-tag.html
@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
-                    <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
-                    <ul id="advanced-help-selectors">
+                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
+
+                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
                            <ul>
--- a/changedetectionio/content_fetchers/base.py
+++ b/changedetectionio/content_fetchers/base.py
@@ -65,8 +65,8 @@ class Fetcher():

    def __init__(self):
        import importlib.resources
-        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
-        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
+        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
+        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()

    @abstractmethod
    def get_error(self):
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -75,7 +75,6 @@ function isItemInStock() {
        'vergriffen',
        'vorbestellen',
        'vorbestellung ist bald möglich',
-        'we don\'t currently have any',
        'we couldn\'t find any products that match',
        'we do not currently have an estimate of when this product will be back in stock.',
        'we don\'t know when or if this item will be back in stock.',
@@ -174,8 +173,7 @@ function isItemInStock() {
        const element = elementsToScan[i];
        // outside the 'fold' or some weird text in the heading area
        // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
-        // Note: theres also an automated test that places the 'out of stock' text fairly low down
-        if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+        if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
            continue
        }
        elementText = "";
@@ -189,7 +187,7 @@ function isItemInStock() {
            // and these mean its out of stock
            for (const outOfStockText of outOfStockTexts) {
                if (elementText.includes(outOfStockText)) {
-                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
+                    console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
                    return outOfStockText; // item is out of stock
                }
            }
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -164,15 +164,6 @@ visibleElementsArray.forEach(function (element) {
        }
    }

-    let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
-
-    let text = element.textContent.trim().slice(0, 30).trim();
-    while (/\n{2,}|\t{2,}/.test(text)) {
-        text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
-    }
-
-    // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
-    const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;

    size_pos.push({
        xpath: xpath_result,
@@ -180,16 +171,9 @@ visibleElementsArray.forEach(function (element) {
        height: Math.round(bbox['height']),
        left: Math.floor(bbox['left']),
        top: Math.floor(bbox['top']) + scroll_y,
-        // tagName used by Browser Steps
        tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
-        // tagtype used by Browser Steps
        tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor === "pointer",
-        // Used by the keras trainer
-        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
-        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
-        hasDigitCurrency: hasDigitCurrency,
-        label: label,
+        isClickable: window.getComputedStyle(element).cursor == "pointer"
    });

 });
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -729,12 +729,6 @@ def changedetection_app(config=None, datastore_o=None):
            for p in datastore.proxy_list:
                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

-        # Add some HTML to be used for form validation
-        if datastore.data['watching'][uuid].history.keys():
-            timestamp = list(datastore.data['watching'][uuid].history.keys())[-1]
-            form.last_html_for_form_validation = datastore.data['watching'][uuid].get_fetched_html(timestamp)
-        else:
-            form.last_html_for_form_validation = "<html><body></body></html>"

        if request.method == 'POST' and form.validate():

@@ -1383,19 +1377,17 @@ def changedetection_app(config=None, datastore_o=None):
        import brotli

        watch = datastore.data['watching'].get(uuid)
-        if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
-            latest_filename = list(watch.history.keys())[-1]
+        if watch and os.path.isdir(watch.watch_data_dir):
+            latest_filename = list(watch.history.keys())[0]
            html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
-            with open(html_fname, 'rb') as f:
-                if html_fname.endswith('.br'):
-                    # Read and decompress the Brotli file
+            if html_fname.endswith('.br'):
+                # Read and decompress the Brotli file
+                with open(html_fname, 'rb') as f:
                    decompressed_data = brotli.decompress(f.read())
-                else:
-                    decompressed_data = f.read()

-            buffer = BytesIO(decompressed_data)
+                buffer = BytesIO(decompressed_data)

-            return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
+                return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')


        # Return a 500 error
--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -1,9 +1,6 @@
 import os
 import re

-import elementpath
-
-from changedetectionio.html_tools import xpath_filter, xpath1_filter
 from changedetectionio.strtobool import strtobool

 from wtforms import (
@@ -325,39 +322,52 @@ class ValidateCSSJSONXPATHInput(object):
        self.allow_json = allow_json

    def __call__(self, form, field):
-        from lxml.etree import XPathEvalError
+
        if isinstance(field.data, str):
            data = [field.data]
        else:
            data = field.data

        for line in data:
-            line = line.strip()
+        # Nothing to see here
+            if not len(line.strip()):
+                return

-            if not line:
-                continue
-
-            if line.startswith('xpath') or line.startswith('/'):
+            # Does it look like XPath?
+            if line.strip()[0] == '/' or line.strip().startswith('xpath:'):
                if not self.allow_xpath:
                    raise ValidationError("XPath not permitted in this field!")
-
-                if line.startswith('xpath1:'):
-                    filter_function = xpath1_filter
-                else:
-                    line = line.replace('xpath:', '')
-                    filter_function = xpath_filter
+                from lxml import etree, html
+                import elementpath
+                # xpath 2.0-3.1
+                from elementpath.xpath3 import XPath3Parser
+                tree = html.fromstring("<html></html>")
+                line = line.replace('xpath:', '')

                try:
-                    # Call the determined function
-                    res = filter_function(xpath_filter=line, html_content=form.last_html_for_form_validation)
-                    # It's OK if this is an empty result, we just want to check that it doesn't crash the parser
-                except (elementpath.ElementPathError,XPathEvalError) as e:
+                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
+                except elementpath.ElementPathError as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
-                except Exception as e:
+                except:
                    raise ValidationError("A system-error occurred when validating your XPath expression")

-            elif 'json:' in line:
+            if line.strip().startswith('xpath1:'):
+                if not self.allow_xpath:
+                    raise ValidationError("XPath not permitted in this field!")
+                from lxml import etree, html
+                tree = html.fromstring("<html></html>")
+                line = re.sub(r'^xpath1:', '', line)
+
+                try:
+                    tree.xpath(line.strip())
+                except etree.XPathEvalError as e:
+                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
+                    raise ValidationError(message % (line, str(e)))
+                except:
+                    raise ValidationError("A system-error occurred when validating your XPath expression")
+
+            if 'json:' in line:
                if not self.allow_json:
                    raise ValidationError("JSONPath not permitted in this field!")

@@ -382,7 +392,7 @@ class ValidateCSSJSONXPATHInput(object):
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")

-            elif line.startswith('jq:'):
+            if 'jq:' in line:
                try:
                    import jq
                except ModuleNotFoundError:
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -8,7 +8,6 @@ from xml.sax.saxutils import escape as xml_escape
 import json
 import re

-from loguru import logger

 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
@@ -109,20 +108,6 @@ def elementpath_tostring(obj):

    return str(obj)

-def extract_namespaces(xml_content):
-    """
-    Extracts all namespaces from the XML content.
-    """
-    from lxml import etree
-    from io import BytesIO
-
-    it = etree.iterparse(BytesIO(xml_content), events=('start-ns',))
-    namespaces = {}
-    for _, ns in it:
-        prefix, uri = ns
-        namespaces[prefix] = uri
-    return namespaces
-
 # Return str Utf-8 of matched rules
 def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False, is_rss=False):
    from lxml import etree, html
@@ -138,14 +123,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
    tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
    html_block = ""

-    # Automatically extract all namespaces from the XML content
-    namespaces = {'re': 'http://exslt.org/regular-expressions'}
-    try:
-        namespaces.update(extract_namespaces(html_content.encode('utf-8')))
-    except Exception as e:
-        logger.warning(f"Problem extracting namespaces from HTMl/XML content {str(e)}")
-
-    r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
+    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
    #@note: //title/text() wont work where <title>CDATA..

    if type(r) != list:
--- a/changedetectionio/processors/restock_diff/init.py
+++ b/changedetectionio/processors/restock_diff/init.py
@@ -1,12 +1,11 @@

-from babel.numbers import parse_decimal
 from changedetectionio.model.Watch import model as BaseWatch
-from typing import Union
 import re
+from babel.numbers import parse_decimal

 class Restock(dict):

-    def parse_currency(self, raw_value: str) -> Union[float, None]:
+    def parse_currency(self, raw_value: str) -> float:
        # Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
        standardized_value = raw_value

@@ -22,11 +21,8 @@ class Restock(dict):
        # Remove any non-numeric characters except for the decimal point
        standardized_value = re.sub(r'[^\d.-]', '', standardized_value)

-        if standardized_value:
-            # Convert to float
-            return float(parse_decimal(standardized_value, locale='en'))
-
-        return None
+        # Convert to float
+        return float(parse_decimal(standardized_value, locale='en'))

    def __init__(self, *args, **kwargs):
        # Define default values
--- a/changedetectionio/processors/restock_diff/processor.py
+++ b/changedetectionio/processors/restock_diff/processor.py
@@ -40,16 +40,13 @@ def get_itemprop_availability(html_content) -> Restock:
    import extruct
    logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")

+    value = {}
    now = time.time()
-
    # Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
-    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
-    try:
-        data = extruct.extract(html_content, syntaxes=syntaxes)
-    except Exception as e:
-        logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
-        return Restock()

+    syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
+
+    data = extruct.extract(html_content, syntaxes=syntaxes)
    logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")

    # First phase, dead simple scanning of anything that looks useful
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -77,12 +77,11 @@ class perform_site_check(difference_detection_processor):

        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
        # Go into RSS preprocess for converting CDATA/comment to usable text
-        # Ctype_header could be unset if we are just reprocessing the existin content
-        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']) or not ctype_header:
-            top_text = self.fetcher.content[:200].lower().strip()
-            if '<rss' in top_text or 'search.yahoo.com/mrss/' in top_text:
+        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
+            if '<rss' in self.fetcher.content[:100].lower():
                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
                is_rss = True
+
        # source: support, basically treat it as plaintext
        if watch.is_source_type_url:
            is_html = False
--- a/changedetectionio/static/js/global-settings.js
+++ b/changedetectionio/static/js/global-settings.js
@@ -18,11 +18,9 @@ $(document).ready(function () {

    });

-    $(".toggle-show").click(function (e) {
+    $("#notification-token-toggle").click(function (e) {
        e.preventDefault();
-        let target = $(this).data('target');
-        $(target).toggle();
+        $('#notification-tokens-info').toggle();
    });
-
 });

--- a/changedetectionio/templates/_common_fields.html
+++ b/changedetectionio/templates/_common_fields.html
@@ -11,11 +11,8 @@
    class="notification-urls" )
                            }}
                            <div class="pure-form-message-inline">
-                                <p>
-                                <strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
-</p>
-                                <div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
-                              <ul style="display: none" id="advanced-help-notifications">
+                              <ul>
+                                <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -43,7 +40,7 @@

                            </div>
                            <div class="pure-controls">
-                                <div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
+                                <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
                            </div>
                            <div class="pure-controls" style="display: none;" id="notification-tokens-info">
                                <table class="pure-table" id="token-table">
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -4,7 +4,6 @@
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
-<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
 <script>
    const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
    const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -276,9 +275,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
                        {% endif %}
-                        <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
-<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
-                    <ul id="advanced-help-selectors" style="display: none;">
+                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
+
+                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
                            <ul>
@@ -298,12 +297,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                                <li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
                            </ul>
                            </li>
-                    <li>
-                        Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
-                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
-                    </li>
                    </ul>
-
+                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
+                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
                </span>
                    </div>
                <fieldset class="pure-control-group">
--- a/changedetectionio/tests/restock/test_restock.py
+++ b/changedetectionio/tests/restock/test_restock.py
@@ -2,7 +2,7 @@
 import os
 import time
 from flask import url_for
-from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
+from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 from changedetectionio.notification import (
    default_notification_body,
    default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    assert b'not-in-stock' not in res.data

    # We should have a notification
-    wait_for_notification_endpoint_output()
+    time.sleep(2)
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    os.unlink("test-datastore/notification.txt")

@@ -103,7 +103,6 @@ def test_restock_detection(client, live_server, measure_memory_usage):
    set_original_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
-    time.sleep(5)
    assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"

    # BUT we should see that it correctly shows "not in stock"
--- a/changedetectionio/tests/test_add_replace_remove_filter.py
+++ b/changedetectionio/tests/test_add_replace_remove_filter.py
@@ -2,7 +2,7 @@
 import os.path
 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks
 from changedetectionio import html_tools


@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
    assert b'unviewed' in res.data

    # Takes a moment for apprise to fire
-    wait_for_notification_endpoint_output()
+    time.sleep(3)
    assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
    with open("test-datastore/notification.txt", 'rb') as f:
        response = f.read()
--- a/changedetectionio/tests/test_backend.py
+++ b/changedetectionio/tests/test_backend.py
@@ -69,12 +69,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

    wait_for_all_checks(client)

-    uuid = extract_UUID_from_client(client)
-
-    # Check the 'get latest snapshot works'
-    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
-    assert b'which has this one new line' in res.data
-
    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data
@@ -92,7 +86,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    assert expected_url.encode('utf-8') in res.data

    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
-    res = client.get(url_for("diff_history_page", uuid=uuid))
+    res = client.get(url_for("diff_history_page", uuid="first"))
    assert b'selected=""' in res.data, "Confirm diff history page loaded"

    # Check the [preview] pulls the right one
@@ -149,12 +143,18 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    assert b'unviewed' not in res.data

    # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
+    uuid = extract_UUID_from_client(client)
    client.get(url_for("clear_watch_history", uuid=uuid))
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'preview/' in res.data

+
+    # Check the 'get latest snapshot works'
+    res = client.get(url_for("watch_get_latest_html", uuid=uuid))
+    assert b'<head><title>head title</title></head>' in res.data
+
    #
    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
--- a/changedetectionio/tests/test_filter_exist_changes.py
+++ b/changedetectionio/tests/test_filter_exist_changes.py
@@ -4,7 +4,7 @@
 import os
 import time
 from flask import url_for
-from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
+from .util import set_original_response, live_server_setup
 from changedetectionio.model import App


@@ -102,15 +102,14 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
-    wait_for_notification_endpoint_output()
+    time.sleep(3)

    # Shouldn't exist, shouldn't have fired
    assert not os.path.isfile("test-datastore/notification.txt")
    # Now the filter should exist
    set_response_with_filter()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
-    wait_for_notification_endpoint_output()
+    time.sleep(3)

    assert os.path.isfile("test-datastore/notification.txt")

--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -1,8 +1,7 @@
 import os
 import time
 from flask import url_for
-from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
-    wait_for_notification_endpoint_output
+from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
 from changedetectionio.model import App


@@ -108,8 +107,7 @@ def run_filter_test(client, live_server, content_filter):
    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
-
-    wait_for_notification_endpoint_output()
+    time.sleep(2)  # delay for apprise to fire
    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")

@@ -129,7 +127,6 @@ def run_filter_test(client, live_server, content_filter):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

-    wait_for_notification_endpoint_output()
    # It should have sent a notification, but..
    assert os.path.isfile("test-datastore/notification.txt")
    # but it should not contain the info about a failed filter (because there was none in this case)
--- a/changedetectionio/tests/test_nonrenderable_pages.py
+++ b/changedetectionio/tests/test_nonrenderable_pages.py
@@ -2,8 +2,6 @@

 from flask import url_for
 from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
-import time
-

 def set_nonrenderable_response():
    test_return_data = """<html>
@@ -13,16 +11,17 @@ def set_nonrenderable_response():
     </body>
     </html>
    """
+
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
-    time.sleep(1)

    return None

 def set_zero_byte_response():
+
    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("")
-    time.sleep(1)
+
    return None

 def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
--- a/changedetectionio/tests/test_restock_itemprop.py
+++ b/changedetectionio/tests/test_restock_itemprop.py
@@ -3,7 +3,7 @@ import os
 import time

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
+from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
 from ..notification import default_notification_format

 instock_props = [
@@ -182,8 +182,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    # price changed to something LESS than min (900), SHOULD be a change
    set_original_response(props_markup=instock_props[0], price='890.45')
    # let previous runs wait
-    time.sleep(2)
-    
+    time.sleep(1)
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
    wait_for_all_checks(client)
@@ -198,8 +197,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("index"))
-    # Depending on the LOCALE it may be either of these (generally for US/default/etc)
-    assert b'1,890.45' in res.data or b'1890.45' in res.data
+    assert b'1,890.45' in res.data or b'1890.45' in res.data
    assert b'unviewed' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -364,7 +362,7 @@ def test_change_with_notification_values(client, live_server):
    set_original_response(props_markup=instock_props[0], price='1950.45')
    client.get(url_for("form_watch_checknow"))
    wait_for_all_checks(client)
-    wait_for_notification_endpoint_output()
+    time.sleep(3)
    assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
    with open("test-datastore/notification.txt", 'r') as f:
        notification = f.read()
--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@@ -164,46 +164,3 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
    assert b'Some other description' not in res.data  # Should NOT be selected by the xpath

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-
-def test_namespace_selectors(live_server, client):
-    set_original_cdata_xml()
-    #live_server_setup(live_server)
-
-    test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
-
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": test_url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
-    wait_for_all_checks(client)
-
-    uuid = extract_UUID_from_client(client)
-    # because it will look for the namespaced stuff during form validation, but on the first check it wont exist..
-    res = client.post(
-        url_for("edit_page", uuid=uuid),
-        data={
-            "include_filters": "//media:thumbnail/@url",
-            "fetch_backend": "html_requests",
-            "headers": "",
-            "proxy": "no-proxy",
-            "tags": "",
-            "url": test_url,
-        },
-        follow_redirects=True
-    )
-
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("preview_page", uuid="first"),
-        follow_redirects=True
-    )
-    assert b'CDATA' not in res.data
-    assert b'<![' not in res.data
-    assert b'https://testsite.com/thumbnail-c224e10d81488e818701c981da04869e.jpg' in res.data
-
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
--- a/changedetectionio/tests/util.py
+++ b/changedetectionio/tests/util.py
@@ -76,17 +76,6 @@ def set_more_modified_response():
    return None


-def wait_for_notification_endpoint_output():
-    '''Apprise can take a few seconds to fire'''
-    from os.path import isfile
-    for i in range(1, 20):
-        time.sleep(1)
-        if isfile("test-datastore/notification.txt"):
-            return True
-
-    return False
-
-
 # kinda funky, but works for now
 def extract_api_key_from_UI(client):
    import re
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,7 @@ services:
  #
  #        Log levels are in descending order. (TRACE is the most detailed one)
  #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
-  #      - LOGGER_LEVEL=TRACE
+  #      - LOGGER_LEVEL=DEBUG
  #
  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,9 +29,8 @@ services:
  #
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
  #
-  #       Alternative target "Chrome" Playwright URL, do not use "'s or 's!
-  #       "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
-  #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
+  #       Alternative Playwright URL, do not use "'s or 's!
+  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
  #
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
  #
@@ -74,10 +73,10 @@ services:
 #              condition: service_started


-     # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
+     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
     # RECOMMENDED FOR FETCHING PAGES WITH CHROME
-#    sockpuppetbrowser:
-#        hostname: sockpuppetbrowser
+#    playwright-chrome:
+#        hostname: playwright-chrome
 #        image: dgtlmoon/sockpuppetbrowser:latest
 #        cap_add:
 #            - SYS_ADMIN
--- a/requirements.txt
+++ b/requirements.txt
@@ -79,9 +79,8 @@ pyppeteerstealth>=0.0.4
 pytest ~=7.2
 pytest-flask ~=1.2

-# Anything 4.0 and up but not 5.0
-jsonschema ~= 4.0
-
+# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
+jsonschema==4.17.3

 loguru
Author	SHA1	Message	Date
dgtlmoon	4fdabd53fc	Solve circular import	2024-07-29 12:50:43 +02:00
dgtlmoon	b97d34d77c	Bump field text	2024-07-29 11:45:44 +02:00
dgtlmoon	e12b5a6f71	Re #2528 - handle zero-byte responses with "Empty pages are a change" the same as when the HTML doesnt render any useful text	2024-07-29 11:41:05 +02:00