Compare commits


33 Commits

Author SHA1 Message Date
dgtlmoon
a8b6c3133f revert 2024-09-27 19:47:06 +02:00
dgtlmoon
9ea68087d8 finally 2024-09-27 19:44:28 +02:00
dgtlmoon
40d23aa2fa try different import 2024-09-27 14:12:18 +02:00
dgtlmoon
18e1655844 WIP 2024-09-26 09:43:10 +02:00
dgtlmoon
a8de06e2e5 woops 2024-09-25 14:58:53 +02:00
dgtlmoon
fd25fe8a5f Use local data instead of reaching out to changedetection when testing 2024-09-25 14:32:25 +02:00
dgtlmoon
e173954cdd Restock monitor - Only try to process restock information (like scraping for "out of stock" keywords) if the page was actually rendered correctly. (#2645) 2024-09-20 09:19:57 +02:00
dgtlmoon
e830fb2320 Text filters - Adding filters "Trim whitespace" and "Remove duplicate lines" 2024-09-18 15:45:44 +02:00
dgtlmoon
c6589ee1b4 Browser Steps - UI - Use a better flexbox layout 2024-09-18 11:26:10 +02:00
Michael McMillan
dc936a2e8a Filters - Add support for also removing HTML elements using XPath selectors (#2632) 2024-09-17 22:43:04 +02:00
dgtlmoon
8c1527c1ad Update AppRise notification library to 1.9.0 (#2624) 2024-09-17 19:06:17 +02:00
Dawid Wróbel
a5ff1cd1d7 browser_steps: add "click element containing text if exists" (#2629) 2024-09-17 18:30:54 +02:00
dgtlmoon
543cb205d2 Testing - Fixing Restock test #2641 2024-09-17 18:29:12 +02:00
dgtlmoon
273adfa0a4 Testing - Fix false filter missing check alerts 2024-09-17 16:55:04 +02:00
Felipe Tuffani
8ecfd17973 Restock/Price detection - Fix duplicated prices with different data type on single page product #2636 (#2638) 2024-09-17 11:22:54 +02:00
dgtlmoon
19f3851c9d Memory management improvements - LXML and other libraries can leak allocation, wrap in a sub-process (#2626) 2024-09-11 16:20:49 +02:00
dgtlmoon
7f2fa20318 Small memory allocation fixes (#2625) 2024-09-11 14:51:32 +02:00
dgtlmoon
e16814e40b Testing - locale fix for test (#2623) 2024-09-11 11:31:07 +02:00
dgtlmoon
337fcab3f1 Testing/Code - Improving test reliability (#2617) 2024-09-09 16:50:00 +02:00
dgtlmoon
eaccd6026c UI - Hiding noisy info under 'show advanced help' button (#2609) 2024-09-06 14:33:06 +02:00
dgtlmoon
5b70625eaa 0.46.04 2024-09-04 13:55:18 +02:00
dgtlmoon
60d292107d Fixing restock monitor tests and tweaking docker default config example, 2024-09-02 15:11:31 +02:00
dgtlmoon
1cb38347da Container name should be 'sockpuppetbrowser' because it's not just playwright that uses it 2024-09-02 13:21:38 +02:00
dgtlmoon
55fe2abf42 Restock/Price detection - Better catching of errors when parsing metadata documents for restock/price check (#2602) 2024-09-01 13:07:06 +02:00
dgtlmoon
4225900ec3 Restock - updating texts and text offsets 2024-09-01 12:47:21 +02:00
dgtlmoon
1fb4342488 Build - Unpin jsonschema for faster builds (#2583) 2024-08-22 15:02:00 +02:00
dgtlmoon
7071df061a Price detection/scraping - Adding extra element training data (#2582) 2024-08-22 15:01:36 +02:00
dgtlmoon
6dd1fa2b88 0.46.03 2024-08-19 17:22:13 +02:00
dgtlmoon
371f85d544 Watch 'Download last snapshot' link/button should give last, not first snapshot (#2576) 2024-08-19 17:20:30 +02:00
dgtlmoon
932cf15e1e Price and restock scraping - small price fix scraper (#2575) 2024-08-19 15:47:19 +02:00
Mike Splain
bf0d410d32 Browser Steps UI - Interactive UI wasn't sending headers but was when the check ran (#2551) 2024-08-19 10:21:05 +02:00
dgtlmoon
730f37c7ba Set encoding type for scraper script reader (#2574 #2568) 2024-08-19 09:17:18 +02:00
dgtlmoon
8a35d62e02 Handle zero-byte/empty content responses with "[ ] Empty pages are a change" option, the same as when the HTML doesn't render any useful text (#2530) 2024-07-29 13:27:59 +02:00
44 changed files with 681 additions and 332 deletions

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.46.02'
__version__ = '0.46.04'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -0,0 +1,78 @@
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to our returned result set, and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# Apprise relies on prefixing each request arg with "-" because it uses e.g. &method=update as a flag,
# but here we are making straight requests, so we need to convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('password')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')), '')
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)
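
Since the `@notify` decorators only register these schemes at import time, callers must import this module before adding such a URL. A minimal usage sketch, assuming the import path shown elsewhere in this diff (the webhook endpoint is hypothetical):

```python
# Importing the plugin module registers the custom get/post/put/delete
# schemes with Apprise via the @notify decorators above.
import apprise
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper  # noqa: F401

apobj = apprise.Apprise()
# '+'-prefixed query args become HTTP headers; plain args become query params
apobj.add('posts://example.com/webhook?+x-api-key=secret&mode=alert')
apobj.notify(title='Price changed', body='{"watch": "example"}')
```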

View File

@@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
playwright_browser=browsersteps_start_session['browser'],
proxy=proxy,
start_url=datastore.data['watching'][watch_uuid].get('url')
start_url=datastore.data['watching'][watch_uuid].get('url'),
headers=datastore.data['watching'][watch_uuid].get('headers')
)
# For test

View File

@@ -25,6 +25,7 @@ browser_step_ui_config = {'Choose one': '0 0',
'Click element if exists': '1 0',
'Click element': '1 0',
'Click element containing text': '0 1',
'Click element containing text if exists': '0 1',
'Enter text in field': '1 1',
'Execute JS': '0 1',
# 'Extract text and use as filter': '1 0',
@@ -96,12 +97,24 @@ class steppable_browser_interface():
return self.action_goto_url(value=self.start_url)
def action_click_element_containing_text(self, selector=None, value=''):
logger.debug("Clicking element containing text")
if not len(value.strip()):
return
elem = self.page.get_by_text(value)
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000)
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
logger.debug("Clicking element containing text if exists")
if not len(value.strip()):
return
elem = self.page.get_by_text(value)
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
if elem.count():
elem.first.click(delay=randint(200, 500), timeout=3000)
else:
return
def action_enter_text_in_field(self, selector, value):
if not len(selector.strip()):
return
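
The new `_if_exists` variant differs only in treating zero matches as a silent no-op. The Playwright pattern in isolation, a minimal sketch assuming a `page` from `playwright.sync_api`:

```python
# Click the first element whose visible text matches, but carry on
# quietly when nothing on the page matches.
from random import randint

def click_text_if_exists(page, value: str):
    if not value.strip():
        return
    elem = page.get_by_text(value)   # Playwright text locator
    if elem.count():                 # zero matches -> no-op instead of a timeout error
        elem.first.click(delay=randint(200, 500), timeout=3000)
```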

View File

@@ -58,9 +58,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
@@ -89,11 +89,13 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -65,8 +65,8 @@ class Fetcher():
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
@abstractmethod
def get_error(self):
@@ -81,7 +81,8 @@ class Fetcher():
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
# Should set self.error, self.status_code and self.content
pass
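
Every fetcher subclass must now accept the extra keyword. A sketch of the extended contract (the body and `fetch_somehow` are illustrative, not the real implementation):

```python
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.content_fetchers.exceptions import EmptyReply

class ExampleFetcher(Fetcher):
    def run(self, url, timeout, request_headers, request_body, request_method,
            ignore_status_codes=False, current_include_filters=None,
            is_binary=False, empty_pages_are_a_change=False):
        self.content = fetch_somehow(url)  # hypothetical helper
        # Empty replies only count as an error when the user has NOT opted
        # to treat empty pages as a change
        if not self.content and not empty_pages_are_a_change:
            raise EmptyReply(url=url, status_code=self.status_code)
```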

View File

@@ -83,7 +83,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
from playwright.sync_api import sync_playwright
import playwright._impl._errors
@@ -130,7 +131,7 @@ class fetcher(Fetcher):
if response is None:
context.close()
browser.close()
logger.debug("Content Fetcher > Response object was none")
logger.debug("Content Fetcher > Response object from the browser communication was none")
raise EmptyReply(url=url, status_code=None)
try:
@@ -166,10 +167,10 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if len(self.page.content().strip()) == 0:
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
context.close()
browser.close()
logger.debug("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here

View File

@@ -75,7 +75,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes,
current_include_filters,
is_binary
is_binary,
empty_pages_are_a_change
):
from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -153,7 +154,7 @@ class fetcher(Fetcher):
if response is None:
await self.page.close()
await browser.close()
logger.warning("Content Fetcher > Response object was none")
logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
raise EmptyReply(url=url, status_code=None)
self.headers = response.headers
@@ -186,10 +187,11 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
content = await self.page.content
if len(content.strip()) == 0:
if not empty_pages_are_a_change and len(content.strip()) == 0:
logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
await self.page.close()
await browser.close()
logger.error("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here
@@ -247,7 +249,7 @@ class fetcher(Fetcher):
await self.fetch_page(**kwargs)
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False):
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -262,7 +264,8 @@ class fetcher(Fetcher):
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
), timeout=max_time))
except asyncio.TimeoutError:
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
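
The bounded-wait pattern in isolation, a sketch using only the standard library:

```python
# A hung browser round-trip is cut off after
# PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS rather than blocking the worker.
import asyncio
import os

max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))

async def fetch_page():
    await asyncio.sleep(0.1)  # stands in for the real page fetch

try:
    asyncio.run(asyncio.wait_for(fetch_page(), timeout=max_time))
except asyncio.TimeoutError:
    print(f"Browser connected but was unable to process the page in {max_time} seconds.")
```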

View File

@@ -1,9 +1,6 @@
from loguru import logger
import hashlib
import os
import chardet
import requests
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -26,7 +23,11 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
import chardet
import requests
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -74,7 +75,10 @@ class fetcher(Fetcher):
self.headers = r.headers
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
if not empty_pages_are_a_change:
raise EmptyReply(url=url, status_code=r.status_code)
else:
logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")
# @todo test this
# @todo maybe you really want to test zero-byte return pages?

View File

@@ -75,6 +75,7 @@ function isItemInStock() {
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
@@ -173,7 +174,8 @@ function isItemInStock() {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) {
// Note: there's also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
@@ -187,7 +189,7 @@ function isItemInStock() {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`)
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
return outOfStockText; // item is out of stock
}
}

View File

@@ -164,6 +164,15 @@ visibleElementsArray.forEach(function (element) {
}
}
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts like "Sale: 4000" or "Sale now 3000 Kc", which can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text) ;
size_pos.push({
xpath: xpath_result,
@@ -171,9 +180,16 @@ visibleElementsArray.forEach(function (element) {
height: Math.round(bbox['height']),
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: window.getComputedStyle(element).cursor == "pointer"
isClickable: window.getComputedStyle(element).cursor === "pointer",
// Used by the keras trainer
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
});
});

View File

@@ -56,7 +56,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions

View File

@@ -537,7 +537,8 @@ def changedetection_app(config=None, datastore_o=None):
import random
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
@@ -1377,17 +1378,19 @@ def changedetection_app(config=None, datastore_o=None):
import brotli
watch = datastore.data['watching'].get(uuid)
if watch and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[0]
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[-1]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
with open(html_fname, 'rb') as f:
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
decompressed_data = brotli.decompress(f.read())
else:
decompressed_data = f.read()
buffer = BytesIO(decompressed_data)
buffer = BytesIO(decompressed_data)
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
# Return a 500 error
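
Two fixes land in this hunk: `[-1]` selects the newest history entry (where `[0]` returned the oldest), and plain `.html` files are no longer pushed through the Brotli decompressor. The corrected flow in isolation, a sketch assuming a `watch` object with `history` and `watch_data_dir`:

```python
import os
from io import BytesIO
import brotli

latest_filename = list(watch.history.keys())[-1]   # newest snapshot, not the first
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
    if html_fname.endswith('.br'):
        decompressed_data = brotli.decompress(f.read())  # stored compressed
    else:
        decompressed_data = f.read()                     # stored as plain HTML
buffer = BytesIO(decompressed_data)
```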

View File

@@ -221,7 +221,8 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field):
import apprise
apobj = apprise.Apprise()
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
for server_url in field.data:
if not apobj.add(server_url):
message = field.gettext('\'%s\' is not a valid AppRise URL.' % (server_url))
@@ -468,7 +469,7 @@ class processor_text_json_diff_form(commonSettingsForm):
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
extract_text = StringListField('Extract text', [ValidateListRegex()])
@@ -479,8 +480,10 @@ class processor_text_json_diff_form(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
filter_text_added = BooleanField('Added lines', default=True)
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
@@ -575,7 +578,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField()
pager_size = IntegerField('Pager size',

View File

@@ -1,10 +1,5 @@
from bs4 import BeautifulSoup
from inscriptis import get_text
from jsonpath_ng.ext import parse
from typing import List
from inscriptis.model.config import ParserConfig
from xml.sax.saxutils import escape as xml_escape
from lxml import etree
import json
import re
@@ -39,6 +34,7 @@ def perl_style_slash_enclosed_regex_to_options(regex):
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
html_block = ""
r = soup.select(include_filters, separator="")
@@ -56,16 +52,32 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
return html_block
def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector):
item.decompose()
return str(soup)
def subtractive_xpath_selector(xpath_selector, html_content):
html_tree = etree.HTML(html_content)
elements_to_remove = html_tree.xpath(xpath_selector)
for element in elements_to_remove:
element.getparent().remove(element)
modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
return modified_html
def element_removal(selectors: List[str], html_content):
"""Joins individual filters into one css filter."""
selector = ",".join(selectors)
return subtractive_css_selector(selector, html_content)
"""Removes elements that match a list of CSS or xPath selectors."""
modified_html = html_content
for selector in selectors:
if selector.startswith(('xpath:', 'xpath1:', '//')):
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
else:
modified_html = subtractive_css_selector(selector, modified_html)
return modified_html
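
With this dispatch, one subtractive filter list can mix both selector flavours. A quick sketch using the helpers defined above:

```python
# '//' (or 'xpath:'/'xpath1:' prefixed) rules go to lxml, everything else
# to BeautifulSoup's CSS engine.
html = "<div><p class='ad'>Advertisement</p><span id='price'>10 EUR</span></div>"
cleaned = element_removal(
    ['.ad',                               # CSS -> subtractive_css_selector
     "//*[contains(text(), 'EUR')]"],     # XPath -> subtractive_xpath_selector
    html,
)
print(cleaned)  # both elements are gone before text conversion
```
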
def elementpath_tostring(obj):
"""
@@ -181,6 +193,7 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
# Extract/find element
def extract_element(find='title', html_content=''):
from bs4 import BeautifulSoup
#Re #106, be sure to handle when its not found
element_text = None
@@ -194,6 +207,8 @@ def extract_element(find='title', html_content=''):
#
def _parse_json(json_data, json_filter):
from jsonpath_ng.ext import parse
if json_filter.startswith("json:"):
jsonpath_expression = parse(json_filter.replace('json:', ''))
match = jsonpath_expression.find(json_data)
@@ -242,6 +257,8 @@ def _get_stripped_text_from_json_match(match):
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I don't know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup
stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
@@ -352,6 +369,7 @@ def strip_ignore_text(content, wordlist, mode="content"):
return "\n".encode('utf8').join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
def repl(m):
text = m.group(1)
@@ -360,6 +378,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
return re.sub(pattern, repl, html_content)
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
"""Converts html string to a string with just the text. If ignoring
rendering anchor tag content is enable, anchor tag content are also
included in the text

View File

@@ -60,6 +60,8 @@ class watch_base(dict):
'time_between_check_use_default': True,
'title': None,
'track_ldjson_price_data': None,
'trim_text_whitespace': False,
'remove_duplicate_lines': False,
'trigger_text': [], # List of text or regex to wait for until a change is detected
'url': '',
'uuid': str(uuid.uuid4()),

View File

@@ -1,9 +1,10 @@
import apprise
import time
from apprise import NotifyFormat
import json
import apprise
from loguru import logger
valid_tokens = {
'base_url': '',
'current_snapshot': '',
@@ -34,86 +35,11 @@ valid_notification_formats = {
default_notification_format_for_watch: default_notification_format_for_watch
}
# include the decorator
from apprise.decorators import notify
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
from apprise.utils import parse_url as apprise_parse_url
from apprise import URLBase
url = kwargs['meta'].get('url')
if url.startswith('post'):
r = requests.post
elif url.startswith('get'):
r = requests.get
elif url.startswith('put'):
r = requests.put
elif url.startswith('delete'):
r = requests.delete
url = url.replace('post://', 'http://')
url = url.replace('posts://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('get://', 'http://')
url = url.replace('gets://', 'https://')
url = url.replace('put://', 'http://')
url = url.replace('puts://', 'https://')
url = url.replace('delete://', 'http://')
url = url.replace('deletes://', 'https://')
headers = {}
params = {}
auth = None
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
if results:
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = {URLBase.unquote(x): URLBase.unquote(y)
for x, y in results['qsd+'].items()}
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[URLBase.unquote(k)] = URLBase.unquote(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
elif results.get('user'):
auth = (URLBase.unquote(results.get('user')))
# Try to auto-guess if it's JSON
try:
json.loads(body)
headers['Content-Type'] = 'application/json; charset=utf-8'
except ValueError as e:
pass
r(results.get('url'),
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)
def process_notification(n_object, datastore):
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render
now = time.time()

View File

@@ -26,6 +26,8 @@ class difference_detection_processor():
def call_browser(self):
from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -133,8 +135,18 @@ class difference_detection_processor():
is_binary = self.watch.is_pdf
# And here we go! call the right browser with browser-specific settings
self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
is_binary=is_binary)
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
self.fetcher.run(url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=self.watch.get('include_filters'),
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit()

View File

@@ -1,11 +1,12 @@
from changedetectionio.model.Watch import model as BaseWatch
import re
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from typing import Union
import re
class Restock(dict):
def parse_currency(self, raw_value: str) -> float:
def parse_currency(self, raw_value: str) -> Union[float, None]:
# Clean and standardize the value (i.e. 1,400.00 should become 1400.00); even better would be to store the whole thing as an integer.
standardized_value = raw_value
@@ -21,8 +22,11 @@ class Restock(dict):
# Remove any non-numeric characters except for the decimal point
standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
if standardized_value:
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
return None
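
The `Union[float, None]` return type makes the failure mode explicit. A behaviour sketch (the import path is assumed from the repository layout):

```python
from changedetectionio.processors.restock_diff import Restock

r = Restock()
print(r.parse_currency("$ 3,999.95"))    # -> 3999.95
print(r.parse_currency("out of stock"))  # -> None, where it previously raised
```
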
def __init__(self, *args, **kwargs):
# Define default values

View File

@@ -2,8 +2,7 @@ from .. import difference_detection_processor
from ..exceptions import ProcessorException
from . import Restock
from loguru import logger
import hashlib
import re
import urllib3
import time
@@ -27,6 +26,25 @@ def _search_prop_by_value(matches, value):
if value in prop[0]:
return prop[1] # Yield the desired value and exit the function
def _deduplicate_prices(data):
seen = set()
unique_data = []
for datum in data:
# Convert 'value' to float if it can be a numeric string, otherwise leave it as is
try:
normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
except ValueError:
normalized_value = datum.value
# If the normalized value hasn't been seen yet, add it to unique data
if normalized_value not in seen:
unique_data.append(datum)
seen.add(normalized_value)
return unique_data
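
A tiny illustration of the normalisation, using a stand-in for the jsonpath-ng match objects the real code receives (`DatumStub` is hypothetical):

```python
from dataclasses import dataclass

@dataclass
class DatumStub:
    value: object  # jsonpath-ng matches expose .value the same way

# '1699.99' (str) and 1699.99 (float) normalise to the same price, so only
# the first occurrence survives; order of first appearance is preserved.
prices = [DatumStub('1699.99'), DatumStub(1699.99), DatumStub('1549.00')]
print([d.value for d in _deduplicate_prices(prices)])  # ['1699.99', '1549.00']
```
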
# should return Restock()
# add casting?
def get_itemprop_availability(html_content) -> Restock:
@@ -36,17 +54,21 @@ def get_itemprop_availability(html_content) -> Restock:
"""
from jsonpath_ng import parse
import re
now = time.time()
import extruct
logger.trace(f"Imported extruct module in {time.time() - now:.3f}s")
value = {}
now = time.time()
# Extruct is very slow, I'm wondering if some ML is going to be faster (800ms on my i7), 'rdfa' seems to be the heaviest.
syntaxes = ['dublincore', 'json-ld', 'microdata', 'microformat', 'opengraph']
try:
data = extruct.extract(html_content, syntaxes=syntaxes)
except Exception as e:
logger.warning(f"Unable to extract data, document parsing with extruct failed with {type(e).__name__} - {str(e)}")
return Restock()
data = extruct.extract(html_content, syntaxes=syntaxes)
logger.trace(f"Extruct basic extract of all metadata done in {time.time() - now:.3f}s")
# First phase, dead simple scanning of anything that looks useful
@@ -57,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
availability_parse = parse('$..(availability|Availability)')
price_result = price_parse.find(data)
price_result = _deduplicate_prices(price_parse.find(data))
if price_result:
# Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
# parse that for the UI?
@@ -119,6 +141,10 @@ class perform_site_check(difference_detection_processor):
xpath_data = None
def run_changedetection(self, watch, skip_when_checksum_same=True):
import hashlib
from concurrent.futures import ProcessPoolExecutor
from functools import partial
if not watch:
raise Exception("Watch no longer exists.")
@@ -132,6 +158,20 @@ class perform_site_check(difference_detection_processor):
update_obj['content_type'] = self.fetcher.headers.get('Content-Type', '')
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
# Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
# Otherwise it will assume "in stock" because nothing suggesting the opposite was found
from ...html_tools import html_to_text
text = html_to_text(self.fetcher.content)
logger.debug(f"Length of text after conversion: {len(text)}")
if not len(text):
from ...content_fetchers.exceptions import ReplyWithContentButNoText
raise ReplyWithContentButNoText(url=watch.link,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
html_content=self.fetcher.content,
xpath_data=self.fetcher.xpath_data
)
# Which restock settings to compare against?
restock_settings = watch.get('restock_settings', {})
@@ -146,7 +186,11 @@ class perform_site_check(difference_detection_processor):
itemprop_availability = {}
try:
itemprop_availability = get_itemprop_availability(html_content=self.fetcher.content)
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments
# anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(get_itemprop_availability, self.fetcher.content))
itemprop_availability = future.result()
except MoreThanOnePriceFound as e:
# Add the real data
raise ProcessorException(message="Cannot run, more than one price detected, this plugin is only for product pages with ONE product, try the content-change detection mode.",
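
The `ProcessPoolExecutor` wrapping here (and in the text/JSON diff processor below) is the memory-management pattern from #2626: run leak-prone lxml/bs4 work in a short-lived child process so its allocations die with it. The pattern in isolation:

```python
# Any memory the parser leaks is reclaimed when the worker process exits.
from concurrent.futures import ProcessPoolExecutor
from functools import partial

def leaky_parse(html_content: str) -> str:
    return html_content.upper()  # stands in for get_itemprop_availability etc.

if __name__ == '__main__':
    with ProcessPoolExecutor() as executor:
        future = executor.submit(partial(leaky_parse, "<html>...</html>"))
        print(future.result())
```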

View File

@@ -36,6 +36,9 @@ class PDFToHTMLToolNotFound(ValueError):
class perform_site_check(difference_detection_processor):
def run_changedetection(self, watch, skip_when_checksum_same=True):
from concurrent.futures import ProcessPoolExecutor
from functools import partial
changed_detected = False
html_content = ""
screenshot = False # as bytes
@@ -171,20 +174,30 @@ class perform_site_check(difference_detection_processor):
for filter_rule in include_filters_rule:
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(html_tools.xpath_filter, xpath_filter=filter_rule.replace('xpath:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=is_rss))
html_content += future.result()
elif filter_rule.startswith('xpath1:'):
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
future = executor.submit(partial(html_tools.xpath1_filter, xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=is_rss))
html_content += future.result()
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content += html_tools.include_filters(include_filters=filter_rule,
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
future = executor.submit(partial(html_tools.include_filters, include_filters=filter_rule,
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url)
append_pretty_line_formatting=not watch.is_source_type_url))
html_content += future.result()
if not html_content.strip():
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
@@ -197,18 +210,27 @@ class perform_site_check(difference_detection_processor):
else:
# extract text
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
stripped_text_from_html = \
html_tools.html_to_text(
html_content=html_content,
with ProcessPoolExecutor() as executor:
# Use functools.partial to create a callable with arguments - anything using bs4/lxml etc is quite "leaky"
# Convert the filtered HTML into plain text via the existing inscriptis::get_text
future = executor.submit(partial(html_tools.html_to_text, html_content=html_content,
render_anchor_tag_content=do_anchor,
is_rss=is_rss # #1874 activate the <title workaround hack
)
is_rss=is_rss)) #1874 activate the <title workaround hack
stripped_text_from_html = future.result()
if watch.get('sort_text_alphabetically') and stripped_text_from_html:
if watch.get('trim_text_whitespace'):
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
if watch.get('remove_duplicate_lines'):
stripped_text_from_html = '\n'.join(dict.fromkeys(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
if watch.get('sort_text_alphabetically'):
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
@@ -290,7 +312,7 @@ class perform_site_check(difference_detection_processor):
for match in res:
regex_matched_output += [match] + [b'\n']
# Now we will only show what the regex matched
##########################################################
stripped_text_from_html = b''
text_content_before_ignored_filter = b''
if regex_matched_output:
@@ -298,6 +320,8 @@ class perform_site_check(difference_detection_processor):
stripped_text_from_html = b''.join(regex_matched_output)
text_content_before_ignored_filter = stripped_text_from_html
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
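
The two new watch options reduce to one-liners over the extracted text; `dict.fromkeys()` gives an order-preserving de-duplication. In isolation:

```python
stripped_text_from_html = "  Price: 10 \nPrice: 10\n\n  News item  \n"

# 'Trim whitespace before and after text'
trimmed = '\n'.join(line.strip() for line in
                    stripped_text_from_html.replace("\n\n", "\n").splitlines())

# 'Remove duplicate lines of text' - dict keys keep first-seen order
deduped = '\n'.join(dict.fromkeys(line.strip() for line in
                    stripped_text_from_html.replace("\n\n", "\n").splitlines()))
print(deduped)  # Price: 10\nNews item
```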

View File

@@ -16,25 +16,31 @@ echo "---------------------------------- SOCKS5 -------------------"
docker run --network changedet-network \
-v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
--rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=proxiesjson" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
# SOCKS5 related - by manually entering in UI
docker run --network changedet-network \
--rm \
-e "FLASK_SERVER_NAME=cdio" \
--hostname cdio \
-e "SOCKSTEST=manual" \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'
# SOCKS5 related - test from proxies.json via playwright - NOTE: PLAYWRIGHT DOESN'T SUPPORT AUTHENTICATED PROXIES
docker run --network changedet-network \
-e "SOCKSTEST=manual-playwright" \
--hostname cdio \
-e "FLASK_SERVER_NAME=cdio" \
-v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
-e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
--rm \
test-changedetectionio \
bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py'
bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'
echo "socks5 server logs"
docker logs socks5proxy

View File

@@ -18,9 +18,11 @@ $(document).ready(function () {
});
$("#notification-token-toggle").click(function (e) {
$(".toggle-show").click(function (e) {
e.preventDefault();
$('#notification-tokens-info').toggle();
let target = $(this).data('target');
$(target).toggle();
});
});

View File

@@ -40,15 +40,29 @@
}
}
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll;
}
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 70vh;
font-size: 80%;
#browser-steps-ui {
flex-grow: 1; /* Allow it to grow and fill the available space */
flex-shrink: 1; /* Allow it to shrink if needed */
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px;
}
#browser-steps-fieldlist {
flex-grow: 0; /* Don't allow it to grow */
flex-shrink: 0; /* Don't allow it to shrink */
flex-basis: auto; /* Base width is determined by the content */
max-width: 400px; /* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll;
}
}
/* this is duplicate :( */

View File

@@ -46,14 +46,31 @@
#browser_steps li > label {
display: none; }
#browser-steps-fieldlist {
height: 100%;
overflow-y: scroll; }
#browser-steps .flex-wrapper {
display: flex;
flex-flow: row;
height: 70vh; }
height: 70vh;
font-size: 80%; }
#browser-steps .flex-wrapper #browser-steps-ui {
flex-grow: 1;
/* Allow it to grow and fill the available space */
flex-shrink: 1;
/* Allow it to shrink if needed */
flex-basis: 0;
/* Start with 0 base width so it stretches as much as possible */
background-color: #eee;
border-radius: 5px; }
#browser-steps .flex-wrapper #browser-steps-fieldlist {
flex-grow: 0;
/* Don't allow it to grow */
flex-shrink: 0;
/* Don't allow it to shrink */
flex-basis: auto;
/* Base width is determined by the content */
max-width: 400px;
/* Set a max width to prevent overflow */
padding-left: 1rem;
overflow-y: scroll; }
/* this is duplicate :( */
#browsersteps-selector-wrapper {
@@ -1194,11 +1211,9 @@ ul {
color: #fff;
opacity: 0.7; }
.restock-label svg {
vertical-align: middle; }
#chrome-extension-link {
padding: 9px;
border: 1px solid var(--color-grey-800);

View File

@@ -11,7 +11,6 @@ from threading import Lock
import json
import os
import re
import requests
import secrets
import threading
import time
@@ -270,6 +269,7 @@ class ChangeDetectionStore:
self.needs_write_urgent = True
def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
import requests
if extras is None:
extras = {}

View File

@@ -11,8 +11,11 @@
class="notification-urls" )
}}
<div class="pure-form-message-inline">
<ul>
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<p>
<strong>Tip:</strong> Use <a target=_new href="https://github.com/caronc/apprise">AppRise Notification URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.<br>
</p>
<div data-target="#advanced-help-notifications" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul style="display: none" id="advanced-help-notifications">
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>)) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
@@ -40,7 +43,7 @@
</div>
<div class="pure-controls">
<div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
<div data-target="#notification-tokens-info" class="toggle-show pure-button button-tag button-xsmall">Show token/placeholders</div>
</div>
<div class="pure-controls" style="display: none;" id="notification-tokens-info">
<table class="pure-table" id="token-table">

View File

@@ -4,6 +4,7 @@
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='global-settings.js')}}" defer></script>
<script>
const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}');
const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}');
@@ -199,7 +200,7 @@ User-Agent: wonderbra 1.0") }}
<div id="loading-status-text" style="display: none;">Please wait, first browser step can take a little time to load..<div class="spinner"></div></div>
<div class="flex-wrapper" >
<div id="browser-steps-ui" class="noselect" style="width: 100%; background-color: #eee; border-radius: 5px;">
<div id="browser-steps-ui" class="noselect">
<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
<span class="loader" >
@@ -214,7 +215,7 @@ User-Agent: wonderbra 1.0") }}
<canvas class="noselect" id="browsersteps-selector-canvas" style="max-width: 100%; width: 100%;"></canvas>
</div>
</div>
<div id="browser-steps-fieldlist" style="padding-left: 1em; width: 350px; font-size: 80%;" >
<div id="browser-steps-fieldlist" >
<span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
{{ render_field(form.browser_steps) }}
</div>
@@ -275,9 +276,9 @@ xpath://body/div/span[contains(@class, 'example-class')]",
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br>
<ul>
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<p><div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div><br></p>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
@@ -297,21 +298,25 @@ xpath://body/div/span[contains(@class, 'example-class')]",
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Don't paste HTML here, use only CSS selectors </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
@@ -326,11 +331,22 @@ nav
<span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
<span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.remove_duplicate_lines) }}
<span class="pure-form-message-inline">Remove duplicate lines of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.sort_text_alphabetically) }}
<span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
<fieldset class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>

View File

@@ -76,7 +76,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">
@@ -155,11 +155,13 @@
{{ render_field(form.application.form.global_subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -1,12 +1,27 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
import time
data = f"""<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
time.sleep(1)
def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
# Setup a proxy
res = client.post(
@@ -24,7 +39,10 @@ def test_socks5(client, live_server, measure_memory_usage):
assert b"Settings updated." in res.data
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
# Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.post(
url_for("form_quick_watch_add"),
@@ -60,4 +78,4 @@ def test_socks5(client, live_server, measure_memory_usage):
)
# Should see the proper string
assert "+0200:".encode('utf-8') in res.data
assert "Awesome, you made it".encode('utf-8') in res.data

View File

@@ -1,16 +1,32 @@
#!/usr/bin/env python3
import os
import time
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
def set_response():
import time
data = f"""<html>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)
time.sleep(1)
# should be proxies.json mounted from run_proxy_tests.sh already
# -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json
def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage):
live_server_setup(live_server)
test_url = "https://changedetection.io/CHANGELOG.txt?socks-test-tag=" + os.getenv('SOCKSTEST', '')
set_response()
# Because the socks server should connect back to us
test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}"
test_url = test_url.replace('localhost.localdomain', 'cdio')
test_url = test_url.replace('localhost', 'cdio')
res = client.get(url_for("settings_page"))
assert b'name="requests-proxy" type="radio" value="socks5proxy"' in res.data
@@ -49,4 +65,4 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
)
# Should see the proper string
assert "+0200:".encode('utf-8') in res.data
assert "Awesome, you made it".encode('utf-8') in res.data

View File

@@ -2,7 +2,7 @@
import os
import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
@@ -94,7 +94,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
assert b'not-in-stock' not in res.data
# We should have a notification
time.sleep(2)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
os.unlink("test-datastore/notification.txt")
@@ -103,6 +103,7 @@ def test_restock_detection(client, live_server, measure_memory_usage):
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(5)
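# (assumed) settling time: give any stray notification a chance to land before asserting that none fired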
assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default"
# BUT we should see that it correctly shows "not in stock"

View File

@@ -2,7 +2,7 @@
import os.path
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
from changedetectionio import html_tools
@@ -165,7 +165,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'unviewed' in res.data
# Takes a moment for apprise to fire
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()

View File

@@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# Check that 'get latest snapshot' works
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
@@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
res = client.get(url_for("diff_history_page", uuid="first"))
res = client.get(url_for("diff_history_page", uuid=uuid))
assert b'selected=""' in res.data, "Confirm diff history page loaded"
# Check the [preview] pulls the right one
@@ -143,18 +149,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
uuid = extract_UUID_from_client(client)
client.get(url_for("clear_watch_history", uuid=uuid))
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'preview/' in res.data
# Check that 'get latest snapshot' works
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'<head><title>head title</title></head>' in res.data
#
# Cleanup everything
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -87,6 +87,9 @@ def test_element_removal_output():
Some initial text<br>
<p>across multiple lines</p>
<div id="changetext">Some text that changes</div>
<div>Some text should be matched by xPath // selector</div>
<div>Some text should be matched by xPath selector</div>
<div>Some text should be matched by xPath1 selector</div>
</body>
<footer>
<p>Footer</p>
@@ -94,7 +97,16 @@ def test_element_removal_output():
</html>
"""
html_blob = element_removal(
["header", "footer", "nav", "#changetext"], html_content=content
[
"header",
"footer",
"nav",
"#changetext",
"//*[contains(text(), 'xPath // selector')]",
"xpath://*[contains(text(), 'xPath selector')]",
"xpath1://*[contains(text(), 'xPath1 selector')]"
],
html_content=content
)
text = get_text(html_blob)
assert (

View File

@@ -4,7 +4,7 @@
import os
import time
from flask import url_for
from .util import set_original_response, live_server_setup
from .util import set_original_response, live_server_setup, wait_for_notification_endpoint_output
from changedetectionio.model import App
@@ -102,14 +102,15 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
follow_redirects=True
)
assert b"Updated watch." in res.data
time.sleep(3)
wait_for_notification_endpoint_output()
# Shouldn't exist, shouldn't have fired
assert not os.path.isfile("test-datastore/notification.txt")
# Now the filter should exist
set_response_with_filter()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt")

View File

@@ -1,7 +1,9 @@
import os
import time
from loguru import logger
from flask import url_for
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks
from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \
wait_for_notification_endpoint_output
from changedetectionio.model import App
@@ -26,6 +28,12 @@ def run_filter_test(client, live_server, content_filter):
# Response WITHOUT the filter ID element
set_original_response()
# Goto the edit page, add our ignore text
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),
@@ -34,83 +42,90 @@ def run_filter_test(client, live_server, content_filter):
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": ''},
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"Watch added" in res.data
# Give the thread time to pick up the first version
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Goto the edit page, add our ignore text
# Add our URL to the import page
url = url_for('test_notification_endpoint', _external=True)
notification_url = url.replace('http', 'json')
uuid = extract_UUID_from_client(client)
print(">>>> Notification URL: " + notification_url)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
# Just a regular notification setting; this will be used by the special 'filter not found' notification
notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
"notification_body": "BASE URL: {{base_url}}\n"
"Watch URL: {{watch_url}}\n"
"Watch UUID: {{watch_uuid}}\n"
"Watch title: {{watch_title}}\n"
"Watch tag: {{watch_tag}}\n"
"Preview: {{preview_url}}\n"
"Diff URL: {{diff_url}}\n"
"Snapshot: {{current_snapshot}}\n"
"Diff: {{diff}}\n"
"Diff Full: {{diff_full}}\n"
"Diff as Patch: {{diff_patch}}\n"
":-)",
"notification_format": "Text"}
watch_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
"notification_body": "BASE URL: {{base_url}}\n"
"Watch URL: {{watch_url}}\n"
"Watch UUID: {{watch_uuid}}\n"
"Watch title: {{watch_title}}\n"
"Watch tag: {{watch_tag}}\n"
"Preview: {{preview_url}}\n"
"Diff URL: {{diff_url}}\n"
"Snapshot: {{current_snapshot}}\n"
"Diff: {{diff}}\n"
"Diff Full: {{diff_full}}\n"
"Diff as Patch: {{diff_patch}}\n"
":-)",
"notification_format": "Text",
"fetch_backend": "html_requests",
"filter_failure_notification_send": 'y',
"headers": "",
"tags": "my tag",
"title": "my title 123",
"time_between_check-hours": 5, # So that the queue runner doesnt also put it in
"url": test_url,
}
notification_form_data.update({
"url": test_url,
"tags": "my tag",
"title": "my title 123",
"headers": "",
"filter_failure_notification_send": 'y',
"include_filters": content_filter,
"fetch_backend": "html_requests"})
# A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
res = client.post(
url_for("edit_page", uuid="first"),
data=notification_form_data,
url_for("edit_page", uuid=uuid),
data=watch_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 0, "No filter = No filter failure"
# Now the notification should not exist, because we didn't reach the threshold
# Now add a filter; because the recheck interval is 5 hours, ONLY pressing [edit] or [recheck all] should trigger a check
watch_data['include_filters'] = content_filter
res = client.post(
url_for("edit_page", uuid=uuid),
data=watch_data,
follow_redirects=True
)
assert b"Updated watch." in res.data
# It should have checked once so far and given this error (because we hit SAVE)
wait_for_all_checks(client)
assert not os.path.isfile("test-datastore/notification.txt")
# Hitting [save] would have triggered a recheck, and we have a filter, so this would be ONE failure
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 1, "Should have been checked once"
# Recheck it up to just before the threshold, noting that the previous POST already triggered one recheck (and incremented the counter)
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
# Add 4 more checks
checked = 0
ATTEMPT_THRESHOLD_SETTING = live_server.app.config['DATASTORE'].data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
for i in range(0, ATTEMPT_THRESHOLD_SETTING - 2):
checked += 1
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
assert not os.path.isfile("test-datastore/notification.txt")
# We should see something in the frontend
res = client.get(url_for("index"))
assert b'Warning, no filters were found' in res.data
assert live_server.app.config['DATASTORE'].data['watching'][uuid]['consecutive_filter_failures'] == 5
# One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2) # delay for apprise to fire
wait_for_notification_endpoint_output()
# Now it should exist and contain our "filter not found" alert
assert os.path.isfile("test-datastore/notification.txt")
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()
@@ -123,10 +138,11 @@ def run_filter_test(client, live_server, content_filter):
set_response_with_filter()
# Try several times, it should NOT have 'filter not found'
for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
for i in range(0, ATTEMPT_THRESHOLD_SETTING + 2):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
wait_for_notification_endpoint_output()
# It should have sent a notification, but..
assert os.path.isfile("test-datastore/notification.txt")
# but it should not contain the info about a failed filter (because there was none in this case)
@@ -135,9 +151,6 @@ def run_filter_test(client, live_server, content_filter):
assert 'CSS/xPath filter was not present in the page' not in notification
# Re #1247 - All tokens got replaced correctly in the notification
res = client.get(url_for("index"))
uuid = extract_UUID_from_client(client)
# UUID is correct, but the notification contains the tag UUID instead of the watch UUID (possible bug)
assert uuid in notification
# cleanup for the next
@@ -152,9 +165,11 @@ def test_setup(live_server):
live_server_setup(live_server)
def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server,'#nope-doesnt-exist')
def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage):
# live_server_setup(live_server)
run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')
# Test that notification is never sent

View File

@@ -1,11 +1,8 @@
#!/usr/bin/env python3
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
sleep_time_for_fetch_thread = 3
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
import time
def set_nonrenderable_response():
@@ -16,12 +13,18 @@ def set_nonrenderable_response():
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
time.sleep(1)
return None
def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("")
time.sleep(1)
return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response()
live_server_setup(live_server)
@@ -35,18 +38,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# Do this a few times.. ensures we don't accidentally set the status
for n in range(3):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
#####################
@@ -64,7 +60,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
@@ -86,14 +82,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
client.get(url_for("mark_all_viewed"), follow_redirects=True)
# A totally zero byte (#2528) response should also not trigger an error
set_zero_byte_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
assert b'fetch-error' not in res.data
#
# Cleanup everything
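The zero-byte case asserted above can be pictured as follows (illustrative pseudologic, assuming the 'empty pages are a change' option wording from the settings template):

def classify_empty_body(body, empty_pages_are_a_change):
    # Sketch: an empty/whitespace-only body is recorded as a change when the
    # option is on, instead of surfacing a fetch error.
    if not body.strip():
        return 'change' if empty_pages_are_a_change else 'error'
    return 'process-normally'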

View File

@@ -3,7 +3,7 @@ import os
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..notification import default_notification_format
instock_props = [
@@ -146,14 +146,13 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
# A change in price, should trigger a change by default
wait_for_all_checks(client)
data = {
"tags": "",
"url": test_url,
"headers": "",
"time_between_check-hours": 5,
'fetch_backend': "html_requests"
}
data.update(extra_watch_edit_form)
@@ -178,11 +177,9 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
assert b'1,000.45' in res.data or b'1000.45' in res.data  # depending on locale
assert b'unviewed' not in res.data
# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
# let previous runs wait
time.sleep(1)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
wait_for_all_checks(client)
@@ -197,7 +194,8 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'1,890.45' or b'1890.45' in res.data
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
@@ -362,7 +360,7 @@ def test_change_with_notification_values(client, live_server):
set_original_response(props_markup=instock_props[0], price='1950.45')
client.get(url_for("form_watch_checknow"))
wait_for_all_checks(client)
time.sleep(3)
wait_for_notification_endpoint_output()
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"
with open("test-datastore/notification.txt", 'r') as f:
notification = f.read()
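The min/max behaviour asserted above boils down to a window check (sketch; the function and parameter names are illustrative, values taken from this test):

def price_escapes_limits(price, price_min=None, price_max=None):
    # With price_min=900 as in this test: 890.45 escapes the window (change),
    # while 1000.45 stays inside it (no change).
    if price_min is not None and price < price_min:
        return True
    if price_max is not None and price > price_max:
        return True
    return False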

View File

@@ -11,6 +11,8 @@ def set_original_ignore_response():
<p>Some initial text</p>
<p>Which is across multiple lines</p>
<p>So let's see what happens.</p>
<p>&nbsp; So let's see what happens. <br> </p>
<p>A - sortable line</p>
</body>
</html>
"""
@@ -164,5 +166,52 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
assert res.data.find(b'A uppercase') < res.data.find(b'Z last')
assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines')
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_extra_filters(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_ignore_response()
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"remove_duplicate_lines": "y",
"trim_text_whitespace": "y",
"sort_text_alphabetically": "", # leave this OFF for testing
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Give the thread time to pick it up
wait_for_all_checks(client)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.get(
url_for("preview_page", uuid="first")
)
assert res.data.count(b"see what happens.") == 1
# still should remain unsorted ('A - sortable line') stays at the end
assert res.data.find(b'A - sortable line') > res.data.find(b'Which is across multiple lines')
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
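The two filters exercised by test_extra_filters behave roughly like this (a minimal sketch, assuming simple line-oriented processing; not the project's exact implementation):

def trim_text_whitespace(text):
    # Strip leading and trailing whitespace from every line.
    return "\n".join(line.strip() for line in text.splitlines())

def remove_duplicate_lines(text):
    # Keep only the first occurrence of each line, preserving order.
    seen = set()
    return "\n".join(line for line in text.splitlines()
                     if not (line in seen or seen.add(line)))

Preserving first-seen order is why 'A - sortable line' must still appear after 'Which is across multiple lines' when sorting stays off.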

View File

@@ -76,6 +76,17 @@ def set_more_modified_response():
return None
def wait_for_notification_endpoint_output():
'''Apprise can take a few seconds to fire'''
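# Poll for the notification file rather than using a fixed sleep: return True
# as soon as it appears, or False after roughly 20 seconds so callers can assert on it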
from os.path import isfile
for i in range(1, 20):
time.sleep(1)
if isfile("test-datastore/notification.txt"):
return True
return False
# kinda funky, but works for now
def extract_api_key_from_UI(client):
import re

View File

@@ -1,6 +1,5 @@
from .processors.exceptions import ProcessorException
from . import content_fetchers
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
@@ -190,7 +189,9 @@ class update_worker(threading.Thread):
'screenshot': None
})
self.notification_q.put(n_object)
logger.error(f"Sent filter not found notification for {watch_uuid}")
logger.debug(f"Sent filter not found notification for {watch_uuid}")
else:
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -301,7 +302,7 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers.exceptions.ReplyWithContentButNoText as e:
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
# Backend (not filters) gave zero output
@@ -327,7 +328,7 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
@@ -365,38 +366,42 @@ class update_worker(threading.Thread):
# Only when enabled, send the notification
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
if threshold > 0 and c >= threshold:
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
if c >= threshold:
if not watch.get('notification_muted'):
logger.debug(f"Sending filter failed notification for {uuid}")
self.send_filter_failure_notification(uuid)
c = 0
logger.debug(f"Reset filter failure count back to zero")
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
else:
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
process_changedetection_results = False
except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
except content_fetchers.exceptions.BrowserConnectError as e:
except content_fetchers_exceptions.BrowserConnectError as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserFetchTimedOut as e:
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserStepsStepException as e:
except content_fetchers_exceptions.BrowserStepsStepException as e:
if not self.datastore.data['watching'].get(uuid):
continue
@@ -423,7 +428,7 @@ class update_worker(threading.Thread):
)
if watch.get('filter_failure_notification_send', False):
c = watch.get('consecutive_filter_failures', 5)
c = watch.get('consecutive_filter_failures', 0)
c += 1
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
@@ -438,25 +443,25 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers.exceptions.EmptyReply as e:
except content_fetchers_exceptions.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.ScreenshotUnavailable as e:
except content_fetchers_exceptions.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.JSActionExceptions as e:
except content_fetchers_exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.PageUnloadable as e:
except content_fetchers_exceptions.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
err_text = "{} - {}".format(err_text, e.message)
@@ -468,7 +473,7 @@ class update_worker(threading.Thread):
'last_check_status': e.status_code,
'has_ldjson_price_data': None})
process_changedetection_results = False
except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False
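To summarise the filter-failure accounting above, the worker's flow reduces to roughly this (a condensed sketch of the shown code, not a verbatim copy):

def on_filter_not_found(watch, settings, send_filter_failure_notification):
    # Count this failure on top of any previous consecutive ones.
    c = watch.get('consecutive_filter_failures', 0) + 1
    threshold = settings.get('filter_failure_notification_threshold_attempts', 0)
    if c >= threshold:
        if not watch.get('notification_muted'):
            send_filter_failure_notification(watch)
        c = 0  # reset once the threshold is reached
    watch['consecutive_filter_failures'] = c

Note that with the fallback of 0 for both the counter and the threshold setting, a single failure already satisfies c >= threshold unless a higher threshold is configured.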

View File

@@ -18,7 +18,7 @@ services:
#
# Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=DEBUG
# - LOGGER_LEVEL=TRACE
#
# Alternative WebDriver/selenium URL, do not use "'s or 's!
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
@@ -29,8 +29,9 @@ services:
#
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
@@ -73,10 +74,10 @@ services:
# condition: service_started
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Sockpuppetbrowser is basically Chrome wrapped in an API that allows fast fetching of web pages.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME
# playwright-chrome:
# hostname: playwright-chrome
# sockpuppetbrowser:
# hostname: sockpuppetbrowser
# image: dgtlmoon/sockpuppetbrowser:latest
# cap_add:
# - SYS_ADMIN

View File

@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually
# Notification library
apprise~=1.8.1
apprise==1.9.0
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
pytest ~=7.2
pytest-flask ~=1.2
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0
loguru