fix test

update texty
add helper text
2026-04-29 06:17:11 +00:00 · 2023-10-03 17:22:30 +02:00 · 2023-10-03 17:17:36 +02:00 · 2023-10-03 17:16:14 +02:00 · 2023-10-03 17:12:31 +02:00 · 2023-10-03 11:22:29 +02:00
30 changed files with 305 additions and 225 deletions
@@ -30,11 +30,11 @@ jobs:

    steps:
    - name: Checkout repository
-      uses: actions/checkout@v2
+      uses: actions/checkout@v4

    # Initializes the CodeQL tools for scanning.
    - name: Initialize CodeQL
-      uses: github/codeql-action/init@v1
+      uses: github/codeql-action/init@v2
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
-      uses: github/codeql-action/autobuild@v1
+      uses: github/codeql-action/autobuild@v2

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
    #   make release

    - name: Perform CodeQL Analysis
-      uses: github/codeql-action/analyze@v1
+      uses: github/codeql-action/analyze@v2
@@ -39,9 +39,9 @@ jobs:
    # Or if we are in a tagged release scenario (kept as ONE expression: text outside the braces makes `if` an always-truthy string).
    if: ${{ github.event.workflow_run.conclusion == 'success' || github.event.release.tag_name != '' }}
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
      - name: Set up Python 3.9
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: 3.9

@@ -58,27 +58,27 @@ jobs:
          echo ${{ github.ref }} > changedetectionio/tag.txt

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v1
+        uses: docker/setup-qemu-action@v3
        with:
          image: tonistiigi/binfmt:latest
          platforms: all

      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v1
+        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Login to Docker Hub Container Registry
-        uses: docker/login-action@v1
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}

      - name: Set up Docker Buildx
        id: buildx
-        uses: docker/setup-buildx-action@v1
+        uses: docker/setup-buildx-action@v3
        with:
          install: true
          version: latest
@@ -88,7 +88,7 @@ jobs:
      - name: Build and push :dev
        id: docker_build
        if: ${{ github.ref == 'refs/heads/master' }}  # compare inside the expression; a literal tail after the braces is always truthy
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v5
        with:
          context: ./
          file: ./Dockerfile
@@ -105,7 +105,7 @@ jobs:
      - name: Build and push :tag
        id: docker_build_tag_release
        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
-        uses: docker/build-push-action@v2
+        uses: docker/build-push-action@v5
        with:
          context: ./
          file: ./Dockerfile
@@ -125,7 +125,7 @@ jobs:
        run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}

      - name: Cache Docker layers
-        uses: actions/cache@v2
+        uses: actions/cache@v3
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
@@ -24,22 +24,22 @@ jobs:
  test-container-build:
    runs-on: ubuntu-latest
    steps:
-        - uses: actions/checkout@v2
+        - uses: actions/checkout@v4
        - name: Set up Python 3.9
-          uses: actions/setup-python@v2
+          uses: actions/setup-python@v4
          with:
            python-version: 3.9

        # Just test that the build works, some libraries won't compile on ARM/rPi etc
        - name: Set up QEMU
-          uses: docker/setup-qemu-action@v1
+          uses: docker/setup-qemu-action@v3
          with:
            image: tonistiigi/binfmt:latest
            platforms: all

        - name: Set up Docker Buildx
          id: buildx
-          uses: docker/setup-buildx-action@v1
+          uses: docker/setup-buildx-action@v3
          with:
            install: true
            version: latest
@@ -49,7 +49,7 @@ jobs:
        # Check we can still build under alpine/musl
        - name: Test that the docker containers can build (musl via alpine check)
          id: docker_build_musl
-          uses: docker/build-push-action@v2
+          uses: docker/build-push-action@v5
          with:
            context: ./
            file: ./.github/test/Dockerfile-alpine
@@ -57,7 +57,7 @@ jobs:

        - name: Test that the docker containers can build
          id: docker_build
-          uses: docker/build-push-action@v2
+          uses: docker/build-push-action@v5
          # https://github.com/docker/build-push-action#customizing
          with:
            context: ./
@@ -7,11 +7,11 @@ jobs:
  test-application:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4

      # Mainly just for link/flake8
      - name: Set up Python 3.10
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: '3.10'

@@ -11,10 +11,10 @@ jobs:
  test-pip-build-basics:
    runs-on: ubuntu-latest
    steps:
-        - uses: actions/checkout@v2
+        - uses: actions/checkout@v4

        - name: Set up Python 3.9
-          uses: actions/setup-python@v2
+          uses: actions/setup-python@v4
          with:
            python-version: 3.9

@@ -38,7 +38,9 @@ from flask_paginate import Pagination, get_page_parameter
 from changedetectionio import html_tools
 from changedetectionio.api import api_v1

-__version__ = '0.45.1'
+__version__ = '0.45.2'
+
+from changedetectionio.store import BASE_URL_NOT_SET_TEXT

 datastore = None

@@ -356,12 +358,10 @@ def changedetection_app(config=None, datastore_o=None):
                # Include a link to the diff page, they will have to login here to see if password protection is enabled.
                # Description is the page you watch, link takes you to the diff JS UI page
                # Dict val base_url will get overriden with the env var if it is set.
-                ext_base_url = datastore.data['settings']['application'].get('base_url')
-                if ext_base_url:
-                    # Go with overriden value
-                    diff_link = {'href': "{}{}".format(ext_base_url, url_for('diff_history_page', uuid=watch['uuid'], _external=False))}
-                else:
-                    diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}
+                ext_base_url = datastore.data['settings']['application'].get('active_base_url')
+
+                # Because we are called via whatever web server, flask should figure out the right external path
+                diff_link = {'href': url_for('diff_history_page', uuid=watch['uuid'], _external=True)}

                fe.link(link=diff_link)

@@ -714,7 +714,6 @@ def changedetection_app(config=None, datastore_o=None):
            output = render_template("edit.html",
                                     available_processors=processors.available_processors(),
                                     browser_steps_config=browser_step_ui_config,
-                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                     form=form,
                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
@@ -804,7 +803,6 @@ def changedetection_app(config=None, datastore_o=None):

        output = render_template("settings.html",
                                 form=form,
-                                 current_base_url = datastore.data['settings']['application']['base_url'],
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 api_key=datastore.data['settings']['application'].get('api_access_token'),
                                 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
@@ -57,9 +57,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': f"Empty reply with code {e.status_code}, needs chrome?"})
-
+        except content_fetcher.ReplyWithContentButNoText as e:
+            txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)."
+            status.update({'status': 'ERROR', 'text': txt})
        except Exception as e:
-            status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+str(e)})
+            status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': 'Error: '+type(e).__name__+str(e)})
        else:
            status.update({'status': 'OK', 'length': len(contents), 'text': ''})

@@ -77,11 +77,13 @@ class ScreenshotUnavailable(Exception):


 class ReplyWithContentButNoText(Exception):
-    def __init__(self, status_code, url, screenshot=None):
+    def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
+        self.has_filters = has_filters
+        self.html_content = html_content
        return


@@ -343,8 +345,8 @@ class base_html_playwright(Fetcher):
                        'req_headers': request_headers,
                        'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)),
                        'url': url,
-                        'user_agent': request_headers.get('User-Agent', 'Mozilla/5.0'),
-                        'proxy_username': self.proxy.get('username','') if self.proxy else False,
+                        'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
+                        'proxy_username': self.proxy.get('username', '') if self.proxy else False,
                        'proxy_password': self.proxy.get('password', '') if self.proxy else False,
                        'no_cache_list': [
                            'twitter',
@@ -443,7 +445,7 @@ class base_html_playwright(Fetcher):
            # Set user agent to prevent Cloudflare from blocking the browser
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
            context = browser.new_context(
-                user_agent=request_headers.get('User-Agent', 'Mozilla/5.0'),
+                user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None),
                proxy=self.proxy,
                # This is needed to enable JavaScript execution on GitHub and others
                bypass_csp=True,
@@ -684,7 +686,7 @@ class html_requests(Fetcher):
            is_binary=False):

        # Make requests use a more modern looking user-agent
-        if not 'User-Agent' in request_headers:
+        if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None):
            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

@@ -229,16 +229,19 @@ class ValidateJinja2Template(object):
    def __call__(self, form, field):
        from changedetectionio import notification

-        from jinja2 import Environment, BaseLoader, TemplateSyntaxError
+        from jinja2 import Environment, BaseLoader, TemplateSyntaxError, UndefinedError
        from jinja2.meta import find_undeclared_variables


        try:
            jinja2_env = Environment(loader=BaseLoader)
            jinja2_env.globals.update(notification.valid_tokens)
+
            rendered = jinja2_env.from_string(field.data).render()
        except TemplateSyntaxError as e:
            raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
+        except UndefinedError as e:
+            raise ValidationError(f"A variable or function is not defined: {e}") from e

        ast = jinja2_env.parse(field.data)
        undefined = ", ".join(find_undeclared_variables(ast))
@@ -502,7 +505,10 @@ class globalSettingsRequestForm(Form):
 class globalSettingsApplicationForm(commonSettingsForm):

    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
-    base_url = StringField('Base URL', validators=[validators.Optional()])
+    base_url = StringField('Notification base URL override',
+                           validators=[validators.Optional()],
+                           render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')}
+                           )
    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
    global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
@@ -10,6 +10,7 @@ import re
 # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
 TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"

+PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # all of those may or may not appear on different websites
 LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
@@ -17,7 +18,23 @@ LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers"
 class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
-        
+
+
+# Python's `re` does not appear to accept the /pattern/flags notation (e.g. in re.findall),
+# so rewrite that notation as an inline flag group, i.e. "(?i)foobar" type configuration
+def perl_style_slash_enclosed_regex_to_options(regex):
+    """Return `regex` as a pattern string that starts with an inline flag group.
+
+    Text matching PERL_STYLE_REGEX (/body/flags) becomes "(?flags)body"; when no flags
+    are given, or the text does not match at all, the "(?i)" group is used instead.
+    """
+    enclosed = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
+    if not enclosed:
+        # Fall back to just ignorecase as an option
+        return f"(?i){regex}"
+
+    return f"(?{enclosed.group(2) or 'i'}){enclosed.group(1)}"
+
 # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
    soup = BeautifulSoup(html_content, "html.parser")
@@ -195,23 +212,14 @@ def strip_ignore_text(content, wordlist, mode="content"):
    output = []
    ignore_text = []
    ignore_regex = []
-
    ignored_line_numbers = []

    for k in wordlist:
        # Is it a regex?
-        x = re.search('^\/(.*)\/(.*)', k.strip())
-        if x:
-            # Starts with / but doesn't look like a regex
-            p = x.group(1)
-            try:
-                # @Todo python regex options can go before the regex str, but not really many of the options apply on a per-line basis
-                ignore_regex.append(re.compile(rf"{p}", re.IGNORECASE))
-            except Exception as e:
-                # Badly formed regex, treat as text
-                ignore_text.append(k.strip())
+        res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
+        if res:
+            ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
        else:
-            # Had a / but doesn't work as regex
            ignore_text.append(k.strip())

    for line in content.splitlines():
@@ -208,15 +208,11 @@ def create_notification_parameters(n_object, datastore):
        watch_tag = ''

    # Create URLs to customise the notification with
-    base_url = datastore.data['settings']['application']['base_url']
+    # active_base_url - set in store.py data property
+    base_url = datastore.data['settings']['application'].get('active_base_url')

    watch_url = n_object['watch_url']

-    # Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
-    #           like 'Join', so it's always best to atleast set something obvious so that they are not broken.
-    if base_url == '':
-        base_url = "<base-url-env-var-not-set>"
-
    diff_url = "{}/diff/{}".format(base_url, uuid)
    preview_url = "{}/preview/{}".format(base_url, uuid)

@@ -226,7 +222,7 @@ def create_notification_parameters(n_object, datastore):
    # Valid_tokens also used as a field validator
    tokens.update(
        {
-            'base_url': base_url if base_url is not None else '',
+            'base_url': base_url,
            'current_snapshot': n_object['current_snapshot'] if 'current_snapshot' in n_object else '',
            'diff': n_object.get('diff', ''),  # Null default in the case we use a test
            'diff_added': n_object.get('diff_added', ''),  # Null default in the case we use a test
@@ -18,26 +18,7 @@ class difference_detection_processor():


 def available_processors():
-    import importlib
-    import pkgutil
-
    from . import restock_diff, text_json_diff
-
-    processors = [('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
-
-    discovered_plugins = {
-        name: importlib.import_module(name)
-        for finder, name, ispkg
-        in pkgutil.iter_modules()
-        if name.startswith('changedetectionio-plugin-')
-    }
-
-    try:
-        for name, plugin in discovered_plugins.items():
-            if hasattr(plugin, 'processors'):
-                for machine_name, desc in plugin.processors.items():
-                    processors.append((machine_name, desc))
-    except Exception as e:
-        print (f"Problem fetching one or more plugins")
-
-    return processors
+    x=[('text_json_diff', text_json_diff.name), ('restock_diff', restock_diff.name)]
+    # @todo Make this smarter with introspection of sorts.
+    return x
@@ -11,17 +11,19 @@ from changedetectionio import content_fetcher, html_tools
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from copy import deepcopy
 from . import difference_detection_processor
+from ..html_tools import PERL_STYLE_REGEX

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

-
-name =  'Webpage Text/HTML, JSON and PDF changes'
+name = 'Webpage Text/HTML, JSON and PDF changes'
 description = 'Detects all text changes where possible'

+
 class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)

+
 class PDFToHTMLToolNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
@@ -37,19 +39,6 @@ class perform_site_check(difference_detection_processor):
        super().__init__(*args, **kwargs)
        self.datastore = datastore

-    # Doesn't look like python supports forward slash auto enclosure in re.findall
-    # So convert it to inline flag "foobar(?i)" type configuration
-    def forward_slash_enclosed_regex_to_options(self, regex):
-        res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
-
-        if res:
-            regex = res.group(1)
-            regex += '(?{})'.format(res.group(2))
-        else:
-            regex += '(?{})'.format('i')
-
-        return regex
-
    def run(self, uuid, skip_when_checksum_same=True, preferred_proxy=None):
        changed_detected = False
        screenshot = False  # as bytes
@@ -135,7 +124,8 @@ class perform_site_check(difference_detection_processor):
        # requests for PDF's, images etc should be passwd the is_binary flag
        is_binary = watch.is_pdf

-        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'), is_binary=is_binary)
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'),
+                    is_binary=is_binary)
        fetcher.quit()

        self.screenshot = fetcher.screenshot
@@ -151,7 +141,6 @@ class perform_site_check(difference_detection_processor):
            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
                raise content_fetcher.checksumFromPreviousCheckWasTheSame()

-
        # Fetching complete, now filters
        # @todo move to class / maybe inside of fetcher abstract base?

@@ -231,8 +220,6 @@ class perform_site_check(difference_detection_processor):
                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False

-
-
        if is_html or is_source:

            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
@@ -283,7 +270,6 @@ class perform_site_check(difference_detection_processor):
        # Re #340 - return the content before the 'ignore text' was applied
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

-
        # @todo whitespace coming from missing rtrim()?
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
        # Rewrite's the processing text based on only what diff result they want to see
@@ -293,13 +279,13 @@ class perform_site_check(difference_detection_processor):
            # needs to not include (added) etc or it may get used twice
            # Replace the processed text with the preferred result
            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
-                                                       newest_version_file_contents=stripped_text_from_html,
-                                                       include_equal=False,  # not the same lines
-                                                       include_added=watch.get('filter_text_added', True),
-                                                       include_removed=watch.get('filter_text_removed', True),
-                                                       include_replaced=watch.get('filter_text_replaced', True),
-                                                       line_feed_sep="\n",
-                                                       include_change_type_prefix=False)
+                                             newest_version_file_contents=stripped_text_from_html,
+                                             include_equal=False,  # not the same lines
+                                             include_added=watch.get('filter_text_added', True),
+                                             include_removed=watch.get('filter_text_removed', True),
+                                             include_replaced=watch.get('filter_text_replaced', True),
+                                             line_feed_sep="\n",
+                                             include_change_type_prefix=False)

            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)

@@ -314,7 +300,12 @@ class perform_site_check(difference_detection_processor):
        # Treat pages with no renderable text content as a change? No by default
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
-            raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=fetcher.get_last_status_code(), screenshot=screenshot)
+            raise content_fetcher.ReplyWithContentButNoText(url=url,
+                                                            status_code=fetcher.get_last_status_code(),
+                                                            screenshot=screenshot,
+                                                            has_filters=has_filter_rule,
+                                                            html_content=html_content
+                                                            )

        # We rely on the actual text in the html output.. many sites have random script vars etc,
        # in the future we'll implement other mechanisms.
@@ -335,16 +326,25 @@ class perform_site_check(difference_detection_processor):
            regex_matched_output = []
            for s_re in extract_text:
                # incase they specified something in '/.../x'
-                regex = self.forward_slash_enclosed_regex_to_options(s_re)
-                result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
+                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
+                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
+                    result = re.findall(regex.encode('utf-8'), stripped_text_from_html)

-                for l in result:
-                    if type(l) is tuple:
-                        # @todo - some formatter option default (between groups)
-                        regex_matched_output += list(l) + [b'\n']
-                    else:
-                        # @todo - some formatter option default (between each ungrouped result)
-                        regex_matched_output += [l] + [b'\n']
+                    for l in result:
+                        if type(l) is tuple:
+                            # @todo - some formatter option default (between groups)
+                            regex_matched_output += list(l) + [b'\n']
+                        else:
+                            # @todo - some formatter option default (between each ungrouped result)
+                            regex_matched_output += [l] + [b'\n']
+                else:
+                    # Doesnt look like regex, just hunt for plaintext and return that which matches
+                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
+                    r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
+                    res = r.findall(stripped_text_from_html)
+                    if res:
+                        for match in res:
+                            regex_matched_output += [match] + [b'\n']

            # Now we will only show what the regex matched
            stripped_text_from_html = b''
@@ -18,7 +18,9 @@ module.exports = async ({page, context}) => {

    await page.setBypassCSP(true)
    await page.setExtraHTTPHeaders(req_headers);
-    await page.setUserAgent(user_agent);
+    if (user_agent) {
+        await page.setUserAgent(user_agent);
+    }
    // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded

    await page.setDefaultNavigationTimeout(0);
@@ -5,14 +5,19 @@ function isItemInStock() {
    'agotado',
    'artikel zurzeit vergriffen',
    'as soon as stock is available',
+    'ausverkauft', // sold out
    'available for back order',
+    'back-order or out of stock',
    'backordered',
+    'benachrichtigt mich', // notify me
    'brak na stanie',
    'brak w magazynie',
    'coming soon',
    'currently have any tickets for this',
    'currently unavailable',
+    'dostępne wkrótce',
    'en rupture de stock',
+    'ist derzeit nicht auf lager',
    'item is no longer available',
    'message if back in stock',
    'nachricht bei',
@@ -37,6 +42,7 @@ function isItemInStock() {
    'unavailable tickets',
    'we do not currently have an estimate of when this product will be back in stock.',
    'zur zeit nicht an lager',
+    '已售完',
  ];


@@ -208,7 +208,7 @@ $(document).ready(function () {
            console.log(x);
            if (x && first_available.length) {
                // @todo will it let you click shit that has a layer ontop? probably not.
-                if (x['tagtype'] === 'text' || x['tagtype'] === 'email' || x['tagName'] === 'textarea' || x['tagtype'] === 'password' || x['tagtype'] === 'search') {
+                if (x['tagtype'] === 'text' || x['tagtype'] === 'number' || x['tagtype'] === 'email' || x['tagName'] === 'textarea' || x['tagtype'] === 'password' || x['tagtype'] === 'search') {
                    $('select', first_available).val('Enter text in field').change();
                    $('input[type=text]', first_available).first().val(x['xpath']);
                    $('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
@@ -32,5 +32,10 @@ $(document).ready(function () {
        window.getSelection().removeAllRanges();

    });
+
+    $("#notification-token-toggle").click(function (e) {
+        e.preventDefault();
+        $('#notification-tokens-info').toggle();
+    });
 });

@@ -42,4 +42,8 @@ $(document).ready(function () {
        $('#notification_urls').val('');
        e.preventDefault();
    });
+    $("#notification-token-toggle").click(function (e) {
+        e.preventDefault();
+        $('#notification-tokens-info').toggle();
+    });
 });
@@ -44,7 +44,7 @@
 #browser-steps .flex-wrapper {
  display: flex;
  flex-flow: row;
-  height: 600px; /*@todo make this dynamic */
+  height: 70vh;
 }

 /*  this is duplicate :( */
@@ -50,8 +50,7 @@
 #browser-steps .flex-wrapper {
  display: flex;
  flex-flow: row;
-  height: 600px;
-  /*@todo make this dynamic */ }
+  height: 70vh; }

 /*  this is duplicate :( */
 #browsersteps-selector-wrapper {
@@ -18,6 +18,9 @@ import threading
 import time
 import uuid as uuid_builder

+# Because the server will run as a daemon and wont know the URL for notification links when firing off a notification
+BASE_URL_NOT_SET_TEXT = '("Base URL" not set - see settings - notifications)'
+
 dictfilt = lambda x, y: dict([ (i,x[i]) for i in x if i in set(y) ])

 # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
@@ -126,7 +129,6 @@ class ChangeDetectionStore:

        self.needs_write = True

-        self.scan_plugins()
        # Finally start the thread that will manage periodic data saves to JSON
        save_data_thread = threading.Thread(target=self.save_datastore).start()

@@ -176,12 +178,21 @@ class ChangeDetectionStore:

    @property
    def data(self):
-        # Re #152, Return env base_url if not overriden, @todo also prefer the proxy pass url
-        env_base_url = os.getenv('BASE_URL','')
-        if not self.__data['settings']['application']['base_url']:
-          self.__data['settings']['application']['base_url'] = env_base_url.strip('" ')
+        # Re #152 - Resolve the base URL for notification links: the 'base_url' setting wins, then the BASE_URL env var
+        # Re #148 - Some people have just {{ base_url }} in the body or title, but this may break some notification services
+        #           like 'Join', so fall back to the obvious BASE_URL_NOT_SET_TEXT placeholder rather than an empty value.

-        return self.__data
+        active_base_url = BASE_URL_NOT_SET_TEXT
+        if self.__data['settings']['application'].get('base_url'):
+            active_base_url = self.__data['settings']['application'].get('base_url')
+        elif os.getenv('BASE_URL'):
+            active_base_url = os.getenv('BASE_URL')
+
+        # NOTE: `d` is the very same dict object as self.__data - no copy is made - so 'active_base_url'
+        # is (re)written into the live application settings on every access before the dict is returned.
+        d = self.__data
+        d['settings']['application']['active_base_url'] = active_base_url.strip('" ')
+        return d

    # Delete a single watch by UUID
    def delete(self, uuid):
@@ -613,19 +624,6 @@ class ChangeDetectionStore:
    def tag_exists_by_name(self, tag_name):
        return any(v.get('title', '').lower() == tag_name.lower() for k, v in self.__data['settings']['application']['tags'].items())

-    def scan_plugins(self):
-        import importlib
-        import pkgutil
-
-        discovered_plugins = {
-            name: importlib.import_module(name)
-            for finder, name, ispkg
-            in pkgutil.iter_modules()
-            if name.startswith('changedetectionio-plugin-')
-        }
-
-        return discovered_plugins
-
    # Run all updates
    # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
    #             So therefor - each `update_n` should be very careful about checking if it needs to actually run
@@ -13,7 +13,7 @@
                            <div class="pure-form-message-inline">
                              <ul>
                                <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
-                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
+                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_discord">discord://</a></code> (or <code>https://discord.com/api/webhooks...</code>) only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> bots can't send messages to other bots, so you should specify chat ID of non-bot user.</li>
                                <li><code><a target=_new href="https://github.com/caronc/apprise/wiki/Notify_telegram">tgram://</a></code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
                                <li><code>gets://</code>, <code>posts://</code>, <code>puts://</code>, <code>deletes://</code> for direct API calls (or omit the "<code>s</code>" for non-SSL ie <code>get://</code>)</li>
@@ -35,18 +35,14 @@
                            </div>
                            <div class="pure-control-group">
                                {{ render_field(form.notification_body , rows=5, class="notification-body", placeholder=settings_application['notification_body']) }}
-                                <span class="pure-form-message-inline">Body for all notifications</span>
-                            </div>
-                            <div class="pure-control-group">
-                            <!-- unsure -->
-                                {{ render_field(form.notification_format , class="notification-format") }}
-                                <span class="pure-form-message-inline">Format for all notifications</span>
+                                <span class="pure-form-message-inline">Body for all notifications &dash; You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL, and tokens from below.
+                                </span>
+
                            </div>
                            <div class="pure-controls">
-                            <p class="pure-form-message-inline">
-                                You can use <a target="_new" href="https://jinja.palletsprojects.com/en/3.0.x/templates/">Jinja2</a> templating in the notification title, body and URL.
-                            </p>
-
+                                <div id="notification-token-toggle" class="pure-button button-tag button-xsmall">Show token/placeholders</div>
+                            </div>
+                            <div class="pure-controls" style="display: none;" id="notification-tokens-info">
                                <table class="pure-table" id="token-table">
                                    <thead>
                                    <tr>
@@ -105,7 +101,7 @@
                                    </tr>
                                    <tr>
                                        <td><code>{{ '{{current_snapshot}}' }}</code></td>
-                                        <td>The current snapshot value, useful when combined with JSON or CSS filters
+                                        <td>The current snapshot text contents value, useful when combined with JSON or CSS filters
                                        </td>
                                    </tr>
                                    <tr>
@@ -115,12 +111,15 @@
                                    </tbody>
                                </table>
                                <div class="pure-form-message-inline">
-                                    <br>
-                                    URLs generated by changedetection.io (such as <code>{{ '{{diff_url}}' }}</code>) require the <code>BASE_URL</code> environment variable set.<br>
-                                    Your <code>BASE_URL</code> var is currently "{{settings_application['current_base_url']}}"
-									<br>
-									Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
+                                    <p>
+									Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br>
+                                    For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br>
+                                    </p>
                                </div>
                            </div>
+                            <div class="pure-control-group">
+                                {{ render_field(form.notification_format , class="notification-format") }}
+                                <span class="pure-form-message-inline">Format for all notifications</span>
+                            </div>
                        </div>
 {% endmacro %}
@@ -378,15 +378,16 @@ Unavailable") }}
                        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
                        <span class="pure-form-message-inline">
                    <ul>
-                        <li>Extracts text in the final output (line by line) after other filters using regular expressions;
+                        <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
                            <ul>
                                <li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
+                                <li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
                                <li>Keyword example &dash; example <code>Out of stock</code></li>
                                <li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
                            </ul>
                        </li>
-                        <li>One line per regular-expression/ string match</li>
+                        <li>One line per regular-expression/string match</li>
                    </ul>
                        </span>
                    </div>
@@ -62,14 +62,6 @@
                        <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
                        </span>
                    </div>
-                    <div class="pure-control-group">
-                        {{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
-                        class="m-d") }}
-                        <span class="pure-form-message-inline">
-                            Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notifications and RSS links.<br>Default value is the ENV var 'BASE_URL' (Currently "{{settings_application['current_base_url']}}"),
-                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
-                        </span>
-                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.pager_size) }}
                        <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
@@ -100,6 +92,13 @@
                        {{ render_common_settings_form(form.application.form, emailprefix, settings_application) }}
                    </div>
                </fieldset>
+                <div class="pure-control-group" id="notification-base-url">
+                    {{ render_field(form.application.form.base_url, class="m-d") }}
+                    <span class="pure-form-message-inline">
+                        Base URL used for the <code>{{ '{{ base_url }}' }}</code> token in notification links.<br>
+                        Default value is the system environment variable '<code>BASE_URL</code>' - <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Configurable-BASE_URL-setting">read more here</a>.
+                    </span>
+                </div>
            </div>

            <div class="tab-pane-inner" id="fetching">
@@ -119,6 +119,9 @@
                            <a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
                        
                        {% endif %}
+                        {% if 'empty result or contain only an image' in watch.last_error %}
+                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
+                        {% endif %}
                    </div>
                    {% endif %}
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
@@ -1,6 +1,6 @@
 #!/usr/bin/python3

-from .util import set_original_response, set_modified_response, live_server_setup
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
 from flask import url_for
 from urllib.request import urlopen
 from zipfile import ZipFile
@@ -19,12 +19,12 @@ def test_backup(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
-        data={"urls": url_for('test_endpoint', _external=True)},
+        data={"urls": url_for('test_endpoint', _external=True)+"?somechar=őőőőőőőő"},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data
-    time.sleep(3)
+    wait_for_all_checks(client)

    res = client.get(
        url_for("get_backup"),
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from . util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks

 from ..html_tools import *

@@ -176,3 +176,77 @@ def test_check_multiple_filters(client, live_server):
    assert b"Blob A" in res.data # CSS was ok
    assert b"Blob B" in res.data # xPath was ok
    assert b"Blob C" not in res.data # Should not be included
+
+# The filter exists, but did not contain anything useful
+# Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector
+# Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text
+def test_filter_is_empty_help_suggestion(client, live_server):
+    #live_server_setup(live_server)
+
+    include_filters = "#blob-a"
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html><body>
+         <div id="blob-a">
+           <img src="something.jpg">
+         </div>
+         </body>
+         </html>
+        """)
+
+
+    # Add our URL to the import page
+    test_url = url_for('test_endpoint', _external=True)
+    res = client.post(
+        url_for("import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    wait_for_all_checks(client)
+
+    # Go to the edit page and apply the include filter to the watch
+    # (the URL itself was already added via the import page above)
+    res = client.post(
+        url_for("edit_page", uuid="first"),
+        data={"include_filters": include_filters,
+              "url": test_url,
+              "tags": "",
+              "headers": "",
+              'fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+
+    wait_for_all_checks(client)
+
+
+    res = client.get(
+        url_for("index"),
+        follow_redirects=True
+    )
+
+    assert b'empty result or contain only an image' in res.data
+
+
+    ### Just an empty selector, no image
+
+    with open("test-datastore/endpoint-content.txt", "w") as f:
+        f.write("""<html><body>
+         <div id="blob-a">
+           <!-- doo doo -->
+         </div>
+         </body>
+         </html>
+        """)
+
+    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+
+    res = client.get(
+        url_for("index"),
+        follow_redirects=True
+    )
+
+    assert b'empty result or contain only an image' not in res.data
+    assert b'but contained no usable text' in res.data
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks

 from ..html_tools import *

@@ -55,6 +55,8 @@ def set_multiline_response():
     </p>
     
     <div>aaand something lines</div>
+     <br>
+     <div>and this should be</div>
     </body>
     </html>
    """
@@ -66,11 +68,10 @@ def set_multiline_response():


 def test_setup(client, live_server):
-
    live_server_setup(live_server)

 def test_check_filter_multiline(client, live_server):
-
+    #live_server_setup(live_server)
    set_multiline_response()

    # Add our URL to the import page
@@ -82,14 +83,15 @@ def test_check_filter_multiline(client, live_server):
    )
    assert b"1 Imported" in res.data

-    time.sleep(3)
+    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": '',
-              'extract_text': '/something.+?6 billion.+?lines/si',
+              # Test a regex and a plaintext
+              'extract_text': '/something.+?6 billion.+?lines/si\r\nand this should be',
              "url": test_url,
              "tags": "",
              "headers": "",
@@ -99,13 +101,19 @@ def test_check_filter_multiline(client, live_server):
    )

    assert b"Updated watch." in res.data
-    time.sleep(3)
+    wait_for_all_checks(client)
+
+    res = client.get(url_for("index"))
+
+    # Issue 1828
+    assert b'not at the start of the expression' not in res.data

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
-
+    # Plaintext that doesn't look like a regex should match also
+    assert b'and this should be' in res.data

    assert b'<div class="">Something' in res.data
    assert b'<div class="">across 6 billion multiple' in res.data
@@ -115,14 +123,11 @@ def test_check_filter_multiline(client, live_server):
    assert b'aaand something lines' not in res.data

 def test_check_filter_and_regex_extract(client, live_server):
-    sleep_time_for_fetch_thread = 3
+    
    include_filters = ".changetext"

    set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
@@ -132,19 +137,15 @@ def test_check_filter_and_regex_extract(client, live_server):
    )
    assert b"1 Imported" in res.data

-    time.sleep(1)
-    # Trigger a check
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-
    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": include_filters,
-              'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
+              'extract_text': '/\d+ online/\r\n/\d+ guests/\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i\r\n/issue1828.+?2022/i',
              "url": test_url,
              "tags": "",
              "headers": "",
@@ -155,8 +156,13 @@ def test_check_filter_and_regex_extract(client, live_server):

    assert b"Updated watch." in res.data

+
    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)
+
+    res = client.get(url_for("index"))
+    #issue 1828
+    assert b'not at the start of the expression' not in res.data

    #  Make a change
    set_modified_response()
@@ -164,7 +170,7 @@ def test_check_filter_and_regex_extract(client, live_server):
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # It should have 'unviewed' still
    # Because it should be looking at only that 'sametext' id
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from . util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks


 def set_original_ignore_response():
@@ -26,13 +26,8 @@ def test_trigger_regex_functionality(client, live_server):

    live_server_setup(live_server)

-    sleep_time_for_fetch_thread = 3
-
    set_original_ignore_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
@@ -43,7 +38,7 @@ def test_trigger_regex_functionality(client, live_server):
    assert b"1 Imported" in res.data

    # Give the thread time to pick it up
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # It should report nothing found (just a new one shouldnt have anything)
    res = client.get(url_for("index"))
@@ -57,7 +52,7 @@ def test_trigger_regex_functionality(client, live_server):
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)
    # so that we set the state to 'unviewed' after all the edits
    client.get(url_for("diff_history_page", uuid="first"))

@@ -65,7 +60,7 @@ def test_trigger_regex_functionality(client, live_server):
        f.write("some new noise")

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)

    # It should report nothing found (nothing should match the regex)
    res = client.get(url_for("index"))
@@ -75,7 +70,7 @@ def test_trigger_regex_functionality(client, live_server):
        f.write("regex test123<br>\nsomething 123")

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    time.sleep(sleep_time_for_fetch_thread)
+    wait_for_all_checks(client)
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

@@ -1,11 +1,9 @@
-import importlib
 import os
-import pkgutil
-import queue
 import threading
+import queue
 import time

-from changedetectionio import content_fetcher
+from changedetectionio import content_fetcher, html_tools
 from .processors.text_json_diff import FilterNotFoundInResponse
 from .processors.restock_diff import UnableToExtractRestockData

@@ -231,28 +229,13 @@ class update_worker(threading.Thread):
                    now = time.time()

                    try:
-                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
+                        processor = self.datastore.data['watching'][uuid].get('processor','text_json_diff')

                        # @todo some way to switch by name
                        if processor == 'restock_diff':
                            update_handler = restock_diff.perform_site_check(datastore=self.datastore)
                        else:
                            # Used as a default and also by some tests
-                            discovered_plugins = {
-                                name: importlib.import_module(name)
-                                for finder, name, ispkg
-                                in pkgutil.iter_modules()
-                                if name.startswith('changedetectionio-plugin-')
-                            }
-
-                            for module_name, plugin in discovered_plugins.items():
-                                if hasattr(plugin, 'processors'):
-                                    for machine_name, desc in plugin.processors:
-                                        if machine_name == processor:
-                                            module = importlib.import_module(f"{module_name}.processors.{plugin}")
-                                            update_handler = module.perform_site_check(datastore=self.datastore)
-                                            #processors.append((machine_name, desc))
-
                            update_handler = text_json_diff.perform_site_check(datastore=self.datastore)

                        changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same'))
@@ -268,7 +251,20 @@ class update_worker(threading.Thread):
                        # Totally fine, it's by choice - just continue on, nothing more to care about
                        # Page had elements/content but no renderable text
                        # Backend (not filters) gave zero output
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
+                        extra_help = ""
+                        if e.has_filters:
+                            # Maybe it contains an image? offer a more helpful link
+                            has_img = html_tools.include_filters(include_filters='img',
+                                                                 html_content=e.html_content)
+                            if has_img:
+                                extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
+                            else:
+                                extra_help = ", it's possible that the filters were found, but contained no usable text."
+
+                        self.datastore.update_watch(uuid=uuid, update_obj={
+                            'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
+                        })
+
                        if e.screenshot:
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
                        process_changedetection_results = False
Author	SHA1	Message	Date
dgtlmoon	bd7282cd04	fix test	2023-10-03 17:22:30 +02:00
dgtlmoon	0579c5be2f	update texty	2023-10-03 17:17:36 +02:00
dgtlmoon	85aeea34b1	add helper text	2023-10-03 17:16:14 +02:00
dgtlmoon	2ccd0fc77b	Should also support non-regex strings	2023-10-03 17:12:31 +02:00
dgtlmoon	18d48bc2a0	fix comment	2023-10-03 11:22:29 +02:00
dgtlmoon	b344adb53d	fix regex	2023-10-03 11:22:09 +02:00
dgtlmoon	fc38e30989	WIP	2023-10-03 11:08:37 +02:00
dgtlmoon	8c8f378395	Merge branch 'master' into regex-cleanup-311	2023-10-03 10:14:55 +02:00
dgtlmoon	34bc7fe1a6	improve test	2023-10-03 10:04:52 +02:00
dgtlmoon	700729a332	UI - BrowserSteps - Browser Steps interface screen should resize relative to the browser	2023-10-02 18:06:25 +02:00
dgtlmoon	b6060ac90c	BrowserSteps - <input> of type 'number' should use 'enter text in field'	2023-10-02 11:50:15 +02:00
dgtlmoon	5cccccb0b6	Restock detect - bumping texts for restock detection	2023-09-26 14:32:39 +02:00
dgtlmoon	c52eb512e8	UI - Proxy Scanner tool should also understand when a filter is empty or contains only an image	2023-09-26 14:29:42 +02:00
dgtlmoon	7282df9c08	UI + Fetching - Improving helper message when filter contains only an image (adding link to more help)	2023-09-26 14:10:07 +02:00
dgtlmoon	e30b17b8bc	UI + Fetching - Be more helpful when a filter contains no text, suggest ways to deal with images in filters (#1819 )	2023-09-26 13:59:59 +02:00
Marcelo Alencar	1e88136325	Building application - Upgrade test workflows to latest versions (#1817 )	2023-09-26 10:18:54 +02:00
dgtlmoon	57de4ffe4f	Page fetching - Fixed possible incorrect browser user-agent header in playwright/puppeteer/browserless fetchers (#1811 )	2023-09-24 08:42:24 +02:00
dgtlmoon	51e2e8a226	UI - Add extra validation help for notification body with Jinja2 markup (#1810 )	2023-09-23 14:50:21 +02:00
dgtlmoon	8887459462	UI - More precise text to describe "current_snapshot" notification token	2023-09-23 14:31:48 +02:00
dgtlmoon	460c724e51	0.45.2	2023-09-22 09:45:55 +02:00
dgtlmoon	dcf4bf37ed	Code/Test - Improve testing for creating backups	2023-09-22 09:21:07 +02:00
dgtlmoon	e3cf22fc27	UI - Re-order notification field settings	2023-09-14 14:34:44 +02:00
dgtlmoon	d497db639e	UI - Notifications - Tidyup - Hide the notification tokens but show with a button/link	2023-09-14 14:16:08 +02:00
dgtlmoon	7355ac8d21	UI - Notifications - Tweak discord help text	2023-09-14 13:55:48 +02:00
dgtlmoon	2f2d0ea0f2	RSS feeds - Fixing broken links from RSS index in some environments, refactor code (#152 , #148 , #1684 , #1798 )	2023-09-14 13:19:45 +02:00
dgtlmoon	f991abf7ea	Small regex tidyup for 3.11	2023-09-10 09:51:48 +02:00