Fixing deprecation warning

UI - Visual Selector should still update when elements were not found (#2476)
Code - Update/modernise diff.py (#2471)
2026-01-10 17:20:26 +00:00 · 2024-07-09 16:17:31 +02:00 · 2024-07-09 15:35:19 +02:00 · 2024-07-09 15:08:13 +02:00 · 2024-07-09 15:07:23 +02:00 · 2024-07-05 20:43:26 +02:00
48 changed files with 759 additions and 597 deletions
--- a/.github/workflows/containers.yml
+++ b/.github/workflows/containers.yml
@@ -88,7 +88,7 @@ jobs:
      - name: Build and push :dev
        id: docker_build
        if: ${{ github.ref }} == "refs/heads/master"
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: ./
          file: ./Dockerfile
@@ -106,7 +106,7 @@ jobs:
      - name: Build and push :tag
        id: docker_build_tag_release
        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
        with:
          context: ./
          file: ./Dockerfile
--- a/.github/workflows/test-container-build.yml
+++ b/.github/workflows/test-container-build.yml
@@ -51,7 +51,7 @@ jobs:
        # Check we can still build under alpine/musl
        - name: Test that the docker containers can build (musl via alpine check)
          id: docker_build_musl
-          uses: docker/build-push-action@v5
+          uses: docker/build-push-action@v6
          with:
            context: ./
            file: ./.github/test/Dockerfile-alpine
@@ -59,7 +59,7 @@ jobs:

        - name: Test that the docker containers can build
          id: docker_build
-          uses: docker/build-push-action@v5
+          uses: docker/build-push-action@v6
          # https://github.com/docker/build-push-action#customizing
          with:
            context: ./
--- a/.github/workflows/test-only.yml
+++ b/.github/workflows/test-only.yml
@@ -37,10 +37,3 @@ jobs:
      python-version: '3.12'
      skip-pypuppeteer: true

-  test-application-3-13:
-    needs: lint-code
-    uses: ./.github/workflows/test-stack-reusable-workflow.yml
-    with:
-      python-version: '3.13'
-      skip-pypuppeteer: true
-      
--- a/7
+++ b/7
@@ -3,9 +3,9 @@
 # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
 #        If you know how to fix it, please do! and test it for both 3.10 and 3.11

-ARG PYTHON_VERSION=3.10
+ARG PYTHON_VERSION=3.11

-FROM python:${PYTHON_VERSION}-slim-bookworm as builder
+FROM python:${PYTHON_VERSION}-slim-bookworm AS builder

 # See `cryptography` pin comment in requirements.txt
 ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
@@ -26,7 +26,8 @@ WORKDIR /install

 COPY requirements.txt /requirements.txt

-RUN pip install --target=/dependencies -r /requirements.txt
+# --extra-index-url https://www.piwheels.org/simple lets pip fetch a prebuilt wheel for the cryptography module (otherwise rustc etc. needs to be installed)
+RUN pip install --extra-index-url https://www.piwheels.org/simple  --target=/dependencies -r /requirements.txt

 # Playwright is an alternative to Selenium
 # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.45.23'
+__version__ = '0.45.25'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/api_v1.py
+++ b/changedetectionio/api/api_v1.py
@@ -170,23 +170,33 @@ class WatchSingleHistory(Resource):
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
        @apiName Get single snapshot content
        @apiGroup Watch History
+        @apiParam {String} [html]       Optional, set `html=1` to return the last fetched HTML instead of the text snapshot (only the last 2 HTML snapshots are stored, use `latest` as timestamp)
        @apiSuccess (200) {String} OK
        @apiSuccess (404) {String} ERR Not found
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
-            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
+            abort(404, message=f"No watch exists with the UUID of {uuid}")

        if not len(watch.history):
-            abort(404, message='Watch found but no history exists for the UUID {}'.format(uuid))
+            abort(404, message=f"Watch found but no history exists for the UUID {uuid}")

        if timestamp == 'latest':
            timestamp = list(watch.history.keys())[-1]

-        content = watch.get_history_snapshot(timestamp)
+        if request.args.get('html'):
+            content = watch.get_fetched_html(timestamp)
+            if content:
+                response = make_response(content, 200)
+                response.mimetype = "text/html"
+            else:
+                response = make_response("No content found", 404)
+                response.mimetype = "text/plain"
+        else:
+            content = watch.get_history_snapshot(timestamp)
+            response = make_response(content, 200)
+            response.mimetype = "text/plain"

-        response = make_response(content, 200)
-        response.mimetype = "text/plain"
        return response


--- a/changedetectionio/blueprint/browser_steps/init.py
+++ b/changedetectionio/blueprint/browser_steps/init.py
@@ -187,8 +187,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
            u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
            if is_last_step and u:
                (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
-                datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
-                datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
+                watch = datastore.data['watching'].get(uuid)
+                if watch:
+                    watch.save_screenshot(screenshot=screenshot)
+                    watch.save_xpath_data(data=xpath_data)

 #        if not this_session.page:
 #            cleanup_playwright_session()
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -255,8 +255,9 @@ class browsersteps_live_ui(steppable_browser_interface):

    def get_current_state(self):
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
-        from pkg_resources import resource_string
-        xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
+        import importlib.resources
+        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
+
        now = time.time()
        self.page.wait_for_timeout(1 * 1000)

@@ -287,11 +288,9 @@ class browsersteps_live_ui(steppable_browser_interface):
        :param current_include_filters:
        :return:
        """
-
+        import importlib.resources
        self.page.evaluate("var include_filters=''")
-        from pkg_resources import resource_string
-        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8')
+        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
        from changedetectionio.content_fetchers import visualselector_xpath_selectors
        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
--- a/changedetectionio/blueprint/tags/templates/edit-tag.html
+++ b/changedetectionio/blueprint/tags/templates/edit-tag.html
@@ -63,7 +63,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                            <ul>
                                <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
                                {% if jq_support %}
-                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
+                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
                                {% else %}
                                <li>jq support not installed</li>
                                {% endif %}
--- a/changedetectionio/content_fetchers/base.py
+++ b/changedetectionio/content_fetchers/base.py
@@ -64,10 +64,9 @@ class Fetcher():
    render_extract_delay = 0

    def __init__(self):
-        from pkg_resources import resource_string
-        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
-        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
-        self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')
+        import importlib.resources
+        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
+        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()

    @abstractmethod
    def get_error(self):
--- a/changedetectionio/content_fetchers/exceptions/init.py
+++ b/changedetectionio/content_fetchers/exceptions/init.py
@@ -87,11 +87,12 @@ class ScreenshotUnavailable(Exception):


 class ReplyWithContentButNoText(Exception):
-    def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''):
+    def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content='', xpath_data=None):
        # Set this so we can use it in other parts of the app
        self.status_code = status_code
        self.url = url
        self.screenshot = screenshot
        self.has_filters = has_filters
        self.html_content = html_content
+        self.xpath_data = xpath_data
        return
--- a/changedetectionio/content_fetchers/res/init.py
+++ b/changedetectionio/content_fetchers/res/init.py
@@ -0,0 +1 @@
+# resources for browser injection/scraping
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -30,14 +30,21 @@ function isItemInStock() {
        'dieser artikel ist bald wieder verfügbar',
        'dostępne wkrótce',
        'en rupture de stock',
-        'ist derzeit nicht auf lager',
+        'isn\'t in stock right now',
+        'isnt in stock right now',
+        'isn’t in stock right now',
        'item is no longer available',
        'let me know when it\'s available',
+        'mail me when available',
        'message if back in stock',
        'nachricht bei',
        'nicht auf lager',
+        'nicht lagernd',
        'nicht lieferbar',
+        'nicht verfügbar',
+        'nicht vorrätig',
        'nicht zur verfügung',
+        'nie znaleziono produktów',
        'niet beschikbaar',
        'niet leverbaar',
        'niet op voorraad',
@@ -48,6 +55,7 @@ function isItemInStock() {
        'not currently available',
        'not in stock',
        'notify me when available',
+        'notify me',
        'notify when available',
        'não estamos a aceitar encomendas',
        'out of stock',
@@ -62,12 +70,16 @@ function isItemInStock() {
        'this item is currently unavailable',
        'tickets unavailable',
        'tijdelijk uitverkocht',
+        'unavailable nearby',
        'unavailable tickets',
+        'vergriffen',
+        'vorbestellen',
        'vorbestellung ist bald möglich',
        'we couldn\'t find any products that match',
        'we do not currently have an estimate of when this product will be back in stock.',
        'we don\'t know when or if this item will be back in stock.',
        'we were not able to find a match',
+        'when this arrives in stock',
        'zur zeit nicht an lager',
        '品切れ',
        '已售',
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -182,6 +182,7 @@ visibleElementsArray.forEach(function (element) {
 // Inject the current one set in the include_filters, which may be a CSS rule
 // used for displaying the current one in VisualSelector, where its not one we generated.
 if (include_filters.length) {
+    let results;
    // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
    for (const f of include_filters) {
        bbox = false;
@@ -197,10 +198,15 @@ if (include_filters.length) {
            if (f.startsWith('/') || f.startsWith('xpath')) {
                var qry_f = f.replace(/xpath(:|\d:)/, '')
                console.log("[xpath] Scanning for included filter " + qry_f)
-                q = document.evaluate(qry_f, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+                results = [];
+                for (let i = 0; i < xpathResult.snapshotLength; i++) {
+                    results.push(xpathResult.snapshotItem(i));
+                }
            } else {
                console.log("[css] Scanning for included filter " + f)
-                q = document.querySelector(f);
+                // querySelectorAll (not querySelector) so every matching element is highlighted, not only the first
+                results = document.querySelectorAll(f);
            }
        } catch (e) {
            // Maybe catch DOMException and alert?
@@ -208,44 +214,45 @@ if (include_filters.length) {
            console.log(e);
        }

-        if (q) {
-            // Try to resolve //something/text() back to its /something so we can atleast get the bounding box
-            try {
-                if (typeof q.nodeName == 'string' && q.nodeName === '#text') {
-                    q = q.parentElement
-                }
-            } catch (e) {
-                console.log(e)
-                console.log("xpath_element_scraper: #text resolver")
-            }
+        if (results && results.length) { // results is still undefined when the first filter threw inside the try above

-            // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
-            if (typeof q.getBoundingClientRect == 'function') {
-                bbox = q.getBoundingClientRect();
-                console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
-            } else {
+            const matched_nodes = results; results = []; // clear so a later filter that throws cannot re-use these nodes
+            matched_nodes.forEach(node => {
+                // Try to resolve //something/text() back to its /something so we can atleast get the bounding box
                try {
-                    // Try and see we can find its ownerElement
-                    bbox = q.ownerElement.getBoundingClientRect();
-                    console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+                    if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
+                        node = node.parentElement
+                    }
                } catch (e) {
                    console.log(e)
-                    console.log("xpath_element_scraper: error looking up q.ownerElement")
+                    console.log("xpath_element_scraper: #text resolver")
                }
-            }
-        }

-        if (!q) {
-            console.log("xpath_element_scraper: filter element " + f + " was not found");
-        }
+                // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
+                if (typeof node.getBoundingClientRect == 'function') {
+                    bbox = node.getBoundingClientRect();
+                    console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
+                } else {
+                    try {
+                        // Try and see we can find its ownerElement
+                        bbox = node.ownerElement.getBoundingClientRect();
+                        console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+                    } catch (e) {
+                        console.log(e)
+                        console.log("xpath_element_scraper: error looking up node.ownerElement")
+                    }
+                }

-        if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
-            size_pos.push({
-                xpath: f,
-                width: parseInt(bbox['width']),
-                height: parseInt(bbox['height']),
-                left: parseInt(bbox['left']),
-                top: parseInt(bbox['top']) + scroll_y
+                if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
+                    size_pos.push({
+                        xpath: f,
+                        width: parseInt(bbox['width']),
+                        height: parseInt(bbox['height']),
+                        left: parseInt(bbox['left']),
+                        top: parseInt(bbox['top']) + scroll_y,
+                        highlight_as_custom_filter: true
+                    });
+                }
            });
        }
    }
--- a/changedetectionio/diff.py
+++ b/changedetectionio/diff.py
@@ -1,62 +1,97 @@
-# used for the notifications, the front-end is using a JS library
-
 import difflib
+from typing import List, Iterator, Union

+def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
+    """Return a slice of the list, or a single element if start == end."""
+    return lst[start:end] if start != end else [lst[start]]

-def same_slicer(l, a, b):
-    if a == b:
-        return [l[a]]
-    else:
-        return l[a:b]
-
-# like .compare but a little different output
-def customSequenceMatcher(before, after, include_equal=False, include_removed=True, include_added=True, include_replaced=True, include_change_type_prefix=True):
-    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
-
-    # @todo Line-by-line mode instead of buncghed, including `after` that is not in `before` (maybe unset?)
+def customSequenceMatcher(
+    before: List[str],
+    after: List[str],
+    include_equal: bool = False,
+    include_removed: bool = True,
+    include_added: bool = True,
+    include_replaced: bool = True,
+    include_change_type_prefix: bool = True
+) -> Iterator[List[str]]:
+    """
+    Compare two sequences and yield differences based on specified parameters.
+    
+    Args:
+        before (List[str]): Original sequence
+        after (List[str]): Modified sequence
+        include_equal (bool): Include unchanged parts
+        include_removed (bool): Include removed parts
+        include_added (bool): Include added parts
+        include_replaced (bool): Include replaced parts
+        include_change_type_prefix (bool): Add prefixes to indicate change types
+    
+    Yields:
+        List[str]: Differences between sequences
+    """
+    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t", a=before, b=after)
+    
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if include_equal and tag == 'equal':
-            g = before[alo:ahi]
-            yield g
+            yield before[alo:ahi]
        elif include_removed and tag == 'delete':
-            row_prefix = "(removed) " if include_change_type_prefix else ''
-            g = [ row_prefix + i for i in same_slicer(before, alo, ahi)]
-            yield g
+            prefix = "(removed) " if include_change_type_prefix else ''
+            yield [f"{prefix}{line}" for line in same_slicer(before, alo, ahi)]
        elif include_replaced and tag == 'replace':
-            row_prefix = "(changed) " if include_change_type_prefix else ''
-            g = [row_prefix + i for i in same_slicer(before, alo, ahi)]
-            row_prefix = "(into) " if include_change_type_prefix else ''
-            g += [row_prefix + i for i in same_slicer(after, blo, bhi)]
-            yield g
+            prefix_changed = "(changed) " if include_change_type_prefix else ''
+            prefix_into = "(into) " if include_change_type_prefix else ''
+            yield [f"{prefix_changed}{line}" for line in same_slicer(before, alo, ahi)] + \
+                  [f"{prefix_into}{line}" for line in same_slicer(after, blo, bhi)]
        elif include_added and tag == 'insert':
-            row_prefix = "(added) " if include_change_type_prefix else ''
-            g = [row_prefix + i for i in same_slicer(after, blo, bhi)]
-            yield g
+            prefix = "(added) " if include_change_type_prefix else ''
+            yield [f"{prefix}{line}" for line in same_slicer(after, blo, bhi)]

-# only_differences - only return info about the differences, no context
-# line_feed_sep could be "<br>" or "<li>" or "\n" etc
-def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, include_replaced=True, line_feed_sep="\n", include_change_type_prefix=True, patch_format=False):
-
-    newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
-
-    if previous_version_file_contents:
-        previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
-    else:
-        previous_version_file_contents = ""
+def render_diff(
+    previous_version_file_contents: str,
+    newest_version_file_contents: str,
+    include_equal: bool = False,
+    include_removed: bool = True,
+    include_added: bool = True,
+    include_replaced: bool = True,
+    line_feed_sep: str = "\n",
+    include_change_type_prefix: bool = True,
+    patch_format: bool = False
+) -> str:
+    """
+    Render the difference between two file contents.
+    
+    Args:
+        previous_version_file_contents (str): Original file contents
+        newest_version_file_contents (str): Modified file contents
+        include_equal (bool): Include unchanged parts
+        include_removed (bool): Include removed parts
+        include_added (bool): Include added parts
+        include_replaced (bool): Include replaced parts
+        line_feed_sep (str): Separator for lines in output
+        include_change_type_prefix (bool): Add prefixes to indicate change types
+        patch_format (bool): Use patch format for output
+    
+    Returns:
+        str: Rendered difference
+    """
+    newest_lines = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+    previous_lines = [line.rstrip() for line in previous_version_file_contents.splitlines()] if previous_version_file_contents else []

    if patch_format:
-        patch = difflib.unified_diff(previous_version_file_contents, newest_version_file_contents)
+        patch = difflib.unified_diff(previous_lines, newest_lines, lineterm='')  # inputs are rstrip()'d, so header lines must not carry their own "\n"
        return line_feed_sep.join(patch)

-    rendered_diff = customSequenceMatcher(before=previous_version_file_contents,
-                                          after=newest_version_file_contents,
-                                          include_equal=include_equal,
-                                          include_removed=include_removed,
-                                          include_added=include_added,
-                                          include_replaced=include_replaced,
-                                          include_change_type_prefix=include_change_type_prefix)
+    rendered_diff = customSequenceMatcher(
+        before=previous_lines,
+        after=newest_lines,
+        include_equal=include_equal,
+        include_removed=include_removed,
+        include_added=include_added,
+        include_replaced=include_replaced,
+        include_change_type_prefix=include_change_type_prefix
+    )

-    # Recursively join lists
-    f = lambda L: line_feed_sep.join([f(x) if type(x) is list else x for x in L])
-    p= f(rendered_diff)
-    return p
+    def flatten(lst: List[Union[str, List[str]]]) -> str:
+        return line_feed_sep.join(flatten(x) if isinstance(x, list) else x for x in lst)
+
+    return flatten(rendered_diff)
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -679,7 +679,10 @@ def changedetection_app(config=None, datastore_o=None):

        if request.method == 'POST' and form.validate():

-            extra_update_obj = {}
+            extra_update_obj = {
+                'consecutive_filter_failures': 0,
+                'last_error' : False
+            }

            if request.args.get('unpause_on_save'):
                extra_update_obj['paused'] = False
@@ -718,7 +721,7 @@ def changedetection_app(config=None, datastore_o=None):
            datastore.data['watching'][uuid].update(extra_update_obj)

            if request.args.get('unpause_on_save'):
-                flash("Updated watch - unpaused!.")
+                flash("Updated watch - unpaused!")
            else:
                flash("Updated watch.")

--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -3,8 +3,6 @@ from bs4 import BeautifulSoup
 from inscriptis import get_text
 from jsonpath_ng.ext import parse
 from typing import List
-from inscriptis.css_profiles import CSS_PROFILES, HtmlElement
-from inscriptis.html_properties import Display
 from inscriptis.model.config import ParserConfig
 from xml.sax.saxutils import escape as xml_escape
 import json
@@ -196,12 +194,12 @@ def extract_element(find='title', html_content=''):

 #
 def _parse_json(json_data, json_filter):
-    if 'json:' in json_filter:
+    if json_filter.startswith("json:"):
        jsonpath_expression = parse(json_filter.replace('json:', ''))
        match = jsonpath_expression.find(json_data)
        return _get_stripped_text_from_json_match(match)

-    if 'jq:' in json_filter:
+    if json_filter.startswith("jq:") or json_filter.startswith("jqraw:"):

        try:
            import jq
@@ -209,10 +207,15 @@ def _parse_json(json_data, json_filter):
            # `jq` requires full compilation in windows and so isn't generally available
            raise Exception("jq not support not found")

-        jq_expression = jq.compile(json_filter.replace('jq:', ''))
-        match = jq_expression.input(json_data).all()
+        if json_filter.startswith("jq:"):
+            jq_expression = jq.compile(json_filter.removeprefix("jq:"))
+            match = jq_expression.input(json_data).all()
+            return _get_stripped_text_from_json_match(match)

-        return _get_stripped_text_from_json_match(match)
+        if json_filter.startswith("jqraw:"):
+            jq_expression = jq.compile(json_filter.removeprefix("jqraw:"))
+            match = jq_expression.input(json_data).all()
+            return '\n'.join(str(item) for item in match)

 def _get_stripped_text_from_json_match(match):
    s = []
--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -5,6 +5,7 @@ from changedetectionio.notification import (
    default_notification_title,
 )

+# Equal to or greater than this number of FilterNotFoundInResponse exceptions will trigger a filter-not-found notification
 _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'

--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -238,6 +238,8 @@ class model(dict):

        if len(tmp_history):
            self.__newest_history_key = list(tmp_history.keys())[-1]
+        else:
+            self.__newest_history_key = None

        self.__history_n = len(tmp_history)

@@ -328,14 +330,9 @@ class model(dict):
    def save_history_text(self, contents, timestamp, snapshot_id):
        import brotli

-        self.ensure_data_dir_exists()
+        logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")

-        # Small hack so that we sleep just enough to allow 1 second  between history snapshots
-        # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
-        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
-            logger.warning(f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
-            timestamp = str(int(timestamp) + 1)
-            time.sleep(1)
+        self.ensure_data_dir_exists()

        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
@@ -528,8 +525,42 @@ class model(dict):
        # None is set
        return False

+    def save_error_text(self, contents):
+        self.ensure_data_dir_exists()
+        target_path = os.path.join(self.watch_data_dir, "last-error.txt")
+        with open(target_path, 'w') as f:
+            f.write(contents)

-    def get_last_fetched_before_filters(self):
+    def save_xpath_data(self, data, as_error=False):
+        """Write `data` as JSON to elements-error.json (when as_error) or elements.json in the watch data dir."""
+        import json
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "elements-error.json")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "elements.json")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'w') as f:
+            f.write(json.dumps(data))
+
+    # Save as PNG, PNG is larger but better for doing visual diff in the future
+    def save_screenshot(self, screenshot: bytes, as_error=False):
+        """Write the screenshot bytes to last-error-screenshot.png (when as_error) or last-screenshot.png."""
+
+        if as_error:
+            target_path = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
+        else:
+            target_path = os.path.join(self.watch_data_dir, "last-screenshot.png")
+
+        self.ensure_data_dir_exists()
+
+        with open(target_path, 'wb') as f:
+            f.write(screenshot)
+
+
+    def get_last_fetched_text_before_filters(self):
        import brotli
        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')

@@ -544,12 +575,56 @@ class model(dict):
        with open(filepath, 'rb') as f:
            return(brotli.decompress(f.read()).decode('utf-8'))

-    def save_last_fetched_before_filters(self, contents):
+    def save_last_text_fetched_before_filters(self, contents):
        import brotli
        filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
        with open(filepath, 'wb') as f:
            f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))

+    def save_last_fetched_html(self, timestamp, contents):
+        """Store fetched HTML as "<timestamp>.html.br" in this watch's data directory.
+
+        :param timestamp: Used verbatim as the file name prefix.
+        :param contents: A str is encoded to UTF-8 first; any other value is passed on as-is.
+
+        The data is brotli-compressed; if compression raises, two warnings are logged and
+        the uncompressed data is written to the same path instead. Afterwards older
+        snapshots are pruned via _prune_last_fetched_html_snapshots().
+        """
+        import brotli
+
+        self.ensure_data_dir_exists()
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+
+        with open(filepath, 'wb') as f:
+            contents = contents.encode('utf-8') if isinstance(contents, str) else contents
+            try:
+                f.write(brotli.compress(contents))
+            except Exception as e:
+                logger.warning(f"{self.get('uuid')} - Unable to compress snapshot, saving as raw data to {filepath}")
+                logger.warning(e)
+                # NOTE(review): the raw fallback still lands in a ".html.br" file, while get_fetched_html()
+                # always runs brotli.decompress() on that file - confirm the fallback can be read back
+                f.write(contents)
+
+        self._prune_last_fetched_html_snapshots()
+
+    def get_fetched_html(self, timestamp):
+        """Return the stored HTML snapshot for `timestamp`, or False when there is none.
+
+        Reads "<timestamp>.html.br" from this watch's data directory, brotli-decompresses
+        it and decodes the result as UTF-8. Note the mixed return type: str on success,
+        False when the file does not exist.
+        """
+        import brotli
+
+        snapshot_fname = f"{timestamp}.html.br"
+        filepath = os.path.join(self.watch_data_dir, snapshot_fname)
+        if os.path.isfile(filepath):
+            with open(filepath, 'rb') as f:
+                return (brotli.decompress(f.read()).decode('utf-8'))
+
+        return False
+
+
+    def _prune_last_fetched_html_snapshots(self):
+        """Delete stored ".html.br" snapshots for all but the last two history keys.
+
+        The history keys are walked in reverse order and the first two entries of that
+        reversed list are kept (presumably the two newest - verify the ordering of
+        self.history). Entries without a snapshot file on disk are skipped.
+        """
+        reversed_keys = list(self.history.keys())[::-1]
+
+        # Everything after the first two reversed entries is a candidate for removal
+        for stale_timestamp in reversed_keys[2:]:
+            stale_path = os.path.join(self.watch_data_dir, f"{stale_timestamp}.html.br")
+            if os.path.isfile(stale_path):
+                os.remove(stale_path)
+
+
    @property
    def get_browsersteps_available_screenshots(self):
        "For knowing which screenshots are available to show the user in BrowserSteps UI"
--- a/changedetectionio/processors/init.py
+++ b/changedetectionio/processors/init.py
@@ -1,5 +1,6 @@
 from abc import abstractmethod
 from changedetectionio.strtobool import strtobool
+from changedetectionio.model import Watch
 from copy import deepcopy
 from loguru import logger
 import hashlib
@@ -138,7 +139,7 @@ class difference_detection_processor():
        # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch: Watch, skip_when_checksum_same=True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
--- a/changedetectionio/processors/restock_diff.py
+++ b/changedetectionio/processors/restock_diff.py
@@ -1,6 +1,5 @@

 from . import difference_detection_processor
-from copy import deepcopy
 from loguru import logger
 import hashlib
 import urllib3
@@ -20,10 +19,7 @@ class perform_site_check(difference_detection_processor):
    screenshot = None
    xpath_data = None

-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
-
-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+    def run_changedetection(self, watch, skip_when_checksum_same=True):

        if not watch:
            raise Exception("Watch no longer exists.")
@@ -44,13 +40,13 @@ class perform_site_check(difference_detection_processor):
            fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest()
            # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold.
            update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
-            logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
+            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned '{self.fetcher.instock_data}' from JS scraper.")
        else:
            raise UnableToExtractRestockData(status_code=self.fetcher.status_code)

        # The main thing that all this at the moment comes down to :)
        changed_detected = False
-        logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5:
            # Yes if we only care about it going to instock, AND we are in stock
--- a/changedetectionio/processors/text_json_diff.py
+++ b/changedetectionio/processors/text_json_diff.py
@@ -10,18 +10,18 @@ from . import difference_detection_processor
 from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
 from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
-import changedetectionio.content_fetchers
-from copy import deepcopy
 from loguru import logger

 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

 name = 'Webpage Text/HTML, JSON and PDF changes'
 description = 'Detects all text changes where possible'
-json_filter_prefixes = ['json:', 'jq:']
+json_filter_prefixes = ['json:', 'jq:', 'jqraw:']

 class FilterNotFoundInResponse(ValueError):
-    def __init__(self, msg):
+    def __init__(self, msg, screenshot=None, xpath_data=None):
+        self.screenshot = screenshot
+        self.xpath_data = xpath_data
        ValueError.__init__(self, msg)


@@ -34,14 +34,12 @@ class PDFToHTMLToolNotFound(ValueError):
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):

-    def run_changedetection(self, uuid, skip_when_checksum_same=True):
+    def run_changedetection(self, watch, skip_when_checksum_same=True):
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
        stripped_text_from_html = ""

-        # DeepCopy so we can be sure we don't accidently change anything by reference
-        watch = deepcopy(self.datastore.data['watching'].get(uuid))
        if not watch:
            raise Exception("Watch no longer exists.")

@@ -116,12 +114,12 @@ class perform_site_check(difference_detection_processor):
        # Better would be if Watch.model could access the global data also
        # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
        # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
-        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters')
+        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')

        # 1845 - remove duplicated filters in both group and watch include filter
        include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))

-        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'),
+        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
                                 *watch.get("subtractive_selectors", []),
                                 *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
                                 ]
@@ -188,7 +186,7 @@ class perform_site_check(difference_detection_processor):
                                                                       append_pretty_line_formatting=not watch.is_source_type_url)

                    if not html_content.strip():
-                        raise FilterNotFoundInResponse(include_filters_rule)
+                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)

                if has_subtractive_selectors:
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
@@ -222,7 +220,7 @@ class perform_site_check(difference_detection_processor):
            from .. import diff
            # needs to not include (added) etc or it may get used twice
            # Replace the processed text with the preferred result
-            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_before_filters(),
+            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
                                             newest_version_file_contents=stripped_text_from_html,
                                             include_equal=False,  # not the same lines
                                             include_added=watch.get('filter_text_added', True),
@@ -231,7 +229,7 @@ class perform_site_check(difference_detection_processor):
                                             line_feed_sep="\n",
                                             include_change_type_prefix=False)

-            watch.save_last_fetched_before_filters(text_content_before_ignored_filter)
+            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)

            if not rendered_diff and stripped_text_from_html:
                # We had some content, but no differences were found
@@ -246,9 +244,10 @@ class perform_site_check(difference_detection_processor):
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
            raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
                                                            status_code=self.fetcher.get_last_status_code(),
-                                                            screenshot=screenshot,
+                                                            screenshot=self.fetcher.screenshot,
                                                            has_filters=has_filter_rule,
-                                                            html_content=html_content
+                                                            html_content=html_content,
+                                                            xpath_data=self.fetcher.xpath_data
                                                            )

        # We rely on the actual text in the html output.. many sites have random script vars etc,
@@ -344,17 +343,17 @@ class perform_site_check(difference_detection_processor):
                if not watch['title'] or not len(watch['title']):
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content)

-        logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
+        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        if changed_detected:
            if watch.get('check_unique_lines', False):
                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                # One or more lines? unsure?
                if not has_unique_lines:
-                    logger.debug(f"check_unique_lines: UUID {uuid} didnt have anything new setting change_detected=False")
+                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
                    changed_detected = False
                else:
-                    logger.debug(f"check_unique_lines: UUID {uuid} had unique content")
+                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5
--- a/changedetectionio/static/js/browser-steps.js
+++ b/changedetectionio/static/js/browser-steps.js
@@ -1,14 +1,5 @@
 $(document).ready(function () {

-    // duplicate
-    var csrftoken = $('input[name=csrf_token]').val();
-    $.ajaxSetup({
-        beforeSend: function (xhr, settings) {
-            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                xhr.setRequestHeader("X-CSRFToken", csrftoken)
-            }
-        }
-    })
    var browsersteps_session_id;
    var browser_interface_seconds_remaining = 0;
    var apply_buttons_disabled = false;
--- a/changedetectionio/static/js/csrf.js
+++ b/changedetectionio/static/js/csrf.js
@@ -0,0 +1,10 @@
+// Registers a global jQuery AJAX hook that adds the "X-CSRFToken" header to every request
+// whose method is not GET/HEAD/OPTIONS/TRACE and that is not cross-domain.
+// NOTE(review): `csrftoken` is not defined in this file - presumably a global provided by
+// the page template before this script runs; confirm.
+$(document).ready(function () {
+    $.ajaxSetup({
+        beforeSend: function (xhr, settings) {
+            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
+                xhr.setRequestHeader("X-CSRFToken", csrftoken)
+            }
+        }
+    })
+});
+
--- a/changedetectionio/static/js/diff-overview.js
+++ b/changedetectionio/static/js/diff-overview.js
@@ -1,13 +1,4 @@
 $(document).ready(function () {
-    var csrftoken = $('input[name=csrf_token]').val();
-    $.ajaxSetup({
-        beforeSend: function (xhr, settings) {
-            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                xhr.setRequestHeader("X-CSRFToken", csrftoken)
-            }
-        }
-    })
-
    $('.needs-localtime').each(function () {
        for (var option of this.options) {
            var dateObject = new Date(option.value * 1000);
@@ -48,6 +39,12 @@ $(document).ready(function () {
      $("#highlightSnippet").remove();
    }

+    // Listen for Escape key press
+    window.addEventListener('keydown', function (e) {
+        if (e.key === 'Escape') {
+            clean();
+        }
+    }, false);

    function dragTextHandler(event) {
        console.log('mouseupped');
--- a/changedetectionio/static/js/notifications.js
+++ b/changedetectionio/static/js/notifications.js
@@ -13,16 +13,6 @@ $(document).ready(function() {
  $('#send-test-notification').click(function (e) {
    e.preventDefault();

-    // this can be global
-    var csrftoken = $('input[name=csrf_token]').val();
-    $.ajaxSetup({
-        beforeSend: function(xhr, settings) {
-            if (!/^(GET|HEAD|OPTIONS|TRACE)$/i.test(settings.type) && !this.crossDomain) {
-                xhr.setRequestHeader("X-CSRFToken", csrftoken)
-            }
-        }
-    })
-
    data = {
      notification_body: $('#notification_body').val(),
      notification_format: $('#notification_format').val(),
--- a/changedetectionio/static/js/visual-selector.js
+++ b/changedetectionio/static/js/visual-selector.js
@@ -2,250 +2,258 @@
 // All rights reserved.
 // yes - this is really a hack, if you are a front-ender and want to help, please get in touch!

-$(document).ready(function () {
+let runInClearMode = false;

-    var current_selected_i;
-    var state_clicked = false;
+$(document).ready(() => {
+    let currentSelections = [];
+    let currentSelection = null;
+    let appendToList = false;
+    let c, xctx, ctx;
+    let xScale = 1, yScale = 1;
+    let selectorImage, selectorImageRect, selectorData;

-    var c;

-    // greyed out fill context
-    var xctx;
-    // redline highlight context
-    var ctx;
+    // Global jQuery selectors with "Elem" appended
+    const $selectorCanvasElem = $('#selector-canvas');
+    const $includeFiltersElem = $("#include_filters");
+    const $selectorBackgroundElem = $("img#selector-background");
+    const $selectorCurrentXpathElem = $("#selector-current-xpath span");
+    const $fetchingUpdateNoticeElem = $('.fetching-update-notice');
+    const $selectorWrapperElem = $("#selector-wrapper");

-    var current_default_xpath = [];
-    var x_scale = 1;
-    var y_scale = 1;
-    var selector_image;
-    var selector_image_rect;
-    var selector_data;
+    // Color constants
+    const FILL_STYLE_HIGHLIGHT = 'rgba(205,0,0,0.35)';
+    const FILL_STYLE_GREYED_OUT = 'rgba(205,205,205,0.95)';
+    const STROKE_STYLE_HIGHLIGHT = 'rgba(255,0,0, 0.9)';
+    const FILL_STYLE_REDLINE = 'rgba(255,0,0, 0.1)';
+    const STROKE_STYLE_REDLINE = 'rgba(225,0,0,0.9)';

-    $('#visualselector-tab').click(function () {
-        $("img#selector-background").off('load');
-        state_clicked = false;
-        current_selected_i = false;
-        bootstrap_visualselector();
+    $('#visualselector-tab').click(() => {
+        $selectorBackgroundElem.off('load');
+        currentSelections = [];
+        bootstrapVisualSelector();
    });

-    $(document).on('keydown', function (event) {
-        if ($("img#selector-background").is(":visible")) {
-            if (event.key == "Escape") {
-                state_clicked = false;
-                ctx.clearRect(0, 0, c.width, c.height);
+    function clearReset() {
+        ctx.clearRect(0, 0, c.width, c.height);
+
+        if ($includeFiltersElem.val().length) {
+            alert("Existing filters under the 'Filters & Triggers' tab were cleared.");
+        }
+        $includeFiltersElem.val('');
+
+        currentSelections = [];
+
+        // Means we ignore the xpaths from the scraper marked as sel.highlight_as_custom_filter (it matched a previous selector)
+        runInClearMode = true;
+
+        highlightCurrentSelected();
+    }
+
+    function splitToList(v) {
+        return v.split('\n').map(line => line.trim()).filter(line => line.length > 0);
+    }
+
+    function sortScrapedElementsBySize() {
+        // Sort the currentSelections array by area (width * height) in descending order
+        selectorData['size_pos'].sort((a, b) => {
+            const areaA = a.width * a.height;
+            const areaB = b.width * b.height;
+            return areaB - areaA;
+        });
+    }
+
+    $(document).on('keydown keyup', (event) => {
+        if (event.code === 'ShiftLeft' || event.code === 'ShiftRight') {
+            appendToList = event.type === 'keydown';
+        }
+
+        if (event.type === 'keydown') {
+            if ($selectorBackgroundElem.is(":visible") && event.key === "Escape") {
+                clearReset();
            }
        }
    });

-    // For when the page loads
-    if (!window.location.hash || window.location.hash != '#visualselector') {
-        $("img#selector-background").attr('src', '');
+    $('#clear-selector').on('click', () => {
+        clearReset();
+    });
+    // So if they start switching between visualSelector and manual filters, stop it from rendering old filters
+    $('li.tab a').on('click', () => {
+        runInClearMode = true;
+    });
+
+    if (!window.location.hash || window.location.hash !== '#visualselector') {
+        $selectorBackgroundElem.attr('src', '');
        return;
    }

-    // Handle clearing button/link
-    $('#clear-selector').on('click', function (event) {
-        if (!state_clicked) {
-            alert('Oops, Nothing selected!');
-        }
-        state_clicked = false;
-        ctx.clearRect(0, 0, c.width, c.height);
-        xctx.clearRect(0, 0, c.width, c.height);
-        $("#include_filters").val('');
-    });
+    bootstrapVisualSelector();

-
-    bootstrap_visualselector();
-
-
-    function bootstrap_visualselector() {
-        if (1) {
-            // bootstrap it, this will trigger everything else
-            $("img#selector-background").on("error", function () {
-                $('.fetching-update-notice').html("<strong>Ooops!</strong> The VisualSelector tool needs atleast one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.");
-                $('.fetching-update-notice').css('color','#bb0000');
-                $('#selector-current-xpath').hide();
-                $('#clear-selector').hide();
-            }).bind('load', function () {
+    function bootstrapVisualSelector() {
+        $selectorBackgroundElem
+            .on("error", () => {
+                $fetchingUpdateNoticeElem.html("<strong>Ooops!</strong> The VisualSelector tool needs at least one fetched page, please unpause the watch and/or wait for the watch to complete fetching and then reload this page.")
+                    .css('color', '#bb0000');
+                $('#selector-current-xpath, #clear-selector').hide();
+            })
+            .on('load', () => {
                console.log("Loaded background...");
                c = document.getElementById("selector-canvas");
-                // greyed out fill context
                xctx = c.getContext("2d");
-                // redline highlight context
                ctx = c.getContext("2d");
-                if ($("#include_filters").val().trim().length) {
-                    current_default_xpath = $("#include_filters").val().split(/\r?\n/g);
-                } else {
-                    current_default_xpath = [];
-                }
-                fetch_data();
-                $('#selector-canvas').off("mousemove mousedown");
-                // screenshot_url defined in the edit.html template
-            }).attr("src", screenshot_url);
-        }
-        // Tell visualSelector that the image should update
-        var s = $("img#selector-background").attr('src') + "?" + new Date().getTime();
-        $("img#selector-background").attr('src', s)
+                fetchData();
+                $selectorCanvasElem.off("mousemove mousedown");
+            })
+            .attr("src", screenshot_url);
+
+        let s = `${$selectorBackgroundElem.attr('src')}?${new Date().getTime()}`;
+        $selectorBackgroundElem.attr('src', s);
    }

-    // This is fired once the img src is loaded in bootstrap_visualselector()
-    function fetch_data() {
-        // Image is ready
-        $('.fetching-update-notice').html("Fetching element data..");
+    function alertIfFilterNotFound() {
+        let existingFilters = splitToList($includeFiltersElem.val());
+        let sizePosXpaths = selectorData['size_pos'].map(sel => sel.xpath);
+
+        for (let filter of existingFilters) {
+            if (!sizePosXpaths.includes(filter)) {
+                alert(`One or more of your existing filters was not found and will be removed when a new filter is selected.`);
+                break;
+            }
+        }
+    }
+
+    function fetchData() {
+        $fetchingUpdateNoticeElem.html("Fetching element data..");

        $.ajax({
            url: watch_visual_selector_data_url,
            context: document.body
-        }).done(function (data) {
-            $('.fetching-update-notice').html("Rendering..");
-            selector_data = data;
-            console.log("Reported browser width from backend: " + data['browser_width']);
-            state_clicked = false;
-            set_scale();
-            reflow_selector();
-            $('.fetching-update-notice').fadeOut();
-        });
+        }).done((data) => {
+            $fetchingUpdateNoticeElem.html("Rendering..");
+            selectorData = data;
+            sortScrapedElementsBySize();
+            console.log(`Reported browser width from backend: ${data['browser_width']}`);

+            // Little sanity check for the user, alert them if something missing
+            alertIfFilterNotFound();
+
+            setScale();
+            reflowSelector();
+            $fetchingUpdateNoticeElem.fadeOut();
+        });
    }

+    function updateFiltersText() {
+        // Assuming currentSelections is already defined and contains the selections
+        let uniqueSelections = new Set(currentSelections.map(sel => (sel[0] === '/' ? `xpath:${sel.xpath}` : sel.xpath)));

-    function set_scale() {
-
-        // some things to check if the scaling doesnt work
-        // - that the widths/sizes really are about the actual screen size cat elements.json |grep -o width......|sort|uniq
-        $("#selector-wrapper").show();
-        selector_image = $("img#selector-background")[0];
-        selector_image_rect = selector_image.getBoundingClientRect();
-
-        // make the canvas the same size as the image
-        $('#selector-canvas').attr('height', selector_image_rect.height);
-        $('#selector-canvas').attr('width', selector_image_rect.width);
-        $('#selector-wrapper').attr('width', selector_image_rect.width);
-        x_scale = selector_image_rect.width / selector_data['browser_width'];
-        y_scale = selector_image_rect.height / selector_image.naturalHeight;
-        ctx.strokeStyle = 'rgba(255,0,0, 0.9)';
-        ctx.fillStyle = 'rgba(255,0,0, 0.1)';
-        ctx.lineWidth = 3;
-        console.log("scaling set  x: " + x_scale + " by y:" + y_scale);
-        $("#selector-current-xpath").css('max-width', selector_image_rect.width);
-    }
-
-    function reflow_selector() {
-        $(window).resize(function () {
-            set_scale();
-            highlight_current_selected_i();
-        });
-        var selector_currnt_xpath_text = $("#selector-current-xpath span");
-
-        set_scale();
-
-        console.log(selector_data['size_pos'].length + " selectors found");
-
-        // highlight the default one if we can find it in the xPath list
-        // or the xpath matches the default one
-        found = false;
-        if (current_default_xpath.length) {
-            // Find the first one that matches
-            // @todo In the future paint all that match
-            for (const c of current_default_xpath) {
-                for (var i = selector_data['size_pos'].length; i !== 0; i--) {
-                    if (selector_data['size_pos'][i - 1].xpath.trim() === c.trim()) {
-                        console.log("highlighting " + c);
-                        current_selected_i = i - 1;
-                        highlight_current_selected_i();
-                        found = true;
-                        break;
-                    }
-                }
-                if (found) {
-                    break;
-                }
-            }
-            if (!found) {
-                alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
-            }
+        if (currentSelections.length > 0) {
+            // Convert the Set back to an array and join with newline characters
+            let textboxFilterText = Array.from(uniqueSelections).join("\n");
+            $includeFiltersElem.val(textboxFilterText);
        }
+    }

+    function setScale() {
+        $selectorWrapperElem.show();
+        selectorImage = $selectorBackgroundElem[0];
+        selectorImageRect = selectorImage.getBoundingClientRect();

-        $('#selector-canvas').bind('mousemove', function (e) {
-            if (state_clicked) {
-                return;
+        $selectorCanvasElem.attr({
+            'height': selectorImageRect.height,
+            'width': selectorImageRect.width
+        });
+        $selectorWrapperElem.attr('width', selectorImageRect.width);
+        $('#visual-selector-heading').css('max-width', selectorImageRect.width + "px")
+
+        xScale = selectorImageRect.width / selectorImage.naturalWidth;
+        yScale = selectorImageRect.height / selectorImage.naturalHeight;
+
+        ctx.strokeStyle = STROKE_STYLE_HIGHLIGHT;
+        ctx.fillStyle = FILL_STYLE_REDLINE;
+        ctx.lineWidth = 3;
+        console.log("Scaling set  x: " + xScale + " by y:" + yScale);
+        $("#selector-current-xpath").css('max-width', selectorImageRect.width);
+    }
+
+    function reflowSelector() {
+        $(window).resize(() => {
+            setScale();
+            highlightCurrentSelected();
+        });
+
+        setScale();
+
+        console.log(selectorData['size_pos'].length + " selectors found");
+
+        // Pre-select every scraped element that is either flagged as a custom filter
+        // (ignored while runInClearMode is set) or already listed in the filters textarea
+        let existingFilters = splitToList($includeFiltersElem.val());
+
+        selectorData['size_pos'].forEach(sel => {
+            if ((!runInClearMode && sel.highlight_as_custom_filter) || existingFilters.includes(sel.xpath)) {
+                // Log the matched selector itself; `c` is the canvas element in this scope
+                console.log("highlighting " + sel.xpath);
+                currentSelections.push(sel);
            }
-            ctx.clearRect(0, 0, c.width, c.height);
-            current_selected_i = null;
+        });

-            // Add in offset
-            if ((typeof e.offsetX === "undefined" || typeof e.offsetY === "undefined") || (e.offsetX === 0 && e.offsetY === 0)) {
-                var targetOffset = $(e.target).offset();
+
+        highlightCurrentSelected();
+        updateFiltersText();
+
+        $selectorCanvasElem.bind('mousemove', handleMouseMove.debounce(5));
+        $selectorCanvasElem.bind('mousedown', handleMouseDown.debounce(5));
+        $selectorCanvasElem.bind('mouseleave', highlightCurrentSelected.debounce(5));
+
+        function handleMouseMove(e) {
+            if (!e.offsetX && !e.offsetY) {
+                const targetOffset = $(e.target).offset();
                e.offsetX = e.pageX - targetOffset.left;
                e.offsetY = e.pageY - targetOffset.top;
            }

-            // Reverse order - the most specific one should be deeper/"laster"
-            // Basically, find the most 'deepest'
-            var found = 0;
-            ctx.fillStyle = 'rgba(205,0,0,0.35)';
-            // Will be sorted by smallest width*height first
-            for (var i = 0; i <= selector_data['size_pos'].length; i++) {
-                // draw all of them? let them choose somehow?
-                var sel = selector_data['size_pos'][i];
-                // If we are in a bounding-box
-                if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale
-                    &&
-                    e.offsetX > sel.left * y_scale && e.offsetX < sel.left * y_scale + sel.width * y_scale
+            ctx.fillStyle = FILL_STYLE_HIGHLIGHT;

-                ) {
-
-                    // FOUND ONE
-                    set_current_selected_text(sel.xpath);
-                    ctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
-                    ctx.fillRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
-
-                    // no need to keep digging
-                    // @todo or, O to go out/up, I to go in
-                    // or double click to go up/out the selector?
-                    current_selected_i = i;
-                    found += 1;
-                    break;
+            selectorData['size_pos'].forEach(sel => {
+                // Hit-test the pointer against the scaled bounding box: vertical extent uses yScale,
+                // horizontal extent uses xScale - the same factors drawHighlight() draws with
+                if (e.offsetY > sel.top * yScale && e.offsetY < sel.top * yScale + sel.height * yScale &&
+                    e.offsetX > sel.left * xScale && e.offsetX < sel.left * xScale + sel.width * xScale) {
+                    setCurrentSelectedText(sel.xpath);
+                    drawHighlight(sel);
+                    // Temporarily add the hovered element so highlightCurrentSelected() outlines it
+                    // together with the committed selections, then take it back out
+                    currentSelections.push(sel);
+                    currentSelection = sel;
+                    highlightCurrentSelected();
+                    currentSelections.pop();
                }
-            }
-
-        }.debounce(5));
-
-        function set_current_selected_text(s) {
-            selector_currnt_xpath_text[0].innerHTML = s;
-        }
-
-        function highlight_current_selected_i() {
-            if (state_clicked) {
-                state_clicked = false;
-                xctx.clearRect(0, 0, c.width, c.height);
-                return;
-            }
-
-            var sel = selector_data['size_pos'][current_selected_i];
-            if (sel[0] == '/') {
-                // @todo - not sure just checking / is right
-                $("#include_filters").val('xpath:' + sel.xpath);
-            } else {
-                $("#include_filters").val(sel.xpath);
-            }
-            xctx.fillStyle = 'rgba(205,205,205,0.95)';
-            xctx.strokeStyle = 'rgba(225,0,0,0.9)';
-            xctx.lineWidth = 3;
-            xctx.fillRect(0, 0, c.width, c.height);
-            // Clear out what only should be seen (make a clear/clean spot)
-            xctx.clearRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
-            xctx.strokeRect(sel.left * x_scale, sel.top * y_scale, sel.width * x_scale, sel.height * y_scale);
-            state_clicked = true;
-            set_current_selected_text(sel.xpath);
-
+            })
        }


-        $('#selector-canvas').bind('mousedown', function (e) {
-            highlight_current_selected_i();
-        });
+        function setCurrentSelectedText(s) {
+            $selectorCurrentXpathElem[0].innerHTML = s;
+        }
+
+        function drawHighlight(sel) {
+            ctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale);
+            ctx.fillRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale);
+        }
+
+        function handleMouseDown() {
+            // If we are in 'appendToList' mode, grow the list, if not, just 1
+            currentSelections = appendToList ? [...currentSelections, currentSelection] : [currentSelection];
+            highlightCurrentSelected();
+            updateFiltersText();
+        }
+
    }

+    function highlightCurrentSelected() {
+        xctx.fillStyle = FILL_STYLE_GREYED_OUT;
+        xctx.strokeStyle = STROKE_STYLE_REDLINE;
+        xctx.lineWidth = 3;
+        xctx.clearRect(0, 0, c.width, c.height);
+
+        currentSelections.forEach(sel => {
+            //xctx.clearRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale);
+            xctx.strokeRect(sel.left * xScale, sel.top * yScale, sel.width * xScale, sel.height * yScale);
+        });
+    }
 });
--- a/changedetectionio/static/styles/scss/parts/_visualselector.scss
+++ b/changedetectionio/static/styles/scss/parts/_visualselector.scss
@@ -1,6 +1,8 @@

 #selector-wrapper {
  height: 100%;
+  text-align: center;
+  
  max-height: 70vh;
  overflow-y: scroll;
  position: relative;
--- a/changedetectionio/static/styles/scss/styles.scss
+++ b/changedetectionio/static/styles/scss/styles.scss
@@ -671,14 +671,25 @@ footer {
  and also iPads specifically.
  */
  .watch-table {
+    /* make headings work on mobile */
+    thead {
+      display: block;
+      tr {
+        th {
+          display: inline-block;
+        }
+      }
+      .empty-cell {
+        display: none;
+      }
+    }

    /* Force table to not be like tables anymore */
-    thead,
-    tbody,
-    th,
-    td,
-    tr {
-      display: block;
+    tbody {
+      td,
+      tr {
+        display: block;
+      }
    }

    .last-checked {
@@ -702,13 +713,6 @@ footer {
      display: inline-block;
    }

-    /* Hide table headers (but not display: none;, for accessibility) */
-    thead tr {
-      position: absolute;
-      top: -9999px;
-      left: -9999px;
-    }
-
    .pure-table td,
    .pure-table th {
      border: none;
@@ -753,6 +757,7 @@ footer {
  thead {
    background-color: var(--color-background-table-thead);
    color: var(--color-text);
+    border-bottom: 1px solid var(--color-background-table-thead);
  }

  td,
--- a/changedetectionio/static/styles/styles.css
+++ b/changedetectionio/static/styles/styles.css
@@ -863,14 +863,17 @@ footer {
  and also iPads specifically.
  */
  .watch-table {
+    /* make headings work on mobile */
    /* Force table to not be like tables anymore */
-    /* Force table to not be like tables anymore */
-    /* Hide table headers (but not display: none;, for accessibility) */ }
-    .watch-table thead,
-    .watch-table tbody,
-    .watch-table th,
-    .watch-table td,
-    .watch-table tr {
+    /* Force table to not be like tables anymore */ }
+    .watch-table thead {
+      display: block; }
+      .watch-table thead tr th {
+        display: inline-block; }
+      .watch-table thead .empty-cell {
+        display: none; }
+    .watch-table tbody td,
+    .watch-table tbody tr {
      display: block; }
    .watch-table .last-checked > span {
      vertical-align: middle; }
@@ -882,10 +885,6 @@ footer {
      content: "Last Changed "; }
    .watch-table td.inline {
      display: inline-block; }
-    .watch-table thead tr {
-      position: absolute;
-      top: -9999px;
-      left: -9999px; }
    .watch-table .pure-table td,
    .watch-table .pure-table th {
      border: none; }
@@ -912,7 +911,8 @@ footer {
  border-color: var(--color-border-table-cell); }
  .pure-table thead {
    background-color: var(--color-background-table-thead);
-    color: var(--color-text); }
+    color: var(--color-text);
+    border-bottom: 1px solid var(--color-background-table-thead); }
  .pure-table td,
  .pure-table th {
    border-left-color: var(--color-border-table-cell); }
@@ -1065,6 +1065,7 @@ ul {

 #selector-wrapper {
  height: 100%;
+  text-align: center;
  max-height: 70vh;
  overflow-y: scroll;
  position: relative; }
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -163,7 +163,6 @@ class ChangeDetectionStore:
                        del (update_obj[dict_key])

            self.__data['watching'][uuid].update(update_obj)
-
        self.needs_write = True

    @property
@@ -243,6 +242,14 @@ class ChangeDetectionStore:
    def clear_watch_history(self, uuid):
        import pathlib

+        # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
+        for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
+            unlink(item)
+
+        # Force the attr to recalculate
+        bump = self.__data['watching'][uuid].history
+
+        # Do this last because it will trigger a recheck due to last_checked being zero
        self.__data['watching'][uuid].update({
                'browser_steps_last_error_step' : None,
                'check_count': 0,
@@ -259,13 +266,6 @@ class ChangeDetectionStore:
                'track_ldjson_price_data': None,
            })

-        # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
-        for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
-            unlink(item)
-
-        # Force the attr to recalculate
-        bump = self.__data['watching'][uuid].history
-
        self.needs_write_urgent = True

    def add_watch(self, url, tag='', extras=None, tag_uuids=None, write_to_disk_now=True):
@@ -376,46 +376,6 @@ class ChangeDetectionStore:

        return False

-    # Save as PNG, PNG is larger but better for doing visual diff in the future
-    def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
-        if not self.data['watching'].get(watch_uuid):
-            return
-
-        if as_error:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
-        else:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
-
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-
-        with open(target_path, 'wb') as f:
-            f.write(screenshot)
-            f.close()
-
-
-    def save_error_text(self, watch_uuid, contents):
-        if not self.data['watching'].get(watch_uuid):
-            return
-
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-        target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
-        with open(target_path, 'w') as f:
-            f.write(contents)
-
-    def save_xpath_data(self, watch_uuid, data, as_error=False):
-
-        if not self.data['watching'].get(watch_uuid):
-            return
-        if as_error:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
-        else:
-            target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
-        self.data['watching'][watch_uuid].ensure_data_dir_exists()
-        with open(target_path, 'w') as f:
-            f.write(json.dumps(data))
-            f.close()
-
-
    def sync_to_json(self):
        logger.info("Saving JSON..")
        try:
@@ -884,3 +844,8 @@ class ChangeDetectionStore:
                # Something custom here
                self.__data["watching"][uuid]['time_between_check_use_default'] = False

+    # Older installs can still hold 'tags' as a str that update_12 did not catch; reset those to an empty list (the old string value is discarded)
+    def update_16(self):
+        for uuid, watch in self.data['watching'].items():
+            if isinstance(watch.get('tags'), str):
+                self.data['watching'][uuid]['tags'] = []
--- a/changedetectionio/templates/base.html
+++ b/changedetectionio/templates/base.html
@@ -26,7 +26,11 @@
    <meta name="msapplication-TileColor" content="#da532c">
    <meta name="msapplication-config" content="favicons/browserconfig.xml">
    <meta name="theme-color" content="#ffffff">
+    <script>
+        const csrftoken="{{ csrf_token() }}";
+    </script>
    <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
+    <script src="{{url_for('static_content', group='js', filename='csrf.js')}}" defer></script>
  </head>

  <body>
--- a/changedetectionio/templates/edit.html
+++ b/changedetectionio/templates/edit.html
@@ -292,7 +292,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
                            <ul>
                                <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
                                {% if jq_support %}
-                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
+                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
                                {% else %}
                                <li>jq support not installed</li>
                                {% endif %}
@@ -432,9 +432,8 @@ Unavailable") }}
                <fieldset>
                    <div class="pure-control-group">
                        {% if visualselector_enabled %}
-                            <span class="pure-form-message-inline">
-                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection &dash; after the <i>Browser Steps</i> has completed.<br>
-                                This tool is a helper to manage filters in the  "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab.
+                            <span class="pure-form-message-inline" id="visual-selector-heading">
+                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
                            </span>

                            <div id="selector-header">
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -68,11 +68,11 @@
                {% set link_order = "desc" if sort_order  == 'asc' else "asc" %}
                {% set arrow_span = "" %}
                <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}"  href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
-                <th></th>
+                <th class="empty-cell"></th>
                <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
-                <th></th>
+                <th class="empty-cell"></th>
            </tr>
            </thead>
            <tbody>
--- a/changedetectionio/tests/test_api.py
+++ b/changedetectionio/tests/test_api.py
@@ -149,6 +149,15 @@ def test_api_simple(client, live_server):
        headers={'x-api-key': api_key},
    )
    assert b'which has this one new line' in res.data
+    assert b'<div id' not in res.data
+
+    # Fetch the HTML of the latest one
+    res = client.get(
+        url_for("watchsinglehistory", uuid=watch_uuid, timestamp='latest')+"?html=1",
+        headers={'x-api-key': api_key},
+    )
+    assert b'which has this one new line' in res.data
+    assert b'<div id' in res.data

    # Fetch the whole watch
    res = client.get(
--- a/changedetectionio/tests/test_auth.py
+++ b/changedetectionio/tests/test_auth.py
@@ -2,13 +2,12 @@

 import time
 from flask import url_for
-from . util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks
+

 def test_basic_auth(client, live_server):

    live_server_setup(live_server)
-    # Give the endpoint time to spin up
-    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
@@ -19,8 +18,8 @@ def test_basic_auth(client, live_server):
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
+    wait_for_all_checks(client)
    time.sleep(1)
-
    # Check form validation
    res = client.post(
        url_for("edit_page", uuid="first"),
@@ -29,7 +28,7 @@ def test_basic_auth(client, live_server):
    )
    assert b"Updated watch." in res.data

-    time.sleep(1)
+    wait_for_all_checks(client)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
--- a/changedetectionio/tests/test_automatic_follow_ldjson_price.py
+++ b/changedetectionio/tests/test_automatic_follow_ldjson_price.py
@@ -100,7 +100,7 @@ def test_check_ldjson_price_autodetect(client, live_server):

    # Accept it
    uuid = extract_UUID_from_client(client)
-
+    time.sleep(1)
    client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True))
    wait_for_all_checks(client)

--- a/changedetectionio/tests/test_backend.py
+++ b/changedetectionio/tests/test_backend.py
@@ -3,7 +3,8 @@
 import time
 from flask import url_for
 from urllib.request import urlopen
-from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
+from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
+    extract_UUID_from_client

 sleep_time_for_fetch_thread = 3

@@ -62,9 +63,6 @@ def test_check_basic_change_detection_functionality(client, live_server):
    # Make a change
    set_modified_response()

-    res = urlopen(url_for('test_endpoint', _external=True))
-    assert b'which has this one new line' in res.read()
-
    # Force recheck
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches queued for rechecking.' in res.data
@@ -144,6 +142,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
    assert b'Mark all viewed' not in res.data
    assert b'unviewed' not in res.data

+    # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
+    uuid = extract_UUID_from_client(client)
+    client.get(url_for("clear_watch_history", uuid=uuid))
+    client.get(url_for("form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("index"))
+    assert b'preview/' in res.data
+
    #
    # Cleanup everything
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
--- a/changedetectionio/tests/test_encoding.py
+++ b/changedetectionio/tests/test_encoding.py
@@ -3,7 +3,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks
 import pytest


@@ -27,9 +27,6 @@ def set_html_response():
 def test_check_encoding_detection(client, live_server):
    set_html_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', content_type="text/html", _external=True)
    client.post(
@@ -39,7 +36,7 @@ def test_check_encoding_detection(client, live_server):
    )

    # Give the thread time to pick it up
-    time.sleep(2)
+    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first"),
@@ -56,9 +53,6 @@ def test_check_encoding_detection(client, live_server):
 def test_check_encoding_detection_missing_content_type_header(client, live_server):
    set_html_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
-
    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    client.post(
@@ -67,8 +61,7 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
        follow_redirects=True
    )

-    # Give the thread time to pick it up
-    time.sleep(2)
+    wait_for_all_checks(client)

    res = client.get(
        url_for("preview_page", uuid="first"),
--- a/changedetectionio/tests/test_extract_csv.py
+++ b/changedetectionio/tests/test_extract_csv.py
@@ -29,6 +29,7 @@ def test_check_extract_text_from_diff(client, live_server):
    # Load in 5 different numbers/changes
    last_date=""
    for n in range(5):
+        time.sleep(1)
        # Give the thread time to pick it up
        print("Bumping snapshot and checking.. ", n)
        last_date = str(time.time())
--- a/changedetectionio/tests/test_filter_failure_notification.py
+++ b/changedetectionio/tests/test_filter_failure_notification.py
@@ -21,10 +21,11 @@ def set_response_with_filter():
        f.write(test_return_data)
    return None

-def run_filter_test(client, content_filter):
+def run_filter_test(client, live_server, content_filter):
+
+    # Response WITHOUT the filter ID element
+    set_original_response()

-    # Give the endpoint time to spin up
-    time.sleep(1)
    # cleanup for the next
    client.get(
        url_for("form_delete", uuid="all"),
@@ -79,6 +80,7 @@ def run_filter_test(client, content_filter):
        "include_filters": content_filter,
        "fetch_backend": "html_requests"})

+    # A POST here will also reset the filter failure counter (filter_failure_notification_threshold_attempts)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data=notification_form_data,
@@ -91,20 +93,21 @@ def run_filter_test(client, content_filter):
    # Now the notification should not exist, because we didnt reach the threshold
    assert not os.path.isfile("test-datastore/notification.txt")

-    # -2 because we would have checked twice above (on adding and on edit)
+    # recheck it up to just before the threshold, including the fact that in the previous POST it would have rechecked (and incremented)
    for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT-2):
-        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
+        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)
-        assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i}"
+        time.sleep(2) # delay for apprise to fire
+        assert not os.path.isfile("test-datastore/notification.txt"), f"test-datastore/notification.txt should not exist - Attempt {i} when threshold is {App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT}"

    # We should see something in the frontend
+    res = client.get(url_for("index"))
    assert b'Warning, no filters were found' in res.data

-    # One more check should trigger it (see -2 above)
-    client.get(url_for("form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
+    # One more check should trigger the _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT threshold
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
+    time.sleep(2)  # delay for apprise to fire
    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")

@@ -149,13 +152,9 @@ def test_setup(live_server):
    live_server_setup(live_server)

 def test_check_include_filters_failure_notification(client, live_server):
-    set_original_response()
-    wait_for_all_checks(client)
-    run_filter_test(client, '#nope-doesnt-exist')
+    run_filter_test(client, live_server,'#nope-doesnt-exist')

 def test_check_xpath_filter_failure_notification(client, live_server):
-    set_original_response()
-    time.sleep(1)
-    run_filter_test(client, '//*[@id="nope-doesnt-exist"]')
+    run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]')

 # Test that notification is never sent
--- a/changedetectionio/tests/test_history_consistency.py
+++ b/changedetectionio/tests/test_history_consistency.py
@@ -5,15 +5,13 @@ import os
 import json
 import logging
 from flask import url_for
-from .util import live_server_setup
+from .util import live_server_setup, wait_for_all_checks
 from urllib.parse import urlparse, parse_qs

 def test_consistent_history(client, live_server):
    live_server_setup(live_server)

-    # Give the endpoint time to spin up
-    time.sleep(1)
-    r = range(1, 50)
+    r = range(1, 30)

    for one in r:
        test_url = url_for('test_endpoint', content_type="text/html", content=str(one), _external=True)
@@ -25,15 +23,8 @@ def test_consistent_history(client, live_server):

        assert b"1 Imported" in res.data

-    time.sleep(3)
-    while True:
-        res = client.get(url_for("index"))
-        logging.debug("Waiting for 'Checking now' to go away..")
-        if b'Checking now' not in res.data:
-            break
-        time.sleep(0.5)
+    wait_for_all_checks(client)

-    time.sleep(3)
    # Essentially just triggers the DB write/update
    res = client.post(
        url_for("settings_page"),
@@ -44,8 +35,9 @@ def test_consistent_history(client, live_server):
    )
    assert b"Settings updated." in res.data

-    # Give it time to write it out
-    time.sleep(3)
+
+    time.sleep(2)
+
    json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')

    json_obj = None
@@ -58,7 +50,7 @@ def test_consistent_history(client, live_server):
    # each one should have a history.txt containing just one line
    for w in json_obj['watching'].keys():
        history_txt_index_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, 'history.txt')
-        assert os.path.isfile(history_txt_index_file), "History.txt should exist where I expect it - {}".format(history_txt_index_file)
+        assert os.path.isfile(history_txt_index_file), f"History.txt should exist where I expect it at {history_txt_index_file}"

        # Same like in model.Watch
        with open(history_txt_index_file, "r") as f:
@@ -70,15 +62,15 @@ def test_consistent_history(client, live_server):
                                                     w))
        # Find the snapshot one
        for fname in files_in_watch_dir:
-            if fname != 'history.txt':
+            if fname != 'history.txt' and 'html' not in fname:
                # contents should match what we requested as content returned from the test url
                with open(os.path.join(live_server.app.config['DATASTORE'].datastore_path, w, fname), 'r') as snapshot_f:
                    contents = snapshot_f.read()
                    watch_url = json_obj['watching'][w]['url']
                    u = urlparse(watch_url)
                    q = parse_qs(u[4])
-                    assert q['content'][0] == contents.strip(), "Snapshot file {} should contain {}".format(fname, q['content'][0])
+                    assert q['content'][0] == contents.strip(), f"Snapshot file {fname} should contain {q['content'][0]}"



-        assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot"
+        assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
--- a/changedetectionio/tests/test_ignorehighlighter.py
+++ b/changedetectionio/tests/test_ignorehighlighter.py
@@ -45,7 +45,6 @@ def test_highlight_ignore(client, live_server):
    )

    res = client.get(url_for("edit_page", uuid=uuid))
-
    # should be a regex now
    assert b'/oh\ yeah\ \d+/' in res.data

@@ -55,3 +54,7 @@ def test_highlight_ignore(client, live_server):
    # And it should register in the preview page
    res = client.get(url_for("preview_page", uuid=uuid))
    assert b'<div class="ignored">oh yeah 456' in res.data
+
+    # Should be in base.html
+    assert b'csrftoken' in res.data
+
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -41,19 +41,26 @@ and it can also be repeated
    from .. import html_tools

    # See that we can find the second <script> one, which is not broken, and matches our filter
-    text = html_tools.extract_json_as_string(content, "json:$.offers.price")
-    assert text == "23.5"
+    text = html_tools.extract_json_as_string(content, "json:$.offers.priceCurrency")
+    assert text == '"AUD"'
+
+    text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
+    assert text == "5"

    # also check for jq
    if jq_support:
-        text = html_tools.extract_json_as_string(content, "jq:.offers.price")
-        assert text == "23.5"
+        text = html_tools.extract_json_as_string(content, "jq:.offers.priceCurrency")
+        assert text == '"AUD"'

        text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
        assert text == "5"

-    text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
-    assert text == "5"
+        text = html_tools.extract_json_as_string(content, "jqraw:.offers.priceCurrency")
+        assert text == "AUD"
+
+        text = html_tools.extract_json_as_string('{"id":5}', "jqraw:.id")
+        assert text == "5"
+

    # When nothing at all is found, it should throw JSONNOTFound
    # Which is caught and shown to the user in the watch-overview table
@@ -64,6 +71,9 @@ and it can also be repeated
        with pytest.raises(html_tools.JSONNotFound) as e_info:
            html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")

+        with pytest.raises(html_tools.JSONNotFound) as e_info:
+            html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jqraw:.id")
+

 def test_unittest_inline_extract_body():
    content = """
@@ -291,6 +301,10 @@ def test_check_jq_filter(client, live_server):
    if jq_support:
        check_json_filter('jq:.boss.name', client, live_server)

+def test_check_jqraw_filter(client, live_server):
+    if jq_support:  # does nothing when jq support is unavailable
+        check_json_filter('jqraw:.boss.name', client, live_server)
+
 def check_json_filter_bool_val(json_filter, client, live_server):
    set_original_response()

@@ -345,6 +359,10 @@ def test_check_jq_filter_bool_val(client, live_server):
    if jq_support:
        check_json_filter_bool_val("jq:.available", client, live_server)

+def test_check_jqraw_filter_bool_val(client, live_server):
+    if jq_support:
+        check_json_filter_bool_val("jq:.available", client, live_server)  # NOTE(review): test is named jqraw but passes a "jq:" filter - confirm whether "jqraw:.available" was intended
+
 # Re #265 - Extended JSON selector test
 # Stuff to consider here
 # - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
@@ -491,4 +509,8 @@ def test_check_jsonpath_ext_filter(client, live_server):

 def test_check_jq_ext_filter(client, live_server):
    if jq_support:
-        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
+        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
+
+def test_check_jqraw_ext_filter(client, live_server):
+    if jq_support:
+        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)  # NOTE(review): test is named jqraw but passes a "jq:" filter - confirm whether "jqraw:" was intended
--- a/changedetectionio/tests/test_rss.py
+++ b/changedetectionio/tests/test_rss.py
@@ -69,6 +69,7 @@ def test_rss_and_token(client, live_server):

    wait_for_all_checks(client)
    set_modified_response()
+    time.sleep(1)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

@@ -87,7 +88,7 @@ def test_rss_and_token(client, live_server):
    assert b"Access denied, bad token" not in res.data
    assert b"Random content" in res.data

-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
+    client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

 def test_basic_cdata_rss_markup(client, live_server):
    #live_server_setup(live_server)
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -1,11 +1,12 @@
-import os
-import threading
-import queue
-import time
 from . import content_fetchers
-from changedetectionio import html_tools
-from .processors.text_json_diff import FilterNotFoundInResponse
 from .processors.restock_diff import UnableToExtractRestockData
+from .processors.text_json_diff import FilterNotFoundInResponse
+from changedetectionio import html_tools
+from copy import deepcopy
+import os
+import queue
+import threading
+import time

 # A single update worker
 #
@@ -245,14 +246,18 @@ class update_worker(threading.Thread):
                    contents = b''
                    process_changedetection_results = True
                    update_obj = {}
-                    logger.info(f"Processing watch UUID {uuid} "
-                            f"Priority {queued_item_data.priority} "
-                            f"URL {self.datastore.data['watching'][uuid]['url']}")
+
+                    # Clear last errors (move to preflight func?)
+                    self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
+
+                    watch = self.datastore.data['watching'].get(uuid)
+
+                    logger.info(f"Processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
                    now = time.time()

                    try:
                        # Processor is what we are using for detecting the "Change"
-                        processor = self.datastore.data['watching'][uuid].get('processor', 'text_json_diff')
+                        processor = watch.get('processor', 'text_json_diff')
                        # if system...

                        # Abort processing when the content was the same as the last fetch
@@ -272,14 +277,12 @@ class update_worker(threading.Thread):
                                                                               watch_uuid=uuid
                                                                               )

-                        # Clear last errors (move to preflight func?)
-                        self.datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
-
                        update_handler.call_browser()

-                        changed_detected, update_obj, contents = update_handler.run_changedetection(uuid,
-                                                                                    skip_when_checksum_same=skip_when_same_checksum,
-                                                                                    )
+                        changed_detected, update_obj, contents = update_handler.run_changedetection(
+                            watch=watch,
+                            skip_when_checksum_same=skip_when_same_checksum,
+                        )

                        # Re #342
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
@@ -309,7 +312,11 @@ class update_worker(threading.Thread):
                        })

                        if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
+
+                        if e.xpath_data:
+                            watch.save_xpath_data(data=e.xpath_data)
+                            
                        process_changedetection_results = False

                    except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
@@ -325,11 +332,11 @@ class update_worker(threading.Thread):
                            err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))

                        if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                        if e.xpath_data:
-                            self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
+                            watch.save_xpath_data(data=e.xpath_data, as_error=True)
                        if e.page_text:
-                            self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
+                            watch.save_error_text(contents=e.page_text)

                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                        process_changedetection_results = False
@@ -341,16 +348,23 @@ class update_worker(threading.Thread):
                        err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})

+                        # Filter wasn't found, but we should still update the visual selector so that the user has a chance to set it up again
+                        if e.screenshot:
+                            watch.save_screenshot(screenshot=e.screenshot)
+
+                        if e.xpath_data:
+                            watch.save_xpath_data(data=e.xpath_data)
+
                        # Only when enabled, send the notification
-                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
-                            c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
+                        if watch.get('filter_failure_notification_send', False):
+                            c = watch.get('consecutive_filter_failures', 5)
                            c += 1
                            # Send notification if we reached the threshold?
                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                           0)
-                            logger.error(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
+                            logger.warning(f"Filter for {uuid} not found, consecutive_filter_failures: {c}")
                            if threshold > 0 and c >= threshold:
-                                if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                if not watch.get('notification_muted'):
                                    self.send_filter_failure_notification(uuid)
                                c = 0

@@ -362,7 +376,6 @@ class update_worker(threading.Thread):
                        # Yes fine, so nothing todo, don't continue to process.
                        process_changedetection_results = False
                        changed_detected = False
-                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False})
                    except content_fetchers.exceptions.BrowserConnectError as e:
                        self.datastore.update_watch(uuid=uuid,
                                                    update_obj={'last_error': e.msg
@@ -401,15 +414,15 @@ class update_worker(threading.Thread):
                                                                }
                                                    )

-                        if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
-                            c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
+                        if watch.get('filter_failure_notification_send', False):
+                            c = watch.get('consecutive_filter_failures', 5)
                            c += 1
                            # Send notification if we reached the threshold?
                            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                           0)
                            logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
                            if threshold > 0 and c >= threshold:
-                                if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                if not watch.get('notification_muted'):
                                    self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
                                c = 0

@@ -431,7 +444,7 @@ class update_worker(threading.Thread):
                    except content_fetchers.exceptions.JSActionExceptions as e:
                        err_text = "Error running JS Actions - Page request - "+e.message
                        if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code})
                        process_changedetection_results = False
@@ -441,7 +454,7 @@ class update_worker(threading.Thread):
                            err_text = "{} - {}".format(err_text, e.message)

                        if e.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
+                            watch.save_screenshot(screenshot=e.screenshot, as_error=True)

                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           'last_check_status': e.status_code,
@@ -465,8 +478,6 @@ class update_worker(threading.Thread):
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
                        # Other serious error
                        process_changedetection_results = False
-#                        import traceback
-#                        print(traceback.format_exc())

                    else:
                        # Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
@@ -474,7 +485,7 @@ class update_worker(threading.Thread):
                            continue

                        # Mark that we never had any failures
-                        if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
+                        if not watch.get('ignore_status_codes'):
                            update_obj['consecutive_filter_failures'] = 0

                        # Everything ran OK, clean off any previous error
@@ -482,25 +493,48 @@ class update_worker(threading.Thread):

                        self.cleanup_error_artifacts(uuid)

+                    if not self.datastore.data['watching'].get(uuid):
+                        continue
                    #
                    # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
                    if process_changedetection_results:
+                        # Always save the screenshot if it's available
+
+                        if update_handler.screenshot:
+                            watch.save_screenshot(screenshot=update_handler.screenshot)
+
+                        if update_handler.xpath_data:
+                            watch.save_xpath_data(data=update_handler.xpath_data)
+
                        try:
-                            watch = self.datastore.data['watching'].get(uuid)
                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)

                            # Also save the snapshot on the first time checked
-                            if changed_detected or not watch['last_checked']:
+                            if changed_detected or not watch.get('last_checked'):
+                                timestamp = round(time.time())
+
+                                # Small hack: sleep just long enough to leave 1 second between history snapshots,
+                                # because history.txt indexes/keys snapshots by epoch seconds and we don't want duplicate keys
+
+                                if watch.newest_history_key and int(timestamp) == int(watch.newest_history_key):
+                                    logger.warning(
+                                        f"Timestamp {timestamp} already exists, waiting 1 seconds so we have a unique key in history.txt")
+                                    timestamp = str(int(timestamp) + 1)
+                                    time.sleep(1)
+
                                watch.save_history_text(contents=contents,
-                                                        timestamp=str(round(time.time())),
+                                                        timestamp=timestamp,
                                                        snapshot_id=update_obj.get('previous_md5', 'none'))

+                                if update_handler.fetcher.content:
+                                    watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=timestamp)
+
                            # A change was detected
                            if changed_detected:
                                # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                                if watch.history_n >= 2:
                                    logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
-                                    if not self.datastore.data['watching'][uuid].get('notification_muted'):
+                                    if not watch.get('notification_muted'):
                                        self.send_content_changed_notification(watch_uuid=uuid)
                                else:
                                    logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}")
@@ -511,29 +545,23 @@ class update_worker(threading.Thread):
                            logger.critical(str(e))
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

-                    if self.datastore.data['watching'].get(uuid):
-                        # Always record that we atleast tried
-                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1

-                        # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
-                        try:
-                            server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
-                            self.datastore.update_watch(uuid=uuid,
-                                                        update_obj={'remote_server_reply': server_header}
-                                                        )
-                        except Exception as e:
-                            pass
+                    # Always record that we at least tried
+                    count = watch.get('check_count', 0) + 1

-                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                                           'last_checked': round(time.time()),
-                                                                           'check_count': count
-                                                                           })
+                    # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds
+                    try:
+                        server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
+                        self.datastore.update_watch(uuid=uuid,
+                                                    update_obj={'remote_server_reply': server_header}
+                                                    )
+                    except Exception as e:
+                        pass

-                        # Always save the screenshot if it's available
-                        if update_handler.screenshot:
-                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
-                        if update_handler.xpath_data:
-                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+                    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                                                                       'last_checked': round(time.time()),
+                                                                       'check_count': count
+                                                                       })


                self.current_uuid = None  # Done
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -68,9 +68,10 @@ services:
     
     # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that
     # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used)
-#    depends_on:
-#        browser-chrome:
-#            condition: service_started
+#      depends_on:
+#          playwright-chrome:
+#              condition: service_started
+

     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
     # RECOMMENDED FOR FETCHING PAGES WITH CHROME
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Used by Pyppeteer
 pyee

-eventlet==0.35.2 # related to dnspython fixes
+eventlet>=0.36.1 # fixes SSL error on Python 3.12
 feedgen~=0.9
 flask-compress
 # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers)
@@ -23,13 +23,13 @@ validators~=0.21
 brotli~=1.0
 requests[socks]

-urllib3==1.26.18
+urllib3==1.26.19
 chardet>2.3.0

 wtforms~=3.0
 jsonpath-ng~=1.5.3

-dnspython==2.6.1
+dnspython==2.6.1 # related to eventlet fixes

 # jq not available on Windows so must be installed manually

@@ -41,10 +41,8 @@ apprise~=1.8.0
 # use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
 paho-mqtt>=1.6.1,<2.0.0

-# This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1"
-# so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found"
-# (introduced once apprise became a dep)
-cryptography~=3.4
+# Requires extra wheel for rPi
+cryptography~=42.0.8

 # Used for CSS filtering
 beautifulsoup4
@@ -85,4 +83,4 @@ jsonschema==4.17.3

 loguru
 # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
-greenlet >= 3.0.3
+greenlet >= 3.0.3
Author	SHA1	Message	Date
dgtlmoon	5765d129e8	Fixing deprecation warning	2024-07-09 16:17:31 +02:00
dgtlmoon	f07ff9b55e	UI - Visual Selector should still update when elements were not found (#2476 )	2024-07-09 15:35:19 +02:00
Nectariferous	1c46914992	Code - Update/modernise diff.py (#2471 )	2024-07-09 15:08:13 +02:00
dgtlmoon	e9c4037178	UI - Visual Selector - Multiple selections (refactor) (#2475 )	2024-07-09 15:07:23 +02:00
dgtlmoon	1af342ef64	UI - Visual Selector now supports Shift+Click for multiple selections!	2024-07-05 20:43:26 +02:00
dgtlmoon	e09ee7da97	UI - Visual Selector - Show/visualise all/any matching filter elements from all filters in "CSS/JSONPath/JQ/XPath Filters" include filters (#2440 )	2024-07-05 15:20:39 +02:00
dgtlmoon	09bc24ff34	UI - Visual Selector graphics should be centred	2024-07-05 14:33:36 +02:00
dgtlmoon	a1d04bb37f	Snapshot count from history was not updated in watch after using [clear history] (#2459 )	2024-07-05 11:09:31 +02:00
dgtlmoon	01f910f840	Fixing 'tags'' field from old installs (0.43.0+) could have wrong data-type causing crash	2024-07-04 15:23:06 +02:00
dgtlmoon	bed16009bb	0.45.25	2024-07-03 19:27:23 +02:00
dgtlmoon	faeed78ffb	UI - Fixing preview/diff "ignore text" highlight button (refactor, didnt work in "preview" mode) (#2455 )	2024-07-03 19:26:33 +02:00
dgtlmoon	5d9081ccb2	Restock detection - Updating detection texts	2024-07-03 18:45:36 +02:00
dgtlmoon	2cf1829073	UI - Mobile - Hiding empty columns	2024-07-03 17:13:31 +02:00
dgtlmoon	526551a205	UI - Mobile - Watch overview table - Sort/order buttons were not being shown correctly	2024-06-30 18:05:13 +02:00
dgtlmoon	ba139e7f3f	Update docker-compose.yml - fix indentation re #2447	2024-06-28 23:08:37 +02:00
Max Michels	13e343f9da	Restock detection - Added extra out-of-stock phrases for DE (#2442 )	2024-06-26 11:03:00 +02:00
dgtlmoon	13be4623db	Restock detection - updating texts	2024-06-25 13:23:43 +02:00
dgtlmoon	3b19e3d2bf	UI - Fixing double punctuation in 'unpaused' message #2435	2024-06-24 09:15:48 +02:00
dependabot[bot]	ce42f8ea26	Build - Bump docker/build-push-action from 5 to 6 in the all group (#2436 )	2024-06-24 08:50:02 +02:00
dgtlmoon	343e359b39	Now saving last two HTML snapshots for future reference, refactor, dont write screenshots and xpath to disk when no change detected (saves disk IO) (#2431 )	2024-06-23 09:19:32 +02:00
Hritik Vijay	ffd160ce0e	Filters - Implement jqraw: filter (use this to output nicer JSON format when selecting/filtering by JSON) (#2430 )	2024-06-21 13:31:03 +02:00
dgtlmoon	d31fc860cc	Build - fixing build warnings	2024-06-20 15:07:17 +02:00
dgtlmoon	90b357f457	Upgrade to Python 3.11 from 3.10, add faster prebuilt "wheels" for rPi devices, upgrade cryptography security library	2024-06-20 14:42:17 +02:00
dgtlmoon	cc147be76e	Prefer pythons built in "importlib" over pkg_resources+setuptools (#2424 )	2024-06-18 09:08:48 +02:00
dependabot[bot]	8ae5ed76ce	Security/dependabot - Bump urllib3 from 1.26.18 to 1.26.19 (#2423 )	2024-06-18 08:23:12 +02:00
dgtlmoon	a9ed113369	0.45.24	2024-06-17 13:27:11 +02:00
dgtlmoon	eacf920b9a	Update eventlet ( Fixes SSL error on Python 3.12 ) (#2419 )	2024-06-17 12:05:20 +02:00
dgtlmoon	c9af9b6374	Filter failure/not found notification threshold - Counter should be reset when editing a watch, clear watch errors on 'save' (#2413 )	2024-06-17 11:42:41 +02:00
dependabot[bot]	5e65fb606b	Bump dnspython from 2.3.0 to 2.6.1 (#2306 )	2024-06-17 10:23:50 +02:00
dgtlmoon	434a1b242e	Improve testing for Python 3.10, 3.11 and 3.12	2024-06-17 09:46:54 +02:00