Add diff view option for JSON compare (compares the fields defined on each side; the order of fields, etc. does not matter in this comparison)

Fix time handling
Make checkbox work
2025-12-18 05:55:45 +00:00 · 2022-11-19 15:15:25 +01:00 · 2022-11-19 14:47:58 +01:00 · 2022-11-19 14:44:51 +01:00 · 2022-11-19 14:17:30 +01:00 · 2022-11-19 13:42:52 +01:00
17 changed files with 435 additions and 1386 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,6 +3,7 @@ recursive-include changedetectionio/templates *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/model *
 recursive-include changedetectionio/tests *
+recursive-include changedetectionio/res *
 include changedetection.py
 global-exclude *.pyc
 global-exclude node_modules
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ## Web Site Change Detection, Monitoring and Notification.

-_Live your data-life pro-actively, track website and JSON content changes, trigger notifications via Discord, Email, Slack, Telegram, API calls and 70+ more._
+_Live your data-life pro-actively, detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._


 [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start?src=github)
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -1368,7 +1368,7 @@ def notification_runner():
                # UUID wont be present when we submit a 'test' from the global settings
                if 'uuid' in n_object:
                    datastore.update_watch(uuid=n_object['uuid'],
-                                           update_obj={'last_notification_error': "Notification error detected, please see logs."})
+                                           update_obj={'last_notification_error': "Notification error detected, goto notification log."})

                log_lines = str(e).splitlines()
                notification_debug_log += log_lines
--- a/changedetectionio/changedetection.py
+++ b/changedetectionio/changedetection.py
@@ -2,19 +2,20 @@

 # Launch as a eventlet.wsgi server instance.

+from distutils.util import strtobool
+import eventlet
+import eventlet.wsgi
 import getopt
 import os
 import signal
 import sys

-import eventlet
-import eventlet.wsgi
 from . import store, changedetection_app, content_fetcher
 from . import __version__

 # Only global so we can access it in the signal handler
-datastore = None
 app = None
+datastore = None

 def sigterm_handler(_signo, _stack_frame):
    global app
@@ -106,8 +107,9 @@ def main():
    # @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
    @app.after_request
    def hide_referrer(response):
-        if os.getenv("HIDE_REFERER", False):
+        if strtobool(os.getenv("HIDE_REFERER", 'false')):
            response.headers["Referrer-Policy"] = "no-referrer"
+
        return response

    # Proxy sub-directory support
--- a/changedetectionio/content_fetcher.py
+++ b/changedetectionio/content_fetcher.py
@@ -1,11 +1,11 @@
-from abc import ABC, abstractmethod
+from abc import abstractmethod
+from pkg_resources import resource_string
 import chardet
 import json
 import os
 import requests
-import time
 import sys
-
+import time

 class Non200ErrorCodeReceived(Exception):
    def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
@@ -73,131 +73,8 @@ class Fetcher():

    fetcher_description = "No description"
    webdriver_js_execute_code = None
-    xpath_element_js = """               
-                // Include the getXpath script directly, easier than fetching
-                !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
+    xpath_element_js = ""

-
-                const findUpTag = (el) => {
-                  let r = el
-                  chained_css = [];
-                  depth=0;
-            
-                // Strategy 1: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4
-                  while (r.parentNode) {
-                    if(depth==5) {
-                      break;
-                    }
-                    if('' !==r.id) {
-                      chained_css.unshift("#"+CSS.escape(r.id));
-                      final_selector= chained_css.join(' > ');
-                      // Be sure theres only one, some sites have multiples of the same ID tag :-(
-                      if (window.document.querySelectorAll(final_selector).length ==1 ) {
-                        return final_selector;
-                        }
-                      return null;
-                    } else {
-                      chained_css.unshift(r.tagName.toLowerCase());
-                    }
-                    r=r.parentNode;
-                    depth+=1;
-                  }
-                  return null;
-                }
-
-
-                // @todo - if it's SVG or IMG, go into image diff mode
-                var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
-                var size_pos=[];
-                // after page fetch, inject this JS
-                // build a map of all elements and their positions (maybe that only include text?)
-                var bbox;
-                for (var i = 0; i < elements.length; i++) {   
-                 bbox = elements[i].getBoundingClientRect();
-
-                 // forget really small ones
-                 if (bbox['width'] <20 && bbox['height'] < 20 ) {
-                   continue;
-                 }
-
-                 // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
-                 // it should not traverse when we know we can anchor off just an ID one level up etc..
-                 // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match 
-
-                 // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
-                 xpath_result=false;
-                 
-                 try {
-                   var d= findUpTag(elements[i]);
-                   if (d) {
-                     xpath_result =d;
-                   }                
-                 } catch (e) {
-                   console.log(e);
-                 }
-                 
-                 // You could swap it and default to getXpath and then try the smarter one
-                 // default back to the less intelligent one
-                 if (!xpath_result) {
-                    try {
-                       // I've seen on FB and eBay that this doesnt work
-                       // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
-                       xpath_result = getXPath(elements[i]);
-                     } catch (e) {
-                       console.log(e);
-                       continue;
-                     }            
-                 }
-                 
-                 if(window.getComputedStyle(elements[i]).visibility === "hidden") {
-                   continue;
-                 }
-
-                 size_pos.push({
-                   xpath: xpath_result,
-                   width: Math.round(bbox['width']), 
-                   height: Math.round(bbox['height']), 
-                   left: Math.floor(bbox['left']), 
-                   top: Math.floor(bbox['top']), 
-                   childCount: elements[i].childElementCount
-                 });                 
-                }
-
-
-                // inject the current one set in the include_filters, which may be a CSS rule
-                // used for displaying the current one in VisualSelector, where its not one we generated.
-                if (include_filters.length) {
-                   q=false;                   
-                   try {
-                       // is it xpath?
-                       if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
-                         q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
-                       } else {
-                         q=document.querySelector(include_filters);
-                       }                       
-                   } catch (e) {
-                    // Maybe catch DOMException and alert? 
-                     console.log(e);                       
-                   }
-                   bbox=false;
-                   if(q) {
-                     bbox = q.getBoundingClientRect();
-                   }
-                                   
-                   if (bbox && bbox['width'] >0 && bbox['height']>0) {                       
-                       size_pos.push({
-                           xpath: include_filters,
-                           width: bbox['width'], 
-                           height: bbox['height'],
-                           left: bbox['left'],
-                           top: bbox['top'],
-                           childCount: q.childElementCount
-                         });
-                     }
-                }
-                // Window.width required for proper scaling in the frontend
-                return {'size_pos':size_pos, 'browser_width': window.innerWidth};
-    """
    xpath_data = None

    # Will be needed in the future by the VisualSelector, always get this where possible.
@@ -208,6 +85,10 @@ class Fetcher():
    # Time ONTOP of the system defined env minimum time
    render_extract_delay = 0

+    def __init__(self):
+        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
+        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
+
    @abstractmethod
    def get_error(self):
        return self.error
@@ -273,7 +154,7 @@ class base_html_playwright(Fetcher):
    proxy = None

    def __init__(self, proxy_override=None):
-
+        super().__init__()
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
        self.command_executor = os.getenv(
@@ -465,6 +346,7 @@ class base_html_webdriver(Fetcher):
    proxy = None

    def __init__(self, proxy_override=None):
+        super().__init__()
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy

        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
--- a/changedetectionio/fetch_site_status.py
+++ b/changedetectionio/fetch_site_status.py
@@ -15,7 +15,6 @@ class FilterNotFoundInResponse(ValueError):
        ValueError.__init__(self, msg)


-
 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
 class perform_site_check():
@@ -39,18 +38,20 @@ class perform_site_check():

        return regex

-
    def run(self, uuid):
+        from copy import deepcopy
        changed_detected = False
        screenshot = False  # as bytes
        stripped_text_from_html = ""

-        watch = self.datastore.data['watching'].get(uuid)
+        # DeepCopy so we can be sure we don't accidentally change anything by reference
+        watch = deepcopy(self.datastore.data['watching'].get(uuid))
+
        if not watch:
            return

        # Protect against file:// access
-        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
+        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )
@@ -58,10 +59,10 @@ class perform_site_check():
        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

-        extra_headers =self.datastore.data['watching'][uuid].get('headers')
+        extra_headers = watch.get('headers', [])

        # Tweak the base config with the per-watch ones
-        request_headers = self.datastore.data['settings']['headers'].copy()
+        request_headers = deepcopy(self.datastore.data['settings']['headers'])
        request_headers.update(extra_headers)

        # https://github.com/psf/requests/issues/4525
@@ -85,7 +86,7 @@ class perform_site_check():
            is_source = True

        # Pluggable content fetcher
-        prefer_backend = watch['fetch_backend']
+        prefer_backend = watch.get('fetch_backend')
        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
@@ -96,21 +97,21 @@ class perform_site_check():
        proxy_url = None
        if proxy_id:
            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
-            print ("UUID {} Using proxy {}".format(uuid, proxy_url))
+            print("UUID {} Using proxy {}".format(uuid, proxy_url))

        fetcher = klass(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if watch['webdriver_delay'] is not None:
-            fetcher.render_extract_delay = watch['webdriver_delay']
+            fetcher.render_extract_delay = watch.get('webdriver_delay')
        elif system_webdriver_delay is not None:
            fetcher.render_extract_delay = system_webdriver_delay

-        if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
-            fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
+        if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
+            fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')

-        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['include_filters'])
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
        fetcher.quit()

        self.screenshot = fetcher.screenshot
@@ -134,7 +135,8 @@ class perform_site_check():
            is_html = False
            is_json = False

-        include_filters_rule = watch['include_filters']
+        include_filters_rule = watch.get('include_filters', [])
+        # include_filters_rule = watch['include_filters']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
        ) + self.datastore.data["settings"]["application"].get(
@@ -156,7 +158,7 @@ class perform_site_check():
                    is_html = False

        if is_html or is_source:
-            
+
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
            fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
            html_content = fetcher.content
@@ -178,8 +180,8 @@ class perform_site_check():
                        else:
                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
                            html_content += html_tools.include_filters(include_filters=filter_rule,
-                                                                  html_content=fetcher.content,
-                                                                  append_pretty_line_formatting=not is_source)
+                                                                       html_content=fetcher.content,
+                                                                       append_pretty_line_formatting=not is_source)

                    if not html_content.strip():
                        raise FilterNotFoundInResponse(include_filters_rule)
@@ -191,12 +193,11 @@ class perform_site_check():
                    stripped_text_from_html = html_content
                else:
                    # extract text
+                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
                    stripped_text_from_html = \
                        html_tools.html_to_text(
                            html_content,
-                            render_anchor_tag_content=self.datastore.data["settings"][
-                                "application"].get(
-                                "render_anchor_tag_content", False)
+                            render_anchor_tag_content=do_anchor
                        )

        # Re #340 - return the content before the 'ignore text' was applied
@@ -231,7 +232,7 @@ class perform_site_check():

                for l in result:
                    if type(l) is tuple:
-                        #@todo - some formatter option default (between groups)
+                        # @todo - some formatter option default (between groups)
                        regex_matched_output += list(l) + [b'\n']
                    else:
                        # @todo - some formatter option default (between each ungrouped result)
@@ -245,7 +246,6 @@ class perform_site_check():
                stripped_text_from_html = b''.join(regex_matched_output)
                text_content_before_ignored_filter = stripped_text_from_html

-
        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
@@ -255,29 +255,30 @@ class perform_site_check():
        ############ Blocking rules, after checksum #################
        blocked = False

-        if len(watch['trigger_text']):
+        trigger_text = watch.get('trigger_text', [])
+        if len(trigger_text):
            # Assume blocked
            blocked = True
            # Filter and trigger works the same, so reuse it
            # It should return the line numbers that match
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
-                                                  wordlist=watch['trigger_text'],
+                                                  wordlist=trigger_text,
                                                  mode="line numbers")
            # Unblock if the trigger was found
            if result:
                blocked = False

-
-        if len(watch['text_should_not_be_present']):
+        text_should_not_be_present = watch.get('text_should_not_be_present', [])
+        if len(text_should_not_be_present):
            # If anything matched, then we should block a change from happening
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
-                                                  wordlist=watch['text_should_not_be_present'],
+                                                  wordlist=text_should_not_be_present,
                                                  mode="line numbers")
            if result:
                blocked = True

        # The main thing that all this at the moment comes down to :)
-        if watch['previous_md5'] != fetched_md5:
+        if watch.get('previous_md5') != fetched_md5:
            changed_detected = True

        # Looks like something changed, but did it match all the rules?
@@ -286,7 +287,7 @@ class perform_site_check():

        # Extract title as title
        if is_html:
-            if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
+            if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
                if not watch['title'] or not len(watch['title']):
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)

--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -16,42 +16,43 @@ class model(dict):
    __newest_history_key = None
    __history_n=0
    __base_config = {
-            'url': None,
-            'tag': None,
-            'last_checked': 0,
-            'paused': False,
-            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
-            #'newest_history_key': 0,
-            'title': None,
-            'previous_md5': False,
-            'uuid': str(uuid.uuid4()),
-            'headers': {},  # Extra headers to send
+            #'history': {},  # Dict of timestamp and output stripped filename (removed)
+            #'newest_history_key': 0, (removed, taken from history.txt index)
            'body': None,
-            'method': 'GET',
-            #'history': {},  # Dict of timestamp and output stripped filename
+            'check_unique_lines': False, # On change-detected, compare against all history if its something new
+            'check_count': 0,
+            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
+            'extract_text': [],  # Extract text by regex after filters
+            'extract_title_as_title': False,
+            'fetch_backend': None,
+            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
+            'headers': {},  # Extra headers to send
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
-            # Custom notification content
-            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
-            'notification_title': None,
+            'include_filters': [],
+            'last_checked': 0,
+            'last_error': False,
+            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
+            'method': 'GET',
+             # Custom notification content
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
-            'include_filters': [],
-            'last_error': False,
-            'extract_text': [],  # Extract text by regex after filters
-            'subtractive_selectors': [],
-            'trigger_text': [],  # List of text or regex to wait for until a change is detected
-            'text_should_not_be_present': [], # Text that should not present
-            'fetch_backend': None,
-            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
-            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
-            'extract_title_as_title': False,
-            'check_unique_lines': False, # On change-detected, compare against all history if its something new
+            'notification_title': None,
+            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
+            'paused': False,
+            'previous_md5': False,
            'proxy': None, # Preferred proxy connection
+            'subtractive_selectors': [],
+            'tag': None,
+            'text_should_not_be_present': [], # Text that should not present
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
+            'title': None,
+            'trigger_text': [],  # List of text or regex to wait for until a change is detected
+            'url': None,
+            'uuid': str(uuid.uuid4()),
            'webdriver_delay': None,
            'webdriver_js_execute_code': None, # Run before change-detection
        }
--- a/changedetectionio/res/xpath_element_scraper.js
+++ b/changedetectionio/res/xpath_element_scraper.js
@@ -0,0 +1,154 @@
+// Include the getXpath script directly, easier than fetching
+!function (e, n) {
+    "object" == typeof exports && "undefined" != typeof module ? module.exports = n() : "function" == typeof define && define.amd ? define(n) : (e = e || self).getXPath = n()
+}(this, function () {
+    return function (e) {
+        var n = e;
+        if (n && n.id) return '//*[@id="' + n.id + '"]';
+        for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
+            for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
+            for (d = n.nextSibling; d;) {
+                if (d.nodeName === n.nodeName) {
+                    r = !0;
+                    break
+                }
+                d = d.nextSibling
+            }
+            o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
+        }
+        return o.length ? "/" + o.reverse().join("/") : ""
+    }
+});
+
+
+const findUpTag = (el) => {
+    let r = el
+    chained_css = [];
+    depth = 0;
+
+// Strategy 1: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4
+    while (r.parentNode) {
+        if (depth == 5) {
+            break;
+        }
+        if ('' !== r.id) {
+            chained_css.unshift("#" + CSS.escape(r.id));
+            final_selector = chained_css.join(' > ');
+            // Be sure there's only one, some sites have multiples of the same ID tag :-(
+            if (window.document.querySelectorAll(final_selector).length == 1) {
+                return final_selector;
+            }
+            return null;
+        } else {
+            chained_css.unshift(r.tagName.toLowerCase());
+        }
+        r = r.parentNode;
+        depth += 1;
+    }
+    return null;
+}
+
+
+// @todo - if it's SVG or IMG, go into image diff mode
+var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
+var size_pos = [];
+// after page fetch, inject this JS
+// build a map of all elements and their positions (maybe that only include text?)
+var bbox;
+for (var i = 0; i < elements.length; i++) {
+    bbox = elements[i].getBoundingClientRect();
+
+    // forget really small ones
+    if (bbox['width'] < 15 && bbox['height'] < 15) {
+        continue;
+    }
+
+    // @todo the getXpath kind of sucks, it doesn't know when there is for example just one ID sometimes
+    // it should not traverse when we know we can anchor off just an ID one level up etc..
+    // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
+
+    // 1st primitive - if it has class, try joining it all and select, if there's only one.. well that's us.
+    xpath_result = false;
+
+    try {
+        var d = findUpTag(elements[i]);
+        if (d) {
+            xpath_result = d;
+        }
+    } catch (e) {
+        console.log(e);
+    }
+
+    // You could swap it and default to getXpath and then try the smarter one
+    // default back to the less intelligent one
+    if (!xpath_result) {
+        try {
+            // I've seen on FB and eBay that this doesn't work
+            // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
+            xpath_result = getXPath(elements[i]);
+        } catch (e) {
+            console.log(e);
+            continue;
+        }
+    }
+
+    if (window.getComputedStyle(elements[i]).visibility === "hidden") {
+        continue;
+    }
+
+    size_pos.push({
+        xpath: xpath_result,
+        width: Math.round(bbox['width']),
+        height: Math.round(bbox['height']),
+        left: Math.floor(bbox['left']),
+        top: Math.floor(bbox['top'])
+    });
+}
+
+
+// Inject the current one set in the include_filters, which may be a CSS rule
+// used for displaying the current one in VisualSelector, where it's not one we generated.
+if (include_filters.length) {
+    // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
+    for (const f of include_filters) {
+        bbox = false;
+        q = false;
+
+        if (!f.length) {
+            console.log("xpath_element_scraper: Empty filter, skipping");
+            continue;
+        }
+
+        try {
+            // is it xpath?
+            if (f.startsWith('/') || f.startsWith('xpath:')) {
+                q = document.evaluate(f.replace('xpath:', ''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+            } else {
+                q = document.querySelector(f);
+            }
+        } catch (e) {
+            // Maybe catch DOMException and alert?
+            console.log("xpath_element_scraper: Exception selecting element from filter "+f);
+            console.log(e);
+        }
+
+        if (q) {
+            bbox = q.getBoundingClientRect();
+        } else {
+            console.log("xpath_element_scraper: filter element "+f+" was not found");
+        }
+
+        if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
+            size_pos.push({
+                xpath: f,
+                width: Math.round(bbox['width']),
+                height: Math.round(bbox['height']),
+                left: Math.floor(bbox['left']),
+                top: Math.floor(bbox['top'])
+            });
+        }
+    }
+}
+
+// Window.width required for proper scaling in the frontend
+return {'size_pos': size_pos, 'browser_width': window.innerWidth};
--- a/changedetectionio/run_all_tests.sh
+++ b/changedetectionio/run_all_tests.sh
@@ -24,6 +24,12 @@ echo "RUNNING WITH BASE_URL SET"
 export BASE_URL="https://really-unique-domain.io"
 pytest tests/test_notification.py

+
+# Re-run with HIDE_REFERER set - could affect login
+export HIDE_REFERER=True
+pytest tests/test_access_control.py
+
+
 # Now for the selenium and playwright/browserless fetchers
 # Note - this is not UI functional tests - just checking that each one can fetch the content

--- a/changedetectionio/static/js/diff-render.js
+++ b/changedetectionio/static/js/diff-render.js
@@ -0,0 +1,112 @@
+// Elements holding the two texts to compare (changed() reads their textContent)
+// and the container that the rendered diff is written into.
+var a = document.getElementById('a');
+var b = document.getElementById('b');
+var result = document.getElementById('result');
+
+function changed() {
+    // https://github.com/kpdecker/jsdiff/issues/389
+    // I would love to use `{ignoreWhitespace: true}` here but it breaks the formatting
+    options = {ignoreWhitespace: document.getElementById('ignoreWhitespace').checked};
+
+    var diff = Diff[window.diffType](a.textContent, b.textContent, options);
+    var fragment = document.createDocumentFragment();
+    for (var i = 0; i < diff.length; i++) {
+
+        if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
+            var swap = diff[i];
+            diff[i] = diff[i + 1];
+            diff[i + 1] = swap;
+        }
+
+        var node;
+        if (diff[i].removed) {
+            node = document.createElement('del');
+            node.classList.add("change");
+            node.appendChild(document.createTextNode(diff[i].value));
+
+        } else if (diff[i].added) {
+            node = document.createElement('ins');
+            node.classList.add("change");
+            node.appendChild(document.createTextNode(diff[i].value));
+        } else {
+            node = document.createTextNode(diff[i].value);
+        }
+        fragment.appendChild(node);
+    }
+
+    result.textContent = '';
+    result.appendChild(fragment);
+
+    // Jump at start
+    inputs.current = 0;
+    next_diff();
+}
+
+window.onload = function () {
+
+
+    /* Convert what is options from UTC time.time() to local browser time */
+    var diffList = document.getElementById("diff-version");
+    if (typeof (diffList) != 'undefined' && diffList != null) {
+        for (var option of diffList.options) {
+            var dateObject = new Date(option.value * 1000);
+            option.label = dateObject.toLocaleString();
+        }
+    }
+
+    /* Set current version date as local time in the browser also */
+    var current_v = document.getElementById("current-v-date");
+    var dateObject = new Date(newest_version_timestamp*1000);
+    current_v.innerHTML = dateObject.toLocaleString();
+    onDiffTypeChange(document.querySelector('#settings [name="diff_type"]:checked'));
+    changed();
+};
+
+a.onpaste = a.onchange =
+    b.onpaste = b.onchange = changed;
+
+if ('oninput' in a) {
+    a.oninput = b.oninput = changed;
+} else {
+    a.onkeyup = b.onkeyup = changed;
+}
+
+function onDiffTypeChange(radio) {
+    window.diffType = radio.value;
+// Not necessary
+//	document.title = "Diff " + radio.value.slice(4);
+}
+
+var radio = document.getElementsByName('diff_type');
+for (var i = 0; i < radio.length; i++) {
+    radio[i].onchange = function (e) {
+        onDiffTypeChange(e.target);
+        changed();
+    }
+}
+
+document.getElementById('ignoreWhitespace').onchange = function (e) {
+    changed();
+}
+
+
+// Every rendered <ins>/<del> carries class "change". getElementsByClassName
+// returns a live collection, so this list follows each re-render.
+// `current` is an ad-hoc cursor: the index next_diff() will scroll to next.
+var inputs = document.getElementsByClassName('change');
+inputs.current = 0;
+
+
+function next_diff() {
+
+    var element = inputs[inputs.current];
+    var headerOffset = 80;
+    var elementPosition = element.getBoundingClientRect().top;
+    var offsetPosition = elementPosition - headerOffset + window.scrollY;
+
+    window.scrollTo({
+        top: offsetPosition,
+        behavior: "smooth"
+    });
+
+    inputs.current++;
+    if (inputs.current >= inputs.length) {
+        inputs.current = 0;
+    }
+}
--- a/changedetectionio/static/js/diff.js
+++ b/changedetectionio/static/js/diff.js
--- a/changedetectionio/static/js/diff.min.js
+++ b/changedetectionio/static/js/diff.min.js
--- a/changedetectionio/static/js/visual-selector.js
+++ b/changedetectionio/static/js/visual-selector.js
@@ -68,7 +68,7 @@ $(document).ready(function() {
               xctx = c.getContext("2d");
                // redline highlight context
               ctx = c.getContext("2d");
-               current_default_xpath =$("#include_filters").val();
+               current_default_xpath =$("#include_filters").val().split(/\r?\n/g);
               fetch_data();
               $('#selector-canvas').off("mousemove mousedown");
               // screenshot_url defined in the edit.html template
@@ -127,24 +127,30 @@ $(document).ready(function() {

      console.log(selector_data['size_pos'].length + " selectors found");

-      // highlight the default one if we can find it in the xPath list
-      // or the xpath matches the default one
-      found = false;
-      if(current_default_xpath.length) {
-          for (var i = selector_data['size_pos'].length; i!==0; i--) {
-            var sel = selector_data['size_pos'][i-1];
-            if(selector_data['size_pos'][i - 1].xpath == current_default_xpath) {
-            console.log("highlighting "+current_default_xpath);
-              current_selected_i = i-1;
-              highlight_current_selected_i();
-              found = true;
-              break;
+        // highlight the default one if we can find it in the xPath list
+        // or the xpath matches the default one
+        found = false;
+        if (current_default_xpath.length) {
+            // Find the first one that matches
+            // @todo In the future paint all that match
+            for (const c of current_default_xpath) {
+                for (var i = selector_data['size_pos'].length; i !== 0; i--) {
+                    if (selector_data['size_pos'][i - 1].xpath === c) {
+                        console.log("highlighting " + c);
+                        current_selected_i = i - 1;
+                        highlight_current_selected_i();
+                        found = true;
+                        break;
+                    }
+                }
+                if (found) {
+                    break;
+                }
+            }
+            if (!found) {
+                alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
            }
-          }
-        if(!found) {
-          alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
        }
-      }


      $('#selector-canvas').bind('mousemove', function (e) {
--- a/changedetectionio/templates/diff.html
+++ b/changedetectionio/templates/diff.html
@@ -21,6 +21,9 @@

            <label for="diffChars" class="pure-checkbox">
                <input type="radio" name="diff_type" id="diffChars" value="diffChars"/> Chars</label>
+            <!-- @todo - when mimetype is JSON, select this by default? -->
+            <label for="diffJson" class="pure-checkbox">
+                <input type="radio" name="diff_type" id="diffJson" value="diffJson" /> JSON</label>

            {% if versions|length >= 1 %}
            <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
@@ -37,6 +40,11 @@
    </form>
    <del>Removed text</del>
    <ins>Inserted Text</ins>
+    <span>
+        <!-- https://github.com/kpdecker/jsdiff/issues/389 ? -->
+        <label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
+            <input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace"/> Ignore Whitespace</label>
+    </span>
 </div>

 <div id="diff-jump">
@@ -102,122 +110,12 @@
     </div>
 </div>

-
-<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
-
-<script defer="">
-
-var a = document.getElementById('a');
-var b = document.getElementById('b');
-var result = document.getElementById('result');
-
-function changed() {
-	var diff = JsDiff[window.diffType](a.textContent, b.textContent);
-	var fragment = document.createDocumentFragment();
-	for (var i=0; i < diff.length; i++) {
-
-		if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
-			var swap = diff[i];
-			diff[i] = diff[i + 1];
-			diff[i + 1] = swap;
-		}
-
-		var node;
-		if (diff[i].removed) {
-			node = document.createElement('del');
-			node.classList.add("change");
-			node.appendChild(document.createTextNode(diff[i].value));
-
-		} else if (diff[i].added) {
-			node = document.createElement('ins');
-			node.classList.add("change");
-			node.appendChild(document.createTextNode(diff[i].value));
-		} else {
-			node = document.createTextNode(diff[i].value);
-		}
-		fragment.appendChild(node);
-	}
-
-	result.textContent = '';
-	result.appendChild(fragment);
-
-	// Jump at start
-	inputs.current=0;
-    next_diff();
-}
-
-window.onload = function() {
-
-
-    /* Convert what is options from UTC time.time() to local browser time */
-    var diffList=document.getElementById("diff-version");
-    if (typeof(diffList) != 'undefined' && diffList != null) {
-        for (var option of diffList.options) {
-          var dateObject = new Date(option.value*1000);
-          option.label=dateObject.toLocaleString();
-        }
-    }
-
-    /* Set current version date as local time in the browser also */
-    var current_v = document.getElementById("current-v-date");
-    var dateObject = new Date({{ newest_version_timestamp }}*1000);
-    current_v.innerHTML=dateObject.toLocaleString();
-
-
-	onDiffTypeChange(document.querySelector('#settings [name="diff_type"]:checked'));
-	changed();
-
-};
-
-a.onpaste = a.onchange =
-b.onpaste = b.onchange = changed;
-
-if ('oninput' in a) {
-	a.oninput = b.oninput = changed;
-} else {
-	a.onkeyup = b.onkeyup = changed;
-}
-
-function onDiffTypeChange(radio) {
-	window.diffType = radio.value;
-// Not necessary 
-//	document.title = "Diff " + radio.value.slice(4);
-}
-
-var radio = document.getElementsByName('diff_type');
-for (var i = 0; i < radio.length; i++) {
-	radio[i].onchange = function(e) {
-		onDiffTypeChange(e.target);
-		changed();
-	}
-}
-
-
-var inputs = document.getElementsByClassName('change');
-inputs.current=0;
-
-
-function next_diff() {
-
-    var element = inputs[inputs.current];
-    var headerOffset = 80;
-    var elementPosition = element.getBoundingClientRect().top;
-    var offsetPosition = elementPosition - headerOffset +  window.scrollY;
-
-    window.scrollTo({
-         top: offsetPosition,
-         behavior: "smooth"
-    });
-
-    inputs.current++;
-    if(inputs.current >= inputs.length) {
-      inputs.current=0;
-    }
-}
-
-
-
+<script>
+    const newest_version_timestamp = {{newest_version_timestamp}};
 </script>
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.min.js')}}"></script>
+
+<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>


 {% endblock %}
--- a/changedetectionio/templates/watch-overview.html
+++ b/changedetectionio/templates/watch-overview.html
@@ -96,7 +96,7 @@
                    <div class="fetch-error">{{ watch.last_error }}</div>
                    {% endif %}
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
-                    <div class="fetch-error notification-error">{{ watch.last_notification_error }}</div>
+                    <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
                    {% endif %}
                    {% if not active_tag %}
                    <span class="watch-tag-list">{{ watch.tag}}</span>
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -282,16 +282,19 @@ class update_worker(threading.Thread):
                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

+                    if self.datastore.data['watching'].get(uuid):
+                        # Always record that we at least tried
+                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
+                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
+                                                                           'last_checked': round(time.time()),
+                                                                           'check_count': count
+                                                                           })

-                    # Always record that we atleast tried
-                    self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                                       'last_checked': round(time.time())})
-
-                    # Always save the screenshot if it's available
-                    if update_handler.screenshot:
-                        self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
-                    if update_handler.xpath_data:
-                        self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
+                        # Always save the screenshot if it's available
+                        if update_handler.screenshot:
+                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
+                        if update_handler.xpath_data:
+                            self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)


                self.current_uuid = None  # Done
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,7 +23,7 @@ jsonpath-ng~=1.5.3
 # jq not available on Windows so must be installed manually

 # Notification library
-apprise~=1.1.0
+apprise~=1.2.0

 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 paho-mqtt
Author	SHA1	Message	Date
dgtlmoon	51a0306d05	Add diff view option for JSON compare (comparing the fields defined on each. The order of fields, etc does not matter in this comparison.)	2022-11-19 15:15:25 +01:00
dgtlmoon	216f93edf5	Fix time handling	2022-11-19 14:47:58 +01:00
dgtlmoon	1efb001a63	Make checkbox work	2022-11-19 14:44:51 +01:00
dgtlmoon	2a15365e30	Move diff handler to its own JS to make it easier to manage	2022-11-19 14:17:30 +01:00
dgtlmoon	7d29c4799c	Update and rename diff.js	2022-11-19 13:42:52 +01:00
dgtlmoon	df6e835035	Make VisualSelector show first available multiple selector, refactor to make more maintainable (#1132 )	2022-11-17 11:52:48 +01:00
dgtlmoon	ab28f20eba	Make link to notification debug log easier to find (#1130 )	2022-11-16 09:17:57 +01:00
Hmmbob	1174b95ab4	Bump notification library (#1128 )	2022-11-15 22:54:12 +01:00
dgtlmoon	a564475325	Re #1126 HIDE_REFERER setting had wrong default	2022-11-14 10:28:05 +01:00
dgtlmoon	85d8d57997	Test: Re-test under HIDE_REFERER condition, use strtobool so you can use 'False' (#1121 )	2022-11-12 13:57:41 +01:00
dgtlmoon	359dcb63e3	Stability fix related to the new watch check count (#1113 )	2022-11-10 20:01:07 +01:00
dgtlmoon	b043d477dc	Use deepcopy to stop possible data corruption (#1108 )	2022-11-08 12:18:38 +01:00
dgtlmoon	06bcfb28e5	Code- Use dict .get instead of key	2022-11-07 20:43:20 +01:00
dgtlmoon	ca3b351bae	Adding a check counter to watch fetching (#1099 )	2022-11-06 09:48:07 +01:00
dgtlmoon	b7e0f0a5e4	Update README.md	2022-11-05 12:22:52 +01:00