Compare commits


1 Commit

Author SHA1 Message Date
dgtlmoon  775ea12067  Be sure that only valid history index lines are read  2022-10-19 22:05:37 +02:00
25 changed files with 56 additions and 647 deletions

View File

@@ -1,21 +1,12 @@
name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
# This line doesnt work, even tho it is the documented one
#on: [push, pull_request]
on:
push:
paths:
- requirements.txt
- Dockerfile
pull_request:
paths:
- requirements.txt
- Dockerfile
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:

View File

@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
Please be sure that all new functionality has a matching test!
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
```
pip3 install -r requirements-dev

View File

@@ -64,7 +64,6 @@ EXPOSE 5000
# The actual flask app
COPY changedetectionio /app/changedetectionio
# The eventlet server wrapper
COPY changedetection.py /app/changedetection.py

View File

@@ -184,9 +184,9 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
## Proxy Configuration
## Proxy configuration
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible]( https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support)
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration
## Raspberry Pi support?

View File

@@ -194,9 +194,6 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@@ -987,6 +984,9 @@ def changedetection_app(config=None, datastore_o=None):
# create a ZipFile object
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
# We only care about UUIDS from the current index file
uuids = list(datastore.data['watching'].keys())
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w",
@@ -1002,12 +1002,12 @@ def changedetection_app(config=None, datastore_o=None):
# Add the flask app secret
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
# Add any data in the watch data directory.
for uuid, w in datastore.data['watching'].items():
for f in Path(w.watch_data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),
# Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
parent_p = txt_file_path.parent
if parent_p.name in uuids:
zipObj.write(txt_file_path,
arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
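
One side of the backup hunk above walks each watch's data directory; the other collects every `*.txt` under the datastore and keeps only files whose parent directory is a currently-known watch UUID. A minimal sketch of that filtering idea, with hypothetical paths and a helper name of my own (not the project's exact function):

```python
import os
import zipfile
from pathlib import Path

def backup_snapshots(datastore_path, uuids, backup_filepath):
    """Zip only the *.txt snapshots that belong to a currently-known watch UUID."""
    with zipfile.ZipFile(backup_filepath, "w",
                         compression=zipfile.ZIP_DEFLATED,
                         compresslevel=8) as zip_obj:
        for txt_file_path in Path(datastore_path).rglob('*.txt'):
            # The snapshot's parent directory name is the watch UUID; skip orphaned dirs.
            if txt_file_path.parent.name in uuids:
                # Keep the archive paths relative to the datastore root.
                zip_obj.write(txt_file_path,
                              arcname=os.path.relpath(txt_file_path, datastore_path))

# Hypothetical usage:
# backup_snapshots('/datastore', {'9f6a1c3e-0000-4000-8000-000000000000'}, 'backup.zip')
```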

View File

@@ -122,37 +122,3 @@ class CreateWatch(Resource):
return {'status': "OK"}, 200
return list, 200
class SystemInfo(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
self.datastore = kwargs['datastore']
self.update_q = kwargs['update_q']
@auth.check_token
def get(self):
import time
overdue_watches = []
# Check all watches and report which have not been checked but should have been
for uuid, watch in self.datastore.data.get('watching', {}).items():
# see if now - last_checked is greater than the time that should have been
# this is not super accurate (maybe they just edited it) but better than nothing
t = watch.threshold_seconds()
if not t:
# Use the system wide default
t = self.datastore.threshold_seconds
time_since_check = time.time() - watch.get('last_checked')
# Allow 5 minutes of grace time before we decide it's overdue
if time_since_check - (5 * 60) > t:
overdue_watches.append(uuid)
return {
'queue_size': self.update_q.qsize(),
'overdue_watches': overdue_watches,
'uptime': round(time.time() - self.datastore.start_time, 2),
'watch_count': len(self.datastore.data.get('watching', {}))
}, 200
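
The `SystemInfo` resource above exposes `/api/v1/systeminfo`, reporting queue size, overdue watches, uptime and watch count, behind the same `x-api-key` token check as the other API endpoints. A hedged example of querying it with `requests` (host, port and key are placeholders for your own instance):

```python
import requests

BASE_URL = "http://localhost:5000"   # placeholder instance URL
API_KEY = "your-api-key-here"        # placeholder API key

resp = requests.get(f"{BASE_URL}/api/v1/systeminfo",
                    headers={"x-api-key": API_KEY},
                    timeout=10)
resp.raise_for_status()
info = resp.json()

print("queue size :", info["queue_size"])
print("overdue    :", len(info["overdue_watches"]))
print("uptime (s) :", info["uptime"])
print("watches    :", info["watch_count"])
```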

View File

@@ -102,14 +102,6 @@ def main():
has_password=datastore.data['settings']['application']['password'] != False
)
# Monitored websites will not receive a Referer header
# when a user clicks on an outgoing link.
@app.after_request
def hide_referrer(response):
if os.getenv("HIDE_REFERER", False):
response.headers["Referrer-Policy"] = "no-referrer"
return response
# Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings
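
The `hide_referrer` hook in the hunk above is toggled by the `HIDE_REFERER` environment variable (see the docker-compose hunk further down) and adds a `Referrer-Policy: no-referrer` header to every response, so monitored sites can't see the changedetection.io hostname in outgoing links. A self-contained sketch of the same pattern in a bare Flask app:

```python
import os
from flask import Flask

app = Flask(__name__)

@app.after_request
def hide_referrer(response):
    # Only add the header when the operator opts in via the environment.
    if os.getenv("HIDE_REFERER", False):
        response.headers["Referrer-Policy"] = "no-referrer"
    return response

@app.route("/")
def index():
    return "ok"

if __name__ == "__main__":
    app.run(port=5000)
```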

View File

@@ -2,14 +2,14 @@ import hashlib
import logging
import os
import re
import time
import urllib3
import difflib
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():
@@ -65,9 +65,7 @@ class perform_site_check():
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
timeout = self.datastore.data['settings']['requests'].get('timeout')
url = watch.link
url = watch.get('url')
request_body = self.datastore.data['watching'][uuid].get('body')
request_method = self.datastore.data['watching'][uuid].get('method')
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
@@ -289,23 +287,8 @@ class perform_site_check():
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
if changed_detected:
if not watch.get("trigger_add", True) or not watch.get("trigger_del", True): # if we are supposed to filter any diff types
# get the diff types present in the watch
diff_types = watch.get_diff_types(text_content_before_ignored_filter)
print("Diff components found: " + str(diff_types))
# Only Additions (deletions are turned off)
if not watch["trigger_del"] and diff_types["del"] and not diff_types["add"]:
changed_detected = False
# Only Deletions (additions are turned off)
elif not watch["trigger_add"] and diff_types["add"] and not diff_types["del"]:
changed_detected = False
# Always record the new checksum and the new text
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
watch.save_previous_text(text_content_before_ignored_filter)
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
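
The diff-filter block above (the `trigger_add`/`trigger_del` logic this compare takes out) decides whether `changed_detected` should stand by classifying the change as additions and/or deletions. The classification itself, shown later in the Watch model as `get_diff_types()`, boils down to inspecting `difflib.SequenceMatcher` opcodes; a minimal standalone sketch:

```python
import difflib

def diff_types(old_text, new_text):
    """Return which kinds of change exist between two snapshots."""
    found = {'add': False, 'del': False}
    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t",
                                       a=old_text, b=new_text)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'insert':
            found['add'] = True
        elif tag == 'delete':
            found['del'] = True
        elif tag == 'replace':
            # A replacement counts as both an addition and a deletion.
            found['add'] = True
            found['del'] = True
    return found

print(diff_types("Here\nis\nsome\ntext", "Here\nis\ntext"))  # deletions only
```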

View File

@@ -323,18 +323,6 @@ class ValidateCSSJSONXPATHInput(object):
except:
raise ValidationError("A system-error occurred when validating your jq expression")
class ValidateDiffFilters(object):
"""
Validates that at least one filter checkbox is selected
"""
def __init__(self, message=None):
self.message = message
def __call__(self, form, field):
if not form.trigger_add.data and not form.trigger_del.data:
message = field.gettext('At least one filter checkbox must be selected')
raise ValidationError(message)
class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
@@ -377,8 +365,6 @@ class watchForm(commonSettingsForm):
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
trigger_add = BooleanField('Additions', [ValidateDiffFilters()], default=True)
trigger_del = BooleanField('Deletions', [ValidateDiffFilters()], default=True)
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

View File

@@ -1,8 +1,6 @@
from distutils.util import strtobool
import logging
import os
import time
import uuid
import uuid as uuid_builder
from distutils.util import strtobool
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -24,7 +22,7 @@ class model(dict):
#'newest_history_key': 0,
'title': None,
'previous_md5': False,
'uuid': str(uuid.uuid4()),
'uuid': str(uuid_builder.uuid4()),
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
@@ -47,8 +45,6 @@ class model(dict):
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'trigger_add': True,
'trigger_del': True,
'proxy': None, # Preferred proxy connection
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
@@ -64,7 +60,7 @@ class model(dict):
self.update(self.__base_config)
self.__datastore_path = kw['datastore_path']
self['uuid'] = str(uuid.uuid4())
self['uuid'] = str(uuid_builder.uuid4())
del kw['datastore_path']
@@ -86,19 +82,10 @@ class model(dict):
return False
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
print ("> Creating data dir {}".format(self.watch_data_dir))
os.mkdir(self.watch_data_dir)
@property
def link(self):
url = self.get('url', '')
if '{%' in url or '{{' in url:
from jinja2 import Environment
# Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/
jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
return str(jinja2_env.from_string(url).render())
return url
target_path = os.path.join(self.__datastore_path, self['uuid'])
if not os.path.isdir(target_path):
print ("> Creating data dir {}".format(target_path))
os.mkdir(target_path)
@property
def label(self):
@@ -122,39 +109,18 @@ class model(dict):
@property
def history(self):
"""History index is just a text file as a list
{watch-uuid}/history.txt
contains a list like
{epoch-time},{filename}\n
We read in this list as the history information
"""
tmp_history = {}
import logging
import time
# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt")
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time()))
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
# The index history could contain a relative path, so we need to make the fullpath
# so that python can read it
if not '/' in v and not '\'' in v:
v = os.path.join(self.watch_data_dir, v)
else:
# It's possible that they moved the datadir on older versions
# So the snapshot exists but is in a different path
snapshot_fname = v.split('/')[-1]
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(v) and os.path.exists(proposed_new_path):
v = proposed_new_path
tmp_history[k] = v
if len(tmp_history):
@@ -166,7 +132,7 @@ class model(dict):
@property
def has_history(self):
fname = os.path.join(self.watch_data_dir, "history.txt")
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
return os.path.isfile(fname)
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
@@ -185,19 +151,25 @@ class model(dict):
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp):
import uuid
import logging
output_path = os.path.join(self.__datastore_path, self['uuid'])
self.ensure_data_dir_exists()
snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
logging.debug("Saving history text {}".format(snapshot_fname))
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
with open(snapshot_fname, 'wb') as f:
f.write(contents)
f.close()
# Append to index
# @todo check last char was \n
index_fname = os.path.join(self.watch_data_dir, "history.txt")
index_fname = os.path.join(output_path, "history.txt")
with open(index_fname, 'a') as f:
f.write("{},{}\n".format(timestamp, snapshot_fname))
f.close()
@@ -208,35 +180,6 @@ class model(dict):
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
return snapshot_fname
# Save previous text snapshot for diffing - used for calculating additions and deletions
def save_previous_text(self, contents):
import logging
output_path = os.path.join(self.__datastore_path, self['uuid'])
# Incase the operator deleted it, check and create.
self.ensure_data_dir_exists()
snapshot_fname = os.path.join(self.watch_data_dir, "previous.txt")
logging.debug("Saving previous text {}".format(snapshot_fname))
with open(snapshot_fname, 'wb') as f:
f.write(contents)
return snapshot_fname
# Get previous text snapshot for diffing - used for calculating additions and deletions
def get_previous_text(self):
snapshot_fname = os.path.join(self.watch_data_dir, "previous.txt")
if self.history_n < 1:
return ""
with open(snapshot_fname, 'rb') as f:
contents = f.read()
return contents
@property
def has_empty_checktime(self):
# using all() + dictionary comprehension
@@ -266,40 +209,15 @@ class model(dict):
# if not, something new happened
return not local_lines.issubset(existing_history)
# Get diff types (addition, deletion, modification) from the previous snapshot and new_text
# uses similar algorithm to customSequenceMatcher in diff.py
# Returns a dict of diff types and wether they are present in the diff
def get_diff_types(self, new_text):
import difflib
diff_types = {
'add': False,
'del': False,
}
# get diff types using difflib
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=str(self.get_previous_text()), b=str(new_text))
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'delete':
diff_types["del"] = True
elif tag == 'insert':
diff_types["add"] = True
elif tag == 'replace':
diff_types["del"] = True
diff_types["add"] = True
return diff_types
def get_screenshot(self):
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
if os.path.isfile(fname):
return fname
return False
def __get_file_ctime(self, filename):
fname = os.path.join(self.watch_data_dir, filename)
fname = os.path.join(self.__datastore_path, self['uuid'], filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False
@@ -324,14 +242,9 @@ class model(dict):
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
@property
def watch_data_dir(self):
# The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid'])
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r') as f:
return f.read()
@@ -339,7 +252,7 @@ class model(dict):
def get_error_snapshot(self):
"""Return path to the screenshot that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
if os.path.isfile(fname):
return fname
return False
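
The Watch model hunks above are where the commit message applies: `history.txt` is a plain-text index of `{epoch-time},{filename}` lines, and only lines that actually contain a comma should be parsed, so a blank or corrupt entry is skipped instead of breaking the read. A hedged sketch of that defensive parse (the fuller property in the repo also deals with relative and moved snapshot paths):

```python
import os

def read_history_index(watch_data_dir):
    """Parse history.txt into {timestamp: snapshot_path}, skipping invalid lines."""
    history = {}
    index_fname = os.path.join(watch_data_dir, "history.txt")
    if not os.path.isfile(index_fname):
        return history

    with open(index_fname, "r") as f:
        for line in f:
            line = line.strip()
            # Only "{epoch-time},{filename}" lines count; skip blanks or corrupt entries.
            if ',' not in line:
                continue
            timestamp, snapshot = line.split(',', 1)
            # A bare filename is resolved next to the index file.
            if '/' not in snapshot and '\\' not in snapshot:
                snapshot = os.path.join(watch_data_dir, snapshot)
            history[timestamp] = snapshot
    return history
```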

View File

@@ -9,8 +9,6 @@
# exit when any command fails
set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
@@ -47,9 +45,7 @@ docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
pip3 install playwright~=1.24
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5

View File

@@ -156,7 +156,7 @@ body:after, body:before {
.fetch-error {
padding-top: 1em;
font-size: 80%;
font-size: 60%;
max-width: 400px;
display: block;
}
@@ -803,4 +803,4 @@ ul {
padding: 0.5rem;
border-radius: 5px;
color: #ff3300;
}
}

View File

@@ -30,14 +30,14 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
self.__data = App.model()
self.needs_write = False
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.needs_write = False
self.proxy_list = None
self.start_time = time.time()
self.stop_thread = False
self.__data = App.model()
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -548,10 +548,6 @@ class ChangeDetectionStore:
# `last_changed` not needed, we pull that information from the history.txt index
def update_4(self):
for uuid, watch in self.data['watching'].items():
# Be sure it's recalculated
p = watch.history
if watch.history_n < 2:
watch['last_changed'] = 0
try:
# Remove it from the struct
del(watch['last_changed'])
@@ -587,23 +583,3 @@ class ChangeDetectionStore:
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
if self.data['settings']['headers'].get(v):
del self.data['settings']['headers'][v]
# Generate a previous.txt for all watches that do not have one and contain history
def update_8(self):
for uuid, watch in self.data['watching'].items():
# Make sure we actually have history
if (watch.history_n == 0):
continue
latest_file_name = watch.history[watch.newest_history_key]
# Check if the previous.txt exists
if not os.path.exists(os.path.join(watch.watch_data_dir, "previous.txt")):
# Generate a previous.txt
with open(os.path.join(watch.watch_data_dir, "previous.txt"), "wb") as f:
# Fill it with the latest history
latest_file_name = watch.history[watch.newest_history_key]
with open(latest_file_name, "rb") as f2:
f.write(f2.read())

View File

@@ -40,8 +40,7 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span>
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d") }}
@@ -173,16 +172,6 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
<label for="trigger-type">Filter and restrict change detection of content to</label>
{{ render_checkbox_field(form.trigger_add, class="trigger-type") }}
{{ render_checkbox_field(form.trigger_del, class="trigger-type") }}
<span class="pure-form-message-inline">
Filters the change-detection of this watch to only this type of content change. <strong>Replacements</strong> (neither additions nor deletions) are always included. The 'diff' will still include all changes.
</span>
</div>
</fieldset>
<div class="pure-control-group">
{% set field = render_field(form.css_filter,
placeholder=".class-name or #some-id, or other CSS selector rule.",

View File

@@ -87,7 +87,7 @@
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}

View File

@@ -147,16 +147,6 @@ def test_api_simple(client, live_server):
# @todo how to handle None/default global values?
assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
# basic systeminfo check
res = client.get(
url_for("systeminfo"),
headers={'x-api-key': api_key},
)
info = json.loads(res.data)
assert info.get('watch_count') == 1
assert info.get('uptime') > 0.5
# Finally delete the watch
res = client.delete(
url_for("watch", uuid=watch_uuid),

View File

@@ -1,31 +1,18 @@
#!/usr/bin/python3
from .util import set_original_response, set_modified_response, live_server_setup
import time
from flask import url_for
from urllib.request import urlopen
from zipfile import ZipFile
import re
import time
from . util import set_original_response, set_modified_response, live_server_setup
def test_backup(client, live_server):
live_server_setup(live_server)
set_original_response()
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(3)
res = client.get(
url_for("get_backup"),
follow_redirects=True
@@ -33,19 +20,6 @@ def test_backup(client, live_server):
# Should get the right zip content type
assert res.content_type == "application/zip"
# Should be PK/ZIP stream
assert res.data.count(b'PK') >= 2
# ZipFile from buffer seems non-obvious, just save it instead
with open("download.zip", 'wb') as f:
f.write(res.data)
zip = ZipFile('download.zip')
l = zip.namelist()
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
newlist = list(filter(uuid4hex.match, l)) # Read Note below
# Should be three txt files in the archive (history and the snapshot)
assert len(newlist) == 3
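
The backup test hunk above filters the zip's file listing with a UUIDv4 regex to count the snapshot text files stored under each watch's directory. The same check as a small standalone sketch, using only the standard library (the local filename is illustrative):

```python
import re
from zipfile import ZipFile

# Entries like "<uuid4>/<snapshot>.txt" — snapshot text files stored under a watch's UUID dir.
uuid4hex = re.compile(
    r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt',
    re.I)

with ZipFile('download.zip') as backup:   # hypothetical local copy of the backup
    snapshots = [name for name in backup.namelist() if uuid4hex.match(name)]

print("snapshot/text files found:", len(snapshots))
```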

View File

@@ -1,107 +0,0 @@
#!/usr/bin/python3
# @NOTE: THIS RELIES ON SOME MIDDLEWARE TO MAKE CHECKBOXES WORK WITH WTFORMS UNDER TEST CONDITION, see changedetectionio/tests/util.py
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
Here
is
some
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_response_with_deleted_word():
test_return_data = """
Here
is
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_response_with_changed_word():
test_return_data = """
Here
ix
some
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filter_changes_as_add_delete(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Wait for it to read the original version
time.sleep(sleep_time_for_fetch_thread)
# Make a change that ONLY includes deletes
set_response_with_deleted_word()
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "y",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see a change because we chose to not know about any Deletions
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Recheck to be sure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Now set the original response, which will include the word, which should trigger Added (because trigger_add ==y)
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# Now check 'changes' are always going to be triggered
set_original_response()
client.post(
url_for("edit_page", uuid="first"),
# Neither trigger add nor del? then we should see changes still
data={"trigger_add": "n",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
time.sleep(sleep_time_for_fetch_thread)
client.get(url_for("mark_all_viewed"), follow_redirects=True)
set_response_with_changed_word()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data

View File

@@ -1,83 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
A few new lines
Where there is more lines originally
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_delete_response():
test_return_data = """
A few new lines
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filtering_no_del(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "y",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
# Make an delete change
set_delete_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see the change
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Make an delete change
set_original_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should see the change
res = client.get(url_for("index"))
assert b'unviewed' in res.data

View File

@@ -1,72 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
A few new lines
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_add_response():
test_return_data = """
A few new lines
Where there is more lines than before
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filtering_no_add(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "n",
"trigger_del": "y",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
# Make an add change
set_add_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see the change
res = client.get(url_for("index"))
# save res.data to a file
assert b'unviewed' not in res.data

View File

@@ -81,4 +81,4 @@ def test_consistent_history(client, live_server):
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, history.txt, previous.txt, and the snapshot"
assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot"

View File

@@ -1,33 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_jinja2_in_url_query(client, live_server):
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_return_query', _external=True)
# because url_for() will URL-encode the var, but we dont here
full_url = "{}?{}".format(test_url,
"date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", )
res = client.post(
url_for("form_quick_watch_add"),
data={"url": full_url, "tag": "test"},
follow_redirects=True
)
assert b"Watch added" in res.data
time.sleep(3)
# It should report nothing found (no new 'unviewed' class)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'date=2' in res.data
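
The deleted test above exercised Jinja2 templating inside watch URLs, with the `jinja2-time` extension providing the `{% now %}` tag — the same `Environment(extensions=['jinja2_time.TimeExtension'])` call that appears in the Watch model's `link` property hunk. A minimal sketch of rendering such a URL, assuming `jinja2` and `jinja2-time` are installed and using an example.com placeholder:

```python
from jinja2 import Environment

url = ("https://example.com/data"
       "?date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}")

# Only render when the URL actually contains Jinja2 syntax.
if '{%' in url or '{{' in url:
    jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
    url = str(jinja2_env.from_string(url).render())

print(url)   # e.g. https://example.com/data?date=2022.10.19
```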

View File

@@ -4,12 +4,6 @@ from flask import make_response, request
from flask import url_for
import logging
import time
from werkzeug import Request
import io
# This is a fix for macOS running tests.
import multiprocessing
multiprocessing.set_start_method("fork")
def set_original_response():
test_return_data = """<html>
@@ -165,42 +159,5 @@ def live_server_setup(live_server):
ret = " ".join([auth.username, auth.password, auth.type])
return ret
# Make sure any checkboxes that are supposed to be defaulted to true are set during the post request
# This is due to the fact that defaults are set in the HTML which we are not using during tests.
# This does not affect the server when running outside of a test
class DefaultCheckboxMiddleware(object):
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
request = Request(environ)
if request.method == "POST" and "/edit" in request.path:
body = environ['wsgi.input'].read()
# if the checkboxes are not set, set them to true
if b"trigger_add" not in body:
body += b'&trigger_add=y'
if b"trigger_del" not in body:
body += b'&trigger_del=y'
# remove any checkboxes set to "n" so wtforms processes them correctly
body = body.replace(b"trigger_add=n", b"")
body = body.replace(b"trigger_del=n", b"")
body = body.replace(b"&&", b"&")
new_stream = io.BytesIO(body)
environ["CONTENT_LENGTH"] = len(body)
environ['wsgi.input'] = new_stream
return self.app(environ, start_response)
live_server.app.wsgi_app = DefaultCheckboxMiddleware(live_server.app.wsgi_app)
# Just return some GET var
@live_server.app.route('/test-return-query', methods=['GET'])
def test_return_query():
return request.query_string
live_server.start()

View File

@@ -45,9 +45,6 @@ services:
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
#
# Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
# - HIDE_REFERER=true
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports:

View File

@@ -1,8 +1,8 @@
flask ~= 2.0
flask~= 2.0
flask_wtf
eventlet >= 0.31.0
eventlet>=0.31.0
validators
timeago ~= 1.0
timeago ~=1.0
inscriptis ~= 2.2
feedgen ~= 0.9
flask-login ~= 0.5
@@ -46,9 +46,4 @@ selenium ~= 4.1.0
# need to revisit flask login versions
werkzeug ~= 2.0.0
# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2 ~= 3.1
jinja2-time
# playwright is installed at Dockerfile build time because it's not available on all platforms