Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-13 21:16:11 +00:00)

Compare commits: bug-non-20...improve-lo (8 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 7b664c43ea | |
| | d4eb9f2b64 | |
| | 7b8b50138b | |
| | 01af21f856 | |
| | f7f4ab314b | |
| | ce0355c0ad | |
| | 0f43213d9d | |
| | 93c57d9fad | |
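
Every commit in this range touches logging: ad-hoc `print()` calls and stdlib `logging` usage are migrated to [loguru](https://github.com/Delgan/loguru). A minimal sketch of the pattern the diffs below adopt (the UUID value is made up for illustration):

```python
from loguru import logger  # pip install loguru; logs to stderr with no handler setup

uuid = "0f43213d-example"  # hypothetical watch UUID

# loguru interpolates str.format()-style {} placeholders from positional args,
# unlike stdlib logging's printf-style "%s" placeholders.
logger.info("UUID: {} Start processing", uuid)

# Extra severity levels: SUCCESS (25) sits between INFO (20) and WARNING (30).
logger.success("UUID: {} Change detected after all filters applied.", uuid)

# Caution: args without a matching {} placeholder are silently dropped;
# logger.info("done in", 1.23) logs just "done in".
```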
```diff
@@ -12,7 +12,7 @@ from functools import wraps
 from threading import Event
 import datetime
 import flask_login
-import logging
+from loguru import logger
 import os
 import pytz
 import queue
```
```diff
@@ -505,41 +505,6 @@ def changedetection_app(config=None, datastore_o=None):
         output = render_template("clear_all_history.html")
         return output
 
-
-    # If they edited an existing watch, we need to know to reset the current/previous md5 to include
-    # the excluded text.
-    def get_current_checksum_include_ignore_text(uuid):
-
-        import hashlib
-
-        from changedetectionio import fetch_site_status
-
-        # Get the most recent one
-        newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
-
-        # 0 means that theres only one, so that there should be no 'unviewed' history available
-        if newest_history_key == 0:
-            newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
-
-        if newest_history_key:
-            with open(datastore.data['watching'][uuid].history[newest_history_key],
-                      encoding='utf-8') as file:
-                raw_content = file.read()
-
-                handler = fetch_site_status.perform_site_check(datastore=datastore)
-                stripped_content = html_tools.strip_ignore_text(raw_content,
-                                                                datastore.data['watching'][uuid]['ignore_text'])
-
-                if datastore.data['settings']['application'].get('ignore_whitespace', False):
-                    checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
-                else:
-                    checksum = hashlib.md5(stripped_content).hexdigest()
-
-                return checksum
-
-        return datastore.data['watching'][uuid]['previous_md5']
-
-
     @app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
     @login_optionally_required
     # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
```
```diff
@@ -943,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
         extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
 
-        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
-                watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
+        is_html_webdriver = False
+        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
+            is_html_webdriver = True
 
         # Never requested successfully, but we detected a fetch error
         if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
```
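
The rewrite above replaces a hard-to-read one-line ternary with an explicit check, and also lets a watch whose `fetch_backend` is `'system'` inherit the instance-wide webdriver default. A standalone sketch of the same decision (the function wrapper is ours, not part of the diff):

```python
def uses_webdriver(fetch_backend, system_uses_webdriver):
    # 'html_webdriver' opts in explicitly; 'system' defers to the global default.
    return (fetch_backend == 'system' and system_uses_webdriver) \
        or fetch_backend == 'html_webdriver'
```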
```diff
@@ -1035,7 +1001,8 @@ def changedetection_app(config=None, datastore_o=None):
                 os.unlink(previous_backup_filename)
 
         # create a ZipFile object
-        backupname = "changedetection-backup-{}.zip".format(int(time.time()))
+        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+        backupname = "changedetection-backup-{}.zip".format(timestamp)
         backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
 
         with zipfile.ZipFile(backup_filepath, "w",
```
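
The backup filename switches from epoch seconds to a sortable, human-readable timestamp. A quick illustration (output values are examples):

```python
import time
import datetime

# Old: changedetection-backup-1677000000.zip (epoch seconds)
old_name = "changedetection-backup-{}.zip".format(int(time.time()))

# New: changedetection-backup-20230221153000.zip (YYYYMMDDHHMMSS)
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
new_name = "changedetection-backup-{}.zip".format(timestamp)
```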
```diff
@@ -1350,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
 
         except Exception as e:
-            logging.error("Error sharing -{}".format(str(e)))
+            logger.error("Error sharing -{}".format(str(e)))
             flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
 
         # https://changedetection.io/share/VrMv05wpXyQa
```
```diff
@@ -1425,7 +1392,7 @@ def notification_runner():
                     sent_obj = notification.process_notification(n_object, datastore)
 
                 except Exception as e:
-                    logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
+                    logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
 
                     # UUID wont be present when we submit a 'test' from the global settings
                     if 'uuid' in n_object:
```
```diff
@@ -1448,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
     proxy_last_called_time = {}
 
     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
-    print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
+    logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
 
     # Spin up Workers that do the fetching
     # Can be overriden by ENV or use the default settings
```
```diff
@@ -1493,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
             now = time.time()
             watch = datastore.data['watching'].get(uuid)
             if not watch:
-                logging.error("Watch: {} no longer present.".format(uuid))
+                logger.error("Watch: {} no longer present.".format(uuid))
                 continue
 
             # No need todo further processing if it's paused
```
```diff
@@ -1526,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
                     time_since_proxy_used = int(time.time() - proxy_last_used_time)
                     if time_since_proxy_used < proxy_list_reuse_time_minimum:
                         # Not enough time difference reached, skip this watch
-                        print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
+                        logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
                                                                                                                          watch_proxy,
                                                                                                                          time_since_proxy_used,
                                                                                                                          proxy_list_reuse_time_minimum))
```
```diff
@@ -1537,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
 
                 # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
                 priority = int(time.time())
-                print(
+                logger.info(
                     "> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
                         uuid,
                         watch['last_checked'],
```
```diff
@@ -24,7 +24,7 @@
 from distutils.util import strtobool
 from flask import Blueprint, request, make_response
 import os
-import logging
+from loguru import logger
 from changedetectionio.store import ChangeDetectionStore
 from changedetectionio import login_optionally_required
 browsersteps_live_ui_o = {}
```
```diff
@@ -49,7 +49,7 @@ def cleanup_playwright_session():
     browsersteps_playwright_browser_interface_end_time = None
     browsersteps_playwright_browser_interface_start_time = None
 
-    print("Cleaning up old playwright session because time was up, calling .goodbye()")
+    logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
     try:
         browsersteps_playwright_browser_interface_context.goodbye()
     except Exception as e:
```
```diff
@@ -114,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 
         this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
         if not this_session:
-            print("Browser exited")
+            logger.info("Browser exited")
             return make_response('Browser session ran out of time :( Please reload this page.', 401)
 
         this_session.call_action(action_name=step_operation,
```
```diff
@@ -122,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                      optional_value=step_optional_value)
 
         except Exception as e:
-            print("Exception when calling step operation", step_operation, str(e))
+            logger.info("Exception when calling step operation {} {}", step_operation, str(e))
             # Try to find something of value to give back to the user
             return make_response(str(e).splitlines()[0], 401)
```
```diff
@@ -139,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         if request.method == 'GET':
 
             if not browsersteps_playwright_browser_interface:
-                print("Starting connection with playwright")
-                logging.debug("browser_steps.py connecting")
+                logger.info("Starting connection with playwright")
+                logger.debug("browser_steps.py connecting")
 
                 global browsersteps_playwright_browser_interface_context
```
```diff
@@ -162,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 return make_response('Unable to start the Playwright session properly, is it running?', 401)
 
             browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
-            print("Starting connection with playwright - done")
+            logger.info("Starting connection with playwright - done")
 
         if not browsersteps_live_ui_o.get(browsersteps_session_id):
             # Boot up a new session
```
```diff
@@ -172,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 proxy_url = datastore.proxy_list.get(proxy_id).get('url')
                 if proxy_url:
                     proxy = {'server': proxy_url}
-                    print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
+                    logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
 
             # Begin the new "Playwright Context" that re-uses the playwright interface
             # Each session is a "Playwright Context" as a list, that uses the playwright interface
```
```diff
@@ -51,7 +51,7 @@ class steppable_browser_interface():
         if call_action_name == 'choose_one':
             return
 
-        print("> action calling", call_action_name)
+        logger.info("> action calling {}", call_action_name)
         # https://playwright.dev/python/docs/selectors#xpath-selectors
         if selector.startswith('/') and not selector.startswith('//'):
             selector = "xpath=" + selector
```
```diff
@@ -70,7 +70,7 @@ class steppable_browser_interface():
 
         action_handler(selector, optional_value)
         self.page.wait_for_timeout(3 * 1000)
-        print("Call action done in", time.time() - now)
+        logger.info("Call action done in {}", time.time() - now)
 
     def action_goto_url(self, url, optional_value):
         # self.page.set_viewport_size({"width": 1280, "height": 5000})
```
```diff
@@ -81,7 +81,7 @@ class steppable_browser_interface():
         # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
         # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
         # This seemed to solve nearly all 'TimeoutErrors'
-        print("Time to goto URL ", time.time() - now)
+        logger.info("Time to goto URL {}", time.time() - now)
 
     def action_click_element_containing_text(self, selector=None, value=''):
         if not len(value.strip()):
```
```diff
@@ -100,14 +100,14 @@ class steppable_browser_interface():
         self.page.evaluate(value)
 
     def action_click_element(self, selector, value):
-        print("Clicking element")
+        logger.info("Clicking element")
         if not len(selector.strip()):
             return
         self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
 
     def action_click_element_if_exists(self, selector, value):
         import playwright._impl._api_types as _api_types
-        print("Clicking element if exists")
+        logger.info("Clicking element if exists")
         if not len(selector.strip()):
             return
         try:
```
```diff
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
             self.mark_as_closed,
         )
         # Listen for all console events and handle errors
-        self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
+        self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
 
-        print("Time to browser setup", time.time() - now)
+        logger.info("Time to browser setup {}", time.time() - now)
         self.page.wait_for_timeout(1 * 1000)
 
     def mark_as_closed(self):
-        print("Page closed, cleaning up..")
+        logger.info("Page closed, cleaning up..")
 
     @property
     def has_expired(self):
```
```diff
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
             xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
             # So the JS will find the smallest one first
             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-            print("Time to complete get_current_state of browser", time.time() - now)
+            logger.info("Time to complete get_current_state of browser {}", time.time() - now)
         # except
         # playwright._impl._api_types.Error: Browser closed.
         # @todo show some countdown timer?
```
```diff
@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
 import eventlet
 import eventlet.wsgi
 import getopt
+from loguru import logger
 import os
 import signal
 import socket
```
```diff
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
     global app
     global datastore
     # app.config.exit.set()
-    print('Shutdown: Got SIGTERM, DB saved to disk')
+    logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
     datastore.sync_to_json()
     # raise SystemExit
```
```diff
@@ -2,7 +2,7 @@ import hashlib
 from abc import abstractmethod
 import chardet
 import json
-import logging
+from loguru import logger
 import os
 import requests
 import sys
```
```diff
@@ -155,7 +155,7 @@ class Fetcher():
 
         for step in valid_steps:
             step_n += 1
-            print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
+            logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
             self.screenshot_step("before-"+str(step_n))
             self.save_step_html("before-"+str(step_n))
             try:
```
```diff
@@ -510,7 +510,7 @@ class base_html_webdriver(Fetcher):
         try:
             self.driver.quit()
         except Exception as e:
-            print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
+            logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
 
 
 # "html_requests" is listed as the default fetcher in store.py!
```
```diff
@@ -568,6 +568,7 @@ class html_requests(Fetcher):
         if not r.content or not len(r.content):
             raise EmptyReply(url=url, status_code=r.status_code)
 
         # @todo test this
+        # @todo maybe you really want to test zero-byte return pages?
         if r.status_code != 200 and not ignore_status_codes:
             # maybe check with content works?
```
```diff
@@ -1,6 +1,6 @@
 import hashlib
 import json
-import logging
+from loguru import logger
 import os
 import re
 import urllib3
```
```diff
@@ -105,7 +105,7 @@ class perform_site_check():
         proxy_url = None
         if proxy_id:
             proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
-            print("UUID {} Using proxy {}".format(uuid, proxy_url))
+            logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
 
         fetcher = klass(proxy_override=proxy_url)
```
```diff
@@ -135,7 +135,7 @@ class perform_site_check():
 
         # Track the content type
         update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
 
+        logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
         # Saves a lot of CPU
         update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
```
```diff
@@ -349,6 +349,7 @@ class perform_site_check():
 
         # The main thing that all this at the moment comes down to :)
         if watch.get('previous_md5') != fetched_md5:
+            logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
             changed_detected = True
 
             # Looks like something changed, but did it match all the rules?
```
```diff
@@ -366,10 +367,10 @@ class perform_site_check():
             has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
             # One or more lines? unsure?
             if not has_unique_lines:
-                logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
+                logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
                 changed_detected = False
             else:
-                logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
+                logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
 
         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5
```
```diff
@@ -378,4 +379,9 @@ class perform_site_check():
         if not watch.get('previous_md5'):
             watch['previous_md5'] = fetched_md5
 
+        if changed_detected:
+            logger.success("UUID: {} Change detected after all filters applied.", uuid)
+        else:
+            logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
+
         return changed_detected, update_obj, text_content_before_ignored_filter
```
```diff
@@ -1,5 +1,5 @@
 from distutils.util import strtobool
-import logging
+from loguru import logger
 import os
 import re
 import time
```
```diff
@@ -109,7 +109,7 @@ class model(dict):
 
     def ensure_data_dir_exists(self):
         if not os.path.isdir(self.watch_data_dir):
-            print ("> Creating data dir {}".format(self.watch_data_dir))
+            logger.debug("> Creating data dir {}".format(self.watch_data_dir))
             os.mkdir(self.watch_data_dir)
 
     @property
```
```diff
@@ -153,7 +153,9 @@ class model(dict):
     @property
     def is_pdf(self):
         # content_type field is set in the future
-        return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
+        # https://github.com/dgtlmoon/changedetection.io/issues/1392
+        # Not sure the best logic here
+        return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
 
     @property
     def label(self):
```
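
The old substring test fires on any URL that merely contains `.pdf`; `endswith()` restricts the match to the trailing extension, with the `content_type` check still catching PDFs served from extension-less URLs. An illustrative comparison (the URL is invented):

```python
url = "https://example.com/view?fallback=.pdf-viewer"  # hypothetical, not a PDF

'.pdf' in url.lower()         # True  - old check, false positive
url.lower().endswith('.pdf')  # False - new check
```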
```diff
@@ -192,7 +194,7 @@ class model(dict):
         # Read the history file as a dict
         fname = os.path.join(self.watch_data_dir, "history.txt")
         if os.path.isfile(fname):
-            logging.debug("Reading history index " + str(time.time()))
+            logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
             with open(fname, "r") as f:
                 for i in f.readlines():
                     if ',' in i:
```
```diff
@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
         apobj = apprise.Apprise(debug=True, asset=asset)
         url = url.strip()
         if len(url):
-            print(">> Process Notification: AppRise notifying {}".format(url))
+            logger.info(">> Process Notification: AppRise notifying {}".format(url))
             with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
                 # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
                 # Because different notifications may require different pre-processing, run each sequentially :(
```
```diff
@@ -7,7 +7,7 @@ from copy import deepcopy
 from os import path, unlink
 from threading import Lock
 import json
-import logging
+from loguru import logger
 import os
 import re
 import requests
```
```diff
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
                 for uuid, watch in self.__data['watching'].items():
                     watch['uuid']=uuid
                     self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
-                    print("Watching:", uuid, self.__data['watching'][uuid]['url'])
+                    logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
 
         # First time ran, Create the datastore.
         except (FileNotFoundError):
             if include_default_watches:
-                print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
+                logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
                 self.add_watch(url='https://news.ycombinator.com/',
                                tag='Tech news',
                                extras={'fetch_backend': 'html_requests'})
```
```diff
@@ -192,27 +192,24 @@ class ChangeDetectionStore:
         tags.sort()
         return tags
 
-    def unlink_history_file(self, path):
-        try:
-            unlink(path)
-        except (FileNotFoundError, IOError):
-            pass
-
     # Delete a single watch by UUID
     def delete(self, uuid):
+        import pathlib
+        import shutil
+
         with self.lock:
             if uuid == 'all':
                 self.__data['watching'] = {}
 
                 # GitHub #30 also delete history records
                 for uuid in self.data['watching']:
-                    for path in self.data['watching'][uuid].history.values():
-                        self.unlink_history_file(path)
+                    path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                    shutil.rmtree(path)
+                    self.needs_write_urgent = True
 
             else:
-                for path in self.data['watching'][uuid].history.values():
-                    self.unlink_history_file(path)
-
+                path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                shutil.rmtree(path)
                 del self.data['watching'][uuid]
 
         self.needs_write_urgent = True
```
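
Because `shutil.rmtree()` deletes the watch's whole data directory, including every history snapshot inside it, the per-file `unlink_history_file()` helper removed above is redundant. A minimal sketch of the new cleanup path (the paths are illustrative):

```python
import os
import pathlib
import shutil

datastore_path = "/datastore"     # illustrative datastore location
uuid = "0f43213d-example"         # hypothetical watch UUID

# One call removes the history files and the directory itself, replacing
# the loop that unlink()ed each snapshot individually.
shutil.rmtree(pathlib.Path(os.path.join(datastore_path, uuid)))
```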
```diff
@@ -306,7 +303,7 @@ class ChangeDetectionStore:
             apply_extras['include_filters'] = [res['css_filter']]
 
         except Exception as e:
-            logging.error("Error fetching metadata for shared watch link", url, str(e))
+            logger.error("Error fetching metadata for shared watch link {} {}", url, str(e))
             flash("Error fetching metadata for {}".format(url), 'error')
             return False
         from .model.Watch import is_safe_url
```
```diff
@@ -391,14 +388,13 @@ class ChangeDetectionStore:
 
 
     def sync_to_json(self):
-        logging.info("Saving JSON..")
-        print("Saving JSON..")
+        logger.debug("Saving JSON DB")
         try:
             data = deepcopy(self.__data)
         except RuntimeError as e:
             # Try again in 15 seconds
             time.sleep(15)
-            logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
+            logger.error("! Data changed when writing to JSON, trying again.. {}", str(e))
             self.sync_to_json()
             return
         else:
```
```diff
@@ -411,7 +407,7 @@ class ChangeDetectionStore:
                 json.dump(data, json_file, indent=4)
                 os.replace(self.json_store_path+".tmp", self.json_store_path)
             except Exception as e:
-                logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
+                logger.error("Error writing JSON!! (Main JSON file save was skipped) : {}", str(e))
 
             self.needs_write = False
             self.needs_write_urgent = False
```
```diff
@@ -422,7 +418,7 @@ class ChangeDetectionStore:
 
         while True:
             if self.stop_thread:
-                print("Shutting down datastore thread")
+                logger.info("Shutting down datastore thread")
                 return
 
             if self.needs_write or self.needs_write_urgent:
```
```diff
@@ -536,8 +532,8 @@ class ChangeDetectionStore:
             try:
                 update_method = getattr(self, "update_{}".format(update_n))()
             except Exception as e:
-                print("Error while trying update_{}".format((update_n)))
-                print(e)
+                logger.error("Error while trying update_{}".format((update_n)))
+                logger.error(str(e))
                 # Don't run any more updates
                 return
             else:
```
```diff
@@ -575,7 +571,7 @@ class ChangeDetectionStore:
             with open(os.path.join(target_path, "history.txt"), "w") as f:
                 f.writelines(history)
         else:
-            logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
+            logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
 
         # No longer needed, dynamically pulled from the disk when needed.
         # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
```
```diff
@@ -15,78 +15,10 @@ def test_inscriptus():
     stripped_text_from_html = get_text(html_content)
     assert stripped_text_from_html == 'test!\nok man'
 
+def test_setup(client, live_server):
+    live_server_setup(live_server)
+
-# Assert that non-200's dont give notifications or register as a change
-def test_non_200_doesnt_trigger_change(client, live_server):
-    # live_server_setup(live_server)
-
-    set_original_response()
-    url = url_for('test_changing_status_code_endpoint', _external=True)
-
-    # Add our URL to the import page
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
-    time.sleep(sleep_time_for_fetch_thread)
-
-    res = client.post(
-        url_for("edit_page", uuid="first"),
-        data={
-            "include_filters": ".foobar-detection",
-            "fetch_backend": "html_requests",
-            "headers": "",
-            "tag": "",
-            "url": url
-        },
-        follow_redirects=True
-    )
-
-    # A recheck will happen here automatically
-    time.sleep(sleep_time_for_fetch_thread)
-
-    # hit the mark all viewed link
-    res = client.get(url_for("mark_all_viewed"), follow_redirects=True)
-
-    # Now be sure the filter is missing and then recheck it
-    set_modified_response()
-
-    # https://github.com/dgtlmoon/changedetection.io/issues/962#issuecomment-1416807742
-    for ecode in ['429', '400', '204', '429', '403', '404', '500']:
-        with open("test-endpoint-status-code.txt", 'w') as f:
-            f.write(ecode)
-
-        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-        assert b'1 watches queued for rechecking.' in res.data
-        time.sleep(sleep_time_for_fetch_thread)
-
-        # No change should be seen/no trigger of change
-        res = client.get(url_for("index"))
-        assert b'unviewed' not in res.data
-
-        # load preview page so we can see what was returned
-        res = client.get(url_for("preview_page", uuid="first"))
-        # with open('/tmp/debug-'+ecode+'.html', 'wb') as f:
-        #     f.write(res.data)
-
-        # Should still say the original 200, because "ignore_status_codes" should be off by default
-        # commented out - this will fail because we also show what the error was
-        # assert b'code: '+ecode.encode('utf-8') not in res.data
-
-        assert b'code: 200' in res.data
-
-    # Cleanup everything
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
 def test_check_basic_change_detection_functionality(client, live_server):
     set_original_response()
     live_server_setup(live_server)
 
     # Add our URL to the import page
     res = client.post(
```
```diff
@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
     url = url_for('test_notification_endpoint', _external=True)
     notification_url = url.replace('http', 'json')
 
-    print(">>>> Notification URL: " + notification_url)
-
     # Just a regular notification setting, this will be used by the special 'filter not found' notification
     notification_form_data = {"notification_urls": notification_url,
                               "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
```
```diff
@@ -1,5 +1,4 @@
 #!/usr/bin/python3
-import os.path
 
 from flask import make_response, request
 from flask import url_for
```
```diff
@@ -113,25 +112,6 @@ def live_server_setup(live_server):
         import secrets
         return "Random content - {}\n".format(secrets.token_hex(64))
 
-    @live_server.app.route('/test-changing-status-code-endpoint')
-    def test_changing_status_code_endpoint():
-        # status_code can also be overriden in a file, used for doing things that it wouldnt normally expect
-        # (test_non_200_doesnt_trigger_change)
-        status_code = '200'
-        if os.path.isfile("test-endpoint-status-code.txt"):
-            with open("test-endpoint-status-code.txt", 'r') as f:
-                status_code = f.read().strip()
-                os.unlink("test-endpoint-status-code.txt")
-
-        # Contents includes the status code, which will change and should not trigger a change
-        # (Non-200 should get ignored)
-        with open("test-datastore/endpoint-content.txt", "r") as f:
-            contents ="{} code: {} ".format(f.read(), status_code)
-        if status_code == '204':
-            contents=''
-        resp = make_response(contents, status_code)
-        resp.headers['Content-Type'] = 'text/html'
-        return resp, status_code
 
     @live_server.app.route('/test-endpoint')
     def test_endpoint():
```
```diff
@@ -2,6 +2,7 @@ import os
 import threading
 import queue
 import time
+from loguru import logger
 
 from changedetectionio import content_fetcher
 from changedetectionio import queuedWatchMetaData
```
```diff
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
 # Requests for checking on a single site(watch) from a queue of watches
 # (another process inserts watches into the queue that are time-ready for checking)
 
-import logging
-import sys
-
 class update_worker(threading.Thread):
     current_uuid = None
 
     def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
-        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
         self.q = q
         self.app = app
         self.notification_q = notification_q
```
```diff
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
                     'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
                     'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
                 })
-                logging.info (">> SENDING NOTIFICATION")
+                logger.info("UUID: {} - Sending notification".format(watch_uuid))
                 self.notification_q.put(n_object)
             else:
-                logging.info (">> NO Notification sent, notification_url was empty in both watch and system")
+                logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
 
     def send_filter_failure_notification(self, watch_uuid):
```
```diff
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
             'screenshot': None
         })
         self.notification_q.put(n_object)
-        print("Sent filter not found notification for {}".format(watch_uuid))
+        logger.info("Sent filter not found notification for {}".format(watch_uuid))
 
     def send_step_failure_notification(self, watch_uuid, step_n):
         watch = self.datastore.data['watching'].get(watch_uuid, False)
```
```diff
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
             'uuid': watch_uuid
         })
         self.notification_q.put(n_object)
-        print("Sent step not found notification for {}".format(watch_uuid))
+        logger.error("Sent step not found notification for {}".format(watch_uuid))
 
 
     def cleanup_error_artifacts(self, uuid):
```
```diff
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
             update_obj= {}
             xpath_data = False
             process_changedetection_results = True
-            print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
+            logger.info("UUID: {} Start processing, Priority {} URL {}", uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
             now = time.time()
 
             try:
```
```diff
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
                 if not isinstance(contents, (bytes, bytearray)):
                     raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
             except PermissionError as e:
-                self.app.logger.error("File permission error updating", uuid, str(e))
+                logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
                 process_changedetection_results = False
             except content_fetcher.ReplyWithContentButNoText as e:
                 # Totally fine, it's by choice - just continue on, nothing more to care about
```
```diff
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
                 # Send notification if we reached the threshold?
                 threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                0)
-                print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+                logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
                 if threshold > 0 and c >= threshold:
                     if not self.datastore.data['watching'][uuid].get('notification_muted'):
                         self.send_filter_failure_notification(uuid)
```
```diff
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
                 # Send notification if we reached the threshold?
                 threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                0)
-                print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+                logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
                 if threshold > 0 and c >= threshold:
                     if not self.datastore.data['watching'][uuid].get('notification_muted'):
                         self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
```
```diff
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
 
                     # A change was detected
                     if changed_detected:
-                        print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
-
                         # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                         if watch.history_n >= 2:
                             if not self.datastore.data['watching'][uuid].get('notification_muted'):
```
```diff
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
 
             except Exception as e:
                 # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
-                print("!!!! Exception in update_worker !!!\n", e)
+                logger.error("!!!! Exception in update_worker !!!\n{}", e)
                 self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                 self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
```
```diff
@@ -41,7 +41,6 @@ services:
       #
      # Base URL of your changedetection.io install (Added to the notification alert)
      # - BASE_URL=https://mysite.com
-
      # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
      # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
      # - USE_X_SETTINGS=1
```
```diff
@@ -95,7 +94,10 @@ services:
      # - CHROME_REFRESH_TIME=600000
      # - DEFAULT_BLOCK_ADS=true
      # - DEFAULT_STEALTH=true
 
+     #
+     # Ignore HTTPS errors, like for self-signed certs
+     # - DEFAULT_IGNORE_HTTPS_ERRORS=true
      #
 
 volumes:
   changedetection-data:
```
```diff
@@ -10,6 +10,7 @@ inscriptis~=2.2
 pytz
 timeago~=1.0
 validators
+loguru
 
 # Set these versions together to avoid a RequestsDependencyWarning
 # >= 2.26 also adds Brotli support if brotli is installed
```
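
With `loguru` added to requirements.txt, no handler wiring is required; it logs to stderr out of the box. If a deployment wants a different minimum level, a sketch (this configuration is an assumption, not something the diff sets up):

```python
import sys
from loguru import logger

logger.remove()                        # drop the default stderr handler
logger.add(sys.stderr, level="INFO")   # re-add with an explicit minimum level
```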