Compare commits

...

2 Commits

Author SHA1 Message Date
dgtlmoon
7b664c43ea Oops 2023-02-11 16:29:15 +01:00
dgtlmoon
d4eb9f2b64 Use Loguru for logging 2023-02-11 16:26:09 +01:00
12 changed files with 65 additions and 63 deletions

View File

@@ -12,7 +12,7 @@ from functools import wraps
from threading import Event
import datetime
import flask_login
import logging
from loguru import logger
import os
import pytz
import queue
@@ -1317,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e:
logging.error("Error sharing -{}".format(str(e)))
logger.error("Error sharing -{}".format(str(e)))
flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
# https://changedetection.io/share/VrMv05wpXyQa
@@ -1392,7 +1392,7 @@ def notification_runner():
sent_obj = notification.process_notification(n_object, datastore)
except Exception as e:
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
# UUID wont be present when we submit a 'test' from the global settings
if 'uuid' in n_object:
@@ -1415,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
proxy_last_called_time = {}
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
# Spin up Workers that do the fetching
# Can be overriden by ENV or use the default settings
@@ -1460,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
now = time.time()
watch = datastore.data['watching'].get(uuid)
if not watch:
logging.error("Watch: {} no longer present.".format(uuid))
logger.error("Watch: {} no longer present.".format(uuid))
continue
# No need todo further processing if it's paused
@@ -1493,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
time_since_proxy_used = int(time.time() - proxy_last_used_time)
if time_since_proxy_used < proxy_list_reuse_time_minimum:
# Not enough time difference reached, skip this watch
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
watch_proxy,
time_since_proxy_used,
proxy_list_reuse_time_minimum))
@@ -1504,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time())
print(
logger.info(
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
uuid,
watch['last_checked'],

View File

@@ -24,7 +24,7 @@
from distutils.util import strtobool
from flask import Blueprint, request, make_response
import os
import logging
from loguru import logger
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {}
@@ -49,7 +49,7 @@ def cleanup_playwright_session():
browsersteps_playwright_browser_interface_end_time = None
browsersteps_playwright_browser_interface_start_time = None
print("Cleaning up old playwright session because time was up, calling .goodbye()")
logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
try:
browsersteps_playwright_browser_interface_context.goodbye()
except Exception as e:
@@ -114,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
if not this_session:
print("Browser exited")
logger.info("Browser exited")
return make_response('Browser session ran out of time :( Please reload this page.', 401)
this_session.call_action(action_name=step_operation,
@@ -122,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
optional_value=step_optional_value)
except Exception as e:
print("Exception when calling step operation", step_operation, str(e))
logger.info("Exception when calling step operation", step_operation, str(e))
# Try to find something of value to give back to the user
return make_response(str(e).splitlines()[0], 401)
@@ -139,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if request.method == 'GET':
if not browsersteps_playwright_browser_interface:
print("Starting connection with playwright")
logger.info("Starting connection with playwright")
logging.debug("browser_steps.py connecting")
global browsersteps_playwright_browser_interface_context
@@ -162,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to start the Playwright session properly, is it running?', 401)
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
print("Starting connection with playwright - done")
logger.info("Starting connection with playwright - done")
if not browsersteps_live_ui_o.get(browsersteps_session_id):
# Boot up a new session
@@ -172,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url:
proxy = {'server': proxy_url}
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
# Begin the new "Playwright Context" that re-uses the playwright interface
# Each session is a "Playwright Context" as a list, that uses the playwright interface

View File

@@ -51,7 +51,7 @@ class steppable_browser_interface():
if call_action_name == 'choose_one':
return
print("> action calling", call_action_name)
logger.info("> action calling", call_action_name)
# https://playwright.dev/python/docs/selectors#xpath-selectors
if selector.startswith('/') and not selector.startswith('//'):
selector = "xpath=" + selector
@@ -70,7 +70,7 @@ class steppable_browser_interface():
action_handler(selector, optional_value)
self.page.wait_for_timeout(3 * 1000)
print("Call action done in", time.time() - now)
logger.info("Call action done in", time.time() - now)
def action_goto_url(self, url, optional_value):
# self.page.set_viewport_size({"width": 1280, "height": 5000})
@@ -81,7 +81,7 @@ class steppable_browser_interface():
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors'
print("Time to goto URL ", time.time() - now)
logger.info("Time to goto URL ", time.time() - now)
def action_click_element_containing_text(self, selector=None, value=''):
if not len(value.strip()):
@@ -100,14 +100,14 @@ class steppable_browser_interface():
self.page.evaluate(value)
def action_click_element(self, selector, value):
print("Clicking element")
logger.info("Clicking element")
if not len(selector.strip()):
return
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
def action_click_element_if_exists(self, selector, value):
import playwright._impl._api_types as _api_types
print("Clicking element if exists")
logger.info("Clicking element if exists")
if not len(selector.strip()):
return
try:
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
self.mark_as_closed,
)
# Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
print("Time to browser setup", time.time() - now)
logger.info("Time to browser setup", time.time() - now)
self.page.wait_for_timeout(1 * 1000)
def mark_as_closed(self):
print("Page closed, cleaning up..")
logger.info("Page closed, cleaning up..")
@property
def has_expired(self):
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
print("Time to complete get_current_state of browser", time.time() - now)
logger.info("Time to complete get_current_state of browser", time.time() - now)
# except
# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?

View File

@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
import eventlet
import eventlet.wsgi
import getopt
from loguru import logger
import os
import signal
import socket
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
global app
global datastore
# app.config.exit.set()
print('Shutdown: Got SIGTERM, DB saved to disk')
logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
datastore.sync_to_json()
# raise SystemExit

View File

@@ -2,7 +2,7 @@ import hashlib
from abc import abstractmethod
import chardet
import json
import logging
from loguru import logger
import os
import requests
import sys
@@ -155,7 +155,7 @@ class Fetcher():
for step in valid_steps:
step_n += 1
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
self.screenshot_step("before-"+str(step_n))
self.save_step_html("before-"+str(step_n))
try:
@@ -510,7 +510,7 @@ class base_html_webdriver(Fetcher):
try:
self.driver.quit()
except Exception as e:
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py!

View File

@@ -1,6 +1,6 @@
import hashlib
import json
import logging
from loguru import logger
import os
import re
import urllib3
@@ -105,7 +105,7 @@ class perform_site_check():
proxy_url = None
if proxy_id:
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
print("UUID {} Using proxy {}".format(uuid, proxy_url))
logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
fetcher = klass(proxy_override=proxy_url)
@@ -135,7 +135,7 @@ class perform_site_check():
# Track the content type
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
@@ -349,6 +349,7 @@ class perform_site_check():
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
changed_detected = True
# Looks like something changed, but did it match all the rules?
@@ -366,10 +367,10 @@ class perform_site_check():
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure?
if not has_unique_lines:
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
changed_detected = False
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
@@ -378,4 +379,9 @@ class perform_site_check():
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
if changed_detected:
logger.success("UUID: {} Change detected after all filters applied.", uuid)
else:
logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
return changed_detected, update_obj, text_content_before_ignored_filter

View File

@@ -1,5 +1,5 @@
from distutils.util import strtobool
import logging
from loguru import logger
import os
import re
import time
@@ -109,7 +109,7 @@ class model(dict):
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
print ("> Creating data dir {}".format(self.watch_data_dir))
logger.debug("> Creating data dir {}".format(self.watch_data_dir))
os.mkdir(self.watch_data_dir)
@property
@@ -194,7 +194,7 @@ class model(dict):
# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time()))
logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:

View File

@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
apobj = apprise.Apprise(debug=True, asset=asset)
url = url.strip()
if len(url):
print(">> Process Notification: AppRise notifying {}".format(url))
logger.info(">> Process Notification: AppRise notifying {}".format(url))
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(

View File

@@ -7,7 +7,7 @@ from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
import logging
from loguru import logger
import os
import re
import requests
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
for uuid, watch in self.__data['watching'].items():
watch['uuid']=uuid
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
# First time ran, Create the datastore.
except (FileNotFoundError):
if include_default_watches:
print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
self.add_watch(url='https://news.ycombinator.com/',
tag='Tech news',
extras={'fetch_backend': 'html_requests'})
@@ -303,7 +303,7 @@ class ChangeDetectionStore:
apply_extras['include_filters'] = [res['css_filter']]
except Exception as e:
logging.error("Error fetching metadata for shared watch link", url, str(e))
logger.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error')
return False
from .model.Watch import is_safe_url
@@ -388,14 +388,13 @@ class ChangeDetectionStore:
def sync_to_json(self):
logging.info("Saving JSON..")
print("Saving JSON..")
logger.debug("Saving JSON DB")
try:
data = deepcopy(self.__data)
except RuntimeError as e:
# Try again in 15 seconds
time.sleep(15)
logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
logger.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
self.sync_to_json()
return
else:
@@ -408,7 +407,7 @@ class ChangeDetectionStore:
json.dump(data, json_file, indent=4)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
logger.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
self.needs_write = False
self.needs_write_urgent = False
@@ -419,7 +418,7 @@ class ChangeDetectionStore:
while True:
if self.stop_thread:
print("Shutting down datastore thread")
logger.info("Shutting down datastore thread")
return
if self.needs_write or self.needs_write_urgent:
@@ -533,8 +532,8 @@ class ChangeDetectionStore:
try:
update_method = getattr(self, "update_{}".format(update_n))()
except Exception as e:
print("Error while trying update_{}".format((update_n)))
print(e)
logger.error("Error while trying update_{}".format((update_n)))
logger.error(str(e))
# Don't run any more updates
return
else:
@@ -572,7 +571,7 @@ class ChangeDetectionStore:
with open(os.path.join(target_path, "history.txt"), "w") as f:
f.writelines(history)
else:
logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
# No longer needed, dynamically pulled from the disk when needed.
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.

View File

@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
url = url_for('test_notification_endpoint', _external=True)
notification_url = url.replace('http', 'json')
print(">>>> Notification URL: " + notification_url)
# Just a regular notification setting, this will be used by the special 'filter not found' notification
notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",

View File

@@ -2,6 +2,7 @@ import os
import threading
import queue
import time
from loguru import logger
from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
# Requests for checking on a single site(watch) from a queue of watches
# (another process inserts watches into the queue that are time-ready for checking)
import logging
import sys
class update_worker(threading.Thread):
current_uuid = None
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
self.q = q
self.app = app
self.notification_q = notification_q
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
})
logging.info (">> SENDING NOTIFICATION")
logger.info ("UUID: {} - Sending notification".format(watch_uuid))
self.notification_q.put(n_object)
else:
logging.info (">> NO Notification sent, notification_url was empty in both watch and system")
logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
def send_filter_failure_notification(self, watch_uuid):
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
'screenshot': None
})
self.notification_q.put(n_object)
print("Sent filter not found notification for {}".format(watch_uuid))
logger.info("Sent filter not found notification for {}".format(watch_uuid))
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
'uuid': watch_uuid
})
self.notification_q.put(n_object)
print("Sent step not found notification for {}".format(watch_uuid))
logger.error("Sent step not found notification for {}".format(watch_uuid))
def cleanup_error_artifacts(self, uuid):
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
update_obj= {}
xpath_data = False
process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
logger.info("UUID: {} Start processing, Priority {} URL {}",uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
now = time.time()
try:
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e))
logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
process_changedetection_results = False
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_filter_failure_notification(uuid)
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
# A change was detected
if changed_detected:
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
except Exception as e:
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
print("!!!! Exception in update_worker !!!\n", e)
logger.error("!!!! Exception in update_worker !!!\n", e)
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

View File

@@ -10,6 +10,7 @@ inscriptis~=2.2
pytz
timeago~=1.0
validators
loguru
# Set these versions together to avoid a RequestsDependencyWarning
# >= 2.26 also adds Brotli support if brotli is installed