mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-09 19:17:15 +00:00
Compare commits
2 Commits
playwright
...
improve-lo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7b664c43ea | ||
|
|
d4eb9f2b64 |
@@ -12,7 +12,7 @@ from functools import wraps
|
|||||||
from threading import Event
|
from threading import Event
|
||||||
import datetime
|
import datetime
|
||||||
import flask_login
|
import flask_login
|
||||||
import logging
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import pytz
|
import pytz
|
||||||
import queue
|
import queue
|
||||||
@@ -1317,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Error sharing -{}".format(str(e)))
|
logger.error("Error sharing -{}".format(str(e)))
|
||||||
flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
|
flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
|
||||||
|
|
||||||
# https://changedetection.io/share/VrMv05wpXyQa
|
# https://changedetection.io/share/VrMv05wpXyQa
|
||||||
@@ -1392,7 +1392,7 @@ def notification_runner():
|
|||||||
sent_obj = notification.process_notification(n_object, datastore)
|
sent_obj = notification.process_notification(n_object, datastore)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
|
logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
|
||||||
|
|
||||||
# UUID wont be present when we submit a 'test' from the global settings
|
# UUID wont be present when we submit a 'test' from the global settings
|
||||||
if 'uuid' in n_object:
|
if 'uuid' in n_object:
|
||||||
@@ -1415,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
proxy_last_called_time = {}
|
proxy_last_called_time = {}
|
||||||
|
|
||||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
||||||
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
|
||||||
|
|
||||||
# Spin up Workers that do the fetching
|
# Spin up Workers that do the fetching
|
||||||
# Can be overriden by ENV or use the default settings
|
# Can be overriden by ENV or use the default settings
|
||||||
@@ -1460,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
watch = datastore.data['watching'].get(uuid)
|
watch = datastore.data['watching'].get(uuid)
|
||||||
if not watch:
|
if not watch:
|
||||||
logging.error("Watch: {} no longer present.".format(uuid))
|
logger.error("Watch: {} no longer present.".format(uuid))
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# No need todo further processing if it's paused
|
# No need todo further processing if it's paused
|
||||||
@@ -1493,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
time_since_proxy_used = int(time.time() - proxy_last_used_time)
|
time_since_proxy_used = int(time.time() - proxy_last_used_time)
|
||||||
if time_since_proxy_used < proxy_list_reuse_time_minimum:
|
if time_since_proxy_used < proxy_list_reuse_time_minimum:
|
||||||
# Not enough time difference reached, skip this watch
|
# Not enough time difference reached, skip this watch
|
||||||
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
|
logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
|
||||||
watch_proxy,
|
watch_proxy,
|
||||||
time_since_proxy_used,
|
time_since_proxy_used,
|
||||||
proxy_list_reuse_time_minimum))
|
proxy_list_reuse_time_minimum))
|
||||||
@@ -1504,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
|
|
||||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||||
priority = int(time.time())
|
priority = int(time.time())
|
||||||
print(
|
logger.info(
|
||||||
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
|
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
|
||||||
uuid,
|
uuid,
|
||||||
watch['last_checked'],
|
watch['last_checked'],
|
||||||
|
|||||||
@@ -24,7 +24,7 @@
|
|||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
from flask import Blueprint, request, make_response
|
from flask import Blueprint, request, make_response
|
||||||
import os
|
import os
|
||||||
import logging
|
from loguru import logger
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio import login_optionally_required
|
from changedetectionio import login_optionally_required
|
||||||
browsersteps_live_ui_o = {}
|
browsersteps_live_ui_o = {}
|
||||||
@@ -49,7 +49,7 @@ def cleanup_playwright_session():
|
|||||||
browsersteps_playwright_browser_interface_end_time = None
|
browsersteps_playwright_browser_interface_end_time = None
|
||||||
browsersteps_playwright_browser_interface_start_time = None
|
browsersteps_playwright_browser_interface_start_time = None
|
||||||
|
|
||||||
print("Cleaning up old playwright session because time was up, calling .goodbye()")
|
logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
|
||||||
try:
|
try:
|
||||||
browsersteps_playwright_browser_interface_context.goodbye()
|
browsersteps_playwright_browser_interface_context.goodbye()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -114,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
|
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
|
||||||
if not this_session:
|
if not this_session:
|
||||||
print("Browser exited")
|
logger.info("Browser exited")
|
||||||
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
return make_response('Browser session ran out of time :( Please reload this page.', 401)
|
||||||
|
|
||||||
this_session.call_action(action_name=step_operation,
|
this_session.call_action(action_name=step_operation,
|
||||||
@@ -122,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
optional_value=step_optional_value)
|
optional_value=step_optional_value)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Exception when calling step operation", step_operation, str(e))
|
logger.info("Exception when calling step operation", step_operation, str(e))
|
||||||
# Try to find something of value to give back to the user
|
# Try to find something of value to give back to the user
|
||||||
return make_response(str(e).splitlines()[0], 401)
|
return make_response(str(e).splitlines()[0], 401)
|
||||||
|
|
||||||
@@ -139,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
if request.method == 'GET':
|
if request.method == 'GET':
|
||||||
|
|
||||||
if not browsersteps_playwright_browser_interface:
|
if not browsersteps_playwright_browser_interface:
|
||||||
print("Starting connection with playwright")
|
logger.info("Starting connection with playwright")
|
||||||
logging.debug("browser_steps.py connecting")
|
logging.debug("browser_steps.py connecting")
|
||||||
|
|
||||||
global browsersteps_playwright_browser_interface_context
|
global browsersteps_playwright_browser_interface_context
|
||||||
@@ -162,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
return make_response('Unable to start the Playwright session properly, is it running?', 401)
|
return make_response('Unable to start the Playwright session properly, is it running?', 401)
|
||||||
|
|
||||||
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
|
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
|
||||||
print("Starting connection with playwright - done")
|
logger.info("Starting connection with playwright - done")
|
||||||
|
|
||||||
if not browsersteps_live_ui_o.get(browsersteps_session_id):
|
if not browsersteps_live_ui_o.get(browsersteps_session_id):
|
||||||
# Boot up a new session
|
# Boot up a new session
|
||||||
@@ -172,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||||
if proxy_url:
|
if proxy_url:
|
||||||
proxy = {'server': proxy_url}
|
proxy = {'server': proxy_url}
|
||||||
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
|
logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||||
|
|
||||||
# Begin the new "Playwright Context" that re-uses the playwright interface
|
# Begin the new "Playwright Context" that re-uses the playwright interface
|
||||||
# Each session is a "Playwright Context" as a list, that uses the playwright interface
|
# Each session is a "Playwright Context" as a list, that uses the playwright interface
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ class steppable_browser_interface():
|
|||||||
if call_action_name == 'choose_one':
|
if call_action_name == 'choose_one':
|
||||||
return
|
return
|
||||||
|
|
||||||
print("> action calling", call_action_name)
|
logger.info("> action calling", call_action_name)
|
||||||
# https://playwright.dev/python/docs/selectors#xpath-selectors
|
# https://playwright.dev/python/docs/selectors#xpath-selectors
|
||||||
if selector.startswith('/') and not selector.startswith('//'):
|
if selector.startswith('/') and not selector.startswith('//'):
|
||||||
selector = "xpath=" + selector
|
selector = "xpath=" + selector
|
||||||
@@ -70,7 +70,7 @@ class steppable_browser_interface():
|
|||||||
|
|
||||||
action_handler(selector, optional_value)
|
action_handler(selector, optional_value)
|
||||||
self.page.wait_for_timeout(3 * 1000)
|
self.page.wait_for_timeout(3 * 1000)
|
||||||
print("Call action done in", time.time() - now)
|
logger.info("Call action done in", time.time() - now)
|
||||||
|
|
||||||
def action_goto_url(self, url, optional_value):
|
def action_goto_url(self, url, optional_value):
|
||||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||||
@@ -81,7 +81,7 @@ class steppable_browser_interface():
|
|||||||
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
|
||||||
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
|
||||||
# This seemed to solve nearly all 'TimeoutErrors'
|
# This seemed to solve nearly all 'TimeoutErrors'
|
||||||
print("Time to goto URL ", time.time() - now)
|
logger.info("Time to goto URL ", time.time() - now)
|
||||||
|
|
||||||
def action_click_element_containing_text(self, selector=None, value=''):
|
def action_click_element_containing_text(self, selector=None, value=''):
|
||||||
if not len(value.strip()):
|
if not len(value.strip()):
|
||||||
@@ -100,14 +100,14 @@ class steppable_browser_interface():
|
|||||||
self.page.evaluate(value)
|
self.page.evaluate(value)
|
||||||
|
|
||||||
def action_click_element(self, selector, value):
|
def action_click_element(self, selector, value):
|
||||||
print("Clicking element")
|
logger.info("Clicking element")
|
||||||
if not len(selector.strip()):
|
if not len(selector.strip()):
|
||||||
return
|
return
|
||||||
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
||||||
|
|
||||||
def action_click_element_if_exists(self, selector, value):
|
def action_click_element_if_exists(self, selector, value):
|
||||||
import playwright._impl._api_types as _api_types
|
import playwright._impl._api_types as _api_types
|
||||||
print("Clicking element if exists")
|
logger.info("Clicking element if exists")
|
||||||
if not len(selector.strip()):
|
if not len(selector.strip()):
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
self.mark_as_closed,
|
self.mark_as_closed,
|
||||||
)
|
)
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
print("Time to browser setup", time.time() - now)
|
logger.info("Time to browser setup", time.time() - now)
|
||||||
self.page.wait_for_timeout(1 * 1000)
|
self.page.wait_for_timeout(1 * 1000)
|
||||||
|
|
||||||
def mark_as_closed(self):
|
def mark_as_closed(self):
|
||||||
print("Page closed, cleaning up..")
|
logger.info("Page closed, cleaning up..")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_expired(self):
|
def has_expired(self):
|
||||||
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||||
# So the JS will find the smallest one first
|
# So the JS will find the smallest one first
|
||||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||||
print("Time to complete get_current_state of browser", time.time() - now)
|
logger.info("Time to complete get_current_state of browser", time.time() - now)
|
||||||
# except
|
# except
|
||||||
# playwright._impl._api_types.Error: Browser closed.
|
# playwright._impl._api_types.Error: Browser closed.
|
||||||
# @todo show some countdown timer?
|
# @todo show some countdown timer?
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
|
|||||||
import eventlet
|
import eventlet
|
||||||
import eventlet.wsgi
|
import eventlet.wsgi
|
||||||
import getopt
|
import getopt
|
||||||
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import socket
|
import socket
|
||||||
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
|
|||||||
global app
|
global app
|
||||||
global datastore
|
global datastore
|
||||||
# app.config.exit.set()
|
# app.config.exit.set()
|
||||||
print('Shutdown: Got SIGTERM, DB saved to disk')
|
logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
|
||||||
datastore.sync_to_json()
|
datastore.sync_to_json()
|
||||||
# raise SystemExit
|
# raise SystemExit
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import hashlib
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
import chardet
|
import chardet
|
||||||
import json
|
import json
|
||||||
import logging
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import sys
|
import sys
|
||||||
@@ -155,7 +155,7 @@ class Fetcher():
|
|||||||
|
|
||||||
for step in valid_steps:
|
for step in valid_steps:
|
||||||
step_n += 1
|
step_n += 1
|
||||||
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
|
logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
|
||||||
self.screenshot_step("before-"+str(step_n))
|
self.screenshot_step("before-"+str(step_n))
|
||||||
self.save_step_html("before-"+str(step_n))
|
self.save_step_html("before-"+str(step_n))
|
||||||
try:
|
try:
|
||||||
@@ -510,7 +510,7 @@ class base_html_webdriver(Fetcher):
|
|||||||
try:
|
try:
|
||||||
self.driver.quit()
|
self.driver.quit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
|
logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
|
||||||
|
|
||||||
|
|
||||||
# "html_requests" is listed as the default fetcher in store.py!
|
# "html_requests" is listed as the default fetcher in store.py!
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import urllib3
|
import urllib3
|
||||||
@@ -105,7 +105,7 @@ class perform_site_check():
|
|||||||
proxy_url = None
|
proxy_url = None
|
||||||
if proxy_id:
|
if proxy_id:
|
||||||
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||||
print("UUID {} Using proxy {}".format(uuid, proxy_url))
|
logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||||
|
|
||||||
fetcher = klass(proxy_override=proxy_url)
|
fetcher = klass(proxy_override=proxy_url)
|
||||||
|
|
||||||
@@ -135,7 +135,7 @@ class perform_site_check():
|
|||||||
|
|
||||||
# Track the content type
|
# Track the content type
|
||||||
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
|
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
|
||||||
|
logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
|
||||||
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
|
||||||
# Saves a lot of CPU
|
# Saves a lot of CPU
|
||||||
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
|
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
|
||||||
@@ -349,6 +349,7 @@ class perform_site_check():
|
|||||||
|
|
||||||
# The main thing that all this at the moment comes down to :)
|
# The main thing that all this at the moment comes down to :)
|
||||||
if watch.get('previous_md5') != fetched_md5:
|
if watch.get('previous_md5') != fetched_md5:
|
||||||
|
logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
|
||||||
changed_detected = True
|
changed_detected = True
|
||||||
|
|
||||||
# Looks like something changed, but did it match all the rules?
|
# Looks like something changed, but did it match all the rules?
|
||||||
@@ -366,10 +367,10 @@ class perform_site_check():
|
|||||||
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
|
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
|
||||||
# One or more lines? unsure?
|
# One or more lines? unsure?
|
||||||
if not has_unique_lines:
|
if not has_unique_lines:
|
||||||
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
|
logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
else:
|
else:
|
||||||
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
|
logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
|
||||||
|
|
||||||
# Always record the new checksum
|
# Always record the new checksum
|
||||||
update_obj["previous_md5"] = fetched_md5
|
update_obj["previous_md5"] = fetched_md5
|
||||||
@@ -378,4 +379,9 @@ class perform_site_check():
|
|||||||
if not watch.get('previous_md5'):
|
if not watch.get('previous_md5'):
|
||||||
watch['previous_md5'] = fetched_md5
|
watch['previous_md5'] = fetched_md5
|
||||||
|
|
||||||
|
if changed_detected:
|
||||||
|
logger.success("UUID: {} Change detected after all filters applied.", uuid)
|
||||||
|
else:
|
||||||
|
logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
|
||||||
|
|
||||||
return changed_detected, update_obj, text_content_before_ignored_filter
|
return changed_detected, update_obj, text_content_before_ignored_filter
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
import logging
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
@@ -109,7 +109,7 @@ class model(dict):
|
|||||||
|
|
||||||
def ensure_data_dir_exists(self):
|
def ensure_data_dir_exists(self):
|
||||||
if not os.path.isdir(self.watch_data_dir):
|
if not os.path.isdir(self.watch_data_dir):
|
||||||
print ("> Creating data dir {}".format(self.watch_data_dir))
|
logger.debug("> Creating data dir {}".format(self.watch_data_dir))
|
||||||
os.mkdir(self.watch_data_dir)
|
os.mkdir(self.watch_data_dir)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -194,7 +194,7 @@ class model(dict):
|
|||||||
# Read the history file as a dict
|
# Read the history file as a dict
|
||||||
fname = os.path.join(self.watch_data_dir, "history.txt")
|
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
logging.debug("Reading history index " + str(time.time()))
|
logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
|
||||||
with open(fname, "r") as f:
|
with open(fname, "r") as f:
|
||||||
for i in f.readlines():
|
for i in f.readlines():
|
||||||
if ',' in i:
|
if ',' in i:
|
||||||
|
|||||||
@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
|
|||||||
apobj = apprise.Apprise(debug=True, asset=asset)
|
apobj = apprise.Apprise(debug=True, asset=asset)
|
||||||
url = url.strip()
|
url = url.strip()
|
||||||
if len(url):
|
if len(url):
|
||||||
print(">> Process Notification: AppRise notifying {}".format(url))
|
logger.info(">> Process Notification: AppRise notifying {}".format(url))
|
||||||
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
|
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
|
||||||
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
|
||||||
# Because different notifications may require different pre-processing, run each sequentially :(
|
# Because different notifications may require different pre-processing, run each sequentially :(
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from copy import deepcopy
|
|||||||
from os import path, unlink
|
from os import path, unlink
|
||||||
from threading import Lock
|
from threading import Lock
|
||||||
import json
|
import json
|
||||||
import logging
|
from loguru import logger
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import requests
|
import requests
|
||||||
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
|
|||||||
for uuid, watch in self.__data['watching'].items():
|
for uuid, watch in self.__data['watching'].items():
|
||||||
watch['uuid']=uuid
|
watch['uuid']=uuid
|
||||||
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
|
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
|
||||||
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
|
logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
|
||||||
|
|
||||||
# First time ran, Create the datastore.
|
# First time ran, Create the datastore.
|
||||||
except (FileNotFoundError):
|
except (FileNotFoundError):
|
||||||
if include_default_watches:
|
if include_default_watches:
|
||||||
print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
|
logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
|
||||||
self.add_watch(url='https://news.ycombinator.com/',
|
self.add_watch(url='https://news.ycombinator.com/',
|
||||||
tag='Tech news',
|
tag='Tech news',
|
||||||
extras={'fetch_backend': 'html_requests'})
|
extras={'fetch_backend': 'html_requests'})
|
||||||
@@ -303,7 +303,7 @@ class ChangeDetectionStore:
|
|||||||
apply_extras['include_filters'] = [res['css_filter']]
|
apply_extras['include_filters'] = [res['css_filter']]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Error fetching metadata for shared watch link", url, str(e))
|
logger.error("Error fetching metadata for shared watch link", url, str(e))
|
||||||
flash("Error fetching metadata for {}".format(url), 'error')
|
flash("Error fetching metadata for {}".format(url), 'error')
|
||||||
return False
|
return False
|
||||||
from .model.Watch import is_safe_url
|
from .model.Watch import is_safe_url
|
||||||
@@ -388,14 +388,13 @@ class ChangeDetectionStore:
|
|||||||
|
|
||||||
|
|
||||||
def sync_to_json(self):
|
def sync_to_json(self):
|
||||||
logging.info("Saving JSON..")
|
logger.debug("Saving JSON DB")
|
||||||
print("Saving JSON..")
|
|
||||||
try:
|
try:
|
||||||
data = deepcopy(self.__data)
|
data = deepcopy(self.__data)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
# Try again in 15 seconds
|
# Try again in 15 seconds
|
||||||
time.sleep(15)
|
time.sleep(15)
|
||||||
logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
|
logger.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
|
||||||
self.sync_to_json()
|
self.sync_to_json()
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
@@ -408,7 +407,7 @@ class ChangeDetectionStore:
|
|||||||
json.dump(data, json_file, indent=4)
|
json.dump(data, json_file, indent=4)
|
||||||
os.replace(self.json_store_path+".tmp", self.json_store_path)
|
os.replace(self.json_store_path+".tmp", self.json_store_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
|
logger.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
|
||||||
|
|
||||||
self.needs_write = False
|
self.needs_write = False
|
||||||
self.needs_write_urgent = False
|
self.needs_write_urgent = False
|
||||||
@@ -419,7 +418,7 @@ class ChangeDetectionStore:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
if self.stop_thread:
|
if self.stop_thread:
|
||||||
print("Shutting down datastore thread")
|
logger.info("Shutting down datastore thread")
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.needs_write or self.needs_write_urgent:
|
if self.needs_write or self.needs_write_urgent:
|
||||||
@@ -533,8 +532,8 @@ class ChangeDetectionStore:
|
|||||||
try:
|
try:
|
||||||
update_method = getattr(self, "update_{}".format(update_n))()
|
update_method = getattr(self, "update_{}".format(update_n))()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error while trying update_{}".format((update_n)))
|
logger.error("Error while trying update_{}".format((update_n)))
|
||||||
print(e)
|
logger.error(str(e))
|
||||||
# Don't run any more updates
|
# Don't run any more updates
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
@@ -572,7 +571,7 @@ class ChangeDetectionStore:
|
|||||||
with open(os.path.join(target_path, "history.txt"), "w") as f:
|
with open(os.path.join(target_path, "history.txt"), "w") as f:
|
||||||
f.writelines(history)
|
f.writelines(history)
|
||||||
else:
|
else:
|
||||||
logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
|
logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
|
||||||
|
|
||||||
# No longer needed, dynamically pulled from the disk when needed.
|
# No longer needed, dynamically pulled from the disk when needed.
|
||||||
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
|
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
|
||||||
|
|||||||
@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
|
|||||||
url = url_for('test_notification_endpoint', _external=True)
|
url = url_for('test_notification_endpoint', _external=True)
|
||||||
notification_url = url.replace('http', 'json')
|
notification_url = url.replace('http', 'json')
|
||||||
|
|
||||||
print(">>>> Notification URL: " + notification_url)
|
|
||||||
|
|
||||||
# Just a regular notification setting, this will be used by the special 'filter not found' notification
|
# Just a regular notification setting, this will be used by the special 'filter not found' notification
|
||||||
notification_form_data = {"notification_urls": notification_url,
|
notification_form_data = {"notification_urls": notification_url,
|
||||||
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
|
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ import os
|
|||||||
import threading
|
import threading
|
||||||
import queue
|
import queue
|
||||||
import time
|
import time
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio import content_fetcher
|
from changedetectionio import content_fetcher
|
||||||
from changedetectionio import queuedWatchMetaData
|
from changedetectionio import queuedWatchMetaData
|
||||||
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
|
|||||||
# Requests for checking on a single site(watch) from a queue of watches
|
# Requests for checking on a single site(watch) from a queue of watches
|
||||||
# (another process inserts watches into the queue that are time-ready for checking)
|
# (another process inserts watches into the queue that are time-ready for checking)
|
||||||
|
|
||||||
import logging
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
class update_worker(threading.Thread):
|
class update_worker(threading.Thread):
|
||||||
current_uuid = None
|
current_uuid = None
|
||||||
|
|
||||||
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
|
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
|
||||||
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
|
||||||
self.q = q
|
self.q = q
|
||||||
self.app = app
|
self.app = app
|
||||||
self.notification_q = notification_q
|
self.notification_q = notification_q
|
||||||
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
|
|||||||
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
|
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
|
||||||
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
|
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
|
||||||
})
|
})
|
||||||
logging.info (">> SENDING NOTIFICATION")
|
logger.info ("UUID: {} - Sending notification".format(watch_uuid))
|
||||||
self.notification_q.put(n_object)
|
self.notification_q.put(n_object)
|
||||||
else:
|
else:
|
||||||
logging.info (">> NO Notification sent, notification_url was empty in both watch and system")
|
logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
|
||||||
|
|
||||||
def send_filter_failure_notification(self, watch_uuid):
|
def send_filter_failure_notification(self, watch_uuid):
|
||||||
|
|
||||||
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
|
|||||||
'screenshot': None
|
'screenshot': None
|
||||||
})
|
})
|
||||||
self.notification_q.put(n_object)
|
self.notification_q.put(n_object)
|
||||||
print("Sent filter not found notification for {}".format(watch_uuid))
|
logger.info("Sent filter not found notification for {}".format(watch_uuid))
|
||||||
|
|
||||||
def send_step_failure_notification(self, watch_uuid, step_n):
|
def send_step_failure_notification(self, watch_uuid, step_n):
|
||||||
watch = self.datastore.data['watching'].get(watch_uuid, False)
|
watch = self.datastore.data['watching'].get(watch_uuid, False)
|
||||||
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
|
|||||||
'uuid': watch_uuid
|
'uuid': watch_uuid
|
||||||
})
|
})
|
||||||
self.notification_q.put(n_object)
|
self.notification_q.put(n_object)
|
||||||
print("Sent step not found notification for {}".format(watch_uuid))
|
logger.error("Sent step not found notification for {}".format(watch_uuid))
|
||||||
|
|
||||||
|
|
||||||
def cleanup_error_artifacts(self, uuid):
|
def cleanup_error_artifacts(self, uuid):
|
||||||
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
|
|||||||
update_obj= {}
|
update_obj= {}
|
||||||
xpath_data = False
|
xpath_data = False
|
||||||
process_changedetection_results = True
|
process_changedetection_results = True
|
||||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
|
logger.info("UUID: {} Start processing, Priority {} URL {}",uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
|
|||||||
if not isinstance(contents, (bytes, bytearray)):
|
if not isinstance(contents, (bytes, bytearray)):
|
||||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
|
||||||
process_changedetection_results = False
|
process_changedetection_results = False
|
||||||
except content_fetcher.ReplyWithContentButNoText as e:
|
except content_fetcher.ReplyWithContentButNoText as e:
|
||||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||||
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
|
|||||||
# Send notification if we reached the threshold?
|
# Send notification if we reached the threshold?
|
||||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||||
0)
|
0)
|
||||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||||
if threshold > 0 and c >= threshold:
|
if threshold > 0 and c >= threshold:
|
||||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||||
self.send_filter_failure_notification(uuid)
|
self.send_filter_failure_notification(uuid)
|
||||||
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
|
|||||||
# Send notification if we reached the threshold?
|
# Send notification if we reached the threshold?
|
||||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||||
0)
|
0)
|
||||||
print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||||
if threshold > 0 and c >= threshold:
|
if threshold > 0 and c >= threshold:
|
||||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||||
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
|
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
|
||||||
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
|
|||||||
|
|
||||||
# A change was detected
|
# A change was detected
|
||||||
if changed_detected:
|
if changed_detected:
|
||||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
|
||||||
|
|
||||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||||
if watch.history_n >= 2:
|
if watch.history_n >= 2:
|
||||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||||
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||||
print("!!!! Exception in update_worker !!!\n", e)
|
logger.error("!!!! Exception in update_worker !!!\n", e)
|
||||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ inscriptis~=2.2
|
|||||||
pytz
|
pytz
|
||||||
timeago~=1.0
|
timeago~=1.0
|
||||||
validators
|
validators
|
||||||
|
loguru
|
||||||
|
|
||||||
# Set these versions together to avoid a RequestsDependencyWarning
|
# Set these versions together to avoid a RequestsDependencyWarning
|
||||||
# >= 2.26 also adds Brotli support if brotli is installed
|
# >= 2.26 also adds Brotli support if brotli is installed
|
||||||
|
|||||||
Reference in New Issue
Block a user