mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-04-11 13:38:02 +00:00
Compare commits
3 Commits
thread-rec
...
test-visua
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c143b38a5f | ||
|
|
2c6faa7c4e | ||
|
|
6168cd2899 |
@@ -18,7 +18,6 @@ import threading
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from threading import Event
|
||||
from PriorityThreadPoolExecutor import PriorityThreadPoolExecutor
|
||||
|
||||
import flask_login
|
||||
import logging
|
||||
@@ -50,12 +49,12 @@ __version__ = '0.39.18'
|
||||
datastore = None
|
||||
|
||||
# Local
|
||||
running_update_uuids = set()
|
||||
running_update_threads = []
|
||||
ticker_thread = None
|
||||
|
||||
extra_stylesheets = []
|
||||
|
||||
pool = None
|
||||
update_q = queue.PriorityQueue()
|
||||
|
||||
notification_q = queue.Queue()
|
||||
|
||||
@@ -106,9 +105,10 @@ def init_app_secret(datastore_path):
|
||||
# running or something similar.
|
||||
@app.template_filter('format_last_checked_time')
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
|
||||
if watch_obj['uuid'] in running_update_uuids:
|
||||
return '<span class="loader"></span><span> Checking now</span>'
|
||||
# Worker thread tells us which UUID it is currently processing.
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid == watch_obj['uuid']:
|
||||
return '<span class="loader"></span><span> Checking now</span>'
|
||||
|
||||
if watch_obj['last_checked'] == 0:
|
||||
return 'Not yet'
|
||||
@@ -178,15 +178,13 @@ class User(flask_login.UserMixin):
|
||||
|
||||
def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore
|
||||
global pool
|
||||
datastore = datastore_o
|
||||
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE']=datastore_o
|
||||
|
||||
pool = PriorityThreadPoolExecutor(max_workers=int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])))
|
||||
|
||||
#app.config.update(config or {})
|
||||
|
||||
login_manager = flask_login.LoginManager(app)
|
||||
login_manager.login_view = 'login'
|
||||
@@ -195,17 +193,20 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
watch_api.add_resource(api_v1.WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(api_v1.WatchHistory,
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(api_v1.CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Setup cors headers to allow all domains
|
||||
@@ -416,7 +417,8 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Don't link to hosting when we're on the hosting environment
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
guid=datastore.data['app_guid'],
|
||||
queued_uuids=get_uuids_in_queue())
|
||||
queued_uuids=[uuid for p,uuid in update_q.queue])
|
||||
|
||||
|
||||
if session.get('share-link'):
|
||||
del(session['share-link'])
|
||||
@@ -501,7 +503,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
from changedetectionio import fetch_site_status
|
||||
|
||||
# Get the most recent one
|
||||
newest_history_key = datastore.get_val(uuid, 'newest_history_key')
|
||||
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
|
||||
|
||||
# 0 means that theres only one, so that there should be no 'unviewed' history available
|
||||
if newest_history_key == 0:
|
||||
@@ -630,7 +632,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
update_q.put((1, uuid))
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -747,7 +749,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
importer = import_url_list()
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
|
||||
for uuid in importer.new_uuids:
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
update_q.put((1, uuid))
|
||||
|
||||
if len(importer.remaining_data) == 0:
|
||||
return redirect(url_for('index'))
|
||||
@@ -760,7 +762,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
update_q.put((1, uuid))
|
||||
|
||||
|
||||
|
||||
@@ -1105,7 +1107,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
if not add_paused and new_uuid:
|
||||
# Straight into the queue.
|
||||
queue_single_watch(uuid=new_uuid, priority=1)
|
||||
update_q.put((1, new_uuid))
|
||||
flash("Watch added.")
|
||||
|
||||
if add_paused:
|
||||
@@ -1142,7 +1144,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
queue_single_watch(uuid=uuid, priority=5)
|
||||
update_q.put((5, new_uuid))
|
||||
flash('Cloned.')
|
||||
|
||||
return redirect(url_for('index'))
|
||||
@@ -1155,25 +1157,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
uuid = request.args.get('uuid')
|
||||
i = 0
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# @todo check thread is running and skip
|
||||
|
||||
if uuid:
|
||||
if uuid not in get_uuids_in_queue():
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
if uuid not in running_uuids:
|
||||
update_q.put((1, uuid))
|
||||
i = 1
|
||||
|
||||
elif tag != None:
|
||||
# Items that have this current tag
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if (tag != None and tag in watch['tag']):
|
||||
if watch_uuid not in get_uuids_in_queue() and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
queue_single_watch(uuid=watch_uuid, priority=1)
|
||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
update_q.put((1, watch_uuid))
|
||||
i += 1
|
||||
|
||||
else:
|
||||
# No tag, no uuid, add everything.
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
|
||||
if watch_uuid not in get_uuids_in_queue() and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
queue_single_watch(uuid=watch_uuid, priority=1)
|
||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
update_q.put((1, watch_uuid))
|
||||
i += 1
|
||||
flash("{} watches are queued for rechecking.".format(i))
|
||||
return redirect(url_for('index', tag=tag))
|
||||
@@ -1338,31 +1346,33 @@ def notification_runner():
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
def queue_single_watch(uuid, priority=1):
|
||||
pool.submit(process_single_watch, uuid, priority=int(time.time()) - priority)
|
||||
|
||||
def process_single_watch(uuid):
|
||||
running_update_uuids.add(uuid)
|
||||
from changedetectionio import update_worker
|
||||
worker = update_worker.update_worker(notification_q=notification_q, datastore=datastore)
|
||||
worker.run(uuid)
|
||||
running_update_uuids.remove(uuid)
|
||||
|
||||
def get_uuids_in_queue():
|
||||
return [workitem.args[0] for p, workitem in pool._work_queue.queue]
|
||||
|
||||
# Thread runner to load watch jobs into the queue as they become ready/due for checking again
|
||||
# Thread runner to check every minute, look for new watches to feed into the Queue.
|
||||
def ticker_thread_check_time_launch_checks():
|
||||
import random
|
||||
from changedetectionio import update_worker
|
||||
|
||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
||||
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
||||
|
||||
# Spin up Workers that do the fetching
|
||||
# Can be overriden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
for _ in range(n_workers):
|
||||
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
|
||||
running_update_threads.append(new_worker)
|
||||
new_worker.start()
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
while not app.config.exit.is_set():
|
||||
while True:
|
||||
try:
|
||||
watch_uuid_list = datastore.data['watching'].keys()
|
||||
except RuntimeError as e:
|
||||
@@ -1372,9 +1382,8 @@ def ticker_thread_check_time_launch_checks():
|
||||
break
|
||||
|
||||
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
|
||||
while pool._work_queue.qsize() >= 2000:
|
||||
if not app.config.exit.is_set():
|
||||
time.sleep(1)
|
||||
while update_q.qsize() >= 2000:
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
recheck_time_system_seconds = int(datastore.threshold_seconds)
|
||||
@@ -1405,8 +1414,7 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
#@todo check 'not in running_uuids'
|
||||
if not uuid and uuid not in get_uuids_in_queue():
|
||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||
priority = int(time.time())
|
||||
print(
|
||||
@@ -1417,8 +1425,8 @@ def ticker_thread_check_time_launch_checks():
|
||||
priority,
|
||||
watch.jitter_seconds,
|
||||
now - watch['last_checked']))
|
||||
|
||||
queue_single_watch(uuid=uuid, priority=priority)
|
||||
# Into the queue with you
|
||||
update_q.put((priority, uuid))
|
||||
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request, make_response
|
||||
|
||||
import validators
|
||||
|
||||
from . import auth
|
||||
|
||||
|
||||
@@ -13,7 +11,7 @@ class Watch(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
self.queue_single_watch = kwargs['queue_single_watch']
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:4000/api/v1/watch/<string:uuid>
|
||||
@@ -26,7 +24,7 @@ class Watch(Resource):
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
if request.args.get('recheck'):
|
||||
self.queue_single_watch(uuid, priority=1)
|
||||
self.update_q.put((1, uuid))
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
@@ -88,7 +86,7 @@ class CreateWatch(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
self.queue_single_watch = kwargs['queue_single_watch']
|
||||
self.update_q = kwargs['update_q']
|
||||
|
||||
@auth.check_token
|
||||
def post(self):
|
||||
@@ -102,7 +100,7 @@ class CreateWatch(Resource):
|
||||
extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
|
||||
self.queue_single_watch(new_uuid, priority=1)
|
||||
self.update_q.put((1, new_uuid))
|
||||
return {'uuid': new_uuid}, 201
|
||||
|
||||
# Return concise list of available watches and some very basic info
|
||||
@@ -120,7 +118,7 @@ class CreateWatch(Resource):
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
self.queue_single_watch(uuid, priority=1)
|
||||
self.update_q.put((1, uuid))
|
||||
return {'status': "OK"}, 200
|
||||
|
||||
return list, 200
|
||||
|
||||
@@ -63,13 +63,11 @@ class perform_site_check():
|
||||
|
||||
|
||||
def run(self, uuid):
|
||||
timestamp = int(time.time()) # used for storage etc too
|
||||
|
||||
changed_detected = False
|
||||
screenshot = False # as bytes
|
||||
stripped_text_from_html = ""
|
||||
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
watch = self.datastore.data['watching'].get(uuid)
|
||||
|
||||
# Protect against file:// access
|
||||
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||
@@ -80,7 +78,7 @@ class perform_site_check():
|
||||
# Unset any existing notification error
|
||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||
|
||||
extra_headers = self.datastore.get_val(uuid, 'headers')
|
||||
extra_headers =self.datastore.data['watching'][uuid].get('headers')
|
||||
|
||||
# Tweak the base config with the per-watch ones
|
||||
request_headers = self.datastore.data['settings']['headers'].copy()
|
||||
@@ -93,9 +91,9 @@ class perform_site_check():
|
||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||
|
||||
timeout = self.datastore.data['settings']['requests']['timeout']
|
||||
url = self.datastore.get_val(uuid, 'url')
|
||||
request_body = self.datastore.get_val(uuid, 'body')
|
||||
request_method = self.datastore.get_val(uuid, 'method')
|
||||
url = watch.get('url')
|
||||
request_body = self.datastore.data['watching'][uuid].get('body')
|
||||
request_method = self.datastore.data['watching'][uuid].get('method')
|
||||
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
|
||||
|
||||
# source: support
|
||||
|
||||
@@ -355,6 +355,8 @@ class watchForm(commonSettingsForm):
|
||||
filter_failure_notification_send = BooleanField(
|
||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||
|
||||
notification_use_default = BooleanField('Use default/system notification settings', default=True)
|
||||
|
||||
def validate(self, **kwargs):
|
||||
if not super().validate():
|
||||
return False
|
||||
|
||||
@@ -35,6 +35,7 @@ class model(dict):
|
||||
'notification_title': default_notification_title,
|
||||
'notification_body': default_notification_body,
|
||||
'notification_format': default_notification_format,
|
||||
'notification_use_default': True, # Use default for new
|
||||
'notification_muted': False,
|
||||
'css_filter': '',
|
||||
'last_error': False,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
$(document).ready(function() {
|
||||
function toggle() {
|
||||
$(document).ready(function () {
|
||||
function toggle_fetch_backend() {
|
||||
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
|
||||
if(playwright_enabled) {
|
||||
if (playwright_enabled) {
|
||||
// playwright supports headers, so hide everything else
|
||||
// See #664
|
||||
$('#requests-override-options #request-method').hide();
|
||||
@@ -13,12 +13,8 @@ $(document).ready(function() {
|
||||
// selenium/webdriver doesnt support anything afaik, hide it all
|
||||
$('#requests-override-options').hide();
|
||||
}
|
||||
|
||||
|
||||
$('#webdriver-override-options').show();
|
||||
|
||||
} else {
|
||||
|
||||
$('#requests-override-options').show();
|
||||
$('#requests-override-options *:hidden').show();
|
||||
$('#webdriver-override-options').hide();
|
||||
@@ -26,8 +22,27 @@ $(document).ready(function() {
|
||||
}
|
||||
|
||||
$('input[name="fetch_backend"]').click(function (e) {
|
||||
toggle();
|
||||
toggle_fetch_backend();
|
||||
});
|
||||
toggle();
|
||||
toggle_fetch_backend();
|
||||
|
||||
function toggle_default_notifications() {
|
||||
var n=$('#notification_urls, #notification_title, #notification_body, #notification_format');
|
||||
if ($('#notification_use_default').is(':checked')) {
|
||||
$('#notification-field-group').fadeOut();
|
||||
$(n).each(function (e) {
|
||||
$(this).attr('readonly', true);
|
||||
});
|
||||
} else {
|
||||
$('#notification-field-group').show();
|
||||
$(n).each(function (e) {
|
||||
$(this).attr('readonly', false);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
$('#notification_use_default').click(function (e) {
|
||||
toggle_default_notifications();
|
||||
});
|
||||
toggle_default_notifications();
|
||||
});
|
||||
|
||||
@@ -244,10 +244,6 @@ class ChangeDetectionStore:
|
||||
|
||||
return False
|
||||
|
||||
def get_val(self, uuid, val):
|
||||
# Probably their should be dict...
|
||||
return self.data['watching'][uuid].get(val)
|
||||
|
||||
# Remove a watchs data but keep the entry (URL etc)
|
||||
def clear_watch_history(self, uuid):
|
||||
import pathlib
|
||||
@@ -539,4 +535,28 @@ class ChangeDetectionStore:
|
||||
del(watch['last_changed'])
|
||||
except:
|
||||
continue
|
||||
return
|
||||
|
||||
|
||||
def update_5(self):
|
||||
|
||||
from changedetectionio.notification import (
|
||||
default_notification_body,
|
||||
default_notification_format,
|
||||
default_notification_title,
|
||||
)
|
||||
|
||||
for uuid, watch in self.data['watching'].items():
|
||||
try:
|
||||
# If it's all the same to the system settings, then prefer system notification settings
|
||||
# include \r\n -> \n incase they already hit submit and the browser put \r in
|
||||
if watch.get('notification_body').replace('\r\n', '\n') == default_notification_body.replace('\r\n', '\n') and \
|
||||
watch.get('notification_format') == default_notification_format and \
|
||||
watch.get('notification_title').replace('\r\n', '\n') == default_notification_title.replace('\r\n', '\n') and \
|
||||
watch.get('notification_urls') == self.__data['settings']['application']['notification_urls']:
|
||||
watch['notification_use_default'] = True
|
||||
else:
|
||||
watch['notification_use_default'] = False
|
||||
except:
|
||||
continue
|
||||
return
|
||||
@@ -135,9 +135,11 @@ User-Agent: wonderbra 1.0") }}
|
||||
</div>
|
||||
|
||||
<div class="tab-pane-inner" id="notifications">
|
||||
<strong>Note: <i>These settings override the global settings for this watch.</i></strong>
|
||||
<fieldset>
|
||||
<div class="field-group">
|
||||
<div class="pure-control-group inline-radio">
|
||||
{{ render_checkbox_field(form.notification_use_default) }}
|
||||
</div>
|
||||
<div class="field-group" id="notification-field-group">
|
||||
{{ render_common_settings_form(form, current_base_url, emailprefix) }}
|
||||
</div>
|
||||
</fieldset>
|
||||
|
||||
@@ -71,6 +71,7 @@ def test_check_notification(client, live_server):
|
||||
"url": test_url,
|
||||
"tag": "my tag",
|
||||
"title": "my title",
|
||||
# No 'notification_use_default' here, so it's effectively False/off
|
||||
"headers": "",
|
||||
"fetch_backend": "html_requests"})
|
||||
|
||||
@@ -215,3 +216,82 @@ def test_notification_validation(client, live_server):
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# Check that the default VS watch specific notification is hit
|
||||
def test_check_notification_use_default(client, live_server):
|
||||
set_original_response()
|
||||
notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json')
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": ''},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added" in res.data
|
||||
|
||||
## Setup the local one and enable it
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"notification_urls": notification_url,
|
||||
"notification_title": "watch-notification",
|
||||
"notification_body": "watch-body",
|
||||
'notification_use_default': "True",
|
||||
"notification_format": "Text",
|
||||
"url": test_url,
|
||||
"tag": "my tag",
|
||||
"title": "my title",
|
||||
"headers": "",
|
||||
"fetch_backend": "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
res = client.post(
|
||||
url_for("settings_page"),
|
||||
data={"application-notification_title": "global-notifications-title",
|
||||
"application-notification_body": "global-notifications-body\n",
|
||||
"application-notification_format": "Text",
|
||||
"application-notification_urls": notification_url,
|
||||
"requests-time_between_check-minutes": 180,
|
||||
"fetch_backend": "html_requests"
|
||||
},
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# A change should by default trigger a notification of the global-notifications
|
||||
time.sleep(1)
|
||||
set_modified_response()
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(2)
|
||||
with open("test-datastore/notification.txt", "r") as f:
|
||||
assert 'global-notifications-title' in f.read()
|
||||
|
||||
## Setup the local one and enable it
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first"),
|
||||
data={"notification_urls": notification_url,
|
||||
"notification_title": "watch-notification",
|
||||
"notification_body": "watch-body",
|
||||
# No 'notification_use_default' here, so it's effectively False/off = "dont use default, use this one"
|
||||
"notification_format": "Text",
|
||||
"url": test_url,
|
||||
"tag": "my tag",
|
||||
"title": "my title",
|
||||
"headers": "",
|
||||
"fetch_backend": "html_requests"},
|
||||
follow_redirects=True
|
||||
)
|
||||
set_original_response()
|
||||
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
time.sleep(2)
|
||||
assert os.path.isfile("test-datastore/notification.txt")
|
||||
with open("test-datastore/notification.txt", "r") as f:
|
||||
assert 'watch-notification' in f.read()
|
||||
|
||||
|
||||
# cleanup for the next
|
||||
client.get(
|
||||
url_for("form_delete", uuid="all"),
|
||||
follow_redirects=True
|
||||
)
|
||||
@@ -7,6 +7,7 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli
|
||||
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
||||
def test_visual_selector_content_ready(client, live_server):
|
||||
import os
|
||||
import json
|
||||
|
||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||
live_server_setup(live_server)
|
||||
@@ -33,3 +34,7 @@ def test_visual_selector_content_ready(client, live_server):
|
||||
uuid = extract_UUID_from_client(client)
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||
|
||||
# Open it and see if it roughly looks correct
|
||||
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
|
||||
json.load(f)
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio.html_tools import FilterNotFoundInResponse
|
||||
|
||||
@@ -11,12 +12,15 @@ from changedetectionio.html_tools import FilterNotFoundInResponse
|
||||
# (another process inserts watches into the queue that are time-ready for checking)
|
||||
|
||||
|
||||
class update_worker():
|
||||
class update_worker(threading.Thread):
|
||||
current_uuid = None
|
||||
|
||||
def __init__(self, notification_q, datastore):
|
||||
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
|
||||
self.q = q
|
||||
self.app = app
|
||||
self.notification_q = notification_q
|
||||
self.datastore = datastore
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def send_content_changed_notification(self, t, watch_uuid):
|
||||
|
||||
@@ -37,7 +41,7 @@ class update_worker():
|
||||
)
|
||||
|
||||
# Did it have any notification alerts to hit?
|
||||
if len(watch['notification_urls']):
|
||||
if not watch.get('notification_use_default') and len(watch['notification_urls']):
|
||||
print(">>> Notifications queued for UUID from watch {}".format(watch_uuid))
|
||||
n_object['notification_urls'] = watch['notification_urls']
|
||||
n_object['notification_title'] = watch['notification_title']
|
||||
@@ -45,7 +49,7 @@ class update_worker():
|
||||
n_object['notification_format'] = watch['notification_format']
|
||||
|
||||
# No? maybe theres a global setting, queue them all
|
||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
||||
elif watch.get('notification_use_default') and len(self.datastore.data['settings']['application']['notification_urls']):
|
||||
print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(watch_uuid))
|
||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
||||
n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
|
||||
@@ -112,168 +116,182 @@ class update_worker():
|
||||
if os.path.isfile(full_path):
|
||||
os.unlink(full_path)
|
||||
|
||||
def run(self, uuid):
|
||||
def run(self):
|
||||
from changedetectionio import fetch_site_status
|
||||
|
||||
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
|
||||
|
||||
self.current_uuid = uuid
|
||||
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
process_changedetection_results = True
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, 1, self.datastore.data['watching'][uuid]['url']))
|
||||
now = time.time()
|
||||
while not self.app.config.exit.is_set():
|
||||
|
||||
try:
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
except PermissionError as e:
|
||||
logging.error("File permission error updating", uuid, str(e))
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
# Backend (not filters) gave zero output
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
process_changedetection_results = False
|
||||
priority, uuid = self.q.get(block=False)
|
||||
except queue.Empty:
|
||||
pass
|
||||
|
||||
except content_fetcher.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 403:
|
||||
err_text = "Error - 403 (Access denied) received"
|
||||
elif e.status_code == 404:
|
||||
err_text = "Error - 404 (Page not found) received"
|
||||
elif e.status_code == 500:
|
||||
err_text = "Error - 500 (Internal server Error) received"
|
||||
else:
|
||||
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
if e.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
|
||||
if e.page_text:
|
||||
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
process_changedetection_results = False
|
||||
|
||||
except FilterNotFoundInResponse as e:
|
||||
err_text = "Warning, filter '{}' not found".format(str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
|
||||
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||
0)
|
||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||
if threshold > 0 and c >= threshold:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_filter_failure_notification(uuid)
|
||||
c = 0
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
|
||||
process_changedetection_results = True
|
||||
|
||||
except content_fetcher.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.JSActionExceptions as e:
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
err_text = "{} - {}".format(err_text, e.message)
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except Exception as e:
|
||||
logging.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
# Other serious error
|
||||
process_changedetection_results = False
|
||||
else:
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
return
|
||||
self.current_uuid = uuid
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
process_changedetection_results = True
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
|
||||
now = time.time()
|
||||
|
||||
self.cleanup_error_artifacts(uuid)
|
||||
try:
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
except PermissionError as e:
|
||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
# Backend (not filters) gave zero output
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
process_changedetection_results = False
|
||||
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
except content_fetcher.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 403:
|
||||
err_text = "Error - 403 (Access denied) received"
|
||||
elif e.status_code == 404:
|
||||
err_text = "Error - 404 (Page not found) received"
|
||||
elif e.status_code == 500:
|
||||
err_text = "Error - 500 (Internal server Error) received"
|
||||
else:
|
||||
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
|
||||
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
if e.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
|
||||
if e.page_text:
|
||||
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
process_changedetection_results = False
|
||||
|
||||
# A change was detected
|
||||
if changed_detected:
|
||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||
except FilterNotFoundInResponse as e:
|
||||
err_text = "Warning, filter '{}' not found".format(str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
|
||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||
if watch.history_n >= 2:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_content_changed_notification(self, watch_uuid=uuid)
|
||||
# Only when enabled, send the notification
|
||||
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
|
||||
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||
0)
|
||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||
if threshold > 0 and c >= threshold:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_filter_failure_notification(uuid)
|
||||
c = 0
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
|
||||
process_changedetection_results = True
|
||||
|
||||
except content_fetcher.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.JSActionExceptions as e:
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
err_text = "{} - {}".format(err_text, e.message)
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
# Other serious error
|
||||
process_changedetection_results = False
|
||||
else:
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
self.cleanup_error_artifacts(uuid)
|
||||
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
# A change was detected
|
||||
if changed_detected:
|
||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||
|
||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||
if watch.history_n >= 2:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_content_changed_notification(self, watch_uuid=uuid)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||
print("!!!! Exception in update_worker !!!\n", e)
|
||||
logging.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
except Exception as e:
|
||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||
print("!!!! Exception in update_worker !!!\n", e)
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||
'last_checked': round(time.time())})
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||
'last_checked': round(time.time())})
|
||||
|
||||
# Always save the screenshot if it's available
|
||||
if update_handler.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
||||
if update_handler.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
||||
# Always save the screenshot if it's available
|
||||
if update_handler.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
||||
if update_handler.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
||||
|
||||
|
||||
self.current_uuid = None
|
||||
self.current_uuid = None # Done
|
||||
self.q.task_done()
|
||||
|
||||
# Give the CPU time to interrupt
|
||||
time.sleep(0.1)
|
||||
|
||||
self.app.config.exit.wait(1)
|
||||
|
||||
@@ -33,8 +33,6 @@ bs4
|
||||
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
||||
lxml
|
||||
|
||||
PriorityThreadPoolExecutor
|
||||
|
||||
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
||||
selenium ~= 4.1.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user