mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-02 06:12:36 +00:00
Compare commits
24 Commits
be-sure-di
...
thread-rec
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
56b88624d7 | ||
|
|
c3c0f62662 | ||
|
|
f3c7c969d8 | ||
|
|
1355c2a245 | ||
|
|
96cf1a06df | ||
|
|
019a4a0375 | ||
|
|
db2f7b80ea | ||
|
|
bfabd7b094 | ||
|
|
d92dbfe765 | ||
|
|
67d2441334 | ||
|
|
3c30bc02d5 | ||
|
|
dcb54117d5 | ||
|
|
b1e32275dc | ||
|
|
e2a6865932 | ||
|
|
f04adb7202 | ||
|
|
1193a7f22c | ||
|
|
0b976827bb | ||
|
|
280e916033 | ||
|
|
5494e61a05 | ||
|
|
e461c0b819 | ||
|
|
d67c654f37 | ||
|
|
06ab34b6af | ||
|
|
ba8676c4ba | ||
|
|
4899c1a4f9 |
14
.github/ISSUE_TEMPLATE/bug_report.md
vendored
14
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -7,6 +7,20 @@ assignees: 'dgtlmoon'
|
||||
|
||||
---
|
||||
|
||||
**DO NOT USE THIS FORM TO REPORT THAT A PARTICULAR WEBSITE IS NOT SCRAPING/WATCHING AS EXPECTED**
|
||||
|
||||
This form is only for direct bugs and feature requests to do directly with the software.
|
||||
|
||||
Please report watched websites (full URL and _any_ settings) that do not work with changedetection.io as expected [**IN THE DISCUSSION FORUMS**](https://github.com/dgtlmoon/changedetection.io/discussions) or your report will be deleted
|
||||
|
||||
CONSIDER TAKING OUT A SUBSCRIPTION FOR A SMALL PRICE PER MONTH, YOU GET THE BENEFIT OF USING OUR PAID PROXIES AND FURTHERING THE DEVELOPMENT OF CHANGEDETECTION.IO
|
||||
|
||||
THANK YOU
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
|
||||
36
README.md
36
README.md
@@ -1,23 +1,16 @@
|
||||
# changedetection.io
|
||||
## Web Site Change Detection, Monitoring and Notification.
|
||||
|
||||
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
||||
|
||||

|
||||
|
||||
## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
|
||||
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
|
||||
|
||||
_Know when web pages change! Stay on top of new information! Get notifications when important website content changes_
|
||||
|
||||
Live your data-life *pro-actively* instead of *re-actively*.
|
||||
|
||||
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
**Get your own private instance now! Let us host it for you!**
|
||||
|
||||
[**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||
[**Don't have time? Let us host it for you! Try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start), _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||
|
||||
|
||||
|
||||
@@ -46,7 +39,18 @@ Free, Open-source web page monitoring, notification and change detection. Don't
|
||||
- Monitor HTML source code for unexpected changes, strengthen your PCI compliance
|
||||
- You have a very sensitive list of URLs to watch and you do _not_ want to use the paid alternatives. (Remember, _you_ are the product)
|
||||
|
||||
_Need an actual Chrome runner with JavaScript support? We support fetching via WebDriver!_
|
||||
_Need an actual Chrome runner with JavaScript support? We support fetching via WebDriver and Playwright!_
|
||||
|
||||
#### Key Features
|
||||
|
||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||
- Easily specify how often a site should be checked
|
||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||
- Override Request Headers, Specify `POST` or `GET` and other methods
|
||||
- Use the "Visual Selector" to help target specific elements
|
||||
|
||||
|
||||
## Screenshots
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ import threading
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from threading import Event
|
||||
from PriorityThreadPoolExecutor import PriorityThreadPoolExecutor
|
||||
|
||||
import flask_login
|
||||
import logging
|
||||
@@ -44,17 +45,17 @@ from flask_wtf import CSRFProtect
|
||||
from changedetectionio import html_tools
|
||||
from changedetectionio.api import api_v1
|
||||
|
||||
__version__ = '0.39.17.2'
|
||||
__version__ = '0.39.18'
|
||||
|
||||
datastore = None
|
||||
|
||||
# Local
|
||||
running_update_threads = []
|
||||
running_update_uuids = set()
|
||||
ticker_thread = None
|
||||
|
||||
extra_stylesheets = []
|
||||
|
||||
update_q = queue.PriorityQueue()
|
||||
pool = None
|
||||
|
||||
notification_q = queue.Queue()
|
||||
|
||||
@@ -105,10 +106,9 @@ def init_app_secret(datastore_path):
|
||||
# running or something similar.
|
||||
@app.template_filter('format_last_checked_time')
|
||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||
# Worker thread tells us which UUID it is currently processing.
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid == watch_obj['uuid']:
|
||||
return '<span class="loader"></span><span> Checking now</span>'
|
||||
|
||||
if watch_obj['uuid'] in running_update_uuids:
|
||||
return '<span class="loader"></span><span> Checking now</span>'
|
||||
|
||||
if watch_obj['last_checked'] == 0:
|
||||
return 'Not yet'
|
||||
@@ -178,13 +178,15 @@ class User(flask_login.UserMixin):
|
||||
|
||||
def changedetection_app(config=None, datastore_o=None):
|
||||
global datastore
|
||||
global pool
|
||||
datastore = datastore_o
|
||||
|
||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||
# (instead of the global var)
|
||||
app.config['DATASTORE']=datastore_o
|
||||
|
||||
#app.config.update(config or {})
|
||||
pool = PriorityThreadPoolExecutor(max_workers=int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])))
|
||||
|
||||
|
||||
login_manager = flask_login.LoginManager(app)
|
||||
login_manager.login_view = 'login'
|
||||
@@ -193,20 +195,17 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
watch_api.add_resource(api_v1.WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
|
||||
watch_api.add_resource(api_v1.WatchHistory,
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(api_v1.CreateWatch, '/api/v1/watch',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
|
||||
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
|
||||
|
||||
|
||||
resource_class_kwargs={'datastore': datastore, 'queue_single_watch': queue_single_watch})
|
||||
|
||||
|
||||
# Setup cors headers to allow all domains
|
||||
@@ -417,8 +416,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# Don't link to hosting when we're on the hosting environment
|
||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||
guid=datastore.data['app_guid'],
|
||||
queued_uuids=[uuid for p,uuid in update_q.queue])
|
||||
|
||||
queued_uuids=get_uuids_in_queue())
|
||||
|
||||
if session.get('share-link'):
|
||||
del(session['share-link'])
|
||||
@@ -632,7 +630,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
datastore.needs_write_urgent = True
|
||||
|
||||
# Queue the watch for immediate recheck, with a higher priority
|
||||
update_q.put((1, uuid))
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
|
||||
# Diff page [edit] link should go back to diff page
|
||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||
@@ -749,7 +747,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
importer = import_url_list()
|
||||
importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore)
|
||||
for uuid in importer.new_uuids:
|
||||
update_q.put((1, uuid))
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
|
||||
if len(importer.remaining_data) == 0:
|
||||
return redirect(url_for('index'))
|
||||
@@ -762,7 +760,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
d_importer = import_distill_io_json()
|
||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||
for uuid in d_importer.new_uuids:
|
||||
update_q.put((1, uuid))
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
|
||||
|
||||
|
||||
@@ -1107,7 +1105,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
|
||||
if not add_paused and new_uuid:
|
||||
# Straight into the queue.
|
||||
update_q.put((1, new_uuid))
|
||||
queue_single_watch(uuid=new_uuid, priority=1)
|
||||
flash("Watch added.")
|
||||
|
||||
if add_paused:
|
||||
@@ -1144,7 +1142,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
uuid = list(datastore.data['watching'].keys()).pop()
|
||||
|
||||
new_uuid = datastore.clone(uuid)
|
||||
update_q.put((5, new_uuid))
|
||||
queue_single_watch(uuid=uuid, priority=5)
|
||||
flash('Cloned.')
|
||||
|
||||
return redirect(url_for('index'))
|
||||
@@ -1157,35 +1155,59 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
uuid = request.args.get('uuid')
|
||||
i = 0
|
||||
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# @todo check thread is running and skip
|
||||
|
||||
if uuid:
|
||||
if uuid not in running_uuids:
|
||||
update_q.put((1, uuid))
|
||||
if uuid not in get_uuids_in_queue():
|
||||
queue_single_watch(uuid=uuid, priority=1)
|
||||
i = 1
|
||||
|
||||
elif tag != None:
|
||||
# Items that have this current tag
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
if (tag != None and tag in watch['tag']):
|
||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
update_q.put((1, watch_uuid))
|
||||
if watch_uuid not in get_uuids_in_queue() and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
queue_single_watch(uuid=watch_uuid, priority=1)
|
||||
i += 1
|
||||
|
||||
else:
|
||||
# No tag, no uuid, add everything.
|
||||
for watch_uuid, watch in datastore.data['watching'].items():
|
||||
|
||||
if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
update_q.put((1, watch_uuid))
|
||||
if watch_uuid not in get_uuids_in_queue() and not datastore.data['watching'][watch_uuid]['paused']:
|
||||
queue_single_watch(uuid=watch_uuid, priority=1)
|
||||
i += 1
|
||||
flash("{} watches are queued for rechecking.".format(i))
|
||||
return redirect(url_for('index', tag=tag))
|
||||
|
||||
@app.route("/form/checkbox-operations", methods=['POST'])
|
||||
@login_required
|
||||
def form_watch_list_checkbox_operations():
|
||||
op = request.form['op']
|
||||
uuids = request.form.getlist('uuids')
|
||||
|
||||
if (op == 'delete'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.delete(uuid.strip())
|
||||
flash("{} watches deleted".format(len(uuids)))
|
||||
|
||||
if (op == 'pause'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = True
|
||||
|
||||
flash("{} watches paused".format(len(uuids)))
|
||||
|
||||
if (op == 'unpause'):
|
||||
for uuid in uuids:
|
||||
uuid = uuid.strip()
|
||||
if datastore.data['watching'].get(uuid):
|
||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||
flash("{} watches unpaused".format(len(uuids)))
|
||||
|
||||
return redirect(url_for('index'))
|
||||
|
||||
@app.route("/api/share-url", methods=['GET'])
|
||||
@login_required
|
||||
def form_share_put_watch():
|
||||
@@ -1316,33 +1338,31 @@ def notification_runner():
|
||||
# Trim the log length
|
||||
notification_debug_log = notification_debug_log[-100:]
|
||||
|
||||
# Thread runner to check every minute, look for new watches to feed into the Queue.
|
||||
def queue_single_watch(uuid, priority=1):
|
||||
pool.submit(process_single_watch, uuid, priority=int(time.time()) - priority)
|
||||
|
||||
def process_single_watch(uuid):
|
||||
running_update_uuids.add(uuid)
|
||||
from changedetectionio import update_worker
|
||||
worker = update_worker.update_worker(notification_q=notification_q, datastore=datastore)
|
||||
worker.run(uuid)
|
||||
running_update_uuids.remove(uuid)
|
||||
|
||||
def get_uuids_in_queue():
|
||||
return [workitem.args[0] for p, workitem in pool._work_queue.queue]
|
||||
|
||||
# Thread runner to load watch jobs into the queue as they become ready/due for checking again
|
||||
def ticker_thread_check_time_launch_checks():
|
||||
import random
|
||||
from changedetectionio import update_worker
|
||||
|
||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
||||
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
||||
|
||||
# Spin up Workers that do the fetching
|
||||
# Can be overridden by ENV or use the default settings
|
||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||
for _ in range(n_workers):
|
||||
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
|
||||
running_update_threads.append(new_worker)
|
||||
new_worker.start()
|
||||
|
||||
while not app.config.exit.is_set():
|
||||
|
||||
# Get a list of watches by UUID that are currently fetching data
|
||||
running_uuids = []
|
||||
for t in running_update_threads:
|
||||
if t.current_uuid:
|
||||
running_uuids.append(t.current_uuid)
|
||||
|
||||
# Re #232 - Deepcopy the data in case it changes while we're iterating through it all
|
||||
watch_uuid_list = []
|
||||
while True:
|
||||
while not app.config.exit.is_set():
|
||||
try:
|
||||
watch_uuid_list = datastore.data['watching'].keys()
|
||||
except RuntimeError as e:
|
||||
@@ -1352,8 +1372,9 @@ def ticker_thread_check_time_launch_checks():
|
||||
break
|
||||
|
||||
# Re #438 - Don't place more watches in the queue to be checked if the queue is already large
|
||||
while update_q.qsize() >= 2000:
|
||||
time.sleep(1)
|
||||
while pool._work_queue.qsize() >= 2000:
|
||||
if not app.config.exit.is_set():
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
recheck_time_system_seconds = int(datastore.threshold_seconds)
|
||||
@@ -1384,14 +1405,20 @@ def ticker_thread_check_time_launch_checks():
|
||||
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||
print("> Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid,
|
||||
watch['last_checked'],
|
||||
now,
|
||||
watch.jitter_seconds,
|
||||
now - watch['last_checked']))
|
||||
# Into the queue with you
|
||||
update_q.put((5, uuid))
|
||||
#@todo check 'not in running_uuids'
|
||||
if not uuid and uuid not in get_uuids_in_queue():
|
||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||
priority = int(time.time())
|
||||
print(
|
||||
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
|
||||
uuid,
|
||||
watch['last_checked'],
|
||||
now,
|
||||
priority,
|
||||
watch.jitter_seconds,
|
||||
now - watch['last_checked']))
|
||||
|
||||
queue_single_watch(uuid=uuid, priority=priority)
|
||||
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from flask_restful import abort, Resource
|
||||
from flask import request, make_response
|
||||
|
||||
import validators
|
||||
|
||||
from . import auth
|
||||
|
||||
|
||||
@@ -11,7 +13,7 @@ class Watch(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
self.update_q = kwargs['update_q']
|
||||
self.queue_single_watch = kwargs['queue_single_watch']
|
||||
|
||||
# Get information about a single watch, excluding the history list (can be large)
|
||||
# curl http://localhost:4000/api/v1/watch/<string:uuid>
|
||||
@@ -24,7 +26,7 @@ class Watch(Resource):
|
||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||
|
||||
if request.args.get('recheck'):
|
||||
self.update_q.put((1, uuid))
|
||||
self.queue_single_watch(uuid, priority=1)
|
||||
return "OK", 200
|
||||
|
||||
# Return without history, get that via another API call
|
||||
@@ -86,7 +88,7 @@ class CreateWatch(Resource):
|
||||
def __init__(self, **kwargs):
|
||||
# datastore is a black box dependency
|
||||
self.datastore = kwargs['datastore']
|
||||
self.update_q = kwargs['update_q']
|
||||
self.queue_single_watch = kwargs['queue_single_watch']
|
||||
|
||||
@auth.check_token
|
||||
def post(self):
|
||||
@@ -100,7 +102,7 @@ class CreateWatch(Resource):
|
||||
extras = {'title': json_data['title'].strip()} if json_data.get('title') else {}
|
||||
|
||||
new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras)
|
||||
self.update_q.put((1, new_uuid))
|
||||
self.queue_single_watch(new_uuid, priority=1)
|
||||
return {'uuid': new_uuid}, 201
|
||||
|
||||
# Return concise list of available watches and some very basic info
|
||||
@@ -118,7 +120,7 @@ class CreateWatch(Resource):
|
||||
|
||||
if request.args.get('recheck_all'):
|
||||
for uuid in self.datastore.data['watching'].keys():
|
||||
self.update_q.put((1, uuid))
|
||||
self.queue_single_watch(uuid, priority=1)
|
||||
return {'status': "OK"}, 200
|
||||
|
||||
return list, 200
|
||||
|
||||
@@ -31,11 +31,12 @@ class JSActionExceptions(Exception):
|
||||
return
|
||||
|
||||
class PageUnloadable(Exception):
|
||||
def __init__(self, status_code, url, screenshot=False):
|
||||
def __init__(self, status_code, url, screenshot=False, message=False):
|
||||
# Set this so we can use it in other parts of the app
|
||||
self.status_code = status_code
|
||||
self.url = url
|
||||
self.screenshot = screenshot
|
||||
self.message = message
|
||||
return
|
||||
|
||||
class EmptyReply(Exception):
|
||||
@@ -292,7 +293,15 @@ class base_html_playwright(Fetcher):
|
||||
|
||||
# allow per-watch proxy selection override
|
||||
if proxy_override:
|
||||
self.proxy = {'server': proxy_override}
|
||||
# https://playwright.dev/docs/network#http-proxy
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_override)
|
||||
proxy_url = "{}://{}:{}".format(parsed.scheme, parsed.hostname, parsed.port)
|
||||
self.proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
self.proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
self.proxy['password'] = parsed.password
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
@@ -356,7 +365,7 @@ class base_html_playwright(Fetcher):
|
||||
print(str(e))
|
||||
context.close()
|
||||
browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None)
|
||||
raise PageUnloadable(url=url, status_code=None, message=e.message)
|
||||
|
||||
if response is None:
|
||||
context.close()
|
||||
|
||||
@@ -13,6 +13,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check():
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -127,6 +129,9 @@ class perform_site_check():
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
|
||||
fetcher.quit()
|
||||
|
||||
self.screenshot = fetcher.screenshot
|
||||
self.xpath_data = fetcher.xpath_data
|
||||
|
||||
# Fetching complete, now filters
|
||||
# @todo move to class / maybe inside of fetcher abstract base?
|
||||
|
||||
@@ -312,4 +317,4 @@ class perform_site_check():
|
||||
if not watch.get('previous_md5'):
|
||||
watch['previous_md5'] = fetched_md5
|
||||
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter
|
||||
|
||||
@@ -384,7 +384,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
||||
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
|
||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||
real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
|
||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||
|
||||
@@ -42,7 +42,6 @@ class model(dict):
|
||||
'notification_title': default_notification_title,
|
||||
'notification_body': default_notification_body,
|
||||
'notification_format': default_notification_format,
|
||||
'real_browser_save_screenshot': True,
|
||||
'schema_version' : 0,
|
||||
'webdriver_delay': None # Extra delay in seconds before extracting text
|
||||
}
|
||||
|
||||
@@ -83,6 +83,12 @@ class model(dict):
|
||||
|
||||
return False
|
||||
|
||||
def ensure_data_dir_exists(self):
|
||||
target_path = os.path.join(self.__datastore_path, self['uuid'])
|
||||
if not os.path.isdir(target_path):
|
||||
print ("> Creating data dir {}".format(target_path))
|
||||
os.mkdir(target_path)
|
||||
|
||||
@property
|
||||
def label(self):
|
||||
# Used for sorting
|
||||
@@ -149,9 +155,7 @@ class model(dict):
|
||||
|
||||
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
|
||||
|
||||
# In case the operator deleted it, check and create.
|
||||
if not os.path.isdir(output_path):
|
||||
os.mkdir(output_path)
|
||||
self.ensure_data_dir_exists()
|
||||
|
||||
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
||||
logging.debug("Saving history text {}".format(snapshot_fname))
|
||||
|
||||
@@ -38,13 +38,14 @@ docker kill $$-test_selenium
|
||||
|
||||
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
|
||||
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
|
||||
pip3 install playwright~=1.22
|
||||
pip3 install playwright~=1.24
|
||||
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
||||
# takes a while to spin up
|
||||
sleep 5
|
||||
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
|
||||
pytest tests/fetchers/test_content.py
|
||||
pytest tests/test_errorhandling.py
|
||||
pytest tests/visualselector/test_fetch_data.py
|
||||
|
||||
unset PLAYWRIGHT_DRIVER_URL
|
||||
docker kill $$-test_browserless
|
||||
@@ -22,5 +22,18 @@ $(function () {
|
||||
});
|
||||
});
|
||||
|
||||
// checkboxes - check all
|
||||
$("#check-all").click(function (e) {
|
||||
$('input[type=checkbox]').not(this).prop('checked', this.checked);
|
||||
});
|
||||
// checkboxes - show/hide buttons
|
||||
$("input[type=checkbox]").click(function (e) {
|
||||
if ($('input[type=checkbox]:checked').length) {
|
||||
$('#checkbox-operations').slideDown();
|
||||
} else {
|
||||
$('#checkbox-operations').slideUp();
|
||||
}
|
||||
});
|
||||
|
||||
});
|
||||
|
||||
|
||||
@@ -555,3 +555,13 @@ ul {
|
||||
.snapshot-age.error {
|
||||
background-color: #ff0000;
|
||||
color: #fff; }
|
||||
|
||||
#checkbox-operations {
|
||||
background: rgba(0, 0, 0, 0.05);
|
||||
padding: 1em;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 1em;
|
||||
display: none; }
|
||||
|
||||
.checkbox-uuid > * {
|
||||
vertical-align: middle; }
|
||||
|
||||
@@ -774,3 +774,15 @@ ul {
|
||||
}
|
||||
}
|
||||
|
||||
#checkbox-operations {
|
||||
background: rgba(0, 0, 0, 0.05);
|
||||
padding: 1em;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 1em;
|
||||
display: none;
|
||||
}
|
||||
.checkbox-uuid {
|
||||
> * {
|
||||
vertical-align: middle;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@ import threading
|
||||
import time
|
||||
import uuid as uuid_builder
|
||||
from copy import deepcopy
|
||||
from os import mkdir, path, unlink
|
||||
from os import path, unlink
|
||||
from threading import Lock
|
||||
import re
|
||||
import requests
|
||||
@@ -324,12 +324,7 @@ class ChangeDetectionStore:
|
||||
new_watch.update(apply_extras)
|
||||
self.__data['watching'][new_uuid]=new_watch
|
||||
|
||||
# Get the directory ready
|
||||
output_path = "{}/{}".format(self.datastore_path, new_uuid)
|
||||
try:
|
||||
mkdir(output_path)
|
||||
except FileExistsError:
|
||||
print(output_path, "already exists.")
|
||||
self.__data['watching'][new_uuid].ensure_data_dir_exists()
|
||||
|
||||
if write_to_disk_now:
|
||||
self.sync_to_json()
|
||||
@@ -346,29 +341,35 @@ class ChangeDetectionStore:
|
||||
|
||||
# Save as PNG, PNG is larger but better for doing visual diff in the future
|
||||
def save_screenshot(self, watch_uuid, screenshot: bytes, as_error=False):
|
||||
if not self.data['watching'].get(watch_uuid):
|
||||
return
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error-screenshot.png")
|
||||
else:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "last-screenshot.png")
|
||||
|
||||
self.data['watching'][watch_uuid].ensure_data_dir_exists()
|
||||
|
||||
with open(target_path, 'wb') as f:
|
||||
f.write(screenshot)
|
||||
f.close()
|
||||
|
||||
def save_error_text(self, watch_uuid, contents):
|
||||
|
||||
if not self.data['watching'].get(watch_uuid):
|
||||
return
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "last-error.txt")
|
||||
|
||||
with open(target_path, 'w') as f:
|
||||
f.write(contents)
|
||||
|
||||
def save_xpath_data(self, watch_uuid, data, as_error=False):
|
||||
|
||||
if not self.data['watching'].get(watch_uuid):
|
||||
return
|
||||
if as_error:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
||||
else:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
|
||||
else:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
||||
|
||||
with open(target_path, 'w') as f:
|
||||
f.write(json.dumps(data))
|
||||
|
||||
@@ -57,6 +57,7 @@
|
||||
</br>
|
||||
{% if is_html_webdriver %}
|
||||
{% if screenshot %}
|
||||
<div class="snapshot-age">{{watch.snapshot_screenshot_ctime|format_timestamp_timeago}}</div>
|
||||
<img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
|
||||
{% else %}
|
||||
No screenshot available just yet! Try rechecking the page.
|
||||
|
||||
@@ -69,12 +69,6 @@
|
||||
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
|
||||
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
|
||||
</div>
|
||||
|
||||
<div class="pure-control-group">
|
||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
|
||||
|
||||
@@ -24,6 +24,14 @@
|
||||
</fieldset>
|
||||
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
|
||||
</form>
|
||||
|
||||
<form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
|
||||
<div id="checkbox-operations">
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="pause">Pause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="unpause">UnPause</button>
|
||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button>
|
||||
</div>
|
||||
<div>
|
||||
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
|
||||
{% for tag in tags %}
|
||||
@@ -41,7 +49,7 @@
|
||||
<table class="pure-table pure-table-striped watch-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>#</th>
|
||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all"/> #</th>
|
||||
<th></th>
|
||||
{% set link_order = "desc" if sort_order else "asc" %}
|
||||
{% set arrow_span = "" %}
|
||||
@@ -66,7 +74,7 @@
|
||||
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||
{% if watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}unviewed{% endif %}
|
||||
{% if watch.uuid in queued_uuids %}queued{% endif %}">
|
||||
<td class="inline">{{ loop.index }}</td>
|
||||
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} "/> <span>{{ loop.index }}</span></td>
|
||||
<td class="inline watch-controls">
|
||||
<a class="state-{{'on' if watch.paused }}" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks"/></a>
|
||||
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
|
||||
@@ -129,5 +137,6 @@
|
||||
#}
|
||||
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup
|
||||
from ..util import live_server_setup, wait_for_all_checks
|
||||
import logging
|
||||
|
||||
|
||||
@@ -29,14 +29,8 @@ def test_fetch_webdriver_content(client, live_server):
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(3)
|
||||
attempt = 0
|
||||
while attempt < 20:
|
||||
res = client.get(url_for("index"))
|
||||
if not b'Checking now' in res.data:
|
||||
break
|
||||
logging.getLogger().info("Waiting for check to not say 'Checking now'..")
|
||||
time.sleep(3)
|
||||
attempt += 1
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
res = client.get(
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from flask import make_response, request
|
||||
from flask import url_for
|
||||
import logging
|
||||
import time
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
@@ -68,6 +70,31 @@ def extract_api_key_from_UI(client):
|
||||
api_key = m.group(1)
|
||||
return api_key.strip()
|
||||
|
||||
|
||||
# kinda funky, but works for now
|
||||
def extract_UUID_from_client(client):
    """Return the UUID of the first watch linked from the watch-list page.

    Requests the ``index`` view with the given test client and extracts the
    UUID from the first ``edit/<uuid>"`` link found in the response body.

    :param client: test client used to request the index page.
    :return: the UUID string with surrounding whitespace stripped.
    :raises AssertionError: if the page contains no ``edit/<uuid>"`` link
        (previously this surfaced as an opaque ``AttributeError`` on ``None``).
    """
    import re

    res = client.get(
        url_for("index"),
    )

    # str() of the bytes body yields its repr ("b'...'"); that is sufficient
    # here because only the text between 'edit/' and the closing double quote
    # is needed.
    m = re.search(r'edit/(.+?)"', str(res.data))
    if m is None:
        raise AssertionError("No 'edit/<uuid>' link found in the watch-list page")

    uuid = m.group(1)
    return uuid.strip()
|
||||
|
||||
def wait_for_all_checks(client):
    """Wait until the watch-list page no longer shows 'Checking now'.

    Polls the ``index`` view once per second for at most 60 polls and returns
    as soon as the marker text is absent from the response. If the marker is
    still present after the last poll the function simply returns.

    :param client: test client used to request the index page.
    """
    for attempt in range(60):
        time.sleep(1)
        page = client.get(url_for("index"))
        if b'Checking now' not in page.data:
            # Nothing is being checked any more, we are done.
            return
        logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
|
||||
|
||||
def live_server_setup(live_server):
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
@@ -133,3 +160,4 @@ def live_server_setup(live_server):
|
||||
return ret
|
||||
|
||||
live_server.start()
|
||||
|
||||
|
||||
2
changedetectionio/tests/visualselector/__init__.py
Normal file
2
changedetectionio/tests/visualselector/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Tests for the app."""
|
||||
|
||||
3
changedetectionio/tests/visualselector/conftest.py
Normal file
3
changedetectionio/tests/visualselector/conftest.py
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from .. import conftest
|
||||
35
changedetectionio/tests/visualselector/test_fetch_data.py
Normal file
35
changedetectionio/tests/visualselector/test_fetch_data.py
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
|
||||
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
||||
def test_visual_selector_content_ready(client, live_server):
    """Visual selector data must be saved even when the CSS filter matches nothing.

    Adds a watch in paused state, saves it with a non-matching filter and the
    webdriver fetch backend, waits for the check to finish, then verifies the
    screenshot and xpath element files exist in the datastore.
    """
    import os

    assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
    live_server_setup(live_server)
    time.sleep(1)

    # Add our URL to the import page, maybe better to use something we control?
    # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
    test_url = 'https://news.ycombinator.com'

    quick_add_form = {"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}
    res = client.post(url_for("form_quick_watch_add"), data=quick_add_form, follow_redirects=True)
    assert b"Watch added in Paused state, saving will unpause" in res.data

    # Saving the edit form unpauses the watch; the filter deliberately matches nothing.
    edit_form = {
        "css_filter": ".does-not-exist",
        "url": test_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_webdriver",
    }
    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"unpaused" in res.data

    time.sleep(1)
    wait_for_all_checks(client)
    uuid = extract_UUID_from_client(client)

    expected_artifacts = (
        ('last-screenshot.png', "last-screenshot.png should exist"),
        ('elements.json', "xpath elements.json data should exist"),
    )
    for filename, failure_message in expected_artifacts:
        assert os.path.isfile(os.path.join('test-datastore', uuid, filename)), failure_message
|
||||
@@ -1,8 +1,7 @@
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
from changedetectionio import content_fetcher
|
||||
from changedetectionio.html_tools import FilterNotFoundInResponse
|
||||
|
||||
@@ -12,15 +11,12 @@ from changedetectionio.html_tools import FilterNotFoundInResponse
|
||||
# (another process inserts watches into the queue that are time-ready for checking)
|
||||
|
||||
|
||||
class update_worker(threading.Thread):
|
||||
class update_worker():
|
||||
current_uuid = None
|
||||
|
||||
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
|
||||
self.q = q
|
||||
self.app = app
|
||||
def __init__(self, notification_q, datastore):
|
||||
self.notification_q = notification_q
|
||||
self.datastore = datastore
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def send_content_changed_notification(self, t, watch_uuid):
|
||||
|
||||
@@ -116,180 +112,168 @@ class update_worker(threading.Thread):
|
||||
if os.path.isfile(full_path):
|
||||
os.unlink(full_path)
|
||||
|
||||
def run(self):
|
||||
def run(self, uuid):
|
||||
from changedetectionio import fetch_site_status
|
||||
|
||||
update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
|
||||
|
||||
while not self.app.config.exit.is_set():
|
||||
self.current_uuid = uuid
|
||||
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
process_changedetection_results = True
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, 1, self.datastore.data['watching'][uuid]['url']))
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
priority, uuid = self.q.get(block=False)
|
||||
except queue.Empty:
|
||||
pass
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
except PermissionError as e:
|
||||
logging.error("File permission error updating", uuid, str(e))
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
# Backend (not filters) gave zero output
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
process_changedetection_results = False
|
||||
|
||||
except content_fetcher.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 403:
|
||||
err_text = "Error - 403 (Access denied) received"
|
||||
elif e.status_code == 404:
|
||||
err_text = "Error - 404 (Page not found) received"
|
||||
elif e.status_code == 500:
|
||||
err_text = "Error - 500 (Internal server Error) received"
|
||||
else:
|
||||
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
if e.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
|
||||
if e.page_text:
|
||||
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
process_changedetection_results = False
|
||||
|
||||
except FilterNotFoundInResponse as e:
|
||||
err_text = "Warning, filter '{}' not found".format(str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
|
||||
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||
0)
|
||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||
if threshold > 0 and c >= threshold:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_filter_failure_notification(uuid)
|
||||
c = 0
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
|
||||
process_changedetection_results = True
|
||||
|
||||
except content_fetcher.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.JSActionExceptions as e:
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
err_text = "{} - {}".format(err_text, e.message)
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except Exception as e:
|
||||
logging.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
# Other serious error
|
||||
process_changedetection_results = False
|
||||
else:
|
||||
self.current_uuid = uuid
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
return
|
||||
|
||||
if uuid in list(self.datastore.data['watching'].keys()):
|
||||
changed_detected = False
|
||||
contents = b''
|
||||
screenshot = False
|
||||
update_obj= {}
|
||||
xpath_data = False
|
||||
process_changedetection_results = True
|
||||
print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url']))
|
||||
now = time.time()
|
||||
# Mark that we never had any failures
|
||||
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
try:
|
||||
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
if not isinstance(contents, (bytes, bytearray)):
|
||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||
except PermissionError as e:
|
||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.ReplyWithContentButNoText as e:
|
||||
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||
# Page had elements/content but no renderable text
|
||||
# Backend (not filters) gave zero output
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (With {} reply code).".format(e.status_code)})
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot)
|
||||
process_changedetection_results = False
|
||||
self.cleanup_error_artifacts(uuid)
|
||||
|
||||
except content_fetcher.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 403:
|
||||
err_text = "Error - 403 (Access denied) received"
|
||||
elif e.status_code == 404:
|
||||
err_text = "Error - 404 (Page not found) received"
|
||||
elif e.status_code == 500:
|
||||
err_text = "Error - 500 (Internal server Error) received"
|
||||
else:
|
||||
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
if e.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=e.xpath_data, as_error=True)
|
||||
if e.page_text:
|
||||
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
process_changedetection_results = False
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
except FilterNotFoundInResponse as e:
|
||||
err_text = "Warning, filter '{}' not found".format(str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
# So that we get a trigger when the content is added again
|
||||
'previous_md5': ''})
|
||||
# A change was detected
|
||||
if changed_detected:
|
||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||
|
||||
# Only when enabled, send the notification
|
||||
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
|
||||
c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5)
|
||||
c += 1
|
||||
# Send notification if we reached the threshold?
|
||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
|
||||
0)
|
||||
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
|
||||
if threshold > 0 and c >= threshold:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_filter_failure_notification(uuid)
|
||||
c = 0
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
||||
|
||||
process_changedetection_results = True
|
||||
|
||||
except content_fetcher.EmptyReply as e:
|
||||
# Some kind of custom to-str handler in the exception handler that does this?
|
||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.ScreenshotUnavailable as e:
|
||||
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
process_changedetection_results = False
|
||||
except content_fetcher.JSActionExceptions as e:
|
||||
err_text = "Error running JS Actions - Page request - "+e.message
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
# Other serious error
|
||||
process_changedetection_results = False
|
||||
else:
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
|
||||
update_obj['consecutive_filter_failures'] = 0
|
||||
|
||||
self.cleanup_error_artifacts(uuid)
|
||||
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
watch = self.datastore.data['watching'][uuid]
|
||||
fname = "" # Saved history text filename
|
||||
|
||||
# For the FIRST time we check a site, or a change detected, save the snapshot.
|
||||
if changed_detected or not watch['last_checked']:
|
||||
# A change was detected
|
||||
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
|
||||
|
||||
self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
||||
|
||||
# A change was detected
|
||||
if changed_detected:
|
||||
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
|
||||
|
||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||
if watch.history_n >= 2:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_content_changed_notification(self, watch_uuid=uuid)
|
||||
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
|
||||
if watch.history_n >= 2:
|
||||
if not self.datastore.data['watching'][uuid].get('notification_muted'):
|
||||
self.send_content_changed_notification(self, watch_uuid=uuid)
|
||||
|
||||
|
||||
except Exception as e:
|
||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||
print("!!!! Exception in update_worker !!!\n", e)
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
except Exception as e:
|
||||
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
|
||||
print("!!!! Exception in update_worker !!!\n", e)
|
||||
logging.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||
'last_checked': round(time.time())})
|
||||
# Always record that we atleast tried
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||
'last_checked': round(time.time())})
|
||||
|
||||
# Always save the screenshot if it's available
|
||||
if screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
|
||||
if xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
|
||||
# Always save the screenshot if it's available
|
||||
if update_handler.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
||||
if update_handler.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
||||
|
||||
|
||||
self.current_uuid = None # Done
|
||||
self.q.task_done()
|
||||
|
||||
# Give the CPU time to interrupt
|
||||
time.sleep(0.1)
|
||||
|
||||
self.app.config.exit.wait(1)
|
||||
self.current_uuid = None
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 190 KiB After Width: | Height: | Size: 209 KiB |
@@ -33,6 +33,8 @@ bs4
|
||||
# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
|
||||
lxml
|
||||
|
||||
PriorityThreadPoolExecutor
|
||||
|
||||
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
||||
selenium ~= 4.1.0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user