Compare commits

..

16 Commits

Author SHA1 Message Date
dgtlmoon
7b664c43ea Oops 2023-02-11 16:29:15 +01:00
dgtlmoon
d4eb9f2b64 Use Loguru for logging 2023-02-11 16:26:09 +01:00
dgtlmoon
7b8b50138b Deleting a watch now removes the entire watch storage directory (#1408) 2023-02-11 14:10:54 +01:00
dgtlmoon
01af21f856 Use year/date in the backup snapshot zip filename instead of epoch seconds (#1377 #1407) 2023-02-11 13:44:16 +01:00
dgtlmoon
f7f4ab314b PDF text conversion - fix bug where it detected a site as a PDF file incorrectly Re #1392 #1393 2023-02-08 09:32:57 +01:00
dgtlmoon
ce0355c0ad Remove unused code (#1394) 2023-02-08 09:32:15 +01:00
dgtlmoon
0f43213d9d UI - preview page - Fix bug where playwright/chrome was system default and [preview] didn't show snapshot 2023-02-07 16:55:34 +01:00
dgtlmoon
93c57d9fad Adding example docker-compose.yml config to ignore errors from self-signed certs #1389 2023-02-06 17:24:12 +01:00
dgtlmoon
3cdd075baf 0.40.2 2023-02-03 19:20:13 +01:00
dgtlmoon
5c617e8530 Code cleanup - remove unused import 2023-02-03 18:35:58 +01:00
dgtlmoon
1a48965ba1 UI fix - Fix logic for showing screenshot on diff page (#1379) 2023-02-03 11:23:48 +01:00
dgtlmoon
41856c4ed8 Re #1365 - Playwright - Browser "Service Workers" should be enabled by default but unset via env var PLAYWRIGHT_SERVICE_WORKERS=block (#1367) 2023-02-01 20:50:40 +01:00
dgtlmoon
0ed897c50f New setting to allow passwordless access to your 'diff' page - perfect for sharing your diff page securely, refactored login code (#1357) 2023-01-29 22:36:55 +01:00
dgtlmoon
f8e587c415 Security - Possible stored XSS in watch list - Only permit HTTP/HTTPS/FTP by default - override with env var SAFE_PROTOCOL_REGEX (#1359) 2023-01-29 11:12:06 +01:00
dgtlmoon
d47a25eb6d Playwright - Removing old bug fix where playwright needed screenshot called twice to make the full screen screenshot be actually fullscreen (#1356) 2023-01-28 15:02:53 +01:00
dgtlmoon
9a0792d185 Fetch backend UI default fixes for VisualSelector and BrowserSteps (#1344) 2023-01-25 19:47:54 +01:00
24 changed files with 391 additions and 244 deletions

View File

@@ -67,10 +67,10 @@ jobs:
sleep 3 sleep 3
# Should return 0 (no error) when grep finds it # Should return 0 (no error) when grep finds it
curl -s http://localhost:5556 |grep -q checkbox-uuid curl -s http://localhost:5556 |grep -q checkbox-uuid
curl -s http://localhost:5556/rss|grep -q rss-specification
# and IPv6 # and IPv6
curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
#export WEBDRIVER_URL=http://localhost:4444/wd/hub #export WEBDRIVER_URL=http://localhost:4444/wd/hub
#pytest tests/fetchers/test_content.py #pytest tests/fetchers/test_content.py

View File

@@ -1,8 +1,18 @@
#!/usr/bin/python3 #!/usr/bin/python3
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from flask_compress import Compress as FlaskCompress
from flask_login import current_user
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from functools import wraps
from threading import Event
import datetime import datetime
import flask_login import flask_login
import logging from loguru import logger
import os import os
import pytz import pytz
import queue import queue
@@ -10,12 +20,6 @@ import threading
import time import time
import timeago import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from threading import Event
from flask import ( from flask import (
Flask, Flask,
abort, abort,
@@ -28,15 +32,11 @@ from flask import (
session, session,
url_for, url_for,
) )
from flask_compress import Compress as FlaskCompress
from flask_login import login_required
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
from changedetectionio.api import api_v1 from changedetectionio.api import api_v1
__version__ = '0.40.1.1' __version__ = '0.40.2'
datastore = None datastore = None
@@ -53,7 +53,6 @@ app = Flask(__name__,
static_url_path="", static_url_path="",
static_folder="static", static_folder="static",
template_folder="templates") template_folder="templates")
from flask_compress import Compress
# Super handy for compressing large BrowserSteps responses and others # Super handy for compressing large BrowserSteps responses and others
FlaskCompress(app) FlaskCompress(app)
@@ -65,8 +64,6 @@ app.config.exit = Event()
app.config['NEW_VERSION_AVAILABLE'] = False app.config['NEW_VERSION_AVAILABLE'] = False
app.config['LOGIN_DISABLED'] = False
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True #app.config["EXPLAIN_TEMPLATE_LOADING"] = True
# Disables caching of the templates # Disables caching of the templates
@@ -74,7 +71,6 @@ app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols') app.jinja_env.add_extension('jinja2.ext.loopcontrols')
csrf = CSRFProtect() csrf = CSRFProtect()
csrf.init_app(app) csrf.init_app(app)
notification_debug_log=[] notification_debug_log=[]
watch_api = Api(app, decorators=[csrf.exempt]) watch_api = Api(app, decorators=[csrf.exempt])
@@ -149,7 +145,6 @@ class User(flask_login.UserMixin):
# Compare given password against JSON store or Env var # Compare given password against JSON store or Env var
def check_password(self, password): def check_password(self, password):
import base64 import base64
import hashlib import hashlib
@@ -157,11 +152,9 @@ class User(flask_login.UserMixin):
raw_salt_pass = os.getenv("SALTED_PASS", False) raw_salt_pass = os.getenv("SALTED_PASS", False)
if not raw_salt_pass: if not raw_salt_pass:
raw_salt_pass = datastore.data['settings']['application']['password'] raw_salt_pass = datastore.data['settings']['application'].get('password')
raw_salt_pass = base64.b64decode(raw_salt_pass) raw_salt_pass = base64.b64decode(raw_salt_pass)
salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt
# Use the exact same setup you used to generate the key, but this time put in the password to check # Use the exact same setup you used to generate the key, but this time put in the password to check
@@ -171,21 +164,44 @@ class User(flask_login.UserMixin):
salt_from_storage, salt_from_storage,
100000 100000
) )
new_key = salt_from_storage + new_key new_key = salt_from_storage + new_key
return new_key == raw_salt_pass return new_key == raw_salt_pass
pass pass
def login_optionally_required(func):
@wraps(func)
def decorated_view(*args, **kwargs):
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
# Permitted
if request.endpoint == 'static_content' and request.view_args['group'] == 'styles':
return func(*args, **kwargs)
# Permitted
elif request.endpoint == 'diff_history_page' and datastore.data['settings']['application'].get('shared_diff_access'):
return func(*args, **kwargs)
elif request.method in flask_login.config.EXEMPT_METHODS:
return func(*args, **kwargs)
elif app.config.get('LOGIN_DISABLED'):
return func(*args, **kwargs)
elif has_password_enabled and not current_user.is_authenticated:
return app.login_manager.unauthorized()
return func(*args, **kwargs)
return decorated_view
def changedetection_app(config=None, datastore_o=None): def changedetection_app(config=None, datastore_o=None):
global datastore global datastore
datastore = datastore_o datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source # so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var) # (instead of the global var)
app.config['DATASTORE']=datastore_o app.config['DATASTORE'] = datastore_o
#app.config.update(config or {})
login_manager = flask_login.LoginManager(app) login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login' login_manager.login_view = 'login'
@@ -213,6 +229,8 @@ def changedetection_app(config=None, datastore_o=None):
# https://flask-cors.readthedocs.io/en/latest/ # https://flask-cors.readthedocs.io/en/latest/
# CORS(app) # CORS(app)
@login_manager.user_loader @login_manager.user_loader
def user_loader(email): def user_loader(email):
user = User() user = User()
@@ -221,7 +239,7 @@ def changedetection_app(config=None, datastore_o=None):
@login_manager.unauthorized_handler @login_manager.unauthorized_handler
def unauthorized_handler(): def unauthorized_handler():
# @todo validate its a URL of this host and use that flash("You must be logged in, please log in.", 'error')
return redirect(url_for('login', next=url_for('index'))) return redirect(url_for('login', next=url_for('index')))
@app.route('/logout') @app.route('/logout')
@@ -234,10 +252,6 @@ def changedetection_app(config=None, datastore_o=None):
@app.route('/login', methods=['GET', 'POST']) @app.route('/login', methods=['GET', 'POST'])
def login(): def login():
if not datastore.data['settings']['application']['password'] and not os.getenv("SALTED_PASS", False):
flash("Login not required, no password enabled.", "notice")
return redirect(url_for('index'))
if request.method == 'GET': if request.method == 'GET':
if flask_login.current_user.is_authenticated: if flask_login.current_user.is_authenticated:
flash("Already logged in") flash("Already logged in")
@@ -272,27 +286,22 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('login')) return redirect(url_for('login'))
@app.before_request @app.before_request
def do_something_whenever_a_request_comes_in(): def before_request_handle_cookie_x_settings():
# Disable password login if there is not one set
# (No password in settings or env var)
app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
# Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix # Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers: if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix'] app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix'] app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
# For the RSS path, allow access via a token return None
if request.path == '/rss' and request.args.get('token'):
app_rss_token = datastore.data['settings']['application']['rss_access_token']
rss_url_token = request.args.get('token')
if app_rss_token == rss_url_token:
app.config['LOGIN_DISABLED'] = True
@app.route("/rss", methods=['GET']) @app.route("/rss", methods=['GET'])
@login_required
def rss(): def rss():
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from . import diff from . import diff
limit_tag = request.args.get('tag') limit_tag = request.args.get('tag')
@@ -366,7 +375,7 @@ def changedetection_app(config=None, datastore_o=None):
return response return response
@app.route("/", methods=['GET']) @app.route("/", methods=['GET'])
@login_required @login_optionally_required
def index(): def index():
from changedetectionio import forms from changedetectionio import forms
@@ -430,7 +439,7 @@ def changedetection_app(config=None, datastore_o=None):
# AJAX endpoint for sending a test # AJAX endpoint for sending a test
@app.route("/notification/send-test", methods=['POST']) @app.route("/notification/send-test", methods=['POST'])
@login_required @login_optionally_required
def ajax_callback_send_notification_test(): def ajax_callback_send_notification_test():
import apprise import apprise
@@ -463,7 +472,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/clear_history/<string:uuid>", methods=['GET']) @app.route("/clear_history/<string:uuid>", methods=['GET'])
@login_required @login_optionally_required
def clear_watch_history(uuid): def clear_watch_history(uuid):
try: try:
datastore.clear_watch_history(uuid) datastore.clear_watch_history(uuid)
@@ -475,7 +484,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/clear_history", methods=['GET', 'POST']) @app.route("/clear_history", methods=['GET', 'POST'])
@login_required @login_optionally_required
def clear_all_history(): def clear_all_history():
if request.method == 'POST': if request.method == 'POST':
@@ -496,43 +505,8 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html") output = render_template("clear_all_history.html")
return output return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
import hashlib
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
if newest_history_key:
with open(datastore.data['watching'][uuid].history[newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
if datastore.data['settings']['application'].get('ignore_whitespace', False):
checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
else:
checksum = hashlib.md5(stripped_content).hexdigest()
return checksum
return datastore.data['watching'][uuid]['previous_md5']
@app.route("/edit/<string:uuid>", methods=['GET', 'POST']) @app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_required @login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ? # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
@@ -586,6 +560,7 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate(): if request.method == 'POST' and form.validate():
extra_update_obj = {} extra_update_obj = {}
if request.args.get('unpause_on_save'): if request.args.get('unpause_on_save'):
@@ -677,7 +652,7 @@ def changedetection_app(config=None, datastore_o=None):
return output return output
@app.route("/settings", methods=['GET', "POST"]) @app.route("/settings", methods=['GET', "POST"])
@login_required @login_optionally_required
def settings_page(): def settings_page():
from changedetectionio import content_fetcher, forms from changedetectionio import content_fetcher, forms
@@ -757,7 +732,7 @@ def changedetection_app(config=None, datastore_o=None):
return output return output
@app.route("/import", methods=['GET', "POST"]) @app.route("/import", methods=['GET', "POST"])
@login_required @login_optionally_required
def import_page(): def import_page():
remaining_urls = [] remaining_urls = []
if request.method == 'POST': if request.method == 'POST':
@@ -795,7 +770,7 @@ def changedetection_app(config=None, datastore_o=None):
# Clear all statuses, so we do not see the 'unviewed' class # Clear all statuses, so we do not see the 'unviewed' class
@app.route("/form/mark-all-viewed", methods=['GET']) @app.route("/form/mark-all-viewed", methods=['GET'])
@login_required @login_optionally_required
def mark_all_viewed(): def mark_all_viewed():
# Save the current newest history as the most recently viewed # Save the current newest history as the most recently viewed
@@ -805,7 +780,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/diff/<string:uuid>", methods=['GET', 'POST']) @app.route("/diff/<string:uuid>", methods=['GET', 'POST'])
@login_required @login_optionally_required
def diff_history_page(uuid): def diff_history_page(uuid):
from changedetectionio import forms from changedetectionio import forms
@@ -881,8 +856,13 @@ def changedetection_app(config=None, datastore_o=None):
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( is_html_webdriver = False
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
output = render_template("diff.html", output = render_template("diff.html",
current_diff_url=watch['url'], current_diff_url=watch['url'],
@@ -897,6 +877,7 @@ def changedetection_app(config=None, datastore_o=None):
left_sticky=True, left_sticky=True,
newest=newest_version_file_contents, newest=newest_version_file_contents,
newest_version_timestamp=dates[-1], newest_version_timestamp=dates[-1],
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
previous=previous_version_file_contents, previous=previous_version_file_contents,
screenshot=screenshot_url, screenshot=screenshot_url,
uuid=uuid, uuid=uuid,
@@ -907,7 +888,7 @@ def changedetection_app(config=None, datastore_o=None):
return output return output
@app.route("/preview/<string:uuid>", methods=['GET']) @app.route("/preview/<string:uuid>", methods=['GET'])
@login_required @login_optionally_required
def preview_page(uuid): def preview_page(uuid):
content = [] content = []
ignored_line_numbers = [] ignored_line_numbers = []
@@ -927,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( is_html_webdriver = False
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
# Never requested successfully, but we detected a fetch error # Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -997,7 +979,7 @@ def changedetection_app(config=None, datastore_o=None):
return output return output
@app.route("/settings/notification-logs", methods=['GET']) @app.route("/settings/notification-logs", methods=['GET'])
@login_required @login_optionally_required
def notification_logs(): def notification_logs():
global notification_debug_log global notification_debug_log
output = render_template("notification-log.html", output = render_template("notification-log.html",
@@ -1007,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None):
# We're good but backups are even better! # We're good but backups are even better!
@app.route("/backup", methods=['GET']) @app.route("/backup", methods=['GET'])
@login_required @login_optionally_required
def get_backup(): def get_backup():
import zipfile import zipfile
@@ -1019,7 +1001,8 @@ def changedetection_app(config=None, datastore_o=None):
os.unlink(previous_backup_filename) os.unlink(previous_backup_filename)
# create a ZipFile object # create a ZipFile object
backupname = "changedetection-backup-{}.zip".format(int(time.time())) timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = "changedetection-backup-{}.zip".format(timestamp)
backup_filepath = os.path.join(datastore_o.datastore_path, backupname) backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w", with zipfile.ZipFile(backup_filepath, "w",
@@ -1127,13 +1110,14 @@ def changedetection_app(config=None, datastore_o=None):
abort(404) abort(404)
@app.route("/form/add/quickwatch", methods=['POST']) @app.route("/form/add/quickwatch", methods=['POST'])
@login_required @login_optionally_required
def form_quick_watch_add(): def form_quick_watch_add():
from changedetectionio import forms from changedetectionio import forms
form = forms.quickWatchForm(request.form) form = forms.quickWatchForm(request.form)
if not form.validate(): if not form.validate():
flash("Error") for widget, l in form.errors.items():
flash(','.join(l), 'error')
return redirect(url_for('index')) return redirect(url_for('index'))
url = request.form.get('url').strip() url = request.form.get('url').strip()
@@ -1144,22 +1128,21 @@ def changedetection_app(config=None, datastore_o=None):
add_paused = request.form.get('edit_and_watch_submit_button') != None add_paused = request.form.get('edit_and_watch_submit_button') != None
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused}) new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
if new_uuid:
if not add_paused and new_uuid: if add_paused:
# Straight into the queue. flash('Watch added in Paused state, saving will unpause.')
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
flash("Watch added.") else:
# Straight into the queue.
if add_paused: update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash('Watch added in Paused state, saving will unpause.') flash("Watch added.")
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/api/delete", methods=['GET']) @app.route("/api/delete", methods=['GET'])
@login_required @login_optionally_required
def form_delete(): def form_delete():
uuid = request.args.get('uuid') uuid = request.args.get('uuid')
@@ -1176,7 +1159,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/api/clone", methods=['GET']) @app.route("/api/clone", methods=['GET'])
@login_required @login_optionally_required
def form_clone(): def form_clone():
uuid = request.args.get('uuid') uuid = request.args.get('uuid')
# More for testing, possible to return the first/only # More for testing, possible to return the first/only
@@ -1184,13 +1167,14 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop() uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid) new_uuid = datastore.clone(uuid)
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) if new_uuid:
flash('Cloned.') update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/api/checknow", methods=['GET']) @app.route("/api/checknow", methods=['GET'])
@login_required @login_optionally_required
def form_watch_checknow(): def form_watch_checknow():
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True}))) # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
tag = request.args.get('tag') tag = request.args.get('tag')
@@ -1224,7 +1208,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index', tag=tag)) return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST']) @app.route("/form/checkbox-operations", methods=['POST'])
@login_required @login_optionally_required
def form_watch_list_checkbox_operations(): def form_watch_list_checkbox_operations():
op = request.form['op'] op = request.form['op']
uuids = request.form.getlist('uuids') uuids = request.form.getlist('uuids')
@@ -1288,7 +1272,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index')) return redirect(url_for('index'))
@app.route("/api/share-url", methods=['GET']) @app.route("/api/share-url", methods=['GET'])
@login_required @login_optionally_required
def form_share_put_watch(): def form_share_put_watch():
"""Given a watch UUID, upload the info and return a share-link """Given a watch UUID, upload the info and return a share-link
the share-link can be imported/added""" the share-link can be imported/added"""
@@ -1333,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e: except Exception as e:
logging.error("Error sharing -{}".format(str(e))) logger.error("Error sharing -{}".format(str(e)))
flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error') flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
# https://changedetection.io/share/VrMv05wpXyQa # https://changedetection.io/share/VrMv05wpXyQa
@@ -1408,7 +1392,7 @@ def notification_runner():
sent_obj = notification.process_notification(n_object, datastore) sent_obj = notification.process_notification(n_object, datastore)
except Exception as e: except Exception as e:
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e))) logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
# UUID wont be present when we submit a 'test' from the global settings # UUID wont be present when we submit a 'test' from the global settings
if 'uuid' in n_object: if 'uuid' in n_object:
@@ -1431,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
proxy_last_called_time = {} proxy_last_called_time = {}
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20)) recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds) logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
# Spin up Workers that do the fetching # Spin up Workers that do the fetching
# Can be overriden by ENV or use the default settings # Can be overriden by ENV or use the default settings
@@ -1476,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
now = time.time() now = time.time()
watch = datastore.data['watching'].get(uuid) watch = datastore.data['watching'].get(uuid)
if not watch: if not watch:
logging.error("Watch: {} no longer present.".format(uuid)) logger.error("Watch: {} no longer present.".format(uuid))
continue continue
# No need todo further processing if it's paused # No need todo further processing if it's paused
@@ -1509,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
time_since_proxy_used = int(time.time() - proxy_last_used_time) time_since_proxy_used = int(time.time() - proxy_last_used_time)
if time_since_proxy_used < proxy_list_reuse_time_minimum: if time_since_proxy_used < proxy_list_reuse_time_minimum:
# Not enough time difference reached, skip this watch # Not enough time difference reached, skip this watch
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid, logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
watch_proxy, watch_proxy,
time_since_proxy_used, time_since_proxy_used,
proxy_list_reuse_time_minimum)) proxy_list_reuse_time_minimum))
@@ -1520,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time()) priority = int(time.time())
print( logger.info(
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format( "> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
uuid, uuid,
watch['last_checked'], watch['last_checked'],

View File

@@ -202,8 +202,11 @@ class CreateWatch(Resource):
del extras['url'] del extras['url']
new_uuid = self.datastore.add_watch(url=url, extras=extras) new_uuid = self.datastore.add_watch(url=url, extras=extras)
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) if new_uuid:
return {'uuid': new_uuid}, 201 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
@auth.check_token @auth.check_token
def get(self): def get(self):

View File

@@ -23,11 +23,10 @@
from distutils.util import strtobool from distutils.util import strtobool
from flask import Blueprint, request, make_response from flask import Blueprint, request, make_response
from flask_login import login_required
import os import os
import logging from loguru import logger
from changedetectionio.store import ChangeDetectionStore from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {} browsersteps_live_ui_o = {}
browsersteps_playwright_browser_interface = None browsersteps_playwright_browser_interface = None
browsersteps_playwright_browser_interface_browser = None browsersteps_playwright_browser_interface_browser = None
@@ -50,7 +49,7 @@ def cleanup_playwright_session():
browsersteps_playwright_browser_interface_end_time = None browsersteps_playwright_browser_interface_end_time = None
browsersteps_playwright_browser_interface_start_time = None browsersteps_playwright_browser_interface_start_time = None
print("Cleaning up old playwright session because time was up, calling .goodbye()") logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
try: try:
browsersteps_playwright_browser_interface_context.goodbye() browsersteps_playwright_browser_interface_context.goodbye()
except Exception as e: except Exception as e:
@@ -65,7 +64,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates") browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
@login_required @login_optionally_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST']) @browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
def browsersteps_ui_update(): def browsersteps_ui_update():
import base64 import base64
@@ -115,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
this_session = browsersteps_live_ui_o.get(browsersteps_session_id) this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
if not this_session: if not this_session:
print("Browser exited") logger.info("Browser exited")
return make_response('Browser session ran out of time :( Please reload this page.', 401) return make_response('Browser session ran out of time :( Please reload this page.', 401)
this_session.call_action(action_name=step_operation, this_session.call_action(action_name=step_operation,
@@ -123,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
optional_value=step_optional_value) optional_value=step_optional_value)
except Exception as e: except Exception as e:
print("Exception when calling step operation", step_operation, str(e)) logger.info("Exception when calling step operation", step_operation, str(e))
# Try to find something of value to give back to the user # Try to find something of value to give back to the user
return make_response(str(e).splitlines()[0], 401) return make_response(str(e).splitlines()[0], 401)
@@ -140,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if request.method == 'GET': if request.method == 'GET':
if not browsersteps_playwright_browser_interface: if not browsersteps_playwright_browser_interface:
print("Starting connection with playwright") logger.info("Starting connection with playwright")
logging.debug("browser_steps.py connecting") logging.debug("browser_steps.py connecting")
global browsersteps_playwright_browser_interface_context global browsersteps_playwright_browser_interface_context
@@ -163,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to start the Playwright session properly, is it running?', 401) return make_response('Unable to start the Playwright session properly, is it running?', 401)
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3) browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
print("Starting connection with playwright - done") logger.info("Starting connection with playwright - done")
if not browsersteps_live_ui_o.get(browsersteps_session_id): if not browsersteps_live_ui_o.get(browsersteps_session_id):
# Boot up a new session # Boot up a new session
@@ -173,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
proxy_url = datastore.proxy_list.get(proxy_id).get('url') proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url: if proxy_url:
proxy = {'server': proxy_url} proxy = {'server': proxy_url}
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url)) logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
# Begin the new "Playwright Context" that re-uses the playwright interface # Begin the new "Playwright Context" that re-uses the playwright interface
# Each session is a "Playwright Context" as a list, that uses the playwright interface # Each session is a "Playwright Context" as a list, that uses the playwright interface

View File

@@ -51,7 +51,7 @@ class steppable_browser_interface():
if call_action_name == 'choose_one': if call_action_name == 'choose_one':
return return
print("> action calling", call_action_name) logger.info("> action calling", call_action_name)
# https://playwright.dev/python/docs/selectors#xpath-selectors # https://playwright.dev/python/docs/selectors#xpath-selectors
if selector.startswith('/') and not selector.startswith('//'): if selector.startswith('/') and not selector.startswith('//'):
selector = "xpath=" + selector selector = "xpath=" + selector
@@ -70,7 +70,7 @@ class steppable_browser_interface():
action_handler(selector, optional_value) action_handler(selector, optional_value)
self.page.wait_for_timeout(3 * 1000) self.page.wait_for_timeout(3 * 1000)
print("Call action done in", time.time() - now) logger.info("Call action done in", time.time() - now)
def action_goto_url(self, url, optional_value): def action_goto_url(self, url, optional_value):
# self.page.set_viewport_size({"width": 1280, "height": 5000}) # self.page.set_viewport_size({"width": 1280, "height": 5000})
@@ -81,7 +81,7 @@ class steppable_browser_interface():
# - `'commit'` - consider operation to be finished when network response is received and the document started loading. # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors' # This seemed to solve nearly all 'TimeoutErrors'
print("Time to goto URL ", time.time() - now) logger.info("Time to goto URL ", time.time() - now)
def action_click_element_containing_text(self, selector=None, value=''): def action_click_element_containing_text(self, selector=None, value=''):
if not len(value.strip()): if not len(value.strip()):
@@ -100,14 +100,14 @@ class steppable_browser_interface():
self.page.evaluate(value) self.page.evaluate(value)
def action_click_element(self, selector, value): def action_click_element(self, selector, value):
print("Clicking element") logger.info("Clicking element")
if not len(selector.strip()): if not len(selector.strip()):
return return
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500)) self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
def action_click_element_if_exists(self, selector, value): def action_click_element_if_exists(self, selector, value):
import playwright._impl._api_types as _api_types import playwright._impl._api_types as _api_types
print("Clicking element if exists") logger.info("Clicking element if exists")
if not len(selector.strip()): if not len(selector.strip()):
return return
try: try:
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
self.mark_as_closed, self.mark_as_closed,
) )
# Listen for all console events and handle errors # Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
print("Time to browser setup", time.time() - now) logger.info("Time to browser setup", time.time() - now)
self.page.wait_for_timeout(1 * 1000) self.page.wait_for_timeout(1 * 1000)
def mark_as_closed(self): def mark_as_closed(self):
print("Page closed, cleaning up..") logger.info("Page closed, cleaning up..")
@property @property
def has_expired(self): def has_expired(self):
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
# So the JS will find the smallest one first # So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
print("Time to complete get_current_state of browser", time.time() - now) logger.info("Time to complete get_current_state of browser", time.time() - now)
# except # except
# playwright._impl._api_types.Error: Browser closed. # playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer? # @todo show some countdown timer?

View File

@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
import eventlet import eventlet
import eventlet.wsgi import eventlet.wsgi
import getopt import getopt
from loguru import logger
import os import os
import signal import signal
import socket import socket
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
global app global app
global datastore global datastore
# app.config.exit.set() # app.config.exit.set()
print('Shutdown: Got SIGTERM, DB saved to disk') logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
datastore.sync_to_json() datastore.sync_to_json()
# raise SystemExit # raise SystemExit

View File

@@ -2,7 +2,7 @@ import hashlib
from abc import abstractmethod from abc import abstractmethod
import chardet import chardet
import json import json
import logging from loguru import logger
import os import os
import requests import requests
import sys import sys
@@ -155,7 +155,7 @@ class Fetcher():
for step in valid_steps: for step in valid_steps:
step_n += 1 step_n += 1
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation'])) logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
self.screenshot_step("before-"+str(step_n)) self.screenshot_step("before-"+str(step_n))
self.save_step_html("before-"+str(step_n)) self.save_step_html("before-"+str(step_n))
try: try:
@@ -252,9 +252,6 @@ class base_html_playwright(Fetcher):
self.proxy['password'] = parsed.password self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''): def screenshot_step(self, step_n=''):
# There's a bug where we need to do it twice or it doesnt take the whole page, dont know why.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85) screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
if self.browser_steps_screenshot_path is not None: if self.browser_steps_screenshot_path is not None:
@@ -300,8 +297,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy, proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others # This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True, bypass_csp=True,
# Can't think why we need the service workers for our use case? # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
service_workers='block', service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
# Should never be needed # Should never be needed
accept_downloads=False accept_downloads=False
) )
@@ -361,28 +358,20 @@ class base_html_playwright(Fetcher):
print ("Content Fetcher > Response object was none") print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None) raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
# Run Browser Steps here # Run Browser Steps here
self.iterate_browser_steps() self.iterate_browser_steps()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait) time.sleep(extra_wait)
self.content = self.page.content() self.content = self.page.content()
self.status_code = response.status self.status_code = response.status
if len(self.page.content().strip()) == 0: if len(self.page.content().strip()) == 0:
context.close() context.close()
browser.close() browser.close()
print ("Content Fetcher > Content was empty") print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status) raise EmptyReply(url=url, status_code=response.status)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status self.status_code = response.status
self.content = self.page.content() self.content = self.page.content()
self.headers = response.all_headers() self.headers = response.all_headers()
@@ -403,8 +392,6 @@ class base_html_playwright(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here # acceptable screenshot quality here
try: try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot # The actual screenshot
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as e: except Exception as e:
@@ -523,7 +510,7 @@ class base_html_webdriver(Fetcher):
try: try:
self.driver.quit() self.driver.quit()
except Exception as e: except Exception as e:
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e)) logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py! # "html_requests" is listed as the default fetcher in store.py!

View File

@@ -1,6 +1,6 @@
import hashlib import hashlib
import json import json
import logging from loguru import logger
import os import os
import re import re
import urllib3 import urllib3
@@ -105,7 +105,7 @@ class perform_site_check():
proxy_url = None proxy_url = None
if proxy_id: if proxy_id:
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url') proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
print("UUID {} Using proxy {}".format(uuid, proxy_url)) logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
fetcher = klass(proxy_override=proxy_url) fetcher = klass(proxy_override=proxy_url)
@@ -135,7 +135,7 @@ class perform_site_check():
# Track the content type # Track the content type
update_obj['content_type'] = fetcher.headers.get('Content-Type', '') update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU # Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest() update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
@@ -349,6 +349,7 @@ class perform_site_check():
# The main thing that all this at the moment comes down to :) # The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5: if watch.get('previous_md5') != fetched_md5:
logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
changed_detected = True changed_detected = True
# Looks like something changed, but did it match all the rules? # Looks like something changed, but did it match all the rules?
@@ -366,10 +367,10 @@ class perform_site_check():
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure? # One or more lines? unsure?
if not has_unique_lines: if not has_unique_lines:
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid)) logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
changed_detected = False changed_detected = False
else: else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid)) logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
# Always record the new checksum # Always record the new checksum
update_obj["previous_md5"] = fetched_md5 update_obj["previous_md5"] = fetched_md5
@@ -378,4 +379,9 @@ class perform_site_check():
if not watch.get('previous_md5'): if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5 watch['previous_md5'] = fetched_md5
if changed_detected:
logger.success("UUID: {} Change detected after all filters applied.", uuid)
else:
logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
return changed_detected, update_obj, text_content_before_ignored_filter return changed_detected, update_obj, text_content_before_ignored_filter

View File

@@ -232,12 +232,17 @@ class validateURL(object):
def __call__(self, form, field): def __call__(self, form, field):
import validators import validators
try: try:
validators.url(field.data.strip()) validators.url(field.data.strip())
except validators.ValidationFailure: except validators.ValidationFailure:
message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip())) message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip()))
raise ValidationError(message) raise ValidationError(message)
from .model.Watch import is_safe_url
if not is_safe_url(field.data):
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX')
class ValidateListRegex(object): class ValidateListRegex(object):
""" """
@@ -454,17 +459,17 @@ class globalSettingsRequestForm(Form):
# datastore.data['settings']['application'].. # datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm): class globalSettingsApplicationForm(commonSettingsForm):
base_url = StringField('Base URL', validators=[validators.Optional()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()]) api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
base_url = StringField('Base URL', validators=[validators.Optional()])
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField() password = SaltyPasswordField()
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"}, render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, validators=[validators.NumberRange(min=0,

View File

@@ -40,6 +40,7 @@ class model(dict):
'notification_body': default_notification_body, 'notification_body': default_notification_body,
'notification_format': default_notification_format, 'notification_format': default_notification_format,
'schema_version' : 0, 'schema_version' : 0,
'shared_diff_access': False,
'webdriver_delay': None # Extra delay in seconds before extracting text 'webdriver_delay': None # Extra delay in seconds before extracting text
} }
} }

View File

@@ -1,9 +1,14 @@
from distutils.util import strtobool from distutils.util import strtobool
import logging from loguru import logger
import os import os
import re
import time import time
import uuid import uuid
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60)) minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -55,6 +60,22 @@ base_config = {
'webdriver_js_execute_code': None, # Run before change-detection 'webdriver_js_execute_code': None, # Run before change-detection
} }
def is_safe_url(test_url):
    """Return True when *test_url* starts with a protocol allowed by SAFE_PROTOCOL_REGEX.

    Protects against ``javascript:`` style URLs being stored and later rendered
    as links. See https://github.com/dgtlmoon/changedetection.io/issues/1358

    The allow-list is read from the ``SAFE_PROTOCOL_REGEX`` environment variable
    and falls back to the module-level ``SAFE_PROTOCOL_REGEX`` constant. An
    empty environment value is treated as "not set": an empty pattern matches
    every string and would silently switch the protection off.

    :param test_url: the URL to check, optionally prefixed with ``source:``.
    :return: ``True`` if the protocol is permitted, ``False`` otherwise
             (including when *test_url* is not a string, e.g. ``None``).
    """
    # A missing/non-text URL can never be a permitted link; report it as unsafe
    # rather than raising TypeError from the string handling below.
    if not isinstance(test_url, str):
        return False

    candidate = test_url.strip()

    # 'source:' is a valid way to tell us to return the source, so drop it
    # before looking at the real protocol ('source:javascript:' must still fail).
    # Only LEADING prefixes are removed: stripping 'source:' from the middle of
    # the string would let 'htsource:tp://...' collapse into 'http://...' and
    # pass the check even though its real scheme is not on the allow-list.
    prefix = 'source:'
    while candidate[:len(prefix)].lower() == prefix:
        candidate = candidate[len(prefix):].lstrip()

    allowed = os.getenv('SAFE_PROTOCOL_REGEX') or SAFE_PROTOCOL_REGEX
    pattern = re.compile(allowed, re.IGNORECASE)
    return bool(pattern.match(candidate))
class model(dict): class model(dict):
__newest_history_key = None __newest_history_key = None
__history_n = 0 __history_n = 0
@@ -88,12 +109,16 @@ class model(dict):
def ensure_data_dir_exists(self): def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir): if not os.path.isdir(self.watch_data_dir):
print ("> Creating data dir {}".format(self.watch_data_dir)) logger.debug("> Creating data dir {}".format(self.watch_data_dir))
os.mkdir(self.watch_data_dir) os.mkdir(self.watch_data_dir)
@property @property
def link(self): def link(self):
url = self.get('url', '') url = self.get('url', '')
if not is_safe_url(url):
return 'DISABLED'
ready_url = url ready_url = url
if '{%' in url or '{{' in url: if '{%' in url or '{{' in url:
from jinja2 import Environment from jinja2 import Environment
@@ -128,7 +153,9 @@ class model(dict):
@property @property
def is_pdf(self): def is_pdf(self):
# content_type field is set in the future # content_type field is set in the future
return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower() # https://github.com/dgtlmoon/changedetection.io/issues/1392
# Not sure the best logic here
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
@property @property
def label(self): def label(self):
@@ -167,7 +194,7 @@ class model(dict):
# Read the history file as a dict # Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt") fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname): if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time())) logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
with open(fname, "r") as f: with open(fname, "r") as f:
for i in f.readlines(): for i in f.readlines():
if ',' in i: if ',' in i:

View File

@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
apobj = apprise.Apprise(debug=True, asset=asset) apobj = apprise.Apprise(debug=True, asset=asset)
url = url.strip() url = url.strip()
if len(url): if len(url):
print(">> Process Notification: AppRise notifying {}".format(url)) logger.info(">> Process Notification: AppRise notifying {}".format(url))
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs: with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
# Re 323 - Limit discord length to their 2000 char limit total or it wont send. # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :( # Because different notifications may require different pre-processing, run each sequentially :(

View File

@@ -1,20 +1,20 @@
from flask import ( from flask import (
flash flash
) )
import json
import logging from . model import App, Watch
import os
import threading
import time
import uuid as uuid_builder
from copy import deepcopy from copy import deepcopy
from os import path, unlink from os import path, unlink
from threading import Lock from threading import Lock
import json
from loguru import logger
import os
import re import re
import requests import requests
import secrets import secrets
import threading
from . model import App, Watch import time
import uuid as uuid_builder
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods? # Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :) # Open a github issue if you know something :)
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
for uuid, watch in self.__data['watching'].items(): for uuid, watch in self.__data['watching'].items():
watch['uuid']=uuid watch['uuid']=uuid
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch) self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
print("Watching:", uuid, self.__data['watching'][uuid]['url']) logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
# First time ran, Create the datastore. # First time ran, Create the datastore.
except (FileNotFoundError): except (FileNotFoundError):
if include_default_watches: if include_default_watches:
print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path)) logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
self.add_watch(url='https://news.ycombinator.com/', self.add_watch(url='https://news.ycombinator.com/',
tag='Tech news', tag='Tech news',
extras={'fetch_backend': 'html_requests'}) extras={'fetch_backend': 'html_requests'})
@@ -192,27 +192,24 @@ class ChangeDetectionStore:
tags.sort() tags.sort()
return tags return tags
def unlink_history_file(self, path):
try:
unlink(path)
except (FileNotFoundError, IOError):
pass
# Delete a single watch by UUID # Delete a single watch by UUID
def delete(self, uuid): def delete(self, uuid):
import pathlib
import shutil
with self.lock: with self.lock:
if uuid == 'all': if uuid == 'all':
self.__data['watching'] = {} self.__data['watching'] = {}
# GitHub #30 also delete history records # GitHub #30 also delete history records
for uuid in self.data['watching']: for uuid in self.data['watching']:
for path in self.data['watching'][uuid].history.values(): path = pathlib.Path(os.path.join(self.datastore_path, uuid))
self.unlink_history_file(path) shutil.rmtree(path)
self.needs_write_urgent = True
else: else:
for path in self.data['watching'][uuid].history.values(): path = pathlib.Path(os.path.join(self.datastore_path, uuid))
self.unlink_history_file(path) shutil.rmtree(path)
del self.data['watching'][uuid] del self.data['watching'][uuid]
self.needs_write_urgent = True self.needs_write_urgent = True
@@ -306,12 +303,15 @@ class ChangeDetectionStore:
apply_extras['include_filters'] = [res['css_filter']] apply_extras['include_filters'] = [res['css_filter']]
except Exception as e: except Exception as e:
logging.error("Error fetching metadata for shared watch link", url, str(e)) logger.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error') flash("Error fetching metadata for {}".format(url), 'error')
return False return False
from .model.Watch import is_safe_url
if not is_safe_url(url):
flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
return None
with self.lock: with self.lock:
# #Re 569 # #Re 569
new_watch = Watch.model(datastore_path=self.datastore_path, default={ new_watch = Watch.model(datastore_path=self.datastore_path, default={
'url': url, 'url': url,
@@ -388,14 +388,13 @@ class ChangeDetectionStore:
def sync_to_json(self): def sync_to_json(self):
logging.info("Saving JSON..") logger.debug("Saving JSON DB")
print("Saving JSON..")
try: try:
data = deepcopy(self.__data) data = deepcopy(self.__data)
except RuntimeError as e: except RuntimeError as e:
# Try again in 15 seconds # Try again in 15 seconds
time.sleep(15) time.sleep(15)
logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e)) logger.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
self.sync_to_json() self.sync_to_json()
return return
else: else:
@@ -408,7 +407,7 @@ class ChangeDetectionStore:
json.dump(data, json_file, indent=4) json.dump(data, json_file, indent=4)
os.replace(self.json_store_path+".tmp", self.json_store_path) os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e: except Exception as e:
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e)) logger.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
self.needs_write = False self.needs_write = False
self.needs_write_urgent = False self.needs_write_urgent = False
@@ -419,7 +418,7 @@ class ChangeDetectionStore:
while True: while True:
if self.stop_thread: if self.stop_thread:
print("Shutting down datastore thread") logger.info("Shutting down datastore thread")
return return
if self.needs_write or self.needs_write_urgent: if self.needs_write or self.needs_write_urgent:
@@ -533,8 +532,8 @@ class ChangeDetectionStore:
try: try:
update_method = getattr(self, "update_{}".format(update_n))() update_method = getattr(self, "update_{}".format(update_n))()
except Exception as e: except Exception as e:
print("Error while trying update_{}".format((update_n))) logger.error("Error while trying update_{}".format((update_n)))
print(e) logger.error(str(e))
# Don't run any more updates # Don't run any more updates
return return
else: else:
@@ -572,7 +571,7 @@ class ChangeDetectionStore:
with open(os.path.join(target_path, "history.txt"), "w") as f: with open(os.path.join(target_path, "history.txt"), "w") as f:
f.writelines(history) f.writelines(history)
else: else:
logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path)) logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
# No longer needed, dynamically pulled from the disk when needed. # No longer needed, dynamically pulled from the disk when needed.
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version. # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.

View File

@@ -76,8 +76,12 @@
</div> </div>
<div class="tab-pane-inner" id="text"> <div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored. <div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
</div>
{% if password_enabled_and_share_is_off %}
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
{% endif %}
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div> <div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table> <table>

View File

@@ -57,6 +57,11 @@
{% endif %} {% endif %}
</div> </div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group"> <div class="pure-control-group">
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/", {{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }} class="m-d") }}

View File

@@ -1,18 +1,34 @@
from . util import live_server_setup, extract_UUID_from_client
from flask import url_for from flask import url_for
from . util import live_server_setup import time
def test_check_access_control(app, client): def test_check_access_control(app, client, live_server):
# Still doesnt work, but this is closer. # Still doesnt work, but this is closer.
live_server_setup(live_server)
with app.test_client(use_cookies=True) as c: with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet. # Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page")) res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data assert b"Remove password" not in res.data
# Enable password check. # add something that we can hit via diff page later
res = c.post(
url_for("import_page"),
data={"urls": url_for('test_random_content_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
time.sleep(2)
# Enable password check and diff page access bypass
res = c.post( res = c.post(
url_for("settings_page"), url_for("settings_page"),
data={"application-password": "foobar", data={"application-password": "foobar",
"application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180, "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"}, 'application-fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
@@ -22,9 +38,15 @@ def test_check_access_control(app, client):
# Check we hit the login # Check we hit the login
res = c.get(url_for("index"), follow_redirects=True) res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' in res.data
# Menu should not be available yet # Menu should not be available yet
# assert b"SETTINGS" not in res.data # assert b"SETTINGS" not in res.data
# assert b"BACKUP" not in res.data # assert b"BACKUP" not in res.data
@@ -109,3 +131,25 @@ def test_check_access_control(app, client):
assert b"Password protection enabled" not in res.data assert b"Password protection enabled" not in res.data
# Now checking the diff access
# Enable password check and diff page access bypass
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
# Should be disabled
# "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' not in res.data

View File

@@ -3,7 +3,7 @@
import time import time
from flask import url_for from flask import url_for
from urllib.request import urlopen from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
sleep_time_for_fetch_thread = 3 sleep_time_for_fetch_thread = 3
@@ -76,7 +76,8 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'unviewed' in res.data assert b'unviewed' in res.data
# #75, and it should be in the RSS feed # #75, and it should be in the RSS feed
res = client.get(url_for("rss")) rss_token = extract_rss_token_from_UI(client)
res = client.get(url_for("rss", token=rss_token, _external=True))
expected_url = url_for('test_endpoint', _external=True) expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data assert b'<rss' in res.data

View File

@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
url = url_for('test_notification_endpoint', _external=True) url = url_for('test_notification_endpoint', _external=True)
notification_url = url.replace('http', 'json') notification_url = url.replace('http', 'json')
print(">>>> Notification URL: " + notification_url)
# Just a regular notification setting, this will be used by the special 'filter not found' notification # Just a regular notification setting, this will be used by the special 'filter not found' notification
notification_form_data = {"notification_urls": notification_url, notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}", "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",

View File

@@ -0,0 +1,39 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
def test_rss_and_token(client, live_server):
    """Check that the RSS feed is only served when the correct token is supplied.

    Imports a single watch pointing at the random-content test endpoint,
    reads the RSS token from the index page, triggers a recheck, then
    requests the feed once with a bogus token and once with the real one.
    """
    set_original_response()
    live_server_setup(live_server)

    # Register one watch through the import page.
    watch_url = url_for('test_random_content_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": watch_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data

    # The token is scraped from the UI, as a real feed subscriber would get it.
    rss_token = extract_rss_token_from_UI(client)

    # Fixed pauses around the recheck request; presumably they give the
    # background worker time to fetch the watch - TODO confirm.
    time.sleep(2)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    denied_marker = b"Access denied, bad token"

    # A request carrying an invalid token must be refused.
    rejected = client.get(
        url_for("rss", token="bad token", _external=True),
        follow_redirects=True,
    )
    assert denied_marker in rejected.data

    # The real token must yield the feed, including the watched content.
    accepted = client.get(
        url_for("rss", token=rss_token, _external=True),
        follow_redirects=True,
    )
    assert denied_marker not in accepted.data
    assert b"Random content" in accepted.data

View File

@@ -2,11 +2,9 @@ from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup from . util import set_original_response, set_modified_response, live_server_setup
import time import time
def test_setup(live_server):
def test_bad_access(client, live_server):
live_server_setup(live_server) live_server_setup(live_server)
def test_file_access(client, live_server):
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": 'https://localhost'}, data={"urls": 'https://localhost'},
@@ -19,18 +17,49 @@ def test_file_access(client, live_server):
res = client.post( res = client.post(
url_for("edit_page", uuid="first"), url_for("edit_page", uuid="first"),
data={ data={
"url": 'file:///etc/passwd', "url": 'javascript:alert(document.domain)',
"tag": "", "tag": "",
"method": "GET", "method": "GET",
"fetch_backend": "html_requests", "fetch_backend": "html_requests",
"body": ""}, "body": ""},
follow_redirects=True follow_redirects=True
) )
time.sleep(3)
res = client.get( assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
url_for("index", uuid="first"),
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' javascript:alert(123)', "tag": ''},
follow_redirects=True follow_redirects=True
) )
assert b'denied for security reasons' in res.data assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": '%20%20%20javascript:alert(123)%20%20', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' source:javascript:alert(document.domain)', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": 'file:///tasty/disk/drive', "tag": ''},
follow_redirects=True
)
time.sleep(1)
res = client.get(url_for("index"))
assert b'file:// type access is denied for security reasons.' in res.data

View File

@@ -70,6 +70,15 @@ def extract_api_key_from_UI(client):
api_key = m.group(1) api_key = m.group(1)
return api_key.strip() return api_key.strip()
# kinda funky, but works for now
def extract_rss_token_from_UI(client):
    """Return the RSS access token advertised on the index page.

    Fetches the ``index`` endpoint with ``client`` and returns the text that
    follows ``token=`` up to the next double quote, stripped of surrounding
    whitespace.

    Raises:
        AssertionError: if no ``token=...`` link is present in the page, so
            the calling test fails with a readable message instead of an
            ``AttributeError`` on ``None``.
    """
    import re

    res = client.get(
        url_for("index"),
    )
    # Match against the decoded page text rather than the ``b'...'`` repr of
    # the raw bytes, so escape sequences from repr() cannot leak into the token.
    page = res.data.decode("utf-8", errors="replace")
    m = re.search(r'token=(.+?)"', page)
    if m is None:
        raise AssertionError("No RSS token link (token=...) found on the index page")
    return m.group(1).strip()
# kinda funky, but works for now # kinda funky, but works for now
def extract_UUID_from_client(client): def extract_UUID_from_client(client):
@@ -98,6 +107,12 @@ def wait_for_all_checks(client):
def live_server_setup(live_server): def live_server_setup(live_server):
@live_server.app.route('/test-random-content-endpoint')
def test_random_content_endpoint():
    """Serve a text body that differs on every request.

    The body is ``Random content - `` followed by a freshly generated
    hexadecimal token and a trailing newline.
    """
    import secrets

    # token_hex(64) draws 64 random bytes, i.e. 128 hex characters.
    random_hex = secrets.token_hex(64)
    return "Random content - {}\n".format(random_hex)
@live_server.app.route('/test-endpoint') @live_server.app.route('/test-endpoint')
def test_endpoint(): def test_endpoint():
ctype = request.args.get('content_type') ctype = request.args.get('content_type')

View File

@@ -2,6 +2,7 @@ import os
import threading import threading
import queue import queue
import time import time
from loguru import logger
from changedetectionio import content_fetcher from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData from changedetectionio import queuedWatchMetaData
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
# Requests for checking on a single site(watch) from a queue of watches # Requests for checking on a single site(watch) from a queue of watches
# (another process inserts watches into the queue that are time-ready for checking) # (another process inserts watches into the queue that are time-ready for checking)
import logging
import sys import sys
class update_worker(threading.Thread): class update_worker(threading.Thread):
current_uuid = None current_uuid = None
def __init__(self, q, notification_q, app, datastore, *args, **kwargs): def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
self.q = q self.q = q
self.app = app self.app = app
self.notification_q = notification_q self.notification_q = notification_q
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep), 'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep) 'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
}) })
logging.info (">> SENDING NOTIFICATION") logger.info ("UUID: {} - Sending notification".format(watch_uuid))
self.notification_q.put(n_object) self.notification_q.put(n_object)
else: else:
logging.info (">> NO Notification sent, notification_url was empty in both watch and system") logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
def send_filter_failure_notification(self, watch_uuid): def send_filter_failure_notification(self, watch_uuid):
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
'screenshot': None 'screenshot': None
}) })
self.notification_q.put(n_object) self.notification_q.put(n_object)
print("Sent filter not found notification for {}".format(watch_uuid)) logger.info("Sent filter not found notification for {}".format(watch_uuid))
def send_step_failure_notification(self, watch_uuid, step_n): def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False) watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
'uuid': watch_uuid 'uuid': watch_uuid
}) })
self.notification_q.put(n_object) self.notification_q.put(n_object)
print("Sent step not found notification for {}".format(watch_uuid)) logger.error("Sent step not found notification for {}".format(watch_uuid))
def cleanup_error_artifacts(self, uuid): def cleanup_error_artifacts(self, uuid):
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
update_obj= {} update_obj= {}
xpath_data = False xpath_data = False
process_changedetection_results = True process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])) logger.info("UUID: {} Start processing, Priority {} URL {}",uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
now = time.time() now = time.time()
try: try:
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
if not isinstance(contents, (bytes, bytearray)): if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes") raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e: except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e)) logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
process_changedetection_results = False process_changedetection_results = False
except content_fetcher.ReplyWithContentButNoText as e: except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about # Totally fine, it's by choice - just continue on, nothing more to care about
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0) 0)
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold: if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_filter_failure_notification(uuid) self.send_filter_failure_notification(uuid)
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold? # Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0) 0)
print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c)) logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold: if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n) self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
# A change was detected # A change was detected
if changed_detected: if changed_detected:
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
# Notifications should only trigger on the second time (first time, we gather the initial snapshot) # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2: if watch.history_n >= 2:
if not self.datastore.data['watching'][uuid].get('notification_muted'): if not self.datastore.data['watching'][uuid].get('notification_muted'):
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
except Exception as e: except Exception as e:
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart! # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
print("!!!! Exception in update_worker !!!\n", e) logger.error("!!!! Exception in update_worker !!!\n", e)
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

View File

@@ -41,7 +41,6 @@ services:
# #
# Base URL of your changedetection.io install (Added to the notification alert) # Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com # - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1 # - USE_X_SETTINGS=1
@@ -95,7 +94,10 @@ services:
# - CHROME_REFRESH_TIME=600000 # - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true # - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true # - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes: volumes:
changedetection-data: changedetection-data:

View File

@@ -10,6 +10,7 @@ inscriptis~=2.2
pytz pytz
timeago~=1.0 timeago~=1.0
validators validators
loguru
# Set these versions together to avoid a RequestsDependencyWarning # Set these versions together to avoid a RequestsDependencyWarning
# >= 2.26 also adds Brotli support if brotli is installed # >= 2.26 also adds Brotli support if brotli is installed