Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
5382ba3b73 tweak 2023-01-19 20:56:06 +01:00
dgtlmoon
7b5729a4da better name 2023-01-19 20:54:37 +01:00
dgtlmoon
7fbf4fe13b Ability for watch to use a more obvious system default 2023-01-19 20:52:34 +01:00
24 changed files with 182 additions and 373 deletions

View File

@@ -98,7 +98,6 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
# A new tagged release is required, which builds :tag and :latest
- name: Build and push :tag
@@ -117,7 +116,6 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
- name: Image digest
run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}

View File

@@ -10,13 +10,11 @@ on:
paths:
- requirements.txt
- Dockerfile
- .github/workflows/*
pull_request:
paths:
- requirements.txt
- Dockerfile
- .github/workflows/*
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile

View File

@@ -67,10 +67,10 @@ jobs:
sleep 3
# Should return 0 (no error) when grep finds it
curl -s http://localhost:5556 |grep -q checkbox-uuid
curl -s http://localhost:5556/rss|grep -q rss-specification
# and IPv6
curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
#export WEBDRIVER_URL=http://localhost:4444/wd/hub
#pytest tests/fetchers/test_content.py

View File

@@ -1,8 +1,6 @@
## Web Site Change Detection, Monitoring and Notification.
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**
_Live your data-life pro-actively._
_Live your data-life pro-actively: detect website changes, perform meaningful actions, and trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)

View File

@@ -1,15 +1,5 @@
#!/usr/bin/python3
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from flask_compress import Compress as FlaskCompress
from flask_login import current_user
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from functools import wraps
from threading import Event
import datetime
import flask_login
import logging
@@ -20,6 +10,12 @@ import threading
import time
import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from threading import Event
from flask import (
Flask,
abort,
@@ -32,11 +28,15 @@ from flask import (
session,
url_for,
)
from flask_compress import Compress as FlaskCompress
from flask_login import login_required
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.2'
__version__ = '0.40.0.4'
datastore = None
@@ -53,6 +53,7 @@ app = Flask(__name__,
static_url_path="",
static_folder="static",
template_folder="templates")
from flask_compress import Compress
# Super handy for compressing large BrowserSteps responses and others
FlaskCompress(app)
@@ -64,6 +65,8 @@ app.config.exit = Event()
app.config['NEW_VERSION_AVAILABLE'] = False
app.config['LOGIN_DISABLED'] = False
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
# Disables caching of the templates
@@ -71,6 +74,7 @@ app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
watch_api = Api(app, decorators=[csrf.exempt])
@@ -145,6 +149,7 @@ class User(flask_login.UserMixin):
# Compare given password against JSON store or Env var
def check_password(self, password):
import base64
import hashlib
@@ -152,9 +157,11 @@ class User(flask_login.UserMixin):
raw_salt_pass = os.getenv("SALTED_PASS", False)
if not raw_salt_pass:
raw_salt_pass = datastore.data['settings']['application'].get('password')
raw_salt_pass = datastore.data['settings']['application']['password']
raw_salt_pass = base64.b64decode(raw_salt_pass)
salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt
# Use the exact same setup you used to generate the key, but this time put in the password to check
@@ -164,44 +171,21 @@ class User(flask_login.UserMixin):
salt_from_storage,
100000
)
new_key = salt_from_storage + new_key
new_key = salt_from_storage + new_key
return new_key == raw_salt_pass
pass
def login_optionally_required(func):
    """Decorate a view so that login is enforced only when a password is configured.

    The wrapped view runs without an authentication check when any of these
    hold, tested in this order:

    * the request is for the ``static_content`` endpoint with group ``styles``;
    * the request is for ``diff_history_page`` and the application setting
      ``shared_diff_access`` is truthy;
    * the HTTP method is listed in ``flask_login.config.EXEMPT_METHODS``;
    * ``app.config['LOGIN_DISABLED']`` is truthy.

    Otherwise, when a password is set (in the datastore settings or through the
    ``SALTED_PASS`` environment variable) and the current user is not
    authenticated, the login manager's ``unauthorized()`` response is returned
    instead of calling the view.
    """
    @wraps(func)
    def decorated_view(*args, **kwargs):
        app_settings = datastore.data['settings']['application']
        has_password_enabled = app_settings.get('password') or os.getenv("SALTED_PASS", False)

        # Short-circuit order matches the documented list above.
        bypass_login = (
            (request.endpoint == 'static_content' and request.view_args['group'] == 'styles')
            or (request.endpoint == 'diff_history_page' and app_settings.get('shared_diff_access'))
            or request.method in flask_login.config.EXEMPT_METHODS
            or app.config.get('LOGIN_DISABLED')
        )

        if not bypass_login and has_password_enabled and not current_user.is_authenticated:
            return app.login_manager.unauthorized()

        return func(*args, **kwargs)

    return decorated_view
def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE'] = datastore_o
app.config['DATASTORE']=datastore_o
#app.config.update(config or {})
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login'
@@ -229,8 +213,6 @@ def changedetection_app(config=None, datastore_o=None):
# https://flask-cors.readthedocs.io/en/latest/
# CORS(app)
@login_manager.user_loader
def user_loader(email):
user = User()
@@ -239,7 +221,7 @@ def changedetection_app(config=None, datastore_o=None):
@login_manager.unauthorized_handler
def unauthorized_handler():
    """Flash an error message and redirect the visitor to the login page."""
    flash("You must be logged in, please log in.", 'error')
    # @todo validate it's a URL of this host and use that
    # Until then the post-login target is always the index page.
    after_login_url = url_for('index')
    login_url = url_for('login', next=after_login_url)
    return redirect(login_url)
@app.route('/logout')
@@ -252,6 +234,10 @@ def changedetection_app(config=None, datastore_o=None):
@app.route('/login', methods=['GET', 'POST'])
def login():
if not datastore.data['settings']['application']['password'] and not os.getenv("SALTED_PASS", False):
flash("Login not required, no password enabled.", "notice")
return redirect(url_for('index'))
if request.method == 'GET':
if flask_login.current_user.is_authenticated:
flash("Already logged in")
@@ -286,22 +272,27 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('login'))
@app.before_request
def before_request_handle_cookie_x_settings():
def do_something_whenever_a_request_comes_in():
# Disable password login if there is not one set
# (No password in settings or env var)
app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
# Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
return None
# For the RSS path, allow access via a token
if request.path == '/rss' and request.args.get('token'):
app_rss_token = datastore.data['settings']['application']['rss_access_token']
rss_url_token = request.args.get('token')
if app_rss_token == rss_url_token:
app.config['LOGIN_DISABLED'] = True
@app.route("/rss", methods=['GET'])
@login_required
def rss():
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from . import diff
limit_tag = request.args.get('tag')
@@ -375,7 +366,7 @@ def changedetection_app(config=None, datastore_o=None):
return response
@app.route("/", methods=['GET'])
@login_optionally_required
@login_required
def index():
from changedetectionio import forms
@@ -426,7 +417,6 @@ def changedetection_app(config=None, datastore_o=None):
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=existing_tags,
watches=sorted_watches
)
@@ -439,7 +429,7 @@ def changedetection_app(config=None, datastore_o=None):
# AJAX endpoint for sending a test
@app.route("/notification/send-test", methods=['POST'])
@login_optionally_required
@login_required
def ajax_callback_send_notification_test():
import apprise
@@ -472,7 +462,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/clear_history/<string:uuid>", methods=['GET'])
@login_optionally_required
@login_required
def clear_watch_history(uuid):
try:
datastore.clear_watch_history(uuid)
@@ -484,7 +474,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/clear_history", methods=['GET', 'POST'])
@login_optionally_required
@login_required
def clear_all_history():
if request.method == 'POST':
@@ -505,8 +495,43 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html")
return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
    """Return the MD5 hex digest of the watch's newest snapshot after ignore-text stripping.

    The newest snapshot file of the watch is read, passed through
    ``html_tools.strip_ignore_text`` with the watch's ``ignore_text`` rules and
    hashed. When the application setting ``ignore_whitespace`` is truthy, the
    bytes ``\\r``, ``\\n``, ``\\t`` and space are removed before hashing.

    If the watch has no usable newest history key, its stored ``previous_md5``
    is returned unchanged.
    """
    import hashlib

    from changedetectionio import fetch_site_status

    watch = datastore.data['watching'][uuid]

    # Get the most recent one
    history_key = watch.get('newest_history_key')

    # 0 means that there's only one, so that there should be no 'unviewed' history available
    if history_key == 0:
        history_key = list(watch.history.keys())[0]

    if not history_key:
        return watch['previous_md5']

    with open(watch.history[history_key], encoding='utf-8') as snapshot:
        raw_content = snapshot.read()

        # NOTE(review): the handler is constructed but never used below; kept
        # in case its construction has side effects - confirm and remove.
        handler = fetch_site_status.perform_site_check(datastore=datastore)

        # presumably returns bytes (the two-argument translate() below is the
        # bytes form) - verify against html_tools
        stripped_content = html_tools.strip_ignore_text(raw_content, watch['ignore_text'])

        if datastore.data['settings']['application'].get('ignore_whitespace', False):
            payload = stripped_content.translate(None, b'\r\n\t ')
        else:
            payload = stripped_content

        return hashlib.md5(payload).hexdigest()
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
@login_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
@@ -560,7 +585,6 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate():
extra_update_obj = {}
if request.args.get('unpause_on_save'):
@@ -614,6 +638,8 @@ def changedetection_app(config=None, datastore_o=None):
visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
@@ -626,12 +652,9 @@ def changedetection_app(config=None, datastore_o=None):
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_requests':
is_html_webdriver = True
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
output = render_template("edit.html",
browser_steps_config=browser_step_ui_config,
current_base_url=datastore.data['settings']['application']['base_url'],
@@ -652,7 +675,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings", methods=['GET', "POST"])
@login_optionally_required
@login_required
def settings_page():
from changedetectionio import content_fetcher, forms
@@ -732,7 +755,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/import", methods=['GET', "POST"])
@login_optionally_required
@login_required
def import_page():
remaining_urls = []
if request.method == 'POST':
@@ -770,7 +793,7 @@ def changedetection_app(config=None, datastore_o=None):
# Clear all statuses, so we do not see the 'unviewed' class
@app.route("/form/mark-all-viewed", methods=['GET'])
@login_optionally_required
@login_required
def mark_all_viewed():
# Save the current newest history as the most recently viewed
@@ -780,7 +803,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/diff/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
@login_required
def diff_history_page(uuid):
from changedetectionio import forms
@@ -856,13 +879,8 @@ def changedetection_app(config=None, datastore_o=None):
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
output = render_template("diff.html",
current_diff_url=watch['url'],
@@ -877,7 +895,6 @@ def changedetection_app(config=None, datastore_o=None):
left_sticky=True,
newest=newest_version_file_contents,
newest_version_timestamp=dates[-1],
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
previous=previous_version_file_contents,
screenshot=screenshot_url,
uuid=uuid,
@@ -888,7 +905,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
@login_required
def preview_page(uuid):
content = []
ignored_line_numbers = []
@@ -908,9 +925,8 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -979,7 +995,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings/notification-logs", methods=['GET'])
@login_optionally_required
@login_required
def notification_logs():
global notification_debug_log
output = render_template("notification-log.html",
@@ -989,7 +1005,7 @@ def changedetection_app(config=None, datastore_o=None):
# We're good but backups are even better!
@app.route("/backup", methods=['GET'])
@login_optionally_required
@login_required
def get_backup():
import zipfile
@@ -1109,14 +1125,13 @@ def changedetection_app(config=None, datastore_o=None):
abort(404)
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
@login_required
def form_quick_watch_add():
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
if not form.validate():
for widget, l in form.errors.items():
flash(','.join(l), 'error')
flash("Error")
return redirect(url_for('index'))
url = request.form.get('url').strip()
@@ -1127,21 +1142,22 @@ def changedetection_app(config=None, datastore_o=None):
add_paused = request.form.get('edit_and_watch_submit_button') != None
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
if new_uuid:
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
else:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
if not add_paused and new_uuid:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
return redirect(url_for('index'))
@app.route("/api/delete", methods=['GET'])
@login_optionally_required
@login_required
def form_delete():
uuid = request.args.get('uuid')
@@ -1158,7 +1174,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/clone", methods=['GET'])
@login_optionally_required
@login_required
def form_clone():
uuid = request.args.get('uuid')
# More for testing, possible to return the first/only
@@ -1166,14 +1182,13 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid)
if new_uuid:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
return redirect(url_for('index'))
@app.route("/api/checknow", methods=['GET'])
@login_optionally_required
@login_required
def form_watch_checknow():
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
tag = request.args.get('tag')
@@ -1207,7 +1222,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST'])
@login_optionally_required
@login_required
def form_watch_list_checkbox_operations():
op = request.form['op']
uuids = request.form.getlist('uuids')
@@ -1271,7 +1286,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/share-url", methods=['GET'])
@login_optionally_required
@login_required
def form_share_put_watch():
"""Given a watch UUID, upload the info and return a share-link
the share-link can be imported/added"""

View File

@@ -202,11 +202,8 @@ class CreateWatch(Resource):
del extras['url']
new_uuid = self.datastore.add_watch(url=url, extras=extras)
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
@auth.check_token
def get(self):

View File

@@ -23,10 +23,11 @@
from distutils.util import strtobool
from flask import Blueprint, request, make_response
from flask_login import login_required
import os
import logging
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {}
browsersteps_playwright_browser_interface = None
browsersteps_playwright_browser_interface_browser = None
@@ -64,7 +65,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
@login_optionally_required
@login_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
def browsersteps_ui_update():
import base64

View File

@@ -31,13 +31,11 @@ def sigterm_handler(_signo, _stack_frame):
def main():
global datastore
global app
datastore_path = None
do_cleanup = False
host = ''
ipv6_enabled = False
port = os.environ.get('PORT') or 5000
ssl_mode = False
host = ''
port = os.environ.get('PORT') or 5000
do_cleanup = False
datastore_path = None
# On Windows, create and use a default path.
if os.name == 'nt':
@@ -48,7 +46,7 @@ def main():
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:", "port")
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
sys.exit(2)
@@ -68,10 +66,6 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-6':
print ("Enabling IPv6 listen support")
ipv6_enabled = True
# Cleanup (remove text files that aren't in the index)
if opt == '-c':
do_cleanup = True
@@ -139,15 +133,13 @@ def main():
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), socket.AF_INET6),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)
eventlet.wsgi.server(eventlet.listen((host, int(port)), socket.AF_INET6), app)

View File

@@ -252,6 +252,9 @@ class base_html_playwright(Fetcher):
self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''):
# There's a bug where we need to do it twice or it doesn't take the whole page; the cause is unknown.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
if self.browser_steps_screenshot_path is not None:
@@ -297,8 +300,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True,
# Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
# Can't think why we need the service workers for our use case?
service_workers='block',
# Should never be needed
accept_downloads=False
)
@@ -358,20 +361,28 @@ class base_html_playwright(Fetcher):
print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
# Run Browser Steps here
self.iterate_browser_steps()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait)
self.content = self.page.content()
self.status_code = response.status
if len(self.page.content().strip()) == 0:
context.close()
browser.close()
print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status
self.content = self.page.content()
self.headers = response.all_headers()
@@ -392,6 +403,8 @@ class base_html_playwright(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as e:

View File

@@ -232,17 +232,12 @@ class validateURL(object):
def __call__(self, form, field):
    """Validate that ``field.data`` is a URL using a permitted protocol.

    Raises ``ValidationError`` when ``validators.url`` raises
    ``ValidationFailure`` for the stripped value, or when ``is_safe_url``
    rejects the value.
    """
    import validators

    candidate = field.data.strip()
    try:
        validators.url(candidate)
    except validators.ValidationFailure:
        # NOTE(review): some releases of `validators` return a failure object
        # instead of raising it - confirm this branch is actually reachable.
        raise ValidationError(field.gettext('\'%s\' is not a valid URL.' % (candidate)))

    from .model.Watch import is_safe_url

    # The protocol check receives the raw (unstripped) field value.
    if is_safe_url(field.data):
        return
    raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX')
class ValidateListRegex(object):
"""
@@ -459,17 +454,17 @@ class globalSettingsRequestForm(Form):
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
base_url = StringField('Base URL', validators=[validators.Optional()])
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField()
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
password = SaltyPasswordField()
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,

View File

@@ -40,7 +40,6 @@ class model(dict):
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'schema_version' : 0,
'shared_diff_access': False,
'webdriver_delay': None # Extra delay in seconds before extracting text
}
}

View File

@@ -1,14 +1,9 @@
from distutils.util import strtobool
import logging
import os
import re
import time
import uuid
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -23,7 +18,7 @@ base_config = {
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': 'system',
'fetch_backend': None,
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'has_ldjson_price_data': None,
'track_ldjson_price_data': None,
@@ -60,22 +55,6 @@ base_config = {
'webdriver_js_execute_code': None, # Run before change-detection
}
def is_safe_url(test_url):
    """Return True when ``test_url`` starts with a permitted protocol.

    Every case-insensitive occurrence of ``source:`` is removed first, so that
    values such as ``source:javascript:`` are judged by what follows the marker
    ('source:' is a valid way to ask for the page source to be returned).
    The remaining text is stripped and matched, case-insensitively, against the
    ``SAFE_PROTOCOL_REGEX`` environment variable, falling back to the module
    level ``SAFE_PROTOCOL_REGEX`` default.

    See https://github.com/dgtlmoon/changedetection.io/issues/1358
    """
    without_source_marker = re.sub(re.escape('source:'), '', test_url, flags=re.IGNORECASE)

    allowed_protocols = os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX)
    matched = re.match(allowed_protocols, without_source_marker.strip(), re.IGNORECASE)

    return matched is not None
class model(dict):
__newest_history_key = None
__history_n = 0
@@ -114,11 +93,7 @@ class model(dict):
@property
def link(self):
url = self.get('url', '')
if not is_safe_url(url):
return 'DISABLED'
ready_url = url
if '{%' in url or '{{' in url:
from jinja2 import Environment

View File

@@ -1,7 +1,7 @@
$(document).ready(function () {
$(document).ready(function() {
function toggle() {
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
if (playwright_enabled) {
if(playwright_enabled) {
// playwright supports headers, so hide everything else
// See #664
$('#requests-override-options #request-method').hide();
@@ -14,14 +14,9 @@ $(document).ready(function () {
$('#requests-override-options').hide();
}
$('#webdriver-override-options').show();
} else if ($('input[name="fetch_backend"]:checked').val() == 'system') {
$('#requests-override-options #request-method').hide();
$('#requests-override-options #request-body').hide();
$('#ignore-status-codes-option').hide();
$('#requests-override-options').hide();
$('#webdriver-override-options').hide();
} else {
$('#requests-override-options').show();

View File

@@ -1,20 +1,20 @@
from flask import (
flash
)
from . model import App, Watch
from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
import logging
import os
import re
import requests
import secrets
import threading
import time
import uuid as uuid_builder
from copy import deepcopy
from os import path, unlink
from threading import Lock
import re
import requests
import secrets
from . model import App, Watch
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
@@ -309,12 +309,9 @@ class ChangeDetectionStore:
logging.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error')
return False
from .model.Watch import is_safe_url
if not is_safe_url(url):
flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
return None
with self.lock:
# #Re 569
new_watch = Watch.model(datastore_path=self.datastore_path, default={
'url': url,
@@ -676,13 +673,3 @@ class ChangeDetectionStore:
self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
return
# Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide
def update_10(self):
    """Backfill a missing per-watch ``fetch_backend`` with ``'system'``.

    Walks every entry in ``self.data['watching']``; when an entry's
    ``fetch_backend`` is absent or falsy it is set to ``'system'``.
    Entries that already carry a value are left untouched.

    Returns:
        None. The watches are updated in place.
    """
    for watch in self.data['watching'].values():
        try:
            if not watch.get('fetch_backend', ''):
                watch['fetch_backend'] = 'system'
        except Exception:
            # Best-effort migration: an entry that cannot be read or written
            # is skipped so the remaining watches are still updated.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            continue
    return

View File

@@ -76,12 +76,8 @@
</div>
<div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
{% if password_enabled_and_share_is_off %}
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
{% endif %}
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div>
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table>

View File

@@ -57,11 +57,6 @@
{% endif %}
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}

View File

@@ -91,12 +91,7 @@
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img class="status-icon" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" /></a>
{% if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />
{% endif %}
{%if watch.get_fetch_backend == "html_webdriver" %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}

View File

@@ -1,34 +1,18 @@
from . util import live_server_setup, extract_UUID_from_client
from flask import url_for
import time
from . util import live_server_setup
def test_check_access_control(app, client, live_server):
def test_check_access_control(app, client):
# Still doesnt work, but this is closer.
live_server_setup(live_server)
with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
# add something that we can hit via diff page later
res = c.post(
url_for("import_page"),
data={"urls": url_for('test_random_content_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
time.sleep(2)
# Enable password check and diff page access bypass
# Enable password check.
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
"application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
@@ -38,15 +22,9 @@ def test_check_access_control(app, client, live_server):
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' in res.data
# Menu should not be available yet
# assert b"SETTINGS" not in res.data
# assert b"BACKUP" not in res.data
@@ -131,25 +109,3 @@ def test_check_access_control(app, client, live_server):
assert b"Password protection enabled" not in res.data
# Now checking the diff access
# Enable password check and diff page access bypass
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
# Should be disabled
# "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' not in res.data

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
sleep_time_for_fetch_thread = 3
@@ -76,8 +76,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'unviewed' in res.data
# #75, and it should be in the RSS feed
rss_token = extract_rss_token_from_UI(client)
res = client.get(url_for("rss", token=rss_token, _external=True))
res = client.get(url_for("rss"))
expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data

View File

@@ -1,39 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
def test_rss_and_token(client, live_server):
    """Check that the RSS route refuses a bad token and accepts the UI token."""
    set_original_response()
    live_server_setup(live_server)

    # Register a single watch through the import page
    import_url = url_for("import_page")
    watched_url = url_for('test_random_content_endpoint', _external=True)
    import_response = client.post(
        import_url,
        data={"urls": watched_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    rss_token = extract_rss_token_from_UI(client)

    # Trigger a recheck, pausing before and after the request
    time.sleep(2)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    # A wrong token must be refused
    bad_token_url = url_for("rss", token="bad token", _external=True)
    denied_feed = client.get(bad_token_url, follow_redirects=True)
    assert b"Access denied, bad token" in denied_feed.data

    # The token scraped from the UI must be accepted and show the watch content
    good_token_url = url_for("rss", token=rss_token, _external=True)
    valid_feed = client.get(good_token_url, follow_redirects=True)
    assert b"Access denied, bad token" not in valid_feed.data
    assert b"Random content" in valid_feed.data

View File

@@ -2,9 +2,11 @@ from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
import time
def test_bad_access(client, live_server):
def test_setup(live_server):
live_server_setup(live_server)
def test_file_access(client, live_server):
res = client.post(
url_for("import_page"),
data={"urls": 'https://localhost'},
@@ -17,49 +19,18 @@ def test_bad_access(client, live_server):
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": 'javascript:alert(document.domain)',
"url": 'file:///etc/passwd',
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": ""},
follow_redirects=True
)
time.sleep(3)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' javascript:alert(123)', "tag": ''},
res = client.get(
url_for("index", uuid="first"),
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": '%20%20%20javascript:alert(123)%20%20', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' source:javascript:alert(document.domain)', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": 'file:///tasty/disk/drive', "tag": ''},
follow_redirects=True
)
time.sleep(1)
res = client.get(url_for("index"))
assert b'file:// type access is denied for security reasons.' in res.data
assert b'denied for security reasons' in res.data

View File

@@ -70,15 +70,6 @@ def extract_api_key_from_UI(client):
api_key = m.group(1)
return api_key.strip()
# kinda funky, but works for now
def extract_rss_token_from_UI(client):
    """Return the RSS token scraped from the index page.

    Requests the ``index`` route through ``client``, searches the
    stringified response body for the first ``token=...`` value that is
    terminated by a double quote, and returns that value with surrounding
    whitespace stripped.
    """
    import re

    index_page = client.get(url_for("index"))
    page_text = str(index_page.data)
    found = re.search('token=(.+?)"', page_text)
    return found.group(1).strip()
# kinda funky, but works for now
def extract_UUID_from_client(client):
@@ -107,12 +98,6 @@ def wait_for_all_checks(client):
def live_server_setup(live_server):
@live_server.app.route('/test-random-content-endpoint')
def test_random_content_endpoint():
import secrets
return "Random content - {}\n".format(secrets.token_hex(64))
@live_server.app.route('/test-endpoint')
def test_endpoint():
ctype = request.args.get('content_type')

View File

@@ -52,12 +52,3 @@ def test_visual_selector_content_ready(client, live_server):
# Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
json.load(f)
# Some options should be enabled
# @todo - in the future, the visibility should be toggled by JS from the request type setting
res = client.get(
url_for("edit_page", uuid="first"),
follow_redirects=True
)
assert b'notification_screenshot' in res.data

View File

@@ -1,4 +1,3 @@
version: '3.2'
services:
changedetection:
image: ghcr.io/dgtlmoon/changedetection.io
@@ -41,6 +40,7 @@ services:
#
# Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
@@ -94,10 +94,7 @@ services:
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes:
changedetection-data: