Compare commits

...

20 Commits

Author SHA1 Message Date
dgtlmoon
9fcd229435 Remove unused function 2023-02-07 17:25:49 +01:00
dgtlmoon
0f43213d9d UI - preview page - Fix bug where playwright/chrome was system default and [preview] didnt show snapshot 2023-02-07 16:55:34 +01:00
dgtlmoon
93c57d9fad Adding example docker-compose.yml config to ignore errors from self-signed certs #1389 2023-02-06 17:24:12 +01:00
dgtlmoon
3cdd075baf 0.40.2 2023-02-03 19:20:13 +01:00
dgtlmoon
5c617e8530 Code cleanup - remove unused import 2023-02-03 18:35:58 +01:00
dgtlmoon
1a48965ba1 UI fix - Fix logic for showing screenshot on diff page (#1379) 2023-02-03 11:23:48 +01:00
dgtlmoon
41856c4ed8 Re #1365 - Playwright - Browser "Service Workers" should be enabled by default but unset via env var PLAYWRIGHT_SERVICE_WORKERS=block (#1367) 2023-02-01 20:50:40 +01:00
dgtlmoon
0ed897c50f New setting to allow passwordless access to your 'diff' page - perfect for sharing your diff page securely, refactored login code (#1357) 2023-01-29 22:36:55 +01:00
dgtlmoon
f8e587c415 Security - Possible stored XSS in watch list - Only permit HTTP/HTTP/FTP by default - override with env var SAFE_PROTOCOL_REGEX (#1359) 2023-01-29 11:12:06 +01:00
dgtlmoon
d47a25eb6d Playwright - Removing old bug fix where playwright needed screenshot called twice to make the full screen screenshot be actually fullscreen (#1356) 2023-01-28 15:02:53 +01:00
dgtlmoon
9a0792d185 Fetch backend UI default fixes for VisualSelector and BrowserSteps (#1344) 2023-01-25 19:47:54 +01:00
dgtlmoon
948ef7ade4 Fix fetch UI default fetch backend option icon (#1343) 2023-01-25 18:07:44 +01:00
dgtlmoon
0ba139f8f9 Docker container build - docker container buildx version change causing errors with watchtower and others (#1336) 2023-01-24 23:45:43 +01:00
dgtlmoon
a9431191fc 0.40.1.1 2023-01-22 13:03:15 +01:00
dgtlmoon
774451f256 Re #1328 - add -6 flag to enable IPv6 (#1329) 2023-01-22 11:10:25 +01:00
dgtlmoon
04577cbf32 0.40.1.0 2023-01-21 15:38:54 +01:00
dgtlmoon
f2864af8f1 Update README.md 2023-01-21 14:02:14 +01:00
dgtlmoon
9a36d081c4 Setting docker-compose.yml version to 3.2 so it works with portainer and others #1306 #1144 #1079 2023-01-21 13:50:36 +01:00
dgtlmoon
7048a0acbd UI - Fix wrong logic when dealing with webdriver/playwright watch screenshot settings (#1325) 2023-01-21 13:47:32 +01:00
dgtlmoon
fba719ab8d Ability for watch to use a more obvious system default fetcher (#1320) 2023-01-19 21:57:58 +01:00
25 changed files with 385 additions and 194 deletions

View File

@@ -98,6 +98,7 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
# A new tagged release is required, which builds :tag and :latest
- name: Build and push :tag
@@ -116,6 +117,7 @@ jobs:
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
provenance: false
- name: Image digest
run: echo step SHA ${{ steps.vars.outputs.sha_short }} tag ${{steps.vars.outputs.tag}} branch ${{steps.vars.outputs.branch}} digest ${{ steps.docker_build.outputs.digest }}

View File

@@ -10,11 +10,13 @@ on:
paths:
- requirements.txt
- Dockerfile
- .github/workflows/*
pull_request:
paths:
- requirements.txt
- Dockerfile
- .github/workflows/*
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile

View File

@@ -67,10 +67,10 @@ jobs:
sleep 3
# Should return 0 (no error) when grep finds it
curl -s http://localhost:5556 |grep -q checkbox-uuid
curl -s http://localhost:5556/rss|grep -q rss-specification
# and IPv6
curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
#export WEBDRIVER_URL=http://localhost:4444/wd/hub
#pytest tests/fetchers/test_content.py

View File

@@ -1,6 +1,8 @@
## Web Site Change Detection, Monitoring and Notification.
_Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**
_Live your data-life pro-actively._
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)

View File

@@ -1,5 +1,15 @@
#!/usr/bin/python3
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from flask_compress import Compress as FlaskCompress
from flask_login import current_user
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from functools import wraps
from threading import Event
import datetime
import flask_login
import logging
@@ -10,12 +20,6 @@ import threading
import time
import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from threading import Event
from flask import (
Flask,
abort,
@@ -28,15 +32,11 @@ from flask import (
session,
url_for,
)
from flask_compress import Compress as FlaskCompress
from flask_login import login_required
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.0.4'
__version__ = '0.40.2'
datastore = None
@@ -53,7 +53,6 @@ app = Flask(__name__,
static_url_path="",
static_folder="static",
template_folder="templates")
from flask_compress import Compress
# Super handy for compressing large BrowserSteps responses and others
FlaskCompress(app)
@@ -65,8 +64,6 @@ app.config.exit = Event()
app.config['NEW_VERSION_AVAILABLE'] = False
app.config['LOGIN_DISABLED'] = False
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
# Disables caching of the templates
@@ -74,7 +71,6 @@ app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
watch_api = Api(app, decorators=[csrf.exempt])
@@ -149,7 +145,6 @@ class User(flask_login.UserMixin):
# Compare given password against JSON store or Env var
def check_password(self, password):
import base64
import hashlib
@@ -157,11 +152,9 @@ class User(flask_login.UserMixin):
raw_salt_pass = os.getenv("SALTED_PASS", False)
if not raw_salt_pass:
raw_salt_pass = datastore.data['settings']['application']['password']
raw_salt_pass = datastore.data['settings']['application'].get('password')
raw_salt_pass = base64.b64decode(raw_salt_pass)
salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt
# Use the exact same setup you used to generate the key, but this time put in the password to check
@@ -171,21 +164,44 @@ class User(flask_login.UserMixin):
salt_from_storage,
100000
)
new_key = salt_from_storage + new_key
new_key = salt_from_storage + new_key
return new_key == raw_salt_pass
pass
def login_optionally_required(func):
@wraps(func)
def decorated_view(*args, **kwargs):
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
# Permitted
if request.endpoint == 'static_content' and request.view_args['group'] == 'styles':
return func(*args, **kwargs)
# Permitted
elif request.endpoint == 'diff_history_page' and datastore.data['settings']['application'].get('shared_diff_access'):
return func(*args, **kwargs)
elif request.method in flask_login.config.EXEMPT_METHODS:
return func(*args, **kwargs)
elif app.config.get('LOGIN_DISABLED'):
return func(*args, **kwargs)
elif has_password_enabled and not current_user.is_authenticated:
return app.login_manager.unauthorized()
return func(*args, **kwargs)
return decorated_view
def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE']=datastore_o
#app.config.update(config or {})
app.config['DATASTORE'] = datastore_o
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login'
@@ -213,6 +229,8 @@ def changedetection_app(config=None, datastore_o=None):
# https://flask-cors.readthedocs.io/en/latest/
# CORS(app)
@login_manager.user_loader
def user_loader(email):
user = User()
@@ -221,7 +239,7 @@ def changedetection_app(config=None, datastore_o=None):
@login_manager.unauthorized_handler
def unauthorized_handler():
# @todo validate its a URL of this host and use that
flash("You must be logged in, please log in.", 'error')
return redirect(url_for('login', next=url_for('index')))
@app.route('/logout')
@@ -234,10 +252,6 @@ def changedetection_app(config=None, datastore_o=None):
@app.route('/login', methods=['GET', 'POST'])
def login():
if not datastore.data['settings']['application']['password'] and not os.getenv("SALTED_PASS", False):
flash("Login not required, no password enabled.", "notice")
return redirect(url_for('index'))
if request.method == 'GET':
if flask_login.current_user.is_authenticated:
flash("Already logged in")
@@ -272,27 +286,22 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('login'))
@app.before_request
def do_something_whenever_a_request_comes_in():
# Disable password login if there is not one set
# (No password in settings or env var)
app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
def before_request_handle_cookie_x_settings():
# Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
# For the RSS path, allow access via a token
if request.path == '/rss' and request.args.get('token'):
app_rss_token = datastore.data['settings']['application']['rss_access_token']
rss_url_token = request.args.get('token')
if app_rss_token == rss_url_token:
app.config['LOGIN_DISABLED'] = True
return None
@app.route("/rss", methods=['GET'])
@login_required
def rss():
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from . import diff
limit_tag = request.args.get('tag')
@@ -366,7 +375,7 @@ def changedetection_app(config=None, datastore_o=None):
return response
@app.route("/", methods=['GET'])
@login_required
@login_optionally_required
def index():
from changedetectionio import forms
@@ -417,6 +426,7 @@ def changedetection_app(config=None, datastore_o=None):
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=existing_tags,
watches=sorted_watches
)
@@ -429,7 +439,7 @@ def changedetection_app(config=None, datastore_o=None):
# AJAX endpoint for sending a test
@app.route("/notification/send-test", methods=['POST'])
@login_required
@login_optionally_required
def ajax_callback_send_notification_test():
import apprise
@@ -462,7 +472,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/clear_history/<string:uuid>", methods=['GET'])
@login_required
@login_optionally_required
def clear_watch_history(uuid):
try:
datastore.clear_watch_history(uuid)
@@ -474,7 +484,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/clear_history", methods=['GET', 'POST'])
@login_required
@login_optionally_required
def clear_all_history():
if request.method == 'POST':
@@ -495,43 +505,8 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html")
return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
import hashlib
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
if newest_history_key:
with open(datastore.data['watching'][uuid].history[newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
if datastore.data['settings']['application'].get('ignore_whitespace', False):
checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
else:
checksum = hashlib.md5(stripped_content).hexdigest()
return checksum
return datastore.data['watching'][uuid]['previous_md5']
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_required
@login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
@@ -571,6 +546,8 @@ def changedetection_app(config=None, datastore_o=None):
data=default,
)
form.fetch_backend.choices.append(("system", 'System settings default'))
# form.browser_steps[0] can be assumed that we 'goto url' first
if datastore.proxy_list is None:
@@ -583,6 +560,7 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate():
extra_update_obj = {}
if request.args.get('unpause_on_save'):
@@ -599,10 +577,6 @@ def changedetection_app(config=None, datastore_o=None):
using_default_check_time = False
break
# Use the default if it's the same as system-wide.
if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']:
extra_update_obj['fetch_backend'] = None
# Ignore text
@@ -640,8 +614,6 @@ def changedetection_app(config=None, datastore_o=None):
visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
@@ -652,8 +624,13 @@ def changedetection_app(config=None, datastore_o=None):
watch = datastore.data['watching'].get(uuid)
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
output = render_template("edit.html",
browser_steps_config=browser_step_ui_config,
@@ -675,7 +652,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings", methods=['GET', "POST"])
@login_required
@login_optionally_required
def settings_page():
from changedetectionio import content_fetcher, forms
@@ -755,7 +732,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/import", methods=['GET', "POST"])
@login_required
@login_optionally_required
def import_page():
remaining_urls = []
if request.method == 'POST':
@@ -793,7 +770,7 @@ def changedetection_app(config=None, datastore_o=None):
# Clear all statuses, so we do not see the 'unviewed' class
@app.route("/form/mark-all-viewed", methods=['GET'])
@login_required
@login_optionally_required
def mark_all_viewed():
# Save the current newest history as the most recently viewed
@@ -803,7 +780,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/diff/<string:uuid>", methods=['GET', 'POST'])
@login_required
@login_optionally_required
def diff_history_page(uuid):
from changedetectionio import forms
@@ -879,8 +856,13 @@ def changedetection_app(config=None, datastore_o=None):
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
output = render_template("diff.html",
current_diff_url=watch['url'],
@@ -895,6 +877,7 @@ def changedetection_app(config=None, datastore_o=None):
left_sticky=True,
newest=newest_version_file_contents,
newest_version_timestamp=dates[-1],
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
previous=previous_version_file_contents,
screenshot=screenshot_url,
uuid=uuid,
@@ -905,7 +888,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/preview/<string:uuid>", methods=['GET'])
@login_required
@login_optionally_required
def preview_page(uuid):
content = []
ignored_line_numbers = []
@@ -925,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -995,7 +979,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings/notification-logs", methods=['GET'])
@login_required
@login_optionally_required
def notification_logs():
global notification_debug_log
output = render_template("notification-log.html",
@@ -1005,7 +989,7 @@ def changedetection_app(config=None, datastore_o=None):
# We're good but backups are even better!
@app.route("/backup", methods=['GET'])
@login_required
@login_optionally_required
def get_backup():
import zipfile
@@ -1125,13 +1109,14 @@ def changedetection_app(config=None, datastore_o=None):
abort(404)
@app.route("/form/add/quickwatch", methods=['POST'])
@login_required
@login_optionally_required
def form_quick_watch_add():
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
if not form.validate():
flash("Error")
for widget, l in form.errors.items():
flash(','.join(l), 'error')
return redirect(url_for('index'))
url = request.form.get('url').strip()
@@ -1142,22 +1127,21 @@ def changedetection_app(config=None, datastore_o=None):
add_paused = request.form.get('edit_and_watch_submit_button') != None
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
if not add_paused and new_uuid:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
if new_uuid:
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
else:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
return redirect(url_for('index'))
@app.route("/api/delete", methods=['GET'])
@login_required
@login_optionally_required
def form_delete():
uuid = request.args.get('uuid')
@@ -1174,7 +1158,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/clone", methods=['GET'])
@login_required
@login_optionally_required
def form_clone():
uuid = request.args.get('uuid')
# More for testing, possible to return the first/only
@@ -1182,13 +1166,14 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid)
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
if new_uuid:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
return redirect(url_for('index'))
@app.route("/api/checknow", methods=['GET'])
@login_required
@login_optionally_required
def form_watch_checknow():
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
tag = request.args.get('tag')
@@ -1222,7 +1207,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST'])
@login_required
@login_optionally_required
def form_watch_list_checkbox_operations():
op = request.form['op']
uuids = request.form.getlist('uuids')
@@ -1286,7 +1271,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/share-url", methods=['GET'])
@login_required
@login_optionally_required
def form_share_put_watch():
"""Given a watch UUID, upload the info and return a share-link
the share-link can be imported/added"""

View File

@@ -202,8 +202,11 @@ class CreateWatch(Resource):
del extras['url']
new_uuid = self.datastore.add_watch(url=url, extras=extras)
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
@auth.check_token
def get(self):

View File

@@ -23,11 +23,10 @@
from distutils.util import strtobool
from flask import Blueprint, request, make_response
from flask_login import login_required
import os
import logging
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {}
browsersteps_playwright_browser_interface = None
browsersteps_playwright_browser_interface_browser = None
@@ -65,7 +64,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
@login_required
@login_optionally_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
def browsersteps_ui_update():
import base64

View File

@@ -31,11 +31,13 @@ def sigterm_handler(_signo, _stack_frame):
def main():
global datastore
global app
ssl_mode = False
host = ''
port = os.environ.get('PORT') or 5000
do_cleanup = False
datastore_path = None
do_cleanup = False
host = ''
ipv6_enabled = False
port = os.environ.get('PORT') or 5000
ssl_mode = False
# On Windows, create and use a default path.
if os.name == 'nt':
@@ -46,7 +48,7 @@ def main():
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
sys.exit(2)
@@ -66,6 +68,10 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-6':
print ("Enabling IPv6 listen support")
ipv6_enabled = True
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -133,13 +139,15 @@ def main():
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), socket.AF_INET6),
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port)), socket.AF_INET6), app)
eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)

View File

@@ -252,9 +252,6 @@ class base_html_playwright(Fetcher):
self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''):
# There's a bug where we need to do it twice or it doesnt take the whole page, dont know why.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
if self.browser_steps_screenshot_path is not None:
@@ -300,8 +297,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True,
# Can't think why we need the service workers for our use case?
service_workers='block',
# Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
# Should never be needed
accept_downloads=False
)
@@ -361,28 +358,20 @@ class base_html_playwright(Fetcher):
print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
# Run Browser Steps here
self.iterate_browser_steps()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait)
self.content = self.page.content()
self.status_code = response.status
if len(self.page.content().strip()) == 0:
context.close()
browser.close()
print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status
self.content = self.page.content()
self.headers = response.all_headers()
@@ -403,8 +392,6 @@ class base_html_playwright(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as e:

View File

@@ -92,9 +92,8 @@ class perform_site_check():
# Pluggable content fetcher
prefer_backend = watch.get_fetch_backend
# @todo move this to Watch model get_fetch_backend
if not prefer_backend:
prefer_backend = self.datastore.data['settings']['application']['fetch_backend']
if not prefer_backend or prefer_backend == 'system':
prefer_backend = self.datastore.data['settings']['application']['fetch_backend']
if hasattr(content_fetcher, prefer_backend):
klass = getattr(content_fetcher, prefer_backend)

View File

@@ -138,7 +138,7 @@ class ValidateContentFetcherIsReady(object):
from changedetectionio import content_fetcher
# Better would be a radiohandler that keeps a reference to each class
if field.data is not None:
if field.data is not None and field.data != 'system':
klass = getattr(content_fetcher, field.data)
some_object = klass()
try:
@@ -232,12 +232,17 @@ class validateURL(object):
def __call__(self, form, field):
import validators
try:
validators.url(field.data.strip())
except validators.ValidationFailure:
message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip()))
raise ValidationError(message)
from .model.Watch import is_safe_url
if not is_safe_url(field.data):
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX')
class ValidateListRegex(object):
"""
@@ -345,6 +350,7 @@ class quickWatchForm(Form):
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
# Common to a single watch and the global settings
class commonSettingsForm(Form):
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers()])
@@ -453,17 +459,17 @@ class globalSettingsRequestForm(Form):
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
base_url = StringField('Base URL', validators=[validators.Optional()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
base_url = StringField('Base URL', validators=[validators.Optional()])
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField()
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,

View File

@@ -40,6 +40,7 @@ class model(dict):
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'schema_version' : 0,
'shared_diff_access': False,
'webdriver_delay': None # Extra delay in seconds before extracting text
}
}

View File

@@ -1,9 +1,14 @@
from distutils.util import strtobool
import logging
import os
import re
import time
import uuid
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -18,7 +23,7 @@ base_config = {
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_text': [], # Extract text by regex after filters
'extract_title_as_title': False,
'fetch_backend': None,
'fetch_backend': 'system',
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
'has_ldjson_price_data': None,
'track_ldjson_price_data': None,
@@ -55,6 +60,22 @@ base_config = {
'webdriver_js_execute_code': None, # Run before change-detection
}
def is_safe_url(test_url):
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
# 'source:' is a valid way to tell us to return the source
r = re.compile(re.escape('source:'), re.IGNORECASE)
test_url = r.sub('', test_url)
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
if not pattern.match(test_url.strip()):
return False
return True
class model(dict):
__newest_history_key = None
__history_n = 0
@@ -93,7 +114,11 @@ class model(dict):
@property
def link(self):
url = self.get('url', '')
if not is_safe_url(url):
return 'DISABLED'
ready_url = url
if '{%' in url or '{{' in url:
from jinja2 import Environment

View File

@@ -1,7 +1,7 @@
$(document).ready(function() {
$(document).ready(function () {
function toggle() {
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
if(playwright_enabled) {
if (playwright_enabled) {
// playwright supports headers, so hide everything else
// See #664
$('#requests-override-options #request-method').hide();
@@ -14,9 +14,14 @@ $(document).ready(function() {
$('#requests-override-options').hide();
}
$('#webdriver-override-options').show();
} else if ($('input[name="fetch_backend"]:checked').val() == 'system') {
$('#requests-override-options #request-method').hide();
$('#requests-override-options #request-body').hide();
$('#ignore-status-codes-option').hide();
$('#requests-override-options').hide();
$('#webdriver-override-options').hide();
} else {
$('#requests-override-options').show();

View File

@@ -1,20 +1,20 @@
from flask import (
flash
)
import json
import logging
import os
import threading
import time
import uuid as uuid_builder
from . model import App, Watch
from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
import logging
import os
import re
import requests
import secrets
from . model import App, Watch
import threading
import time
import uuid as uuid_builder
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
@@ -309,9 +309,12 @@ class ChangeDetectionStore:
logging.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error')
return False
from .model.Watch import is_safe_url
if not is_safe_url(url):
flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
return None
with self.lock:
# #Re 569
new_watch = Watch.model(datastore_path=self.datastore_path, default={
'url': url,
@@ -673,3 +676,13 @@ class ChangeDetectionStore:
self.data['settings']['application']['notification_urls'][i] = re.sub(r, r'{{\1}}', url)
return
# Some setups may have missed the correct default, so it shows the wrong config in the UI, although it will default to system-wide
def update_10(self):
for uuid, watch in self.data['watching'].items():
try:
if not watch.get('fetch_backend', ''):
watch['fetch_backend'] = 'system'
except:
continue
return

View File

@@ -76,8 +76,12 @@
</div>
<div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div>
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
{% if password_enabled_and_share_is_off %}
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
{% endif %}
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table>

View File

@@ -57,6 +57,11 @@
{% endif %}
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}

View File

@@ -91,7 +91,12 @@
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img class="status-icon" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" /></a>
{%if watch.get_fetch_backend == "html_webdriver" %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %}
{% if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />
{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" />{% endif %}
{% if watch.last_error is defined and watch.last_error != False %}
<div class="fetch-error">{{ watch.last_error }}

View File

@@ -1,18 +1,34 @@
from . util import live_server_setup, extract_UUID_from_client
from flask import url_for
from . util import live_server_setup
import time
def test_check_access_control(app, client):
def test_check_access_control(app, client, live_server):
# Still doesnt work, but this is closer.
live_server_setup(live_server)
with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
# Enable password check.
# add something that we can hit via diff page later
res = c.post(
url_for("import_page"),
data={"urls": url_for('test_random_content_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
time.sleep(2)
# Enable password check and diff page access bypass
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
"application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
@@ -22,9 +38,15 @@ def test_check_access_control(app, client):
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' in res.data
# Menu should not be available yet
# assert b"SETTINGS" not in res.data
# assert b"BACKUP" not in res.data
@@ -109,3 +131,25 @@ def test_check_access_control(app, client):
assert b"Password protection enabled" not in res.data
# Now checking the diff access
# Enable password check and diff page access bypass
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
# Should be disabled
# "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' not in res.data

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
sleep_time_for_fetch_thread = 3
@@ -76,7 +76,8 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'unviewed' in res.data
# #75, and it should be in the RSS feed
res = client.get(url_for("rss"))
rss_token = extract_rss_token_from_UI(client)
res = client.get(url_for("rss", token=rss_token, _external=True))
expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data

View File

@@ -0,0 +1,39 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
def test_rss_and_token(client, live_server):
set_original_response()
live_server_setup(live_server)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url_for('test_random_content_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
rss_token = extract_rss_token_from_UI(client)
time.sleep(2)
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(2)
# Add our URL to the import page
res = client.get(
url_for("rss", token="bad token", _external=True),
follow_redirects=True
)
assert b"Access denied, bad token" in res.data
res = client.get(
url_for("rss", token=rss_token, _external=True),
follow_redirects=True
)
assert b"Access denied, bad token" not in res.data
assert b"Random content" in res.data

View File

@@ -2,11 +2,9 @@ from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
import time
def test_setup(live_server):
def test_bad_access(client, live_server):
live_server_setup(live_server)
def test_file_access(client, live_server):
res = client.post(
url_for("import_page"),
data={"urls": 'https://localhost'},
@@ -19,18 +17,49 @@ def test_file_access(client, live_server):
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": 'file:///etc/passwd',
"url": 'javascript:alert(document.domain)',
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": ""},
follow_redirects=True
)
time.sleep(3)
res = client.get(
url_for("index", uuid="first"),
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' javascript:alert(123)', "tag": ''},
follow_redirects=True
)
assert b'denied for security reasons' in res.data
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": '%20%20%20javascript:alert(123)%20%20', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' source:javascript:alert(document.domain)', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": 'file:///tasty/disk/drive', "tag": ''},
follow_redirects=True
)
time.sleep(1)
res = client.get(url_for("index"))
assert b'file:// type access is denied for security reasons.' in res.data

View File

@@ -70,6 +70,15 @@ def extract_api_key_from_UI(client):
api_key = m.group(1)
return api_key.strip()
# kinda funky, but works for now
def extract_rss_token_from_UI(client):
import re
res = client.get(
url_for("index"),
)
m = re.search('token=(.+?)"', str(res.data))
token_key = m.group(1)
return token_key.strip()
# kinda funky, but works for now
def extract_UUID_from_client(client):
@@ -98,6 +107,12 @@ def wait_for_all_checks(client):
def live_server_setup(live_server):
@live_server.app.route('/test-random-content-endpoint')
def test_random_content_endpoint():
import secrets
return "Random content - {}\n".format(secrets.token_hex(64))
@live_server.app.route('/test-endpoint')
def test_endpoint():
ctype = request.args.get('content_type')

View File

@@ -52,3 +52,12 @@ def test_visual_selector_content_ready(client, live_server):
# Open it and see if it roughly looks correct
with open(os.path.join('test-datastore', uuid, 'elements.json'), 'r') as f:
json.load(f)
# Some options should be enabled
# @todo - in the future, the visibility should be toggled by JS from the request type setting
res = client.get(
url_for("edit_page", uuid="first"),
follow_redirects=True
)
assert b'notification_screenshot' in res.data

View File

@@ -1,3 +1,4 @@
version: '3.2'
services:
changedetection:
image: ghcr.io/dgtlmoon/changedetection.io
@@ -40,7 +41,6 @@ services:
#
# Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
@@ -94,7 +94,10 @@ services:
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes:
changedetection-data: