Compare commits

..

4 Commits

Author SHA1 Message Date
dgtlmoon
4cc76c310e Merge branch 'master' into fetch-backend-ui-default 2023-01-25 19:31:23 +01:00
dgtlmoon
6d907b6e0d visual selector and steps 2023-01-25 19:30:26 +01:00
dgtlmoon
4c53859a4f Fix UI state to show the correct fetch mode
Added hook update to be sure the default is really set
2023-01-25 17:49:45 +01:00
dgtlmoon
fcedbbca99 Fix fetch UI default option 2023-01-25 17:30:43 +01:00
17 changed files with 179 additions and 324 deletions

View File

@@ -67,10 +67,10 @@ jobs:
sleep 3
# Should return 0 (no error) when grep finds it
curl -s http://localhost:5556 |grep -q checkbox-uuid
curl -s http://localhost:5556/rss|grep -q rss-specification
# and IPv6
curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification
#export WEBDRIVER_URL=http://localhost:4444/wd/hub
#pytest tests/fetchers/test_content.py

View File

@@ -1,15 +1,5 @@
#!/usr/bin/python3
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from flask_compress import Compress as FlaskCompress
from flask_login import current_user
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from functools import wraps
from threading import Event
import datetime
import flask_login
import logging
@@ -20,6 +10,12 @@ import threading
import time
import timeago
from changedetectionio import queuedWatchMetaData
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
from threading import Event
from flask import (
Flask,
abort,
@@ -32,11 +28,15 @@ from flask import (
session,
url_for,
)
from flask_compress import Compress as FlaskCompress
from flask_login import login_required
from flask_restful import abort, Api
from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.40.2'
__version__ = '0.40.1.1'
datastore = None
@@ -53,6 +53,7 @@ app = Flask(__name__,
static_url_path="",
static_folder="static",
template_folder="templates")
from flask_compress import Compress
# Super handy for compressing large BrowserSteps responses and others
FlaskCompress(app)
@@ -64,6 +65,8 @@ app.config.exit = Event()
app.config['NEW_VERSION_AVAILABLE'] = False
app.config['LOGIN_DISABLED'] = False
#app.config["EXPLAIN_TEMPLATE_LOADING"] = True
# Disables caching of the templates
@@ -71,6 +74,7 @@ app.config['TEMPLATES_AUTO_RELOAD'] = True
app.jinja_env.add_extension('jinja2.ext.loopcontrols')
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
watch_api = Api(app, decorators=[csrf.exempt])
@@ -145,6 +149,7 @@ class User(flask_login.UserMixin):
# Compare given password against JSON store or Env var
def check_password(self, password):
import base64
import hashlib
@@ -152,9 +157,11 @@ class User(flask_login.UserMixin):
raw_salt_pass = os.getenv("SALTED_PASS", False)
if not raw_salt_pass:
raw_salt_pass = datastore.data['settings']['application'].get('password')
raw_salt_pass = datastore.data['settings']['application']['password']
raw_salt_pass = base64.b64decode(raw_salt_pass)
salt_from_storage = raw_salt_pass[:32] # 32 is the length of the salt
# Use the exact same setup you used to generate the key, but this time put in the password to check
@@ -164,44 +171,21 @@ class User(flask_login.UserMixin):
salt_from_storage,
100000
)
new_key = salt_from_storage + new_key
new_key = salt_from_storage + new_key
return new_key == raw_salt_pass
pass
def login_optionally_required(func):
    """Wrap a view so login is only enforced when a password is configured.

    The wrapped view is called straight away when any of these holds:

    * the request targets the 'static_content' endpoint with group 'styles';
    * the request targets 'diff_history_page' and the 'shared_diff_access'
      application setting is truthy;
    * the HTTP method is listed in flask_login's EXEMPT_METHODS;
    * app.config['LOGIN_DISABLED'] is truthy.

    Otherwise, if a password is set (application settings or the SALTED_PASS
    environment variable) and the current user is not authenticated, the
    login manager's unauthorized() response is returned instead of the view.
    """
    @wraps(func)
    def decorated_view(*args, **kwargs):
        app_settings = datastore.data['settings']['application']
        password_is_set = app_settings.get('password') or os.getenv("SALTED_PASS", False)

        # Requests that are always let through, regardless of authentication
        always_permitted = (
            (request.endpoint == 'static_content' and request.view_args['group'] == 'styles')
            or (request.endpoint == 'diff_history_page' and app_settings.get('shared_diff_access'))
            or request.method in flask_login.config.EXEMPT_METHODS
            or app.config.get('LOGIN_DISABLED')
        )
        if always_permitted:
            return func(*args, **kwargs)

        # current_user is only consulted once a password is known to be set
        if password_is_set and not current_user.is_authenticated:
            return app.login_manager.unauthorized()

        return func(*args, **kwargs)

    return decorated_view
def changedetection_app(config=None, datastore_o=None):
global datastore
datastore = datastore_o
# so far just for read-only via tests, but this will be moved eventually to be the main source
# (instead of the global var)
app.config['DATASTORE'] = datastore_o
app.config['DATASTORE']=datastore_o
#app.config.update(config or {})
login_manager = flask_login.LoginManager(app)
login_manager.login_view = 'login'
@@ -229,8 +213,6 @@ def changedetection_app(config=None, datastore_o=None):
# https://flask-cors.readthedocs.io/en/latest/
# CORS(app)
@login_manager.user_loader
def user_loader(email):
user = User()
@@ -239,7 +221,7 @@ def changedetection_app(config=None, datastore_o=None):
@login_manager.unauthorized_handler
def unauthorized_handler():
flash("You must be logged in, please log in.", 'error')
# @todo validate it's a URL of this host and use that
return redirect(url_for('login', next=url_for('index')))
@app.route('/logout')
@@ -252,6 +234,10 @@ def changedetection_app(config=None, datastore_o=None):
@app.route('/login', methods=['GET', 'POST'])
def login():
if not datastore.data['settings']['application']['password'] and not os.getenv("SALTED_PASS", False):
flash("Login not required, no password enabled.", "notice")
return redirect(url_for('index'))
if request.method == 'GET':
if flask_login.current_user.is_authenticated:
flash("Already logged in")
@@ -286,22 +272,27 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('login'))
@app.before_request
def before_request_handle_cookie_x_settings():
def do_something_whenever_a_request_comes_in():
# Disable password login if there is not one set
# (No password in settings or env var)
app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
# Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
return None
# For the RSS path, allow access via a token
if request.path == '/rss' and request.args.get('token'):
app_rss_token = datastore.data['settings']['application']['rss_access_token']
rss_url_token = request.args.get('token')
if app_rss_token == rss_url_token:
app.config['LOGIN_DISABLED'] = True
@app.route("/rss", methods=['GET'])
@login_required
def rss():
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from . import diff
limit_tag = request.args.get('tag')
@@ -375,7 +366,7 @@ def changedetection_app(config=None, datastore_o=None):
return response
@app.route("/", methods=['GET'])
@login_optionally_required
@login_required
def index():
from changedetectionio import forms
@@ -439,7 +430,7 @@ def changedetection_app(config=None, datastore_o=None):
# AJAX endpoint for sending a test
@app.route("/notification/send-test", methods=['POST'])
@login_optionally_required
@login_required
def ajax_callback_send_notification_test():
import apprise
@@ -472,7 +463,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/clear_history/<string:uuid>", methods=['GET'])
@login_optionally_required
@login_required
def clear_watch_history(uuid):
try:
datastore.clear_watch_history(uuid)
@@ -484,7 +475,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/clear_history", methods=['GET', 'POST'])
@login_optionally_required
@login_required
def clear_all_history():
if request.method == 'POST':
@@ -505,8 +496,43 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html")
return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
    """Return the MD5 hex digest of the watch's newest snapshot after ignore-text stripping.

    Falls back to the watch's stored 'previous_md5' when no newest history
    key is available.
    """
    import hashlib
    from changedetectionio import fetch_site_status

    watch = datastore.data['watching'][uuid]

    # Get the most recent one
    newest_history_key = watch.get('newest_history_key')

    # 0 means that there's only one, so that there should be no 'unviewed' history available
    if newest_history_key == 0:
        newest_history_key = list(watch.history.keys())[0]

    if not newest_history_key:
        return watch['previous_md5']

    with open(watch.history[newest_history_key], encoding='utf-8') as snapshot_file:
        raw_content = snapshot_file.read()

    # NOTE(review): the result is never used below; kept in case constructing it
    # has side effects - confirm whether this call can be dropped.
    handler = fetch_site_status.perform_site_check(datastore=datastore)

    # NOTE(review): the bytes-style translate(None, ...) and md5() calls below imply
    # strip_ignore_text returns bytes - confirm.
    stripped_content = html_tools.strip_ignore_text(raw_content, watch['ignore_text'])

    if datastore.data['settings']['application'].get('ignore_whitespace', False):
        # Drop CR, LF, tab and space before hashing
        stripped_content = stripped_content.translate(None, b'\r\n\t ')

    return hashlib.md5(stripped_content).hexdigest()
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
@login_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
@@ -560,7 +586,6 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'POST' and form.validate():
extra_update_obj = {}
if request.args.get('unpause_on_save'):
@@ -652,7 +677,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings", methods=['GET', "POST"])
@login_optionally_required
@login_required
def settings_page():
from changedetectionio import content_fetcher, forms
@@ -732,7 +757,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/import", methods=['GET', "POST"])
@login_optionally_required
@login_required
def import_page():
remaining_urls = []
if request.method == 'POST':
@@ -770,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None):
# Clear all statuses, so we do not see the 'unviewed' class
@app.route("/form/mark-all-viewed", methods=['GET'])
@login_optionally_required
@login_required
def mark_all_viewed():
# Save the current newest history as the most recently viewed
@@ -780,7 +805,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/diff/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
@login_required
def diff_history_page(uuid):
from changedetectionio import forms
@@ -856,13 +881,8 @@ def changedetection_app(config=None, datastore_o=None):
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
password_enabled_and_share_is_off = False
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
output = render_template("diff.html",
current_diff_url=watch['url'],
@@ -877,7 +897,6 @@ def changedetection_app(config=None, datastore_o=None):
left_sticky=True,
newest=newest_version_file_contents,
newest_version_timestamp=dates[-1],
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
previous=previous_version_file_contents,
screenshot=screenshot_url,
uuid=uuid,
@@ -888,7 +907,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/preview/<string:uuid>", methods=['GET'])
@login_optionally_required
@login_required
def preview_page(uuid):
content = []
ignored_line_numbers = []
@@ -908,9 +927,8 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -979,7 +997,7 @@ def changedetection_app(config=None, datastore_o=None):
return output
@app.route("/settings/notification-logs", methods=['GET'])
@login_optionally_required
@login_required
def notification_logs():
global notification_debug_log
output = render_template("notification-log.html",
@@ -989,7 +1007,7 @@ def changedetection_app(config=None, datastore_o=None):
# We're good but backups are even better!
@app.route("/backup", methods=['GET'])
@login_optionally_required
@login_required
def get_backup():
import zipfile
@@ -1001,8 +1019,7 @@ def changedetection_app(config=None, datastore_o=None):
os.unlink(previous_backup_filename)
# create a ZipFile object
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = "changedetection-backup-{}.zip".format(timestamp)
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w",
@@ -1110,14 +1127,13 @@ def changedetection_app(config=None, datastore_o=None):
abort(404)
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
@login_required
def form_quick_watch_add():
from changedetectionio import forms
form = forms.quickWatchForm(request.form)
if not form.validate():
for widget, l in form.errors.items():
flash(','.join(l), 'error')
flash("Error")
return redirect(url_for('index'))
url = request.form.get('url').strip()
@@ -1128,21 +1144,22 @@ def changedetection_app(config=None, datastore_o=None):
add_paused = request.form.get('edit_and_watch_submit_button') != None
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused})
if new_uuid:
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
else:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
if not add_paused and new_uuid:
# Straight into the queue.
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
flash("Watch added.")
if add_paused:
flash('Watch added in Paused state, saving will unpause.')
return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1))
return redirect(url_for('index'))
@app.route("/api/delete", methods=['GET'])
@login_optionally_required
@login_required
def form_delete():
uuid = request.args.get('uuid')
@@ -1159,7 +1176,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/clone", methods=['GET'])
@login_optionally_required
@login_required
def form_clone():
uuid = request.args.get('uuid')
# More for testing, possible to return the first/only
@@ -1167,14 +1184,13 @@ def changedetection_app(config=None, datastore_o=None):
uuid = list(datastore.data['watching'].keys()).pop()
new_uuid = datastore.clone(uuid)
if new_uuid:
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
flash('Cloned.')
return redirect(url_for('index'))
@app.route("/api/checknow", methods=['GET'])
@login_optionally_required
@login_required
def form_watch_checknow():
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
tag = request.args.get('tag')
@@ -1208,7 +1224,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index', tag=tag))
@app.route("/form/checkbox-operations", methods=['POST'])
@login_optionally_required
@login_required
def form_watch_list_checkbox_operations():
op = request.form['op']
uuids = request.form.getlist('uuids')
@@ -1272,7 +1288,7 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
@app.route("/api/share-url", methods=['GET'])
@login_optionally_required
@login_required
def form_share_put_watch():
"""Given a watch UUID, upload the info and return a share-link
the share-link can be imported/added"""

View File

@@ -202,11 +202,8 @@ class CreateWatch(Resource):
del extras['url']
new_uuid = self.datastore.add_watch(url=url, extras=extras)
if new_uuid:
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
else:
return "Invalid or unsupported URL", 400
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
return {'uuid': new_uuid}, 201
@auth.check_token
def get(self):

View File

@@ -23,10 +23,11 @@
from distutils.util import strtobool
from flask import Blueprint, request, make_response
from flask_login import login_required
import os
import logging
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {}
browsersteps_playwright_browser_interface = None
browsersteps_playwright_browser_interface_browser = None
@@ -64,7 +65,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
@login_optionally_required
@login_required
@browser_steps_blueprint.route("/browsersteps_update", methods=['GET', 'POST'])
def browsersteps_ui_update():
import base64

View File

@@ -252,6 +252,9 @@ class base_html_playwright(Fetcher):
self.proxy['password'] = parsed.password
def screenshot_step(self, step_n=''):
# There's a bug where we need to do it twice or it doesn't take the whole page, don't know why.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85)
if self.browser_steps_screenshot_path is not None:
@@ -297,8 +300,8 @@ class base_html_playwright(Fetcher):
proxy=self.proxy,
# This is needed to enable JavaScript execution on GitHub and others
bypass_csp=True,
# Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers
service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
# Can't think why we need the service workers for our use case?
service_workers='block',
# Should never be needed
accept_downloads=False
)
@@ -358,20 +361,28 @@ class base_html_playwright(Fetcher):
print ("Content Fetcher > Response object was none")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
# Run Browser Steps here
self.iterate_browser_steps()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait)
self.content = self.page.content()
self.status_code = response.status
if len(self.page.content().strip()) == 0:
context.close()
browser.close()
print ("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Bug 2(?) Set the viewport size AFTER loading the page
self.page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status
self.content = self.page.content()
self.headers = response.all_headers()
@@ -392,6 +403,8 @@ class base_html_playwright(Fetcher):
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
self.page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as e:

View File

@@ -232,17 +232,12 @@ class validateURL(object):
def __call__(self, form, field):
    """Validate that the field holds a URL whose protocol is permitted.

    Raises ValidationError when the `validators` package raises
    ValidationFailure for the stripped value, or when is_safe_url()
    rejects the (unstripped) field data.
    """
    import validators

    # NOTE(review): depending on the installed `validators` version, url() may
    # return a ValidationFailure object instead of raising it, in which case
    # this except branch never runs - confirm against the pinned version.
    try:
        validators.url(field.data.strip())
    except validators.ValidationFailure:
        raise ValidationError(field.gettext('\'%s\' is not a valid URL.' % (field.data.strip())))

    from .model.Watch import is_safe_url

    if is_safe_url(field.data):
        return

    raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX')
class ValidateListRegex(object):
"""
@@ -459,17 +454,17 @@ class globalSettingsRequestForm(Form):
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
base_url = StringField('Base URL', validators=[validators.Optional()])
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
password = SaltyPasswordField()
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
password = SaltyPasswordField()
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,

View File

@@ -40,7 +40,6 @@ class model(dict):
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'schema_version' : 0,
'shared_diff_access': False,
'webdriver_delay': None # Extra delay in seconds before extracting text
}
}

View File

@@ -1,14 +1,9 @@
from distutils.util import strtobool
import logging
import os
import re
import time
import uuid
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -60,22 +55,6 @@ base_config = {
'webdriver_js_execute_code': None, # Run before change-detection
}
def is_safe_url(test_url):
    """Return True when test_url starts with a permitted protocol, else False.

    The permitted protocols come from the SAFE_PROTOCOL_REGEX environment
    variable, falling back to the module-level SAFE_PROTOCOL_REGEX; matching
    is case-insensitive and done against the whitespace-stripped URL.
    """
    # See https://github.com/dgtlmoon/changedetection.io/issues/1358

    # 'source:' is a valid way to tell us to return the source, so every
    # occurrence of it (any case) is removed first; that way 'source:javascript:'
    # etc. is judged by what follows it.
    without_source = re.sub(re.escape('source:'), '', test_url, flags=re.IGNORECASE)

    allowed_protocols = os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX)
    return re.match(allowed_protocols, without_source.strip(), re.IGNORECASE) is not None
class model(dict):
__newest_history_key = None
__history_n = 0
@@ -114,11 +93,7 @@ class model(dict):
@property
def link(self):
url = self.get('url', '')
if not is_safe_url(url):
return 'DISABLED'
ready_url = url
if '{%' in url or '{{' in url:
from jinja2 import Environment
@@ -153,9 +128,7 @@ class model(dict):
@property
def is_pdf(self):
# content_type field is set in the future
# https://github.com/dgtlmoon/changedetection.io/issues/1392
# Not sure the best logic here
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
@property
def label(self):

View File

@@ -1,20 +1,20 @@
from flask import (
flash
)
from . model import App, Watch
from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
import logging
import os
import re
import requests
import secrets
import threading
import time
import uuid as uuid_builder
from copy import deepcopy
from os import path, unlink
from threading import Lock
import re
import requests
import secrets
from . model import App, Watch
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
@@ -192,24 +192,27 @@ class ChangeDetectionStore:
tags.sort()
return tags
def unlink_history_file(self, path):
    """Delete the file at `path`, ignoring a missing file or other I/O errors."""
    try:
        unlink(path)
    except OSError:
        # IOError is an alias of OSError and FileNotFoundError is a subclass of it,
        # so this covers the same exceptions; removal is best-effort.
        return
# Delete a single watch by UUID
def delete(self, uuid):
import pathlib
import shutil
with self.lock:
if uuid == 'all':
self.__data['watching'] = {}
# GitHub #30 also delete history records
for uuid in self.data['watching']:
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
shutil.rmtree(path)
self.needs_write_urgent = True
for path in self.data['watching'][uuid].history.values():
self.unlink_history_file(path)
else:
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
shutil.rmtree(path)
for path in self.data['watching'][uuid].history.values():
self.unlink_history_file(path)
del self.data['watching'][uuid]
self.needs_write_urgent = True
@@ -306,12 +309,9 @@ class ChangeDetectionStore:
logging.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error')
return False
from .model.Watch import is_safe_url
if not is_safe_url(url):
flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
return None
with self.lock:
# #Re 569
new_watch = Watch.model(datastore_path=self.datastore_path, default={
'url': url,

View File

@@ -76,12 +76,8 @@
</div>
<div class="tab-pane-inner" id="text">
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.</div>
{% if password_enabled_and_share_is_off %}
<div class="tip">Pro-tip: You can enable <strong>"share access when password is enabled"</strong> from settings</div>
{% endif %}
<div class="tip">Pro-tip: Use <strong>show current snapshot</strong> tab to visualise what will be ignored.
</div>
<div class="snapshot-age">{{watch_a.snapshot_text_ctime|format_timestamp_timeago}}</div>
<table>

View File

@@ -57,11 +57,6 @@
{% endif %}
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.shared_diff_access, class="shared_diff_access") }}
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.base_url, placeholder="http://yoursite.com:5000/",
class="m-d") }}

View File

@@ -1,34 +1,18 @@
from . util import live_server_setup, extract_UUID_from_client
from flask import url_for
import time
from . util import live_server_setup
def test_check_access_control(app, client, live_server):
def test_check_access_control(app, client):
# Still doesn't work, but this is closer.
live_server_setup(live_server)
with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
# add something that we can hit via diff page later
res = c.post(
url_for("import_page"),
data={"urls": url_for('test_random_content_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(2)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
time.sleep(2)
# Enable password check and diff page access bypass
# Enable password check.
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
"application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
@@ -38,15 +22,9 @@ def test_check_access_control(app, client, live_server):
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' in res.data
# Menu should not be available yet
# assert b"SETTINGS" not in res.data
# assert b"BACKUP" not in res.data
@@ -131,25 +109,3 @@ def test_check_access_control(app, client, live_server):
assert b"Password protection enabled" not in res.data
# Now checking the diff access
# Enable password check and diff page access bypass
res = c.post(
url_for("settings_page"),
data={"application-password": "foobar",
# Should be disabled
# "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Password protection enabled." in res.data
# Check we hit the login
res = c.get(url_for("index"), follow_redirects=True)
# Should be logged out
assert b"Login" in res.data
# The diff page should return something valid when logged out
res = client.get(url_for("diff_history_page", uuid="first"))
assert b'Random content' not in res.data

View File

@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
sleep_time_for_fetch_thread = 3
@@ -76,8 +76,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
assert b'unviewed' in res.data
# #75, and it should be in the RSS feed
rss_token = extract_rss_token_from_UI(client)
res = client.get(url_for("rss", token=rss_token, _external=True))
res = client.get(url_for("rss"))
expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data

View File

@@ -1,39 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI
def test_rss_and_token(client, live_server):
    """The RSS endpoint refuses a wrong token and serves the feed for the right one."""
    set_original_response()
    live_server_setup(live_server)

    # Import one watch that points at the random-content test endpoint
    import_response = client.post(
        url_for("import_page"),
        data={"urls": url_for('test_random_content_endpoint', _external=True)},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_response.data

    rss_token = extract_rss_token_from_UI(client)

    # Sleep, queue a recheck, then sleep again before reading the feed
    time.sleep(2)
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    # Request the feed with a wrong token
    denied_response = client.get(
        url_for("rss", token="bad token", _external=True),
        follow_redirects=True,
    )
    assert b"Access denied, bad token" in denied_response.data

    # Request the feed with the token scraped from the UI
    granted_response = client.get(
        url_for("rss", token=rss_token, _external=True),
        follow_redirects=True,
    )
    assert b"Access denied, bad token" not in granted_response.data
    assert b"Random content" in granted_response.data

View File

@@ -2,9 +2,11 @@ from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
import time
def test_bad_access(client, live_server):
def test_setup(live_server):
live_server_setup(live_server)
def test_file_access(client, live_server):
res = client.post(
url_for("import_page"),
data={"urls": 'https://localhost'},
@@ -17,49 +19,18 @@ def test_bad_access(client, live_server):
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": 'javascript:alert(document.domain)',
"url": 'file:///etc/passwd',
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": ""},
follow_redirects=True
)
time.sleep(3)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' javascript:alert(123)', "tag": ''},
res = client.get(
url_for("index", uuid="first"),
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": '%20%20%20javascript:alert(123)%20%20', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
res = client.post(
url_for("form_quick_watch_add"),
data={"url": ' source:javascript:alert(document.domain)', "tag": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data
# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": 'file:///tasty/disk/drive', "tag": ''},
follow_redirects=True
)
time.sleep(1)
res = client.get(url_for("index"))
assert b'file:// type access is denied for security reasons.' in res.data
assert b'denied for security reasons' in res.data

View File

@@ -70,15 +70,6 @@ def extract_api_key_from_UI(client):
api_key = m.group(1)
return api_key.strip()
# kinda funky, but works for now
def extract_rss_token_from_UI(client):
    """Scrape the RSS access token out of the index page.

    Fetches the index page with `client`, takes the first `token=...` value
    that ends at a double quote in the stringified response body, and returns
    it with surrounding whitespace removed.
    """
    import re

    index_page = client.get(url_for("index"))
    token_match = re.search('token=(.+?)"', str(index_page.data))
    return token_match.group(1).strip()
# kinda funky, but works for now
def extract_UUID_from_client(client):
@@ -107,12 +98,6 @@ def wait_for_all_checks(client):
def live_server_setup(live_server):
@live_server.app.route('/test-random-content-endpoint')
def test_random_content_endpoint():
import secrets
return "Random content - {}\n".format(secrets.token_hex(64))
@live_server.app.route('/test-endpoint')
def test_endpoint():
ctype = request.args.get('content_type')

View File

@@ -41,6 +41,7 @@ services:
#
# Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
@@ -94,10 +95,7 @@ services:
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes:
changedetection-data: