Compare commits

...

33 Commits

Author SHA1 Message Date
dgtlmoon
1d060225e1 0.39.11 2022-03-23 09:42:51 +01:00
dgtlmoon
7e0f0d0fd8 Microsoft Windows installation fixes (#492) 2022-03-22 23:08:08 +01:00
dgtlmoon
8b2afa2220 GitHub tweak - container tags should be CSV list (Fix ghcr.io not building) 2022-03-22 00:08:05 +01:00
dgtlmoon
f55ffa0f62 GitHub tweak - build containers also on push to master 2022-03-21 23:08:17 +01:00
dgtlmoon
942c3f021f Allow changedetector to ignore status codes as a per-site setting (#479) (#485)
Co-authored-by: Ara Hayrabedian <ara.hayrabedian@gmail.com>
2022-03-21 23:03:54 +01:00
dgtlmoon
5483f5d694 Security update - Use CSRF token protection for forms, make "remove password" use HTTP Post (#484) 2022-03-21 22:54:27 +01:00
dgtlmoon
f2fa638480 Security update - Protect against file:/// type access by webdriver/chrome. (#483) 2022-03-21 20:59:20 +01:00
dgtlmoon
82d1a7f73e Only build container on GitHub releases, not tests 2022-03-20 16:57:36 +01:00
dgtlmoon
9fc291fb63 Also change container names to help stop some DNS issues 2022-03-17 19:59:37 +01:00
dgtlmoon
3e8a15456a Detect byte-encoding when the server mishandles the content-type header reply (#472) 2022-03-17 10:28:02 +01:00
dgtlmoon
2a03f3f57e Improving form/edit example markup 2022-03-13 12:00:45 +01:00
dgtlmoon
ffad5cca97 JSON diff/preview should use utf-8 encoding where possible (#465) 2022-03-13 11:37:51 +01:00
Tim Loderhose
60a9a786e0 Fix typo in settings form 2022-03-13 10:55:37 +01:00
dgtlmoon
165e950e55 Add python venv to .gitignore 2022-03-13 10:53:33 +01:00
dgtlmoon
c25294ca57 0.39.10 2022-03-12 17:28:30 +01:00
Tim Loderhose
d4359c2e67 Add filter to remove elements by CSS rule from HTML before change detection is run (#445) 2022-03-12 13:29:30 +01:00
dgtlmoon
44fc804991 Minor updates to filters form text 2022-03-12 11:20:43 +01:00
dgtlmoon
b72c9eaf62 Re #448 - Dont use changedetection.io as the container name and hostname, fix problems fetching from the real changedetection.io webserver :) 2022-03-12 08:24:51 +01:00
dgtlmoon
7ce9e4dfc2 Testing - Refactor HTTP Request Type test (#453) 2022-03-11 18:50:02 +01:00
dgtlmoon
3cc6586695 Make table header font size the same as content 2022-03-07 13:03:59 +01:00
dgtlmoon
09204cb43f Adjust background colours 2022-03-06 19:03:59 +01:00
dgtlmoon
a709122874 Handle the case where the visitor is already logged-in and tries to login again (#447) 2022-03-06 18:19:05 +01:00
dgtlmoon
efbeaf9535 Make the Request Override settings easier to understand 2022-03-06 17:23:21 +01:00
dgtlmoon
1a19fba07d Minor tweak to notification token table 2022-03-06 17:10:30 +01:00
dgtlmoon
eb9020c175 Style tweak to watch form 2022-03-06 17:05:23 +01:00
dgtlmoon
13bb44e4f8 Login form style fixes 2022-03-06 17:03:15 +01:00
dgtlmoon
47f294c23b Upgrade apprise notification engine to 0.9.7 (important telegram fixes) 2022-03-05 13:14:14 +01:00
dgtlmoon
a4cce16188 Remove pytest from production release pip requirements 2022-03-05 13:12:15 +01:00
dgtlmoon
69aec23d1d Style fix for background image relative to X-Forwarded-Prefix when running via reverse proxy subdirectory 2022-03-05 13:08:57 +01:00
dgtlmoon
f85ccffe0a Merge branch 'master' of github.com:dgtlmoon/changedetection.io 2022-03-04 13:13:54 +01:00
dgtlmoon
0005131472 Re-arranging primary links so the important ones are easier to find on mobile 2022-03-04 13:06:39 +01:00
dgtlmoon
3be1f4ea44 Set authentication cookie path relative to X-Forwarded-Prefix when running via reverse proxy subdirectory (#446) 2022-03-04 11:23:32 +01:00
dgtlmoon
46c72a7fb3 Upgrade inscriptis HTML converter to version 2.2~ (#434) 2022-03-01 17:58:54 +01:00
38 changed files with 1019 additions and 321 deletions

View File

@@ -2,16 +2,20 @@ name: Build and push containers
on:
# Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch
workflow_run:
workflows: ["ChangeDetection.io Test"]
branches: [master]
tags: ['0.*']
types: [completed]
# workflow_run:
# workflows: ["ChangeDetection.io Test"]
# branches: [master]
# tags: ['0.*']
# types: [completed]
# Or a new tagged release
release:
types: [published, edited]
push:
branches:
- master
jobs:
metadata:
runs-on: ubuntu-latest
@@ -91,8 +95,7 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
ghcr.io/${{ github.repository }}:latest
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
@@ -107,8 +110,7 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache

2
.gitignore vendored
View File

@@ -7,4 +7,6 @@ __pycache__
.pytest_cache
build
dist
venv
*.egg-info*
.vscode/settings.json

View File

@@ -2,5 +2,5 @@ recursive-include changedetectionio/templates *
recursive-include changedetectionio/static *
include changedetection.py
global-exclude *.pyc
global-exclude *node_modules*
global-exclude node_modules
global-exclude venv

View File

@@ -163,9 +163,9 @@ See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configura
Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver)
## Windows native support?
## Windows support?
Sorry not yet :( https://github.com/dgtlmoon/changedetection.io/labels/windows
YES! See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
## Support us

View File

@@ -1,110 +1,11 @@
#!/usr/bin/python3
# Launch as a eventlet.wsgi server instance.
import getopt
import os
import sys
import eventlet
import eventlet.wsgi
import changedetectionio
from changedetectionio import store
def main():
ssl_mode = False
host = ''
port = os.environ.get('PORT') or 5000
do_cleanup = False
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
datastore_path = os.path.join(os.getcwd(), "datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
sys.exit(2)
create_datastore_dir = False
for opt, arg in opts:
# if opt == '--purge':
# Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
if opt == '-s':
ssl_mode = True
if opt == '-h':
host = arg
if opt == '-p':
port = int(arg)
if opt == '-d':
datastore_path = arg
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
# Create the datadir if it doesnt exist
if opt == '-C':
create_datastore_dir = True
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {'datastore_path': datastore_path}
if not os.path.isdir(app_config['datastore_path']):
if create_datastore_dir:
os.mkdir(app_config['datastore_path'])
else:
print ("ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists.\n"
"Alternatively, use the -C parameter.".format(app_config['datastore_path']),file=sys.stderr)
sys.exit(2)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=changedetectionio.__version__)
app = changedetectionio.changedetection_app(app_config, datastore)
# Go into cleanup mode
if do_cleanup:
datastore.remove_unused_snapshots()
app.config['datastore_path'] = datastore_path
@app.context_processor
def inject_version():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False
)
# Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings
#
# proxy_set_header Host "localhost";
# proxy_set_header X-Forwarded-Prefix /app;
if os.getenv('USE_X_SETTINGS'):
print ("USE_X_SETTINGS is ENABLED\n")
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)
# Entry-point for running from the CLI when not installed via Pip, Pip will handle the console_scripts entry_points's from setup.py
# It's recommended to use `pip3 install changedetection.io` and start with `changedetection.py` instead, it will be linkd to your global path.
# or Docker.
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
from changedetectionio import changedetection
if __name__ == '__main__':
main()
changedetection.main()

1
changedetectionio/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
test-datastore

View File

@@ -35,9 +35,11 @@ from flask import (
url_for,
)
from flask_login import login_required
from flask_wtf import CSRFProtect
from changedetectionio import html_tools
__version__ = '0.39.9'
__version__ = '0.39.11'
datastore = None
@@ -51,11 +53,10 @@ update_q = queue.Queue()
notification_q = queue.Queue()
# Needs to be set this way because we also build and publish via pip
base_path = os.path.dirname(os.path.realpath(__file__))
app = Flask(__name__,
static_url_path="{}/static".format(base_path),
template_folder="{}/templates".format(base_path))
static_url_path="",
static_folder="static",
template_folder="templates")
# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
@@ -71,6 +72,9 @@ app.config['LOGIN_DISABLED'] = False
# Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]
def init_app_secret(datastore_path):
@@ -127,7 +131,7 @@ def _jinja2_filter_datetimestamp(timestamp, format="%Y-%m-%d %H:%M:%S"):
# return timeago.format(timestamp, time.time())
# return datetime.datetime.utcfromtimestamp(timestamp).strftime(format)
# When nobody is logged in Flask-Login's current_user is set to an AnonymousUser object.
class User(flask_login.UserMixin):
id=None
@@ -136,7 +140,6 @@ class User(flask_login.UserMixin):
def get_user(self, email="defaultuser@changedetection.io"):
return self
def is_authenticated(self):
return True
def is_active(self):
return True
@@ -215,6 +218,10 @@ def changedetection_app(config=None, datastore_o=None):
return redirect(url_for('index'))
if request.method == 'GET':
if flask_login.current_user.is_authenticated:
flash("Already logged in")
return redirect(url_for("index"))
output = render_template("login.html")
return output
@@ -250,6 +257,11 @@ def changedetection_app(config=None, datastore_o=None):
# (No password in settings or env var)
app.config['LOGIN_DISABLED'] = datastore.data['settings']['application']['password'] == False and os.getenv("SALTED_PASS", False) == False
# Set the auth cookie path if we're running as X-settings/X-Forwarded-Prefix
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
app.config['REMEMBER_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
app.config['SESSION_COOKIE_PATH'] = request.headers['X-Forwarded-Prefix']
# For the RSS path, allow access via a token
if request.path == '/rss' and request.args.get('token'):
app_rss_token = datastore.data['settings']['application']['rss_access_token']
@@ -495,13 +507,13 @@ def changedetection_app(config=None, datastore_o=None):
'headers': form.headers.data,
'body': form.body.data,
'method': form.method.data,
'ignore_status_codes': form.ignore_status_codes.data,
'fetch_backend': form.fetch_backend.data,
'trigger_text': form.trigger_text.data,
'notification_title': form.notification_title.data,
'notification_body': form.notification_body.data,
'notification_format': form.notification_format.data,
'extract_title_as_title': form.extract_title_as_title.data
'extract_title_as_title': form.extract_title_as_title.data,
}
# Notification URLs
@@ -518,6 +530,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['watching'][uuid]['css_filter'] = form.css_filter.data.strip()
datastore.data['watching'][uuid]['subtractive_selectors'] = form.subtractive_selectors.data
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
@@ -590,6 +603,7 @@ def changedetection_app(config=None, datastore_o=None):
if request.method == 'GET':
form.minutes_between_check.data = int(datastore.data['settings']['requests']['minutes_between_check'])
form.notification_urls.data = datastore.data['settings']['application']['notification_urls']
form.global_subtractive_selectors.data = datastore.data['settings']['application']['global_subtractive_selectors']
form.global_ignore_text.data = datastore.data['settings']['application']['global_ignore_text']
form.ignore_whitespace.data = datastore.data['settings']['application']['ignore_whitespace']
form.extract_title_as_title.data = datastore.data['settings']['application']['extract_title_as_title']
@@ -599,16 +613,15 @@ def changedetection_app(config=None, datastore_o=None):
form.notification_format.data = datastore.data['settings']['application']['notification_format']
form.base_url.data = datastore.data['settings']['application']['base_url']
# Password unset is a GET, but we can lock the session to always need the password
if not os.getenv("SALTED_PASS", False) and request.values.get('removepassword') == 'yes':
from pathlib import Path
if request.method == 'POST' and form.data.get('removepassword_button') == True:
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
if not os.getenv("SALTED_PASS", False):
datastore.data['settings']['application']['password'] = False
flash("Password protection removed.", 'notice')
flask_login.logout_user()
return redirect(url_for('settings_page'))
if request.method == 'POST' and form.validate():
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
datastore.data['settings']['requests']['minutes_between_check'] = form.minutes_between_check.data
datastore.data['settings']['application']['extract_title_as_title'] = form.extract_title_as_title.data
@@ -618,6 +631,7 @@ def changedetection_app(config=None, datastore_o=None):
datastore.data['settings']['application']['notification_format'] = form.notification_format.data
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
datastore.data['settings']['application']['base_url'] = form.base_url.data
datastore.data['settings']['application']['global_subtractive_selectors'] = form.global_subtractive_selectors.data
datastore.data['settings']['application']['global_ignore_text'] = form.global_ignore_text.data
datastore.data['settings']['application']['ignore_whitespace'] = form.ignore_whitespace.data

View File

@@ -0,0 +1,114 @@
#!/usr/bin/python3
# Launch as a eventlet.wsgi server instance.
import getopt
import os
import sys
import eventlet
import eventlet.wsgi
from . import store, changedetection_app
from . import __version__
def main():
ssl_mode = False
host = ''
port = os.environ.get('PORT') or 5000
do_cleanup = False
datastore_path = None
# On Windows, create and use a default path.
if os.name == 'nt':
datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
os.makedirs(datastore_path, exist_ok=True)
else:
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
sys.exit(2)
create_datastore_dir = False
for opt, arg in opts:
# if opt == '--purge':
# Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
if opt == '-s':
ssl_mode = True
if opt == '-h':
host = arg
if opt == '-p':
port = int(arg)
if opt == '-d':
datastore_path = arg
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
# Create the datadir if it doesnt exist
if opt == '-C':
create_datastore_dir = True
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {'datastore_path': datastore_path}
if not os.path.isdir(app_config['datastore_path']):
if create_datastore_dir:
os.mkdir(app_config['datastore_path'])
else:
print(
"ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists or specify a directory with the -d option.\n"
"Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr)
sys.exit(2)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
app = changedetection_app(app_config, datastore)
# Go into cleanup mode
if do_cleanup:
datastore.remove_unused_snapshots()
app.config['datastore_path'] = datastore_path
@app.context_processor
def inject_version():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False
)
# Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings
#
# proxy_set_header Host "localhost";
# proxy_set_header X-Forwarded-Prefix /app;
if os.getenv('USE_X_SETTINGS'):
print ("USE_X_SETTINGS is ENABLED\n")
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)

View File

@@ -1,10 +1,12 @@
import os
import time
from abc import ABC, abstractmethod
import chardet
import os
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
from selenium.common.exceptions import WebDriverException
import requests
import time
import urllib3.exceptions
@@ -20,7 +22,7 @@ class EmptyReply(Exception):
class Fetcher():
error = None
status_code = None
content = None # Should always be bytes.
content = None
headers = None
fetcher_description ="No description"
@@ -30,7 +32,13 @@ class Fetcher():
return self.error
@abstractmethod
def run(self, url, timeout, request_headers, request_body, request_method):
def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
# Should set self.error, self.status_code and self.content
pass
@@ -97,7 +105,13 @@ class html_webdriver(Fetcher):
if proxy_args:
self.proxy = SeleniumProxy(raw=proxy_args)
def run(self, url, timeout, request_headers, request_body, request_method):
def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
# request_body, request_method unused for now, until some magic in the future happens.
@@ -145,8 +159,13 @@ class html_webdriver(Fetcher):
class html_requests(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client"
def run(self, url, timeout, request_headers, request_body, request_method):
import requests
def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
r = requests.request(method=request_method,
data=request_body,
@@ -155,16 +174,21 @@ class html_requests(Fetcher):
timeout=timeout,
verify=False)
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8
# Return bytes here
html = r.text
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
# For example - some sites don't tell us it's utf-8, but return utf-8 content
# This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
# https://github.com/psf/requests/issues/1604 good info about requests encoding detection
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
# @todo test this
# @todo maybe you really want to test zero-byte return pages?
if not r or not html or not len(html):
if (not ignore_status_codes and not r) or not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
self.status_code = r.status_code
self.content = html
self.content = r.text
self.headers = r.headers

View File

@@ -1,11 +1,11 @@
import time
from changedetectionio import content_fetcher
from changedetectionio import html_tools
import hashlib
from inscriptis import get_text
import urllib3
from . import html_tools
import os
import re
import time
import urllib3
from inscriptis import get_text
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -24,8 +24,14 @@ class perform_site_check():
stripped_text_from_html = ""
watch = self.datastore.data['watching'][uuid]
# Unset any existing notification error
# Protect against file:// access
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
raise Exception(
"file:// type access is denied for security reasons."
)
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}
extra_headers = self.datastore.get_val(uuid, 'headers')
@@ -47,6 +53,7 @@ class perform_site_check():
url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes')
# Pluggable content fetcher
prefer_backend = watch['fetch_backend']
@@ -58,7 +65,7 @@ class perform_site_check():
fetcher = klass()
fetcher.run(url, timeout, request_headers, request_body, request_method)
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
# Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base?
@@ -72,8 +79,15 @@ class perform_site_check():
is_json = 'application/json' in fetcher.headers.get('Content-Type', '')
is_html = not is_json
css_filter_rule = watch['css_filter']
subtractive_selectors = watch.get(
"subtractive_selectors", []
) + self.datastore.data["settings"]["application"].get(
"global_subtractive_selectors", []
)
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
if is_json and not has_filter_rule:
css_filter_rule = "json:$"
has_filter_rule = True
@@ -100,11 +114,11 @@ class perform_site_check():
else:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
if has_subtractive_selectors:
html_content = html_tools.element_removal(subtractive_selectors, html_content)
# get_text() via inscriptis
stripped_text_from_html = get_text(html_content)
# Re #340 - return the content before the 'ignore text' was applied
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

View File

@@ -1,13 +1,30 @@
from wtforms import Form, SelectField, RadioField, BooleanField, StringField, PasswordField, validators, IntegerField, fields, TextAreaField, \
Field
from wtforms import widgets, SubmitField
from wtforms.validators import ValidationError
from wtforms.fields import html5
from changedetectionio import content_fetcher
import re
from changedetectionio.notification import default_notification_format, valid_notification_formats, default_notification_body, default_notification_title
from wtforms import (
BooleanField,
Field,
Form,
IntegerField,
PasswordField,
RadioField,
SelectField,
StringField,
SubmitField,
TextAreaField,
fields,
validators,
widgets,
)
from wtforms.fields import html5
from wtforms.validators import ValidationError
from changedetectionio import content_fetcher
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
valid_notification_formats,
)
valid_method = {
'GET',
@@ -45,8 +62,8 @@ class SaltyPasswordField(StringField):
encrypted_password = ""
def build_password(self, password):
import hashlib
import base64
import hashlib
import secrets
# Make a new salt on every new password and store it with the password
@@ -104,9 +121,10 @@ class ValidateContentFetcherIsReady(object):
self.message = message
def __call__(self, form, field):
from changedetectionio import content_fetcher
import urllib3.exceptions
from changedetectionio import content_fetcher
# Better would be a radiohandler that keeps a reference to each class
if field.data is not None:
klass = getattr(content_fetcher, field.data)
@@ -213,52 +231,69 @@ class ValidateListRegex(object):
except re.error:
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))
class ValidateCSSJSONXPATHInput(object):
"""
Filter validation
@todo CSS validator ;)
"""
def __init__(self, message=None):
def __init__(self, message=None, allow_xpath=True, allow_json=True):
self.message = message
self.allow_xpath = allow_xpath
self.allow_json = allow_json
def __call__(self, form, field):
if isinstance(field.data, str):
data = [field.data]
else:
data = field.data
for line in data:
# Nothing to see here
if not len(field.data.strip()):
return
if not len(line.strip()):
return
# Does it look like XPath?
if field.data.strip()[0] == '/':
from lxml import html, etree
tree = html.fromstring("<html></html>")
# Does it look like XPath?
if line.strip()[0] == '/':
if not self.allow_xpath:
raise ValidationError("XPath not permitted in this field!")
from lxml import etree, html
tree = html.fromstring("<html></html>")
try:
tree.xpath(field.data.strip())
except etree.XPathEvalError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (field.data, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
try:
tree.xpath(line.strip())
except etree.XPathEvalError as e:
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
raise ValidationError(message % (line, str(e)))
except:
raise ValidationError("A system-error occurred when validating your XPath expression")
if 'json:' in field.data:
from jsonpath_ng.exceptions import JsonPathParserError, JsonPathLexerError
from jsonpath_ng.ext import parse
if 'json:' in line:
if not self.allow_json:
raise ValidationError("JSONPath not permitted in this field!")
input = field.data.replace('json:', '')
from jsonpath_ng.exceptions import (
JsonPathLexerError,
JsonPathParserError,
)
from jsonpath_ng.ext import parse
try:
parse(input)
except (JsonPathParserError, JsonPathLexerError) as e:
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
raise ValidationError(message % (input, str(e)))
except:
raise ValidationError("A system-error occurred when validating your JSONPath expression")
input = line.replace('json:', '')
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything?
try:
parse(input)
except (JsonPathParserError, JsonPathLexerError) as e:
message = field.gettext('\'%s\' is not a valid JSONPath expression. (%s)')
raise ValidationError(message % (input, str(e)))
except:
raise ValidationError("A system-error occurred when validating your JSONPath expression")
# Re #265 - maybe in the future fetch the page and offer a
# warning/notice that its possible the rule doesnt yet match anything?
class quickWatchForm(Form):
# https://wtforms.readthedocs.io/en/2.3.x/fields/#module-wtforms.fields.html5
# `require_tld` = False is needed even for the test harness "http://localhost:5005.." to run
@@ -283,12 +318,14 @@ class watchForm(commonSettingsForm):
minutes_between_check = html5.IntegerField('Maximum time in minutes until recheck',
[validators.Optional(), validators.NumberRange(min=1)])
css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()])
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
title = StringField('Title')
ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
headers = StringDictKeyValue('Request Headers')
body = TextAreaField('Request Body', [validators.Optional()])
method = SelectField('Request Method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore Status Codes (process non-2xx status codes as normal)', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -314,5 +351,8 @@ class globalSettingsForm(commonSettingsForm):
[validators.NumberRange(min=1)])
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
base_url = StringField('Base URL', validators=[validators.Optional()])
global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace')
ignore_whitespace = BooleanField('Ignore whitespace')
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})

View File

@@ -1,7 +1,10 @@
import json
import re
from typing import List
from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import re
class JSONNotFound(ValueError):
def __init__(self, msg):
@@ -16,11 +19,22 @@ def css_filter(css_filter, html_content):
return html_block + "\n"
def subtractive_css_selector(css_selector, html_content):
soup = BeautifulSoup(html_content, "html.parser")
for item in soup.select(css_selector):
item.decompose()
return str(soup)
def element_removal(selectors: List[str], html_content):
"""Joins individual filters into one css filter."""
selector = ",".join(selectors)
return subtractive_css_selector(selector, html_content)
# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content):
from lxml import html
from lxml import etree
from lxml import etree, html
tree = html.fromstring(html_content)
html_block = ""
@@ -64,7 +78,8 @@ def _parse_json(json_data, jsonpath_filter):
# Re 265 - Just return an empty string when filter not found
return ''
stripped_text_from_html = json.dumps(s, indent=4)
# Ticket #462 - allow the original encoding through, usually it's UTF-8 or similar
stripped_text_from_html = json.dumps(s, indent=4, ensure_ascii=False)
return stripped_text_from_html
@@ -151,4 +166,4 @@ def strip_ignore_text(content, wordlist, mode="content"):
if mode == "line numbers":
return ignored_line_numbers
return "\n".encode('utf8').join(output)
return "\n".encode('utf8').join(output)

View File

@@ -37,15 +37,18 @@ section.content {
align-items: center;
justify-content: center; }
code {
background: #eee; }
/* table related */
.watch-table {
width: 100%; }
width: 100%;
font-size: 80%; }
.watch-table tr.unviewed {
font-weight: bold; }
.watch-table .error {
color: #a00; }
.watch-table td {
font-size: 80%;
white-space: nowrap; }
.watch-table td.title-col {
word-break: break-all;
@@ -80,11 +83,11 @@ section.content {
body:after {
content: "";
background: linear-gradient(130deg, #ff7a18, #af002d 41.07%, #319197 76.05%); }
background: linear-gradient(130deg, #5ad8f7, #2f50af 41.07%, #9150bf 84.05%); }
body:after, body:before {
display: block;
height: 600px;
height: 650px;
position: absolute;
top: 0;
left: 0;
@@ -96,9 +99,6 @@ body::after {
body::before {
content: "";
background-image: url(/static/images/gradient-border.png); }
body:before {
background-size: cover; }
body:after, body:before {
@@ -199,7 +199,8 @@ body:after, body:before {
#new-watch-form .label {
display: none; }
#new-watch-form legend {
color: #fff; }
color: #fff;
font-weight: bold; }
#diff-col {
padding-left: 40px; }
@@ -388,6 +389,11 @@ and also iPads specifically.
.pure-form-stacked > div:first-child {
display: block; }
.login-form .inner {
background: #fff;
padding: 20px;
border-radius: 5px; }
.edit-form {
min-width: 70%; }
.edit-form .tab-pane-inner {
@@ -404,6 +410,8 @@ and also iPads specifically.
.edit-form #actions {
display: block;
background: #fff; }
.edit-form .pure-form-message-inline {
padding-left: 0; }
ul {
padding-left: 1em;

View File

@@ -42,9 +42,14 @@ section.content {
justify-content: center;
}
code {
background: #eee;
}
/* table related */
.watch-table {
width: 100%;
font-size: 80%;
tr.unviewed {
font-weight: bold;
@@ -55,7 +60,6 @@ section.content {
}
td {
font-size: 80%;
white-space: nowrap;
}
@@ -107,12 +111,12 @@ section.content {
body:after {
content: "";
background: linear-gradient(130deg, #ff7a18, #af002d 41.07%, #319197 76.05%)
background: linear-gradient(130deg, #5ad8f7, #2f50af 41.07%, #9150bf 84.05%);
}
body:after, body:before {
display: block;
height: 600px;
height: 650px;
position: absolute;
top: 0;
left: 0;
@@ -125,11 +129,8 @@ body::after {
}
body::before {
// background-image set in base.html so it works with reverse proxies etc
content: "";
background-image: url(/static/images/gradient-border.png);
}
body:before {
background-size: cover
}
@@ -265,6 +266,7 @@ body:after, body:before {
}
legend {
color: #fff;
font-weight: bold;
}
}
@@ -545,6 +547,16 @@ $form-edge-padding: 20px;
display: block;
}
}
.login-form {
.inner {
background: #fff;;
padding: $form-edge-padding;
border-radius: 5px;
}
}
.edit-form {
min-width: 70%;
.tab-pane-inner {
@@ -568,6 +580,10 @@ $form-edge-padding: 20px;
display: block;
background: #fff;
}
.pure-form-message-inline {
padding-left: 0;
}
}
ul {

View File

@@ -1,15 +1,19 @@
from os import unlink, path, mkdir
import json
import uuid as uuid_builder
from threading import Lock
from copy import deepcopy
import logging
import time
import threading
import os
import threading
import time
import uuid as uuid_builder
from copy import deepcopy
from os import mkdir, path, unlink
from threading import Lock
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
default_notification_title,
)
from changedetectionio.notification import default_notification_format, default_notification_body, default_notification_title
# Is there an existing library to ensure some data store (JSON etc) is in sync with CRUD methods?
# Open a github issue if you know something :)
@@ -46,6 +50,7 @@ class ChangeDetectionStore:
'extract_title_as_title': False,
'fetch_backend': 'html_requests',
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': False,
'notification_urls': [], # Apprise URL list
# Custom notification content
@@ -82,6 +87,7 @@ class ChangeDetectionStore:
'notification_body': default_notification_body,
'notification_format': default_notification_format,
'css_filter': "",
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'fetch_backend': None,
'extract_title_as_title': False
@@ -144,8 +150,8 @@ class ChangeDetectionStore:
unlink(password_reset_lockfile)
if not 'app_guid' in self.__data:
import sys
import os
import sys
if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
else:
@@ -394,7 +400,7 @@ class ChangeDetectionStore:
# system was out of memory, out of RAM etc
with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=4)
os.rename(self.json_store_path+".tmp", self.json_store_path)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
@@ -430,6 +436,7 @@ class ChangeDetectionStore:
index.append(self.data['watching'][uuid]['history'][str(id)])
import pathlib
# Only in the sub-directories
for item in pathlib.Path(self.datastore_path).rglob("*/*txt"):
if not str(item) in index:

View File

@@ -34,9 +34,8 @@
</div>
<div class="pure-controls">
<span class="pure-form-message-inline">
These tokens can be used in the notification body and title to
customise the notification text.
</span>
These tokens can be used in the notification body and title to customise the notification text.
<table class="pure-table" id="token-table">
<thead>
<tr>
@@ -88,7 +87,7 @@
</tr>
</tbody>
</table>
<span class="pure-form-message-inline">
<br/>
URLs generated by changedetection.io (such as <code>{diff_url}</code>) require the <code>BASE_URL</code> environment variable set.<br/>
Your <code>BASE_URL</code> var is currently "{{current_base_url}}"
</span>

View File

@@ -12,7 +12,13 @@
<link rel="stylesheet" href="{{ m }}?ver=1000">
{% endfor %}
{% endif %}
<style>
body::before {
background-image: url({{url_for('static_content', group='images', filename='gradient-border.png')}});
}
</style>
</head>
<body>
<div class="header">
@@ -35,13 +41,13 @@
{% if current_user.is_authenticated or not has_password %}
{% if not current_diff_url %}
<li class="pure-menu-item">
<a href="{{ url_for('get_backup')}}" class="pure-menu-link">BACKUP</a>
<a href="{{ url_for('settings_page')}}" class="pure-menu-link">SETTINGS</a>
</li>
<li class="pure-menu-item">
<a href="{{ url_for('import_page')}}" class="pure-menu-link">IMPORT</a>
</li>
<li class="pure-menu-item">
<a href="{{ url_for('settings_page')}}" class="pure-menu-link">SETTINGS</a>
<a href="{{ url_for('get_backup')}}" class="pure-menu-link">BACKUP</a>
</li>
{% else %}
<li class="pure-menu-item">

View File

@@ -19,6 +19,7 @@
<div class="box-wrap inner">
<form class="pure-form pure-form-stacked"
action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="general">
<fieldset>
@@ -58,24 +59,33 @@
</span>
</div>
<hr/>
<fieldset class="pure-group">
<div class="pure-control-group">
{{ render_field(form.method) }}
</div>
<strong>Note: <i>Request Headers and Body settings are ONLY used by Basic fast Plaintext/HTTP Client fetch method.</i></strong>
{{ render_field(form.headers, rows=5, placeholder="Example
<span class="pure-form-message-inline">
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
</span>
<div class="pure-control-group">
{{ render_field(form.method) }}
</div>
<div class="pure-control-group">
{{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
</fieldset>
<div class="pure-control-group">
{{ render_field(form.body, rows=5, placeholder="Example
</div>
<div class="pure-control-group">
{{ render_field(form.body, rows=5, placeholder="Example
{
\"name\":\"John\",
\"age\":30,
\"car\":null
}") }}
</div>
</div>
<div>
{{ render_field(form.ignore_status_codes) }}
</div>
</fieldset>
<br/>
</div>
<div class="tab-pane-inner" id="notifications">
@@ -107,16 +117,27 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline">
<ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
<li>XPATH - Limit text to this XPath rule, simply start with a forward-slash, example <b>//*[contains(@class, 'sametext')]</b>, <a
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
</ul>
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
</span>
</div>
<fieldset class="pure-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
@@ -125,7 +146,7 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline">
<ul>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul>
@@ -142,7 +163,7 @@ User-Agent: wonderbra 1.0") }}
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is process separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <span style="font-family: monospace; background: #eee">/foo\d/</span></li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>

View File

@@ -4,6 +4,7 @@
<div class="edit-form">
<div class="inner">
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset class="pure-group">
<legend>
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):

View File

@@ -1,10 +1,10 @@
{% extends 'base.html' %}
{% block content %}
<div class="edit-form">
<div class="login-form">
<div class="inner">
<form class="pure-form pure-form-stacked" action="{{url_for('login')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset>
<div class="pure-control-group">
<label for="password">Password</label>

View File

@@ -4,6 +4,7 @@
<div class="edit-form">
<div class="box-wrap inner">
<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset>
<div class="pure-control-group">
This will remove all version snapshots/data, but keep your list of URLs. <br/>

View File

@@ -1,7 +1,7 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.jinja' import render_field %}
{% from '_helpers.jinja' import render_field, render_button %}
{% from '_common_fields.jinja' import render_common_settings_form %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
@@ -18,6 +18,7 @@
</div>
<div class="box-wrap inner">
<form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="general">
<fieldset>
<div class="pure-control-group">
@@ -27,8 +28,7 @@
<div class="pure-control-group">
{% if not hide_remove_pass %}
{% if current_user.is_authenticated %}
<a href="{{url_for('settings_page', removepassword='yes')}}"
class="pure-button pure-button-primary">Remove password</a>
{{ render_button(form.removepassword_button) }}
{% else %}
{{ render_field(form.password) }}
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
@@ -83,7 +83,18 @@
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.global_subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS selector before text conversion. </li>
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.global_ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
@@ -93,7 +104,7 @@
<ul>
<li>Note: This is applied globally in addition to the per-watch rules.</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li>
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul>
@@ -103,11 +114,9 @@
<div id="actions">
<div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">Save</button>
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete
History
Snapshot Data</a>
{{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
</div>
</div>
</form>

View File

@@ -5,6 +5,7 @@
<div class="box">
<form class="pure-form" action="{{ url_for('api_watch_add') }}" method="POST" id="new-watch-form">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset>
<legend>Add a new change detection watch</legend>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }}

View File

@@ -42,6 +42,9 @@ def app(request):
cleanup(app_config['datastore_path'])
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
app = changedetection_app(app_config, datastore)
# Disable CSRF while running tests
app.config['WTF_CSRF_ENABLED'] = False
app.config['STOP_THREADS'] = True
def teardown():

View File

@@ -4,8 +4,8 @@ from flask import url_for
def test_check_access_control(app, client):
# Still doesnt work, but this is closer.
with app.test_client() as c:
# Check we dont have any password protection enabled yet.
with app.test_client(use_cookies=True) as c:
# Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data
@@ -46,15 +46,20 @@ def test_check_access_control(app, client):
assert b"BACKUP" in res.data
assert b"IMPORT" in res.data
assert b"LOG OUT" in res.data
assert b"minutes_between_check" in res.data
assert b"fetch_backend" in res.data
# Now remove the password so other tests function, @todo this should happen before each test automatically
res = c.get(url_for("settings_page", removepassword="yes"),
follow_redirects=True)
assert b"Password protection removed." in res.data
res = c.get(url_for("index"))
assert b"LOG OUT" not in res.data
res = c.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"tag": "",
"headers": "",
"fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
},
follow_redirects=True,
)
# There was a bug where saving the settings form would submit a blank password
def test_check_access_control_no_blank_password(app, client):
@@ -71,8 +76,7 @@ def test_check_access_control_no_blank_password(app, client):
data={"password": "",
"minutes_between_check": 180,
'fetch_backend': "html_requests"},
follow_redirects=True
follow_redirects=True
)
assert b"Password protection enabled." not in res.data
@@ -91,7 +95,8 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
# Enable password check.
res = c.post(
url_for("settings_page"),
data={"password": "password", "minutes_between_check": 180,
data={"password": "password",
"minutes_between_check": 180,
'fetch_backend': "html_requests"},
follow_redirects=True
)
@@ -99,8 +104,17 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
assert b"Password protection enabled." in res.data
assert b"Login" in res.data
res = c.get(url_for("settings_page", removepassword="yes"),
follow_redirects=True)
res = c.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"tag": "",
"headers": "",
"fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
},
follow_redirects=True,
)
assert b"Password protection removed." not in res.data
res = c.get(url_for("index"),

View File

@@ -25,6 +25,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)

View File

@@ -0,0 +1,168 @@
#!/usr/bin/python3
import time
from flask import url_for
from ..html_tools import *
from .util import live_server_setup
def test_setup(live_server):
live_server_setup(live_server)
def set_original_response():
test_return_data = """<html>
<header>
<h2>Header</h2>
</header>
<nav>
<ul>
<li><a href="#">A</a></li>
<li><a href="#">B</a></li>
<li><a href="#">C</a></li>
</ul>
</nav>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div id="changetext">Some text that will change</div>
</body>
<footer>
<p>Footer</p>
</footer>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_modified_response():
test_return_data = """<html>
<header>
<h2>Header changed</h2>
</header>
<nav>
<ul>
<li><a href="#">A changed</a></li>
<li><a href="#">B</a></li>
<li><a href="#">C</a></li>
</ul>
</nav>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<div id="changetext">Some text that changes</div>
</body>
<footer>
<p>Footer changed</p>
</footer>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_element_removal_output():
from changedetectionio import fetch_site_status
from inscriptis import get_text
# Check text with sub-parts renders correctly
content = """<html>
<header>
<h2>Header</h2>
</header>
<nav>
<ul>
<li><a href="#">A</a></li>
</ul>
</nav>
<body>
Some initial text</br>
<p>across multiple lines</p>
<div id="changetext">Some text that changes</div>
</body>
<footer>
<p>Footer</p>
</footer>
</html>
"""
html_blob = element_removal(
["header", "footer", "nav", "#changetext"], html_content=content
)
text = get_text(html_blob)
assert (
text
== """Some initial text
across multiple lines
"""
)
def test_element_removal_full(client, live_server):
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
# Goto the edit page, add the filter data
# Not sure why \r needs to be added - absent of the #changetext this is not necessary
subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
res = client.post(
url_for("edit_page", uuid="first"),
data={
"subtractive_selectors": subtractive_selectors_data,
"url": test_url,
"tag": "",
"headers": "",
"fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Updated watch." in res.data
# Check it saved
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# No change yet - first check
res = client.get(url_for("index"))
assert b"unviewed" not in res.data
# Make a change to header/footer/nav
set_modified_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# There should not be an unviewed change, as changes should be removed
res = client.get(url_for("index"))
assert b"unviewed" not in res.data

View File

@@ -0,0 +1,87 @@
#!/usr/bin/python3
# coding=utf-8
import time
from flask import url_for
from .util import live_server_setup
import pytest
def test_setup(live_server):
live_server_setup(live_server)
def set_html_response():
test_return_data = """
<html><body><span class="nav_second_img_text">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;铸大国重器,挺制造脊梁,致力能源未来,赋能美好生活。
</span>
</body></html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
return None
# In the case the server does not issue a charset= or doesnt have content_type header set
def test_check_encoding_detection(client, live_server):
set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type="text/html", _external=True)
client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(2)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# Should see the proper string
assert "铸大国重".encode('utf-8') in res.data
# Should not see the failed encoding
assert b'\xc2\xa7' not in res.data
# In the case the server does not issue a charset= or doesnt have content_type header set
def test_check_encoding_detection_missing_content_type_header(client, live_server):
set_html_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(2)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
# Should see the proper string
assert "铸大国重".encode('utf-8') in res.data
# Should not see the failed encoding
assert b'\xc2\xa7' not in res.data

View File

@@ -1,6 +1,7 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
@@ -17,7 +18,9 @@ def test_error_handler(client, live_server):
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint_403_error', _external=True)
test_url = url_for('test_endpoint',
status_code=403,
_external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},

View File

@@ -0,0 +1,190 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
def test_setup(live_server):
live_server_setup(live_server)
def set_original_response():
test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_some_changed_response():
test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines, and a new thing too.</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_normal_page_check_works_with_ignore_status_code(client, live_server):
sleep_time_for_fetch_thread = 3
# Give the endpoint time to spin up
time.sleep(1)
set_original_response()
# Goto the settings page, add our ignore text
res = client.post(
url_for("settings_page"),
data={
"minutes_between_check": 180,
"ignore_status_codes": "y",
'fetch_backend': "html_requests"
},
follow_redirects=True
)
assert b"Settings updated." in res.data
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
set_some_changed_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
assert b'/test-endpoint' in res.data
# Tests the whole stack works with staus codes ignored
def test_403_page_check_works_with_ignore_status_code(client, live_server):
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', status_code=403, _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Goto the edit page, check our ignore option
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"ignore_status_codes": "y", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Make a change
set_some_changed_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# Tests the whole stack works with staus codes ignored
def test_403_page_check_fails_without_ignore_status_code(client, live_server):
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', status_code=403, _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Goto the edit page, check our ignore option
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Make a change
set_some_changed_response()
# Trigger a check
client.get(url_for("api_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should have 'unviewed' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("index"))
assert b'Status Code 403' in res.data

View File

@@ -1,4 +1,5 @@
#!/usr/bin/python3
# coding=utf-8
import time
from flask import url_for
@@ -142,7 +143,7 @@ def set_modified_response():
}
],
"boss": {
"name": "Foobar"
"name": "Örnsköldsvik"
},
"available": false
}
@@ -246,8 +247,10 @@ def test_check_json_filter(client, live_server):
# Should not see this, because its not in the JSONPath we entered
res = client.get(url_for("diff_history_page", uuid="first"))
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
assert b'Foobar' in res.data
# And #462 - check we see the proper utf-8 string there
assert "Örnsköldsvik".encode('utf-8') in res.data
def test_check_json_filter_bool_val(client, live_server):

View File

@@ -77,14 +77,6 @@ def test_body_in_request(client, live_server):
# Add our URL to the import page
test_url = url_for('test_body', _external=True)
# Add the test URL twice, we will check
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
res = client.post(
url_for("import_page"),
data={"urls": test_url},
@@ -94,19 +86,6 @@ def test_body_in_request(client, live_server):
body_value = 'Test Body Value'
# Attempt to add a body with a GET method
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": "invalid"},
follow_redirects=True
)
assert b"Body must be empty when Request Method is set to GET" in res.data
# Add a properly formatted body with a proper method
res = client.post(
url_for("edit_page", uuid="first"),
@@ -120,8 +99,7 @@ def test_body_in_request(client, live_server):
)
assert b"Updated watch." in res.data
# Give the thread time to pick up the first version
time.sleep(5)
time.sleep(3)
# The service should echo back the body
res = client.get(
@@ -129,9 +107,20 @@ def test_body_in_request(client, live_server):
follow_redirects=True
)
# Check if body returned contains the specified data
# If this gets stuck something is wrong, something should always be there
assert b"No history found" not in res.data
# We should see what we sent in the reply
assert str.encode(body_value) in res.data
####### data sanity checks
# Add the test URL twice, we will check
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
watches_with_body = 0
with open('test-datastore/url-watches.json') as f:
app_struct = json.load(f)
@@ -142,6 +131,20 @@ def test_body_in_request(client, live_server):
# Should be only one with body set
assert watches_with_body==1
# Attempt to add a body with a GET method
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": test_url,
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": "invalid"},
follow_redirects=True
)
assert b"Body must be empty when Request Method is set to GET" in res.data
def test_method_in_request(client, live_server):
# Add our URL to the import page
test_url = url_for('test_method', _external=True)

View File

@@ -0,0 +1,36 @@
from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
import time
def test_setup(live_server):
live_server_setup(live_server)
def test_file_access(client, live_server):
res = client.post(
url_for("import_page"),
data={"urls": 'https://localhost'},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Attempt to add a body with a GET method
res = client.post(
url_for("edit_page", uuid="first"),
data={
"url": 'file:///etc/passwd',
"tag": "",
"method": "GET",
"fetch_backend": "html_requests",
"body": ""},
follow_redirects=True
)
time.sleep(3)
res = client.get(
url_for("index", uuid="first"),
follow_redirects=True
)
assert b'denied for security reasons' in res.data

View File

@@ -38,21 +38,19 @@ def set_modified_response():
def live_server_setup(live_server):
@live_server.app.route('/test-endpoint')
def test_endpoint():
ctype = request.args.get('content_type')
status_code = request.args.get('status_code')
# Tried using a global var here but didn't seem to work, so reading from a file instead.
with open("test-datastore/endpoint-content.txt", "r") as f:
resp = make_response(f.read())
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
return resp
@live_server.app.route('/test-403')
def test_endpoint_403_error():
resp = make_response('', 403)
return resp
try:
# Tried using a global var here but didn't seem to work, so reading from a file instead.
with open("test-datastore/endpoint-content.txt", "r") as f:
resp = make_response(f.read(), status_code)
resp.headers['Content-Type'] = ctype if ctype else 'text/html'
return resp
except FileNotFoundError:
return make_response('', status_code)
# Just return the headers in the request
@live_server.app.route('/test-headers')

View File

@@ -42,7 +42,6 @@ class update_worker(threading.Thread):
now = time.time()
try:
changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342
@@ -50,8 +49,6 @@ class update_worker(threading.Thread):
# We then convert/.decode('utf-8') for the notification etc
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e))
except content_fetcher.EmptyReply as e:

View File

@@ -1,9 +1,9 @@
version: '2'
services:
changedetection.io:
changedetection:
image: ghcr.io/dgtlmoon/changedetection.io
container_name: changedetection.io
hostname: changedetection.io
container_name: changedetection
hostname: changedetection
volumes:
- changedetection-data:/datastore

View File

@@ -1,9 +1,9 @@
flask~= 2.0
flask_wtf
eventlet>=0.31.0
validators
timeago ~=1.0
inscriptis ~= 1.2
inscriptis ~= 2.2
feedgen ~= 0.9
flask-login ~= 0.5
pytz
@@ -17,7 +17,7 @@ wtforms ~= 2.3.3
jsonpath-ng ~= 1.5.3
# Notification library
apprise ~= 0.9.6
apprise ~= 0.9.7
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt
@@ -34,5 +34,4 @@ lxml
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
selenium ~= 4.1.0
pytest ~=6.2
pytest-flask ~=1.2

View File

@@ -32,11 +32,11 @@ setup(
long_description_content_type='text/markdown',
keywords='website change monitor for changes notification change detection '
'alerts tracking website tracker change alert website and monitoring',
zip_safe=False,
entry_points={"console_scripts": ["changedetection.io=changedetection:main"]},
entry_points={"console_scripts": ["changedetection.io=changedetectionio.changedetection:main"]},
zip_safe=True,
scripts=["changedetection.py"],
author='dgtlmoon',
url='https://changedetection.io',
scripts=['changedetection.py'],
packages=['changedetectionio'],
include_package_data=True,
install_requires=install_requires,