Compare commits

..

24 Commits

Author SHA1 Message Date
dgtlmoon 487967de29 Adding test 2022-03-30 10:32:51 +02:00
dgtlmoon 304ef746e4 Merge branch 'master' into ticket-16-diff-in-RSS 2022-03-30 10:24:40 +02:00
dgtlmoon 082634f851 Fix - {diff} and {diff_full} notifications tokens were not always including the full output 2022-03-29 19:18:26 +02:00
dgtlmoon b9222e3243 Merge branch 'master' into ticket-16-diff-in-RSS 2022-03-26 15:53:12 +01:00
dgtlmoon 1d92d9461a WIP 2022-03-26 15:35:56 +01:00
dgtlmoon 334010025f Update README.md 2022-03-26 14:02:56 +01:00
dgtlmoon 81aa8fa16b Update README.md 2022-03-26 09:56:56 +01:00
dgtlmoon c79d6824e3 Minor UI cleanups (mobile tabs, font sizing) (#503) 2022-03-25 23:37:28 +01:00
zznidar 946377d2be Fix typo in Filters & Triggers settings. (#495) 2022-03-23 23:18:04 +01:00
zznidar 5db9a30ad4 Add autofocus attribute to password login field (#496) 2022-03-23 23:17:47 +01:00
dgtlmoon 1d060225e1 0.39.11 2022-03-23 09:42:51 +01:00
dgtlmoon 7e0f0d0fd8 Microsoft Windows installation fixes (#492) 2022-03-22 23:08:08 +01:00
dgtlmoon 8b2afa2220 GitHub tweak - container tags should be CSV list (Fix ghcr.io not building) 2022-03-22 00:08:05 +01:00
dgtlmoon f55ffa0f62 GitHub tweak - build containers also on push to master 2022-03-21 23:08:17 +01:00
dgtlmoon 942c3f021f Allow changedetector to ignore status codes as a per-site setting (#479) (#485)
Co-authored-by: Ara Hayrabedian <ara.hayrabedian@gmail.com>
2022-03-21 23:03:54 +01:00
dgtlmoon 5483f5d694 Security update - Use CSRF token protection for forms, make "remove password" use HTTP Post (#484) 2022-03-21 22:54:27 +01:00
dgtlmoon f2fa638480 Security update - Protect against file:/// type access by webdriver/chrome. (#483) 2022-03-21 20:59:20 +01:00
dgtlmoon 82d1a7f73e Only build container on GitHub releases, not tests 2022-03-20 16:57:36 +01:00
dgtlmoon 9fc291fb63 Also change container names to help stop some DNS issues 2022-03-17 19:59:37 +01:00
dgtlmoon 3e8a15456a Detect byte-encoding when the server mishandles the content-type header reply (#472) 2022-03-17 10:28:02 +01:00
dgtlmoon 2a03f3f57e Improving form/edit example markup 2022-03-13 12:00:45 +01:00
dgtlmoon ffad5cca97 JSON diff/preview should use utf-8 encoding where possible (#465) 2022-03-13 11:37:51 +01:00
Tim Loderhose 60a9a786e0 Fix typo in settings form 2022-03-13 10:55:37 +01:00
dgtlmoon 165e950e55 Add python venv to .gitignore 2022-03-13 10:53:33 +01:00
37 changed files with 692 additions and 225 deletions
+11 -9
View File
@@ -2,16 +2,20 @@ name: Build and push containers
on: on:
# Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch # Automatically triggered by a testing workflow passing, but this is only checked when it lands in the `master`/default branch
workflow_run: # workflow_run:
workflows: ["ChangeDetection.io Test"] # workflows: ["ChangeDetection.io Test"]
branches: [master] # branches: [master]
tags: ['0.*'] # tags: ['0.*']
types: [completed] # types: [completed]
# Or a new tagged release # Or a new tagged release
release: release:
types: [published, edited] types: [published, edited]
push:
branches:
- master
jobs: jobs:
metadata: metadata:
runs-on: ubuntu-latest runs-on: ubuntu-latest
@@ -91,8 +95,7 @@ jobs:
file: ./Dockerfile file: ./Dockerfile
push: true push: true
tags: | tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
ghcr.io/${{ github.repository }}:latest
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7 platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache
@@ -107,8 +110,7 @@ jobs:
file: ./Dockerfile file: ./Dockerfile
push: true push: true
tags: | tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }} ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7 platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache
+2
View File
@@ -7,4 +7,6 @@ __pycache__
.pytest_cache .pytest_cache
build build
dist dist
venv
*.egg-info*
.vscode/settings.json .vscode/settings.json
+1 -1
View File
@@ -2,5 +2,5 @@ recursive-include changedetectionio/templates *
recursive-include changedetectionio/static * recursive-include changedetectionio/static *
include changedetection.py include changedetection.py
global-exclude *.pyc global-exclude *.pyc
global-exclude *node_modules* global-exclude node_modules
global-exclude venv global-exclude venv
+4 -4
View File
@@ -9,10 +9,10 @@ _Know when web pages change! Stay ontop of new information!_
Live your data-life *pro-actively* instead of *re-actively*. Live your data-life *pro-actively* instead of *re-actively*.
Open source web page monitoring, notification and change detection. Free, Open-source web page monitoring, notification and change detection. Don't have time? [Try our $6.99/month plan - unlimited checks and watches!](https://lemonade.changedetection.io/start)
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" /> [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
**Get your own private instance now! Let us host it for you!** **Get your own private instance now! Let us host it for you!**
@@ -163,9 +163,9 @@ See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configura
Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver) Raspberry Pi and linux/arm/v6 linux/arm/v7 arm64 devices are supported! See the wiki for [details](https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver)
## Windows native support? ## Windows support?
Sorry not yet :( https://github.com/dgtlmoon/changedetection.io/labels/windows YES! See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
## Support us ## Support us
+6 -105
View File
@@ -1,110 +1,11 @@
#!/usr/bin/python3 #!/usr/bin/python3
# Launch as a eventlet.wsgi server instance. # Entry-point for running from the CLI when not installed via Pip, Pip will handle the console_scripts entry_points's from setup.py
# It's recommended to use `pip3 install changedetection.io` and start with `changedetection.py` instead, it will be linked to your global path.
import getopt # or Docker.
import os # Read more https://github.com/dgtlmoon/changedetection.io/wiki
import sys
import eventlet
import eventlet.wsgi
import changedetectionio
from changedetectionio import store
def main():
ssl_mode = False
host = ''
port = os.environ.get('PORT') or 5000
do_cleanup = False
# Must be absolute so that send_from_directory doesnt try to make it relative to backend/
datastore_path = os.path.join(os.getcwd(), "datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
sys.exit(2)
create_datastore_dir = False
for opt, arg in opts:
# if opt == '--purge':
# Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
if opt == '-s':
ssl_mode = True
if opt == '-h':
host = arg
if opt == '-p':
port = int(arg)
if opt == '-d':
datastore_path = arg
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
# Create the datadir if it doesnt exist
if opt == '-C':
create_datastore_dir = True
# isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
app_config = {'datastore_path': datastore_path}
if not os.path.isdir(app_config['datastore_path']):
if create_datastore_dir:
os.mkdir(app_config['datastore_path'])
else:
print ("ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists.\n"
"Alternatively, use the -C parameter.".format(app_config['datastore_path']),file=sys.stderr)
sys.exit(2)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=changedetectionio.__version__)
app = changedetectionio.changedetection_app(app_config, datastore)
# Go into cleanup mode
if do_cleanup:
datastore.remove_unused_snapshots()
app.config['datastore_path'] = datastore_path
@app.context_processor
def inject_version():
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
has_password=datastore.data['settings']['application']['password'] != False
)
# Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings
#
# proxy_set_header Host "localhost";
# proxy_set_header X-Forwarded-Prefix /app;
if os.getenv('USE_X_SETTINGS'):
print ("USE_X_SETTINGS is ENABLED\n")
from werkzeug.middleware.proxy_fix import ProxyFix
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
if ssl_mode:
# @todo finalise SSL config, but this should get you in the right direction if you need it.
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
certfile='cert.pem',
keyfile='privkey.pem',
server_side=True), app)
else:
eventlet.wsgi.server(eventlet.listen((host, int(port))), app)
from changedetectionio import changedetection
if __name__ == '__main__': if __name__ == '__main__':
main() changedetection.main()
+1
View File
@@ -0,0 +1 @@
test-datastore
+31 -16
View File
@@ -35,10 +35,11 @@ from flask import (
url_for, url_for,
) )
from flask_login import login_required from flask_login import login_required
from flask_wtf import CSRFProtect
from changedetectionio import html_tools from changedetectionio import html_tools
__version__ = '0.39.10' __version__ = '0.39.11'
datastore = None datastore = None
@@ -52,11 +53,10 @@ update_q = queue.Queue()
notification_q = queue.Queue() notification_q = queue.Queue()
# Needs to be set this way because we also build and publish via pip
base_path = os.path.dirname(os.path.realpath(__file__))
app = Flask(__name__, app = Flask(__name__,
static_url_path="{}/static".format(base_path), static_url_path="",
template_folder="{}/templates".format(base_path)) static_folder="static",
template_folder="templates")
# Stop browser caching of assets # Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
@@ -72,6 +72,9 @@ app.config['LOGIN_DISABLED'] = False
# Disables caching of the templates # Disables caching of the templates
app.config['TEMPLATES_AUTO_RELOAD'] = True app.config['TEMPLATES_AUTO_RELOAD'] = True
csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[] notification_debug_log=[]
def init_app_secret(datastore_path): def init_app_secret(datastore_path):
@@ -269,7 +272,7 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/rss", methods=['GET']) @app.route("/rss", methods=['GET'])
@login_required @login_required
def rss(): def rss():
from . import diff
limit_tag = request.args.get('tag') limit_tag = request.args.get('tag')
# Sort by last_changed and add the uuid which is usually the key.. # Sort by last_changed and add the uuid which is usually the key..
@@ -298,6 +301,15 @@ def changedetection_app(config=None, datastore_o=None):
fg.link(href='https://changedetection.io') fg.link(href='https://changedetection.io')
for watch in sorted_watches: for watch in sorted_watches:
dates = list(watch['history'].keys())
# Convert to int, sort and back to str again
# @todo replace datastore getter that does this automatically
dates = [int(i) for i in dates]
dates.sort(reverse=True)
dates = [str(i) for i in dates]
prev_fname = watch['history'][dates[1]]
if not watch['viewed']: if not watch['viewed']:
# Re #239 - GUID needs to be individual for each event # Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228) # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
@@ -313,12 +325,16 @@ def changedetection_app(config=None, datastore_o=None):
diff_link = {'href': "{}{}".format(base_url, url_for('diff_history_page', uuid=watch['uuid']))} diff_link = {'href': "{}{}".format(base_url, url_for('diff_history_page', uuid=watch['uuid']))}
# @todo use title if it exists
fe.link(link=diff_link) fe.link(link=diff_link)
fe.title(title=watch['url'])
# @todo in the future <description><![CDATA[<html><body>Any code html is valid.</body></html>]]></description> # @todo watch should be a getter - watch.get('title') (internally if URL else..)
fe.description(description=watch['url'])
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
fe.title(title=watch_title)
latest_fname = watch['history'][dates[0]]
html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
fe.description(description="<![CDATA[<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff))
fe.guid(guid, permalink=False) fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key'])) dt = datetime.datetime.fromtimestamp(int(watch['newest_history_key']))
@@ -504,13 +520,13 @@ def changedetection_app(config=None, datastore_o=None):
'headers': form.headers.data, 'headers': form.headers.data,
'body': form.body.data, 'body': form.body.data,
'method': form.method.data, 'method': form.method.data,
'ignore_status_codes': form.ignore_status_codes.data,
'fetch_backend': form.fetch_backend.data, 'fetch_backend': form.fetch_backend.data,
'trigger_text': form.trigger_text.data, 'trigger_text': form.trigger_text.data,
'notification_title': form.notification_title.data, 'notification_title': form.notification_title.data,
'notification_body': form.notification_body.data, 'notification_body': form.notification_body.data,
'notification_format': form.notification_format.data, 'notification_format': form.notification_format.data,
'extract_title_as_title': form.extract_title_as_title.data 'extract_title_as_title': form.extract_title_as_title.data,
} }
# Notification URLs # Notification URLs
@@ -610,16 +626,15 @@ def changedetection_app(config=None, datastore_o=None):
form.notification_format.data = datastore.data['settings']['application']['notification_format'] form.notification_format.data = datastore.data['settings']['application']['notification_format']
form.base_url.data = datastore.data['settings']['application']['base_url'] form.base_url.data = datastore.data['settings']['application']['base_url']
# Password unset is a GET, but we can lock the session to always need the password if request.method == 'POST' and form.data.get('removepassword_button') == True:
if not os.getenv("SALTED_PASS", False) and request.values.get('removepassword') == 'yes': # Password unset is a GET, but we can lock the session to a salted env password to always need the password
from pathlib import Path if not os.getenv("SALTED_PASS", False):
datastore.data['settings']['application']['password'] = False datastore.data['settings']['application']['password'] = False
flash("Password protection removed.", 'notice') flash("Password protection removed.", 'notice')
flask_login.logout_user() flask_login.logout_user()
return redirect(url_for('settings_page')) return redirect(url_for('settings_page'))
if request.method == 'POST' and form.validate(): if request.method == 'POST' and form.validate():
datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data datastore.data['settings']['application']['notification_urls'] = form.notification_urls.data
datastore.data['settings']['requests']['minutes_between_check'] = form.minutes_between_check.data datastore.data['settings']['requests']['minutes_between_check'] = form.minutes_between_check.data
datastore.data['settings']['application']['extract_title_as_title'] = form.extract_title_as_title.data datastore.data['settings']['application']['extract_title_as_title'] = form.extract_title_as_title.data
+114
View File
@@ -0,0 +1,114 @@
#!/usr/bin/python3
# Launch as an eventlet.wsgi server instance.
import getopt
import os
import sys
import eventlet
import eventlet.wsgi
from . import store, changedetection_app
from . import __version__
def main():
    """Parse command-line options, open the datastore and serve the app via eventlet.

    Options (see the getopt spec below):
      -s         enable SSL (uses 'cert.pem' / 'privkey.pem')
      -h [host]  address to listen on (default '')
      -p [port]  port to listen on (default: PORT environment variable, else 5000)
      -d [path]  datastore directory
      -c         call datastore.remove_unused_snapshots() after loading
      -C         create the datastore directory when it does not exist

    Exits with status 2 on an unrecognised option, or when the datastore
    directory is missing and -C was not given.
    """
    ssl_mode = False
    host = ''
    # May be a string here (when taken from the environment); converted to int before listening.
    port = os.environ.get('PORT') or 5000
    do_cleanup = False

    datastore_path = None

    # On Windows, create and use a default path.
    if os.name == 'nt':
        datastore_path = os.path.expandvars(r'%APPDATA%\changedetection.io')
        os.makedirs(datastore_path, exist_ok=True)
    else:
        # Must be absolute so that send_from_directory doesnt try to make it relative to backend/
        datastore_path = os.path.join(os.getcwd(), "../datastore")

    try:
        opts, args = getopt.getopt(sys.argv[1:], "Ccsd:h:p:", "port")
    except getopt.GetoptError:
        print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path]')
        sys.exit(2)

    create_datastore_dir = False

    for opt, arg in opts:
        # if opt == '--purge':
        # Remove history, the actual files you need to delete manually.
        #    for uuid, watch in datastore.data['watching'].items():
        #        watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})

        if opt == '-s':
            ssl_mode = True

        if opt == '-h':
            host = arg

        if opt == '-p':
            port = int(arg)

        if opt == '-d':
            datastore_path = arg

        # Cleanup (remove text files that arent in the index)
        if opt == '-c':
            do_cleanup = True

        # Create the datadir if it doesnt exist
        if opt == '-C':
            create_datastore_dir = True

    # isnt there some @thingy to attach to each route to tell it, that this route needs a datastore
    app_config = {'datastore_path': datastore_path}

    if not os.path.isdir(app_config['datastore_path']):
        if create_datastore_dir:
            os.mkdir(app_config['datastore_path'])
        else:
            print(
                "ERROR: Directory path for the datastore '{}' does not exist, cannot start, please make sure the directory exists or specify a directory with the -d option.\n"
                "Or use the -C parameter to create the directory.".format(app_config['datastore_path']), file=sys.stderr)
            sys.exit(2)

    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
    app = changedetection_app(app_config, datastore)

    # Go into cleanup mode
    if do_cleanup:
        datastore.remove_unused_snapshots()

    app.config['datastore_path'] = datastore_path

    # Values made available to every rendered template.
    @app.context_processor
    def inject_version():
        return dict(right_sticky="v{}".format(datastore.data['version_tag']),
                    new_version_available=app.config['NEW_VERSION_AVAILABLE'],
                    has_password=datastore.data['settings']['application']['password'] != False
                    )

    # Proxy sub-directory support
    # Set environment var USE_X_SETTINGS=1 on this script
    # And then in your proxy_pass settings
    #
    #         proxy_set_header Host "localhost";
    #         proxy_set_header X-Forwarded-Prefix /app;
    if os.getenv('USE_X_SETTINGS'):
        print ("USE_X_SETTINGS is ENABLED\n")
        from werkzeug.middleware.proxy_fix import ProxyFix
        app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)

    # PORT read from the environment is a string; convert once here so that BOTH
    # listeners get an integer (previously only the non-SSL branch converted it,
    # so SSL mode combined with a PORT environment variable passed a str port).
    port = int(port)

    if ssl_mode:
        # @todo finalise SSL config, but this should get you in the right direction if you need it.
        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)),
                                               certfile='cert.pem',
                                               keyfile='privkey.pem',
                                               server_side=True), app)
    else:
        eventlet.wsgi.server(eventlet.listen((host, port)), app)
+36 -12
View File
@@ -1,10 +1,12 @@
import os
import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import chardet
import os
from selenium import webdriver from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
from selenium.common.exceptions import WebDriverException from selenium.common.exceptions import WebDriverException
import requests
import time
import urllib3.exceptions import urllib3.exceptions
@@ -20,7 +22,7 @@ class EmptyReply(Exception):
class Fetcher(): class Fetcher():
error = None error = None
status_code = None status_code = None
content = None # Should always be bytes. content = None
headers = None headers = None
fetcher_description ="No description" fetcher_description ="No description"
@@ -30,7 +32,13 @@ class Fetcher():
return self.error return self.error
@abstractmethod @abstractmethod
def run(self, url, timeout, request_headers, request_body, request_method): def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
# Should set self.error, self.status_code and self.content # Should set self.error, self.status_code and self.content
pass pass
@@ -97,7 +105,13 @@ class html_webdriver(Fetcher):
if proxy_args: if proxy_args:
self.proxy = SeleniumProxy(raw=proxy_args) self.proxy = SeleniumProxy(raw=proxy_args)
def run(self, url, timeout, request_headers, request_body, request_method): def run(self,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
# request_body, request_method unused for now, until some magic in the future happens. # request_body, request_method unused for now, until some magic in the future happens.
@@ -145,8 +159,13 @@ class html_webdriver(Fetcher):
class html_requests(Fetcher): class html_requests(Fetcher):
fetcher_description = "Basic fast Plaintext/HTTP Client" fetcher_description = "Basic fast Plaintext/HTTP Client"
def run(self, url, timeout, request_headers, request_body, request_method): def run(self,
import requests url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False):
r = requests.request(method=request_method, r = requests.request(method=request_method,
data=request_body, data=request_body,
@@ -155,16 +174,21 @@ class html_requests(Fetcher):
timeout=timeout, timeout=timeout,
verify=False) verify=False)
# https://stackoverflow.com/questions/44203397/python-requests-get-returns-improperly-decoded-text-instead-of-utf-8 # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
# Return bytes here # For example - some sites don't tell us it's utf-8, but return utf-8 content
html = r.text # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
# https://github.com/psf/requests/issues/1604 good info about requests encoding detection
if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'):
encoding = chardet.detect(r.content)['encoding']
if encoding:
r.encoding = encoding
# @todo test this # @todo test this
# @todo maybe you really want to test zero-byte return pages? # @todo maybe you really want to test zero-byte return pages?
if not r or not html or not len(html): if (not ignore_status_codes and not r) or not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code) raise EmptyReply(url=url, status_code=r.status_code)
self.status_code = r.status_code self.status_code = r.status_code
self.content = html self.content = r.text
self.headers = r.headers self.headers = r.headers
+12 -3
View File
@@ -2,22 +2,31 @@
import difflib import difflib
def same_slicer(l, a, b):
    """Return the items of `l` selected by the index pair (a, b).

    When both indexes are equal the single element at that index is returned
    wrapped in a list; otherwise the ordinary slice l[a:b] is returned.
    """
    return [l[a]] if a == b else l[a:b]
# like .compare but a little different output # like .compare but a little different output
def customSequenceMatcher(before, after, include_equal=False): def customSequenceMatcher(before, after, include_equal=False):
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after) cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=before, b=after)
# @todo Line-by-line mode instead of bunched, including `after` that is not in `before` (maybe unset?)
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes(): for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if include_equal and tag == 'equal': if include_equal and tag == 'equal':
g = before[alo:ahi] g = before[alo:ahi]
yield g yield g
elif tag == 'delete': elif tag == 'delete':
g = "(removed) {}".format(before[alo]) g = ["(removed) " + i for i in same_slicer(before, alo, ahi)]
yield g yield g
elif tag == 'replace': elif tag == 'replace':
g = ["(changed) {}".format(before[alo]), "(-> into) {}".format(after[blo])] g = ["(changed) " + i for i in same_slicer(before, alo, ahi)]
g += ["(into ) " + i for i in same_slicer(after, blo, bhi)]
yield g yield g
elif tag == 'insert': elif tag == 'insert':
g = "(added) {}".format(after[blo]) g = ["(added ) " + i for i in same_slicer(after, blo, bhi)]
yield g yield g
# only_differences - only return info about the differences, no context # only_differences - only return info about the differences, no context
+11 -4
View File
@@ -1,10 +1,10 @@
import hashlib import hashlib
import os
import re import re
import time import time
import urllib3 import urllib3
from inscriptis import get_text
from inscriptis import get_text
from changedetectionio import content_fetcher, html_tools from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
@@ -24,8 +24,14 @@ class perform_site_check():
stripped_text_from_html = "" stripped_text_from_html = ""
watch = self.datastore.data['watching'][uuid] watch = self.datastore.data['watching'][uuid]
# Unset any existing notification error
# Protect against file:// access
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
raise Exception(
"file:// type access is denied for security reasons."
)
# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False} update_obj = {'last_notification_error': False, 'last_error': False}
extra_headers = self.datastore.get_val(uuid, 'headers') extra_headers = self.datastore.get_val(uuid, 'headers')
@@ -47,6 +53,7 @@ class perform_site_check():
url = self.datastore.get_val(uuid, 'url') url = self.datastore.get_val(uuid, 'url')
request_body = self.datastore.get_val(uuid, 'body') request_body = self.datastore.get_val(uuid, 'body')
request_method = self.datastore.get_val(uuid, 'method') request_method = self.datastore.get_val(uuid, 'method')
ignore_status_code = self.datastore.get_val(uuid, 'ignore_status_codes')
# Pluggable content fetcher # Pluggable content fetcher
prefer_backend = watch['fetch_backend'] prefer_backend = watch['fetch_backend']
@@ -58,7 +65,7 @@ class perform_site_check():
fetcher = klass() fetcher = klass()
fetcher.run(url, timeout, request_headers, request_body, request_method) fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
# Fetching complete, now filters # Fetching complete, now filters
# @todo move to class / maybe inside of fetcher abstract base? # @todo move to class / maybe inside of fetcher abstract base?
+4 -1
View File
@@ -325,6 +325,7 @@ class watchForm(commonSettingsForm):
headers = StringDictKeyValue('Request Headers') headers = StringDictKeyValue('Request Headers')
body = TextAreaField('Request Body', [validators.Optional()]) body = TextAreaField('Request Body', [validators.Optional()])
method = SelectField('Request Method', choices=valid_method, default=default_method) method = SelectField('Request Method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore Status Codes (process non-2xx status codes as normal)', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
@@ -350,6 +351,8 @@ class globalSettingsForm(commonSettingsForm):
[validators.NumberRange(min=1)]) [validators.NumberRange(min=1)])
extract_title_as_title = BooleanField('Extract <title> from document and use as watch title') extract_title_as_title = BooleanField('Extract <title> from document and use as watch title')
base_url = StringField('Base URL', validators=[validators.Optional()]) base_url = StringField('Base URL', validators=[validators.Optional()])
global_subtractive_selectors = StringListField('Ignore elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()])
ignore_whitespace = BooleanField('Ignore whitespace') ignore_whitespace = BooleanField('Ignore whitespace')
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
+17 -5
View File
@@ -37,6 +37,9 @@ section.content {
align-items: center; align-items: center;
justify-content: center; } justify-content: center; }
code {
background: #eee; }
/* table related */ /* table related */
.watch-table { .watch-table {
width: 100%; width: 100%;
@@ -241,7 +244,7 @@ footer {
.sticky-tab { .sticky-tab {
position: absolute; position: absolute;
top: 60px; top: 60px;
font-size: 8px; font-size: 65%;
background: #fff; background: #fff;
padding: 10px; } padding: 10px; }
.sticky-tab#left-sticky { .sticky-tab#left-sticky {
@@ -307,14 +310,23 @@ footer {
#nav-menu { #nav-menu {
overflow-x: scroll; } } overflow-x: scroll; } }
/* @media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
div.sticky-tab#hosted-sticky {
top: 60px;
left: 0px;
right: auto; }
section.content {
padding-top: 110px; }
div.tabs ul li {
display: block;
border-radius: 0px; }
input[type='text'] {
width: 100%; }
/*
Max width before this PARTICULAR table gets nasty Max width before this PARTICULAR table gets nasty
This query will take effect for any screen smaller than 760px This query will take effect for any screen smaller than 760px
and also iPads specifically. and also iPads specifically.
*/ */
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
input[type='text'] {
width: 100%; }
.watch-table { .watch-table {
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */
+29 -8
View File
@@ -42,6 +42,10 @@ section.content {
justify-content: center; justify-content: center;
} }
code {
background: #eee;
}
/* table related */ /* table related */
.watch-table { .watch-table {
width: 100%; width: 100%;
@@ -318,7 +322,7 @@ footer {
.sticky-tab { .sticky-tab {
position: absolute; position: absolute;
top: 60px; top: 60px;
font-size: 8px; font-size: 65%;
background: #fff; background: #fff;
padding: 10px; padding: 10px;
&#left-sticky { &#left-sticky {
@@ -418,18 +422,35 @@ footer {
} }
} }
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 800px) {
div.sticky-tab#hosted-sticky {
top: 60px;
left: 0px;
right: auto;
}
section.content {
padding-top: 110px;
}
// Make the tabs easier to hit, they will be all nice and horizontal
div.tabs ul li {
display: block;
border-radius: 0px;
}
input[type='text'] {
width: 100%;
}
/* /*
Max width before this PARTICULAR table gets nasty Max width before this PARTICULAR table gets nasty
This query will take effect for any screen smaller than 760px This query will take effect for any screen smaller than 760px
and also iPads specifically. and also iPads specifically.
*/ */
@media only screen and (max-width: 760px), (min-device-width: 768px) and (max-device-width: 1024px) {
input[type='text'] {
width: 100%;
}
.watch-table { .watch-table {
/* Force table to not be like tables anymore */ /* Force table to not be like tables anymore */
thead, tbody, th, td, tr { thead, tbody, th, td, tr {
+1 -1
View File
@@ -400,7 +400,7 @@ class ChangeDetectionStore:
# system was out of memory, out of RAM etc # system was out of memory, out of RAM etc
with open(self.json_store_path+".tmp", 'w') as json_file: with open(self.json_store_path+".tmp", 'w') as json_file:
json.dump(data, json_file, indent=4) json.dump(data, json_file, indent=4)
os.rename(self.json_store_path+".tmp", self.json_store_path) os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e: except Exception as e:
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e)) logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
+9 -5
View File
@@ -19,6 +19,7 @@
<div class="box-wrap inner"> <div class="box-wrap inner">
<form class="pure-form pure-form-stacked" <form class="pure-form pure-form-stacked"
action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST"> action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="general"> <div class="tab-pane-inner" id="general">
<fieldset> <fieldset>
@@ -80,6 +81,9 @@ User-Agent: wonderbra 1.0") }}
\"car\":null \"car\":null
}") }} }") }}
</div> </div>
<div>
{{ render_field(form.ignore_status_codes) }}
</div>
</fieldset> </fieldset>
<br/> <br/>
</div> </div>
@@ -113,9 +117,9 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <b>"json:"</b>, <a <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li> href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <b>//*[contains(@class, 'sametext')]</b>, <a <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example <code>//*[contains(@class, 'sametext')]</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li> href="http://xpather.com/" target="new">test your XPath here</a></li>
</ul> </ul>
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
@@ -142,7 +146,7 @@ nav
<span class="pure-form-message-inline"> <span class="pure-form-message-inline">
<ul> <ul>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li> <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li> <li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li> <li>Use the preview/show current tab to see ignores</li>
</ul> </ul>
@@ -158,8 +162,8 @@ nav
<ul> <ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li> <li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li> <li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is process separately (think of each line as "OR")</li> <li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <span style="font-family: monospace; background: #eee">/foo\d/</span></li> <li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul> </ul>
</span> </span>
</div> </div>
+1
View File
@@ -4,6 +4,7 @@
<div class="edit-form"> <div class="edit-form">
<div class="inner"> <div class="inner">
<form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST"> <form class="pure-form pure-form-aligned" action="{{url_for('import_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset class="pure-group"> <fieldset class="pure-group">
<legend> <legend>
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,): Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma (,):
+2 -1
View File
@@ -4,11 +4,12 @@
<div class="login-form"> <div class="login-form">
<div class="inner"> <div class="inner">
<form class="pure-form pure-form-stacked" action="{{url_for('login')}}" method="POST"> <form class="pure-form pure-form-stacked" action="{{url_for('login')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
<label for="password">Password</label> <label for="password">Password</label>
<input type="password" id="password" required="" name="password" value="" <input type="password" id="password" required="" name="password" value=""
size="15"/> size="15" autofocus />
<input type="hidden" id="email" name="email" value="defaultuser@changedetection.io" /> <input type="hidden" id="email" name="email" value="defaultuser@changedetection.io" />
</div> </div>
<div class="pure-control-group"> <div class="pure-control-group">
+1
View File
@@ -4,6 +4,7 @@
<div class="edit-form"> <div class="edit-form">
<div class="box-wrap inner"> <div class="box-wrap inner">
<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST"> <form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
This will remove all version snapshots/data, but keep your list of URLs. <br/> This will remove all version snapshots/data, but keep your list of URLs. <br/>
+7 -9
View File
@@ -1,7 +1,7 @@
{% extends 'base.html' %} {% extends 'base.html' %}
{% block content %} {% block content %}
{% from '_helpers.jinja' import render_field %} {% from '_helpers.jinja' import render_field, render_button %}
{% from '_common_fields.jinja' import render_common_settings_form %} {% from '_common_fields.jinja' import render_common_settings_form %}
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script> <script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
@@ -18,6 +18,7 @@
</div> </div>
<div class="box-wrap inner"> <div class="box-wrap inner">
<form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST"> <form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<div class="tab-pane-inner" id="general"> <div class="tab-pane-inner" id="general">
<fieldset> <fieldset>
<div class="pure-control-group"> <div class="pure-control-group">
@@ -27,8 +28,7 @@
<div class="pure-control-group"> <div class="pure-control-group">
{% if not hide_remove_pass %} {% if not hide_remove_pass %}
{% if current_user.is_authenticated %} {% if current_user.is_authenticated %}
<a href="{{url_for('settings_page', removepassword='yes')}}" {{ render_button(form.removepassword_button) }}
class="pure-button pure-button-primary">Remove password</a>
{% else %} {% else %}
{{ render_field(form.password) }} {{ render_field(form.password) }}
<span class="pure-form-message-inline">Password protection for your changedetection.io application.</span> <span class="pure-form-message-inline">Password protection for your changedetection.io application.</span>
@@ -104,7 +104,7 @@ nav
<ul> <ul>
<li>Note: This is applied globally in addition to the per-watch rules.</li> <li>Note: This is applied globally in addition to the per-watch rules.</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the line in forward slash <b>/regex/</b></li> <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li> <li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li> <li>Use the preview/show current tab to see ignores</li>
</ul> </ul>
@@ -114,11 +114,9 @@ nav
<div id="actions"> <div id="actions">
<div class="pure-control-group"> <div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">Save</button> {{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a> <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete <a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
History
Snapshot Data</a>
</div> </div>
</div> </div>
</form> </form>
@@ -5,6 +5,7 @@
<div class="box"> <div class="box">
<form class="pure-form" action="{{ url_for('api_watch_add') }}" method="POST" id="new-watch-form"> <form class="pure-form" action="{{ url_for('api_watch_add') }}" method="POST" id="new-watch-form">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset> <fieldset>
<legend>Add a new change detection watch</legend> <legend>Add a new change detection watch</legend>
{{ render_simple_field(form.url, placeholder="https://...", required=true) }} {{ render_simple_field(form.url, placeholder="https://...", required=true) }}
+3
View File
@@ -42,6 +42,9 @@ def app(request):
cleanup(app_config['datastore_path']) cleanup(app_config['datastore_path'])
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False) datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
app = changedetection_app(app_config, datastore) app = changedetection_app(app_config, datastore)
# Disable CSRF while running tests
app.config['WTF_CSRF_ENABLED'] = False
app.config['STOP_THREADS'] = True app.config['STOP_THREADS'] = True
def teardown(): def teardown():
+29 -15
View File
@@ -4,8 +4,8 @@ from flask import url_for
def test_check_access_control(app, client): def test_check_access_control(app, client):
# Still doesnt work, but this is closer. # Still doesnt work, but this is closer.
with app.test_client() as c: with app.test_client(use_cookies=True) as c:
# Check we dont have any password protection enabled yet. # Check we don't have any password protection enabled yet.
res = c.get(url_for("settings_page")) res = c.get(url_for("settings_page"))
assert b"Remove password" not in res.data assert b"Remove password" not in res.data
@@ -46,15 +46,20 @@ def test_check_access_control(app, client):
assert b"BACKUP" in res.data assert b"BACKUP" in res.data
assert b"IMPORT" in res.data assert b"IMPORT" in res.data
assert b"LOG OUT" in res.data assert b"LOG OUT" in res.data
assert b"minutes_between_check" in res.data
assert b"fetch_backend" in res.data
# Now remove the password so other tests function, @todo this should happen before each test automatically res = c.post(
res = c.get(url_for("settings_page", removepassword="yes"), url_for("settings_page"),
follow_redirects=True) data={
assert b"Password protection removed." in res.data "minutes_between_check": 180,
"tag": "",
res = c.get(url_for("index")) "headers": "",
assert b"LOG OUT" not in res.data "fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
},
follow_redirects=True,
)
# There was a bug where saving the settings form would submit a blank password # There was a bug where saving the settings form would submit a blank password
def test_check_access_control_no_blank_password(app, client): def test_check_access_control_no_blank_password(app, client):
@@ -71,8 +76,7 @@ def test_check_access_control_no_blank_password(app, client):
data={"password": "", data={"password": "",
"minutes_between_check": 180, "minutes_between_check": 180,
'fetch_backend': "html_requests"}, 'fetch_backend': "html_requests"},
follow_redirects=True
follow_redirects=True
) )
assert b"Password protection enabled." not in res.data assert b"Password protection enabled." not in res.data
@@ -91,7 +95,8 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
# Enable password check. # Enable password check.
res = c.post( res = c.post(
url_for("settings_page"), url_for("settings_page"),
data={"password": "password", "minutes_between_check": 180, data={"password": "password",
"minutes_between_check": 180,
'fetch_backend': "html_requests"}, 'fetch_backend': "html_requests"},
follow_redirects=True follow_redirects=True
) )
@@ -99,8 +104,17 @@ def test_check_access_no_remote_access_to_remove_password(app, client):
assert b"Password protection enabled." in res.data assert b"Password protection enabled." in res.data
assert b"Login" in res.data assert b"Login" in res.data
res = c.get(url_for("settings_page", removepassword="yes"), res = c.post(
follow_redirects=True) url_for("settings_page"),
data={
"minutes_between_check": 180,
"tag": "",
"headers": "",
"fetch_backend": "html_webdriver",
"removepassword_button": "Remove password"
},
follow_redirects=True,
)
assert b"Password protection removed." not in res.data assert b"Password protection removed." not in res.data
res = c.get(url_for("index"), res = c.get(url_for("index"),
+6
View File
@@ -25,6 +25,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
data={"urls": url_for('test_endpoint', _external=True)}, data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True follow_redirects=True
) )
assert b"1 Imported" in res.data assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread) time.sleep(sleep_time_for_fetch_thread)
@@ -69,6 +70,11 @@ def test_check_basic_change_detection_functionality(client, live_server):
res = client.get(url_for("rss")) res = client.get(url_for("rss"))
expected_url = url_for('test_endpoint', _external=True) expected_url = url_for('test_endpoint', _external=True)
assert b'<rss' in res.data assert b'<rss' in res.data
# re #16 should have the diff in here too
assert b'(into ) which has this one new line' in res.data
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
+87
View File
@@ -0,0 +1,87 @@
#!/usr/bin/python3
# coding=utf-8
import time
from flask import url_for
from .util import live_server_setup
import pytest
def test_setup(live_server):
    """Hand the live server to the shared setup helper imported from the test utilities."""
    live_server_setup(live_server)
def set_html_response():
    """Store an HTML fixture containing multi-byte (Chinese) text in the endpoint content file.

    The fixture is written to ``test-datastore/endpoint-content.txt``.
    Returns None.
    """
    html_fixture = """
<html><body><span class="nav_second_img_text">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;铸大国重器挺制造脊梁致力能源未来赋能美好生活
</span>
</body></html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(html_fixture)
# In the case the server does not issue a charset= or doesn't have a content_type header set
def test_check_encoding_detection(client, live_server):
    """Watch an endpoint served as ``text/html`` (no charset) and verify the
    preview page contains the original UTF-8 text."""
    set_html_response()

    # Let the endpoint come up before pointing a watch at it
    time.sleep(1)

    # Register the endpoint URL through the import page
    watch_url = url_for('test_endpoint', content_type="text/html", _external=True)
    client.post(url_for("import_page"), data={"urls": watch_url}, follow_redirects=True)

    # Request a recheck, then wait for the worker thread to process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    preview = client.get(url_for("preview_page", uuid="first"), follow_redirects=True)
    page_bytes = preview.data

    # The original characters must be present, UTF-8 encoded
    assert "铸大国重".encode('utf-8') in page_bytes

    # The byte sequence left behind by a failed encoding must be absent
    assert b'\xc2\xa7' not in page_bytes
# In the case the server does not issue a charset= or doesn't have a content_type header set
def test_check_encoding_detection_missing_content_type_header(client, live_server):
    """Same check as the explicit ``text/html`` case, but the endpoint URL is
    built without a ``content_type`` argument."""
    set_html_response()

    # Let the endpoint come up before pointing a watch at it
    time.sleep(1)

    # Register the endpoint URL through the import page
    watch_url = url_for('test_endpoint', _external=True)
    client.post(url_for("import_page"), data={"urls": watch_url}, follow_redirects=True)

    # Request a recheck, then wait for the worker thread to process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(2)

    preview = client.get(url_for("preview_page", uuid="first"), follow_redirects=True)
    page_bytes = preview.data

    # The original characters must be present, UTF-8 encoded
    assert "铸大国重".encode('utf-8') in page_bytes

    # The byte sequence left behind by a failed encoding must be absent
    assert b'\xc2\xa7' not in page_bytes
@@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import time import time
from flask import url_for from flask import url_for
from . util import live_server_setup from . util import live_server_setup
@@ -17,7 +18,9 @@ def test_error_handler(client, live_server):
time.sleep(1) time.sleep(1)
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_endpoint_403_error', _external=True) test_url = url_for('test_endpoint',
status_code=403,
_external=True)
res = client.post( res = client.post(
url_for("import_page"), url_for("import_page"),
data={"urls": test_url}, data={"urls": test_url},
@@ -0,0 +1,190 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
def test_setup(live_server):
    """Hand the live server to the shared setup helper imported from the test utilities."""
    live_server_setup(live_server)
def set_original_response():
    """Write the baseline HTML page to ``test-datastore/endpoint-content.txt``."""
    baseline_html = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(baseline_html)
def set_some_changed_response():
    """Write the modified HTML page (one paragraph reworded) to
    ``test-datastore/endpoint-content.txt``."""
    changed_html = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines, and a new thing too.</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(changed_html)
def test_normal_page_check_works_with_ignore_status_code(client, live_server):
    """With ``ignore_status_codes`` switched on in the global settings, a content
    change on the plain test endpoint must still show up as 'unviewed'."""
    fetch_wait = 3

    # Give the endpoint time to spin up
    time.sleep(1)

    set_original_response()

    # Enable the ignore_status_codes option through the settings page
    settings_payload = {
        "minutes_between_check": 180,
        "ignore_status_codes": "y",
        'fetch_backend': "html_requests"
    }
    settings_res = client.post(url_for("settings_page"), data=settings_payload, follow_redirects=True)
    assert b"Settings updated." in settings_res.data

    # Register the endpoint URL through the import page
    watch_url = url_for('test_endpoint', _external=True)
    import_res = client.post(url_for("import_page"), data={"urls": watch_url}, follow_redirects=True)
    assert b"1 Imported" in import_res.data

    time.sleep(fetch_wait)

    # First recheck against the original content
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Change the served content, wait, then recheck
    set_some_changed_response()
    time.sleep(fetch_wait)
    client.get(url_for("api_watch_checknow"), follow_redirects=True)

    # Give the worker thread time to pick it up
    time.sleep(fetch_wait)

    # The change must be flagged on the index page for our endpoint
    index_res = client.get(url_for("index"))
    assert b'unviewed' in index_res.data
    assert b'/test-endpoint' in index_res.data
# Tests the whole stack works with status codes ignored
def test_403_page_check_works_with_ignore_status_code(client, live_server):
    """A watch on an endpoint answering 403 must still detect content changes
    once ``ignore_status_codes`` is enabled on that watch."""
    fetch_wait = 3

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the 403-returning endpoint URL through the import page
    watch_url = url_for('test_endpoint', status_code=403, _external=True)
    import_res = client.post(url_for("import_page"), data={"urls": watch_url}, follow_redirects=True)
    assert b"1 Imported" in import_res.data

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # Edit the watch and tick its ignore_status_codes option
    edit_payload = {"ignore_status_codes": "y", "url": watch_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}
    edit_res = client.post(url_for("edit_page", uuid="first"), data=edit_payload, follow_redirects=True)
    assert b"Updated watch." in edit_res.data

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # Make a change
    set_some_changed_response()

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # The watch must be flagged as changed even though the endpoint answers 403
    index_res = client.get(url_for("index"))
    assert b'unviewed' in index_res.data
# Tests that a 403 response is reported as an error when status codes are NOT ignored
def test_403_page_check_fails_without_ignore_status_code(client, live_server):
    """A watch on an endpoint answering 403 must report 'Status Code 403' on the
    index page when ``ignore_status_codes`` is not submitted for that watch."""
    fetch_wait = 3

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the 403-returning endpoint URL through the import page
    watch_url = url_for('test_endpoint', status_code=403, _external=True)
    import_res = client.post(url_for("import_page"), data={"urls": watch_url}, follow_redirects=True)
    assert b"1 Imported" in import_res.data

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # Save the watch through the edit page WITHOUT the ignore_status_codes field
    edit_payload = {"url": watch_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}
    edit_res = client.post(url_for("edit_page", uuid="first"), data=edit_payload, follow_redirects=True)
    assert b"Updated watch." in edit_res.data

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # Make a change
    set_some_changed_response()

    # Recheck and let the worker thread process it
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)

    # The index page must surface the 403 status for this watch
    index_res = client.get(url_for("index"))
    assert b'Status Code 403' in index_res.data
+1 -1
View File
@@ -125,7 +125,7 @@ def test_check_notification(client, live_server):
# Diff was correctly executed # Diff was correctly executed
assert "Diff Full: Some initial text" in notification_submission assert "Diff Full: Some initial text" in notification_submission
assert "Diff: (changed) Which is across multiple lines" in notification_submission assert "Diff: (changed) Which is across multiple lines" in notification_submission
assert "(-> into) which has this one new line" in notification_submission assert "(into ) which has this one new line" in notification_submission
if env_base_url: if env_base_url:
+36
View File
@@ -0,0 +1,36 @@
from flask import url_for
from . util import set_original_response, set_modified_response, live_server_setup
import time
def test_setup(live_server):
    """Hand the live server to the shared setup helper imported from the test utilities."""
    live_server_setup(live_server)
def test_file_access(client, live_server):
    """Changing a watch's URL to a ``file://`` path must end with a
    'denied for security reasons' message on the index page."""
    import_res = client.post(
        url_for("import_page"),
        data={"urls": 'https://localhost'},
        follow_redirects=True
    )
    assert b"1 Imported" in import_res.data

    # Try to repoint the imported watch at a local file
    edit_payload = {
        "url": 'file:///etc/passwd',
        "tag": "",
        "method": "GET",
        "fetch_backend": "html_requests",
        "body": ""}
    client.post(url_for("edit_page", uuid="first"), data=edit_payload, follow_redirects=True)

    time.sleep(3)

    index_res = client.get(url_for("index", uuid="first"), follow_redirects=True)
    assert b'denied for security reasons' in index_res.data
@@ -0,0 +1,3 @@
After twenty years, as cursed as I may be
ok
and insure that I'm one of those computer nerds.
@@ -2,5 +2,6 @@ After twenty years, as cursed as I may be
for having learned computerese, for having learned computerese,
I continue to examine bits, bytes and words I continue to examine bits, bytes and words
xok xok
next-x-ok
and insure that I'm one of those computer nerds. and insure that I'm one of those computer nerds.
and something new and something new
@@ -12,12 +12,19 @@ from changedetectionio import diff
class TestDiffBuilder(unittest.TestCase): class TestDiffBuilder(unittest.TestCase):
def test_expected_diff_output(self): def test_expected_diff_output(self):
base_dir=os.path.dirname(__file__) base_dir = os.path.dirname(__file__)
output = diff.render_diff(base_dir+"/test-content/before.txt", base_dir+"/test-content/after.txt") output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
output = output.split("\n") output = output.split("\n")
self.assertIn("(changed) ok", output) self.assertIn('(changed) ok', output)
self.assertIn("(-> into) xok", output) self.assertIn('(into ) xok', output)
self.assertIn("(added) and something new", output) self.assertIn('(into ) next-x-ok', output)
self.assertIn('(added ) and something new', output)
output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
output = output.split("\n")
self.assertIn('(removed) for having learned computerese,', output)
self.assertIn('(removed) I continue to examine bits, bytes and words', output)
# @todo test blocks of changed, blocks of added, blocks of removed # @todo test blocks of changed, blocks of added, blocks of removed
+9 -11
View File
@@ -38,21 +38,19 @@ def set_modified_response():
def live_server_setup(live_server): def live_server_setup(live_server):
@live_server.app.route('/test-endpoint') @live_server.app.route('/test-endpoint')
def test_endpoint(): def test_endpoint():
ctype = request.args.get('content_type') ctype = request.args.get('content_type')
status_code = request.args.get('status_code')
# Tried using a global var here but didn't seem to work, so reading from a file instead. try:
with open("test-datastore/endpoint-content.txt", "r") as f: # Tried using a global var here but didn't seem to work, so reading from a file instead.
resp = make_response(f.read()) with open("test-datastore/endpoint-content.txt", "r") as f:
resp.headers['Content-Type'] = ctype if ctype else 'text/html' resp = make_response(f.read(), status_code)
return resp resp.headers['Content-Type'] = ctype if ctype else 'text/html'
return resp
@live_server.app.route('/test-403') except FileNotFoundError:
def test_endpoint_403_error(): return make_response('', status_code)
resp = make_response('', 403)
return resp
# Just return the headers in the request # Just return the headers in the request
@live_server.app.route('/test-headers') @live_server.app.route('/test-headers')
-3
View File
@@ -42,7 +42,6 @@ class update_worker(threading.Thread):
now = time.time() now = time.time()
try: try:
changed_detected, update_obj, contents = update_handler.run(uuid) changed_detected, update_obj, contents = update_handler.run(uuid)
# Re #342 # Re #342
@@ -50,8 +49,6 @@ class update_worker(threading.Thread):
# We then convert/.decode('utf-8') for the notification etc # We then convert/.decode('utf-8') for the notification etc
if not isinstance(contents, (bytes, bytearray)): if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes") raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e: except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e)) self.app.logger.error("File permission error updating", uuid, str(e))
except content_fetcher.EmptyReply as e: except content_fetcher.EmptyReply as e:
+1 -1
View File
@@ -2,7 +2,7 @@ version: '2'
services: services:
changedetection: changedetection:
image: ghcr.io/dgtlmoon/changedetection.io image: ghcr.io/dgtlmoon/changedetection.io
container_name: changedetection.io container_name: changedetection
hostname: changedetection hostname: changedetection
volumes: volumes:
- changedetection-data:/datastore - changedetection-data:/datastore
+6 -1
View File
@@ -1,5 +1,5 @@
flask~= 2.0 flask~= 2.0
flask_wtf
eventlet>=0.31.0 eventlet>=0.31.0
validators validators
timeago ~=1.0 timeago ~=1.0
@@ -35,3 +35,8 @@ lxml
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0 # 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
selenium ~= 4.1.0 selenium ~= 4.1.0
# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
# need to revisit flask login versions
werkzeug ~= 2.0.0
+3 -3
View File
@@ -32,11 +32,11 @@ setup(
long_description_content_type='text/markdown', long_description_content_type='text/markdown',
keywords='website change monitor for changes notification change detection ' keywords='website change monitor for changes notification change detection '
'alerts tracking website tracker change alert website and monitoring', 'alerts tracking website tracker change alert website and monitoring',
zip_safe=False, entry_points={"console_scripts": ["changedetection.io=changedetectionio.changedetection:main"]},
entry_points={"console_scripts": ["changedetection.io=changedetection:main"]}, zip_safe=True,
scripts=["changedetection.py"],
author='dgtlmoon', author='dgtlmoon',
url='https://changedetection.io', url='https://changedetection.io',
scripts=['changedetection.py'],
packages=['changedetectionio'], packages=['changedetectionio'],
include_package_data=True, include_package_data=True,
install_requires=install_requires, install_requires=install_requires,