Compare commits


1 Commit

Author SHA1 Message Date
dgtlmoon  775ea12067  Be sure that only valid history index lines are read  2022-10-19 22:05:37 +02:00
25 changed files with 56 additions and 647 deletions

View File

@@ -1,21 +1,12 @@
name: ChangeDetection.io Container Build Test
# Triggers the workflow on push or pull request events
# This line doesnt work, even tho it is the documented one
#on: [push, pull_request]
on:
push:
paths:
- requirements.txt
- Dockerfile
pull_request:
paths:
- requirements.txt
- Dockerfile
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
# @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:

View File

@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
Please be sure that all new functionality has a matching test!
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
```
pip3 install -r requirements-dev

View File

@@ -64,7 +64,6 @@ EXPOSE 5000
# The actual flask app
COPY changedetectionio /app/changedetectionio
# The eventlet server wrapper
COPY changedetection.py /app/changedetection.py

View File

@@ -184,9 +184,9 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
## Proxy Configuration
## Proxy configuration
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible]( https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support)
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration
## Raspberry Pi support?

View File

@@ -194,9 +194,6 @@ def changedetection_app(config=None, datastore_o=None):
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
@@ -987,6 +984,9 @@ def changedetection_app(config=None, datastore_o=None):
# create a ZipFile object
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
# We only care about UUIDS from the current index file
uuids = list(datastore.data['watching'].keys())
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w",
@@ -1002,12 +1002,12 @@ def changedetection_app(config=None, datastore_o=None):
# Add the flask app secret
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
# Add any data in the watch data directory.
for uuid, w in datastore.data['watching'].items():
for f in Path(w.watch_data_dir).glob('*'):
zipObj.write(f,
# Use the full path to access the file, but make the file 'relative' in the Zip.
arcname=os.path.join(f.parts[-2], f.parts[-1]),
# Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
parent_p = txt_file_path.parent
if parent_p.name in uuids:
zipObj.write(txt_file_path,
arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
compress_type=zipfile.ZIP_DEFLATED,
compresslevel=8)
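
One side of the backup hunk above walks each watch's data directory; the other collects every `*.txt` under the datastore and keeps only files whose parent directory is a currently-known watch UUID. A minimal sketch of that filtering idea, with hypothetical paths and a helper name of my own (not the project's exact function):

```python
import os
import zipfile
from pathlib import Path

def backup_snapshots(datastore_path, uuids, backup_filepath):
    """Zip only the *.txt snapshots that belong to a currently-known watch UUID."""
    with zipfile.ZipFile(backup_filepath, "w",
                         compression=zipfile.ZIP_DEFLATED,
                         compresslevel=8) as zip_obj:
        for txt_file_path in Path(datastore_path).rglob('*.txt'):
            # The snapshot's parent directory name is the watch UUID; skip orphaned dirs.
            if txt_file_path.parent.name in uuids:
                # Keep the archive paths relative to the datastore root.
                zip_obj.write(txt_file_path,
                              arcname=os.path.relpath(txt_file_path, datastore_path))

# Hypothetical usage:
# backup_snapshots('/datastore', {'9f6a1c3e-0000-4000-8000-000000000000'}, 'backup.zip')
```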

View File

@@ -122,37 +122,3 @@ class CreateWatch(Resource):
return {'status': "OK"}, 200
return list, 200
class SystemInfo(Resource):
def __init__(self, **kwargs):
# datastore is a black box dependency
self.datastore = kwargs['datastore']
self.update_q = kwargs['update_q']
@auth.check_token
def get(self):
import time
overdue_watches = []
# Check all watches and report which have not been checked but should have been
for uuid, watch in self.datastore.data.get('watching', {}).items():
# see if now - last_checked is greater than the time that should have been
# this is not super accurate (maybe they just edited it) but better than nothing
t = watch.threshold_seconds()
if not t:
# Use the system wide default
t = self.datastore.threshold_seconds
time_since_check = time.time() - watch.get('last_checked')
# Allow 5 minutes of grace time before we decide it's overdue
if time_since_check - (5 * 60) > t:
overdue_watches.append(uuid)
return {
'queue_size': self.update_q.qsize(),
'overdue_watches': overdue_watches,
'uptime': round(time.time() - self.datastore.start_time, 2),
'watch_count': len(self.datastore.data.get('watching', {}))
}, 200
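
The `SystemInfo` resource above exposes `/api/v1/systeminfo`, reporting queue size, overdue watches, uptime and watch count, behind the same `x-api-key` token check as the other API endpoints. A hedged example of querying it with `requests` (host, port and key are placeholders for your own instance):

```python
import requests

BASE_URL = "http://localhost:5000"   # placeholder instance URL
API_KEY = "your-api-key-here"        # placeholder API key

resp = requests.get(f"{BASE_URL}/api/v1/systeminfo",
                    headers={"x-api-key": API_KEY},
                    timeout=10)
resp.raise_for_status()
info = resp.json()

print("queue size :", info["queue_size"])
print("overdue    :", len(info["overdue_watches"]))
print("uptime (s) :", info["uptime"])
print("watches    :", info["watch_count"])
```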

View File

@@ -102,14 +102,6 @@ def main():
has_password=datastore.data['settings']['application']['password'] != False
)
# Monitored websites will not receive a Referer header
# when a user clicks on an outgoing link.
@app.after_request
def hide_referrer(response):
if os.getenv("HIDE_REFERER", False):
response.headers["Referrer-Policy"] = "no-referrer"
return response
# Proxy sub-directory support
# Set environment var USE_X_SETTINGS=1 on this script
# And then in your proxy_pass settings
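
The `hide_referrer` hook in the hunk above is toggled by the `HIDE_REFERER` environment variable (see the docker-compose hunk further down) and adds a `Referrer-Policy: no-referrer` header to every response, so monitored sites can't see the changedetection.io hostname in outgoing links. A self-contained sketch of the same pattern in a bare Flask app:

```python
import os
from flask import Flask

app = Flask(__name__)

@app.after_request
def hide_referrer(response):
    # Only add the header when the operator opts in via the environment.
    if os.getenv("HIDE_REFERER", False):
        response.headers["Referrer-Policy"] = "no-referrer"
    return response

@app.route("/")
def index():
    return "ok"

if __name__ == "__main__":
    app.run(port=5000)
```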

View File

@@ -2,14 +2,14 @@ import hashlib
import logging
import os
import re
import time
import urllib3
import difflib
from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():
@@ -65,9 +65,7 @@ class perform_site_check():
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
timeout = self.datastore.data['settings']['requests'].get('timeout')
url = watch.link
url = watch.get('url')
request_body = self.datastore.data['watching'][uuid].get('body')
request_method = self.datastore.data['watching'][uuid].get('method')
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
@@ -289,23 +287,8 @@ class perform_site_check():
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
if changed_detected:
if not watch.get("trigger_add", True) or not watch.get("trigger_del", True): # if we are supposed to filter any diff types
# get the diff types present in the watch
diff_types = watch.get_diff_types(text_content_before_ignored_filter)
print("Diff components found: " + str(diff_types))
# Only Additions (deletions are turned off)
if not watch["trigger_del"] and diff_types["del"] and not diff_types["add"]:
changed_detected = False
# Only Deletions (additions are turned off)
elif not watch["trigger_add"] and diff_types["add"] and not diff_types["del"]:
changed_detected = False
# Always record the new checksum and the new text
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
watch.save_previous_text(text_content_before_ignored_filter)
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
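
The diff-filter block above (the `trigger_add`/`trigger_del` logic this compare takes out) decides whether `changed_detected` should stand by classifying the change as additions and/or deletions. The classification itself, shown later in the Watch model as `get_diff_types()`, boils down to inspecting `difflib.SequenceMatcher` opcodes; a minimal standalone sketch:

```python
import difflib

def diff_types(old_text, new_text):
    """Return which kinds of change exist between two snapshots."""
    found = {'add': False, 'del': False}
    cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \t",
                                       a=old_text, b=new_text)
    for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
        if tag == 'insert':
            found['add'] = True
        elif tag == 'delete':
            found['del'] = True
        elif tag == 'replace':
            # A replacement counts as both an addition and a deletion.
            found['add'] = True
            found['del'] = True
    return found

print(diff_types("Here\nis\nsome\ntext", "Here\nis\ntext"))  # deletions only
```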

View File

@@ -323,18 +323,6 @@ class ValidateCSSJSONXPATHInput(object):
except:
raise ValidationError("A system-error occurred when validating your jq expression")
class ValidateDiffFilters(object):
"""
Validates that at least one filter checkbox is selected
"""
def __init__(self, message=None):
self.message = message
def __call__(self, form, field):
if not form.trigger_add.data and not form.trigger_del.data:
message = field.gettext('At least one filter checkbox must be selected')
raise ValidationError(message)
class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
@@ -377,8 +365,6 @@ class watchForm(commonSettingsForm):
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
trigger_add = BooleanField('Additions', [ValidateDiffFilters()], default=True)
trigger_del = BooleanField('Deletions', [ValidateDiffFilters()], default=True)
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

View File

@@ -1,8 +1,6 @@
from distutils.util import strtobool
import logging
import os
import time
import uuid
import uuid as uuid_builder
from distutils.util import strtobool
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -24,7 +22,7 @@ class model(dict):
#'newest_history_key': 0,
'title': None,
'previous_md5': False,
'uuid': str(uuid.uuid4()),
'uuid': str(uuid_builder.uuid4()),
'headers': {}, # Extra headers to send
'body': None,
'method': 'GET',
@@ -47,8 +45,6 @@ class model(dict):
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'trigger_add': True,
'trigger_del': True,
'proxy': None, # Preferred proxy connection
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
@@ -64,7 +60,7 @@ class model(dict):
self.update(self.__base_config)
self.__datastore_path = kw['datastore_path']
self['uuid'] = str(uuid.uuid4())
self['uuid'] = str(uuid_builder.uuid4())
del kw['datastore_path']
@@ -86,19 +82,10 @@ class model(dict):
return False
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
print ("> Creating data dir {}".format(self.watch_data_dir))
os.mkdir(self.watch_data_dir)
@property
def link(self):
url = self.get('url', '')
if '{%' in url or '{{' in url:
from jinja2 import Environment
# Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/
jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
return str(jinja2_env.from_string(url).render())
return url
target_path = os.path.join(self.__datastore_path, self['uuid'])
if not os.path.isdir(target_path):
print ("> Creating data dir {}".format(target_path))
os.mkdir(target_path)
@property
def label(self):
@@ -122,39 +109,18 @@ class model(dict):
@property
def history(self):
"""History index is just a text file as a list
{watch-uuid}/history.txt
contains a list like
{epoch-time},{filename}\n
We read in this list as the history information
"""
tmp_history = {}
import logging
import time
# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt")
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time()))
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
# The index history could contain a relative path, so we need to make the fullpath
# so that python can read it
if not '/' in v and not '\'' in v:
v = os.path.join(self.watch_data_dir, v)
else:
# It's possible that they moved the datadir on older versions
# So the snapshot exists but is in a different path
snapshot_fname = v.split('/')[-1]
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(v) and os.path.exists(proposed_new_path):
v = proposed_new_path
tmp_history[k] = v
if len(tmp_history):
@@ -166,7 +132,7 @@ class model(dict):
@property
def has_history(self):
fname = os.path.join(self.watch_data_dir, "history.txt")
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
return os.path.isfile(fname)
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
@@ -185,19 +151,25 @@ class model(dict):
# Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp):
import uuid
import logging
output_path = os.path.join(self.__datastore_path, self['uuid'])
self.ensure_data_dir_exists()
snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
snapshot_fname = os.path.join(output_path, str(uuid.uuid4()))
logging.debug("Saving history text {}".format(snapshot_fname))
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
with open(snapshot_fname, 'wb') as f:
f.write(contents)
f.close()
# Append to index
# @todo check last char was \n
index_fname = os.path.join(self.watch_data_dir, "history.txt")
index_fname = os.path.join(output_path, "history.txt")
with open(index_fname, 'a') as f:
f.write("{},{}\n".format(timestamp, snapshot_fname))
f.close()
@@ -208,35 +180,6 @@ class model(dict):
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
return snapshot_fname
# Save previous text snapshot for diffing - used for calculating additions and deletions
def save_previous_text(self, contents):
import logging
output_path = os.path.join(self.__datastore_path, self['uuid'])
# Incase the operator deleted it, check and create.
self.ensure_data_dir_exists()
snapshot_fname = os.path.join(self.watch_data_dir, "previous.txt")
logging.debug("Saving previous text {}".format(snapshot_fname))
with open(snapshot_fname, 'wb') as f:
f.write(contents)
return snapshot_fname
# Get previous text snapshot for diffing - used for calculating additions and deletions
def get_previous_text(self):
snapshot_fname = os.path.join(self.watch_data_dir, "previous.txt")
if self.history_n < 1:
return ""
with open(snapshot_fname, 'rb') as f:
contents = f.read()
return contents
@property
def has_empty_checktime(self):
# using all() + dictionary comprehension
@@ -266,40 +209,15 @@ class model(dict):
# if not, something new happened
return not local_lines.issubset(existing_history)
# Get diff types (addition, deletion, modification) from the previous snapshot and new_text
# uses similar algorithm to customSequenceMatcher in diff.py
# Returns a dict of diff types and wether they are present in the diff
def get_diff_types(self, new_text):
import difflib
diff_types = {
'add': False,
'del': False,
}
# get diff types using difflib
cruncher = difflib.SequenceMatcher(isjunk=lambda x: x in " \\t", a=str(self.get_previous_text()), b=str(new_text))
for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
if tag == 'delete':
diff_types["del"] = True
elif tag == 'insert':
diff_types["add"] = True
elif tag == 'replace':
diff_types["del"] = True
diff_types["add"] = True
return diff_types
def get_screenshot(self):
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
if os.path.isfile(fname):
return fname
return False
def __get_file_ctime(self, filename):
fname = os.path.join(self.watch_data_dir, filename)
fname = os.path.join(self.__datastore_path, self['uuid'], filename)
if os.path.isfile(fname):
return int(os.path.getmtime(fname))
return False
@@ -324,14 +242,9 @@ class model(dict):
def snapshot_error_screenshot_ctime(self):
return self.__get_file_ctime('last-error-screenshot.png')
@property
def watch_data_dir(self):
# The base dir of the watch data
return os.path.join(self.__datastore_path, self['uuid'])
def get_error_text(self):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r') as f:
return f.read()
@@ -339,7 +252,7 @@ class model(dict):
def get_error_snapshot(self):
"""Return path to the screenshot that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
if os.path.isfile(fname):
return fname
return False
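
The Watch model hunks above are where the commit message applies: `history.txt` is a plain-text index of `{epoch-time},{filename}` lines, and only lines that actually contain a comma should be parsed, so a blank or corrupt entry is skipped instead of breaking the read. A hedged sketch of that defensive parse (the fuller property in the repo also deals with relative and moved snapshot paths):

```python
import os

def read_history_index(watch_data_dir):
    """Parse history.txt into {timestamp: snapshot_path}, skipping invalid lines."""
    history = {}
    index_fname = os.path.join(watch_data_dir, "history.txt")
    if not os.path.isfile(index_fname):
        return history

    with open(index_fname, "r") as f:
        for line in f:
            line = line.strip()
            # Only "{epoch-time},{filename}" lines count; skip blanks or corrupt entries.
            if ',' not in line:
                continue
            timestamp, snapshot = line.split(',', 1)
            # A bare filename is resolved next to the index file.
            if '/' not in snapshot and '\\' not in snapshot:
                snapshot = os.path.join(watch_data_dir, snapshot)
            history[timestamp] = snapshot
    return history
```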

View File

@@ -9,8 +9,6 @@
# exit when any command fails
set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
@@ -47,9 +45,7 @@ docker kill $$-test_selenium
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
pip3 install playwright~=1.24
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5

View File

@@ -156,7 +156,7 @@ body:after, body:before {
.fetch-error {
padding-top: 1em;
font-size: 80%;
font-size: 60%;
max-width: 400px;
display: block;
}
@@ -803,4 +803,4 @@ ul {
padding: 0.5rem;
border-radius: 5px;
color: #ff3300;
}
}

View File

@@ -30,14 +30,14 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
self.__data = App.model()
self.needs_write = False
self.datastore_path = datastore_path
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
self.needs_write = False
self.proxy_list = None
self.start_time = time.time()
self.stop_thread = False
self.__data = App.model()
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -548,10 +548,6 @@ class ChangeDetectionStore:
# `last_changed` not needed, we pull that information from the history.txt index
def update_4(self):
for uuid, watch in self.data['watching'].items():
# Be sure it's recalculated
p = watch.history
if watch.history_n < 2:
watch['last_changed'] = 0
try:
# Remove it from the struct
del(watch['last_changed'])
@@ -587,23 +583,3 @@ class ChangeDetectionStore:
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
if self.data['settings']['headers'].get(v):
del self.data['settings']['headers'][v]
# Generate a previous.txt for all watches that do not have one and contain history
def update_8(self):
for uuid, watch in self.data['watching'].items():
# Make sure we actually have history
if (watch.history_n == 0):
continue
latest_file_name = watch.history[watch.newest_history_key]
# Check if the previous.txt exists
if not os.path.exists(os.path.join(watch.watch_data_dir, "previous.txt")):
# Generate a previous.txt
with open(os.path.join(watch.watch_data_dir, "previous.txt"), "wb") as f:
# Fill it with the latest history
latest_file_name = watch.history[watch.newest_history_key]
with open(latest_file_name, "rb") as f2:
f.write(f2.read())

View File

@@ -40,8 +40,7 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span>
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d") }}
@@ -173,16 +172,6 @@ User-Agent: wonderbra 1.0") }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
<label for="trigger-type">Filter and restrict change detection of content to</label>
{{ render_checkbox_field(form.trigger_add, class="trigger-type") }}
{{ render_checkbox_field(form.trigger_del, class="trigger-type") }}
<span class="pure-form-message-inline">
Filters the change-detection of this watch to only this type of content change. <strong>Replacements</strong> (neither additions nor deletions) are always included. The 'diff' will still include all changes.
</span>
</div>
</fieldset>
<div class="pure-control-group">
{% set field = render_field(form.css_filter,
placeholder=".class-name or #some-id, or other CSS selector rule.",

View File

@@ -87,7 +87,7 @@
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}

View File

@@ -147,16 +147,6 @@ def test_api_simple(client, live_server):
# @todo how to handle None/default global values?
assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
# basic systeminfo check
res = client.get(
url_for("systeminfo"),
headers={'x-api-key': api_key},
)
info = json.loads(res.data)
assert info.get('watch_count') == 1
assert info.get('uptime') > 0.5
# Finally delete the watch
res = client.delete(
url_for("watch", uuid=watch_uuid),

View File

@@ -1,31 +1,18 @@
#!/usr/bin/python3
from .util import set_original_response, set_modified_response, live_server_setup
import time
from flask import url_for
from urllib.request import urlopen
from zipfile import ZipFile
import re
import time
from . util import set_original_response, set_modified_response, live_server_setup
def test_backup(client, live_server):
live_server_setup(live_server)
set_original_response()
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(3)
res = client.get(
url_for("get_backup"),
follow_redirects=True
@@ -33,19 +20,6 @@ def test_backup(client, live_server):
# Should get the right zip content type
assert res.content_type == "application/zip"
# Should be PK/ZIP stream
assert res.data.count(b'PK') >= 2
# ZipFile from buffer seems non-obvious, just save it instead
with open("download.zip", 'wb') as f:
f.write(res.data)
zip = ZipFile('download.zip')
l = zip.namelist()
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
newlist = list(filter(uuid4hex.match, l)) # Read Note below
# Should be three txt files in the archive (history and the snapshot)
assert len(newlist) == 3
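
The backup test hunk above filters the zip's file listing with a UUIDv4 regex to count the snapshot text files stored under each watch's directory. The same check as a small standalone sketch, using only the standard library (the local filename is illustrative):

```python
import re
from zipfile import ZipFile

# Entries like "<uuid4>/<snapshot>.txt" — snapshot text files stored under a watch's UUID dir.
uuid4hex = re.compile(
    r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt',
    re.I)

with ZipFile('download.zip') as backup:   # hypothetical local copy of the backup
    snapshots = [name for name in backup.namelist() if uuid4hex.match(name)]

print("snapshot/text files found:", len(snapshots))
```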

View File

@@ -1,107 +0,0 @@
#!/usr/bin/python3
# @NOTE: THIS RELIES ON SOME MIDDLEWARE TO MAKE CHECKBOXES WORK WITH WTFORMS UNDER TEST CONDITION, see changedetectionio/tests/util.py
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
Here
is
some
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_response_with_deleted_word():
test_return_data = """
Here
is
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_response_with_changed_word():
test_return_data = """
Here
ix
some
text
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filter_changes_as_add_delete(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Wait for it to read the original version
time.sleep(sleep_time_for_fetch_thread)
# Make a change that ONLY includes deletes
set_response_with_deleted_word()
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "y",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see a change because we chose to not know about any Deletions
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Recheck to be sure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Now set the original response, which will include the word, which should trigger Added (because trigger_add ==y)
set_original_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
# Now check 'changes' are always going to be triggered
set_original_response()
client.post(
url_for("edit_page", uuid="first"),
# Neither trigger add nor del? then we should see changes still
data={"trigger_add": "n",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
time.sleep(sleep_time_for_fetch_thread)
client.get(url_for("mark_all_viewed"), follow_redirects=True)
set_response_with_changed_word()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data

View File

@@ -1,83 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
A few new lines
Where there is more lines originally
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_delete_response():
test_return_data = """
A few new lines
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filtering_no_del(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "y",
"trigger_del": "n",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
# Make an delete change
set_delete_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see the change
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Make an delete change
set_original_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should see the change
res = client.get(url_for("index"))
assert b'unviewed' in res.data

View File

@@ -1,72 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_response():
test_return_data = """
A few new lines
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_add_response():
test_return_data = """
A few new lines
Where there is more lines than before
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_diff_filtering_no_add(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"trigger_add": "n",
"trigger_del": "y",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
# Make an add change
set_add_response()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# We should NOT see the change
res = client.get(url_for("index"))
# save res.data to a file
assert b'unviewed' not in res.data

View File

@@ -81,4 +81,4 @@ def test_consistent_history(client, live_server):
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, history.txt, previous.txt, and the snapshot"
assert len(files_in_watch_dir) == 2, "Should be just two files in the dir, history.txt and the snapshot"

View File

@@ -1,33 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_jinja2_in_url_query(client, live_server):
live_server_setup(live_server)
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_return_query', _external=True)
# because url_for() will URL-encode the var, but we dont here
full_url = "{}?{}".format(test_url,
"date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", )
res = client.post(
url_for("form_quick_watch_add"),
data={"url": full_url, "tag": "test"},
follow_redirects=True
)
assert b"Watch added" in res.data
time.sleep(3)
# It should report nothing found (no new 'unviewed' class)
res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)
assert b'date=2' in res.data
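
The deleted test above exercised Jinja2 templating inside watch URLs, with the `jinja2-time` extension providing the `{% now %}` tag — the same `Environment(extensions=['jinja2_time.TimeExtension'])` call that appears in the Watch model's `link` property hunk. A minimal sketch of rendering such a URL, assuming `jinja2` and `jinja2-time` are installed and using an example.com placeholder:

```python
from jinja2 import Environment

url = ("https://example.com/data"
       "?date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}")

# Only render when the URL actually contains Jinja2 syntax.
if '{%' in url or '{{' in url:
    jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
    url = str(jinja2_env.from_string(url).render())

print(url)   # e.g. https://example.com/data?date=2022.10.19
```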

View File

@@ -4,12 +4,6 @@ from flask import make_response, request
from flask import url_for
import logging
import time
from werkzeug import Request
import io
# This is a fix for macOS running tests.
import multiprocessing
multiprocessing.set_start_method("fork")
def set_original_response():
test_return_data = """<html>
@@ -165,42 +159,5 @@ def live_server_setup(live_server):
ret = " ".join([auth.username, auth.password, auth.type])
return ret
# Make sure any checkboxes that are supposed to be defaulted to true are set during the post request
# This is due to the fact that defaults are set in the HTML which we are not using during tests.
# This does not affect the server when running outside of a test
class DefaultCheckboxMiddleware(object):
def __init__(self, app):
self.app = app
def __call__(self, environ, start_response):
request = Request(environ)
if request.method == "POST" and "/edit" in request.path:
body = environ['wsgi.input'].read()
# if the checkboxes are not set, set them to true
if b"trigger_add" not in body:
body += b'&trigger_add=y'
if b"trigger_del" not in body:
body += b'&trigger_del=y'
# remove any checkboxes set to "n" so wtforms processes them correctly
body = body.replace(b"trigger_add=n", b"")
body = body.replace(b"trigger_del=n", b"")
body = body.replace(b"&&", b"&")
new_stream = io.BytesIO(body)
environ["CONTENT_LENGTH"] = len(body)
environ['wsgi.input'] = new_stream
return self.app(environ, start_response)
live_server.app.wsgi_app = DefaultCheckboxMiddleware(live_server.app.wsgi_app)
# Just return some GET var
@live_server.app.route('/test-return-query', methods=['GET'])
def test_return_query():
return request.query_string
live_server.start()

View File

@@ -45,9 +45,6 @@ services:
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
#
# Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
# - HIDE_REFERER=true
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports:

View File

@@ -1,8 +1,8 @@
flask ~= 2.0
flask~= 2.0
flask_wtf
eventlet >= 0.31.0
eventlet>=0.31.0
validators
timeago ~= 1.0
timeago ~=1.0
inscriptis ~= 2.2
feedgen ~= 0.9
flask-login ~= 0.5
@@ -46,9 +46,4 @@ selenium ~= 4.1.0
# need to revisit flask login versions
werkzeug ~= 2.0.0
# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2 ~= 3.1
jinja2-time
# playwright is installed at Dockerfile build time because it's not available on all platforms