Mirror of https://github.com/dgtlmoon/changedetection.io.git, synced 2025-11-05 00:56:06 +00:00

Compare commits: fix-legacy...brotli-sna (13 commits)
| SHA1 |
|---|
| 78aef62485 |
| 32463369e5 |
| 6d7d3eebc2 |
| a24551b194 |
| dfa7126f01 |
| 3ab2efd3ac |
| 219860d01b |
| 61b7a4c777 |
| 81f84bcb2f |
| ab4e5546ac |
| 1e5f51b80b |
| 88b37d5783 |
| d597a861b9 |
```diff
@@ -340,8 +340,6 @@ def changedetection_app(config=None, datastore_o=None):
             if len(dates) < 2:
                 continue

-            prev_fname = watch.history[dates[-2]]
-
             if not watch.viewed:
                 # Re #239 - GUID needs to be individual for each event
                 # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
```
```diff
@@ -362,9 +360,12 @@ def changedetection_app(config=None, datastore_o=None):

                 watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                 fe.title(title=watch_title)
-                latest_fname = watch.history[dates[-1]]

-                html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="<br>")
+                html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
+                                             newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
+                                             include_equal=False,
+                                             line_feed_sep="<br>")

                 fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
                            type='CDATA')
```
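The RSS entry wraps the rendered diff in a small HTML envelope. A minimal, self-contained sketch of that pattern using the feedgen library — the feed metadata, watch title, and diff string below are stand-in values, not taken from the codebase:

```python
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.title("changedetection.io")                        # stand-in feed metadata
fg.link(href="http://localhost:5000", rel="alternate")
fg.description("Watch change notifications")

fe = fg.add_entry()
watch_title = "Example watch"                         # normally the watch title or URL
html_diff = "(changed) 10.99<br>(into) 12.99"         # normally diff.render_diff(...) output
fe.title(title=watch_title)
fe.id("uuid-per-event")                               # Re #239: GUID must be unique per event
fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
           type='CDATA')

print(fg.rss_str(pretty=True).decode())
```

The `type='CDATA'` keeps feed readers from escaping the `<br>` separators that `line_feed_sep="<br>"` inserts.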
```diff
@@ -847,28 +848,22 @@ def changedetection_app(config=None, datastore_o=None):
         # Save the current newest history as the most recently viewed
         datastore.set_last_viewed(uuid, time.time())

-        newest_file = history[dates[-1]]
-
         # Read as binary and force decode as UTF-8
         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
         try:
-            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
-                newest_version_file_contents = f.read()
+            newest_version_file_contents = watch.get_history_snapshot(dates[-1])
         except Exception as e:
-            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
+            newest_version_file_contents = "Unable to read {}.\n".format(dates[-1])

         previous_version = request.args.get('previous_version')
-        try:
-            previous_file = history[previous_version]
-        except KeyError:
-            # Not present, use a default value, the second one in the sorted list.
-            previous_file = history[dates[-2]]
+        previous_timestamp = dates[-2]
+        if previous_version:
+            previous_timestamp = previous_version

         try:
-            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
-                previous_version_file_contents = f.read()
+            previous_version_file_contents = watch.get_history_snapshot(previous_timestamp)
         except Exception as e:
-            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
+            previous_version_file_contents = "Unable to read {}.\n".format(previous_timestamp)

         screenshot_url = watch.get_screenshot()
```
```diff
@@ -948,37 +943,35 @@ def changedetection_app(config=None, datastore_o=None):
             return output

         timestamp = list(watch.history.keys())[-1]
-        filename = watch.history[timestamp]
         try:
-            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
-                tmp = f.readlines()
+            tmp = watch.get_history_snapshot(timestamp).splitlines()

             # Get what needs to be highlighted
             ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']

             # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
-            ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
+            ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
                                                                 wordlist=ignore_rules,
                                                                 mode='line numbers'
                                                                 )

-            trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
+            trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
                                                                 wordlist=watch['trigger_text'],
                                                                 mode='line numbers'
                                                                 )
             # Prepare the classes and lines used in the template
             i=0
             for l in tmp:
                 classes=[]
                 i+=1
                 if i in ignored_line_numbers:
                     classes.append('ignored')
                 if i in trigger_line_numbers:
                     classes.append('triggered')
                 content.append({'line': l, 'classes': ' '.join(classes)})

         except Exception as e:
-            content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
+            content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})

         output = render_template("preview.html",
                                  content=content,
```
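Two subtle fixes ride along in the preview hunk above: `get_history_snapshot(...).splitlines()` replaces `f.readlines()`, and the join separator changes from `""` to `"\n"`. Unlike `readlines()`, `splitlines()` strips the line endings, so an empty-string join would fuse every line together and throw off the line-number matching. A quick illustration in plain Python:

```python
text = "alpha\nbeta\ngamma\n"

readlines_style = text.splitlines(keepends=True)  # ['alpha\n', 'beta\n', 'gamma\n'], like f.readlines()
splitlines_style = text.splitlines()              # ['alpha', 'beta', 'gamma']

assert "".join(readlines_style) == text                       # old code: newlines already present
assert "".join(splitlines_style) == "alphabetagamma"          # a naive port would fuse the lines
assert "\n".join(splitlines_style) == "alpha\nbeta\ngamma"    # the fix: restore line structure
```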
```diff
@@ -179,9 +179,7 @@ class WatchSingleHistory(Resource):
         if timestamp == 'latest':
             timestamp = list(watch.history.keys())[-1]

-        # @todo - Check for UTF-8 compatability
-        with open(watch.history[timestamp], 'r') as f:
-            content = f.read()
+        content = watch.get_history_snapshot(timestamp)

         response = make_response(content, 200)
         response.mimetype = "text/plain"
```
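For API consumers the endpoint behaves as before — the snapshot still comes back as `text/plain`, with any brotli decompression handled server-side. A hedged fetch example; the base URL, UUID, and key are placeholders, and the route shape follows the project's v1 API:

```python
import requests

# Placeholders: point these at your own instance, watch UUID and API key.
base = "http://localhost:5000"
uuid = "095be615-a8ad-4c33-8e9c-c7612fbf6c9f"
url = f"{base}/api/v1/watch/{uuid}/history/latest"

resp = requests.get(url, headers={"x-api-key": "YOUR_API_KEY"})
resp.raise_for_status()
print(resp.text)  # plain-text snapshot, decompressed if stored as .txt.br
```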
```diff
@@ -31,14 +31,11 @@ def customSequenceMatcher(before, after, include_equal=False, include_removed=Tr

 # only_differences - only return info about the differences, no context
 # line_feed_sep could be "<br>" or "<li>" or "\n" etc
-def render_diff(previous_file, newest_file, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
-    with open(newest_file, 'r') as f:
-        newest_version_file_contents = f.read()
-        newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):

-    if previous_file:
-        with open(previous_file, 'r') as f:
-            previous_version_file_contents = f.read()
+    newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+
+    if previous_version_file_contents:
         previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
     else:
         previous_version_file_contents = ""
```
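`render_diff()` now takes snapshot text rather than file paths, which keeps all file handling (and the brotli special-casing) inside the Watch model. A minimal sketch of the new calling convention; the two strings are stand-ins for real snapshots:

```python
from changedetectionio import diff

# In the application these strings come from watch.get_history_snapshot(timestamp).
previous_version_file_contents = "Price: 10.99\nIn stock"
newest_version_file_contents = "Price: 12.99\nIn stock"

html_diff = diff.render_diff(previous_version_file_contents,
                             newest_version_file_contents,
                             include_equal=False,   # only report changed lines
                             line_feed_sep="<br>")  # HTML-friendly line separator
print(html_diff)
```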
```diff
@@ -241,9 +241,32 @@ class model(dict):
         bump = self.history
         return self.__newest_history_key

+    def get_history_snapshot(self, timestamp):
+        import brotli
+        filepath = self.history[timestamp]
+
+        # See if a brotli versions exists and switch to that
+        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
+            filepath = f"{filepath}.br"
+
+        # OR in the backup case that the .br does not exist, but the plain one does
+        if filepath.endswith('.br') and not os.path.isfile(filepath):
+            if os.path.isfile(filepath.replace('.br', '')):
+                filepath = filepath.replace('.br', '')
+
+        if filepath.endswith('.br'):
+            # Brotli doesnt have a fileheader to detect it, so we rely on filename
+            # https://www.rfc-editor.org/rfc/rfc7932
+            with open(filepath, 'rb') as f:
+                return(brotli.decompress(f.read()).decode('utf-8'))
+
+        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+            return f.read()
+
     # Save some text file to the appropriate path and bump the history
     # result_obj from fetch_site_status.run()
     def save_history_text(self, contents, timestamp, snapshot_id):
+        import brotli

         self.ensure_data_dir_exists()
```
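As the comment in the new method notes, a brotli stream has no magic header (RFC 7932), so the `.br` filename suffix is the only signal for decompression. A standalone sketch of the same read path, with a hypothetical `read_snapshot` helper in place of the model method:

```python
import os
import brotli

def read_snapshot(filepath):
    # Hypothetical helper mirroring model.get_history_snapshot().
    # Prefer the .br sibling when it exists...
    if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
        filepath = f"{filepath}.br"

    # ...but fall back to the plain file if the .br copy is missing.
    if filepath.endswith('.br') and not os.path.isfile(filepath):
        plain = filepath[:-len('.br')]
        if os.path.isfile(plain):
            filepath = plain

    if filepath.endswith('.br'):
        # No header to sniff, so trust the extension (RFC 7932).
        with open(filepath, 'rb') as f:
            return brotli.decompress(f.read()).decode('utf-8')

    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        return f.read()
```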
```diff
@@ -252,16 +275,21 @@ class model(dict):
         if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
             time.sleep(timestamp - self.__newest_history_key)

-        snapshot_fname = f"{snapshot_id}.txt"
+        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
+        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

-        # Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
-        dest = os.path.join(self.watch_data_dir, snapshot_fname)
-        if not os.path.exists(dest):
-            # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
-            # most sites are utf-8 and some are even broken utf-8
-            with open(dest, 'wb') as f:
-                f.write(contents)
-                f.close()
+        if not skip_brotli and len(contents) > threshold:
+            snapshot_fname = f"{snapshot_id}.txt.br"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+        else:
+            snapshot_fname = f"{snapshot_id}.txt"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(contents)

         # Append to index
         # @todo check last char was \n
```
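The write path is now governed by two environment variables: snapshots larger than `SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD` bytes (default 1024) are stored as `.txt.br` unless `DISABLE_BROTLI_TEXT_SNAPSHOT` is truthy. A small sketch isolating just that decision — the helper name and inputs are hypothetical:

```python
import os
import brotli
from distutils.util import strtobool  # the diff calls strtobool(); distutils' version makes this sketch runnable

def snapshot_payload(contents: bytes, snapshot_id: str):
    # Hypothetical helper isolating the branch in save_history_text().
    threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
    skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))

    if not skip_brotli and len(contents) > threshold:
        # MODE_TEXT tunes the compressor for UTF-8 text input.
        return brotli.compress(contents, mode=brotli.MODE_TEXT), f"{snapshot_id}.txt.br"
    return contents, f"{snapshot_id}.txt"

data, fname = snapshot_payload(b"some page text " * 100, "b70a1bcb")
```

As in the original code, the destination is only written when it does not already exist, so identical checksummed snapshots are not re-saved under different filenames.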
```diff
@@ -359,6 +387,7 @@ class model(dict):
             return fname
         return False

+
     def pause(self):
         self['paused'] = True
```
```diff
@@ -388,8 +417,8 @@ class model(dict):
         # self.history will be keyed with the full path
         for k, fname in self.history.items():
             if os.path.isfile(fname):
-                with open(fname, "r") as f:
-                    contents = f.read()
+                if True:
+                    contents = self.get_history_snapshot(k)
                     res = re.findall(regex, contents, re.MULTILINE)
                     if res:
                         if not csv_writer:
```
```diff
@@ -28,3 +28,10 @@ pytest tests/test_notification.py
 # Re-run with HIDE_REFERER set - could affect login
 export HIDE_REFERER=True
 pytest tests/test_access_control.py
+
+# Re-run a few tests that will trigger brotli based storage
+export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
+pytest tests/test_access_control.py
+pytest tests/test_notification.py
+pytest tests/test_backend.py
+pytest tests/test_rss.py
```
```diff
@@ -198,8 +198,8 @@ def test_check_json_without_filter(client, live_server):
     )

     # Should still see '"html": "<b>"'
-    assert b'"<b>' in res.data
-    assert res.data.count(b'{\n') >= 2
+    assert b'"html": "<b>"' in res.data
+    assert res.data.count(b'{') >= 2

     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data
```
```diff
@@ -13,21 +13,33 @@ class TestDiffBuilder(unittest.TestCase):

     def test_expected_diff_output(self):
         base_dir = os.path.dirname(__file__)
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
         output = output.split("\n")
         self.assertIn('(changed) ok', output)
         self.assertIn('(into) xok', output)
         self.assertIn('(into) next-x-ok', output)
         self.assertIn('(added) and something new', output)

-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
         output = output.split("\n")
         self.assertIn('(removed) for having learned computerese,', output)
         self.assertIn('(removed) I continue to examine bits, bytes and words', output)

         #diff_removed
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt", include_equal=False, include_removed=True, include_added=False)
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
         output = output.split("\n")
         self.assertIn('(changed) ok', output)
         self.assertIn('(into) xok', output)
```
```diff
@@ -35,7 +47,9 @@ class TestDiffBuilder(unittest.TestCase):
         self.assertNotIn('(added) and something new', output)

         #diff_removed
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt", include_equal=False, include_removed=True, include_added=False)
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
         output = output.split("\n")
         self.assertIn('(removed) for having learned computerese,', output)
         self.assertIn('(removed) I continue to examine bits, bytes and words', output)
```
```diff
@@ -69,18 +69,17 @@ class update_worker(threading.Thread):
                     else:
                         line_feed_sep = "\n"

-                    with open(watch_history[dates[-1]], 'rb') as f:
-                        snapshot_contents = f.read()
+                    snapshot_contents = watch.get_history_snapshot(dates[-1])

                     n_object.update({
                         'watch_url': watch['url'],
                         'uuid': watch_uuid,
                         'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
-                        'current_snapshot': snapshot_contents.decode('utf-8'),
-                        'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
-                        'diff_added': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_removed=False, line_feed_sep=line_feed_sep),
-                        'diff_removed': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_added=False, line_feed_sep=line_feed_sep),
-                        'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_equal=True, line_feed_sep=line_feed_sep)
+                        'current_snapshot': snapshot_contents,
+                        'diff': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), line_feed_sep=line_feed_sep),
+                        'diff_added': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_removed=False, line_feed_sep=line_feed_sep),
+                        'diff_removed': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_added=False, line_feed_sep=line_feed_sep),
+                        'diff_full': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_equal=True, line_feed_sep=line_feed_sep)
                     })
                     logging.info (">> SENDING NOTIFICATION")
                     self.notification_q.put(n_object)
```