Mirror of https://github.com/dgtlmoon/changedetection.io.git
Compare commits

13 Commits (2727-notif...brotli-sna):

- 78aef62485
- 32463369e5
- 6d7d3eebc2
- a24551b194
- dfa7126f01
- 3ab2efd3ac
- 219860d01b
- 61b7a4c777
- 81f84bcb2f
- ab4e5546ac
- 1e5f51b80b
- 88b37d5783
- d597a861b9
@@ -340,8 +340,6 @@ def changedetection_app(config=None, datastore_o=None):
             if len(dates) < 2:
                 continue
 
-            prev_fname = watch.history[dates[-2]]
-
             if not watch.viewed:
                 # Re #239 - GUID needs to be individual for each event
                 # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
@@ -362,9 +360,12 @@ def changedetection_app(config=None, datastore_o=None):
 
                 watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                 fe.title(title=watch_title)
-                latest_fname = watch.history[dates[-1]]
 
-                html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="<br>")
+                html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
+                                             newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
+                                             include_equal=False,
+                                             line_feed_sep="<br>")
+
                 fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
                            type='CDATA')
 
@@ -847,28 +848,22 @@ def changedetection_app(config=None, datastore_o=None):
         # Save the current newest history as the most recently viewed
         datastore.set_last_viewed(uuid, time.time())
 
-        newest_file = history[dates[-1]]
-
         # Read as binary and force decode as UTF-8
         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
         try:
-            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
-                newest_version_file_contents = f.read()
+            newest_version_file_contents = watch.get_history_snapshot(dates[-1])
         except Exception as e:
-            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
+            newest_version_file_contents = "Unable to read {}.\n".format(dates[-1])
 
         previous_version = request.args.get('previous_version')
-        try:
-            previous_file = history[previous_version]
-        except KeyError:
-            # Not present, use a default value, the second one in the sorted list.
-            previous_file = history[dates[-2]]
+        previous_timestamp = dates[-2]
+        if previous_version:
+            previous_timestamp = previous_version
 
         try:
-            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
-                previous_version_file_contents = f.read()
+            previous_version_file_contents = watch.get_history_snapshot(previous_timestamp)
         except Exception as e:
-            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
+            previous_version_file_contents = "Unable to read {}.\n".format(previous_timestamp)
 
 
         screenshot_url = watch.get_screenshot()
@@ -948,37 +943,35 @@ def changedetection_app(config=None, datastore_o=None):
             return output
 
         timestamp = list(watch.history.keys())[-1]
-        filename = watch.history[timestamp]
         try:
-            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
-                tmp = f.readlines()
+            tmp = watch.get_history_snapshot(timestamp).splitlines()
 
-                # Get what needs to be highlighted
-                ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
+            # Get what needs to be highlighted
+            ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text']
 
-                # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
-                ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=ignore_rules,
-                                                                    mode='line numbers'
-                                                                    )
+            # .readlines will keep the \n, but we will parse it here again, in the future tidy this up
+            ignored_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=ignore_rules,
+                                                                mode='line numbers'
+                                                                )
 
-                trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp),
-                                                                    wordlist=watch['trigger_text'],
-                                                                    mode='line numbers'
-                                                                    )
-                # Prepare the classes and lines used in the template
-                i=0
-                for l in tmp:
-                    classes=[]
-                    i+=1
-                    if i in ignored_line_numbers:
-                        classes.append('ignored')
-                    if i in trigger_line_numbers:
-                        classes.append('triggered')
-                    content.append({'line': l, 'classes': ' '.join(classes)})
+            trigger_line_numbers = html_tools.strip_ignore_text(content="\n".join(tmp),
+                                                                wordlist=watch['trigger_text'],
+                                                                mode='line numbers'
+                                                                )
+            # Prepare the classes and lines used in the template
+            i=0
+            for l in tmp:
+                classes=[]
+                i+=1
+                if i in ignored_line_numbers:
+                    classes.append('ignored')
+                if i in trigger_line_numbers:
+                    classes.append('triggered')
+                content.append({'line': l, 'classes': ' '.join(classes)})
 
         except Exception as e:
-            content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''})
+            content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
 
         output = render_template("preview.html",
                                  content=content,
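
The preview route now works on a list of lines from `get_history_snapshot(timestamp).splitlines()` (which strips the trailing `\n`, hence the switch from `"".join(tmp)` to `"\n".join(tmp)`) and then tags each line with CSS classes. Below is a simplified, self-contained sketch of that highlighting loop; `find_matching_line_numbers()` is a toy stand-in for `html_tools.strip_ignore_text(..., mode='line numbers')`, not the real implementation.

```python
# Toy sketch of the preview highlighting loop; find_matching_line_numbers()
# is a simplified stand-in for html_tools.strip_ignore_text(mode='line numbers').
def find_matching_line_numbers(lines, wordlist):
    matched = set()
    for i, line in enumerate(lines, start=1):  # line numbers are 1-based
        if any(word.lower() in line.lower() for word in wordlist):
            matched.add(i)
    return matched

snapshot_text = "Price: 10 EUR\nLast updated: today\nBuy now"
tmp = snapshot_text.splitlines()  # splitlines() already strips the \n

ignored_line_numbers = find_matching_line_numbers(tmp, ['last updated'])
trigger_line_numbers = find_matching_line_numbers(tmp, ['buy now'])

# Prepare the classes and lines used in the template
content = []
for i, l in enumerate(tmp, start=1):
    classes = []
    if i in ignored_line_numbers:
        classes.append('ignored')
    if i in trigger_line_numbers:
        classes.append('triggered')
    content.append({'line': l, 'classes': ' '.join(classes)})

print(content)
# roughly: [{'line': 'Price: 10 EUR', 'classes': ''},
#           {'line': 'Last updated: today', 'classes': 'ignored'},
#           {'line': 'Buy now', 'classes': 'triggered'}]
```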
@@ -179,9 +179,7 @@ class WatchSingleHistory(Resource):
         if timestamp == 'latest':
             timestamp = list(watch.history.keys())[-1]
 
-        # @todo - Check for UTF-8 compatability
-        with open(watch.history[timestamp], 'r') as f:
-            content = f.read()
+        content = watch.get_history_snapshot(timestamp)
 
         response = make_response(content, 200)
         response.mimetype = "text/plain"
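
For reference, a snapshot served by this resource can be fetched over the REST API roughly as below. This is a hedged sketch: the `/api/v1/watch/<uuid>/history/<timestamp>` route, the `x-api-key` header, and the placeholder UUID/key come from the project's API documentation rather than from this diff, so treat them as assumptions.

```python
# Minimal client sketch (assumes the documented changedetection.io REST API;
# the route, header name, UUID and API key below are placeholders/assumptions).
import requests

BASE_URL = "http://localhost:5000"
API_KEY = "YOUR-API-KEY"          # from the Settings page
WATCH_UUID = "00000000-0000-0000-0000-000000000000"

# 'latest' is resolved server-side to the newest history timestamp (see hunk above)
resp = requests.get(
    f"{BASE_URL}/api/v1/watch/{WATCH_UUID}/history/latest",
    headers={"x-api-key": API_KEY},
    timeout=30,
)
resp.raise_for_status()
print(resp.text)  # plain-text snapshot, transparently decompressed if stored as .br
```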
@@ -31,14 +31,11 @@ def customSequenceMatcher(before, after, include_equal=False, include_removed=Tr
 
 # only_differences - only return info about the differences, no context
 # line_feed_sep could be "<br>" or "<li>" or "\n" etc
-def render_diff(previous_file, newest_file, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
-    with open(newest_file, 'r') as f:
-        newest_version_file_contents = f.read()
-        newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+def render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=True, line_feed_sep="\n"):
 
-    if previous_file:
-        with open(previous_file, 'r') as f:
-            previous_version_file_contents = f.read()
+    newest_version_file_contents = [line.rstrip() for line in newest_version_file_contents.splitlines()]
+
+    if previous_version_file_contents:
             previous_version_file_contents = [line.rstrip() for line in previous_version_file_contents.splitlines()]
     else:
         previous_version_file_contents = ""
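
Since `render_diff()` now takes the snapshot text itself rather than two file paths, it can be exercised directly with in-memory strings. A minimal usage sketch follows; it assumes the `changedetectionio` package is importable, and the `(changed)/(into)/(added)` output format is the one asserted in the unit tests further down.

```python
# Sketch only: exercises the new content-based render_diff() signature.
from changedetectionio import diff

before_text = "some text\nok\nand more text\n"
after_text = "some text\nxok\nand more text\nand something new\n"

# Full line-by-line diff, joined with "\n" by default
print(diff.render_diff(previous_version_file_contents=before_text,
                       newest_version_file_contents=after_text))

# Only the differences, joined with <br> - the form used for RSS/notification bodies
html_diff = diff.render_diff(previous_version_file_contents=before_text,
                             newest_version_file_contents=after_text,
                             include_equal=False,
                             line_feed_sep="<br>")
print(html_diff)
```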
@@ -241,9 +241,32 @@ class model(dict):
         bump = self.history
         return self.__newest_history_key
 
+    def get_history_snapshot(self, timestamp):
+        import brotli
+        filepath = self.history[timestamp]
+
+        # See if a brotli versions exists and switch to that
+        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
+            filepath = f"{filepath}.br"
+
+        # OR in the backup case that the .br does not exist, but the plain one does
+        if filepath.endswith('.br') and not os.path.isfile(filepath):
+            if os.path.isfile(filepath.replace('.br', '')):
+                filepath = filepath.replace('.br', '')
+
+        if filepath.endswith('.br'):
+            # Brotli doesnt have a fileheader to detect it, so we rely on filename
+            # https://www.rfc-editor.org/rfc/rfc7932
+            with open(filepath, 'rb') as f:
+                return(brotli.decompress(f.read()).decode('utf-8'))
+
+        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+            return f.read()
+
     # Save some text file to the appropriate path and bump the history
     # result_obj from fetch_site_status.run()
     def save_history_text(self, contents, timestamp, snapshot_id):
+        import brotli
 
         self.ensure_data_dir_exists()
 
@@ -252,16 +275,21 @@ class model(dict):
         if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
             time.sleep(timestamp - self.__newest_history_key)
 
-        snapshot_fname = f"{snapshot_id}.txt"
+        threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
+        skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
 
-        # Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
-        dest = os.path.join(self.watch_data_dir, snapshot_fname)
-        if not os.path.exists(dest):
-            # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
-            # most sites are utf-8 and some are even broken utf-8
-            with open(dest, 'wb') as f:
-                f.write(contents)
-                f.close()
+        if not skip_brotli and len(contents) > threshold:
+            snapshot_fname = f"{snapshot_id}.txt.br"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
+        else:
+            snapshot_fname = f"{snapshot_id}.txt"
+            dest = os.path.join(self.watch_data_dir, snapshot_fname)
+            if not os.path.exists(dest):
+                with open(dest, 'wb') as f:
+                    f.write(contents)
 
         # Append to index
         # @todo check last char was \n
@@ -359,6 +387,7 @@ class model(dict):
             return fname
         return False
 
+
     def pause(self):
         self['paused'] = True
 
@@ -388,8 +417,8 @@
         # self.history will be keyed with the full path
         for k, fname in self.history.items():
             if os.path.isfile(fname):
-                with open(fname, "r") as f:
-                    contents = f.read()
+                if True:
+                    contents = self.get_history_snapshot(k)
                     res = re.findall(regex, contents, re.MULTILINE)
                     if res:
                         if not csv_writer:
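
To make the new storage behaviour concrete: snapshots larger than `SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD` bytes (default 1024) are written as `<id>.txt.br` with brotli, smaller ones as plain `<id>.txt`, and reads pick whichever form exists. Here is a small, self-contained sketch of that round-trip; `save_snapshot()`/`load_snapshot()` and the temporary directory are illustrative stand-ins for the `save_history_text()`/`get_history_snapshot()` methods above, not the project's actual API.

```python
# Illustrative stand-alone sketch of the brotli snapshot round-trip.
# save_snapshot()/load_snapshot() are hypothetical helpers mirroring the
# save_history_text()/get_history_snapshot() logic in the hunks above.
import os
import tempfile
import brotli

THRESHOLD = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))

def save_snapshot(data_dir, snapshot_id, contents: bytes) -> str:
    if len(contents) > THRESHOLD:
        dest = os.path.join(data_dir, f"{snapshot_id}.txt.br")
        payload = brotli.compress(contents, mode=brotli.MODE_TEXT)
    else:
        dest = os.path.join(data_dir, f"{snapshot_id}.txt")
        payload = contents
    if not os.path.exists(dest):  # same checksum -> same filename, no need to rewrite
        with open(dest, 'wb') as f:
            f.write(payload)
    return dest

def load_snapshot(filepath: str) -> str:
    # Brotli has no magic header, so the ".br" suffix decides how to read it
    if filepath.endswith('.br'):
        with open(filepath, 'rb') as f:
            return brotli.decompress(f.read()).decode('utf-8')
    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
        return f.read()

with tempfile.TemporaryDirectory() as d:
    text = ("some snapshot line\n" * 200).encode('utf-8')  # larger than the default threshold
    path = save_snapshot(d, 'deadbeef', text)
    assert load_snapshot(path) == text.decode('utf-8')
    print(path)  # ends in .txt.br when the compressed branch was taken
```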
@@ -28,3 +28,10 @@ pytest tests/test_notification.py
 # Re-run with HIDE_REFERER set - could affect login
 export HIDE_REFERER=True
 pytest tests/test_access_control.py
+
+# Re-run a few tests that will trigger brotli based storage
+export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
+pytest tests/test_access_control.py
+pytest tests/test_notification.py
+pytest tests/test_backend.py
+pytest tests/test_rss.py
@@ -198,8 +198,8 @@ def test_check_json_without_filter(client, live_server):
     )
 
     # Should still see '"html": "<b>"'
-    assert b'"<b>' in res.data
-    assert res.data.count(b'{\n') >= 2
+    assert b'"html": "<b>"' in res.data
+    assert res.data.count(b'{') >= 2
 
     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
     assert b'Deleted' in res.data
@@ -13,21 +13,33 @@ class TestDiffBuilder(unittest.TestCase):
 
     def test_expected_diff_output(self):
         base_dir = os.path.dirname(__file__)
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt")
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
         output = output.split("\n")
         self.assertIn('(changed) ok', output)
         self.assertIn('(into) xok', output)
         self.assertIn('(into) next-x-ok', output)
         self.assertIn('(added) and something new', output)
 
-
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt")
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents)
         output = output.split("\n")
         self.assertIn('(removed) for having learned computerese,', output)
         self.assertIn('(removed) I continue to examine bits, bytes and words', output)
 
         #diff_removed
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after.txt", include_equal=False, include_removed=True, include_added=False)
+        with open(base_dir + "/test-content/before.txt", 'r') as f:
+            previous_version_file_contents = f.read()
+
+        with open(base_dir + "/test-content/after.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
         output = output.split("\n")
         self.assertIn('(changed) ok', output)
         self.assertIn('(into) xok', output)
@@ -35,7 +47,9 @@ class TestDiffBuilder(unittest.TestCase):
         self.assertNotIn('(added) and something new', output)
 
         #diff_removed
-        output = diff.render_diff(previous_file=base_dir + "/test-content/before.txt", newest_file=base_dir + "/test-content/after-2.txt", include_equal=False, include_removed=True, include_added=False)
+        with open(base_dir + "/test-content/after-2.txt", 'r') as f:
+            newest_version_file_contents = f.read()
+        output = diff.render_diff(previous_version_file_contents, newest_version_file_contents, include_equal=False, include_removed=True, include_added=False)
         output = output.split("\n")
         self.assertIn('(removed) for having learned computerese,', output)
         self.assertIn('(removed) I continue to examine bits, bytes and words', output)
@@ -69,18 +69,17 @@ class update_worker(threading.Thread):
             else:
                 line_feed_sep = "\n"
 
-            with open(watch_history[dates[-1]], 'rb') as f:
-                snapshot_contents = f.read()
+            snapshot_contents = watch.get_history_snapshot(dates[-1])
 
             n_object.update({
                 'watch_url': watch['url'],
                 'uuid': watch_uuid,
                 'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else None,
-                'current_snapshot': snapshot_contents.decode('utf-8'),
-                'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
-                'diff_added': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_removed=False, line_feed_sep=line_feed_sep),
-                'diff_removed': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_added=False, line_feed_sep=line_feed_sep),
-                'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], include_equal=True, line_feed_sep=line_feed_sep)
+                'current_snapshot': snapshot_contents,
+                'diff': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), line_feed_sep=line_feed_sep),
+                'diff_added': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_removed=False, line_feed_sep=line_feed_sep),
+                'diff_removed': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_added=False, line_feed_sep=line_feed_sep),
+                'diff_full': diff.render_diff(watch.get_history_snapshot(dates[-2]), watch.get_history_snapshot(dates[-1]), include_equal=True, line_feed_sep=line_feed_sep)
             })
             logging.info (">> SENDING NOTIFICATION")
             self.notification_q.put(n_object)
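
Pulled out of the worker, the notification payload now boils down to two decoded snapshot strings and four `render_diff()` calls. A standalone sketch of that pattern (the function name, `watch`, `dates` and `line_feed_sep` are placeholders; error handling omitted):

```python
# Sketch of the new notification payload assembly (names are placeholders).
from changedetectionio import diff

def build_diff_fields(watch, dates, line_feed_sep="\n"):
    # Two decoded snapshot strings instead of two file paths
    prev_snapshot = watch.get_history_snapshot(dates[-2])
    newest_snapshot = watch.get_history_snapshot(dates[-1])
    return {
        'current_snapshot': newest_snapshot,
        'diff': diff.render_diff(prev_snapshot, newest_snapshot, line_feed_sep=line_feed_sep),
        'diff_added': diff.render_diff(prev_snapshot, newest_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
        'diff_removed': diff.render_diff(prev_snapshot, newest_snapshot, include_added=False, line_feed_sep=line_feed_sep),
        'diff_full': diff.render_diff(prev_snapshot, newest_snapshot, include_equal=True, line_feed_sep=line_feed_sep),
    }
```

Reading each snapshot once and reusing the strings also avoids decompressing the same `.br` file repeatedly, which the hunk above still does per `render_diff()` call.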