From d65a2c784d73da6d4bc7d1ace207183e702a1fb8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 11:26:48 +0100 Subject: [PATCH 01/10] Update orjson requirement from ~=3.10 to ~=3.11 (#3617) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 22890297..49c824a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,7 +34,7 @@ wtforms~=3.2 jsonpath-ng~=1.7.0 # Fast JSON serialization for better performance -orjson~=3.10 +orjson~=3.11 # dnspython - Used by paho-mqtt for MQTT broker resolution # Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated From e2b407c6f38e83a717178bb9aa26aead6c090f85 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 10 Nov 2025 11:59:17 +0100 Subject: [PATCH 02/10] Windows - JSON DB fixes - Forcing utf-8 for json DB read/writes should solve windows saving/loading problems. (#3615 #3611) --- changedetectionio/api/__init__.py | 2 +- .../content_fetchers/playwright.py | 2 +- changedetectionio/flask_app.py | 4 ++-- changedetectionio/model/App.py | 2 +- changedetectionio/model/Watch.py | 4 ++-- .../processors/text_json_diff/processor.py | 2 +- changedetectionio/store.py | 24 +++++++++++++------ .../tests/test_history_consistency.py | 4 ++-- .../tests/test_jsonpath_jq_selector.py | 11 ++++----- changedetectionio/tests/test_request.py | 4 ++-- 10 files changed, 34 insertions(+), 25 deletions(-) diff --git a/changedetectionio/api/__init__.py b/changedetectionio/api/__init__.py index b8d14e55..d8f9d8fd 100644 --- a/changedetectionio/api/__init__.py +++ b/changedetectionio/api/__init__.py @@ -41,7 +41,7 @@ def get_openapi_spec(): # Possibly for pip3 packages spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') - with open(spec_path, 'r') as f: + with open(spec_path, 'r', encoding='utf-8') as f: spec_dict = yaml.safe_load(f) _openapi_spec = OpenAPI.from_dict(spec_dict) return _openapi_spec diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 04fc3c0f..01f4c8a7 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -139,7 +139,7 @@ class fetcher(Fetcher): content = await self.page.content() destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) logger.debug(f"Saving step HTML to {destination}") - with open(destination, 'w') as f: + with open(destination, 'w', encoding='utf-8') as f: f.write(content) async def run(self, diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 0a9c006d..c831c71d 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -101,12 +101,12 @@ def init_app_secret(datastore_path): path = os.path.join(datastore_path, "secret.txt") try: - with open(path, "r") as f: + with open(path, "r", encoding='utf-8') as f: secret = f.read() except FileNotFoundError: import secrets - with open(path, "w") as f: + with open(path, "w", encoding='utf-8') as f: secret = secrets.token_hex(32) f.write(secret) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 084e957a..0152aed6 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -81,7 +81,7 @@ class model(dict): def parse_headers_from_text_file(filepath): headers = {} - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8') as f: for l in f.readlines(): l = l.strip() if not l.startswith('#') and ':' in l: diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 49ad8316..cd689054 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -188,7 +188,7 @@ class model(watch_base): fname = os.path.join(self.watch_data_dir, "history.txt") if os.path.isfile(fname): logger.debug(f"Reading watch history index for {self.get('uuid')}") - with open(fname, "r") as f: + with open(fname, "r", encoding='utf-8') as f: for i in f.readlines(): if ',' in i: k, v = i.strip().split(',', 2) @@ -594,7 +594,7 @@ class model(watch_base): """Return the text saved from a previous request that resulted in a non-200 error""" fname = os.path.join(self.watch_data_dir, "last-error.txt") if os.path.isfile(fname): - with open(fname, 'r') as f: + with open(fname, 'r', encoding='utf-8') as f: return f.read() return False diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index e877ef7c..62d749f7 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -280,7 +280,7 @@ class ContentProcessor: # Sort JSON to avoid false alerts from reordering try: - content = json.dumps(json.loads(content), sort_keys=True, indent=4) + content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False) except Exception: # Might be malformed JSON, continue anyway pass diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 0cf6f3ab..5a9b7ea3 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -78,9 +78,13 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - # @todo retest with ", encoding='utf-8'" - with open(self.json_store_path) as json_file: - from_disk = json.load(json_file) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(self.json_store_path, 'rb') as json_file: + from_disk = orjson.loads(json_file.read()) + else: + with open(self.json_store_path, encoding='utf-8') as json_file: + from_disk = json.load(json_file) # @todo isnt there a way todo this dict.update recursively? # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. @@ -435,12 +439,13 @@ class ChangeDetectionStore: # system was out of memory, out of RAM etc if HAS_ORJSON: # Use orjson for faster serialization + # orjson.dumps() always returns UTF-8 encoded bytes #3611 with open(self.json_store_path+".tmp", 'wb') as json_file: json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) else: # Fallback to standard json module - with open(self.json_store_path+".tmp", 'w') as json_file: - json.dump(data, json_file, indent=2) + with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file: + json.dump(data, json_file, indent=2, ensure_ascii=False) os.replace(self.json_store_path+".tmp", self.json_store_path) except Exception as e: logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") @@ -502,8 +507,13 @@ class ChangeDetectionStore: # Load from external config file if path.isfile(proxy_list_file): - with open(os.path.join(self.datastore_path, "proxies.json")) as f: - proxy_list = json.load(f) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f: + proxy_list = orjson.loads(f.read()) + else: + with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f: + proxy_list = json.load(f) # Mapping from UI config if available extras = self.data['settings']['requests'].get('extra_proxies') diff --git a/changedetectionio/tests/test_history_consistency.py b/changedetectionio/tests/test_history_consistency.py index 16baed39..f88c6d0c 100644 --- a/changedetectionio/tests/test_history_consistency.py +++ b/changedetectionio/tests/test_history_consistency.py @@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_obj = None - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: json_obj = json.load(f) # assert the right amount of watches was found in the JSON @@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py index ec90de03..f3e4006e 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -442,13 +442,12 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast snapshot_contents = watch.get_history_snapshot(timestamp=dates[0]) assert b'"hello": 123,' in res.data # properly html escaped in the front end - + import json + data = json.loads(snapshot_contents) + keys = list(data.keys()) # Should be correctly formatted and sorted, ("world" goes to end) - assert snapshot_contents == """{ - "hello": 123, - "world": 123 -}""" - + assert keys == ["hello", "world"] + delete_all_watches(client) def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path): diff --git a/changedetectionio/tests/test_request.py b/changedetectionio/tests/test_request.py index 5e080405..66a4b605 100644 --- a/changedetectionio/tests/test_request.py +++ b/changedetectionio/tests/test_request.py @@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) watches_with_body = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['body']==body_value: @@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_ wait_for_all_checks(client) watches_with_method = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['method'] == 'PATCH': From c58a97f69dda957a4b8a9e30cc0c003dfa5f28e3 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 10 Nov 2025 11:59:41 +0100 Subject: [PATCH 03/10] 0.50.41 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index a7c68d0d..89d02571 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -2,7 +2,7 @@ # Read more https://github.com/dgtlmoon/changedetection.io/wiki -__version__ = '0.50.40' +__version__ = '0.50.41' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError From caffd804fe5e4552053d8f8ab023eed09316b870 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 10 Nov 2025 13:31:51 +0100 Subject: [PATCH 04/10] Revert "Windows - JSON DB fixes - Forcing utf-8 for json DB read/writes should solve windows saving/loading problems. (#3615 #3611)" This reverts commit e2b407c6f38e83a717178bb9aa26aead6c090f85. --- changedetectionio/api/__init__.py | 2 +- .../content_fetchers/playwright.py | 2 +- changedetectionio/flask_app.py | 4 ++-- changedetectionio/model/App.py | 2 +- changedetectionio/model/Watch.py | 4 ++-- .../processors/text_json_diff/processor.py | 2 +- changedetectionio/store.py | 24 ++++++------------- .../tests/test_history_consistency.py | 4 ++-- .../tests/test_jsonpath_jq_selector.py | 11 +++++---- changedetectionio/tests/test_request.py | 4 ++-- 10 files changed, 25 insertions(+), 34 deletions(-) diff --git a/changedetectionio/api/__init__.py b/changedetectionio/api/__init__.py index d8f9d8fd..b8d14e55 100644 --- a/changedetectionio/api/__init__.py +++ b/changedetectionio/api/__init__.py @@ -41,7 +41,7 @@ def get_openapi_spec(): # Possibly for pip3 packages spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') - with open(spec_path, 'r', encoding='utf-8') as f: + with open(spec_path, 'r') as f: spec_dict = yaml.safe_load(f) _openapi_spec = OpenAPI.from_dict(spec_dict) return _openapi_spec diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 01f4c8a7..04fc3c0f 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -139,7 +139,7 @@ class fetcher(Fetcher): content = await self.page.content() destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) logger.debug(f"Saving step HTML to {destination}") - with open(destination, 'w', encoding='utf-8') as f: + with open(destination, 'w') as f: f.write(content) async def run(self, diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index c831c71d..0a9c006d 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -101,12 +101,12 @@ def init_app_secret(datastore_path): path = os.path.join(datastore_path, "secret.txt") try: - with open(path, "r", encoding='utf-8') as f: + with open(path, "r") as f: secret = f.read() except FileNotFoundError: import secrets - with open(path, "w", encoding='utf-8') as f: + with open(path, "w") as f: secret = secrets.token_hex(32) f.write(secret) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 0152aed6..084e957a 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -81,7 +81,7 @@ class model(dict): def parse_headers_from_text_file(filepath): headers = {} - with open(filepath, 'r', encoding='utf-8') as f: + with open(filepath, 'r') as f: for l in f.readlines(): l = l.strip() if not l.startswith('#') and ':' in l: diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index cd689054..49ad8316 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -188,7 +188,7 @@ class model(watch_base): fname = os.path.join(self.watch_data_dir, "history.txt") if os.path.isfile(fname): logger.debug(f"Reading watch history index for {self.get('uuid')}") - with open(fname, "r", encoding='utf-8') as f: + with open(fname, "r") as f: for i in f.readlines(): if ',' in i: k, v = i.strip().split(',', 2) @@ -594,7 +594,7 @@ class model(watch_base): """Return the text saved from a previous request that resulted in a non-200 error""" fname = os.path.join(self.watch_data_dir, "last-error.txt") if os.path.isfile(fname): - with open(fname, 'r', encoding='utf-8') as f: + with open(fname, 'r') as f: return f.read() return False diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index 62d749f7..e877ef7c 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -280,7 +280,7 @@ class ContentProcessor: # Sort JSON to avoid false alerts from reordering try: - content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False) + content = json.dumps(json.loads(content), sort_keys=True, indent=4) except Exception: # Might be malformed JSON, continue anyway pass diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 5a9b7ea3..0cf6f3ab 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -78,13 +78,9 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - if HAS_ORJSON: - # orjson.loads() expects UTF-8 encoded bytes #3611 - with open(self.json_store_path, 'rb') as json_file: - from_disk = orjson.loads(json_file.read()) - else: - with open(self.json_store_path, encoding='utf-8') as json_file: - from_disk = json.load(json_file) + # @todo retest with ", encoding='utf-8'" + with open(self.json_store_path) as json_file: + from_disk = json.load(json_file) # @todo isnt there a way todo this dict.update recursively? # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. @@ -439,13 +435,12 @@ class ChangeDetectionStore: # system was out of memory, out of RAM etc if HAS_ORJSON: # Use orjson for faster serialization - # orjson.dumps() always returns UTF-8 encoded bytes #3611 with open(self.json_store_path+".tmp", 'wb') as json_file: json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) else: # Fallback to standard json module - with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file: - json.dump(data, json_file, indent=2, ensure_ascii=False) + with open(self.json_store_path+".tmp", 'w') as json_file: + json.dump(data, json_file, indent=2) os.replace(self.json_store_path+".tmp", self.json_store_path) except Exception as e: logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") @@ -507,13 +502,8 @@ class ChangeDetectionStore: # Load from external config file if path.isfile(proxy_list_file): - if HAS_ORJSON: - # orjson.loads() expects UTF-8 encoded bytes #3611 - with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f: - proxy_list = orjson.loads(f.read()) - else: - with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f: - proxy_list = json.load(f) + with open(os.path.join(self.datastore_path, "proxies.json")) as f: + proxy_list = json.load(f) # Mapping from UI config if available extras = self.data['settings']['requests'].get('extra_proxies') diff --git a/changedetectionio/tests/test_history_consistency.py b/changedetectionio/tests/test_history_consistency.py index f88c6d0c..16baed39 100644 --- a/changedetectionio/tests/test_history_consistency.py +++ b/changedetectionio/tests/test_history_consistency.py @@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_obj = None - with open(json_db_file, 'r', encoding='utf-8') as f: + with open(json_db_file, 'r') as f: json_obj = json.load(f) # assert the right amount of watches was found in the JSON @@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') - with open(json_db_file, 'r', encoding='utf-8') as f: + with open(json_db_file, 'r') as f: assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py index f3e4006e..ec90de03 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -442,12 +442,13 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast snapshot_contents = watch.get_history_snapshot(timestamp=dates[0]) assert b'"hello": 123,' in res.data # properly html escaped in the front end - import json - data = json.loads(snapshot_contents) - keys = list(data.keys()) + # Should be correctly formatted and sorted, ("world" goes to end) - assert keys == ["hello", "world"] - + assert snapshot_contents == """{ + "hello": 123, + "world": 123 +}""" + delete_all_watches(client) def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path): diff --git a/changedetectionio/tests/test_request.py b/changedetectionio/tests/test_request.py index 66a4b605..5e080405 100644 --- a/changedetectionio/tests/test_request.py +++ b/changedetectionio/tests/test_request.py @@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) watches_with_body = 0 - with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: + with open(os.path.join(datastore_path, 'url-watches.json')) as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['body']==body_value: @@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_ wait_for_all_checks(client) watches_with_method = 0 - with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: + with open(os.path.join(datastore_path, 'url-watches.json')) as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['method'] == 'PATCH': From 2ae29ab78fc55aa929fa815f439a8d3d764bccac Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Mon, 10 Nov 2025 13:32:08 +0100 Subject: [PATCH 05/10] 0.50.42 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 89d02571..fb57148b 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -2,7 +2,7 @@ # Read more https://github.com/dgtlmoon/changedetection.io/wiki -__version__ = '0.50.41' +__version__ = '0.50.42' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError From 3d3b53831eb5a3146fe102909e47c96c6e088fb3 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 12:19:16 +0100 Subject: [PATCH 06/10] Adding data sanity checks across restarts (#3629) --- changedetectionio/__init__.py | 21 ++++++++--- changedetectionio/run_basic_tests.sh | 53 ++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 5 deletions(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index fb57148b..11ac05bd 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -74,6 +74,12 @@ def main(): datastore_path = None do_cleanup = False + # Optional URL to watch since start + default_url = None + # Set a default logger level + logger_level = 'DEBUG' + include_default_watches = True + host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip() port = int(os.environ.get('PORT', 5000)) ssl_mode = False @@ -87,15 +93,13 @@ def main(): datastore_path = os.path.join(os.getcwd(), "../datastore") try: - opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port") + opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port") except getopt.GetoptError: - print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]') + print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]') sys.exit(2) create_datastore_dir = False - # Set a default logger level - logger_level = 'DEBUG' # Set a logger level via shell env variable # Used: Dockerfile for CICD # To set logger level for pytest, see the app function in tests/conftest.py @@ -116,6 +120,10 @@ def main(): if opt == '-d': datastore_path = arg + if opt == '-u': + default_url = arg + include_default_watches = False + # Cleanup (remove text files that arent in the index) if opt == '-c': do_cleanup = True @@ -172,13 +180,16 @@ def main(): sys.exit(2) try: - datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__) + datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches) except JSONDecodeError as e: # Dont' start if the JSON DB looks corrupt logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.") logger.critical(str(e)) return + if default_url: + datastore.add_watch(url = default_url) + app = changedetection_app(app_config, datastore) # Get the SocketIO instance from the Flask app (created in flask_app.py) diff --git a/changedetectionio/run_basic_tests.sh b/changedetectionio/run_basic_tests.sh index 420f1f60..42437d0a 100755 --- a/changedetectionio/run_basic_tests.sh +++ b/changedetectionio/run_basic_tests.sh @@ -11,6 +11,56 @@ set -e SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +# Since theres no curl installed lets roll with python3 +check_sanity() { + local port="$1" + if [ -z "$port" ]; then + echo "Usage: check_sanity " >&2 + return 1 + fi + + python3 - "$port" <<'PYCODE' +import sys, time, urllib.request, socket + +port = sys.argv[1] +url = f'http://localhost:{port}' +ok = False + +for _ in range(6): # --retry 6 + try: + r = urllib.request.urlopen(url, timeout=3).read().decode() + if 'est-url-is-sanity' in r: + ok = True + break + except (urllib.error.URLError, ConnectionRefusedError, socket.error): + time.sleep(1) +sys.exit(0 if ok else 1) +PYCODE +} + +data_sanity_test () { + # Restart data sanity test + cd .. + TMPDIR=$(mktemp -d) + PORT_N=$((5000 + RANDOM % (6501 - 5000))) + ./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" & + PID=$! + sleep 5 + kill $PID + sleep 2 + ./changedetection.py -p $PORT_N -d $TMPDIR & + PID=$! + sleep 5 + # On a restart the URL should still be there + check_sanity $PORT_N || exit 1 + kill $PID + cd $OLDPWD + + # datastore looks alright, continue +} + +data_sanity_test + # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py @@ -41,3 +91,6 @@ FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l # Check file:// will pickup a file when enabled echo "Hello world" > /tmp/test-file.txt ALLOW_FILE_URI=yes pytest tests/test_security.py + + + From b9305faf2103f1a7fb2d8577254a83ccc53dc728 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 12:58:59 +0100 Subject: [PATCH 07/10] Forcing UTF-8 when reading JSON DB (Fixes data not loaded for some platforms #3622 #3611 #3628), Always create new versions of the backup DB if one exists for that step when running updates, Adding extra sanity checks on DB load --- changedetectionio/api/__init__.py | 2 +- .../content_fetchers/playwright.py | 2 +- changedetectionio/flask_app.py | 4 +- changedetectionio/model/App.py | 2 +- changedetectionio/model/Watch.py | 4 +- .../processors/text_json_diff/processor.py | 2 +- changedetectionio/store.py | 82 ++++++++++++------- .../tests/test_history_consistency.py | 4 +- .../tests/test_jsonpath_jq_selector.py | 11 ++- changedetectionio/tests/test_request.py | 4 +- 10 files changed, 70 insertions(+), 47 deletions(-) diff --git a/changedetectionio/api/__init__.py b/changedetectionio/api/__init__.py index b8d14e55..d8f9d8fd 100644 --- a/changedetectionio/api/__init__.py +++ b/changedetectionio/api/__init__.py @@ -41,7 +41,7 @@ def get_openapi_spec(): # Possibly for pip3 packages spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') - with open(spec_path, 'r') as f: + with open(spec_path, 'r', encoding='utf-8') as f: spec_dict = yaml.safe_load(f) _openapi_spec = OpenAPI.from_dict(spec_dict) return _openapi_spec diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 04fc3c0f..01f4c8a7 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -139,7 +139,7 @@ class fetcher(Fetcher): content = await self.page.content() destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) logger.debug(f"Saving step HTML to {destination}") - with open(destination, 'w') as f: + with open(destination, 'w', encoding='utf-8') as f: f.write(content) async def run(self, diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 0a9c006d..c831c71d 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -101,12 +101,12 @@ def init_app_secret(datastore_path): path = os.path.join(datastore_path, "secret.txt") try: - with open(path, "r") as f: + with open(path, "r", encoding='utf-8') as f: secret = f.read() except FileNotFoundError: import secrets - with open(path, "w") as f: + with open(path, "w", encoding='utf-8') as f: secret = secrets.token_hex(32) f.write(secret) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 084e957a..0152aed6 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -81,7 +81,7 @@ class model(dict): def parse_headers_from_text_file(filepath): headers = {} - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8') as f: for l in f.readlines(): l = l.strip() if not l.startswith('#') and ':' in l: diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 49ad8316..cd689054 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -188,7 +188,7 @@ class model(watch_base): fname = os.path.join(self.watch_data_dir, "history.txt") if os.path.isfile(fname): logger.debug(f"Reading watch history index for {self.get('uuid')}") - with open(fname, "r") as f: + with open(fname, "r", encoding='utf-8') as f: for i in f.readlines(): if ',' in i: k, v = i.strip().split(',', 2) @@ -594,7 +594,7 @@ class model(watch_base): """Return the text saved from a previous request that resulted in a non-200 error""" fname = os.path.join(self.watch_data_dir, "last-error.txt") if os.path.isfile(fname): - with open(fname, 'r') as f: + with open(fname, 'r', encoding='utf-8') as f: return f.read() return False diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index e877ef7c..62d749f7 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -280,7 +280,7 @@ class ContentProcessor: # Sort JSON to avoid false alerts from reordering try: - content = json.dumps(json.loads(content), sort_keys=True, indent=4) + content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False) except Exception: # Might be malformed JSON, continue anyway pass diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 0cf6f3ab..567a4d89 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -78,37 +78,46 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - # @todo retest with ", encoding='utf-8'" - with open(self.json_store_path) as json_file: - from_disk = json.load(json_file) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(self.json_store_path, 'rb') as json_file: + from_disk = orjson.loads(json_file.read()) + else: + with open(self.json_store_path, encoding='utf-8') as json_file: + from_disk = json.load(json_file) - # @todo isnt there a way todo this dict.update recursively? - # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. - if 'watching' in from_disk: - self.__data['watching'].update(from_disk['watching']) + if not from_disk: + # No FileNotFound exception was thrown but somehow the JSON was empty - abort for safety. + logger.critical(f"JSON DB existed but was empty on load - empty JSON file? '{self.json_store_path}' Aborting") + raise Exception('JSON DB existed but was empty on load - Aborting') - if 'app_guid' in from_disk: - self.__data['app_guid'] = from_disk['app_guid'] + # @todo isnt there a way todo this dict.update recursively? + # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. + if 'watching' in from_disk: + self.__data['watching'].update(from_disk['watching']) - if 'settings' in from_disk: - if 'headers' in from_disk['settings']: - self.__data['settings']['headers'].update(from_disk['settings']['headers']) + if 'app_guid' in from_disk: + self.__data['app_guid'] = from_disk['app_guid'] - if 'requests' in from_disk['settings']: - self.__data['settings']['requests'].update(from_disk['settings']['requests']) + if 'settings' in from_disk: + if 'headers' in from_disk['settings']: + self.__data['settings']['headers'].update(from_disk['settings']['headers']) - if 'application' in from_disk['settings']: - self.__data['settings']['application'].update(from_disk['settings']['application']) + if 'requests' in from_disk['settings']: + self.__data['settings']['requests'].update(from_disk['settings']['requests']) - # Convert each existing watch back to the Watch.model object - for uuid, watch in self.__data['watching'].items(): - self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch) - logger.info(f"Watching: {uuid} {watch['url']}") + if 'application' in from_disk['settings']: + self.__data['settings']['application'].update(from_disk['settings']['application']) - # And for Tags also, should be Restock type because it has extra settings - for uuid, tag in self.__data['settings']['application']['tags'].items(): - self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff') - logger.info(f"Tag: {uuid} {tag['title']}") + # Convert each existing watch back to the Watch.model object + for uuid, watch in self.__data['watching'].items(): + self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch) + logger.info(f"Watching: {uuid} {watch['url']}") + + # And for Tags also, should be Restock type because it has extra settings + for uuid, tag in self.__data['settings']['application']['tags'].items(): + self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff') + logger.info(f"Tag: {uuid} {tag['title']}") # First time ran, Create the datastore. except (FileNotFoundError): @@ -435,12 +444,13 @@ class ChangeDetectionStore: # system was out of memory, out of RAM etc if HAS_ORJSON: # Use orjson for faster serialization + # orjson.dumps() always returns UTF-8 encoded bytes #3611 with open(self.json_store_path+".tmp", 'wb') as json_file: json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) else: # Fallback to standard json module - with open(self.json_store_path+".tmp", 'w') as json_file: - json.dump(data, json_file, indent=2) + with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file: + json.dump(data, json_file, indent=2, ensure_ascii=False) os.replace(self.json_store_path+".tmp", self.json_store_path) except Exception as e: logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") @@ -502,8 +512,13 @@ class ChangeDetectionStore: # Load from external config file if path.isfile(proxy_list_file): - with open(os.path.join(self.datastore_path, "proxies.json")) as f: - proxy_list = json.load(f) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f: + proxy_list = orjson.loads(f.read()) + else: + with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f: + proxy_list = json.load(f) # Mapping from UI config if available extras = self.data['settings']['requests'].get('extra_proxies') @@ -760,7 +775,16 @@ class ChangeDetectionStore: logger.critical(f"Applying update_{update_n}") # Wont exist on fresh installs if os.path.exists(self.json_store_path): - shutil.copyfile(self.json_store_path, os.path.join(self.datastore_path, f"url-watches-before-{update_n}.json")) + i = 0 + while True: + i+=1 + dest = os.path.join(self.datastore_path, f"url-watches-before-{update_n}-{i}.json") + if not os.path.exists(dest): + logger.debug(f"Copying url-watches.json DB to '{dest}' backup.") + shutil.copyfile(self.json_store_path, dest) + break + else: + logger.warning(f"Backup of url-watches.json '{dest}', DB already exists, trying {i+1}.. ") try: update_method = getattr(self, f"update_{update_n}")() diff --git a/changedetectionio/tests/test_history_consistency.py b/changedetectionio/tests/test_history_consistency.py index 16baed39..f88c6d0c 100644 --- a/changedetectionio/tests/test_history_consistency.py +++ b/changedetectionio/tests/test_history_consistency.py @@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_obj = None - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: json_obj = json.load(f) # assert the right amount of watches was found in the JSON @@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py index ec90de03..f3e4006e 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -442,13 +442,12 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast snapshot_contents = watch.get_history_snapshot(timestamp=dates[0]) assert b'"hello": 123,' in res.data # properly html escaped in the front end - + import json + data = json.loads(snapshot_contents) + keys = list(data.keys()) # Should be correctly formatted and sorted, ("world" goes to end) - assert snapshot_contents == """{ - "hello": 123, - "world": 123 -}""" - + assert keys == ["hello", "world"] + delete_all_watches(client) def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path): diff --git a/changedetectionio/tests/test_request.py b/changedetectionio/tests/test_request.py index 5e080405..66a4b605 100644 --- a/changedetectionio/tests/test_request.py +++ b/changedetectionio/tests/test_request.py @@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) watches_with_body = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['body']==body_value: @@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_ wait_for_all_checks(client) watches_with_method = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['method'] == 'PATCH': From 4ba5fcce8fb47f0af7b7ecab381cc4a4d38a3ffc Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 13:00:42 +0100 Subject: [PATCH 08/10] 0.50.43 --- changedetectionio/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py index 11ac05bd..13c42252 100644 --- a/changedetectionio/__init__.py +++ b/changedetectionio/__init__.py @@ -2,7 +2,7 @@ # Read more https://github.com/dgtlmoon/changedetection.io/wiki -__version__ = '0.50.42' +__version__ = '0.50.43' from changedetectionio.strtobool import strtobool from json.decoder import JSONDecodeError From 805cd618d4a77180fe17dd91e848c9696d9c795b Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 17:37:12 +0100 Subject: [PATCH 09/10] Always backup JSON DB on new versions as well as the existing between updates. --- changedetectionio/store.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 567a4d89..a05ec623 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -44,7 +44,7 @@ class ChangeDetectionStore: lock = Lock() # For general updates/writes that can wait a few seconds needs_write = False - + datastore_path = None # For when we edit, we should write to disk needs_write_urgent = False @@ -54,18 +54,30 @@ class ChangeDetectionStore: def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): # Should only be active for docker # logging.basicConfig(filename='/dev/stdout', level=logging.INFO) - + self.datastore_path = datastore_path self.needs_write = False self.start_time = time.time() self.stop_thread = False + self.save_version_copy_json_db(version_tag) self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag) + def save_version_copy_json_db(self, version_tag): + import re + + version_text = re.sub(r'\D+', '-', version_tag) + db_path = os.path.join(self.datastore_path, "url-watches.json") + db_path_version_backup = os.path.join(self.datastore_path, f"url-watches-{version_text}.json") + + if not os.path.isfile(db_path_version_backup) and os.path.isfile(db_path): + from shutil import copyfile + logger.info(f"Backing up JSON DB due to new version to '{db_path_version_backup}'.") + copyfile(db_path, db_path_version_backup) + def reload_state(self, datastore_path, include_default_watches, version_tag): logger.info(f"Datastore path is '{datastore_path}'") self.__data = App.model() - self.datastore_path = datastore_path self.json_store_path = os.path.join(self.datastore_path, "url-watches.json") # Base definition for all watchers # deepcopy part of #569 - not sure why its needed exactly From 8dc39d4a3d5de0f884f9c0d16e0158dc4b2f0378 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 12 Nov 2025 17:38:18 +0100 Subject: [PATCH 10/10] RSS feeds for a single watches! --- changedetectionio/blueprint/rss/__init__.py | 18 ++- changedetectionio/blueprint/rss/blueprint.py | 139 +++++++++++++----- .../settings/templates/settings.html | 2 +- changedetectionio/blueprint/ui/edit.py | 7 +- .../blueprint/ui/templates/edit.html | 1 + changedetectionio/forms.py | 2 +- changedetectionio/model/App.py | 4 +- changedetectionio/notification/handler.py | 2 + changedetectionio/store.py | 55 ++++--- changedetectionio/templates/base.html | 9 +- changedetectionio/tests/test_backend.py | 3 +- changedetectionio/tests/test_rss.py | 73 +++++++++ 12 files changed, 249 insertions(+), 66 deletions(-) diff --git a/changedetectionio/blueprint/rss/__init__.py b/changedetectionio/blueprint/rss/__init__.py index d4e09194..adecd339 100644 --- a/changedetectionio/blueprint/rss/__init__.py +++ b/changedetectionio/blueprint/rss/__init__.py @@ -1 +1,17 @@ -RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')] +from copy import deepcopy +from loguru import logger + +from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH +from changedetectionio.notification import valid_notification_formats +RSS_CONTENT_FORMAT_DEFAULT = 'text' + +# Some stuff not related +RSS_FORMAT_TYPES = deepcopy(valid_notification_formats) +if RSS_FORMAT_TYPES.get('markdown'): + del RSS_FORMAT_TYPES['markdown'] + +if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH): + del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH] + +if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT): + logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}") diff --git a/changedetectionio/blueprint/rss/blueprint.py b/changedetectionio/blueprint/rss/blueprint.py index 602aa3db..7a27dcc7 100644 --- a/changedetectionio/blueprint/rss/blueprint.py +++ b/changedetectionio/blueprint/rss/blueprint.py @@ -37,6 +37,51 @@ def clean_entry_content(content): def construct_blueprint(datastore: ChangeDetectionStore): rss_blueprint = Blueprint('rss', __name__) + # Helper function to generate GUID for RSS entries + def generate_watch_guid(watch): + """Generate a unique GUID for a watch RSS entry.""" + return f"{watch['uuid']}/{watch.last_changed}" + + # Helper function to generate diff content for a watch + def generate_watch_diff_content(watch, dates, rss_content_format): + """ + Generate HTML diff content for a watch given its history dates. + Returns the rendered HTML content ready for RSS/display. + """ + from changedetectionio import diff + + # Same logic as watch-overview.html + if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): + watch_label = watch.label + else: + watch_label = watch.get('url') + + try: + html_diff = diff.render_diff( + previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]), + newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]), + include_equal=False + ) + + requested_output_format = datastore.data['settings']['application'].get('rss_content_format') + url, html_diff, n_title = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) + + except FileNotFoundError as e: + html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." + + # @note: We use
 because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
+        rss_template = "
{{watch_label}} had a change.\n\n{{html_diff}}\n
" + if 'html' in rss_content_format: + rss_template = "\n

{{watch_label}}

\n

{{html_diff}}

\n\n" + + content = jinja_render(template_str=rss_template, watch_label=watch_label, html_diff=html_diff, watch_url=watch.link) + + # Out of range chars could also break feedgen + if scan_invalid_chars_in_rss(content): + content = clean_entry_content(content) + + return content, watch_label + # Some RSS reader situations ended up with rss/ (forward slash after RSS) due # to some earlier blueprint rerouting work, it should goto feed. @rss_blueprint.route("/", methods=['GET']) @@ -51,6 +96,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): # Always requires token set app_rss_token = datastore.data['settings']['application'].get('rss_access_token') rss_url_token = request.args.get('token') + rss_content_format = datastore.data['settings']['application'].get('rss_content_format') + if rss_url_token != app_rss_token: return "Access denied, bad token", 403 @@ -81,10 +128,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): fg.description('Feed description') fg.link(href='https://changedetection.io') - html_colour_enable = False - if datastore.data['settings']['application'].get('rss_content_format') == 'html': - html_colour_enable = True - for watch in sorted_watches: dates = list(watch.history.keys()) @@ -95,7 +138,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): if not watch.viewed: # Re #239 - GUID needs to be individual for each event # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228) - guid = "{}/{}".format(watch['uuid'], watch.last_changed) + guid = generate_watch_guid(watch) fe = fg.add_entry() # Include a link to the diff page, they will have to login here to see if password protection is enabled. @@ -109,38 +152,9 @@ def construct_blueprint(datastore: ChangeDetectionStore): fe.link(link=diff_link) - # Same logic as watch-overview.html - if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): - watch_label = watch.label - else: - watch_label = watch.get('url') + content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format) fe.title(title=watch_label) - try: - - html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]), - newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]), - include_equal=False, - line_feed_sep="
" - ) - - - requested_output_format = 'htmlcolor' if html_colour_enable else 'html' - html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) - - except FileNotFoundError as e: - html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." - - # @todo Make this configurable and also consider html-colored markup - # @todo User could decide if goes to the diff page, or to the watch link - rss_template = "\n

{{watch_title}}

\n

{{html_diff}}

\n\n" - - content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link) - - # Out of range chars could also break feedgen - if scan_invalid_chars_in_rss(content): - content = clean_entry_content(content) - fe.content(content=content, type='CDATA') fe.guid(guid, permalink=False) dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) @@ -152,4 +166,59 @@ def construct_blueprint(datastore: ChangeDetectionStore): logger.trace(f"RSS generated in {time.time() - now:.3f}s") return response + @rss_blueprint.route("/watch/", methods=['GET']) + def rss_single_watch(uuid): + """ + Display the most recent change for a single watch as RSS feed. + Returns RSS XML with a single entry showing the diff between the last two snapshots. + """ + # Always requires token set + app_rss_token = datastore.data['settings']['application'].get('rss_access_token') + rss_url_token = request.args.get('token') + rss_content_format = datastore.data['settings']['application'].get('rss_content_format') + + if rss_url_token != app_rss_token: + return "Access denied, bad token", 403 + + # Get the watch by UUID + watch = datastore.data['watching'].get(uuid) + if not watch: + return f"Watch with UUID {uuid} not found", 404 + + # Check if watch has at least 2 history snapshots + dates = list(watch.history.keys()) + if len(dates) < 2: + return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400 + + # Add uuid to watch for proper functioning + watch['uuid'] = uuid + + # Generate the diff content using the shared helper function + content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format) + + # Create RSS feed with single entry + fg = FeedGenerator() + fg.title(f'changedetection.io - {watch.label}') + fg.description('Changes') + fg.link(href='https://changedetection.io') + + # Add single entry for this watch + guid = generate_watch_guid(watch) + fe = fg.add_entry() + + # Include a link to the diff page + diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)} + fe.link(link=diff_link) + + fe.title(title=watch_label) + fe.content(content=content, type='CDATA') + fe.guid(guid, permalink=False) + dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) + dt = dt.replace(tzinfo=pytz.UTC) + fe.pubDate(dt) + + response = make_response(fg.rss_str()) + response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') + return response + return rss_blueprint \ No newline at end of file diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index 9913e095..5d4b5536 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -86,7 +86,7 @@
{{ render_checkbox_field(form.application.form.rss_reader_mode) }} - Transforms RSS/RDF feed watches into beautiful text only + When watching RSS/Atom feeds, convert them into clean text for better change detection.
diff --git a/changedetectionio/blueprint/ui/edit.py b/changedetectionio/blueprint/ui/edit.py index f68bdbd1..d394a906 100644 --- a/changedetectionio/blueprint/ui/edit.py +++ b/changedetectionio/blueprint/ui/edit.py @@ -236,7 +236,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe # Import the global plugin system from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras - + app_rss_token = datastore.data['settings']['application'].get('rss_access_token'), template_args = { 'available_processors': processors.available_processors(), 'available_timezones': sorted(available_timezones()), @@ -252,6 +252,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), 'jq_support': jq_support, 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), + 'app_rss_token': app_rss_token, + 'rss_uuid_feed' : { + 'label': watch.label, + 'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token) + }, 'settings_application': datastore.data['settings']['application'], 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), diff --git a/changedetectionio/blueprint/ui/templates/edit.html b/changedetectionio/blueprint/ui/templates/edit.html index f6e7f6a0..bb4c50a0 100644 --- a/changedetectionio/blueprint/ui/templates/edit.html +++ b/changedetectionio/blueprint/ui/templates/edit.html @@ -476,6 +476,7 @@ Math: {{ 1 + 1 }}") }} class="pure-button button-error">Clear History{% endif %} Clone & Edit + RSS Feed for this watch diff --git a/changedetectionio/forms.py b/changedetectionio/forms.py index ce441df9..51760efe 100644 --- a/changedetectionio/forms.py +++ b/changedetectionio/forms.py @@ -1000,7 +1000,7 @@ class globalSettingsApplicationForm(commonSettingsForm): validators=[validators.NumberRange(min=0, message="Should be atleast zero (disabled)")]) - rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES) + rss_content_format = SelectField('RSS Content format', choices=list(RSS_FORMAT_TYPES.items())) removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 0152aed6..cbf81370 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -1,7 +1,7 @@ from os import getenv from copy import deepcopy -from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES +from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT from changedetectionio.notification import ( default_notification_body, @@ -54,7 +54,7 @@ class model(dict): 'password': False, 'render_anchor_tag_content': False, 'rss_access_token': None, - 'rss_content_format': RSS_FORMAT_TYPES[0][0], + 'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT, 'rss_hide_muted_watches': True, 'rss_reader_mode': False, 'scheduler_timezone_default': None, # Default IANA timezone name diff --git a/changedetectionio/notification/handler.py b/changedetectionio/notification/handler.py index 1e80db8b..f3be4104 100644 --- a/changedetectionio/notification/handler.py +++ b/changedetectionio/notification/handler.py @@ -187,6 +187,8 @@ def replace_placemarkers_in_text(text, url, requested_output_format): def apply_service_tweaks(url, n_body, n_title, requested_output_format): + logger.debug(f"Applying markup in '{requested_output_format}' mode") + # Re 323 - Limit discord length to their 2000 char limit total or it wont send. # Because different notifications may require different pre-processing, run each sequentially :( # 2000 bytes minus - diff --git a/changedetectionio/store.py b/changedetectionio/store.py index a05ec623..ba223ae3 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -6,6 +6,7 @@ from flask import ( flash ) +from .blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT from .html_tools import TRANSLATE_WHITESPACE_TABLE from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH from copy import deepcopy, copy @@ -775,6 +776,28 @@ class ChangeDetectionStore: return updates_available + def add_notification_url(self, notification_url): + + logger.debug(f">>> Adding new notification_url - '{notification_url}'") + + notification_urls = self.data['settings']['application'].get('notification_urls', []) + + if notification_url in notification_urls: + return notification_url + + with self.lock: + notification_urls = self.__data['settings']['application'].get('notification_urls', []) + + if notification_url in notification_urls: + return notification_url + + # Append and update the datastore + notification_urls.append(notification_url) + self.__data['settings']['application']['notification_urls'] = notification_urls + self.needs_write = True + + return notification_url + # Run all updates # IMPORTANT - Each update could be run even when they have a new install and the schema is correct # So therefor - each `update_n` should be very careful about checking if it needs to actually run @@ -1087,25 +1110,15 @@ class ChangeDetectionStore: formats['markdown'] = 'Markdown' re_run(formats) - def add_notification_url(self, notification_url): - - logger.debug(f">>> Adding new notification_url - '{notification_url}'") - - notification_urls = self.data['settings']['application'].get('notification_urls', []) - - if notification_url in notification_urls: - return notification_url - - with self.lock: - notification_urls = self.__data['settings']['application'].get('notification_urls', []) - - if notification_url in notification_urls: - return notification_url - - # Append and update the datastore - notification_urls.append(notification_url) - self.__data['settings']['application']['notification_urls'] = notification_urls - self.needs_write = True - - return notification_url + # RSS types should be inline with the same names as notification types + def update_24(self): + rss_format = self.data['settings']['application'].get('rss_content_format') + if not rss_format or 'text' in rss_format: + # might have been 'plaintext, 'plain text' or something + self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT + elif 'html' in rss_format: + self.data['settings']['application']['rss_content_format'] = 'htmlcolor' + else: + # safe fallback to text + self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT diff --git a/changedetectionio/templates/base.html b/changedetectionio/templates/base.html index 5b398d30..dd56ed27 100644 --- a/changedetectionio/templates/base.html +++ b/changedetectionio/templates/base.html @@ -8,8 +8,13 @@ Change Detection{{extra_title}} {% if app_rss_token %} - - {% endif %} + + + {% if rss_uuid_feed %} + + + {%- endif -%} + {%- endif -%} {% if extra_stylesheets %} diff --git a/changedetectionio/tests/test_backend.py b/changedetectionio/tests/test_backend.py index e9e81fe5..0fa74094 100644 --- a/changedetectionio/tests/test_backend.py +++ b/changedetectionio/tests/test_backend.py @@ -77,10 +77,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure assert b'' not in content + assert 'body' not in content + assert '(changed) Which is across multiple lines\n' + assert 'modified head title had a change.' # Because it picked it up as watch_title in default template + elif expected_type == 'html': + assert '<p>' in content + assert '<body>' in content + assert '<p>(changed) Which is across multiple lines<br>' in content + assert f'href="{url}">modified head title had a change.</a>' + elif expected_type == 'htmlcolor': + assert '<body>' in content + assert ' role="note" aria-label="Changed text" title="Changed text">Which is across multiple lines</span>' in content + assert f'href="{url}">modified head title had a change.</a>' + else: + raise Exception(f"Unknown type {expected_type}") + + + item = root.findall('.//item')[0].findtext('description') + check_formatting(expected_type=rss_content_format, content=item, url=test_url) + + # Now the default one is over, lets try all the others + for k in list(RSS_FORMAT_TYPES.keys()): + res = client.post( + url_for("settings.settings_page"), + data={"application-rss_content_format": k}, + follow_redirects=True + ) + assert b'Settings updated' in res.data + + res = client.get( + url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token), + follow_redirects=False + ) + assert res.status_code == 200 + root = ET.fromstring(res.data) + item = root.findall('.//item')[0].findtext('description') + check_formatting(expected_type=k, content=item, url=test_url)