diff --git a/changedetectionio/api/__init__.py b/changedetectionio/api/__init__.py index b8d14e55..d8f9d8fd 100644 --- a/changedetectionio/api/__init__.py +++ b/changedetectionio/api/__init__.py @@ -41,7 +41,7 @@ def get_openapi_spec(): # Possibly for pip3 packages spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') - with open(spec_path, 'r') as f: + with open(spec_path, 'r', encoding='utf-8') as f: spec_dict = yaml.safe_load(f) _openapi_spec = OpenAPI.from_dict(spec_dict) return _openapi_spec diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 04fc3c0f..01f4c8a7 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -139,7 +139,7 @@ class fetcher(Fetcher): content = await self.page.content() destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) logger.debug(f"Saving step HTML to {destination}") - with open(destination, 'w') as f: + with open(destination, 'w', encoding='utf-8') as f: f.write(content) async def run(self, diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py index 0a9c006d..c831c71d 100644 --- a/changedetectionio/flask_app.py +++ b/changedetectionio/flask_app.py @@ -101,12 +101,12 @@ def init_app_secret(datastore_path): path = os.path.join(datastore_path, "secret.txt") try: - with open(path, "r") as f: + with open(path, "r", encoding='utf-8') as f: secret = f.read() except FileNotFoundError: import secrets - with open(path, "w") as f: + with open(path, "w", encoding='utf-8') as f: secret = secrets.token_hex(32) f.write(secret) diff --git a/changedetectionio/model/App.py b/changedetectionio/model/App.py index 084e957a..0152aed6 100644 --- a/changedetectionio/model/App.py +++ b/changedetectionio/model/App.py @@ -81,7 +81,7 @@ class model(dict): def parse_headers_from_text_file(filepath): headers = {} - with open(filepath, 'r') as f: + with open(filepath, 'r', encoding='utf-8') as f: for l in f.readlines(): l = l.strip() if not l.startswith('#') and ':' in l: diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py index 49ad8316..cd689054 100644 --- a/changedetectionio/model/Watch.py +++ b/changedetectionio/model/Watch.py @@ -188,7 +188,7 @@ class model(watch_base): fname = os.path.join(self.watch_data_dir, "history.txt") if os.path.isfile(fname): logger.debug(f"Reading watch history index for {self.get('uuid')}") - with open(fname, "r") as f: + with open(fname, "r", encoding='utf-8') as f: for i in f.readlines(): if ',' in i: k, v = i.strip().split(',', 2) @@ -594,7 +594,7 @@ class model(watch_base): """Return the text saved from a previous request that resulted in a non-200 error""" fname = os.path.join(self.watch_data_dir, "last-error.txt") if os.path.isfile(fname): - with open(fname, 'r') as f: + with open(fname, 'r', encoding='utf-8') as f: return f.read() return False diff --git a/changedetectionio/processors/text_json_diff/processor.py b/changedetectionio/processors/text_json_diff/processor.py index e877ef7c..62d749f7 100644 --- a/changedetectionio/processors/text_json_diff/processor.py +++ b/changedetectionio/processors/text_json_diff/processor.py @@ -280,7 +280,7 @@ class ContentProcessor: # Sort JSON to avoid false alerts from reordering try: - content = json.dumps(json.loads(content), sort_keys=True, indent=4) + content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False) except Exception: # Might be malformed JSON, continue anyway pass diff --git a/changedetectionio/store.py b/changedetectionio/store.py index 0cf6f3ab..5a9b7ea3 100644 --- a/changedetectionio/store.py +++ b/changedetectionio/store.py @@ -78,9 +78,13 @@ class ChangeDetectionStore: self.__data['build_sha'] = f.read() try: - # @todo retest with ", encoding='utf-8'" - with open(self.json_store_path) as json_file: - from_disk = json.load(json_file) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(self.json_store_path, 'rb') as json_file: + from_disk = orjson.loads(json_file.read()) + else: + with open(self.json_store_path, encoding='utf-8') as json_file: + from_disk = json.load(json_file) # @todo isnt there a way todo this dict.update recursively? # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore. @@ -435,12 +439,13 @@ class ChangeDetectionStore: # system was out of memory, out of RAM etc if HAS_ORJSON: # Use orjson for faster serialization + # orjson.dumps() always returns UTF-8 encoded bytes #3611 with open(self.json_store_path+".tmp", 'wb') as json_file: json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2)) else: # Fallback to standard json module - with open(self.json_store_path+".tmp", 'w') as json_file: - json.dump(data, json_file, indent=2) + with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file: + json.dump(data, json_file, indent=2, ensure_ascii=False) os.replace(self.json_store_path+".tmp", self.json_store_path) except Exception as e: logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") @@ -502,8 +507,13 @@ class ChangeDetectionStore: # Load from external config file if path.isfile(proxy_list_file): - with open(os.path.join(self.datastore_path, "proxies.json")) as f: - proxy_list = json.load(f) + if HAS_ORJSON: + # orjson.loads() expects UTF-8 encoded bytes #3611 + with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f: + proxy_list = orjson.loads(f.read()) + else: + with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f: + proxy_list = json.load(f) # Mapping from UI config if available extras = self.data['settings']['requests'].get('extra_proxies') diff --git a/changedetectionio/tests/test_history_consistency.py b/changedetectionio/tests/test_history_consistency.py index 16baed39..f88c6d0c 100644 --- a/changedetectionio/tests/test_history_consistency.py +++ b/changedetectionio/tests/test_history_consistency.py @@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') json_obj = None - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: json_obj = json.load(f) # assert the right amount of watches was found in the JSON @@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot" json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json') - with open(json_db_file, 'r') as f: + with open(json_db_file, 'r', encoding='utf-8') as f: assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" diff --git a/changedetectionio/tests/test_jsonpath_jq_selector.py b/changedetectionio/tests/test_jsonpath_jq_selector.py index ec90de03..f3e4006e 100644 --- a/changedetectionio/tests/test_jsonpath_jq_selector.py +++ b/changedetectionio/tests/test_jsonpath_jq_selector.py @@ -442,13 +442,12 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast snapshot_contents = watch.get_history_snapshot(timestamp=dates[0]) assert b'"hello": 123,' in res.data # properly html escaped in the front end - + import json + data = json.loads(snapshot_contents) + keys = list(data.keys()) # Should be correctly formatted and sorted, ("world" goes to end) - assert snapshot_contents == """{ - "hello": 123, - "world": 123 -}""" - + assert keys == ["hello", "world"] + delete_all_watches(client) def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path): diff --git a/changedetectionio/tests/test_request.py b/changedetectionio/tests/test_request.py index 5e080405..66a4b605 100644 --- a/changedetectionio/tests/test_request.py +++ b/changedetectionio/tests/test_request.py @@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) wait_for_all_checks(client) watches_with_body = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['body']==body_value: @@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_ wait_for_all_checks(client) watches_with_method = 0 - with open(os.path.join(datastore_path, 'url-watches.json')) as f: + with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f: app_struct = json.load(f) for uuid in app_struct['watching']: if app_struct['watching'][uuid]['method'] == 'PATCH':