Merge branch 'master' into windows-diff-file-handling-improvements

Use python better read operations
Use os.path.join for correct OS path separators
2025-10-31 14:47:21 +00:00 · 2022-10-17 18:01:48 +02:00 · 2022-10-17 17:59:24 +02:00 · 2022-10-17 17:10:44 +02:00 · 2022-10-17 17:07:13 +02:00
3 changed files with 56 additions and 6 deletions
--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
@@ -819,8 +819,8 @@ def changedetection_app(config=None, datastore_o=None):
        # Read as binary and force decode as UTF-8
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'rb') as f:
-                newest_version_file_contents = f.read().decode('utf-8')
+            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
+                newest_version_file_contents = f.read()
        except Exception as e:
            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)

@@ -832,8 +832,8 @@ def changedetection_app(config=None, datastore_o=None):
            previous_file = history[dates[-2]]

        try:
-            with open(previous_file, 'rb') as f:
-                previous_version_file_contents = f.read().decode('utf-8')
+            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
+                previous_version_file_contents = f.read()
        except Exception as e:
            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)

@@ -909,7 +909,7 @@ def changedetection_app(config=None, datastore_o=None):
        timestamp = list(watch.history.keys())[-1]
        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
                tmp = f.readlines()

                # Get what needs to be highlighted
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -158,7 +158,8 @@ class model(dict):

        logging.debug("Saving history text {}".format(snapshot_fname))

-        # in /diff/ we are going to assume for now that it's UTF-8 when reading
+        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
+        # most sites are utf-8 and some are even broken utf-8
        with open(snapshot_fname, 'wb') as f:
            f.write(contents)
            f.close()
--- a/49
+++ b/49
@@ -0,0 +1,49 @@
+diff --git a/changedetectionio/__init__.py b/changedetectionio/__init__.py
+index c745dd3e..19873cce 100644
+--- a/changedetectionio/__init__.py
+++ b/changedetectionio/__init__.py
+@@ -819,8 +819,8 @@ def changedetection_app(config=None, datastore_o=None):
+         # Read as binary and force decode as UTF-8
+         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
+         try:
+-            with open(newest_file, 'rb') as f:
+-                newest_version_file_contents = f.read().decode('utf-8')
++            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
++                newest_version_file_contents = f.read()
+         except Exception as e:
+             newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
+ 
+@@ -832,8 +832,8 @@ def changedetection_app(config=None, datastore_o=None):
+             previous_file = history[dates[-2]]
+ 
+         try:
+-            with open(previous_file, 'rb') as f:
+-                previous_version_file_contents = f.read().decode('utf-8')
++            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
++                previous_version_file_contents = f.read()
+         except Exception as e:
+             previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
+ 
+@@ -909,7 +909,7 @@ def changedetection_app(config=None, datastore_o=None):
+         timestamp = list(watch.history.keys())[-1]
+         filename = watch.history[timestamp]
+         try:
+-            with open(filename, 'r') as f:
++            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
+                 tmp = f.readlines()
+ 
+                 # Get what needs to be highlighted
+diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
+index 9a87ad71..566eb88e 100644
+--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
+@@ -158,7 +158,8 @@ class model(dict):
+ 
+         logging.debug("Saving history text {}".format(snapshot_fname))
+ 
+-        # in /diff/ we are going to assume for now that it's UTF-8 when reading
++        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
++        # most sites are utf-8 and some are even broken utf-8
+         with open(snapshot_fname, 'wb') as f:
+             f.write(contents)
+             f.close()
Author	SHA1	Message	Date
dgtlmoon	f16aa1b658	Merge branch 'master' into windows-diff-file-handling-improvements	2022-10-17 18:01:48 +02:00
dgtlmoon	6d02110473	Use python better read operations	2022-10-17 17:59:24 +02:00
dgtlmoon	c782a7e5c8	Use os.path.join for correct OS path separators	2022-10-17 17:10:44 +02:00
dgtlmoon	d4b241720a	Force UTF-8 encoding when processing diff text	2022-10-17 17:07:13 +02:00