Forcing utf-8 #3611

0.50.40
Page <title> should only be captured on HTML documents (#3608)
2025-11-10 03:27:14 +00:00 · 2025-11-09 18:38:53 +01:00 · 2025-11-07 13:21:22 +01:00 · 2025-11-07 11:51:10 +01:00 · 2025-11-07 11:42:57 +01:00 · 2025-11-05 21:49:17 +01:00
27 changed files with 159 additions and 177 deletions
--- a/changedetectionio/PLUGIN_README.md
+++ b/changedetectionio/PLUGIN_README.md
@@ -64,7 +64,7 @@ def count_words_in_history(watch):
            return 0
            
        latest_key = list(watch.history.keys())[-1]
-        latest_content = watch.get_history_snapshot(latest_key)
+        latest_content = watch.get_history_snapshot(timestamp=latest_key)
        return len(latest_content.split())
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki

-__version__ = '0.50.39'
+__version__ = '0.50.40'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/api/Watch.py
+++ b/changedetectionio/api/Watch.py
@@ -175,7 +175,7 @@ class WatchSingleHistory(Resource):
                response = make_response("No content found", 404)
                response.mimetype = "text/plain"
        else:
-            content = watch.get_history_snapshot(timestamp)
+            content = watch.get_history_snapshot(timestamp=timestamp)
            response = make_response(content, 200)
            response.mimetype = "text/plain"

--- a/changedetectionio/api/init.py
+++ b/changedetectionio/api/init.py
@@ -41,7 +41,7 @@ def get_openapi_spec():
        # Possibly for pip3 packages
        spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')

-    with open(spec_path, 'r') as f:
+    with open(spec_path, 'r', encoding='utf-8') as f:
        spec_dict = yaml.safe_load(f)
    _openapi_spec = OpenAPI.from_dict(spec_dict)
    return _openapi_spec
--- a/changedetectionio/async_update_worker.py
+++ b/changedetectionio/async_update_worker.py
@@ -353,12 +353,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                count = watch.get('check_count', 0) + 1

                # Always record page title (used in notifications, and can change even when the content is the same)
-                try:
-                    page_title = html_tools.extract_title(data=update_handler.fetcher.content)
-                    logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
-                    datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
-                except Exception as e:
-                    logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
+                if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
+                    try:
+                        page_title = html_tools.extract_title(data=update_handler.fetcher.content)
+                        if page_title:
+                            page_title = page_title.strip()[:2000]
+                            logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
+                            datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
+                    except Exception as e:
+                        logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")

                # Record server header
                try:
--- a/changedetectionio/blueprint/rss/blueprint.py
+++ b/changedetectionio/blueprint/rss/blueprint.py
@@ -118,8 +118,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                fe.title(title=watch_label)
                try:

-                    html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
-                                                 newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
+                    html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]),
+                                                 newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]),
                                                 include_equal=False,
                                                 line_feed_sep="<br>"
                                                 )
--- a/changedetectionio/blueprint/ui/notification.py
+++ b/changedetectionio/blueprint/ui/notification.py
@@ -106,7 +106,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                trigger_text = watch.get('trigger_text', [])
                # Add text that was triggered
                if len(dates):
-                    snapshot_contents = watch.get_history_snapshot(dates[-1])
+                    snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
                else:
                    snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

@@ -123,8 +123,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):


            if len(dates) > 1:
-                prev_snapshot = watch.get_history_snapshot(dates[-2])
-                current_snapshot = watch.get_history_snapshot(dates[-1])
+                prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
+                current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])

            n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
                                                        current_snapshot=current_snapshot,
--- a/changedetectionio/blueprint/ui/views.py
+++ b/changedetectionio/blueprint/ui/views.py
@@ -47,7 +47,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe

            try:
                versions = list(watch.history.keys())
-                content = watch.get_history_snapshot(timestamp)
+                content = watch.get_history_snapshot(timestamp=timestamp)

                triggered_line_numbers = html_tools.strip_ignore_text(content=content,
                                                                      wordlist=watch['trigger_text'],
--- a/changedetectionio/conditions/plugins/wordcount_plugin.py
+++ b/changedetectionio/conditions/plugins/wordcount_plugin.py
@@ -14,7 +14,7 @@ def count_words_in_history(watch, incoming_text=None):
        elif watch.history.keys():
            # When called from UI extras to count latest snapshot
            latest_key = list(watch.history.keys())[-1]
-            latest_content = watch.get_history_snapshot(latest_key)
+            latest_content = watch.get_history_snapshot(timestamp=latest_key)
            return len(latest_content.split())
        return 0
    except Exception as e:
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -139,7 +139,7 @@ class fetcher(Fetcher):
        content = await self.page.content()
        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
        logger.debug(f"Saving step HTML to {destination}")
-        with open(destination, 'w') as f:
+        with open(destination, 'w', encoding='utf-8') as f:
            f.write(content)

    async def run(self,
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -101,12 +101,12 @@ def init_app_secret(datastore_path):
    path = os.path.join(datastore_path, "secret.txt")

    try:
-        with open(path, "r") as f:
+        with open(path, "r", encoding='utf-8') as f:
            secret = f.read()

    except FileNotFoundError:
        import secrets
-        with open(path, "w") as f:
+        with open(path, "w", encoding='utf-8') as f:
            secret = secrets.token_hex(32)
            f.write(secret)

@@ -794,15 +794,19 @@ def ticker_thread_check_time_launch_checks():

            # @todo - Maybe make this a hook?
            # Time schedule limit - Decide between watch or global settings
+            scheduler_source = None
            if watch.get('time_between_check_use_default'):
                time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {})
-                logger.trace(f"{uuid} Time scheduler - Using system/global settings")
+                scheduler_source = 'system/global settings'
+
            else:
                time_schedule_limit = watch.get('time_schedule_limit')
-                logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
+                scheduler_source = 'watch'
+
            tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())

            if time_schedule_limit and time_schedule_limit.get('enabled'):
+                logger.trace(f"{uuid} Time scheduler - Using scheduler settings from {scheduler_source}")
                try:
                    result = is_within_schedule(time_schedule_limit=time_schedule_limit,
                                                default_tz=tz_name
@@ -814,6 +818,7 @@ def ticker_thread_check_time_launch_checks():
                    logger.error(
                        f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
                    return False
+
            # If they supplied an individual entry minutes to threshold.
            threshold = recheck_time_system_seconds if watch.get('time_between_check_use_default') else watch.threshold_seconds()

--- a/changedetectionio/forms.py
+++ b/changedetectionio/forms.py
@@ -503,7 +503,9 @@ class ValidateJinja2Template(object):
            jinja2_env = create_jinja_env(loader=BaseLoader)

            # Add notification tokens for validation
-            jinja2_env.globals.update(NotificationContextData())
+            static_token_placeholders = NotificationContextData()
+            static_token_placeholders.set_random_for_validation()
+            jinja2_env.globals.update(static_token_placeholders)
            if hasattr(field, 'extra_notification_tokens'):
                jinja2_env.globals.update(field.extra_notification_tokens)

--- a/changedetectionio/model/App.py
+++ b/changedetectionio/model/App.py
@@ -81,7 +81,7 @@ class model(dict):

 def parse_headers_from_text_file(filepath):
    headers = {}
-    with open(filepath, 'r') as f:
+    with open(filepath, 'r', encoding='utf-8') as f:
        for l in f.readlines():
            l = l.strip()
            if not l.startswith('#') and ':' in l:
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -188,7 +188,7 @@ class model(watch_base):
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
            logger.debug(f"Reading watch history index for {self.get('uuid')}")
-            with open(fname, "r") as f:
+            with open(fname, "r", encoding='utf-8') as f:
                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 2)
@@ -276,9 +276,17 @@ class model(watch_base):
        # When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
        return sorted_keys[-1]

-    def get_history_snapshot(self, timestamp):
+    def get_history_snapshot(self, timestamp=None, filepath=None):
+        """
+        Accepts either timestamp or filepath
+        :param timestamp:
+        :param filepath:
+        :return:
+        """
        import brotli
-        filepath = self.history[timestamp]
+
+        if not filepath:
+            filepath = self.history[timestamp]

        # See if a brotli versions exists and switch to that
        if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
@@ -382,7 +390,7 @@ class model(watch_base):
        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
-            content = self.get_history_snapshot(k)
+            content = self.get_history_snapshot(filepath=v)

            if ignore_whitespace:
                alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
@@ -586,7 +594,7 @@ class model(watch_base):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
        if os.path.isfile(fname):
-            with open(fname, 'r') as f:
+            with open(fname, 'r', encoding='utf-8') as f:
                return f.read()
        return False

@@ -639,7 +647,7 @@ class model(watch_base):
        for k, fname in self.history.items():
            if os.path.isfile(fname):
                if True:
-                    contents = self.get_history_snapshot(k)
+                    contents = self.get_history_snapshot(timestamp=k)
                    res = re.findall(regex, contents, re.MULTILINE)
                    if res:
                        if not csv_writer:
@@ -732,7 +740,7 @@ class model(watch_base):
            # If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
            dates = list(self.history.keys())
            if len(dates):
-                return self.get_history_snapshot(dates[-1])
+                return self.get_history_snapshot(timestamp=dates[-1])
            else:
                return ''

--- a/changedetectionio/notification_service.py
+++ b/changedetectionio/notification_service.py
@@ -133,7 +133,7 @@ class NotificationService:

        # Add text that was triggered
        if len(dates):
-            snapshot_contents = watch.get_history_snapshot(dates[-1])
+            snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
        else:
            snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

@@ -154,8 +154,8 @@ class NotificationService:
        current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"

        if len(dates) > 1:
-            prev_snapshot = watch.get_history_snapshot(dates[-2])
-            current_snapshot = watch.get_history_snapshot(dates[-1])
+            prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
+            current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])


        n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
--- a/changedetectionio/processors/text_json_diff/processor.py
+++ b/changedetectionio/processors/text_json_diff/processor.py
@@ -280,7 +280,7 @@ class ContentProcessor:

        # Sort JSON to avoid false alerts from reordering
        try:
-            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
+            content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False)
        except Exception:
            # Might be malformed JSON, continue anyway
            pass
--- a/changedetectionio/realtime/socket_server.py
+++ b/changedetectionio/realtime/socket_server.py
@@ -37,18 +37,6 @@ class SignalHandler:
        notification_event_signal.connect(self.handle_notification_event, weak=False)
        logger.info("SignalHandler: Connected to notification_event signal")

-        # Create and start the queue update thread using standard threading
-        import threading
-        self.polling_emitter_thread = threading.Thread(
-            target=self.polling_emit_running_or_queued_watches_threaded,
-            daemon=True
-        )
-        self.polling_emitter_thread.start()
-        logger.info("Started polling thread using threading (eventlet-free)")
-
-        # Store the thread reference in socketio for clean shutdown
-        self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
-
    def handle_signal(self, *args, **kwargs):
        logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
        # Safely extract the watch UUID from kwargs
@@ -124,74 +112,6 @@ class SignalHandler:
        except Exception as e:
            logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")

-    def polling_emit_running_or_queued_watches_threaded(self):
-        """Threading version of polling for Windows compatibility"""
-        import time
-        import threading
-        logger.info("Queue update thread started (threading mode)")
-
-        # Import here to avoid circular imports
-        from changedetectionio.flask_app import app
-        from changedetectionio import worker_handler
-        watch_check_update = signal('watch_check_update')
-
-        # Track previous state to avoid unnecessary emissions
-        previous_running_uuids = set()
-
-        # Run until app shutdown - check exit flag more frequently for fast shutdown
-        exit_event = getattr(app.config, 'exit', threading.Event())
-
-        while not exit_event.is_set():
-            try:
-                # Get current running UUIDs from async workers
-                running_uuids = set(worker_handler.get_running_uuids())
-
-                # Only send updates for UUIDs that changed state
-                newly_running = running_uuids - previous_running_uuids
-                no_longer_running = previous_running_uuids - running_uuids
-
-                # Send updates for newly running UUIDs (but exit fast if shutdown requested)
-                for uuid in newly_running:
-                    if exit_event.is_set():
-                        break
-                    logger.trace(f"Threading polling: UUID {uuid} started processing")
-                    with app.app_context():
-                        watch_check_update.send(app_context=app, watch_uuid=uuid)
-                    time.sleep(0.01)  # Small yield
-
-                # Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
-                if not exit_event.is_set():
-                    for uuid in no_longer_running:
-                        if exit_event.is_set():
-                            break
-                        logger.trace(f"Threading polling: UUID {uuid} finished processing")
-                        with app.app_context():
-                            watch_check_update.send(app_context=app, watch_uuid=uuid)
-                        time.sleep(0.01)  # Small yield
-
-                # Update tracking for next iteration
-                previous_running_uuids = running_uuids
-
-                # Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
-                for _ in range(20):  # 20 * 0.5 = 10 seconds total
-                    if exit_event.is_set():
-                        break
-                    time.sleep(0.5)
-
-            except Exception as e:
-                logger.error(f"Error in threading polling: {str(e)}")
-                # Even during error recovery, check for exit quickly
-                for _ in range(1):  # 1 * 0.5 = 0.5 seconds
-                    if exit_event.is_set():
-                        break
-                    time.sleep(0.5)
-
-        # Check if we're in pytest environment - if so, be more gentle with logging
-        import sys
-        in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
-
-        if not in_pytest:
-            logger.info("Queue update thread stopped (threading mode)")


 def handle_watch_update(socketio, **kwargs):
@@ -383,19 +303,6 @@ def init_socketio(app, datastore):
        """Shutdown the SocketIO server fast and aggressively"""
        try:
            logger.info("Socket.IO: Fast shutdown initiated...")
-
-            # For threading mode, give the thread a very short time to exit gracefully
-            if hasattr(socketio, 'polling_emitter_thread'):
-                if socketio.polling_emitter_thread.is_alive():
-                    logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
-                    socketio.polling_emitter_thread.join(timeout=1.0)  # Only 1 second timeout
-                    if socketio.polling_emitter_thread.is_alive():
-                        logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
-                    else:
-                        logger.info("Socket.IO: Polling thread stopped quickly")
-                else:
-                    logger.info("Socket.IO: Polling thread already stopped")
-
            logger.info("Socket.IO: Fast shutdown complete")
        except Exception as e:
            logger.error(f"Socket.IO error during shutdown: {str(e)}")
--- a/changedetectionio/store.py
+++ b/changedetectionio/store.py
@@ -22,6 +22,13 @@ import uuid as uuid_builder
 from loguru import logger
 from blinker import signal

+# Try to import orjson for faster JSON serialization
+try:
+    import orjson
+    HAS_ORJSON = True
+except ImportError:
+    HAS_ORJSON = False
+
 from .processors import get_custom_watch_obj_for_processor
 from .processors.restock_diff import Restock

@@ -71,9 +78,12 @@ class ChangeDetectionStore:
                self.__data['build_sha'] = f.read()

        try:
-            # @todo retest with ", encoding='utf-8'"
-            with open(self.json_store_path) as json_file:
-                from_disk = json.load(json_file)
+            if HAS_ORJSON:
+                with open(self.json_store_path, 'rb') as json_file:
+                    from_disk = orjson.loads(json_file.read())
+            else:
+                with open(self.json_store_path, encoding='utf-8') as json_file:
+                    from_disk = json.load(json_file)

                # @todo isnt there a way todo this dict.update recursively?
                # Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
@@ -426,9 +436,14 @@ class ChangeDetectionStore:
                # Re #286  - First write to a temp file, then confirm it looks OK and rename it
                # This is a fairly basic strategy to deal with the case that the file is corrupted,
                # system was out of memory, out of RAM etc
-                with open(self.json_store_path+".tmp", 'w') as json_file:
-                    # Use compact JSON in production for better performance
-                    json.dump(data, json_file, indent=2)
+                if HAS_ORJSON:
+                    # Use orjson for faster serialization
+                    with open(self.json_store_path+".tmp", 'wb') as json_file:
+                        json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
+                else:
+                    # Fallback to standard json module
+                    with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file:
+                        json.dump(data, json_file, indent=2, ensure_ascii=False)
                os.replace(self.json_store_path+".tmp", self.json_store_path)
            except Exception as e:
                logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
@@ -490,8 +505,12 @@ class ChangeDetectionStore:

        # Load from external config file
        if path.isfile(proxy_list_file):
-            with open(os.path.join(self.datastore_path, "proxies.json")) as f:
-                proxy_list = json.load(f)
+            if HAS_ORJSON:
+                with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f:
+                    proxy_list = orjson.loads(f.read())
+            else:
+                with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f:
+                    proxy_list = json.load(f)

        # Mapping from UI config if available
        extras = self.data['settings']['requests'].get('extra_proxies')
--- a/changedetectionio/tests/test_backend.py
+++ b/changedetectionio/tests/test_backend.py
@@ -19,18 +19,9 @@ def test_inscriptus():

 def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path):
    set_original_response(datastore_path=datastore_path)
-   #  live_server_setup(live_server) # Setup on conftest per function

-    # Add our URL to the import page
-    res = client.post(
-        url_for("imports.import_page"),
-        data={"urls": url_for('test_endpoint', _external=True)},
-        follow_redirects=True
-    )
+    uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))

-    assert b"1 Imported" in res.data
-
-    wait_for_all_checks(client)

    # Do this a few times.. ensures we dont accidently set the status
    for n in range(3):
@@ -115,7 +106,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
        # It should report nothing found (no new 'has-unread-changes' class)
        res = client.get(url_for("watchlist.index"))

-
        assert b'has-unread-changes' not in res.data
        assert b'class="has-unread-changes' not in res.data
        assert b'head title' in res.data  # Should be ON by default
@@ -129,23 +119,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    res = client.get(url_for("watchlist.index"))
    assert b'head title and more' in res.data

-    # disable <title> pickup
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={"application-ui-use_page_title_in_list": "", "requests-time_between_check-minutes": 180,
-              'application-fetch_backend': "html_requests"},
-        follow_redirects=True
-    )
-
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
-
-    res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
-    assert b'class="has-unread-changes' in res.data
-    assert b'head title' not in res.data  # should now be off
-
-
    # Be sure the last_viewed is going to be greater than the last snapshot
    time.sleep(1)

@@ -166,6 +139,63 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
    # Cleanup everything
    delete_all_watches(client)

+def test_title_scraper(client, live_server, measure_memory_usage, datastore_path):
+    """The page <title> is scraped and listed by default, follows title-only changes, and is hidden once disabled."""
+    set_original_response(datastore_path=datastore_path)
+    uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+
+    # The watch overview should list the <title> scraped from the fetched HTML
+    res = client.get(url_for("watchlist.index"))
+
+    assert b'head title' in res.data  # Should be ON by default
+
+    # Recheck it but only with a title change, content wasnt changed
+    set_original_response(datastore_path=datastore_path, extra_title=" and more")
+
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("watchlist.index"))
+    assert b'head title and more' in res.data
+
+    # disable <title> pickup
+    res = client.post(
+        url_for("settings.settings_page"),
+        data={"application-ui-use_page_title_in_list": "",
+              "requests-time_between_check-minutes": 180,
+              'application-fetch_backend': "html_requests"},
+        follow_redirects=True
+    )
+
+    set_original_response(datastore_path=datastore_path, extra_title=" SHOULD NOT APPEAR")
+
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+    res = client.get(url_for("watchlist.index"))
+    assert b'SHOULD NOT APPEAR' not in res.data
+
+    delete_all_watches(client)
+
+def test_title_scraper_html_only(client, live_server, measure_memory_usage, datastore_path):
+    """A text/plain response must not get a page title recorded, even when its body contains <title> markup."""
+    with open(os.path.join(datastore_path, "endpoint-content.txt"), "w", encoding='utf-8') as f:
+        f.write('"My text document\nWhere I talk about <title>\nwhich should not get registered\n</title>')
+
+    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
+
+    uuid = client.application.config.get('DATASTORE').add_watch(test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+
+    # The endpoint was requested as text/plain, so the <title>-looking text must not show up as a page title
+    res = client.get(url_for("watchlist.index"))
+
+    assert b'which should not get registered' not in res.data  # Nothing scraped from non-HTML content
+    assert not live_server.app.config['DATASTORE'].data['watching'][uuid].get('page_title')  # key written by the update worker
+
+
+
+

 # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
 def test_requests_timeout(client, live_server, measure_memory_usage, datastore_path):
--- a/changedetectionio/tests/test_history_consistency.py
+++ b/changedetectionio/tests/test_history_consistency.py
@@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
    json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')

    json_obj = None
-    with open(json_db_file, 'r') as f:
+    with open(json_db_file, 'r', encoding='utf-8') as f:
        json_obj = json.load(f)

    # assert the right amount of watches was found in the JSON
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -353,7 +353,7 @@ def check_json_ext_filter(json_filter, client, live_server, datastore_path):

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    dates = list(watch.history.keys())
-    snapshot_contents = watch.get_history_snapshot(dates[0])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])

    assert snapshot_contents[0] == '['

@@ -439,7 +439,7 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    dates = list(watch.history.keys())
-    snapshot_contents = watch.get_history_snapshot(dates[0])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])

    assert b'&#34;hello&#34;: 123,' in res.data # properly html escaped in the front end

--- a/changedetectionio/tests/test_notification.py
+++ b/changedetectionio/tests/test_notification.py
@@ -302,15 +302,20 @@ def test_notification_urls_jinja2_apprise_integration(client, live_server, measu
        data={
              "application-fetch_backend": "html_requests",
              "application-minutes_between_check": 180,
-              "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
+              "application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了", "another": "{{diff|truncate(1500)}}" }',
              "application-notification_format": default_notification_format,
              "application-notification_urls": test_notification_url,
              # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
-              "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
+              "application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}  {{diff|truncate(200)}} ",
              },
        follow_redirects=True
    )
    assert b'Settings updated' in res.data
+    assert '网站监测'.encode() in res.data
+    assert b'{{diff|truncate(1500)}}' in res.data
+    assert b'{{diff|truncate(200)}}' in res.data
+
+


 def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage, datastore_path):
--- a/changedetectionio/tests/test_pdf.py
+++ b/changedetectionio/tests/test_pdf.py
@@ -22,7 +22,7 @@ def test_fetch_pdf(client, live_server, measure_memory_usage, datastore_path):

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    dates = list(watch.history.keys())
-    snapshot_contents = watch.get_history_snapshot(dates[0])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])

    # PDF header should not be there (it was converted to text)
    assert 'PDF' not in snapshot_contents
@@ -75,7 +75,7 @@ def test_fetch_pdf(client, live_server, measure_memory_usage, datastore_path):

    dates = list(watch.history.keys())
    # new snapshot was also OK, no HTML
-    snapshot_contents = watch.get_history_snapshot(dates[1])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[1])
    assert 'html' not in snapshot_contents.lower()
    assert f'Original file size - {os.path.getsize(os.path.join(datastore_path, "endpoint-test.pdf"))}' in snapshot_contents
    assert f'here is a change' in snapshot_contents
--- a/changedetectionio/tests/test_request.py
+++ b/changedetectionio/tests/test_request.py
@@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)
    watches_with_body = 0
-    with open(os.path.join(datastore_path, 'url-watches.json')) as f:
+    with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
        app_struct = json.load(f)
        for uuid in app_struct['watching']:
            if app_struct['watching'][uuid]['body']==body_value:
@@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_
    wait_for_all_checks(client)

    watches_with_method = 0
-    with open(os.path.join(datastore_path, 'url-watches.json')) as f:
+    with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
        app_struct = json.load(f)
        for uuid in app_struct['watching']:
            if app_struct['watching'][uuid]['method'] == 'PATCH':
--- a/changedetectionio/tests/test_rss_reader_mode.py
+++ b/changedetectionio/tests/test_rss_reader_mode.py
@@ -65,7 +65,7 @@ def test_rss_reader_mode(client, live_server, measure_memory_usage, datastore_pa

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    dates = list(watch.history.keys())
-    snapshot_contents = watch.get_history_snapshot(dates[0])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
    assert 'Wet noodles escape' in snapshot_contents
    assert '<br>' not in snapshot_contents
    assert '&lt;' not in snapshot_contents
@@ -91,7 +91,7 @@ def test_rss_reader_mode_with_css_filters(client, live_server, measure_memory_us

    watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
    dates = list(watch.history.keys())
-    snapshot_contents = watch.get_history_snapshot(dates[0])
+    snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
    assert 'Wet noodles escape' not in snapshot_contents
    assert '<br>' not in snapshot_contents
    assert '&lt;' not in snapshot_contents
--- a/changedetectionio/tests/unit/test_conditions.py
+++ b/changedetectionio/tests/unit/test_conditions.py
@@ -55,8 +55,8 @@ class TestTriggerConditions(unittest.TestCase):
        self.assertEqual(len(history), 2)

        # Retrieve and check snapshots
-        #snapshot1 = watch.get_history_snapshot(str(timestamp1))
-        #snapshot2 = watch.get_history_snapshot(str(timestamp2))
+        #snapshot1 = watch.get_history_snapshot(timestamp=str(timestamp1))
+        #snapshot2 = watch.get_history_snapshot(timestamp=str(timestamp2))

        self.store.data['watching'][self.watch_uuid].update(
            {
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,6 +33,9 @@ chardet>2.3.0
 wtforms~=3.2
 jsonpath-ng~=1.7.0

+# Fast JSON serialization for better performance
+orjson~=3.10
+
 # dnspython - Used by paho-mqtt for MQTT broker resolution  
 # Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated
 # paho-mqtt will install compatible dnspython version automatically
Author	SHA1	Message	Date
dgtlmoon	71389cb3a6	Forcing utf-8 #3611	2025-11-09 18:38:53 +01:00
dgtlmoon	9bc812a167	0.50.40 Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-11-07 13:21:22 +01:00
dgtlmoon	fd2080567d	Page <title> should only be captured on HTML documents (#3608)	2025-11-07 11:51:10 +01:00
dgtlmoon	969c75e7be	Notification body/title - Fixing validation on empty strings #3606 (#3607)	2025-11-07 11:42:57 +01:00
dgtlmoon	4b14cec5f4	Real time UI - Remove polling thread for updates - it's all done realtime by signals (#3603 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details CodeQL / Analyze (javascript) (push) Has been cancelled Details CodeQL / Analyze (python) (push) Has been cancelled Details	2025-11-05 21:49:17 +01:00
dgtlmoon	a8d5ea067d	Watch history - Don't rescan whole history.txt when looking up a timestamp <->filepath (#3602)	2025-11-05 18:50:27 +01:00
dgtlmoon	2f6873f7d5	Datastore - Use `orjson` for faster saves (#3601 ) Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details	2025-11-05 15:12:11 +01:00
dgtlmoon	dfa85ab932	Scheduler - Saving a couple of CPU cycles in logging strategy Some checks failed Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2025-11-03 19:22:24 +01:00