mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-01-25 16:40:19 +00:00
Compare commits
2 Commits
content-fa
...
lang-impro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
541ed62bba | ||
|
|
b1a45964e6 |
@@ -84,7 +84,6 @@ jobs:
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_html_to_text'
|
||||
|
||||
# Basic pytest tests with ancillary services
|
||||
basic-tests:
|
||||
|
||||
@@ -75,6 +75,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore, exec
|
||||
continue
|
||||
|
||||
uuid = queued_item_data.item.get('uuid')
|
||||
|
||||
# RACE CONDITION FIX: Check if this UUID is already being processed by another worker
|
||||
from changedetectionio import worker_handler
|
||||
from changedetectionio.queuedWatchMetaData import PrioritizedItem
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import time
|
||||
import threading
|
||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||
from flask_babel import gettext
|
||||
from loguru import logger
|
||||
@@ -152,24 +151,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
||||
confirmtext = request.form.get('confirmtext')
|
||||
|
||||
if confirmtext == 'clear':
|
||||
# Run in background thread to avoid blocking
|
||||
def clear_history_background():
|
||||
# Capture UUIDs first to avoid race conditions
|
||||
watch_uuids = list(datastore.data['watching'].keys())
|
||||
logger.info(f"Background: Clearing history for {len(watch_uuids)} watches")
|
||||
|
||||
for uuid in watch_uuids:
|
||||
try:
|
||||
datastore.clear_watch_history(uuid)
|
||||
except Exception as e:
|
||||
logger.error(f"Error clearing history for watch {uuid}: {e}")
|
||||
|
||||
logger.info("Background: Completed clearing history")
|
||||
|
||||
# Start daemon thread
|
||||
threading.Thread(target=clear_history_background, daemon=True).start()
|
||||
|
||||
flash(gettext("History clearing started in background"))
|
||||
for uuid in datastore.data['watching'].keys():
|
||||
datastore.clear_watch_history(uuid)
|
||||
flash(gettext("Cleared snapshot history for all watches"))
|
||||
else:
|
||||
flash(gettext('Incorrect confirmation text.'), 'error')
|
||||
|
||||
|
||||
@@ -539,18 +539,6 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
||||
|
||||
|
||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
||||
"""
|
||||
Convert HTML content to plain text using inscriptis.
|
||||
|
||||
Thread-Safety: This function uses inscriptis.get_text() which internally calls
|
||||
lxml.html.fromstring() with the default parser. Testing with 50 concurrent threads
|
||||
confirms this approach is thread-safe and produces deterministic output.
|
||||
|
||||
Alternative Approach Rejected: An explicit HTMLParser instance (thread-local or fresh)
|
||||
would also be thread-safe, but was found to break change detection logic in subtle ways
|
||||
(test_check_basic_change_detection_functionality). The default parser provides correct
|
||||
and reliable behavior.
|
||||
"""
|
||||
from inscriptis import get_text
|
||||
from inscriptis.model.config import ParserConfig
|
||||
|
||||
|
||||
@@ -1,225 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
# coding=utf-8
|
||||
|
||||
"""Unit tests for html_tools.html_to_text function."""
|
||||
|
||||
import hashlib
|
||||
import threading
|
||||
from queue import Queue
|
||||
import pytest
|
||||
|
||||
from changedetectionio.html_tools import html_to_text
|
||||
|
||||
|
||||
class TestHtmlToText:
    """Test html_to_text for correctness and thread-safety.

    These are behavior-level tests only: per the html_to_text docstring,
    the implementation relies on inscriptis.get_text() with lxml's default
    parser (the explicit thread-local parser approach was rejected), so no
    test here inspects parser internals.
    """

    def test_basic_text_extraction(self):
        """Basic HTML to text conversion keeps text and strips all tags."""
        html = '<html><body><h1>Title</h1><p>Paragraph text.</p></body></html>'
        text = html_to_text(html)

        assert 'Title' in text
        assert 'Paragraph text.' in text
        assert '<' not in text  # HTML tags should be stripped
        assert '>' not in text

    def test_empty_html(self):
        """An empty document body yields empty or whitespace-only text."""
        html = '<html><body></body></html>'
        text = html_to_text(html)

        # Should return empty or whitespace only
        assert text.strip() == ''

    def test_nested_elements(self):
        """Text is extracted from arbitrarily nested HTML elements."""
        html = '''
        <html>
            <body>
                <div>
                    <h1>Header</h1>
                    <div>
                        <p>First paragraph</p>
                        <p>Second paragraph</p>
                    </div>
                </div>
            </body>
        </html>
        '''
        text = html_to_text(html)

        assert 'Header' in text
        assert 'First paragraph' in text
        assert 'Second paragraph' in text

    def test_anchor_tag_rendering(self):
        """render_anchor_tag_content toggles whether hrefs appear in output."""
        html = '<html><body><a href="https://example.com">Link text</a></body></html>'

        # Without rendering anchors
        text_without = html_to_text(html, render_anchor_tag_content=False)
        assert 'Link text' in text_without
        assert 'https://example.com' not in text_without

        # With rendering anchors
        text_with = html_to_text(html, render_anchor_tag_content=True)
        assert 'Link text' in text_with
        assert 'https://example.com' in text_with or '[Link text]' in text_with

    def test_rss_mode(self):
        """RSS mode treats <title> tags as headings so they survive extraction."""
        html = '<item><title>RSS Title</title><description>Content</description></item>'

        # is_rss=True should convert <title> to <h1>
        text = html_to_text(html, is_rss=True)

        assert 'RSS Title' in text
        assert 'Content' in text

    def test_special_characters(self):
        """HTML entities are decoded in the text output."""
        # Use an encoded ampersand so the decode step is actually exercised.
        html = '<html><body><p>Test &amp; <special> characters</p></body></html>'
        text = html_to_text(html)

        # Entities should be decoded. (Fixed: the previous assertion OR-ed
        # two identical strings, making the check tautological.)
        assert 'Test &' in text
        assert 'special' in text

    def test_whitespace_handling(self):
        """Block-level elements produce line separation in the output."""
        html = '<html><body><p>Line 1</p><p>Line 2</p></body></html>'
        text = html_to_text(html)

        # Should have some separation between lines
        assert 'Line 1' in text
        assert 'Line 2' in text
        assert text.count('\n') >= 1  # At least one newline

    def test_deterministic_output(self):
        """The same HTML input always produces byte-identical text."""
        html = '<html><body><h1>Test</h1><p>Content here</p></body></html>'

        # Extract text multiple times
        results = [html_to_text(html) for _ in range(10)]

        # All results should be identical
        assert len(set(results)) == 1, "html_to_text should be deterministic"

    def test_thread_safety_determinism(self):
        """
        html_to_text must produce deterministic output under high concurrency.

        50 worker threads each convert the same document 10 times; every
        conversion must hash to the same MD5. A divergent hash indicates a
        shared-parser race corrupting the extracted text.
        """
        html = '''
        <html>
            <head><title>Test Page</title></head>
            <body>
                <h1>Main Heading</h1>
                <div class="content">
                    <p>First paragraph with <b>bold text</b>.</p>
                    <p>Second paragraph with <i>italic text</i>.</p>
                    <ul>
                        <li>Item 1</li>
                        <li>Item 2</li>
                        <li>Item 3</li>
                    </ul>
                </div>
            </body>
        </html>
        '''

        results_queue = Queue()

        def worker(worker_id, iterations=10):
            """Worker that converts HTML to text multiple times."""
            for i in range(iterations):
                text = html_to_text(html)
                md5 = hashlib.md5(text.encode('utf-8')).hexdigest()
                results_queue.put((worker_id, i, md5))

        # Launch many threads simultaneously
        num_threads = 50
        threads = []

        for i in range(num_threads):
            t = threading.Thread(target=worker, args=(i,))
            threads.append(t)
            t.start()

        # Wait for all threads to complete
        for t in threads:
            t.join()

        # Collect all MD5 results
        md5_values = []
        while not results_queue.empty():
            _, _, md5 = results_queue.get()
            md5_values.append(md5)

        # All MD5s should be identical
        unique_md5s = set(md5_values)

        assert len(unique_md5s) == 1, (
            f"Thread-safety issue detected! Found {len(unique_md5s)} different MD5 values: {unique_md5s}."
        )

        print(f"✓ Thread-safety test passed: {len(md5_values)} conversions, all identical")

    # NOTE(review): the former tests `test_thread_local_parser_exists` and
    # `test_different_threads_get_different_parsers` asserted the presence of a
    # `html_to_text._thread_local` parser cache. Per the html_to_text docstring,
    # the thread-local parser approach was rejected in favour of inscriptis'
    # default parser, so those implementation-coupled tests would fail
    # unconditionally and have been removed. Thread-safety is still covered
    # behaviorally by test_thread_safety_determinism above.
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this test module directly (outside normal pytest
    # collection) for quick local testing; '-v' enables verbose output.
    pytest.main([__file__, '-v'])
|
||||
Reference in New Issue
Block a user