Text tweak

Including extra args on the WatchHistoryDiff API endpoint
Language set redirect - keep hash
2026-06-10 19:01:06 +00:00 · 2026-01-03 14:09:32 +01:00 · 2026-01-03 14:06:06 +01:00 · 2026-01-03 01:59:42 +01:00 · 2026-01-02 19:51:21 +01:00 · 2026-01-02 17:46:25 +01:00
9 changed files with 417 additions and 54 deletions
@@ -302,18 +302,28 @@ class WatchHistoryDiff(Resource):
        from_version_file_contents = watch.get_history_snapshot(from_timestamp)
        to_version_file_contents = watch.get_history_snapshot(to_timestamp)

-        # Get diff preferences (using defaults similar to the existing code)
-        diff_prefs = {
-            'diff_ignoreWhitespace': False,
-            'diff_changesOnly': True
-        }
+        # Get diff preferences from query parameters (matching UI preferences in DIFF_PREFERENCES_CONFIG)
+        # Support both 'type' (UI parameter) and 'word_diff' (API parameter) for backward compatibility
+        diff_type = request.args.get('type', 'diffLines')
+        if diff_type == 'diffWords':
+            word_diff = True

-        # Generate the diff
+        # Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
+        changes_only = strtobool(request.args.get('changesOnly', 'true'))
+        ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
+        include_removed = strtobool(request.args.get('removed', 'true'))
+        include_added = strtobool(request.args.get('added', 'true'))
+        include_replaced = strtobool(request.args.get('replaced', 'true'))
+
+        # Generate the diff with all preferences
        content = diff.render_diff(
            previous_version_file_contents=from_version_file_contents,
            newest_version_file_contents=to_version_file_contents,
-            ignore_junk=diff_prefs.get('diff_ignoreWhitespace'),
-            include_equal=not diff_prefs.get('diff_changesOnly'),
+            ignore_junk=ignore_whitespace,
+            include_equal=not changes_only,  # changesOnly=true must suppress unchanged (equal) lines, as before
+            include_removed=include_removed,
+            include_added=include_added,
+            include_replaced=include_replaced,
            word_diff=word_diff,
        )

@@ -65,20 +65,22 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
        # RACE CONDITION FIX: Check if this UUID is already being processed by another worker
        from changedetectionio import worker_handler
        from changedetectionio.queuedWatchMetaData import PrioritizedItem
-        if worker_handler.is_watch_running(uuid):
-            logger.trace(f"Worker {worker_id} skipping UUID {uuid} - already being processed, re-queuing for later")
-            # Re-queue with MUCH lower priority (higher number = processed later)
-            # This prevents tight loop where high-priority item keeps getting picked immediately
+        if worker_handler.is_watch_running_by_another_worker(uuid, worker_id):
+            logger.trace(f"Worker {worker_id} detected UUID {uuid} already being processed by another worker - deferring")
+            # Sleep to avoid tight loop and give the other worker time to finish
+            await asyncio.sleep(10.0)
+
+            # Re-queue with lower priority so it gets checked again after current processing finishes
            deferred_priority = max(1000, queued_item_data.priority * 10)
            deferred_item = PrioritizedItem(priority=deferred_priority, item=queued_item_data.item)
            worker_handler.queue_item_async_safe(q, deferred_item, silent=True)
-            await asyncio.sleep(0.1)  # Brief pause to avoid tight loop
+            logger.debug(f"Worker {worker_id} re-queued UUID {uuid} for subsequent check")
            continue

        fetch_start_time = round(time.time())

-        # Mark this UUID as being processed
-        worker_handler.set_uuid_processing(uuid, processing=True)
+        # Mark this UUID as being processed by this worker
+        worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=True)
        
        try:
            if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
@@ -421,8 +423,8 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
            # Always cleanup - this runs whether there was an exception or not
            if uuid:
                try:
-                    # Mark UUID as no longer being processed
-                    worker_handler.set_uuid_processing(uuid, processing=False)
+                    # Mark UUID as no longer being processed by this worker
+                    worker_handler.set_uuid_processing(uuid, worker_id=worker_id, processing=False)
                    
                    # Send completion signal
                    if watch:
@@ -204,7 +204,7 @@ class fetcher(Fetcher):
        import re
        self.delete_browser_steps_screenshots()

-        n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
+        n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 12)) + self.render_extract_delay
        extra_wait = min(n, 15)

        logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
@@ -288,28 +288,27 @@ class fetcher(Fetcher):
        # Enable Network domain to detect when first bytes arrive
        await self.page._client.send('Network.enable')

+        # Now set up the frame navigation handlers
+        async def handle_frame_navigation(event=None):
+            # Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
+            logger.debug(f"Frame navigated: {event}")
+            w = extra_wait - 2 if extra_wait > 4 else 2
+            logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
+            await asyncio.sleep(w)
+            logger.debug("Issuing stopLoading command...")
+            await self.page._client.send('Page.stopLoading')
+            logger.debug("stopLoading command sent!")
+
        async def setup_frame_handlers_on_first_response(event):
            # Only trigger for the main document response
            if event.get('type') == 'Document':
                logger.debug("First response received, setting up frame handlers for forced page stop load.")
-
-                # De-register this listener - we only need it once
-                self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)
-
-                # Now set up the frame navigation handlers
-                async def handle_frame_navigation(event):
-                    # Wait n seconds after the frameStartedLoading, not from any frameStartedLoading/frameStartedNavigating
-                    logger.debug(f"Frame navigated: {event}")
-                    w = extra_wait - 2 if extra_wait > 4 else 2
-                    logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
-                    await asyncio.sleep(w)
-                    logger.debug("Issuing stopLoading command...")
-                    await self.page._client.send('Page.stopLoading')
-                    logger.debug("stopLoading command sent!")
-
                self.page._client.on('Page.frameStartedNavigating', lambda e: asyncio.create_task(handle_frame_navigation(e)))
                self.page._client.on('Page.frameStartedLoading', lambda e: asyncio.create_task(handle_frame_navigation(e)))
                self.page._client.on('Page.frameStoppedLoading', lambda e: logger.debug(f"Frame stopped loading: {e}"))
+                logger.debug("First response received, setting up frame handlers for forced page stop load DONE SETUP")
+                # De-register this listener - we only need it once
+                self.page._client.remove_listener('Network.responseReceived', setup_frame_handlers_on_first_response)

        # Listen for first response to trigger frame handler setup
        self.page._client.on('Network.responseReceived', setup_frame_handlers_on_first_response)
@@ -318,8 +317,11 @@ class fetcher(Fetcher):
        attempt=0
        while not response:
            logger.debug(f"Attempting page fetch {url} attempt {attempt}")
+            asyncio.create_task(handle_frame_navigation())
            response = await self.page.goto(url, timeout=0)
            await asyncio.sleep(1 + extra_wait)
+            await self.page._client.send('Page.stopLoading')
+
            if response:
                break
            if not response:
@@ -15,6 +15,22 @@ document.addEventListener('DOMContentLoaded', function() {
  // Open modal when language button is clicked
  languageButton.addEventListener('click', function(e) {
    e.preventDefault();
+
+    // Update all language links to include current hash in the redirect parameter
+    const currentPath = window.location.pathname;
+    const currentHash = window.location.hash;
+
+    if (currentHash) {
+      const languageOptions = languageModal.querySelectorAll('.language-option');
+      languageOptions.forEach(function(option) {
+        const url = new URL(option.href, window.location.origin);
+        // Update the redirect parameter to include the hash
+        const redirectPath = currentPath + currentHash;
+        url.searchParams.set('redirect', redirectPath);
+        option.setAttribute('href', url.pathname + url.search + url.hash);
+      });
+    }
+
    languageModal.showModal();
  });

@@ -165,18 +165,83 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
    assert b'<div id' in res.data


-    # Fetch the difference between two versions
+    # Fetch the difference between two versions (default text format)
    res = client.get(
        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest'),
        headers={'x-api-key': api_key},
    )
    assert b'(changed) Which is across' in res.data
+
+    # Test htmlcolor format
    res = client.get(
        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?format=htmlcolor',
        headers={'x-api-key': api_key},
    )
    assert b'aria-label="Changed text" title="Changed text">Which is across multiple lines' in res.data

+    # Test html format
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?format=html',
+        headers={'x-api-key': api_key},
+    )
+    assert res.status_code == 200
+    assert b'<br>' in res.data
+
+    # Test markdown format
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?format=markdown',
+        headers={'x-api-key': api_key},
+    )
+    assert res.status_code == 200
+
+    # Test new diff preference parameters
+    # Test removed=false (should hide removed content)
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?removed=false',
+        headers={'x-api-key': api_key},
+    )
+    # Should not contain removed content indicator
+    assert b'(removed)' not in res.data
+    # Should still contain added content
+    assert b'(added)' in res.data or b'which has this one new line' in res.data
+
+    # Test added=false (should hide added content)
+    # Note: The test data has replacements, not pure additions, so we test differently
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?added=false&replaced=false',
+        headers={'x-api-key': api_key},
+    )
+    # With both added and replaced disabled, should have minimal content
+    # Should not contain added indicators
+    assert b'(added)' not in res.data
+
+    # Test replaced=false (should hide replaced/changed content)
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?replaced=false',
+        headers={'x-api-key': api_key},
+    )
+    # Should not contain changed content indicator
+    assert b'(changed)' not in res.data
+
+    # Test type=diffWords for word-level diff
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?type=diffWords&format=htmlcolor',
+        headers={'x-api-key': api_key},
+    )
+    # Should contain HTML formatted diff
+    assert res.status_code == 200
+    assert len(res.data) > 0
+
+    # Test combined parameters: show only additions with word diff
+    res = client.get(
+        url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+'?removed=false&replaced=false&type=diffWords',
+        headers={'x-api-key': api_key},
+    )
+    assert res.status_code == 200
+    # Should not contain removed or changed markers
+    assert b'(removed)' not in res.data
+    assert b'(changed)' not in res.data
+

    # Fetch the whole watch
    res = client.get(
@@ -240,7 +240,6 @@ def test_restock_itemprop_with_tag(client, live_server, measure_memory_usage, da


 def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, datastore_path):
-    

    delete_all_watches(client)

@@ -299,7 +298,26 @@ def test_itemprop_percent_threshold(client, live_server, measure_memory_usage, d
    assert b'has-unread-changes' not in res.data


+    # Re #2600 - Switch the mode to normal type and back, and see if the values stick..
+    ###################################################################################
+    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

+    res = client.post(
+        url_for("ui.ui_edit.edit_page", uuid=uuid),
+        data={"restock_settings-follow_price_changes": "y",
+              "restock_settings-price_change_threshold_percent": 5.05,
+              "processor": "text_json_diff",
+              "url": test_url,
+              'fetch_backend': "html_requests",
+              "time_between_check_use_default": "y"
+              },
+        follow_redirects=True
+    )
+    assert b"Updated watch." in res.data
+    # And back again
+    live_server.app.config['DATASTORE'].data['watching'][uuid]['processor'] = 'restock_diff'
+    res = client.get(url_for("ui.ui_edit.edit_page", uuid=uuid))
+    assert b'type="text" value="5.05"' in res.data

    delete_all_watches(client)

@@ -443,3 +461,4 @@ def test_special_prop_examples(client, live_server, measure_memory_usage, datast
            res = client.get(url_for("watchlist.index"))
            assert b'ception' not in res.data
            assert b'155.55' in res.data
+
@@ -16,8 +16,8 @@ running_async_tasks = []
 async_loop = None
 async_loop_thread = None

-# Track currently processing UUIDs for async workers
-currently_processing_uuids = set()
+# Track currently processing UUIDs for async workers - maps {uuid: worker_id}
+currently_processing_uuids = {}

 # Configuration - async workers only
 USE_ASYNC_WORKERS = True
@@ -168,23 +168,31 @@ def get_worker_count():

 def get_running_uuids():
    """Get list of UUIDs currently being processed by async workers"""
-    return list(currently_processing_uuids)
+    return list(currently_processing_uuids.keys())


-def set_uuid_processing(uuid, processing=True):
-    """Mark a UUID as being processed or completed"""
+def set_uuid_processing(uuid, worker_id=None, processing=True):
+    """Mark a UUID as being processed or completed by a specific worker"""
    global currently_processing_uuids
    if processing:
-        currently_processing_uuids.add(uuid)
-        logger.debug(f"Started processing UUID: {uuid}")
+        currently_processing_uuids[uuid] = worker_id
+        logger.debug(f"Worker {worker_id} started processing UUID: {uuid}")
    else:
-        currently_processing_uuids.discard(uuid)
-        logger.debug(f"Finished processing UUID: {uuid}")
+        currently_processing_uuids.pop(uuid, None)
+        logger.debug(f"Worker {worker_id} finished processing UUID: {uuid}")


 def is_watch_running(watch_uuid):
-    """Check if a specific watch is currently being processed"""
-    return watch_uuid in get_running_uuids()
+    """Check if a specific watch is currently being processed by any worker"""
+    return watch_uuid in currently_processing_uuids
+
+
+def is_watch_running_by_another_worker(watch_uuid, current_worker_id):
+    """Check if a specific watch is currently being processed by a different worker"""
+    if watch_uuid not in currently_processing_uuids:
+        return False
+    processing_worker_id = currently_processing_uuids[watch_uuid]
+    return processing_worker_id != current_worker_id


 def queue_item_async_safe(update_q, item, silent=False):
@@ -28,7 +28,7 @@ info:
    
    For example: `x-api-key: YOUR_API_KEY`
    
-  version: 0.1.3
+  version: 0.1.4
  contact:
    name: ChangeDetection.io
    url: https://github.com/dgtlmoon/changedetection.io
@@ -761,9 +761,9 @@ paths:
    get:
      operationId: getWatchHistoryDiff
      tags: [Watch History]
-      summary: Get diff between two snapshots
+      summary: Get the difference between two snapshots
      description: |
-        Generate a formatted diff (comparison) between two historical snapshots of a web page change monitor (watch).
+        Generate a difference (comparison) between two historical snapshots of a web page change monitor (watch).

        This endpoint compares content between two points in time and returns the differences in your chosen format.
        Perfect for reviewing what changed between specific versions or comparing recent changes.
@@ -798,6 +798,10 @@ paths:
            # Compare two specific timestamps in plain text with word-level diff
            curl -X GET "http://localhost:5000/api/v1/watch/095be615-a8ad-4c33-8e9c-c7612fbf6c9f/difference/1640995200/1640998800?format=text&word_diff=true" \
              -H "x-api-key: YOUR_API_KEY"
+
+            # Show only additions (hide removed/replaced content), ignore whitespace
+            curl -X GET "http://localhost:5000/api/v1/watch/095be615-a8ad-4c33-8e9c-c7612fbf6c9f/difference/previous/latest?format=htmlcolor&removed=false&replaced=false&ignoreWhitespace=true" \
+              -H "x-api-key: YOUR_API_KEY"
        - lang: 'Python'
          source: |
            import requests
@@ -822,6 +826,20 @@ paths:
                params={'format': 'text', 'word_diff': 'true'}
            )
            print(response.text)
+
+            # Show only additions, ignore whitespace and use word-level diff
+            response = requests.get(
+                f'http://localhost:5000/api/v1/watch/{uuid}/difference/previous/latest',
+                headers=headers,
+                params={
+                    'format': 'htmlcolor',
+                    'type': 'diffWords',
+                    'removed': 'false',
+                    'replaced': 'false',
+                    'ignoreWhitespace': 'true'
+                }
+            )
+            print(response.text)
      parameters:
        - name: uuid
          in: path
@@ -861,9 +879,10 @@ paths:
            - `text` (default): Plain text with (removed) and (added) prefixes
            - `html`: Basic HTML format
            - `htmlcolor`: Rich HTML with colored backgrounds (red for deletions, green for additions)
+            - `markdown`: Markdown format with HTML rendering
          schema:
            type: string
-            enum: [text, html, htmlcolor]
+            enum: [text, html, htmlcolor, markdown]
            default: text
        - name: word_diff
          in: query
@@ -888,6 +907,69 @@ paths:
            type: string
            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
            default: "false"
+        - name: type
+          in: query
+          description: |
+            Diff granularity type:
+            - `diffLines` (default): Line-level comparison, showing which lines changed
+            - `diffWords`: Word-level comparison, showing which words changed within lines
+
+            This parameter is an alternative to `word_diff` for better alignment with the UI.
+            If both are specified, `type=diffWords` will enable word-level diffing.
+          schema:
+            type: string
+            enum: [diffLines, diffWords]
+            default: diffLines
+        - name: changesOnly
+          in: query
+          description: |
+            When enabled, only show lines/content that changed (no surrounding context).
+            When disabled, include unchanged lines for context around changes.
+            Accepts: true, false, 1, 0, yes, no, on, off
+          schema:
+            type: string
+            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
+            default: "true"
+        - name: ignoreWhitespace
+          in: query
+          description: |
+            When enabled, ignore whitespace-only changes (spaces, tabs, newlines).
+            Useful for focusing on content changes and ignoring formatting differences.
+            Accepts: true, false, 1, 0, yes, no, on, off
+          schema:
+            type: string
+            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
+            default: "false"
+        - name: removed
+          in: query
+          description: |
+            Include removed/deleted content in the diff output.
+            When disabled, content that was deleted will not appear in the diff.
+            Accepts: true, false, 1, 0, yes, no, on, off
+          schema:
+            type: string
+            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
+            default: "true"
+        - name: added
+          in: query
+          description: |
+            Include added/new content in the diff output.
+            When disabled, content that was added will not appear in the diff.
+            Accepts: true, false, 1, 0, yes, no, on, off
+          schema:
+            type: string
+            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
+            default: "true"
+        - name: replaced
+          in: query
+          description: |
+            Include replaced/modified content in the diff output.
+            When disabled, content that was modified (changed from one value to another) will not appear in the diff.
+            Accepts: true, false, 1, 0, yes, no, on, off
+          schema:
+            type: string
+            enum: ["true", "false", "1", "0", "yes", "no", "on", "off"]
+            default: "true"
      responses:
        '200':
          description: Formatted diff between the two snapshots
Author	SHA1	Message	Date
dgtlmoon	21f4b02847	Text tweak	2026-01-03 14:09:32 +01:00
dgtlmoon	08e55a31c0	Including extra args on the WatchHistoryDiff API endpoint	2026-01-03 14:06:06 +01:00
dgtlmoon	cedabf4ff6	Language set redirect - keep hash Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-01-03 01:59:42 +01:00
dgtlmoon	03116fef8f	Adding small test for switching modes (#3701 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details	2026-01-02 19:51:21 +01:00
dgtlmoon	b1257dd196	UI - Handling redirects on login to the correct page (#3699)	2026-01-02 17:46:25 +01:00
dgtlmoon	7e61f5b663	more resilient same UUID being processed (#3700)	2026-01-02 17:46:12 +01:00
dgtlmoon	afa8451448	Puppeteer - Improvements to timeout handling	2026-01-02 17:45:41 +01:00