Apply same fix to puppeteer

Fix SCREENSHOT_MAX_HEIGHT not enforced: cap viewport step_size and clip stitched output to max capture height #3810
2026-04-11 13:38:02 +00:00 · 2026-04-09 07:09:49 +02:00 · 2026-04-09 07:05:38 +02:00
10 changed files with 22 additions and 154 deletions
--- a/changedetectionio/blueprint/backups/templates/backup_restore.html
+++ b/changedetectionio/blueprint/backups/templates/backup_restore.html
@@ -20,7 +20,8 @@
                <p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
                <p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
                <p class="pure-form-message">
-                    {{ _('Max upload size: %(upload)s MB, Max decompressed size: %(decomp)s MB', upload=max_upload_mb, decomp=max_decompressed_mb) }}
+                    {{ _('Max upload size: %(upload)s MB &nbsp;·&nbsp; Max decompressed size: %(decomp)s MB',
+                         upload=max_upload_mb, decomp=max_decompressed_mb) }}
                </p>

                <form class="pure-form pure-form-stacked settings"
--- a/changedetectionio/blueprint/imports/templates/import.html
+++ b/changedetectionio/blueprint/imports/templates/import.html
@@ -9,7 +9,6 @@
            <li class="tab" id=""><a href="#url-list">{{ _('URL List') }}</a></li>
            <li class="tab"><a href="#distill-io">{{ _('Distill.io') }}</a></li>
            <li class="tab"><a href="#xlsx">{{ _('.XLSX & Wachete') }}</a></li>
-            <li class="tab"><a href="{{url_for('backups.restore.restore')}}">{{ _('Backup Restore') }}</a></li>
        </ul>
    </div>

--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -49,6 +49,9 @@ async def capture_full_page_async(page, screenshot_format='JPEG', watch_uuid=Non
    if page_height > page.viewport_size['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
+        # Never set viewport taller than our max capture height - otherwise one screenshot chunk
+        # captures the whole (e.g. 8098px) page even when SCREENSHOT_MAX_HEIGHT=1000
+        step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
        viewport_start = time.time()
        logger.debug(f"{watch_info}Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
        # Set viewport to a larger size to capture more content at once
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -75,6 +75,9 @@ async def capture_full_page(page, screenshot_format='JPEG', watch_uuid=None, loc
    if page_height > page.viewport['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
+        # Never set viewport taller than our max capture height - otherwise one screenshot chunk
+        # captures the whole page even when SCREENSHOT_MAX_HEIGHT is set smaller
+        step_size = min(step_size, SCREENSHOT_MAX_TOTAL_HEIGHT)
        viewport_start = time.time()
        await page.setViewport({'width': page.viewport['width'], 'height': step_size})
        viewport_time = time.time() - viewport_start
--- a/changedetectionio/content_fetchers/screenshot_handler.py
+++ b/changedetectionio/content_fetchers/screenshot_handler.py
@@ -56,6 +56,10 @@ def stitch_images_worker_raw_bytes(pipe_conn, original_page_height, capture_heig
            im.close()
        del images

+        # Clip stitched image to capture_height (chunks may overshoot by up to step_size-1 px)
+        if total_height > capture_height:
+            stitched = stitched.crop((0, 0, max_width, capture_height))
+
        # Draw caption only if page was trimmed
        if original_page_height > capture_height:
            draw = ImageDraw.Draw(stitched)
--- a/changedetectionio/diff/init.py
+++ b/changedetectionio/diff/init.py
@@ -45,36 +45,6 @@ CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
 # Compiled regex patterns for performance
 WHITESPACE_NORMALIZE_RE = re.compile(r'\s+')

-# Regexes built from the constants above — no brittle hardcoded strings
-_EXTRACT_REMOVED_RE = re.compile(
-    re.escape(REMOVED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(REMOVED_PLACEMARKER_CLOSED)
-    + r'|' +
-    re.escape(CHANGED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_PLACEMARKER_CLOSED)
-)
-_EXTRACT_ADDED_RE = re.compile(
-    re.escape(ADDED_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(ADDED_PLACEMARKER_CLOSED)
-    + r'|' +
-    re.escape(CHANGED_INTO_PLACEMARKER_OPEN) + r'(.*?)' + re.escape(CHANGED_INTO_PLACEMARKER_CLOSED)
-)
-
-
-def extract_changed_from(raw_diff: str) -> str:
-    """Extract only the removed/changed-from fragments from a raw diff string.
-
-    Useful for {{diff_changed_from}} — gives just the old value (e.g. old price),
-    not the full surrounding line. Multiple fragments joined with newlines.
-    """
-    return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_REMOVED_RE.finditer(raw_diff))
-
-
-def extract_changed_to(raw_diff: str) -> str:
-    """Extract only the added/changed-into fragments from a raw diff string.
-
-    Useful for {{diff_changed_to}} — gives just the new value (e.g. new price),
-    not the full surrounding line. Multiple fragments joined with newlines.
-    """
-    return '\n'.join(m.group(1) or m.group(2) for m in _EXTRACT_ADDED_RE.finditer(raw_diff))
-

 def render_inline_word_diff(before_line: str, after_line: str, ignore_junk: bool = False, markdown_style: str = None, tokenizer: str = 'words_and_html') -> tuple[str, bool]:
    """
--- a/changedetectionio/notification_service.py
+++ b/changedetectionio/notification_service.py
@@ -88,28 +88,6 @@ class FormattableTimestamp(str):
            return self._dt.isoformat()


-class FormattableExtract(str):
-    """
-    A str subclass that holds only the extracted changed fragments from a diff.
-    Used for {{diff_changed_from}} and {{diff_changed_to}} tokens.
-
-        {{ diff_changed_from }}   → old value(s) only, e.g. "$99.99"
-        {{ diff_changed_to }}     → new value(s) only, e.g. "$109.99"
-
-    Multiple changed fragments are joined with newlines.
-    Being a str subclass means it is natively JSON serializable.
-    """
-    def __new__(cls, prev_snapshot, current_snapshot, extract_fn):
-        if prev_snapshot or current_snapshot:
-            from changedetectionio import diff as diff_module
-            raw = diff_module.render_diff(prev_snapshot, current_snapshot, word_diff=True)
-            extracted = extract_fn(raw)
-        else:
-            extracted = ''
-        instance = super().__new__(cls, extracted)
-        return instance
-
-
 class FormattableDiff(str):
    """
    A str subclass representing a rendered diff. As a plain string it renders
@@ -183,8 +161,6 @@ class NotificationContextData(dict):
            'diff_patch': FormattableDiff('', '', patch_format=True),
            'diff_removed': FormattableDiff('', '', include_added=False),
            'diff_removed_clean': FormattableDiff('', '', include_added=False, include_change_type_prefix=False),
-            'diff_changed_from': FormattableExtract('', '', extract_fn=lambda x: x),
-            'diff_changed_to': FormattableExtract('', '', extract_fn=lambda x: x),
            'diff_url': None,
            'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
            'notification_timestamp': time.time(),
@@ -268,27 +244,16 @@ def add_rendered_diff_to_notification_vars(notification_scan_text:str, prev_snap
        'diff_removed_clean': {'word_diff': word_diff, 'include_added': False, 'include_change_type_prefix': False},
    }

-    from changedetectionio.diff import extract_changed_from, extract_changed_to
-    extract_specs = {
-        'diff_changed_from': extract_changed_from,
-        'diff_changed_to':   extract_changed_to,
-    }
-
    ret = {}
    rendered_count = 0
-    # Only create FormattableDiff/FormattableExtract objects for diff keys actually used in the notification text
+    # Only create FormattableDiff objects for diff keys actually used in the notification text
    for key in NotificationContextData().keys():
-        if not key.startswith('diff'):
-            continue
-        pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
-        if not re.search(pattern, notification_scan_text, re.IGNORECASE):
-            continue
-        if key in diff_specs:
-            ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
-            rendered_count += 1
-        elif key in extract_specs:
-            ret[key] = FormattableExtract(prev_snapshot, current_snapshot, extract_fn=extract_specs[key])
-            rendered_count += 1
+        if key.startswith('diff') and key in diff_specs:
+            # Check if this placeholder is actually used in the notification text
+            pattern = rf"(?<![A-Za-z0-9_]){re.escape(key)}(?![A-Za-z0-9_])"
+            if re.search(pattern, notification_scan_text, re.IGNORECASE):
+                ret[key] = FormattableDiff(prev_snapshot, current_snapshot, **diff_specs[key])
+                rendered_count += 1

    if rendered_count:
        logger.trace(f"Rendered {rendered_count} diff placeholder(s) {sorted(ret.keys())} in {time.time() - now:.3f}s")
--- a/changedetectionio/templates/_common_fields.html
+++ b/changedetectionio/templates/_common_fields.html
@@ -98,14 +98,6 @@
                <td><code>{{ '{{diff_patch}}' }}</code></td>
                <td>{{ _('The diff output - patch in unified format') }}</td>
            </tr>
-            <tr>
-                <td><code>{{ '{{diff_changed_from}}' }}</code></td>
-                <td>{{ _('Only the changed fragments from the previous version — e.g. the old price. Multiple changes joined by newline.') }}</td>
-            </tr>
-            <tr>
-                <td><code>{{ '{{diff_changed_to}}' }}</code></td>
-                <td>{{ _('Only the changed fragments from the new version — e.g. the new price. Multiple changes joined by newline.') }}</td>
-            </tr>
            <tr>
                <td><code>{{ '{{current_snapshot}}' }}</code></td>
                <td>{{ _('The current snapshot text contents value, useful when combined with JSON or CSS filters') }}
--- a/changedetectionio/tests/smtp/test_notification_smtp.py
+++ b/changedetectionio/tests/smtp/test_notification_smtp.py
@@ -11,10 +11,10 @@ from changedetectionio.tests.util import set_original_response, set_modified_res
    set_longer_modified_response, delete_all_watches

 import logging
-import os
+

 # NOTE - RELIES ON mailserver as hostname running, see github build recipes
-smtp_test_server = os.getenv('SMTP_TEST_MAILSERVER', 'mailserver')
+smtp_test_server = 'mailserver'

 ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys())

--- a/changedetectionio/tests/unit/test_notification_diff.py
+++ b/changedetectionio/tests/unit/test_notification_diff.py
@@ -15,9 +15,7 @@ from changedetectionio.diff import (
    CHANGED_PLACEMARKER_OPEN,
    CHANGED_PLACEMARKER_CLOSED,
    CHANGED_INTO_PLACEMARKER_OPEN,
-    CHANGED_INTO_PLACEMARKER_CLOSED,
-    extract_changed_from,
-    extract_changed_to,
+    CHANGED_INTO_PLACEMARKER_CLOSED
 )


@@ -383,72 +381,5 @@ Line 3 with tabs and spaces"""
        self.assertNotIn('[-Line 2-]', output)
        self.assertNotIn('[+Line 2+]', output)

-    def test_diff_changed_from_to_word_level(self):
-        """Primary use case: extract just the old/new value from a changed line (e.g. price monitoring)"""
-        before = "Widget costs $99.99 per month"
-        after  = "Widget costs $109.99 per month"
-
-        raw = diff.render_diff(before, after, word_diff=True)
-
-        self.assertEqual(extract_changed_from(raw), "$99.99")
-        self.assertEqual(extract_changed_to(raw),   "$109.99")
-
-    def test_diff_changed_from_to_multiple_changes(self):
-        """Multiple changed fragments on different lines are joined with newline.
-        An unchanged line between the two changes ensures each is a 1-to-1 replace,
-        so word_diff fires per line rather than falling back to multi-line block mode."""
-        before = "Price $99\nunchanged\nTax $5"
-        after  = "Price $149\nunchanged\nTax $12"
-
-        raw = diff.render_diff(before, after, word_diff=True)
-
-        self.assertEqual(extract_changed_from(raw), "$99\n$5")
-        self.assertEqual(extract_changed_to(raw),   "$149\n$12")
-
-    def test_diff_changed_from_to_pure_insert_delete(self):
-        """Pure line additions/deletions (no inline word diff) are also captured"""
-        before = "old line"
-        after  = "new line"
-
-        # word_diff=False forces line-level CHANGED markers
-        raw = diff.render_diff(before, after, word_diff=False)
-
-        self.assertEqual(extract_changed_from(raw), "old line")
-        self.assertEqual(extract_changed_to(raw),   "new line")
-
-    def test_diff_changed_from_to_similar_numbers(self):
-        """$90.00 → $9.00 must not produce a partial match like '0.00'.
-        The tokenizer splits on whitespace only, so '$90.00' and '$9.00' are
-        each a single atomic token — diff never sees their internal characters."""
-        before = "for sale $90.00"
-        after  = "for sale $9.00"
-
-        raw = diff.render_diff(before, after, word_diff=True)
-
-        self.assertEqual(extract_changed_from(raw), "$90.00")
-        self.assertEqual(extract_changed_to(raw),   "$9.00")
-
-    def test_diff_changed_from_to_whole_line_replaced(self):
-        """When every token on the line changed (no common tokens), render_inline_word_diff
-        takes the whole_line_replaced path using CHANGED/CHANGED_INTO markers instead of
-        REMOVED/ADDED. Extraction must still work via the alternation in the regex."""
-        before = "$99"
-        after  = "$109"
-
-        raw = diff.render_diff(before, after, word_diff=True)
-
-        self.assertEqual(extract_changed_from(raw), "$99")
-        self.assertEqual(extract_changed_to(raw),   "$109")
-
-    def test_diff_changed_from_to_no_change(self):
-        """No changes → empty string"""
-        content = "nothing changed here"
-
-        raw = diff.render_diff(content, content, word_diff=True)
-
-        self.assertEqual(extract_changed_from(raw), "")
-        self.assertEqual(extract_changed_to(raw),   "")
-
-
 if __name__ == '__main__':
    unittest.main()
Author	SHA1	Message	Date
dgtlmoon	38acada222	Apply same fix to puppeteer	2026-04-09 07:09:49 +02:00
dgtlmoon	962c3b7b8a	Fix SCREENSHOT_MAX_HEIGHT not enforced: cap viewport step_size and clip stitched output to max capture height #3810	2026-04-09 07:05:38 +02:00