Restock - No need to extract the text because it's not used anyway

0.54.4
CVE-2026-29038 - Reflected XSS in RSS Tag Error Response
2026-06-27 02:51:11 +00:00 · 2026-03-04 12:12:11 +01:00 · 2026-03-04 11:30:05 +01:00 · 2026-03-04 10:56:02 +01:00 · 2026-03-04 10:54:52 +01:00 · 2026-03-04 10:52:58 +01:00
17 changed files with 911 additions and 53 deletions
@@ -52,4 +52,13 @@ jobs:
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.13'
-      skip-pypuppeteer: true
+      skip-pypuppeteer: true
+
+
+  test-application-3-14:
+    #if: github.event_name == 'push' && github.ref == 'refs/heads/master'
+    needs: lint-code
+    uses: ./.github/workflows/test-stack-reusable-workflow.yml
+    with:
+      python-version: '3.14'
+      skip-pypuppeteer: false
@@ -706,7 +706,19 @@ jobs:
      - name: Check upgrade works without error
        run: |
          echo "=== Testing upgrade path from 0.49.1 to ${{ github.ref_name }} (${{ github.sha }}) ==="
-
+          sudo apt-get update && sudo apt-get install -y --no-install-recommends \
+              g++ \
+              gcc \
+              libc-dev \
+              libffi-dev \
+              libjpeg-dev \
+              libssl-dev \
+              libxslt-dev \
+              make \
+              patch \
+              pkg-config \
+              zlib1g-dev
+          
          # Checkout old version and create datastore
          git checkout 0.49.1
          python3 -m venv .venv
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 # Semver means never use .01, or 00. Should be .1.
-__version__ = '0.54.3'
+__version__ = '0.54.4'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
@@ -40,11 +40,6 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
            zipObj.write(url_watches_json, arcname="url-watches.json")
            logger.debug("Added url-watches.json to backup")

-        # Add the flask app secret (if it exists)
-        secret_file = os.path.join(datastore_path, "secret.txt")
-        if os.path.isfile(secret_file):
-            zipObj.write(secret_file, arcname="secret.txt")
-
        # Add tag data directories (each tag has its own {uuid}/tag.json)
        for uuid, tag in (tags or {}).items():
            for f in Path(tag.data_dir).glob('*'):
@@ -151,19 +146,22 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def download_backup(filename):
        import re
        filename = filename.strip()
-        backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
-
-        full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
-        if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
-            abort(404)
+        backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")

+        # Resolve 'latest' before any validation so checks run against the real filename.
        if filename == 'latest':
            backups = find_backups()
+            if not backups:
+                abort(404)
            filename = backups[0]['filename']

        if not re.match(r"^" + backup_filename_regex + "$", filename):
            abort(400)  # Bad Request if the filename doesn't match the pattern

+        full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
+        if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
+            abort(404)
+
        logger.debug(f"Backup download request for '{full_path}'")
        return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)

@@ -1,6 +1,7 @@
 import io
 import json
 import os
+import re
 import shutil
 import tempfile
 import threading
@@ -14,6 +15,16 @@ from loguru import logger

 from changedetectionio.flask_app import login_optionally_required

+# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
+_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
+# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
+_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
+# Only top-level directories whose name is a valid version-4 UUID are treated as watch/tag entries.
+_UUID_RE = re.compile(
+    r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
+    re.IGNORECASE,
+)
+

 class RestoreForm(Form):
    zip_file = FileField(_l('Backup zip file'), validators=[
@@ -50,7 +61,18 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
    with tempfile.TemporaryDirectory() as tmpdir:
        logger.debug(f"Restore: extracting zip to {tmpdir}")
        with zipfile.ZipFile(zip_stream, 'r') as zf:
-            zf.extractall(tmpdir)
+            total_uncompressed = sum(m.file_size for m in zf.infolist())
+            if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
+                raise ValueError(
+                    f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
+                    f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
+                )
+            resolved_dest = os.path.realpath(tmpdir)
+            for member in zf.infolist():
+                member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
+                if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
+                    raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
+                zf.extract(member, tmpdir)
        logger.debug("Restore: zip extracted, scanning UUID directories")

        for entry in os.scandir(tmpdir):
@@ -58,6 +80,9 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
                continue

            uuid = entry.name
+            if not _UUID_RE.match(uuid):
+                logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
+                continue
            tag_json_path = os.path.join(entry.path, 'tag.json')
            watch_json_path = os.path.join(entry.path, 'watch.json')

@@ -155,7 +180,9 @@ def construct_restore_blueprint(datastore):
        form = RestoreForm()
        return render_template("backup_restore.html",
                               form=form,
-                               restore_running=any(t.is_alive() for t in restore_threads))
+                               restore_running=any(t.is_alive() for t in restore_threads),
+                               max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
+                               max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))

    @login_optionally_required
    @restore_blueprint.route("/restore/start", methods=['POST'])
@@ -173,10 +200,22 @@ def construct_restore_blueprint(datastore):
            flash(gettext("File must be a .zip backup file"), "error")
            return redirect(url_for('backups.restore.restore'))

-        # Read into memory now — the request stream is gone once we return
+        # Reject oversized uploads before reading the stream into memory.
+        content_length = request.content_length
+        if content_length and content_length > _MAX_UPLOAD_BYTES:
+            flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
+            return redirect(url_for('backups.restore.restore'))
+
+        # Read into memory now — the request stream is gone once we return.
+        # Read one byte beyond the limit so an oversized upload is still detected when Content-Length is missing or understated.
        try:
-            zip_bytes = io.BytesIO(zip_file.read())
-            zipfile.ZipFile(zip_bytes)  # quick validity check before spawning
+            raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
+            if len(raw) > _MAX_UPLOAD_BYTES:
+                flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
+                return redirect(url_for('backups.restore.restore'))
+            zip_bytes = io.BytesIO(raw)
+            with zipfile.ZipFile(zip_bytes):  # quick validity check before spawning
+                pass
            zip_bytes.seek(0)
        except zipfile.BadZipFile:
            flash(gettext("Invalid or corrupted zip file"), "error")
@@ -201,6 +240,7 @@ def construct_restore_blueprint(datastore):
            name="BackupRestore"
        )
        restore_thread.start()
+        restore_threads[:] = [t for t in restore_threads if t.is_alive()]
        restore_threads.append(restore_thread)
        flash(gettext("Restore started in background, check back in a few minutes."))
        return redirect(url_for('backups.restore.restore'))
@@ -19,6 +19,10 @@

                <p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
                <p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
+                <p class="pure-form-message">
+                    {{ _('Max upload size: %(upload)s MB &nbsp;·&nbsp; Max decompressed size: %(decomp)s MB',
+                         upload=max_upload_mb, decomp=max_decompressed_mb) }}
+                </p>

                <form class="pure-form pure-form-stacked settings"
                      action="{{ url_for('backups.restore.backups_restore_start') }}"
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
        datastore: The ChangeDetectionStore instance
    """

-    @rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
+    @rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
    def rss_tag_feed(tag_uuid):

        from flask import make_response, request, url_for
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
    @login_optionally_required
    def clear_all_history():
        if request.method == 'POST':
-            confirmtext = request.form.get('confirmtext')
+            confirmtext = request.form.get('confirmtext', '')

-            if confirmtext == 'clear':
+            if confirmtext.strip().lower() == gettext('clear').strip().lower():
                # Run in background thread to avoid blocking
                def clear_history_background():
                    # Capture UUIDs first to avoid race conditions
@@ -608,13 +608,12 @@ class ValidateCSSJSONXPATHInput(object):
                    raise ValidationError("XPath not permitted in this field!")
                from lxml import etree, html
                import elementpath
-                # xpath 2.0-3.1
-                from elementpath.xpath3 import XPath3Parser
+                from changedetectionio.html_tools import SafeXPath3Parser
                tree = html.fromstring("<html></html>")
                line = line.replace('xpath:', '')

                try:
-                    elementpath.select(tree, line.strip(), parser=XPath3Parser)
+                    elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
                except elementpath.ElementPathError as e:
                    message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
                    raise ValidationError(message % (line, str(e)))
@@ -23,6 +23,53 @@ class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)

+
+_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
+    'unparsed-text',
+    'unparsed-text-lines',
+    'unparsed-text-available',
+    'doc',
+    'doc-available',
+    'environment-variable',
+    'available-environment-variables',
+]
+
+
+def _build_safe_xpath3_parser():
+    """Return an XPath3Parser subclass with filesystem/environment access functions removed.
+
+    XPath 3.0 includes functions that can read arbitrary files or environment variables:
+      - unparsed-text / unparsed-text-lines / unparsed-text-available  (file read)
+      - doc / doc-available                                             (XML fetch from URI)
+      - environment-variable / available-environment-variables         (env var leakage)
+
+    Subclassing gives us an independent symbol_table copy (not shared with the parent class),
+    so removing entries here does not affect XPath3Parser itself.
+
+    Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
+    (comma-separated, e.g. "unparsed-text,doc,environment-variable").
+    """
+    import os
+    from elementpath.xpath3 import XPath3Parser
+
+    class SafeXPath3Parser(XPath3Parser):
+        pass
+
+    env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
+    if env_override is not None:
+        blocked = [f.strip() for f in env_override.split(',') if f.strip()]
+    else:
+        blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
+
+    for _fn in blocked:
+        SafeXPath3Parser.symbol_table.pop(_fn, None)
+
+    return SafeXPath3Parser
+
+
+# Module-level singleton — built once, reused everywhere.
+SafeXPath3Parser = _build_safe_xpath3_parser()
+
 # Doesn't look like python supports forward slash auto enclosure in re.findall
 # So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100)
@@ -183,8 +230,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
    """
    from lxml import etree, html
    import elementpath
-    # xpath 2.0-3.1
-    from elementpath.xpath3 import XPath3Parser

    parser = etree.HTMLParser()
    tree = None
@@ -210,7 +255,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
            # This allows //title to match elements in the default namespace
            namespaces[''] = tree.nsmap[None]

-        r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
+        r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
        #@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
        #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)

@@ -235,6 +280,9 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
            else:
                html_block += elementpath_tostring(element)

+        # Drop element references before the finally block so tree.clear() can release
+        # the libxml2 document immediately (elements pin the C-level doc via refcount).
+        del r
        return html_block
    finally:
        # Explicitly clear the tree to free memory
@@ -437,17 +437,18 @@ class perform_site_check(difference_detection_processor):

        # Only try to process restock information (like scraping for keywords) if the page was actually rendered correctly.
        # Otherwise it will assume "in stock" because nothing suggesting the opposite was found
-        from ...html_tools import html_to_text
-        text = html_to_text(self.fetcher.content)
-        logger.debug(f"Length of text after conversion: {len(text)}")
-        if not len(text):
-            from ...content_fetchers.exceptions import ReplyWithContentButNoText
-            raise ReplyWithContentButNoText(url=watch.link,
-                                            status_code=self.fetcher.get_last_status_code(),
-                                            screenshot=self.fetcher.screenshot,
-                                            html_content=self.fetcher.content,
-                                            xpath_data=self.fetcher.xpath_data
-                                            )
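+# Disabled: the text extraction below is skipped because the extracted text is not used.
+# NOTE(review): with this disabled, the empty-text check (ReplyWithContentButNoText) no longer runs here — confirm this is intended.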
+#        from ...html_tools import html_to_text
+#        text = html_to_text(self.fetcher.content)
+#        logger.debug(f"Length of text after conversion: {len(text)}")
+#        if not len(text):
+#            from ...content_fetchers.exceptions import ReplyWithContentButNoText
+#            raise ReplyWithContentButNoText(url=watch.link,
+#                                            status_code=self.fetcher.get_last_status_code(),
+#                                            screenshot=self.fetcher.screenshot,
+#                                            html_content=self.fetcher.content,
+#                                            xpath_data=self.fetcher.xpath_data
+#                                            )

        # Which restock settings to compare against?
        # Settings are stored in restock_diff.json (migrated from watch.json by update_30).
@@ -2,6 +2,7 @@
 import psutil
 import time
 from threading import Thread
+import multiprocessing

 import pytest
 import arrow
@@ -191,6 +192,34 @@ def cleanup(datastore_path):
            if os.path.isfile(f):
                os.unlink(f)

+def pytest_configure(config):
+    """Configure pytest environment before tests run.
+
+    CRITICAL: Set multiprocessing start method to 'fork' for Python 3.14+ compatibility.
+
+    Python 3.14 changed the default start method from 'fork' to 'forkserver' on Linux.
+    The forkserver method requires all objects to be picklable, but pytest-flask's
+    LiveServer uses nested functions that can't be pickled.
+
+    Setting 'fork' explicitly:
+    - Maintains compatibility with Python 3.10-3.13 (where 'fork' was already default)
+    - Fixes Python 3.14 pickling errors
+    - Only affects Unix-like systems (Windows uses 'spawn' regardless)
+
+    See: https://github.com/python/cpython/issues/126831
+    See: https://docs.python.org/3/whatsnew/3.14.html
+    """
+    # Only set if not already set (respects existing configuration)
+    if multiprocessing.get_start_method(allow_none=True) is None:
+        try:
+            # 'fork' is available on Unix-like systems (Linux, macOS)
+            # On Windows 'fork' is unavailable: set_start_method() raises ValueError, which the except clause below ignores
+            multiprocessing.set_start_method('fork', force=False)
+            logger.debug("Set multiprocessing start method to 'fork' for Python 3.14+ compatibility")
+        except (ValueError, RuntimeError):
+            # Already set, not available on this platform, or context already created
+            pass
+
 def pytest_addoption(parser):
    """Add custom command-line options for pytest.

@@ -3,7 +3,7 @@
 from .util import set_original_response, live_server_setup, wait_for_all_checks
 from flask import url_for
 import io
-from zipfile import ZipFile
+from zipfile import ZipFile, ZIP_DEFLATED
 import re
 import time
 from changedetectionio.model import Watch, Tag
@@ -68,6 +68,9 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
    # Check for changedetection.json (settings file)
    assert 'changedetection.json' in l, "changedetection.json should be in backup"

+    # secret.txt must never be included — it contains the Flask session key
+    assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
+
    # Get the latest one
    res = client.get(
        url_for("backups.remove_backups"),
@@ -196,4 +199,63 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
    assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
    assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
    assert isinstance(restored_tag2, Tag.model), \
-        f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
+        f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
+
+
+def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
+    """Zip Slip path traversal entries in a restore zip must be rejected."""
+    import pytest
+    from changedetectionio.blueprint.backups.restore import import_from_zip
+
+    # Build a zip with a path traversal entry that would escape the extraction dir
+    malicious_zip = io.BytesIO()
+    with ZipFile(malicious_zip, 'w') as zf:
+        zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
+    malicious_zip.seek(0)
+
+    datastore = live_server.app.config['DATASTORE']
+
+    with pytest.raises(ValueError, match="Zip Slip"):
+        import_from_zip(
+            zip_stream=malicious_zip,
+            datastore=datastore,
+            include_groups=True,
+            include_groups_replace=True,
+            include_watches=True,
+            include_watches_replace=True,
+        )
+
+
+def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
+    """A zip whose total uncompressed size exceeds the limit must be rejected.
+
+    The guard reads file_size from the zip central-directory metadata — no
+    actual decompression happens, so this test is fast and uses minimal RAM.
+    100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
+    50 KB is enough to trigger the check without creating any large files.
+    """
+    import pytest
+    import changedetectionio.blueprint.backups.restore as restore_mod
+    from changedetectionio.blueprint.backups.restore import import_from_zip
+
+    # ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
+    bomb_zip = io.BytesIO()
+    with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
+        zf.writestr("data.txt", b"\x00" * (100 * 1024))
+    bomb_zip.seek(0)
+
+    datastore = live_server.app.config['DATASTORE']
+    original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
+    try:
+        restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024  # 50 KB limit for this test
+        with pytest.raises(ValueError, match="decompressed size"):
+            import_from_zip(
+                zip_stream=bomb_zip,
+                datastore=datastore,
+                include_groups=True,
+                include_groups_replace=True,
+                include_watches=True,
+                include_watches_replace=True,
+            )
+    finally:
+        restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
@@ -624,3 +624,76 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
    assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
    assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
    assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
+
+
+def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
+    """
+    Test that clearing snapshot history works with translated confirmation text.
+
+    Issue #3865: When the app language is set to German, the clear history
+    confirmation dialog shows the translated word (e.g. 'löschen') but the
+    backend only accepted the English word 'clear', making it impossible
+    to clear snapshots in non-English languages.
+    """
+    from flask import url_for
+
+    test_url = url_for('test_endpoint', _external=True)
+
+    # Add a watch so there is history to clear
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
+    wait_for_all_checks(client)
+
+    # Set language to German
+    res = client.get(
+        url_for("set_language", locale="de"),
+        follow_redirects=True
+    )
+    assert res.status_code == 200
+
+    # Verify the clear history page shows the German confirmation word
+    res = client.get(
+        url_for("ui.clear_all_history"),
+        follow_redirects=True
+    )
+    assert res.status_code == 200
+    assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
+
+    # Submit the form with the German translated word
+    res = client.post(
+        url_for("ui.clear_all_history"),
+        data={"confirmtext": "löschen"},
+        follow_redirects=True
+    )
+    assert res.status_code == 200
+    # Should NOT show error message
+    assert b"Incorrect confirmation text" not in res.data, \
+        "German confirmation word 'loschen' should be accepted (issue #3865)"
+
+    # Switch back to English and verify English word still works
+    res = client.get(
+        url_for("set_language", locale="en_US"),
+        follow_redirects=True
+    )
+
+    res = client.post(
+        url_for("ui.clear_all_history"),
+        data={"confirmtext": "clear"},
+        follow_redirects=True
+    )
+    assert res.status_code == 200
+    assert b"Incorrect confirmation text" not in res.data, \
+        "English confirmation word 'clear' should still be accepted"
+
+    # Verify that missing/empty confirmtext does not crash the server
+    res = client.post(
+        url_for("ui.clear_all_history"),
+        data={},
+        follow_redirects=True
+    )
+    assert res.status_code == 200, \
+        "Missing confirmtext should not crash the server"
@@ -592,3 +592,74 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
        set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
        for content_type in RSS_XML_CONTENT_TYPES:
            _subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
+
+
+# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
+# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
+def test_xpath_blocked_functions_unit():
+    """Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed)."""
+    import elementpath
+    from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
+    from lxml import html
+
+    html_content = '<html><body><p>safe content</p></body></html>'
+
+    dangerous_expressions = [
+        "unparsed-text('file:///etc/passwd')",
+        "unparsed-text-lines('file:///etc/passwd')",
+        "unparsed-text-available('file:///etc/passwd')",
+        "doc('file:///etc/passwd')",
+        "doc-available('file:///etc/passwd')",
+        "environment-variable('PATH')",
+        "available-environment-variables()",
+    ]
+
+    for expr in dangerous_expressions:
+        # xpath_filter() must raise, not silently return file contents
+        try:
+            result = xpath_filter(expr, html_content)
+            assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}"
+        except elementpath.ElementPathError:
+            pass  # expected
+
+        # SafeXPath3Parser must reject the expression at parse time
+        tree = html.fromstring(html_content)
+        try:
+            elementpath.select(tree, expr, parser=SafeXPath3Parser)
+            assert False, f"SafeXPath3Parser should have raised for: {expr!r}"
+        except elementpath.ElementPathError:
+            pass  # expected
+
+    # Sanity check: normal XPath still works
+    result = xpath_filter('//p/text()', html_content)
+    assert result == 'safe content'
+
+
+# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
+def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
+    """Edit-form validation must reject dangerous XPath 3.0 functions before they are stored."""
+    from flask import url_for
+
+    set_original_response(datastore_path=datastore_path)
+    test_url = url_for('test_endpoint', _external=True)
+    client.application.config.get('DATASTORE').add_watch(url=test_url)
+    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    wait_for_all_checks(client)
+
+    dangerous_expressions = [
+        "xpath:unparsed-text('file:///etc/passwd')",
+        "xpath:environment-variable('PATH')",
+        "xpath:doc('file:///etc/passwd')",
+    ]
+
+    for expr in dangerous_expressions:
+        res = client.post(
+            url_for("ui.ui_edit.edit_page", uuid="first"),
+            data={"include_filters": expr, "url": test_url, "tags": "", "headers": "",
+                  'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+            follow_redirects=True
+        )
+        assert b"is not a valid XPath expression" in res.data, \
+            f"Form should reject dangerous expression: {expr!r}"
+
+    delete_all_watches(client)
@@ -113,8 +113,156 @@ tags:

  - name: Plugin API Extensions
    description: |
-      Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
-      returns the fully merged spec including schemas for any processor plugins installed on this instance.
+      ## How Processor Plugins Extend the API
+
+      changedetection.io uses a **processor plugin** system to handle different types of change detection.
+      Each processor lives in `changedetectionio/processors/<name>/` and may include an `api.yaml` file
+      that extends the core Watch schema with processor-specific configuration fields.
+
+      ### How it works
+
+      At startup, changedetection.io scans all installed processors for an `api.yaml` file. Any schemas
+      and code samples defined there are deep-merged into the live API specification, making the
+      processor's configuration fields valid on all watch create and update requests.
+
+      The live, fully-merged spec is always available at `/api/v1/full-spec` — use that URL with
+      Swagger UI or Redoc to see the complete schema for your specific installation.
+
+      ---
+
+      ### Writing a processor `api.yaml`
+
+      Place an `api.yaml` in the processor plugin's own directory, alongside its `__init__.py`
+      (e.g. `changedetectionio/processors/my_processor/api.yaml`). The schema name **must** follow the
+      convention `processor_config_<processor_name>` (e.g. `processor_config_restock_diff`). That same
+      key is used as the JSON field name when creating or updating a watch.
+
+      A minimal `api.yaml` for a hypothetical `my_processor`:
+
+      ```yaml
+      components:
+        schemas:
+          processor_config_my_processor:
+            type: object
+            description: Configuration for my_processor
+            properties:
+              some_option:
+                type: boolean
+                default: true
+                description: Enable some behaviour
+
+      paths:
+        /watch:
+          post:
+            x-code-samples:
+              - lang: curl
+                label: my_processor example
+                source: |
+                  curl -X POST "http://localhost:5000/api/v1/watch" \
+                    -H "x-api-key: YOUR_API_KEY" \
+                    -H "Content-Type: application/json" \
+                    -d '{
+                      "url": "https://example.com",
+                      "processor": "my_processor",
+                      "processor_config_my_processor": { "some_option": true }
+                    }'
+      ```
+
+      The `paths` section in `api.yaml` is used only for injecting additional `x-code-samples` into
+      existing endpoints — a plugin cannot define new routes.
+
+      ---
+
+      ### Built-in plugin: `restock_diff`
+
+      The `restock_diff` processor is always shipped with changedetection.io. It monitors product
+      availability and price changes using structured data (JSON-LD / schema.org microdata) and
+      text heuristics. It is activated by setting `"processor": "restock_diff"` on a watch.
+
+      It adds the `processor_config_restock_diff` block to the Watch schema with these fields:
+
+      | Field | Type | Default | Description |
+      |---|---|---|---|
+      | `in_stock_processing` | string | `in_stock_only` | `in_stock_only` — alert only on Out-of-Stock→In-Stock transitions · `all_changes` — alert on any availability change · `off` — disable stock tracking |
+      | `follow_price_changes` | boolean | `true` | Monitor and alert on price changes |
+      | `price_change_min` | number\|null | — | Alert when price drops **below** this value |
+      | `price_change_max` | number\|null | — | Alert when price rises **above** this value |
+      | `price_change_threshold_percent` | number\|null | — | Minimum percentage change relative to the original price required to trigger an alert |
+
+      #### CREATE — Add a restock/price monitor
+
+      ```bash
+      curl -X POST "http://localhost:5000/api/v1/watch" \
+        -H "x-api-key: YOUR_API_KEY" \
+        -H "Content-Type: application/json" \
+        -d '{
+          "url": "https://example.com/product/widget",
+          "processor": "restock_diff",
+          "processor_config_restock_diff": {
+            "in_stock_processing": "in_stock_only",
+            "follow_price_changes": true,
+            "price_change_threshold_percent": 5
+          }
+        }'
+      ```
+
+      #### READ — Retrieve the monitor
+
+      The response JSON includes `processor_config_restock_diff` alongside all standard watch fields:
+
+      ```bash
+      curl -X GET "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
+        -H "x-api-key: YOUR_API_KEY"
+      ```
+
+      ```json
+      {
+        "uuid": "cc0cfffa-f449-477b-83ea-0caafd1dc091",
+        "url": "https://example.com/product/widget",
+        "processor": "restock_diff",
+        "processor_config_restock_diff": {
+          "in_stock_processing": "in_stock_only",
+          "follow_price_changes": true,
+          "price_change_threshold_percent": 5,
+          "price_change_min": null,
+          "price_change_max": null
+        }
+      }
+      ```
+
+      #### UPDATE — Change thresholds without recreating the monitor
+
+      Only fields included in the request body are updated; omitted fields are left unchanged.
+
+      ```bash
+      curl -X PUT "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
+        -H "x-api-key: YOUR_API_KEY" \
+        -H "Content-Type: application/json" \
+        -d '{
+          "processor_config_restock_diff": {
+            "in_stock_processing": "all_changes",
+            "follow_price_changes": true,
+            "price_change_min": 10.00,
+            "price_change_max": 500.00
+          }
+        }'
+      ```
+
+      #### DELETE — Remove the monitor
+
+      ```bash
+      curl -X DELETE "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
+        -H "x-api-key: YOUR_API_KEY"
+      ```
+
+      ---
+
+      For the complete schema-validated documentation including all processor fields, fetch the live spec
+      and load it into Swagger UI or Redoc:
+
+      ```
+      GET /api/v1/full-spec
+      ```

 components:
  securitySchemes:
@@ -1921,15 +2069,35 @@ paths:

        Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
        spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
-        Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
+
+        **Use this URL** with Swagger UI or Redoc to get schema-accurate documentation for your
+        specific install — it includes every `processor_config_<name>` schema block contributed by
+        installed processors (e.g. `processor_config_restock_diff` from the built-in restock plugin).
+
+        This endpoint requires no authentication and returns YAML.
+
+        To load it directly in Swagger UI, paste the URL into the "Explore" box:
+        ```
+        http://localhost:5000/api/v1/full-spec
+        ```
      security: []
      x-code-samples:
        - lang: 'curl'
          source: |
+            # Fetch the live merged spec (no API key needed)
            curl -X GET "http://localhost:5000/api/v1/full-spec"
+        - lang: 'Python'
+          source: |
+            import requests
+
+            # No authentication required
+            response = requests.get('http://localhost:5000/api/v1/full-spec')
+            print(response.text)  # Returns YAML
      responses:
        '200':
-          description: Merged OpenAPI specification in YAML format
+          description: |
+            Merged OpenAPI specification in YAML format. Includes all processor plugin schemas
+            (e.g. `processor_config_restock_diff`), which are not present in the static `api-spec.yaml`.
          content:
            application/yaml:
              schema:
Author	SHA1	Message	Date
dgtlmoon	d07859f694	Restock - No need to extract the text because it's not used anyway	2026-03-04 12:12:11 +01:00
dgtlmoon	1e12ae404f	0.54.4 Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled Details CodeQL / Analyze (javascript) (push) Has been cancelled Details CodeQL / Analyze (python) (push) Has been cancelled Details	2026-03-04 11:30:05 +01:00
dgtlmoon	ec7d56f85d	CVE-2026-29038 - Reflected XSS in RSS Tag Error Response	2026-03-04 10:56:02 +01:00
dgtlmoon	417d57e574	CVE-2026-29039 - XPath - Arbitrary File Read via unparsed-text()	2026-03-04 10:54:52 +01:00
dgtlmoon	1d7d812eb0	CVE-2026-29065 - fix(backups): patch zip slip advisory, zip bomb, upload size limit, UUID validation, secret.txt leakage, and download edge cases	2026-03-04 10:52:58 +01:00
dgtlmoon	524393a1fb	Updating API docs with better processor plugin info (#3942 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (alpine) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/amd64 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v7 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm/v8 (main) (push) Has been cancelled Details ChangeDetection.io Container Build Test / Build linux/arm64 (main) (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-14 (push) Has been cancelled Details	2026-03-02 12:12:39 +01:00
dgtlmoon	b09ebcbef6	Python 3.14 CI test and support (#3941)	2026-03-02 11:28:52 +01:00
Muhammet Eren Karakuş	30ac10ff24	fix(i18n): accept translated confirmation text when clearing snapshot history (#3940 ) Build and push containers / metadata (push) Has been cancelled Details Build and push containers / build-push-containers (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Build distribution 📦 (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Test the built package works basically. (push) Has been cancelled Details Publish Python 🐍distribution 📦 to PyPI and TestPyPI / Publish Python 🐍 distribution 📦 to PyPI (push) Has been cancelled Details ChangeDetection.io App Test / lint-code (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-10 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-11 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-12 (push) Has been cancelled Details ChangeDetection.io App Test / test-application-3-13 (push) Has been cancelled Details	2026-03-02 10:44:42 +01:00