mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-06-14 04:42:06 +00:00
Compare commits
4 Commits
0.54.4
..
python-314
| Author | SHA1 | Date | |
|---|---|---|---|
| fecd181e07 | |||
| 525e390523 | |||
| 7fe332ad95 | |||
| b65a01ec02 |
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||||
# Semver means never use .01, or 00. Should be .1.
|
# Semver means never use .01, or 00. Should be .1.
|
||||||
__version__ = '0.54.4'
|
__version__ = '0.54.3'
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
|
|||||||
@@ -40,6 +40,11 @@ def create_backup(datastore_path, watches: dict, tags: dict = None):
|
|||||||
zipObj.write(url_watches_json, arcname="url-watches.json")
|
zipObj.write(url_watches_json, arcname="url-watches.json")
|
||||||
logger.debug("Added url-watches.json to backup")
|
logger.debug("Added url-watches.json to backup")
|
||||||
|
|
||||||
|
# Add the flask app secret (if it exists)
|
||||||
|
secret_file = os.path.join(datastore_path, "secret.txt")
|
||||||
|
if os.path.isfile(secret_file):
|
||||||
|
zipObj.write(secret_file, arcname="secret.txt")
|
||||||
|
|
||||||
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
# Add tag data directories (each tag has its own {uuid}/tag.json)
|
||||||
for uuid, tag in (tags or {}).items():
|
for uuid, tag in (tags or {}).items():
|
||||||
for f in Path(tag.data_dir).glob('*'):
|
for f in Path(tag.data_dir).glob('*'):
|
||||||
@@ -146,22 +151,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
def download_backup(filename):
|
def download_backup(filename):
|
||||||
import re
|
import re
|
||||||
filename = filename.strip()
|
filename = filename.strip()
|
||||||
backup_filename_regex = BACKUP_FILENAME_FORMAT.format(r"\d+")
|
backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")
|
||||||
|
|
||||||
|
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
||||||
|
if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
|
||||||
|
abort(404)
|
||||||
|
|
||||||
# Resolve 'latest' before any validation so checks run against the real filename.
|
|
||||||
if filename == 'latest':
|
if filename == 'latest':
|
||||||
backups = find_backups()
|
backups = find_backups()
|
||||||
if not backups:
|
|
||||||
abort(404)
|
|
||||||
filename = backups[0]['filename']
|
filename = backups[0]['filename']
|
||||||
|
|
||||||
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
if not re.match(r"^" + backup_filename_regex + "$", filename):
|
||||||
abort(400) # Bad Request if the filename doesn't match the pattern
|
abort(400) # Bad Request if the filename doesn't match the pattern
|
||||||
|
|
||||||
full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
|
|
||||||
if not full_path.startswith(os.path.abspath(datastore.datastore_path) + os.sep):
|
|
||||||
abort(404)
|
|
||||||
|
|
||||||
logger.debug(f"Backup download request for '{full_path}'")
|
logger.debug(f"Backup download request for '{full_path}'")
|
||||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
import tempfile
|
import tempfile
|
||||||
import threading
|
import threading
|
||||||
@@ -15,16 +14,6 @@ from loguru import logger
|
|||||||
|
|
||||||
from changedetectionio.flask_app import login_optionally_required
|
from changedetectionio.flask_app import login_optionally_required
|
||||||
|
|
||||||
# Maximum size of the uploaded zip file. Override via env var MAX_RESTORE_UPLOAD_MB.
|
|
||||||
_MAX_UPLOAD_BYTES = int(os.getenv("MAX_RESTORE_UPLOAD_MB", 256)) * 1024 * 1024
|
|
||||||
# Maximum total uncompressed size of all entries (zip-bomb guard). Override via MAX_RESTORE_DECOMPRESSED_MB.
|
|
||||||
_MAX_DECOMPRESSED_BYTES = int(os.getenv("MAX_RESTORE_DECOMPRESSED_MB", 1024)) * 1024 * 1024
|
|
||||||
# Only top-level directories whose name is a valid UUID are treated as watch/tag entries.
|
|
||||||
_UUID_RE = re.compile(
|
|
||||||
r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$',
|
|
||||||
re.IGNORECASE,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class RestoreForm(Form):
|
class RestoreForm(Form):
|
||||||
zip_file = FileField(_l('Backup zip file'), validators=[
|
zip_file = FileField(_l('Backup zip file'), validators=[
|
||||||
@@ -61,18 +50,7 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
|||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
logger.debug(f"Restore: extracting zip to {tmpdir}")
|
||||||
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
with zipfile.ZipFile(zip_stream, 'r') as zf:
|
||||||
total_uncompressed = sum(m.file_size for m in zf.infolist())
|
zf.extractall(tmpdir)
|
||||||
if total_uncompressed > _MAX_DECOMPRESSED_BYTES:
|
|
||||||
raise ValueError(
|
|
||||||
f"Backup archive decompressed size ({total_uncompressed // (1024 * 1024)} MB) "
|
|
||||||
f"exceeds the {_MAX_DECOMPRESSED_BYTES // (1024 * 1024)} MB limit"
|
|
||||||
)
|
|
||||||
resolved_dest = os.path.realpath(tmpdir)
|
|
||||||
for member in zf.infolist():
|
|
||||||
member_dest = os.path.realpath(os.path.join(resolved_dest, member.filename))
|
|
||||||
if not member_dest.startswith(resolved_dest + os.sep) and member_dest != resolved_dest:
|
|
||||||
raise ValueError(f"Zip Slip path traversal detected in backup archive: {member.filename!r}")
|
|
||||||
zf.extract(member, tmpdir)
|
|
||||||
logger.debug("Restore: zip extracted, scanning UUID directories")
|
logger.debug("Restore: zip extracted, scanning UUID directories")
|
||||||
|
|
||||||
for entry in os.scandir(tmpdir):
|
for entry in os.scandir(tmpdir):
|
||||||
@@ -80,9 +58,6 @@ def import_from_zip(zip_stream, datastore, include_groups, include_groups_replac
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
uuid = entry.name
|
uuid = entry.name
|
||||||
if not _UUID_RE.match(uuid):
|
|
||||||
logger.warning(f"Restore: skipping non-UUID directory {uuid!r}")
|
|
||||||
continue
|
|
||||||
tag_json_path = os.path.join(entry.path, 'tag.json')
|
tag_json_path = os.path.join(entry.path, 'tag.json')
|
||||||
watch_json_path = os.path.join(entry.path, 'watch.json')
|
watch_json_path = os.path.join(entry.path, 'watch.json')
|
||||||
|
|
||||||
@@ -180,9 +155,7 @@ def construct_restore_blueprint(datastore):
|
|||||||
form = RestoreForm()
|
form = RestoreForm()
|
||||||
return render_template("backup_restore.html",
|
return render_template("backup_restore.html",
|
||||||
form=form,
|
form=form,
|
||||||
restore_running=any(t.is_alive() for t in restore_threads),
|
restore_running=any(t.is_alive() for t in restore_threads))
|
||||||
max_upload_mb=_MAX_UPLOAD_BYTES // (1024 * 1024),
|
|
||||||
max_decompressed_mb=_MAX_DECOMPRESSED_BYTES // (1024 * 1024))
|
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@restore_blueprint.route("/restore/start", methods=['POST'])
|
@restore_blueprint.route("/restore/start", methods=['POST'])
|
||||||
@@ -200,22 +173,10 @@ def construct_restore_blueprint(datastore):
|
|||||||
flash(gettext("File must be a .zip backup file"), "error")
|
flash(gettext("File must be a .zip backup file"), "error")
|
||||||
return redirect(url_for('backups.restore.restore'))
|
return redirect(url_for('backups.restore.restore'))
|
||||||
|
|
||||||
# Reject oversized uploads before reading the stream into memory.
|
# Read into memory now — the request stream is gone once we return
|
||||||
content_length = request.content_length
|
|
||||||
if content_length and content_length > _MAX_UPLOAD_BYTES:
|
|
||||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
|
||||||
return redirect(url_for('backups.restore.restore'))
|
|
||||||
|
|
||||||
# Read into memory now — the request stream is gone once we return.
|
|
||||||
# Read one byte beyond the limit so we can detect truncated-but-still-oversized streams.
|
|
||||||
try:
|
try:
|
||||||
raw = zip_file.read(_MAX_UPLOAD_BYTES + 1)
|
zip_bytes = io.BytesIO(zip_file.read())
|
||||||
if len(raw) > _MAX_UPLOAD_BYTES:
|
zipfile.ZipFile(zip_bytes) # quick validity check before spawning
|
||||||
flash(gettext("Backup file is too large (max %(mb)s MB)", mb=_MAX_UPLOAD_BYTES // (1024 * 1024)), "error")
|
|
||||||
return redirect(url_for('backups.restore.restore'))
|
|
||||||
zip_bytes = io.BytesIO(raw)
|
|
||||||
with zipfile.ZipFile(zip_bytes): # quick validity check before spawning
|
|
||||||
pass
|
|
||||||
zip_bytes.seek(0)
|
zip_bytes.seek(0)
|
||||||
except zipfile.BadZipFile:
|
except zipfile.BadZipFile:
|
||||||
flash(gettext("Invalid or corrupted zip file"), "error")
|
flash(gettext("Invalid or corrupted zip file"), "error")
|
||||||
@@ -240,7 +201,6 @@ def construct_restore_blueprint(datastore):
|
|||||||
name="BackupRestore"
|
name="BackupRestore"
|
||||||
)
|
)
|
||||||
restore_thread.start()
|
restore_thread.start()
|
||||||
restore_threads[:] = [t for t in restore_threads if t.is_alive()]
|
|
||||||
restore_threads.append(restore_thread)
|
restore_threads.append(restore_thread)
|
||||||
flash(gettext("Restore started in background, check back in a few minutes."))
|
flash(gettext("Restore started in background, check back in a few minutes."))
|
||||||
return redirect(url_for('backups.restore.restore'))
|
return redirect(url_for('backups.restore.restore'))
|
||||||
|
|||||||
@@ -19,10 +19,6 @@
|
|||||||
|
|
||||||
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
<p>{{ _('Restore a backup. Must be a .zip backup file created on/after v0.53.1 (new database layout).') }}</p>
|
||||||
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
<p>{{ _('Note: This does not override the main application settings, only watches and groups.') }}</p>
|
||||||
<p class="pure-form-message">
|
|
||||||
{{ _('Max upload size: %(upload)s MB · Max decompressed size: %(decomp)s MB',
|
|
||||||
upload=max_upload_mb, decomp=max_decompressed_mb) }}
|
|
||||||
</p>
|
|
||||||
|
|
||||||
<form class="pure-form pure-form-stacked settings"
|
<form class="pure-form pure-form-stacked settings"
|
||||||
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
action="{{ url_for('backups.restore.backups_restore_start') }}"
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ def construct_tag_routes(rss_blueprint, datastore):
|
|||||||
datastore: The ChangeDetectionStore instance
|
datastore: The ChangeDetectionStore instance
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@rss_blueprint.route("/tag/<uuid_str:tag_uuid>", methods=['GET'])
|
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
|
||||||
def rss_tag_feed(tag_uuid):
|
def rss_tag_feed(tag_uuid):
|
||||||
|
|
||||||
from flask import make_response, request, url_for
|
from flask import make_response, request, url_for
|
||||||
|
|||||||
@@ -156,9 +156,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_pool,
|
|||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def clear_all_history():
|
def clear_all_history():
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
confirmtext = request.form.get('confirmtext', '')
|
confirmtext = request.form.get('confirmtext')
|
||||||
|
|
||||||
if confirmtext.strip().lower() == gettext('clear').strip().lower():
|
if confirmtext == 'clear':
|
||||||
# Run in background thread to avoid blocking
|
# Run in background thread to avoid blocking
|
||||||
def clear_history_background():
|
def clear_history_background():
|
||||||
# Capture UUIDs first to avoid race conditions
|
# Capture UUIDs first to avoid race conditions
|
||||||
|
|||||||
@@ -608,12 +608,13 @@ class ValidateCSSJSONXPATHInput(object):
|
|||||||
raise ValidationError("XPath not permitted in this field!")
|
raise ValidationError("XPath not permitted in this field!")
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
import elementpath
|
import elementpath
|
||||||
from changedetectionio.html_tools import SafeXPath3Parser
|
# xpath 2.0-3.1
|
||||||
|
from elementpath.xpath3 import XPath3Parser
|
||||||
tree = html.fromstring("<html></html>")
|
tree = html.fromstring("<html></html>")
|
||||||
line = line.replace('xpath:', '')
|
line = line.replace('xpath:', '')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
elementpath.select(tree, line.strip(), parser=SafeXPath3Parser)
|
elementpath.select(tree, line.strip(), parser=XPath3Parser)
|
||||||
except elementpath.ElementPathError as e:
|
except elementpath.ElementPathError as e:
|
||||||
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
message = field.gettext('\'%s\' is not a valid XPath expression. (%s)')
|
||||||
raise ValidationError(message % (line, str(e)))
|
raise ValidationError(message % (line, str(e)))
|
||||||
|
|||||||
@@ -23,53 +23,6 @@ class JSONNotFound(ValueError):
|
|||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
ValueError.__init__(self, msg)
|
ValueError.__init__(self, msg)
|
||||||
|
|
||||||
|
|
||||||
_DEFAULT_UNSAFE_XPATH3_FUNCTIONS = [
|
|
||||||
'unparsed-text',
|
|
||||||
'unparsed-text-lines',
|
|
||||||
'unparsed-text-available',
|
|
||||||
'doc',
|
|
||||||
'doc-available',
|
|
||||||
'environment-variable',
|
|
||||||
'available-environment-variables',
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def _build_safe_xpath3_parser():
|
|
||||||
"""Return an XPath3Parser subclass with filesystem/environment access functions removed.
|
|
||||||
|
|
||||||
XPath 3.0 includes functions that can read arbitrary files or environment variables:
|
|
||||||
- unparsed-text / unparsed-text-lines / unparsed-text-available (file read)
|
|
||||||
- doc / doc-available (XML fetch from URI)
|
|
||||||
- environment-variable / available-environment-variables (env var leakage)
|
|
||||||
|
|
||||||
Subclassing gives us an independent symbol_table copy (not shared with the parent class),
|
|
||||||
so removing entries here does not affect XPath3Parser itself.
|
|
||||||
|
|
||||||
Override the blocked list via the XPATH_BLOCKED_FUNCTIONS environment variable
|
|
||||||
(comma-separated, e.g. "unparsed-text,doc,environment-variable").
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
from elementpath.xpath3 import XPath3Parser
|
|
||||||
|
|
||||||
class SafeXPath3Parser(XPath3Parser):
|
|
||||||
pass
|
|
||||||
|
|
||||||
env_override = os.getenv('XPATH_BLOCKED_FUNCTIONS')
|
|
||||||
if env_override is not None:
|
|
||||||
blocked = [f.strip() for f in env_override.split(',') if f.strip()]
|
|
||||||
else:
|
|
||||||
blocked = _DEFAULT_UNSAFE_XPATH3_FUNCTIONS
|
|
||||||
|
|
||||||
for _fn in blocked:
|
|
||||||
SafeXPath3Parser.symbol_table.pop(_fn, None)
|
|
||||||
|
|
||||||
return SafeXPath3Parser
|
|
||||||
|
|
||||||
|
|
||||||
# Module-level singleton — built once, reused everywhere.
|
|
||||||
SafeXPath3Parser = _build_safe_xpath3_parser()
|
|
||||||
|
|
||||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||||
# So convert it to inline flag "(?i)foobar" type configuration
|
# So convert it to inline flag "(?i)foobar" type configuration
|
||||||
@lru_cache(maxsize=100)
|
@lru_cache(maxsize=100)
|
||||||
@@ -230,6 +183,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
"""
|
"""
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
import elementpath
|
import elementpath
|
||||||
|
# xpath 2.0-3.1
|
||||||
|
from elementpath.xpath3 import XPath3Parser
|
||||||
|
|
||||||
parser = etree.HTMLParser()
|
parser = etree.HTMLParser()
|
||||||
tree = None
|
tree = None
|
||||||
@@ -255,7 +210,7 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
# This allows //title to match elements in the default namespace
|
# This allows //title to match elements in the default namespace
|
||||||
namespaces[''] = tree.nsmap[None]
|
namespaces[''] = tree.nsmap[None]
|
||||||
|
|
||||||
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=SafeXPath3Parser)
|
r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
|
||||||
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
|
||||||
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
#@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
|
||||||
|
|
||||||
@@ -280,9 +235,6 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
|
|||||||
else:
|
else:
|
||||||
html_block += elementpath_tostring(element)
|
html_block += elementpath_tostring(element)
|
||||||
|
|
||||||
# Drop element references before the finally block so tree.clear() can release
|
|
||||||
# the libxml2 document immediately (elements pin the C-level doc via refcount).
|
|
||||||
del r
|
|
||||||
return html_block
|
return html_block
|
||||||
finally:
|
finally:
|
||||||
# Explicitly clear the tree to free memory
|
# Explicitly clear the tree to free memory
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
from .util import set_original_response, live_server_setup, wait_for_all_checks
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
import io
|
import io
|
||||||
from zipfile import ZipFile, ZIP_DEFLATED
|
from zipfile import ZipFile
|
||||||
import re
|
import re
|
||||||
import time
|
import time
|
||||||
from changedetectionio.model import Watch, Tag
|
from changedetectionio.model import Watch, Tag
|
||||||
@@ -68,9 +68,6 @@ def test_backup(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
# Check for changedetection.json (settings file)
|
# Check for changedetection.json (settings file)
|
||||||
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
assert 'changedetection.json' in l, "changedetection.json should be in backup"
|
||||||
|
|
||||||
# secret.txt must never be included — it contains the Flask session key
|
|
||||||
assert 'secret.txt' not in l, "secret.txt (Flask session key) must not be included in backup"
|
|
||||||
|
|
||||||
# Get the latest one
|
# Get the latest one
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("backups.remove_backups"),
|
url_for("backups.remove_backups"),
|
||||||
@@ -199,63 +196,4 @@ def test_backup_restore(client, live_server, measure_memory_usage, datastore_pat
|
|||||||
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
|
||||||
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
|
||||||
assert isinstance(restored_tag2, Tag.model), \
|
assert isinstance(restored_tag2, Tag.model), \
|
||||||
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"
|
||||||
|
|
||||||
|
|
||||||
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
|
|
||||||
"""Zip Slip path traversal entries in a restore zip must be rejected."""
|
|
||||||
import pytest
|
|
||||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
|
||||||
|
|
||||||
# Build a zip with a path traversal entry that would escape the extraction dir
|
|
||||||
malicious_zip = io.BytesIO()
|
|
||||||
with ZipFile(malicious_zip, 'w') as zf:
|
|
||||||
zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
|
|
||||||
malicious_zip.seek(0)
|
|
||||||
|
|
||||||
datastore = live_server.app.config['DATASTORE']
|
|
||||||
|
|
||||||
with pytest.raises(ValueError, match="Zip Slip"):
|
|
||||||
import_from_zip(
|
|
||||||
zip_stream=malicious_zip,
|
|
||||||
datastore=datastore,
|
|
||||||
include_groups=True,
|
|
||||||
include_groups_replace=True,
|
|
||||||
include_watches=True,
|
|
||||||
include_watches_replace=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
|
|
||||||
"""A zip whose total uncompressed size exceeds the limit must be rejected.
|
|
||||||
|
|
||||||
The guard reads file_size from the zip central-directory metadata — no
|
|
||||||
actual decompression happens, so this test is fast and uses minimal RAM.
|
|
||||||
100 KB of zeros compresses to ~100 bytes; monkeypatching the limit to
|
|
||||||
50 KB is enough to trigger the check without creating any large files.
|
|
||||||
"""
|
|
||||||
import pytest
|
|
||||||
import changedetectionio.blueprint.backups.restore as restore_mod
|
|
||||||
from changedetectionio.blueprint.backups.restore import import_from_zip
|
|
||||||
|
|
||||||
# ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
|
|
||||||
bomb_zip = io.BytesIO()
|
|
||||||
with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
|
|
||||||
zf.writestr("data.txt", b"\x00" * (100 * 1024))
|
|
||||||
bomb_zip.seek(0)
|
|
||||||
|
|
||||||
datastore = live_server.app.config['DATASTORE']
|
|
||||||
original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
|
|
||||||
try:
|
|
||||||
restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024 # 50 KB limit for this test
|
|
||||||
with pytest.raises(ValueError, match="decompressed size"):
|
|
||||||
import_from_zip(
|
|
||||||
zip_stream=bomb_zip,
|
|
||||||
datastore=datastore,
|
|
||||||
include_groups=True,
|
|
||||||
include_groups_replace=True,
|
|
||||||
include_watches=True,
|
|
||||||
include_watches_replace=True,
|
|
||||||
)
|
|
||||||
finally:
|
|
||||||
restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
|
|
||||||
@@ -624,76 +624,3 @@ def test_session_locale_overrides_accept_language(client, live_server, measure_m
|
|||||||
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
assert "분".encode() in res.data, "Expected Korean '분' for Minutes"
|
||||||
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
assert "小時".encode() not in res.data, "Should not have Traditional Chinese '小時' when Korean is set"
|
||||||
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
assert "分鐘".encode() not in res.data, "Should not have Traditional Chinese '分鐘' when Korean is set"
|
||||||
|
|
||||||
|
|
||||||
def test_clear_history_translated_confirmation(client, live_server, measure_memory_usage, datastore_path):
|
|
||||||
"""
|
|
||||||
Test that clearing snapshot history works with translated confirmation text.
|
|
||||||
|
|
||||||
Issue #3865: When the app language is set to German, the clear history
|
|
||||||
confirmation dialog shows the translated word (e.g. 'loschen') but the
|
|
||||||
backend only accepted the English word 'clear', making it impossible
|
|
||||||
to clear snapshots in non-English languages.
|
|
||||||
"""
|
|
||||||
from flask import url_for
|
|
||||||
|
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
|
||||||
|
|
||||||
# Add a watch so there is history to clear
|
|
||||||
res = client.post(
|
|
||||||
url_for("imports.import_page"),
|
|
||||||
data={"urls": test_url},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"1 Imported" in res.data
|
|
||||||
wait_for_all_checks(client)
|
|
||||||
|
|
||||||
# Set language to German
|
|
||||||
res = client.get(
|
|
||||||
url_for("set_language", locale="de"),
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert res.status_code == 200
|
|
||||||
|
|
||||||
# Verify the clear history page shows the German confirmation word
|
|
||||||
res = client.get(
|
|
||||||
url_for("ui.clear_all_history"),
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert res.status_code == 200
|
|
||||||
assert "löschen".encode() in res.data, "Expected German word 'loschen' on clear history page"
|
|
||||||
|
|
||||||
# Submit the form with the German translated word
|
|
||||||
res = client.post(
|
|
||||||
url_for("ui.clear_all_history"),
|
|
||||||
data={"confirmtext": "löschen"},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert res.status_code == 200
|
|
||||||
# Should NOT show error message
|
|
||||||
assert b"Incorrect confirmation text" not in res.data, \
|
|
||||||
"German confirmation word 'loschen' should be accepted (issue #3865)"
|
|
||||||
|
|
||||||
# Switch back to English and verify English word still works
|
|
||||||
res = client.get(
|
|
||||||
url_for("set_language", locale="en_US"),
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
|
|
||||||
res = client.post(
|
|
||||||
url_for("ui.clear_all_history"),
|
|
||||||
data={"confirmtext": "clear"},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert res.status_code == 200
|
|
||||||
assert b"Incorrect confirmation text" not in res.data, \
|
|
||||||
"English confirmation word 'clear' should still be accepted"
|
|
||||||
|
|
||||||
# Verify that missing/empty confirmtext does not crash the server
|
|
||||||
res = client.post(
|
|
||||||
url_for("ui.clear_all_history"),
|
|
||||||
data={},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert res.status_code == 200, \
|
|
||||||
"Missing confirmtext should not crash the server"
|
|
||||||
|
|||||||
@@ -592,74 +592,3 @@ def test_rss_xpath(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
|
set_rss_atom_feed_response(header=feed_header, datastore_path=datastore_path)
|
||||||
for content_type in RSS_XML_CONTENT_TYPES:
|
for content_type in RSS_XML_CONTENT_TYPES:
|
||||||
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
|
_subtest_xpath_rss(client, content_type=content_type, datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|
||||||
# GHSA-6fmw-82m7-jq6p — XPath arbitrary file read via unparsed-text() and friends
|
|
||||||
# Unit-level: verify xpath_filter() and SafeXPath3Parser block all dangerous functions.
|
|
||||||
def test_xpath_blocked_functions_unit():
|
|
||||||
"""Dangerous XPath 3.0 functions must be rejected at the parser level (no live server needed)."""
|
|
||||||
import elementpath
|
|
||||||
from changedetectionio.html_tools import xpath_filter, SafeXPath3Parser
|
|
||||||
from lxml import html
|
|
||||||
|
|
||||||
html_content = '<html><body><p>safe content</p></body></html>'
|
|
||||||
|
|
||||||
dangerous_expressions = [
|
|
||||||
"unparsed-text('file:///etc/passwd')",
|
|
||||||
"unparsed-text-lines('file:///etc/passwd')",
|
|
||||||
"unparsed-text-available('file:///etc/passwd')",
|
|
||||||
"doc('file:///etc/passwd')",
|
|
||||||
"doc-available('file:///etc/passwd')",
|
|
||||||
"environment-variable('PATH')",
|
|
||||||
"available-environment-variables()",
|
|
||||||
]
|
|
||||||
|
|
||||||
for expr in dangerous_expressions:
|
|
||||||
# xpath_filter() must raise, not silently return file contents
|
|
||||||
try:
|
|
||||||
result = xpath_filter(expr, html_content)
|
|
||||||
assert False, f"xpath_filter should have raised for: {expr!r}, got: {result!r}"
|
|
||||||
except elementpath.ElementPathError:
|
|
||||||
pass # expected
|
|
||||||
|
|
||||||
# SafeXPath3Parser must reject the expression at parse time
|
|
||||||
tree = html.fromstring(html_content)
|
|
||||||
try:
|
|
||||||
elementpath.select(tree, expr, parser=SafeXPath3Parser)
|
|
||||||
assert False, f"SafeXPath3Parser should have raised for: {expr!r}"
|
|
||||||
except elementpath.ElementPathError:
|
|
||||||
pass # expected
|
|
||||||
|
|
||||||
# Sanity check: normal XPath still works
|
|
||||||
result = xpath_filter('//p/text()', html_content)
|
|
||||||
assert result == 'safe content'
|
|
||||||
|
|
||||||
|
|
||||||
# GHSA-6fmw-82m7-jq6p — form validation must also reject dangerous XPath expressions.
|
|
||||||
def test_xpath_blocked_functions_form_validation(client, live_server, measure_memory_usage, datastore_path):
|
|
||||||
"""Edit-form validation must reject dangerous XPath 3.0 functions before they are stored."""
|
|
||||||
from flask import url_for
|
|
||||||
|
|
||||||
set_original_response(datastore_path=datastore_path)
|
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
|
||||||
client.application.config.get('DATASTORE').add_watch(url=test_url)
|
|
||||||
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
|
|
||||||
wait_for_all_checks(client)
|
|
||||||
|
|
||||||
dangerous_expressions = [
|
|
||||||
"xpath:unparsed-text('file:///etc/passwd')",
|
|
||||||
"xpath:environment-variable('PATH')",
|
|
||||||
"xpath:doc('file:///etc/passwd')",
|
|
||||||
]
|
|
||||||
|
|
||||||
for expr in dangerous_expressions:
|
|
||||||
res = client.post(
|
|
||||||
url_for("ui.ui_edit.edit_page", uuid="first"),
|
|
||||||
data={"include_filters": expr, "url": test_url, "tags": "", "headers": "",
|
|
||||||
'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
assert b"is not a valid XPath expression" in res.data, \
|
|
||||||
f"Form should reject dangerous expression: {expr!r}"
|
|
||||||
|
|
||||||
delete_all_watches(client)
|
|
||||||
|
|||||||
+4
-172
@@ -113,156 +113,8 @@ tags:
|
|||||||
|
|
||||||
- name: Plugin API Extensions
|
- name: Plugin API Extensions
|
||||||
description: |
|
description: |
|
||||||
## How Processor Plugins Extend the API
|
Retrieve the live OpenAPI specification for this instance. Unlike the static spec, this endpoint
|
||||||
|
returns the fully merged spec including schemas for any processor plugins installed on this instance.
|
||||||
changedetection.io uses a **processor plugin** system to handle different types of change detection.
|
|
||||||
Each processor lives in `changedetectionio/processors/<name>/` and may include an `api.yaml` file
|
|
||||||
that extends the core Watch schema with processor-specific configuration fields.
|
|
||||||
|
|
||||||
### How it works
|
|
||||||
|
|
||||||
At startup, changedetection.io scans all installed processors for an `api.yaml` file. Any schemas
|
|
||||||
and code samples defined there are deep-merged into the live API specification, making the
|
|
||||||
processor's configuration fields valid on all watch create and update requests.
|
|
||||||
|
|
||||||
The live, fully-merged spec is always available at `/api/v1/full-spec` — use that URL with
|
|
||||||
Swagger UI or Redoc to see the complete schema for your specific installation.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Writing a processor `api.yaml`
|
|
||||||
|
|
||||||
Place an `api.yaml` in the processor plugin's own directory, alongside its `__init__.py`
|
|
||||||
(e.g. `changedetectionio/processors/my_processor/api.yaml`). The schema name **must** follow the
|
|
||||||
convention `processor_config_<processor_name>` (e.g. `processor_config_restock_diff`). That same
|
|
||||||
key is used as the JSON field name when creating or updating a watch.
|
|
||||||
|
|
||||||
A minimal `api.yaml` for a hypothetical `my_processor`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
components:
|
|
||||||
schemas:
|
|
||||||
processor_config_my_processor:
|
|
||||||
type: object
|
|
||||||
description: Configuration for my_processor
|
|
||||||
properties:
|
|
||||||
some_option:
|
|
||||||
type: boolean
|
|
||||||
default: true
|
|
||||||
description: Enable some behaviour
|
|
||||||
|
|
||||||
paths:
|
|
||||||
/watch:
|
|
||||||
post:
|
|
||||||
x-code-samples:
|
|
||||||
- lang: curl
|
|
||||||
label: my_processor example
|
|
||||||
source: |
|
|
||||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
|
||||||
-H "x-api-key: YOUR_API_KEY" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"url": "https://example.com",
|
|
||||||
"processor": "my_processor",
|
|
||||||
"processor_config_my_processor": { "some_option": true }
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
The `paths` section in `api.yaml` is used only for injecting additional `x-code-samples` into
|
|
||||||
existing endpoints — you cannot define new routes via plugin.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
### Built-in plugin: `restock_diff`
|
|
||||||
|
|
||||||
The `restock_diff` processor is always shipped with changedetection.io. It monitors product
|
|
||||||
availability and price changes using structured data (JSON-LD / schema.org microdata) and
|
|
||||||
text heuristics. It is activated by setting `"processor": "restock_diff"` on a watch.
|
|
||||||
|
|
||||||
It adds the `processor_config_restock_diff` block to the Watch schema with these fields:
|
|
||||||
|
|
||||||
| Field | Type | Default | Description |
|
|
||||||
|---|---|---|---|
|
|
||||||
| `in_stock_processing` | string | `in_stock_only` | `in_stock_only` — only alert Out-of-Stock→In-Stock · `all_changes` — alert any availability change · `off` — disable stock tracking |
|
|
||||||
| `follow_price_changes` | boolean | `true` | Monitor and alert on price changes |
|
|
||||||
| `price_change_min` | number\|null | — | Alert when price drops **below** this value |
|
|
||||||
| `price_change_max` | number\|null | — | Alert when price rises **above** this value |
|
|
||||||
| `price_change_threshold_percent` | number\|null | — | Minimum % change since the original price to trigger an alert |
|
|
||||||
|
|
||||||
#### CREATE — Add a restock/price monitor
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X POST "http://localhost:5000/api/v1/watch" \
|
|
||||||
-H "x-api-key: YOUR_API_KEY" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"url": "https://example.com/product/widget",
|
|
||||||
"processor": "restock_diff",
|
|
||||||
"processor_config_restock_diff": {
|
|
||||||
"in_stock_processing": "in_stock_only",
|
|
||||||
"follow_price_changes": true,
|
|
||||||
"price_change_threshold_percent": 5
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### READ — Retrieve the monitor
|
|
||||||
|
|
||||||
The response JSON includes `processor_config_restock_diff` alongside all standard watch fields:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X GET "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
|
||||||
-H "x-api-key: YOUR_API_KEY"
|
|
||||||
```
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"uuid": "cc0cfffa-f449-477b-83ea-0caafd1dc091",
|
|
||||||
"url": "https://example.com/product/widget",
|
|
||||||
"processor": "restock_diff",
|
|
||||||
"processor_config_restock_diff": {
|
|
||||||
"in_stock_processing": "in_stock_only",
|
|
||||||
"follow_price_changes": true,
|
|
||||||
"price_change_threshold_percent": 5,
|
|
||||||
"price_change_min": null,
|
|
||||||
"price_change_max": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
#### UPDATE — Change thresholds without recreating the monitor
|
|
||||||
|
|
||||||
Only fields included in the request body are updated; omitted fields are left unchanged.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X PUT "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
|
||||||
-H "x-api-key: YOUR_API_KEY" \
|
|
||||||
-H "Content-Type: application/json" \
|
|
||||||
-d '{
|
|
||||||
"processor_config_restock_diff": {
|
|
||||||
"in_stock_processing": "all_changes",
|
|
||||||
"follow_price_changes": true,
|
|
||||||
"price_change_min": 10.00,
|
|
||||||
"price_change_max": 500.00
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
```
|
|
||||||
|
|
||||||
#### DELETE — Remove the monitor
|
|
||||||
|
|
||||||
```bash
|
|
||||||
curl -X DELETE "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091" \
|
|
||||||
-H "x-api-key: YOUR_API_KEY"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
For the complete schema-validated documentation including all processor fields, fetch the live spec
|
|
||||||
and load it into Swagger UI or Redoc:
|
|
||||||
|
|
||||||
```
|
|
||||||
GET /api/v1/full-spec
|
|
||||||
```
|
|
||||||
|
|
||||||
components:
|
components:
|
||||||
securitySchemes:
|
securitySchemes:
|
||||||
@@ -2069,35 +1921,15 @@ paths:
|
|||||||
|
|
||||||
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
|
Unlike the static `api-spec.yaml` shipped with the application, this endpoint returns the
|
||||||
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
|
spec dynamically merged with any `api.yaml` schemas provided by installed processor plugins.
|
||||||
|
Use this URL with Swagger UI or Redoc to get accurate documentation for your specific install.
|
||||||
**Use this URL** with Swagger UI or Redoc to get schema-accurate documentation for your
|
|
||||||
specific install — it includes every `processor_config_<name>` schema block contributed by
|
|
||||||
installed processors (e.g. `processor_config_restock_diff` from the built-in restock plugin).
|
|
||||||
|
|
||||||
This endpoint requires no authentication and returns YAML.
|
|
||||||
|
|
||||||
To load it directly in Swagger UI, paste the URL into the "Explore" box:
|
|
||||||
```
|
|
||||||
http://localhost:5000/api/v1/full-spec
|
|
||||||
```
|
|
||||||
security: []
|
security: []
|
||||||
x-code-samples:
|
x-code-samples:
|
||||||
- lang: 'curl'
|
- lang: 'curl'
|
||||||
source: |
|
source: |
|
||||||
# Fetch the live merged spec (no API key needed)
|
|
||||||
curl -X GET "http://localhost:5000/api/v1/full-spec"
|
curl -X GET "http://localhost:5000/api/v1/full-spec"
|
||||||
- lang: 'Python'
|
|
||||||
source: |
|
|
||||||
import requests
|
|
||||||
|
|
||||||
# No authentication required
|
|
||||||
response = requests.get('http://localhost:5000/api/v1/full-spec')
|
|
||||||
print(response.text) # Returns YAML
|
|
||||||
responses:
|
responses:
|
||||||
'200':
|
'200':
|
||||||
description: |
|
description: Merged OpenAPI specification in YAML format
|
||||||
Merged OpenAPI specification in YAML format. Includes all processor plugin schemas
|
|
||||||
(e.g. `processor_config_restock_diff`) not present in the static `api-spec.yaml`.
|
|
||||||
content:
|
content:
|
||||||
application/yaml:
|
application/yaml:
|
||||||
schema:
|
schema:
|
||||||
|
|||||||
+9
-353
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user