Files
changedetection.io/changedetectionio/tests/test_backup.py
T

261 lines
10 KiB
Python

#!/usr/bin/env python3
from .util import set_original_response, live_server_setup, wait_for_all_checks
from flask import url_for
import io
from zipfile import ZipFile, ZIP_DEFLATED
import re
import time
from changedetectionio.model import Watch, Tag
def test_backup(client, live_server, measure_memory_usage, datastore_path):
    """Create a full backup via the UI and verify the downloaded archive.

    Imports a single watch (its URL carries non-ASCII query characters),
    requests a backup, downloads the latest zip, checks the archive's member
    list, and finally removes all backups again.
    """
    set_original_response(datastore_path=datastore_path)

    # Add our URL to the import page
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": url_for('test_endpoint', _external=True) + "?somechar=őőőőőőőő"},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Launch the thread in the background to create the backup
    client.get(url_for("backups.request_backup"), follow_redirects=True)
    # Fixed pause; presumably long enough for the background backup to finish
    time.sleep(4)

    res = client.get(url_for("backups.create"), follow_redirects=True)
    # Can see the download link to the backup
    assert b'<a href="/backups/download/changedetection-backup-20' in res.data
    assert b'Remove backups' in res.data

    # Get the latest one
    res = client.get(
        url_for("backups.download_backup", filename="latest"),
        follow_redirects=True
    )
    # Should get the right zip content type
    assert res.content_type == "application/zip"
    # Should be PK/ZIP stream
    assert res.data.count(b'PK') >= 2

    # Only the member names are needed, so close the archive straight away
    with ZipFile(io.BytesIO(res.data)) as backup:
        archive_names = backup.namelist()

    # Check for UUID-based txt files (history, snapshot, and last-checksum).
    # Raw strings keep the regex escapes out of Python's own escape handling.
    uuid4hex_txt = re.compile(
        r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt',
        re.I
    )
    txt_files = [name for name in archive_names if uuid4hex_txt.match(name)]
    # Should be three txt files in the archive (history, snapshot, and last-checksum)
    assert len(txt_files) == 3

    # Check for watch.json files (new format)
    uuid4hex_json = re.compile(
        r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$',
        re.I
    )
    json_files = [name for name in archive_names if uuid4hex_json.match(name)]
    # Should be one watch.json file in the archive (the imported watch)
    assert len(json_files) == 1, f"Expected 1 watch.json file, found {len(json_files)}: {json_files}"

    # Check for changedetection.json (settings file)
    assert 'changedetection.json' in archive_names, "changedetection.json should be in backup"

    # secret.txt must never be included — it contains the Flask session key
    assert 'secret.txt' not in archive_names, "secret.txt (Flask session key) must not be included in backup"

    # Remove all backups and confirm the listing is empty afterwards
    res = client.get(
        url_for("backups.remove_backups"),
        follow_redirects=True
    )
    assert b'No backups found.' in res.data
def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
    """Test downloading a single watch's data as a zip package.

    Adds one watch and two tags, triggers a check, downloads the watch's data
    package and verifies the archive holds ``<uuid>/watch.json`` plus at least
    one ``.txt`` file under the watch's UUID directory.
    """
    set_original_response(datastore_path=datastore_path)

    datastore = client.application.config.get('DATASTORE')
    uuid = datastore.add_watch(url=url_for('test_endpoint', _external=True))
    # The tags are created only for their side effect; their UUIDs are not used below
    datastore.add_tag(title="Tasty backup tag")
    datastore.add_tag(title="Tasty backup tag number two")

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Download the watch data package
    res = client.get(url_for("ui.ui_edit.watch_get_data_package", uuid=uuid))

    # Should get the right zip content type
    assert res.content_type == "application/zip"

    # Should be PK/ZIP stream (PKzip header)
    assert res.data[:2] == b'PK', "File should start with PK (PKzip header)"
    assert res.data.count(b'PK') >= 2, "Should have multiple PK markers (zip file structure)"

    # Verify zip contents; only the member names are needed, so close the archive right away
    with ZipFile(io.BytesIO(res.data)) as backup:
        files = backup.namelist()

    # Should have files in a UUID directory
    assert any(uuid in f for f in files), f"Files should be in UUID directory: {files}"

    # Should contain watch.json
    watch_json_path = f"{uuid}/watch.json"
    assert watch_json_path in files, f"Should contain watch.json, got: {files}"

    # Should contain history/snapshot files
    uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
    txt_files = [f for f in files if uuid4hex_txt.match(f)]
    assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
    """Round-trip check: back up one watch and two tags, wipe them, restore the zip, verify them."""
    set_original_response(datastore_path=datastore_path)

    store = live_server.app.config['DATASTORE']
    endpoint = url_for('test_endpoint', _external=True)

    # Fixture data: a single watch plus two tags
    uuid = store.add_watch(url=endpoint)
    tag_uuid = store.add_tag(title="Tasty backup tag")
    tag_uuid2 = store.add_tag(title="Tasty backup tag number two")
    # (ordinal used in assertion messages, tag uuid, expected title)
    expected_tags = (
        (1, tag_uuid, "Tasty backup tag"),
        (2, tag_uuid2, "Tasty backup tag number two"),
    )

    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Request a full backup, then pause before fetching it
    client.get(url_for("backups.request_backup"), follow_redirects=True)
    time.sleep(4)

    # Fetch the newest backup archive
    download = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
    assert download.content_type == "application/zip"
    zip_data = download.data

    # The archive must carry the watch.json and both tag.json entries
    names = ZipFile(io.BytesIO(zip_data)).namelist()
    assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
    for ordinal, t_uuid, _title in expected_tags:
        assert f"{t_uuid}/tag.json" in names, f"tag.json for tag {ordinal} missing from backup: {names}"

    # --- Remove the watch and the tags ---
    store.delete('all')
    client.get(url_for("tags.delete_all"), follow_redirects=True)
    assert uuid not in store.data['watching'], "Watch should be gone after delete"
    for ordinal, t_uuid, _title in expected_tags:
        assert t_uuid not in store.data['settings']['application']['tags'], \
            f"Tag {ordinal} should be gone after delete"

    # --- Upload the zip to the restore endpoint ---
    restore_url = url_for("backups.restore.backups_restore_start")
    restore_form = {
        'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
        'include_groups': 'y',
        'include_groups_replace_existing': 'y',
        'include_watches': 'y',
        'include_watches_replace_existing': 'y',
    }
    res = client.post(
        restore_url,
        data=restore_form,
        content_type='multipart/form-data',
        follow_redirects=True
    )
    assert res.status_code == 200

    # Give the restore thread time to finish
    time.sleep(2)

    # --- The watch is back, typed correctly, with history ---
    restored_watch = store.data['watching'].get(uuid)
    assert restored_watch is not None, f"Watch {uuid} not found after restore"
    assert restored_watch['url'] == endpoint, "Restored watch URL does not match"
    assert isinstance(restored_watch, Watch.model), \
        f"Watch not properly rehydrated, got {type(restored_watch)}"
    assert restored_watch.history_n >= 1, \
        f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"

    # --- Both tags are back with the right titles and types ---
    restored_tags = store.data['settings']['application']['tags']
    for ordinal, t_uuid, title in expected_tags:
        restored_tag = restored_tags.get(t_uuid)
        assert restored_tag is not None, f"Tag {t_uuid} not found after restore"
        assert restored_tag['title'] == title, f"Restored tag {ordinal} title does not match"
        assert isinstance(restored_tag, Tag.model), \
            f"Tag {ordinal} not properly rehydrated, got {type(restored_tag)}"
def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
    """A restore archive with a path-traversal member must raise a "Zip Slip" ValueError."""
    import pytest
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # Archive with one member whose name climbs out of the extraction directory
    payload = io.BytesIO()
    with ZipFile(payload, 'w') as archive:
        archive.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
    payload.seek(0)

    restore_kwargs = {
        'zip_stream': payload,
        'datastore': live_server.app.config['DATASTORE'],
        'include_groups': True,
        'include_groups_replace': True,
        'include_watches': True,
        'include_watches_replace': True,
    }

    with pytest.raises(ValueError, match="Zip Slip"):
        import_from_zip(**restore_kwargs)
def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
    """An archive whose uncompressed size exceeds the configured limit must be rejected.

    The module-level ``_MAX_DECOMPRESSED_BYTES`` limit is temporarily lowered
    to 50 KB and a deflated archive holding a single 100 KB member of zero
    bytes is passed to ``import_from_zip``, which is expected to raise a
    ``ValueError`` mentioning "decompressed size". The original limit is put
    back afterwards, whether or not the assertion holds.

    NOTE(review): the guard is understood to rely on the sizes declared in the
    archive metadata rather than on actual decompression — confirm in restore.py.
    """
    import pytest
    import changedetectionio.blueprint.backups.restore as restore_mod
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # Highly compressible payload: tiny on disk, 100 KB once inflated
    oversized = io.BytesIO()
    with ZipFile(oversized, 'w', compression=ZIP_DEFLATED) as archive:
        archive.writestr("data.txt", b"\x00" * (100 * 1024))
    oversized.seek(0)

    restore_kwargs = {
        'zip_stream': oversized,
        'datastore': live_server.app.config['DATASTORE'],
        'include_groups': True,
        'include_groups_replace': True,
        'include_watches': True,
        'include_watches_replace': True,
    }

    saved_limit = restore_mod._MAX_DECOMPRESSED_BYTES
    try:
        # 50 KB limit for this test
        restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024
        with pytest.raises(ValueError, match="decompressed size"):
            import_from_zip(**restore_kwargs)
    finally:
        restore_mod._MAX_DECOMPRESSED_BYTES = saved_limit