#!/usr/bin/env python3

import io
import re
import time
from zipfile import ZipFile, ZIP_DEFLATED

from flask import url_for

from changedetectionio.model import Watch, Tag
from .util import set_original_response, live_server_setup, wait_for_all_checks


def test_backup(client, live_server, measure_memory_usage, datastore_path):
    set_original_response(datastore_path=datastore_path)

    # Add our URL to the import page
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": url_for('test_endpoint', _external=True) + "?somechar=őőőőőőőő"},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Launch the thread in the background to create the backup
    res = client.get(
        url_for("backups.request_backup"),
        follow_redirects=True
    )
    time.sleep(4)

    res = client.get(
        url_for("backups.create"),
        follow_redirects=True
    )
    # Can see the download link to the backup
    assert b'<a href' in res.data

    # Download the most recent backup
    res = client.get(
        url_for("backups.download_backup", filename="latest"),
        follow_redirects=True
    )
    # Should get the right zip content type, and a PK/ZIP stream
    assert res.content_type == "application/zip"
    assert res.data.count(b'PK') >= 2

    backup = ZipFile(io.BytesIO(res.data))
    names = backup.namelist()

    # Check for UUID-based txt files (history, snapshot, and last-checksum)
    uuid4hex_txt = re.compile(r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
    txt_files = list(filter(uuid4hex_txt.match, names))
    # Should be three txt files in the archive (history, snapshot, and last-checksum)
    assert len(txt_files) == 3

    # Check for watch.json files (new format)
    uuid4hex_json = re.compile(r'^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}/watch\.json$', re.I)
    json_files = list(filter(uuid4hex_json.match, names))
    # Should be one watch.json file in the archive (the imported watch)
    assert len(json_files) == 1, f"Expected 1 watch.json file, found {len(json_files)}: {json_files}"

    # Check for changedetection.json (settings file)
    assert 'changedetection.json' in names, "changedetection.json should be in backup"

    # secret.txt must never be included: it contains the Flask session key
    assert 'secret.txt' not in names, "secret.txt (Flask session key) must not be included in backup"

    # Remove all backups
    res = client.get(
        url_for("backups.remove_backups"),
        follow_redirects=True
    )
    assert b'No backups found.' in res.data
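
# For reference, the full-backup zip exercised above is expected to look roughly
# like this. This is a sketch based on the assertions only: the concrete .txt
# file names are illustrative, and just the counts plus the watch.json /
# changedetection.json paths are actually asserted.
#
#   <watch-uuid>/<timestamp>.txt     # snapshot text
#   <watch-uuid>/history.txt         # history index (name is an assumption)
#   <watch-uuid>/last-checksum.txt   # last checksum (name is an assumption)
#   <watch-uuid>/watch.json          # per-watch settings (new format)
#   changedetection.json             # application settings
#   ...and never secret.txt, which holds the Flask session key.
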

def test_watch_data_package_download(client, live_server, measure_memory_usage, datastore_path):
    """Test downloading a single watch's data as a zip package"""
    set_original_response(datastore_path=datastore_path)

    uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
    tag_uuid = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag")
    tag_uuid2 = client.application.config.get('DATASTORE').add_tag(title="Tasty backup tag number two")
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Download the watch data package
    res = client.get(url_for("ui.ui_edit.watch_get_data_package", uuid=uuid))

    # Should get the right zip content type
    assert res.content_type == "application/zip"
    # Should be a PK/ZIP stream (PKzip header)
    assert res.data[:2] == b'PK', "File should start with PK (PKzip header)"
    assert res.data.count(b'PK') >= 2, "Should have multiple PK markers (zip file structure)"

    # Verify zip contents
    backup = ZipFile(io.BytesIO(res.data))
    files = backup.namelist()

    # Should have files in a UUID directory
    assert any(uuid in f for f in files), f"Files should be in UUID directory: {files}"

    # Should contain watch.json
    watch_json_path = f"{uuid}/watch.json"
    assert watch_json_path in files, f"Should contain watch.json, got: {files}"

    # Should contain history/snapshot files
    uuid4hex_txt = re.compile(f'^{re.escape(uuid)}/.*\\.txt', re.I)
    txt_files = list(filter(uuid4hex_txt.match, files))
    assert len(txt_files) > 0, f"Should have at least one .txt file (history/snapshot), got: {files}"
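
# Handy when debugging a failing package download: the same zip can be poked at
# interactively. Illustrative only (not executed by the test run), and it
# assumes watch.json is plain JSON, which the tests above do not verify:
#
#   import io, json
#   from zipfile import ZipFile
#   pkg = ZipFile(io.BytesIO(res.data))
#   watch_settings = json.loads(pkg.read(f"{uuid}/watch.json"))
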

def test_backup_restore(client, live_server, measure_memory_usage, datastore_path):
    """Test that a full backup zip can be restored: watches and tags survive a round-trip."""
    set_original_response(datastore_path=datastore_path)

    datastore = live_server.app.config['DATASTORE']
    watch_url = url_for('test_endpoint', _external=True)

    # Set up: one watch and two tags
    uuid = datastore.add_watch(url=watch_url)
    tag_uuid = datastore.add_tag(title="Tasty backup tag")
    tag_uuid2 = datastore.add_tag(title="Tasty backup tag number two")
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    wait_for_all_checks(client)

    # Create a full backup
    client.get(url_for("backups.request_backup"), follow_redirects=True)
    time.sleep(4)

    # Download the latest backup zip
    res = client.get(url_for("backups.download_backup", filename="latest"), follow_redirects=True)
    assert res.content_type == "application/zip"
    zip_data = res.data

    # Confirm the zip contains both watch.json and tag.json entries
    backup = ZipFile(io.BytesIO(zip_data))
    names = backup.namelist()
    assert f"{uuid}/watch.json" in names, f"watch.json missing from backup: {names}"
    assert f"{tag_uuid}/tag.json" in names, f"tag.json for tag 1 missing from backup: {names}"
    assert f"{tag_uuid2}/tag.json" in names, f"tag.json for tag 2 missing from backup: {names}"

    # --- Wipe everything ---
    datastore.delete('all')
    client.get(url_for("tags.delete_all"), follow_redirects=True)
    assert uuid not in datastore.data['watching'], "Watch should be gone after delete"
    assert tag_uuid not in datastore.data['settings']['application']['tags'], "Tag 1 should be gone after delete"
    assert tag_uuid2 not in datastore.data['settings']['application']['tags'], "Tag 2 should be gone after delete"

    # --- Restore from the backup zip ---
    res = client.post(
        url_for("backups.restore.backups_restore_start"),
        data={
            'zip_file': (io.BytesIO(zip_data), 'backup.zip'),
            'include_groups': 'y',
            'include_groups_replace_existing': 'y',
            'include_watches': 'y',
            'include_watches_replace_existing': 'y',
        },
        content_type='multipart/form-data',
        follow_redirects=True
    )
    assert res.status_code == 200

    # Wait for the restore thread to finish
    time.sleep(2)

    # --- Watch checks ---
    restored_watch = datastore.data['watching'].get(uuid)
    assert restored_watch is not None, f"Watch {uuid} not found after restore"
    assert restored_watch['url'] == watch_url, "Restored watch URL does not match"
    assert isinstance(restored_watch, Watch.model), \
        f"Watch not properly rehydrated, got {type(restored_watch)}"
    assert restored_watch.history_n >= 1, \
        f"Restored watch should have at least 1 history entry, got {restored_watch.history_n}"

    # --- Tag checks ---
    restored_tags = datastore.data['settings']['application']['tags']
    restored_tag = restored_tags.get(tag_uuid)
    assert restored_tag is not None, f"Tag {tag_uuid} not found after restore"
    assert restored_tag['title'] == "Tasty backup tag", "Restored tag 1 title does not match"
    assert isinstance(restored_tag, Tag.model), \
        f"Tag 1 not properly rehydrated, got {type(restored_tag)}"

    restored_tag2 = restored_tags.get(tag_uuid2)
    assert restored_tag2 is not None, f"Tag {tag_uuid2} not found after restore"
    assert restored_tag2['title'] == "Tasty backup tag number two", "Restored tag 2 title does not match"
    assert isinstance(restored_tag2, Tag.model), \
        f"Tag 2 not properly rehydrated, got {type(restored_tag2)}"


def test_backup_restore_zip_slip_rejected(client, live_server, measure_memory_usage, datastore_path):
    """Zip Slip path traversal entries in a restore zip must be rejected."""
    import pytest
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # Build a zip with a path traversal entry that would escape the extraction dir
    malicious_zip = io.BytesIO()
    with ZipFile(malicious_zip, 'w') as zf:
        zf.writestr("../escaped.txt", "ATTACKER-CONTROLLED")
    malicious_zip.seek(0)

    datastore = live_server.app.config['DATASTORE']
    with pytest.raises(ValueError, match="Zip Slip"):
        import_from_zip(
            zip_stream=malicious_zip,
            datastore=datastore,
            include_groups=True,
            include_groups_replace=True,
            include_watches=True,
            include_watches_replace=True,
        )


def test_backup_restore_zip_bomb_rejected(client, live_server, measure_memory_usage, datastore_path):
    """A zip whose total uncompressed size exceeds the limit must be rejected.

    The guard reads file_size from the zip central-directory metadata; no actual
    decompression happens, so this test is fast and uses minimal RAM. 100 KB of
    zeros compresses to ~100 bytes, and monkeypatching the limit down to 50 KB
    is enough to trigger the check without creating any large files.
    """
    import pytest
    import changedetectionio.blueprint.backups.restore as restore_mod
    from changedetectionio.blueprint.backups.restore import import_from_zip

    # ~100 KB of zeros → deflate compresses to ~100 bytes, but file_size metadata = 100 KB
    bomb_zip = io.BytesIO()
    with ZipFile(bomb_zip, 'w', compression=ZIP_DEFLATED) as zf:
        zf.writestr("data.txt", b"\x00" * (100 * 1024))
    bomb_zip.seek(0)

    datastore = live_server.app.config['DATASTORE']
    original_limit = restore_mod._MAX_DECOMPRESSED_BYTES
    try:
        restore_mod._MAX_DECOMPRESSED_BYTES = 50 * 1024  # 50 KB limit for this test
        with pytest.raises(ValueError, match="decompressed size"):
            import_from_zip(
                zip_stream=bomb_zip,
                datastore=datastore,
                include_groups=True,
                include_groups_replace=True,
                include_watches=True,
                include_watches_replace=True,
            )
    finally:
        restore_mod._MAX_DECOMPRESSED_BYTES = original_limit
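

# For context, a minimal sketch of the guards the two tests above exercise,
# assuming import_from_zip validates entries roughly like this. The helper
# name, its signature, and the exact error wording are assumptions made for
# illustration; only the "Zip Slip" / "decompressed size" ValueError fragments
# are what the tests actually match on.
def _reference_zip_guard_sketch(zf, extract_dir, max_decompressed_bytes):
    import os
    base = os.path.realpath(extract_dir)
    total = 0
    for info in zf.infolist():
        # Zip Slip: reject any entry whose resolved path escapes extract_dir
        dest = os.path.realpath(os.path.join(base, info.filename))
        if dest != base and not dest.startswith(base + os.sep):
            raise ValueError(f"Zip Slip attempt rejected: {info.filename}")
        # Zip bomb: info.file_size comes from the central directory, so this
        # cumulative check needs no decompression work at all
        total += info.file_size
        if total > max_decompressed_bytes:
            raise ValueError("Refusing restore, total decompressed size exceeds limit")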