From 26ce7ec7bc2026ecc346f70eb2d42cf225e9ce8f Mon Sep 17 00:00:00 2001 From: Cam Mannett Date: Sat, 30 Mar 2024 09:18:03 +0000 Subject: [PATCH] Vulkan acceleration structure manager Manages the capturing, serialising, and replay of acceleration structures for Vulkan. It works in a similar way to how device memory is handled: * A temporary host-accessible buffer is created for each AS * The AS is serialised (in the Vulkan AS sense of the word) into it * The buffer handle is stored in the initial contents and downloaded when appropriate * Replay is handled similarly but in reverse * Workaround added for broken Mali AS serialising Note this is missing the descriptor and SPIR-V handling, that work will follow in a later PR. The serialiser version has been bumped for backwards compatibility with 'RenderDoc for Arm GPUs 2024.0'. --- renderdoc/driver/vulkan/CMakeLists.txt | 2 + .../driver/vulkan/renderdoc_vulkan.vcxproj | 2 + .../vulkan/renderdoc_vulkan.vcxproj.filters | 6 + .../vulkan/vk_acceleration_structure.cpp | 392 ++++++++++++++++++ .../driver/vulkan/vk_acceleration_structure.h | 59 +++ renderdoc/driver/vulkan/vk_common.cpp | 36 +- renderdoc/driver/vulkan/vk_common.h | 16 +- renderdoc/driver/vulkan/vk_core.cpp | 44 +- renderdoc/driver/vulkan/vk_core.h | 32 +- renderdoc/driver/vulkan/vk_initstate.cpp | 31 +- renderdoc/driver/vulkan/vk_manager.cpp | 10 +- renderdoc/driver/vulkan/vk_manager.h | 2 + renderdoc/driver/vulkan/vk_memory.cpp | 9 +- renderdoc/driver/vulkan/vk_replay.cpp | 6 +- renderdoc/driver/vulkan/vk_serialise.cpp | 19 +- .../driver/vulkan/wrappers/vk_cmd_funcs.cpp | 67 ++- .../vulkan/wrappers/vk_device_funcs.cpp | 43 ++ .../driver/vulkan/wrappers/vk_get_funcs.cpp | 11 + .../vulkan/wrappers/vk_resource_funcs.cpp | 50 ++- 19 files changed, 770 insertions(+), 67 deletions(-) create mode 100644 renderdoc/driver/vulkan/vk_acceleration_structure.cpp create mode 100644 renderdoc/driver/vulkan/vk_acceleration_structure.h diff --git 
a/renderdoc/driver/vulkan/CMakeLists.txt b/renderdoc/driver/vulkan/CMakeLists.txt index cb2594262..58511977c 100644 --- a/renderdoc/driver/vulkan/CMakeLists.txt +++ b/renderdoc/driver/vulkan/CMakeLists.txt @@ -40,6 +40,8 @@ set(sources vk_serialise.cpp vk_stringise.cpp vk_layer.cpp + vk_acceleration_structure.h + vk_acceleration_structure.cpp imagestate_tests.cpp imgrefs_tests.cpp official/vk_layer.h diff --git a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj index 1dabd623d..5ee1574f7 100644 --- a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj +++ b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj @@ -155,6 +155,7 @@ true + @@ -186,6 +187,7 @@ + diff --git a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters index 7abbafeab..7d1445b16 100644 --- a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters +++ b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters @@ -151,6 +151,9 @@ Replay + + Replay + @@ -159,6 +162,9 @@ Replay + + Replay + Util diff --git a/renderdoc/driver/vulkan/vk_acceleration_structure.cpp b/renderdoc/driver/vulkan/vk_acceleration_structure.cpp new file mode 100644 index 000000000..b1d7b056d --- /dev/null +++ b/renderdoc/driver/vulkan/vk_acceleration_structure.cpp @@ -0,0 +1,392 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and 
this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#include "vk_acceleration_structure.h" +#include "core/settings.h" +#include "vk_core.h" + +RDOC_EXTERN_CONFIG(bool, Vulkan_Debug_SingleSubmitFlushing); + +namespace +{ +// Although the serialised data is implementation-defined in general, the header is defined: +// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/chap37.html#vkCmdCopyAccelerationStructureToMemoryKHR +constexpr std::size_t handleCountOffset = VK_UUID_SIZE + VK_UUID_SIZE + 8 + 8; +constexpr VkDeviceSize handleCountSize = 8; + +// Spec says VkCopyAccelerationStructureToMemoryInfoKHR::dst::deviceAddress must be 256 bytes aligned +constexpr VkDeviceSize asBufferAlignment = 256; +} + +bool VulkanAccelerationStructureManager::Prepare(VkAccelerationStructureKHR unwrappedAs, + const rdcarray &queueFamilyIndices, + ASMemory &result) +{ + const VkDeviceSize serialisedSize = SerialisedASSize(unwrappedAs); + + const VkDevice d = m_pDriver->GetDev(); + VkResult vkr = VK_SUCCESS; + + // since this happens during capture, we don't want to start serialising extra buffer creates, + // leave this buffer as unwrapped + VkBuffer dstBuf = VK_NULL_HANDLE; + + VkBufferCreateInfo bufInfo = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + NULL, + 0, + serialisedSize, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + 
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + }; + + // we make the buffer concurrently accessible by all queue families to not invalidate the + // contents of the memory we're reading back from. + bufInfo.sharingMode = VK_SHARING_MODE_CONCURRENT; + bufInfo.queueFamilyIndexCount = (uint32_t)queueFamilyIndices.size(); + bufInfo.pQueueFamilyIndices = queueFamilyIndices.data(); + + // spec requires that CONCURRENT must specify more than one queue family. If there is only one + // queue family, we can safely use exclusive. + if(bufInfo.queueFamilyIndexCount == 1) + bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + vkr = ObjDisp(d)->CreateBuffer(Unwrap(d), &bufInfo, NULL, &dstBuf); + m_pDriver->CheckVkResult(vkr); + + m_pDriver->AddPendingObjectCleanup( + [d, dstBuf]() { ObjDisp(d)->DestroyBuffer(Unwrap(d), dstBuf, NULL); }); + + VkMemoryRequirements mrq = {}; + ObjDisp(d)->GetBufferMemoryRequirements(Unwrap(d), dstBuf, &mrq); + + mrq.alignment = RDCMAX(mrq.alignment, asBufferAlignment); + + const MemoryAllocation readbackmem = m_pDriver->AllocateMemoryForResource( + true, mrq, MemoryScope::InitialContents, MemoryType::Readback); + if(readbackmem.mem == VK_NULL_HANDLE) + return false; + + vkr = ObjDisp(d)->BindBufferMemory(Unwrap(d), dstBuf, Unwrap(readbackmem.mem), readbackmem.offs); + m_pDriver->CheckVkResult(vkr); + + const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL, + dstBuf}; + const VkDeviceAddress dstBufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo); + + VkCommandBuffer cmd = m_pDriver->GetInitStateCmd(); + if(cmd == VK_NULL_HANDLE) + { + RDCERR("Couldn't acquire command buffer"); + return false; + } + + const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize; + byte *mappedDstBuffer = NULL; + VkDeviceSize size; + + if(m_pDriver->GetDriverInfo().MaliBrokenASDeviceSerialisation()) + { + size = AlignUp(serialisedSize, nonCoherentAtomSize); + + vkr = 
ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(readbackmem.mem), readbackmem.offs, size, 0, + (void **)&mappedDstBuffer); + m_pDriver->CheckVkResult(vkr); + + // Copy the data using host-commands but into mapped memory + VkCopyAccelerationStructureToMemoryInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, NULL}; + copyInfo.src = unwrappedAs; + copyInfo.dst.hostAddress = mappedDstBuffer; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR; + ObjDisp(d)->CopyAccelerationStructureToMemoryKHR(Unwrap(d), VK_NULL_HANDLE, ©Info); + } + else + { + VkCopyAccelerationStructureToMemoryInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, NULL}; + copyInfo.src = unwrappedAs; + copyInfo.dst.deviceAddress = dstBufAddr; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR; + ObjDisp(d)->CmdCopyAccelerationStructureToMemoryKHR(Unwrap(cmd), ©Info); + + // It's not ideal but we have to flush here because we need to map the data in order to read + // the BLAS addresses which means we need to have ensured that it has been copied beforehand + m_pDriver->CloseInitStateCmd(); + m_pDriver->SubmitCmds(); + m_pDriver->FlushQ(); + + // Now serialised AS data has been copied to a readable buffer, we need to expose the data to + // the host + size = AlignUp(handleCountOffset + handleCountSize, nonCoherentAtomSize); + + vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(readbackmem.mem), readbackmem.offs, size, 0, + (void **)&mappedDstBuffer); + m_pDriver->CheckVkResult(vkr); + } + + // invalidate the cpu cache for this memory range to avoid reading stale data + const VkMappedMemoryRange range = { + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(readbackmem.mem), readbackmem.offs, size, + }; + vkr = ObjDisp(d)->InvalidateMappedMemoryRanges(Unwrap(d), 1, &range); + m_pDriver->CheckVkResult(vkr); + + // Count the BLAS device addresses to update the AS type + const uint64_t handleCount = *(uint64_t 
*)(mappedDstBuffer + handleCountOffset); + result = {readbackmem, true}; + result.isTLAS = handleCount > 0; + + ObjDisp(d)->UnmapMemory(Unwrap(d), Unwrap(result.alloc.mem)); + + return true; +} + +template +bool VulkanAccelerationStructureManager::Serialise(SerialiserType &ser, ResourceId id, + const VkInitialContents *initial, + CaptureState state) +{ + VkDevice d = !IsStructuredExporting(state) ? m_pDriver->GetDev() : VK_NULL_HANDLE; + const bool replayingAndReading = ser.IsReading() && IsReplayMode(state); + VkResult vkr = VK_SUCCESS; + + byte *contents = NULL; + uint64_t contentsSize = initial ? initial->mem.size : 0; + MemoryAllocation mappedMem; + + // Serialise this separately so that it can be used on reading to prepare the upload memory + SERIALISE_ELEMENT(contentsSize); + + const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize; + + // the memory/buffer that we allocated on read, to upload the initial contents. + MemoryAllocation uploadMemory; + VkBuffer uploadBuf = VK_NULL_HANDLE; + + if(ser.IsWriting()) + { + if(initial && initial->mem.mem != VK_NULL_HANDLE) + { + const VkDeviceSize size = AlignUp(initial->mem.size, nonCoherentAtomSize); + + mappedMem = initial->mem; + vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(mappedMem.mem), initial->mem.offs, size, 0, + (void **)&contents); + m_pDriver->CheckVkResult(vkr); + + // invalidate the cpu cache for this memory range to avoid reading stale data + const VkMappedMemoryRange range = { + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(mappedMem.mem), mappedMem.offs, size, + }; + + vkr = ObjDisp(d)->InvalidateMappedMemoryRanges(Unwrap(d), 1, &range); + m_pDriver->CheckVkResult(vkr); + } + } + else if(IsReplayMode(state) && !ser.IsErrored()) + { + // create a buffer with memory attached, which we will fill with the initial contents + const VkBufferCreateInfo bufInfo = { + VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + NULL, + 0, + contentsSize, + 
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + }; + + vkr = m_pDriver->vkCreateBuffer(d, &bufInfo, NULL, &uploadBuf); + m_pDriver->CheckVkResult(vkr); + + VkMemoryRequirements mrq = {}; + m_pDriver->vkGetBufferMemoryRequirements(d, uploadBuf, &mrq); + + mrq.alignment = RDCMAX(mrq.alignment, asBufferAlignment); + + uploadMemory = m_pDriver->AllocateMemoryForResource(true, mrq, MemoryScope::InitialContents, + MemoryType::Upload); + + if(uploadMemory.mem == VK_NULL_HANDLE) + return false; + + vkr = m_pDriver->vkBindBufferMemory(d, uploadBuf, uploadMemory.mem, uploadMemory.offs); + m_pDriver->CheckVkResult(vkr); + + mappedMem = uploadMemory; + + vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(mappedMem.mem), mappedMem.offs, + AlignUp(mappedMem.size, nonCoherentAtomSize), 0, (void **)&contents); + m_pDriver->CheckVkResult(vkr); + + if(!contents) + { + RDCERR("Manually reporting failed memory map"); + m_pDriver->CheckVkResult(VK_ERROR_MEMORY_MAP_FAILED); + return false; + } + + if(vkr != VK_SUCCESS) + return false; + } + + // not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise + // directly into upload memory + ser.Serialise("Serialised AS"_lit, contents, contentsSize, SerialiserFlags::NoFlags).Important(); + + // unmap the resource we mapped before - we need to do this on read and on write. 
+ bool isTLAS = false; + if(!IsStructuredExporting(state) && mappedMem.mem != VK_NULL_HANDLE) + { + if(replayingAndReading) + { + // first ensure we flush the writes from the cpu to gpu memory + const VkMappedMemoryRange range = { + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(mappedMem.mem), mappedMem.offs, + AlignUp(mappedMem.size, nonCoherentAtomSize), + }; + + vkr = ObjDisp(d)->FlushMappedMemoryRanges(Unwrap(d), 1, &range); + m_pDriver->CheckVkResult(vkr); + + // Read the AS's BLAS handle count to determine if it's top or bottom level + isTLAS = *((uint64_t *)(contents + handleCountOffset)) > 0; + } + + ObjDisp(d)->UnmapMemory(Unwrap(d), Unwrap(mappedMem.mem)); + } + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayMode(state) && contentsSize > 0) + { + VkInitialContents initialContents(eResAccelerationStructureKHR, uploadMemory); + initialContents.isTLAS = isTLAS; + initialContents.buf = uploadBuf; + + m_pDriver->GetResourceManager()->SetInitialContents(id, initialContents); + } + + return true; +} + +template bool VulkanAccelerationStructureManager::Serialise(ReadSerialiser &ser, ResourceId id, + const VkInitialContents *initial, + CaptureState state); +template bool VulkanAccelerationStructureManager::Serialise(WriteSerialiser &ser, ResourceId id, + const VkInitialContents *initial, + CaptureState state); + +void VulkanAccelerationStructureManager::Apply(ResourceId id, const VkInitialContents &initial) +{ + VkCommandBuffer cmd = m_pDriver->GetInitStateCmd(); + if(cmd == VK_NULL_HANDLE) + { + RDCERR("Couldn't acquire command buffer"); + return; + } + + const VkAccelerationStructureKHR unwrappedAs = + Unwrap(m_pDriver->GetResourceManager()->GetCurrentHandle(id)); + const VkDevice d = m_pDriver->GetDev(); + + VkMarkerRegion::Begin(StringFormat::Fmt("Initial state for %s", ToStr(id).c_str()), cmd); + + if(m_pDriver->GetDriverInfo().MaliBrokenASDeviceSerialisation()) + { + const VkDeviceSize size = + AlignUp(initial.mem.size, 
m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize); + + // Copy the data using host-commands but from mapped memory + byte *mappedSrcBuffer = NULL; + VkResult vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(initial.mem.mem), initial.mem.offs, size, + 0, (void **)&mappedSrcBuffer); + m_pDriver->CheckVkResult(vkr); + + VkCopyMemoryToAccelerationStructureInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR}; + copyInfo.src.hostAddress = mappedSrcBuffer; + copyInfo.dst = unwrappedAs; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR; + ObjDisp(d)->CopyMemoryToAccelerationStructureKHR(Unwrap(d), VK_NULL_HANDLE, ©Info); + } + else + { + const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL, + Unwrap(initial.buf)}; + const VkDeviceAddress uploadBufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo); + + VkCopyMemoryToAccelerationStructureInfoKHR copyInfo = { + VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR}; + copyInfo.src.deviceAddress = uploadBufAddr; + copyInfo.dst = unwrappedAs; + copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR; + ObjDisp(d)->CmdCopyMemoryToAccelerationStructureKHR(Unwrap(cmd), ©Info); + } + + VkMarkerRegion::End(cmd); + + if(Vulkan_Debug_SingleSubmitFlushing()) + { + m_pDriver->CloseInitStateCmd(); + m_pDriver->SubmitCmds(); + m_pDriver->FlushQ(); + } +} + +VkDeviceSize VulkanAccelerationStructureManager::SerialisedASSize(VkAccelerationStructureKHR as) +{ + VkDevice d = m_pDriver->GetDev(); + + // Create query pool + VkQueryPoolCreateInfo info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO}; + info.queryCount = 1; + info.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR; + + VkQueryPool pool; + VkResult vkr = ObjDisp(d)->CreateQueryPool(Unwrap(d), &info, NULL, &pool); + m_pDriver->CheckVkResult(vkr); + + // Reset query pool + VkCommandBuffer cmd = m_pDriver->GetInitStateCmd(); + 
ObjDisp(d)->CmdResetQueryPool(Unwrap(cmd), pool, 0, 1); + + // Get the size + ObjDisp(d)->CmdWriteAccelerationStructuresPropertiesKHR( + Unwrap(cmd), 1, &as, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, pool, 0); + + m_pDriver->CloseInitStateCmd(); + m_pDriver->SubmitCmds(); + m_pDriver->FlushQ(); + + VkDeviceSize size = 0; + vkr = ObjDisp(d)->GetQueryPoolResults(Unwrap(d), pool, 0, 1, sizeof(VkDeviceSize), &size, + sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT); + m_pDriver->CheckVkResult(vkr); + + // Clean up + ObjDisp(d)->DestroyQueryPool(Unwrap(d), pool, NULL); + + return size; +} diff --git a/renderdoc/driver/vulkan/vk_acceleration_structure.h b/renderdoc/driver/vulkan/vk_acceleration_structure.h new file mode 100644 index 000000000..4aa8d5115 --- /dev/null +++ b/renderdoc/driver/vulkan/vk_acceleration_structure.h @@ -0,0 +1,59 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#pragma once + +#include "vk_manager.h" + +class WrappedVulkan; + +class VulkanAccelerationStructureManager +{ +public: + struct ASMemory + { + MemoryAllocation alloc; + bool isTLAS; + }; + + VulkanAccelerationStructureManager(WrappedVulkan *driver) : m_pDriver(driver) {} + + // Called when the initial state is prepared. Any TLAS and BLAS data is copied into temporary + // buffers and the handles for that memory and the buffers is stored in the init state + bool Prepare(VkAccelerationStructureKHR unwrappedAs, const rdcarray &queueFamilyIndices, + ASMemory &result); + + template + bool Serialise(SerialiserType &ser, ResourceId id, const VkInitialContents *initial, + CaptureState state); + + // Called when the initial state is applied. 
The AS data is deserialised from the upload buffer + // into the acceleration structure + void Apply(ResourceId id, const VkInitialContents &initial); + +private: + VkDeviceSize SerialisedASSize(VkAccelerationStructureKHR as); + + WrappedVulkan *m_pDriver; +}; diff --git a/renderdoc/driver/vulkan/vk_common.cpp b/renderdoc/driver/vulkan/vk_common.cpp index 2b2c7896e..5d5bebca8 100644 --- a/renderdoc/driver/vulkan/vk_common.cpp +++ b/renderdoc/driver/vulkan/vk_common.cpp @@ -362,6 +362,10 @@ bool VkInitParams::IsSupportedVersion(uint64_t ver) if(ver == CurrentVersion) return true; + // 0x15 -> 0x16 - added support for acceleration structures + if(ver == 0x15) + return true; + // 0x14 -> 0x15 - added support for mutable descriptors if(ver == 0x14) return true; @@ -1154,6 +1158,19 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps, qualcommLeakingUBOOffsets = true; } } + + if(driverProps.driverID == VK_DRIVER_ID_ARM_PROPRIETARY) + { + if(Major() >= 36 && Major() < 43) + { + if(active) + RDCLOG( + "Using host acceleration structure deserialisation commands on Mali - update to a " + "newer " + "driver for fix"); + maliBrokenASDeviceSerialisation = true; + } + } } FrameRefType GetRefType(DescriptorSlotType descType) @@ -1168,7 +1185,8 @@ FrameRefType GetRefType(DescriptorSlotType descType) case DescriptorSlotType::UniformBuffer: case DescriptorSlotType::UniformBufferDynamic: case DescriptorSlotType::InputAttachment: - case DescriptorSlotType::InlineBlock: return eFrameRef_Read; + case DescriptorSlotType::InlineBlock: + case DescriptorSlotType::AccelerationStructure: return eFrameRef_Read; case DescriptorSlotType::StorageImage: case DescriptorSlotType::StorageTexelBuffer: case DescriptorSlotType::StorageBuffer: @@ -1207,6 +1225,13 @@ void DescriptorSetSlot::SetTexelBuffer(VkDescriptorType writeType, ResourceId id resource = id; } +void DescriptorSetSlot::SetAccelerationStructure(VkDescriptorType writeType, + VkAccelerationStructureKHR 
accelerationStructure) +{ + type = convert(writeType); + resource = GetResID(accelerationStructure); +} + void AddBindFrameRef(DescriptorBindRefs &refs, ResourceId id, FrameRefType ref) { if(id == ResourceId()) @@ -1263,7 +1288,7 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou RDCCOMPILE_ASSERT(offsetof(DescriptorSetSlot, offset) == 8, "DescriptorSetSlot first uint64_t bitpacking isn't working as expected"); - VkResourceRecord *bufView = NULL, *imgView = NULL, *buffer = NULL; + VkResourceRecord *bufView = NULL, *imgView = NULL, *buffer = NULL, *accStruct = NULL; switch(type) { @@ -1277,6 +1302,9 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou case DescriptorSlotType::SampledImage: case DescriptorSlotType::StorageImage: case DescriptorSlotType::InputAttachment: imgView = rm->GetResourceRecord(resource); break; + case DescriptorSlotType::AccelerationStructure: + accStruct = rm->GetResourceRecord(resource); + break; default: break; } @@ -1312,6 +1340,10 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou if(buffer->storable) refs.storableRefs.insert(buffer); } + if(accStruct) + { + AddBindFrameRef(refs, resource, eFrameRef_Read); + } } #if ENABLED(ENABLE_UNIT_TESTS) diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index a3e16e7a1..4c2d9d321 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -305,6 +305,9 @@ public: // If we do have a pipeline to bind, we should never be perturbing dynamic state in between static // pipeline binds. 
bool NVStaticPipelineRebindStates() const { return nvidiaStaticPipelineRebindStates; } + // On Mali there are some known issues regarding acceleration structure serialisation to device + // memory, for the affected driver versions we switch to the host command variants + bool MaliBrokenASDeviceSerialisation() const { return maliBrokenASDeviceSerialisation; } private: GPUVendor m_Vendor; @@ -320,6 +323,7 @@ private: bool qualcommLineWidthCrash = false; bool intelBrokenOcclusionQueries = false; bool nvidiaStaticPipelineRebindStates = false; + bool maliBrokenASDeviceSerialisation = false; }; enum @@ -527,6 +531,7 @@ enum class DescriptorSlotType : EnumBaseType StorageBufferDynamic, InputAttachment, InlineBlock, + AccelerationStructure, Count, }; @@ -550,7 +555,9 @@ constexpr VkDescriptorType convert(DescriptorSlotType type) ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC : type == DescriptorSlotType::InputAttachment ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT : type == DescriptorSlotType::InlineBlock ? VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK - : VK_DESCRIPTOR_TYPE_MAX_ENUM; + : type == DescriptorSlotType::AccelerationStructure + ? VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR + : VK_DESCRIPTOR_TYPE_MAX_ENUM; } constexpr DescriptorSlotType convert(VkDescriptorType type) @@ -682,6 +689,8 @@ struct DescriptorSetSlot void SetBuffer(VkDescriptorType writeType, const VkDescriptorBufferInfo &bufInfo); void SetImage(VkDescriptorType writeType, const VkDescriptorImageInfo &imInfo, bool useSampler); void SetTexelBuffer(VkDescriptorType writeType, ResourceId id); + void SetAccelerationStructure(VkDescriptorType writeType, + VkAccelerationStructureKHR accelerationStructure); // 48-bit truncated VK_WHOLE_SIZE static const VkDeviceSize WholeSizeRange = 0xFFFFFFFFFFFF; @@ -714,8 +723,8 @@ struct DescriptorSetSlot // as a different type and the resource ID is partly trampled. Since these are disjoint we know // that even if they're stale they're valid IDs. 
- // main contents: buffer, image, texel buffer view. NOT the sampler for sampler-only descriptors, - // just to avoid confusion + // main contents: buffer, image, texel buffer view, or acceleration structure. NOT the sampler for + // sampler-only descriptors, just to avoid confusion ResourceId resource; // sampler for sampler-only descriptors, or sampler for combined image-sampler descriptors ResourceId sampler; @@ -1977,7 +1986,6 @@ DECLARE_DESERIALISE_TYPE(VkWriteDescriptorSetInlineUniformBlock); // plain structs with no next chain DECLARE_REFLECTION_STRUCT(VkAabbPositionsKHR); DECLARE_REFLECTION_STRUCT(VkAccelerationStructureBuildRangeInfoKHR); -DECLARE_REFLECTION_STRUCT(VkAccelerationStructureGeometryDataKHR); DECLARE_REFLECTION_STRUCT(VkAccelerationStructureInstanceKHR); DECLARE_REFLECTION_STRUCT(VkAllocationCallbacks); DECLARE_REFLECTION_STRUCT(VkAttachmentDescription); diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 6b25e99b0..38ee9c775 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -147,6 +147,7 @@ WrappedVulkan::WrappedVulkan() m_SetDeviceLoaderData = NULL; m_ResourceManager = new VulkanResourceManager(m_State, this); + m_ASManager = new VulkanAccelerationStructureManager(this); m_Instance = VK_NULL_HANDLE; m_PhysicalDevice = VK_NULL_HANDLE; @@ -191,6 +192,8 @@ WrappedVulkan::~WrappedVulkan() m_ResourceManager->ClearWithoutReleasing(); SAFE_DELETE(m_ResourceManager); + SAFE_DELETE(m_ASManager); + SAFE_DELETE(m_FrameReader); for(size_t i = 0; i < m_ThreadSerialisers.size(); i++) @@ -1948,6 +1951,35 @@ VkResult WrappedVulkan::FilterDeviceExtensionProperties(VkPhysicalDevice physDev return true; } + if(!strcmp(ext.extensionName, VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) + { + // require GPDP2 + if(instDevInfo->ext_KHR_get_physical_device_properties2) + { + VkPhysicalDeviceAccelerationStructureFeaturesKHR accStruct = { + 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR}; + VkPhysicalDeviceFeatures2 base = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; + base.pNext = &accStruct; + ObjDisp(physDev)->GetPhysicalDeviceFeatures2(Unwrap(physDev), &base); + + if(accStruct.accelerationStructureCaptureReplay) + { + // supported, don't remove + return false; + } + else if(!filterWarned) + { + RDCWARN( + "VkPhysicalDeviceAccelerationStructureFeaturesKHR." + "accelerationStructureCaptureReplay " + "is false, can't support capture of VK_KHR_acceleration_structure"); + } + } + + // if it wasn't supported, remove the extension + return true; + } + // not an extension with conditional support, don't remove return false; }); @@ -2191,7 +2223,7 @@ void WrappedVulkan::StartFrameCapture(DeviceOwnedWindow devWnd) // reference the buffer GetResourceManager()->MarkResourceFrameReferenced((*it)->GetResourceID(), eFrameRef_Read); // and its backing memory - GetResourceManager()->MarkMemoryFrameReferenced((*it)->baseResource, (*it)->memOffset, + GetResourceManager()->MarkMemoryFrameReferenced((*it)->baseResourceMem, (*it)->memOffset, (*it)->memSize, eFrameRef_ReadBeforeWrite); } } @@ -2464,10 +2496,13 @@ bool WrappedVulkan::EndFrameCapture(DeviceOwnedWindow devWnd) SubmitAndFlushExtQueue(swapQueueIndex); } + const VkDeviceSize alignedSize = + AlignUp(readbackMem.size, GetDeviceProps().limits.nonCoherentAtomSize); + // map memory and readback byte *pData = NULL; - vkr = vt->MapMemory(Unwrap(device), Unwrap(readbackMem.mem), readbackMem.offs, readbackMem.size, - 0, (void **)&pData); + vkr = vt->MapMemory(Unwrap(device), Unwrap(readbackMem.mem), readbackMem.offs, alignedSize, 0, + (void **)&pData); CheckVkResult(vkr); RDCASSERT(pData != NULL); @@ -2480,7 +2515,7 @@ bool WrappedVulkan::EndFrameCapture(DeviceOwnedWindow devWnd) NULL, Unwrap(readbackMem.mem), readbackMem.offs, - readbackMem.size, + alignedSize, }; vkr = vt->InvalidateMappedMemoryRanges(Unwrap(device), 1, &range); @@ -5188,6 
+5223,7 @@ void WrappedVulkan::AddUsage(VulkanActionTreeNode &actionNode, rdcarray m_CleanupEvents; @@ -1122,6 +1120,7 @@ public: VulkanResourceManager *GetResourceManager() { return m_ResourceManager; } VulkanDebugManager *GetDebugManager() { return m_DebugManager; } VulkanShaderCache *GetShaderCache() { return m_ShaderCache; } + VulkanAccelerationStructureManager *GetAccelerationStructureManager() { return m_ASManager; } CaptureState GetState() { return m_State; } VulkanReplay *GetReplay() { return m_Replay; } // replay interface @@ -1177,8 +1176,12 @@ public: uint32_t GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask); uint32_t GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask); + // Low-level implementation, always prefer the two below + MemoryAllocation AllocateMemoryForResource(bool buffer, VkMemoryRequirements mrq, + MemoryScope scope, MemoryType type); MemoryAllocation AllocateMemoryForResource(VkImage im, MemoryScope scope, MemoryType type); MemoryAllocation AllocateMemoryForResource(VkBuffer buf, MemoryScope scope, MemoryType type); + void FreeMemoryAllocation(MemoryAllocation alloc); void ChooseMemoryIndices(); @@ -1277,6 +1280,7 @@ public: bool TaskShaders() const { return m_TaskShaders; } bool MeshShaders() const { return m_MeshShaders; } bool ListRestart() const { return m_ListRestart; } + bool AccelerationStructures() const { return m_AccelerationStructures; } VulkanRenderState &GetRenderState() { return m_RenderState; } void SetActionCB(VulkanActionCallback *cb) { m_ActionCallback = cb; } void SetSubmitChain(void *submitChain) { m_SubmitChain = submitChain; } @@ -2827,14 +2831,6 @@ public: void, vkCmdBuildAccelerationStructuresKHR, VkCommandBuffer commandBuffer, uint32_t infoCount, const VkAccelerationStructureBuildGeometryInfoKHR *pInfos, const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos); - VkResult vkCopyAccelerationStructureKHR(VkDevice device, VkDeferredOperationKHR deferredOperation, - const 
VkCopyAccelerationStructureInfoKHR *pInfo); - VkResult vkCopyAccelerationStructureToMemoryKHR( - VkDevice device, VkDeferredOperationKHR deferredOperation, - const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo); - VkResult vkCopyMemoryToAccelerationStructureKHR( - VkDevice device, VkDeferredOperationKHR deferredOperation, - const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo); IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdCopyAccelerationStructureKHR, VkCommandBuffer commandBuffer, const VkCopyAccelerationStructureInfoKHR *pInfo); @@ -2848,6 +2844,14 @@ public: VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount, const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType, VkQueryPool queryPool, uint32_t firstQuery); + VkResult vkCopyAccelerationStructureKHR(VkDevice device, VkDeferredOperationKHR deferredOperation, + const VkCopyAccelerationStructureInfoKHR *pInfo); + VkResult vkCopyAccelerationStructureToMemoryKHR( + VkDevice device, VkDeferredOperationKHR deferredOperation, + const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo); + VkResult vkCopyMemoryToAccelerationStructureKHR( + VkDevice device, VkDeferredOperationKHR deferredOperation, + const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo); IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkCreateAccelerationStructureKHR, VkDevice device, const VkAccelerationStructureCreateInfoKHR *pCreateInfo, const VkAllocationCallbacks *, diff --git a/renderdoc/driver/vulkan/vk_initstate.cpp b/renderdoc/driver/vulkan/vk_initstate.cpp index 731c988df..82b2115f9 100644 --- a/renderdoc/driver/vulkan/vk_initstate.cpp +++ b/renderdoc/driver/vulkan/vk_initstate.cpp @@ -571,6 +571,26 @@ bool WrappedVulkan::Prepare_InitialState(WrappedVkRes *res) return true; } + else if(type == eResAccelerationStructureKHR) + { + VulkanAccelerationStructureManager::ASMemory result; + VkAccelerationStructureKHR as = ToUnwrappedHandle(res); + if(!GetAccelerationStructureManager()->Prepare(as, 
m_QueueFamilyIndices, result)) + { + SET_ERROR_RESULT(m_LastCaptureError, ResultCode::OutOfMemory, + "Couldn't allocate readback memory"); + m_CaptureFailure = true; + return false; + } + + VkInitialContents ic = VkInitialContents(type, result.alloc); + ic.isTLAS = result.isTLAS; + + GetResourceManager()->SetInitialContents(id, ic); + m_PreparedNotSerialisedInitStates.push_back(id); + + return true; + } else { RDCERR("Unhandled resource type %d", type); @@ -608,7 +628,8 @@ uint64_t WrappedVulkan::GetSize_InitialState(ResourceId id, const VkInitialConte // buffers only have initial states when they're sparse return ret; } - else if(initial.type == eResImage || initial.type == eResDeviceMemory) + else if(initial.type == eResImage || initial.type == eResDeviceMemory || + initial.type == eResAccelerationStructureKHR) { // the size primarily comes from the buffer, the size of which we conveniently have stored. return ret + uint64_t(128 + initial.mem.size + WriteSerialiser::GetChunkAlignment()); @@ -1637,6 +1658,10 @@ bool WrappedVulkan::Serialise_InitialState(SerialiserType &ser, ResourceId id, V } } } + else if(type == eResAccelerationStructureKHR) + { + ret = GetAccelerationStructureManager()->Serialise(ser, id, initial, m_State); + } else { RDCERR("Unhandled resource type %s", ToStr(type).c_str()); @@ -2312,6 +2337,10 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten FlushQ(); } } + else if(type == eResAccelerationStructureKHR) + { + GetAccelerationStructureManager()->Apply(id, initial); + } else { RDCERR("Unhandled resource type %d", type); diff --git a/renderdoc/driver/vulkan/vk_manager.cpp b/renderdoc/driver/vulkan/vk_manager.cpp index 72c6b9b17..cf0791125 100644 --- a/renderdoc/driver/vulkan/vk_manager.cpp +++ b/renderdoc/driver/vulkan/vk_manager.cpp @@ -1038,7 +1038,15 @@ rdcarray VulkanResourceManager::InitialContentResources() rdcarray resources = ResourceManager::InitialContentResources(); std::sort(resources.begin(), 
resources.end(), [this](ResourceId a, ResourceId b) { - return m_InitialContents[a].data.type < m_InitialContents[b].data.type; + const InitialContentData &aData = m_InitialContents[a].data; + const InitialContentData &bData = m_InitialContents[b].data; + + // Always sort BLASs before TLASs, as a TLAS holds device addresses for its BLASs + // and we make sure those addresses are valid + if(!aData.isTLAS && bData.isTLAS) + return true; + + return aData.type < bData.type; }); return resources; } diff --git a/renderdoc/driver/vulkan/vk_manager.h b/renderdoc/driver/vulkan/vk_manager.h index 9a39fd8a8..441720685 100644 --- a/renderdoc/driver/vulkan/vk_manager.h +++ b/renderdoc/driver/vulkan/vk_manager.h @@ -133,6 +133,8 @@ struct VkInitialContents // sparse bind. Similar to the descriptors above rdcarray *sparseTables; SparseBinding *sparseBind; + + bool isTLAS; // If the contents are an AS, this determines if it is a TLAS or BLAS }; struct VulkanResourceManagerConfiguration diff --git a/renderdoc/driver/vulkan/vk_memory.cpp b/renderdoc/driver/vulkan/vk_memory.cpp index be97f64d3..9fac1ddd0 100644 --- a/renderdoc/driver/vulkan/vk_memory.cpp +++ b/renderdoc/driver/vulkan/vk_memory.cpp @@ -319,9 +319,16 @@ MemoryAllocation WrappedVulkan::AllocateMemoryForResource(bool buffer, VkMemoryR break; } + // if ray tracing acceleration structures are in use, then allocate memory with buffer device + // address support enabled + VkMemoryAllocateFlagsInfo flagsInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + NULL, + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkMemoryAllocateInfo info = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - NULL, + AccelerationStructures() ? 
&flagsInfo : NULL, allocSize * 1024 * 1024, memoryTypeIndex, }; diff --git a/renderdoc/driver/vulkan/vk_replay.cpp b/renderdoc/driver/vulkan/vk_replay.cpp index 15c91413b..5795b1ec4 100644 --- a/renderdoc/driver/vulkan/vk_replay.cpp +++ b/renderdoc/driver/vulkan/vk_replay.cpp @@ -2121,6 +2121,9 @@ void VulkanReplay::SavePipelineState(uint32_t eventId) dstel.type = BindType::InputAttachment; break; case DescriptorSlotType::InlineBlock: dstel.type = BindType::ConstantBuffer; break; + case DescriptorSlotType::AccelerationStructure: + dstel.type = BindType::ReadWriteBuffer; + break; case DescriptorSlotType::Unwritten: case DescriptorSlotType::Count: dstel.type = BindType::Unknown; break; } @@ -2272,7 +2275,8 @@ void VulkanReplay::SavePipelineState(uint32_t eventId) else if(descriptorType == DescriptorSlotType::StorageBuffer || descriptorType == DescriptorSlotType::StorageBufferDynamic || descriptorType == DescriptorSlotType::UniformBuffer || - descriptorType == DescriptorSlotType::UniformBufferDynamic) + descriptorType == DescriptorSlotType::UniformBufferDynamic || + descriptorType == DescriptorSlotType::AccelerationStructure) { destSlots.binds[a].viewResourceId = ResourceId(); diff --git a/renderdoc/driver/vulkan/vk_serialise.cpp b/renderdoc/driver/vulkan/vk_serialise.cpp index 96b6c2b6f..a6dbf9642 100644 --- a/renderdoc/driver/vulkan/vk_serialise.cpp +++ b/renderdoc/driver/vulkan/vk_serialise.cpp @@ -11866,8 +11866,16 @@ void DoSerialise(SerialiserType &ser, VkAccelerationStructureGeometryKHR &el) RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR); SerialiseNext(ser, el.sType, el.pNext); + // el.geometry is a union so we need to determine which member it is supposed to be before + // serialising further SERIALISE_MEMBER(geometryType).Important(); - SERIALISE_MEMBER(geometry); + if(el.geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR) + ser.Serialise("geometry.triangles"_lit, el.geometry.triangles); + else if(el.geometryType 
== VK_GEOMETRY_TYPE_AABBS_KHR) + ser.Serialise("geometry.aabbs"_lit, el.geometry.aabbs); + else + ser.Serialise("geometry.instances"_lit, el.geometry.instances); + SERIALISE_MEMBER_VKFLAGS(VkGeometryFlagsKHR, flags); } @@ -12067,14 +12075,6 @@ void Deserialise(const VkWriteDescriptorSetAccelerationStructureKHR &el) delete[] el.pAccelerationStructures; } -template -void DoSerialise(SerialiserType &ser, VkAccelerationStructureGeometryDataKHR &el) -{ - SERIALISE_MEMBER(triangles); - SERIALISE_MEMBER(aabbs); - SERIALISE_MEMBER(instances); -} - template void DoSerialise(SerialiserType &ser, VkDeviceOrHostAddressConstKHR &el) { @@ -12537,7 +12537,6 @@ INSTANTIATE_SERIALISE_TYPE(VkWriteDescriptorSetAccelerationStructureKHR); // plain structs with no next chain INSTANTIATE_SERIALISE_TYPE(VkAabbPositionsKHR); INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureBuildRangeInfoKHR); -INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureGeometryDataKHR); INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureInstanceKHR); INSTANTIATE_SERIALISE_TYPE(VkAllocationCallbacks); INSTANTIATE_SERIALISE_TYPE(VkAttachmentDescription); diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp index ae1733e47..50abf8bff 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp @@ -7633,6 +7633,16 @@ bool WrappedVulkan::Serialise_vkCmdBuildAccelerationStructuresIndirectKHR( for(uint32_t i = 0; i < infoCount; ++i) tmpMaxPrimitiveCounts[i] = maxPrimitives[i].data(); + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + return true; + } + ObjDisp(commandBuffer) ->CmdBuildAccelerationStructuresIndirectKHR(Unwrap(commandBuffer), infoCount, unwrappedInfos, pIndirectDeviceAddresses, @@ -7732,10 +7742,21 @@ bool 
WrappedVulkan::Serialise_vkCmdBuildAccelerationStructuresKHR( unwrappedInfos[i] = *UnwrapStructAndChain(m_State, memory, &pInfos[i]); // Convert the rangeInfos back to a C-style array-of-arrays - rdcarray tmpBuildRangeInfos(nullptr, infoCount); + rdcarray tmpBuildRangeInfos; + tmpBuildRangeInfos.resize(infoCount); for(uint32_t i = 0; i < infoCount; ++i) tmpBuildRangeInfos[i] = rangeInfos[i].data(); + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + return true; + } + ObjDisp(commandBuffer) ->CmdBuildAccelerationStructuresKHR(Unwrap(commandBuffer), infoCount, unwrappedInfos, tmpBuildRangeInfos.data()); @@ -7792,28 +7813,6 @@ void WrappedVulkan::vkCmdBuildAccelerationStructuresKHR( } } -// CPU-side VK_KHR_acceleration_structure calls are not supported for now -VkResult WrappedVulkan::vkCopyAccelerationStructureKHR(VkDevice device, - VkDeferredOperationKHR deferredOperation, - const VkCopyAccelerationStructureInfoKHR *pInfo) -{ - return VK_ERROR_UNKNOWN; -} - -VkResult WrappedVulkan::vkCopyAccelerationStructureToMemoryKHR( - VkDevice device, VkDeferredOperationKHR deferredOperation, - const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) -{ - return VK_ERROR_UNKNOWN; -} - -VkResult WrappedVulkan::vkCopyMemoryToAccelerationStructureKHR( - VkDevice device, VkDeferredOperationKHR deferredOperation, - const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) -{ - return VK_ERROR_UNKNOWN; -} - template bool WrappedVulkan::Serialise_vkCmdCopyAccelerationStructureKHR( SerialiserType &ser, VkCommandBuffer commandBuffer, @@ -7963,6 +7962,28 @@ VkResult WrappedVulkan::vkWriteAccelerationStructuresPropertiesKHR( Unwrap(device), accelerationStructureCount, unwrappedASes, queryType, dataSize, pData, stride); } +// CPU-side VK_KHR_acceleration_structure calls are not supported for now +VkResult 
WrappedVulkan::vkCopyAccelerationStructureKHR(VkDevice device, + VkDeferredOperationKHR deferredOperation, + const VkCopyAccelerationStructureInfoKHR *pInfo) +{ + return VK_ERROR_UNKNOWN; +} + +VkResult WrappedVulkan::vkCopyAccelerationStructureToMemoryKHR( + VkDevice device, VkDeferredOperationKHR deferredOperation, + const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo) +{ + return VK_ERROR_UNKNOWN; +} + +VkResult WrappedVulkan::vkCopyMemoryToAccelerationStructureKHR( + VkDevice device, VkDeferredOperationKHR deferredOperation, + const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo) +{ + return VK_ERROR_UNKNOWN; +} + INSTANTIATE_FUNCTION_SERIALISED(VkResult, vkCreateCommandPool, VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *, VkCommandPool *pCommandPool); diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 47887b187..8bb4be7ba 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -3272,6 +3272,37 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi RDCWARN("meshShaderQueries = false, mesh shader performance counters unavailable"); } END_PHYS_EXT_CHECK(); + + BEGIN_PHYS_EXT_CHECK(VkPhysicalDeviceAccelerationStructureFeaturesKHR, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR); + { + CHECK_PHYS_EXT_FEATURE(accelerationStructure) + CHECK_PHYS_EXT_FEATURE(accelerationStructureCaptureReplay) + CHECK_PHYS_EXT_FEATURE(accelerationStructureIndirectBuild) + CHECK_PHYS_EXT_FEATURE(descriptorBindingAccelerationStructureUpdateAfterBind) + + if(ext->accelerationStructure && !avail.accelerationStructureCaptureReplay) + { + SET_ERROR_RESULT( + m_FailedReplayResult, ResultCode::APIHardwareUnsupported, + "Capture requires accelerationStructure support, which is available, but " + "accelerationStructureCaptureReplay support is not 
available which is required to " + "replay\n" + "\n%s", + GetPhysDeviceCompatString(false, false).c_str()); + return false; + } + + m_AccelerationStructures = ext->accelerationStructure != VK_FALSE; + if(m_AccelerationStructures) + { + RDCLOG( + "Ray tracing acceleration structures requested, allocating all device memory with " + "VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT"); + ext->accelerationStructureCaptureReplay = VK_TRUE; + } + } + END_PHYS_EXT_CHECK(); } if(availFeatures.depthClamp) @@ -4317,6 +4348,18 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice, if(separateDepthStencilFeatures) m_SeparateDepthStencil |= (separateDepthStencilFeatures->separateDepthStencilLayouts != VK_FALSE); + // we need to enable acceleration structure capture/replay. We verified that this is OK before + // whitelisting the extension + + VkPhysicalDeviceAccelerationStructureFeaturesKHR *accFeatures = + (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)FindNextStruct( + &createInfo, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR); + if(accFeatures && accFeatures->accelerationStructure) + { + accFeatures->accelerationStructureCaptureReplay = VK_TRUE; + m_AccelerationStructures = true; + } + VkResult ret; SERIALISE_TIME_CALL(ret = createFunc(Unwrap(physicalDevice), &createInfo, NULL, pDevice)); diff --git a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp index 7a7cf4347..668ea1012 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp @@ -852,6 +852,17 @@ void WrappedVulkan::vkGetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice } #undef DISABLE_EDS3_FEATURE + + // we don't want to report support for acceleration structure host commands + VkPhysicalDeviceAccelerationStructureFeaturesKHR *accStruct = + (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)FindNextStruct( + pFeatures, 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR); + + if(accStruct && accStruct->accelerationStructureHostCommands) + { + RDCWARN("Disabling support for acceleration structure host commands"); + accStruct->accelerationStructureHostCommands = VK_FALSE; + } } void WrappedVulkan::vkGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice, diff --git a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp index e845d1b0f..6caf2b5f8 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp @@ -1719,10 +1719,17 @@ VkResult WrappedVulkan::vkCreateBuffer(VkDevice device, const VkBufferCreateInfo // effectively free as a usage bit for all sensible implementations so we just add it here. adjusted_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; - // If we're using this buffer for device addresses, ensure we force on capture replay bit. - // We ensured the physical device can support this feature before whitelisting the extension. - if(IsCaptureMode(m_State) && (adjusted_info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)) - adjusted_info.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT; + if(IsCaptureMode(m_State)) + { + // If we're using this buffer for AS storage we need to enable BDA + if(adjusted_info.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR) + adjusted_info.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + + // If we're using this buffer for device addresses, ensure we force on capture replay bit. + // We ensured the physical device can support this feature before whitelisting the extension. 
+ if(adjusted_info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) + adjusted_info.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT; + } byte *tempMem = GetTempMemory(GetNextPatchSize(adjusted_info.pNext)); @@ -3139,7 +3146,7 @@ bool WrappedVulkan::Serialise_vkCreateAccelerationStructureKHR( VkAccelerationStructureKHR acc = VK_NULL_HANDLE; VkResult ret = - ObjDisp(device)->CreateAccelerationStructureKHR(Unwrap(device), &CreateInfo, NULL, &acc); + ObjDisp(device)->CreateAccelerationStructureKHR(Unwrap(device), &unwrappedInfo, NULL, &acc); if(ret != VK_SUCCESS) { @@ -3186,6 +3193,13 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR( const VkAllocationCallbacks *, VkAccelerationStructureKHR *pAccelerationStructure) { VkAccelerationStructureCreateInfoKHR unwrappedInfo = *pCreateInfo; + + // Ensure we force on capture replay bit. We ensured the physical device can support this feature + // before whitelisting the extension. + if(IsCaptureMode(m_State)) + unwrappedInfo.createFlags |= + VK_ACCELERATION_STRUCTURE_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR; + unwrappedInfo.buffer = Unwrap(unwrappedInfo.buffer); VkResult ret; SERIALISE_TIME_CALL(ret = ObjDisp(device)->CreateAccelerationStructureKHR( @@ -3197,13 +3211,27 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR( if(IsCaptureMode(m_State)) { + // We're capturing, so get the device address of the created AS + VkAccelerationStructureCreateInfoKHR serialisedCreateInfo = *pCreateInfo; + serialisedCreateInfo.createFlags |= + VK_ACCELERATION_STRUCTURE_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR; + + const VkAccelerationStructureDeviceAddressInfoKHR getInfo = { + VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, + NULL, + Unwrap(*pAccelerationStructure), + }; + const VkDeviceAddress addr = + ObjDisp(device)->GetAccelerationStructureDeviceAddressKHR(Unwrap(device), &getInfo); + serialisedCreateInfo.deviceAddress = addr; + Chunk *chunk = NULL; { CACHE_THREAD_SERIALISER(); 
SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCreateAccelerationStructureKHR); - Serialise_vkCreateAccelerationStructureKHR(ser, device, pCreateInfo, NULL, + Serialise_vkCreateAccelerationStructureKHR(ser, device, &serialisedCreateInfo, NULL, pAccelerationStructure); chunk = scope.Get(); @@ -3223,6 +3251,16 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR( record->storable = bufferRecord->storable; record->memOffset = bufferRecord->memOffset + pCreateInfo->offset; record->memSize = pCreateInfo->size; + + GetResourceManager()->MarkDirtyResource(id); + if(pCreateInfo->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR || + pCreateInfo->type == VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR) + { + // We force reference BLASs as it is not feasible to track at the API level which TLASs + // reference them. We force ref generics too as they could be bottom or top level so we + // conservatively assume they are bottom + AddForcedReference(record); + } } else {