diff --git a/renderdoc/driver/vulkan/CMakeLists.txt b/renderdoc/driver/vulkan/CMakeLists.txt
index cb2594262..58511977c 100644
--- a/renderdoc/driver/vulkan/CMakeLists.txt
+++ b/renderdoc/driver/vulkan/CMakeLists.txt
@@ -40,6 +40,8 @@ set(sources
vk_serialise.cpp
vk_stringise.cpp
vk_layer.cpp
+ vk_acceleration_structure.h
+ vk_acceleration_structure.cpp
imagestate_tests.cpp
imgrefs_tests.cpp
official/vk_layer.h
diff --git a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj
index 1dabd623d..5ee1574f7 100644
--- a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj
+++ b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj
@@ -155,6 +155,7 @@
true
+
@@ -186,6 +187,7 @@
+
diff --git a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters
index 7abbafeab..7d1445b16 100644
--- a/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters
+++ b/renderdoc/driver/vulkan/renderdoc_vulkan.vcxproj.filters
@@ -151,6 +151,9 @@
Replay
+
+ Replay
+
@@ -159,6 +162,9 @@
Replay
+
+ Replay
+
Util
diff --git a/renderdoc/driver/vulkan/vk_acceleration_structure.cpp b/renderdoc/driver/vulkan/vk_acceleration_structure.cpp
new file mode 100644
index 000000000..b1d7b056d
--- /dev/null
+++ b/renderdoc/driver/vulkan/vk_acceleration_structure.cpp
@@ -0,0 +1,392 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "vk_acceleration_structure.h"
+#include "core/settings.h"
+#include "vk_core.h"
+
+RDOC_EXTERN_CONFIG(bool, Vulkan_Debug_SingleSubmitFlushing);
+
+namespace
+{
+// Although the serialised data is implementation-defined in general, the header is defined:
+// https://registry.khronos.org/vulkan/specs/1.3-extensions/html/chap37.html#vkCmdCopyAccelerationStructureToMemoryKHR
+constexpr std::size_t handleCountOffset = VK_UUID_SIZE + VK_UUID_SIZE + 8 + 8;
+constexpr VkDeviceSize handleCountSize = 8;
+
+// Spec says VkCopyAccelerationStructureToMemoryInfoKHR::dst::deviceAddress must be 256 bytes aligned
+constexpr VkDeviceSize asBufferAlignment = 256;
+}
+
+bool VulkanAccelerationStructureManager::Prepare(VkAccelerationStructureKHR unwrappedAs,
+ const rdcarray<uint32_t> &queueFamilyIndices,
+ ASMemory &result)
+{
+ const VkDeviceSize serialisedSize = SerialisedASSize(unwrappedAs);
+
+ const VkDevice d = m_pDriver->GetDev();
+ VkResult vkr = VK_SUCCESS;
+
+ // since this happens during capture, we don't want to start serialising extra buffer creates,
+ // leave this buffer as unwrapped
+ VkBuffer dstBuf = VK_NULL_HANDLE;
+
+ VkBufferCreateInfo bufInfo = {
+ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ NULL,
+ 0,
+ serialisedSize,
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ };
+
+ // we make the buffer concurrently accessible by all queue families to not invalidate the
+ // contents of the memory we're reading back from.
+ bufInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
+ bufInfo.queueFamilyIndexCount = (uint32_t)queueFamilyIndices.size();
+ bufInfo.pQueueFamilyIndices = queueFamilyIndices.data();
+
+ // spec requires that CONCURRENT must specify more than one queue family. If there is only one
+ // queue family, we can safely use exclusive.
+ if(bufInfo.queueFamilyIndexCount == 1)
+ bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
+
+ vkr = ObjDisp(d)->CreateBuffer(Unwrap(d), &bufInfo, NULL, &dstBuf);
+ m_pDriver->CheckVkResult(vkr);
+
+ m_pDriver->AddPendingObjectCleanup(
+ [d, dstBuf]() { ObjDisp(d)->DestroyBuffer(Unwrap(d), dstBuf, NULL); });
+
+ VkMemoryRequirements mrq = {};
+ ObjDisp(d)->GetBufferMemoryRequirements(Unwrap(d), dstBuf, &mrq);
+
+ mrq.alignment = RDCMAX(mrq.alignment, asBufferAlignment);
+
+ const MemoryAllocation readbackmem = m_pDriver->AllocateMemoryForResource(
+ true, mrq, MemoryScope::InitialContents, MemoryType::Readback);
+ if(readbackmem.mem == VK_NULL_HANDLE)
+ return false;
+
+ vkr = ObjDisp(d)->BindBufferMemory(Unwrap(d), dstBuf, Unwrap(readbackmem.mem), readbackmem.offs);
+ m_pDriver->CheckVkResult(vkr);
+
+ const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL,
+ dstBuf};
+ const VkDeviceAddress dstBufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo);
+
+ VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
+ if(cmd == VK_NULL_HANDLE)
+ {
+ RDCERR("Couldn't acquire command buffer");
+ return false;
+ }
+
+ const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize;
+ byte *mappedDstBuffer = NULL;
+ VkDeviceSize size;
+
+ if(m_pDriver->GetDriverInfo().MaliBrokenASDeviceSerialisation())
+ {
+ size = AlignUp(serialisedSize, nonCoherentAtomSize);
+
+ vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(readbackmem.mem), readbackmem.offs, size, 0,
+ (void **)&mappedDstBuffer);
+ m_pDriver->CheckVkResult(vkr);
+
+ // Copy the data using host-commands but into mapped memory
+ VkCopyAccelerationStructureToMemoryInfoKHR copyInfo = {
+ VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, NULL};
+ copyInfo.src = unwrappedAs;
+ copyInfo.dst.hostAddress = mappedDstBuffer;
+ copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR;
+ ObjDisp(d)->CopyAccelerationStructureToMemoryKHR(Unwrap(d), VK_NULL_HANDLE, &copyInfo);
+ }
+ else
+ {
+ VkCopyAccelerationStructureToMemoryInfoKHR copyInfo = {
+ VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_TO_MEMORY_INFO_KHR, NULL};
+ copyInfo.src = unwrappedAs;
+ copyInfo.dst.deviceAddress = dstBufAddr;
+ copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_SERIALIZE_KHR;
+ ObjDisp(d)->CmdCopyAccelerationStructureToMemoryKHR(Unwrap(cmd), &copyInfo);
+
+ // It's not ideal but we have to flush here because we need to map the data in order to read
+ // the BLAS addresses which means we need to have ensured that it has been copied beforehand
+ m_pDriver->CloseInitStateCmd();
+ m_pDriver->SubmitCmds();
+ m_pDriver->FlushQ();
+
+ // Now serialised AS data has been copied to a readable buffer, we need to expose the data to
+ // the host
+ size = AlignUp(handleCountOffset + handleCountSize, nonCoherentAtomSize);
+
+ vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(readbackmem.mem), readbackmem.offs, size, 0,
+ (void **)&mappedDstBuffer);
+ m_pDriver->CheckVkResult(vkr);
+ }
+
+ // invalidate the cpu cache for this memory range to avoid reading stale data
+ const VkMappedMemoryRange range = {
+ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(readbackmem.mem), readbackmem.offs, size,
+ };
+ vkr = ObjDisp(d)->InvalidateMappedMemoryRanges(Unwrap(d), 1, &range);
+ m_pDriver->CheckVkResult(vkr);
+
+ // Count the BLAS device addresses to update the AS type
+ const uint64_t handleCount = *(uint64_t *)(mappedDstBuffer + handleCountOffset);
+ result = {readbackmem, true};
+ result.isTLAS = handleCount > 0;
+
+ ObjDisp(d)->UnmapMemory(Unwrap(d), Unwrap(result.alloc.mem));
+
+ return true;
+}
+
+template <typename SerialiserType>
+bool VulkanAccelerationStructureManager::Serialise(SerialiserType &ser, ResourceId id,
+ const VkInitialContents *initial,
+ CaptureState state)
+{
+ VkDevice d = !IsStructuredExporting(state) ? m_pDriver->GetDev() : VK_NULL_HANDLE;
+ const bool replayingAndReading = ser.IsReading() && IsReplayMode(state);
+ VkResult vkr = VK_SUCCESS;
+
+ byte *contents = NULL;
+ uint64_t contentsSize = initial ? initial->mem.size : 0;
+ MemoryAllocation mappedMem;
+
+ // Serialise this separately so that it can be used on reading to prepare the upload memory
+ SERIALISE_ELEMENT(contentsSize);
+
+ const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize;
+
+ // the memory/buffer that we allocated on read, to upload the initial contents.
+ MemoryAllocation uploadMemory;
+ VkBuffer uploadBuf = VK_NULL_HANDLE;
+
+ if(ser.IsWriting())
+ {
+ if(initial && initial->mem.mem != VK_NULL_HANDLE)
+ {
+ const VkDeviceSize size = AlignUp(initial->mem.size, nonCoherentAtomSize);
+
+ mappedMem = initial->mem;
+ vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(mappedMem.mem), initial->mem.offs, size, 0,
+ (void **)&contents);
+ m_pDriver->CheckVkResult(vkr);
+
+ // invalidate the cpu cache for this memory range to avoid reading stale data
+ const VkMappedMemoryRange range = {
+ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(mappedMem.mem), mappedMem.offs, size,
+ };
+
+ vkr = ObjDisp(d)->InvalidateMappedMemoryRanges(Unwrap(d), 1, &range);
+ m_pDriver->CheckVkResult(vkr);
+ }
+ }
+ else if(IsReplayMode(state) && !ser.IsErrored())
+ {
+ // create a buffer with memory attached, which we will fill with the initial contents
+ const VkBufferCreateInfo bufInfo = {
+ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ NULL,
+ 0,
+ contentsSize,
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ };
+
+ vkr = m_pDriver->vkCreateBuffer(d, &bufInfo, NULL, &uploadBuf);
+ m_pDriver->CheckVkResult(vkr);
+
+ VkMemoryRequirements mrq = {};
+ m_pDriver->vkGetBufferMemoryRequirements(d, uploadBuf, &mrq);
+
+ mrq.alignment = RDCMAX(mrq.alignment, asBufferAlignment);
+
+ uploadMemory = m_pDriver->AllocateMemoryForResource(true, mrq, MemoryScope::InitialContents,
+ MemoryType::Upload);
+
+ if(uploadMemory.mem == VK_NULL_HANDLE)
+ return false;
+
+ vkr = m_pDriver->vkBindBufferMemory(d, uploadBuf, uploadMemory.mem, uploadMemory.offs);
+ m_pDriver->CheckVkResult(vkr);
+
+ mappedMem = uploadMemory;
+
+ vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(mappedMem.mem), mappedMem.offs,
+ AlignUp(mappedMem.size, nonCoherentAtomSize), 0, (void **)&contents);
+ m_pDriver->CheckVkResult(vkr);
+
+ if(!contents)
+ {
+ RDCERR("Manually reporting failed memory map");
+ m_pDriver->CheckVkResult(VK_ERROR_MEMORY_MAP_FAILED);
+ return false;
+ }
+
+ if(vkr != VK_SUCCESS)
+ return false;
+ }
+
+ // not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
+ // directly into upload memory
+ ser.Serialise("Serialised AS"_lit, contents, contentsSize, SerialiserFlags::NoFlags).Important();
+
+ // unmap the resource we mapped before - we need to do this on read and on write.
+ bool isTLAS = false;
+ if(!IsStructuredExporting(state) && mappedMem.mem != VK_NULL_HANDLE)
+ {
+ if(replayingAndReading)
+ {
+ // first ensure we flush the writes from the cpu to gpu memory
+ const VkMappedMemoryRange range = {
+ VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, Unwrap(mappedMem.mem), mappedMem.offs,
+ AlignUp(mappedMem.size, nonCoherentAtomSize),
+ };
+
+ vkr = ObjDisp(d)->FlushMappedMemoryRanges(Unwrap(d), 1, &range);
+ m_pDriver->CheckVkResult(vkr);
+
+ // Read the AS's BLAS handle count to determine if it's top or bottom level
+ isTLAS = *((uint64_t *)(contents + handleCountOffset)) > 0;
+ }
+
+ ObjDisp(d)->UnmapMemory(Unwrap(d), Unwrap(mappedMem.mem));
+ }
+
+ SERIALISE_CHECK_READ_ERRORS();
+
+ if(IsReplayMode(state) && contentsSize > 0)
+ {
+ VkInitialContents initialContents(eResAccelerationStructureKHR, uploadMemory);
+ initialContents.isTLAS = isTLAS;
+ initialContents.buf = uploadBuf;
+
+ m_pDriver->GetResourceManager()->SetInitialContents(id, initialContents);
+ }
+
+ return true;
+}
+
+template bool VulkanAccelerationStructureManager::Serialise(ReadSerialiser &ser, ResourceId id,
+ const VkInitialContents *initial,
+ CaptureState state);
+template bool VulkanAccelerationStructureManager::Serialise(WriteSerialiser &ser, ResourceId id,
+ const VkInitialContents *initial,
+ CaptureState state);
+
+void VulkanAccelerationStructureManager::Apply(ResourceId id, const VkInitialContents &initial)
+{
+ VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
+ if(cmd == VK_NULL_HANDLE)
+ {
+ RDCERR("Couldn't acquire command buffer");
+ return;
+ }
+
+ const VkAccelerationStructureKHR unwrappedAs =
+ Unwrap(m_pDriver->GetResourceManager()->GetCurrentHandle(id));
+ const VkDevice d = m_pDriver->GetDev();
+
+ VkMarkerRegion::Begin(StringFormat::Fmt("Initial state for %s", ToStr(id).c_str()), cmd);
+
+ if(m_pDriver->GetDriverInfo().MaliBrokenASDeviceSerialisation())
+ {
+ const VkDeviceSize size =
+ AlignUp(initial.mem.size, m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize);
+
+ // Copy the data using host-commands but from mapped memory
+ byte *mappedSrcBuffer = NULL;
+ VkResult vkr = ObjDisp(d)->MapMemory(Unwrap(d), Unwrap(initial.mem.mem), initial.mem.offs, size,
+ 0, (void **)&mappedSrcBuffer);
+ m_pDriver->CheckVkResult(vkr);
+
+ VkCopyMemoryToAccelerationStructureInfoKHR copyInfo = {
+ VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR};
+ copyInfo.src.hostAddress = mappedSrcBuffer;
+ copyInfo.dst = unwrappedAs;
+ copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR;
+ ObjDisp(d)->CopyMemoryToAccelerationStructureKHR(Unwrap(d), VK_NULL_HANDLE, &copyInfo);
+ }
+ else
+ {
+ const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL,
+ Unwrap(initial.buf)};
+ const VkDeviceAddress uploadBufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo);
+
+ VkCopyMemoryToAccelerationStructureInfoKHR copyInfo = {
+ VK_STRUCTURE_TYPE_COPY_MEMORY_TO_ACCELERATION_STRUCTURE_INFO_KHR};
+ copyInfo.src.deviceAddress = uploadBufAddr;
+ copyInfo.dst = unwrappedAs;
+ copyInfo.mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_DESERIALIZE_KHR;
+ ObjDisp(d)->CmdCopyMemoryToAccelerationStructureKHR(Unwrap(cmd), &copyInfo);
+ }
+
+ VkMarkerRegion::End(cmd);
+
+ if(Vulkan_Debug_SingleSubmitFlushing())
+ {
+ m_pDriver->CloseInitStateCmd();
+ m_pDriver->SubmitCmds();
+ m_pDriver->FlushQ();
+ }
+}
+
+VkDeviceSize VulkanAccelerationStructureManager::SerialisedASSize(VkAccelerationStructureKHR as)
+{
+ VkDevice d = m_pDriver->GetDev();
+
+ // Create query pool
+ VkQueryPoolCreateInfo info = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO};
+ info.queryCount = 1;
+ info.queryType = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
+
+ VkQueryPool pool;
+ VkResult vkr = ObjDisp(d)->CreateQueryPool(Unwrap(d), &info, NULL, &pool);
+ m_pDriver->CheckVkResult(vkr);
+
+ // Reset query pool
+ VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
+ ObjDisp(d)->CmdResetQueryPool(Unwrap(cmd), pool, 0, 1);
+
+ // Get the size
+ ObjDisp(d)->CmdWriteAccelerationStructuresPropertiesKHR(
+ Unwrap(cmd), 1, &as, VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR, pool, 0);
+
+ m_pDriver->CloseInitStateCmd();
+ m_pDriver->SubmitCmds();
+ m_pDriver->FlushQ();
+
+ VkDeviceSize size = 0;
+ vkr = ObjDisp(d)->GetQueryPoolResults(Unwrap(d), pool, 0, 1, sizeof(VkDeviceSize), &size,
+ sizeof(VkDeviceSize), VK_QUERY_RESULT_WAIT_BIT);
+ m_pDriver->CheckVkResult(vkr);
+
+ // Clean up
+ ObjDisp(d)->DestroyQueryPool(Unwrap(d), pool, NULL);
+
+ return size;
+}
diff --git a/renderdoc/driver/vulkan/vk_acceleration_structure.h b/renderdoc/driver/vulkan/vk_acceleration_structure.h
new file mode 100644
index 000000000..4aa8d5115
--- /dev/null
+++ b/renderdoc/driver/vulkan/vk_acceleration_structure.h
@@ -0,0 +1,59 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#pragma once
+
+#include "vk_manager.h"
+
+class WrappedVulkan;
+
+class VulkanAccelerationStructureManager
+{
+public:
+ struct ASMemory
+ {
+ MemoryAllocation alloc;
+ bool isTLAS;
+ };
+
+ VulkanAccelerationStructureManager(WrappedVulkan *driver) : m_pDriver(driver) {}
+
+ // Called when the initial state is prepared. Any TLAS and BLAS data is copied into temporary
+ // buffers and the handles for that memory and the buffers is stored in the init state
+ bool Prepare(VkAccelerationStructureKHR unwrappedAs, const rdcarray<uint32_t> &queueFamilyIndices,
+ ASMemory &result);
+
+ template <typename SerialiserType>
+ bool Serialise(SerialiserType &ser, ResourceId id, const VkInitialContents *initial,
+ CaptureState state);
+
+ // Called when the initial state is applied. The AS data is deserialised from the upload buffer
+ // into the acceleration structure
+ void Apply(ResourceId id, const VkInitialContents &initial);
+
+private:
+ VkDeviceSize SerialisedASSize(VkAccelerationStructureKHR as);
+
+ WrappedVulkan *m_pDriver;
+};
diff --git a/renderdoc/driver/vulkan/vk_common.cpp b/renderdoc/driver/vulkan/vk_common.cpp
index 2b2c7896e..5d5bebca8 100644
--- a/renderdoc/driver/vulkan/vk_common.cpp
+++ b/renderdoc/driver/vulkan/vk_common.cpp
@@ -362,6 +362,10 @@ bool VkInitParams::IsSupportedVersion(uint64_t ver)
if(ver == CurrentVersion)
return true;
+ // 0x15 -> 0x16 - added support for acceleration structures
+ if(ver == 0x15)
+ return true;
+
// 0x14 -> 0x15 - added support for mutable descriptors
if(ver == 0x14)
return true;
@@ -1154,6 +1158,19 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps,
qualcommLeakingUBOOffsets = true;
}
}
+
+ if(driverProps.driverID == VK_DRIVER_ID_ARM_PROPRIETARY)
+ {
+ if(Major() >= 36 && Major() < 43)
+ {
+ if(active)
+ RDCLOG(
+ "Using host acceleration structure deserialisation commands on Mali - update to a "
+ "newer "
+ "driver for fix");
+ maliBrokenASDeviceSerialisation = true;
+ }
+ }
}
FrameRefType GetRefType(DescriptorSlotType descType)
@@ -1168,7 +1185,8 @@ FrameRefType GetRefType(DescriptorSlotType descType)
case DescriptorSlotType::UniformBuffer:
case DescriptorSlotType::UniformBufferDynamic:
case DescriptorSlotType::InputAttachment:
- case DescriptorSlotType::InlineBlock: return eFrameRef_Read;
+ case DescriptorSlotType::InlineBlock:
+ case DescriptorSlotType::AccelerationStructure: return eFrameRef_Read;
case DescriptorSlotType::StorageImage:
case DescriptorSlotType::StorageTexelBuffer:
case DescriptorSlotType::StorageBuffer:
@@ -1207,6 +1225,13 @@ void DescriptorSetSlot::SetTexelBuffer(VkDescriptorType writeType, ResourceId id
resource = id;
}
+void DescriptorSetSlot::SetAccelerationStructure(VkDescriptorType writeType,
+ VkAccelerationStructureKHR accelerationStructure)
+{
+ type = convert(writeType);
+ resource = GetResID(accelerationStructure);
+}
+
void AddBindFrameRef(DescriptorBindRefs &refs, ResourceId id, FrameRefType ref)
{
if(id == ResourceId())
@@ -1263,7 +1288,7 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou
RDCCOMPILE_ASSERT(offsetof(DescriptorSetSlot, offset) == 8,
"DescriptorSetSlot first uint64_t bitpacking isn't working as expected");
- VkResourceRecord *bufView = NULL, *imgView = NULL, *buffer = NULL;
+ VkResourceRecord *bufView = NULL, *imgView = NULL, *buffer = NULL, *accStruct = NULL;
switch(type)
{
@@ -1277,6 +1302,9 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou
case DescriptorSlotType::SampledImage:
case DescriptorSlotType::StorageImage:
case DescriptorSlotType::InputAttachment: imgView = rm->GetResourceRecord(resource); break;
+ case DescriptorSlotType::AccelerationStructure:
+ accStruct = rm->GetResourceRecord(resource);
+ break;
default: break;
}
@@ -1312,6 +1340,10 @@ void DescriptorSetSlot::AccumulateBindRefs(DescriptorBindRefs &refs, VulkanResou
if(buffer->storable)
refs.storableRefs.insert(buffer);
}
+ if(accStruct)
+ {
+ AddBindFrameRef(refs, resource, eFrameRef_Read);
+ }
}
#if ENABLED(ENABLE_UNIT_TESTS)
diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h
index a3e16e7a1..4c2d9d321 100644
--- a/renderdoc/driver/vulkan/vk_common.h
+++ b/renderdoc/driver/vulkan/vk_common.h
@@ -305,6 +305,9 @@ public:
// If we do have a pipeline to bind, we should never be perturbing dynamic state in between static
// pipeline binds.
bool NVStaticPipelineRebindStates() const { return nvidiaStaticPipelineRebindStates; }
+ // On Mali there are some known issues regarding acceleration structure serialisation to device
+ // memory, for the affected driver versions we switch to the host command variants
+ bool MaliBrokenASDeviceSerialisation() const { return maliBrokenASDeviceSerialisation; }
private:
GPUVendor m_Vendor;
@@ -320,6 +323,7 @@ private:
bool qualcommLineWidthCrash = false;
bool intelBrokenOcclusionQueries = false;
bool nvidiaStaticPipelineRebindStates = false;
+ bool maliBrokenASDeviceSerialisation = false;
};
enum
@@ -527,6 +531,7 @@ enum class DescriptorSlotType : EnumBaseType
StorageBufferDynamic,
InputAttachment,
InlineBlock,
+ AccelerationStructure,
Count,
};
@@ -550,7 +555,9 @@ constexpr VkDescriptorType convert(DescriptorSlotType type)
? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC
: type == DescriptorSlotType::InputAttachment ? VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT
: type == DescriptorSlotType::InlineBlock ? VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK
- : VK_DESCRIPTOR_TYPE_MAX_ENUM;
+ : type == DescriptorSlotType::AccelerationStructure
+ ? VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR
+ : VK_DESCRIPTOR_TYPE_MAX_ENUM;
}
constexpr DescriptorSlotType convert(VkDescriptorType type)
@@ -682,6 +689,8 @@ struct DescriptorSetSlot
void SetBuffer(VkDescriptorType writeType, const VkDescriptorBufferInfo &bufInfo);
void SetImage(VkDescriptorType writeType, const VkDescriptorImageInfo &imInfo, bool useSampler);
void SetTexelBuffer(VkDescriptorType writeType, ResourceId id);
+ void SetAccelerationStructure(VkDescriptorType writeType,
+ VkAccelerationStructureKHR accelerationStructure);
// 48-bit truncated VK_WHOLE_SIZE
static const VkDeviceSize WholeSizeRange = 0xFFFFFFFFFFFF;
@@ -714,8 +723,8 @@ struct DescriptorSetSlot
// as a different type and the resource ID is partly trampled. Since these are disjoint we know
// that even if they're stale they're valid IDs.
- // main contents: buffer, image, texel buffer view. NOT the sampler for sampler-only descriptors,
- // just to avoid confusion
+ // main contents: buffer, image, texel buffer view, or acceleration structure. NOT the sampler for
+ // sampler-only descriptors, just to avoid confusion
ResourceId resource;
// sampler for sampler-only descriptors, or sampler for combined image-sampler descriptors
ResourceId sampler;
@@ -1977,7 +1986,6 @@ DECLARE_DESERIALISE_TYPE(VkWriteDescriptorSetInlineUniformBlock);
// plain structs with no next chain
DECLARE_REFLECTION_STRUCT(VkAabbPositionsKHR);
DECLARE_REFLECTION_STRUCT(VkAccelerationStructureBuildRangeInfoKHR);
-DECLARE_REFLECTION_STRUCT(VkAccelerationStructureGeometryDataKHR);
DECLARE_REFLECTION_STRUCT(VkAccelerationStructureInstanceKHR);
DECLARE_REFLECTION_STRUCT(VkAllocationCallbacks);
DECLARE_REFLECTION_STRUCT(VkAttachmentDescription);
diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp
index 6b25e99b0..38ee9c775 100644
--- a/renderdoc/driver/vulkan/vk_core.cpp
+++ b/renderdoc/driver/vulkan/vk_core.cpp
@@ -147,6 +147,7 @@ WrappedVulkan::WrappedVulkan()
m_SetDeviceLoaderData = NULL;
m_ResourceManager = new VulkanResourceManager(m_State, this);
+ m_ASManager = new VulkanAccelerationStructureManager(this);
m_Instance = VK_NULL_HANDLE;
m_PhysicalDevice = VK_NULL_HANDLE;
@@ -191,6 +192,8 @@ WrappedVulkan::~WrappedVulkan()
m_ResourceManager->ClearWithoutReleasing();
SAFE_DELETE(m_ResourceManager);
+ SAFE_DELETE(m_ASManager);
+
SAFE_DELETE(m_FrameReader);
for(size_t i = 0; i < m_ThreadSerialisers.size(); i++)
@@ -1948,6 +1951,35 @@ VkResult WrappedVulkan::FilterDeviceExtensionProperties(VkPhysicalDevice physDev
return true;
}
+ if(!strcmp(ext.extensionName, VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME))
+ {
+ // require GPDP2
+ if(instDevInfo->ext_KHR_get_physical_device_properties2)
+ {
+ VkPhysicalDeviceAccelerationStructureFeaturesKHR accStruct = {
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR};
+ VkPhysicalDeviceFeatures2 base = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
+ base.pNext = &accStruct;
+ ObjDisp(physDev)->GetPhysicalDeviceFeatures2(Unwrap(physDev), &base);
+
+ if(accStruct.accelerationStructureCaptureReplay)
+ {
+ // supported, don't remove
+ return false;
+ }
+ else if(!filterWarned)
+ {
+ RDCWARN(
+ "VkPhysicalDeviceAccelerationStructureFeaturesKHR."
+ "accelerationStructureCaptureReplay "
+ "is false, can't support capture of VK_KHR_acceleration_structure");
+ }
+ }
+
+ // if it wasn't supported, remove the extension
+ return true;
+ }
+
// not an extension with conditional support, don't remove
return false;
});
@@ -2191,7 +2223,7 @@ void WrappedVulkan::StartFrameCapture(DeviceOwnedWindow devWnd)
// reference the buffer
GetResourceManager()->MarkResourceFrameReferenced((*it)->GetResourceID(), eFrameRef_Read);
// and its backing memory
- GetResourceManager()->MarkMemoryFrameReferenced((*it)->baseResource, (*it)->memOffset,
+ GetResourceManager()->MarkMemoryFrameReferenced((*it)->baseResourceMem, (*it)->memOffset,
(*it)->memSize, eFrameRef_ReadBeforeWrite);
}
}
@@ -2464,10 +2496,13 @@ bool WrappedVulkan::EndFrameCapture(DeviceOwnedWindow devWnd)
SubmitAndFlushExtQueue(swapQueueIndex);
}
+ const VkDeviceSize alignedSize =
+ AlignUp(readbackMem.size, GetDeviceProps().limits.nonCoherentAtomSize);
+
// map memory and readback
byte *pData = NULL;
- vkr = vt->MapMemory(Unwrap(device), Unwrap(readbackMem.mem), readbackMem.offs, readbackMem.size,
- 0, (void **)&pData);
+ vkr = vt->MapMemory(Unwrap(device), Unwrap(readbackMem.mem), readbackMem.offs, alignedSize, 0,
+ (void **)&pData);
CheckVkResult(vkr);
RDCASSERT(pData != NULL);
@@ -2480,7 +2515,7 @@ bool WrappedVulkan::EndFrameCapture(DeviceOwnedWindow devWnd)
NULL,
Unwrap(readbackMem.mem),
readbackMem.offs,
- readbackMem.size,
+ alignedSize,
};
vkr = vt->InvalidateMappedMemoryRanges(Unwrap(device), 1, &range);
@@ -5188,6 +5223,7 @@ void WrappedVulkan::AddUsage(VulkanActionTreeNode &actionNode, rdcarray m_CleanupEvents;
@@ -1122,6 +1120,7 @@ public:
VulkanResourceManager *GetResourceManager() { return m_ResourceManager; }
VulkanDebugManager *GetDebugManager() { return m_DebugManager; }
VulkanShaderCache *GetShaderCache() { return m_ShaderCache; }
+ VulkanAccelerationStructureManager *GetAccelerationStructureManager() { return m_ASManager; }
CaptureState GetState() { return m_State; }
VulkanReplay *GetReplay() { return m_Replay; }
// replay interface
@@ -1177,8 +1176,12 @@ public:
uint32_t GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask);
uint32_t GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask);
+ // Low-level implementation, always prefer the two below
+ MemoryAllocation AllocateMemoryForResource(bool buffer, VkMemoryRequirements mrq,
+ MemoryScope scope, MemoryType type);
MemoryAllocation AllocateMemoryForResource(VkImage im, MemoryScope scope, MemoryType type);
MemoryAllocation AllocateMemoryForResource(VkBuffer buf, MemoryScope scope, MemoryType type);
+ void FreeMemoryAllocation(MemoryAllocation alloc);
void ChooseMemoryIndices();
@@ -1277,6 +1280,7 @@ public:
bool TaskShaders() const { return m_TaskShaders; }
bool MeshShaders() const { return m_MeshShaders; }
bool ListRestart() const { return m_ListRestart; }
+ bool AccelerationStructures() const { return m_AccelerationStructures; }
VulkanRenderState &GetRenderState() { return m_RenderState; }
void SetActionCB(VulkanActionCallback *cb) { m_ActionCallback = cb; }
void SetSubmitChain(void *submitChain) { m_SubmitChain = submitChain; }
@@ -2827,14 +2831,6 @@ public:
void, vkCmdBuildAccelerationStructuresKHR, VkCommandBuffer commandBuffer, uint32_t infoCount,
const VkAccelerationStructureBuildGeometryInfoKHR *pInfos,
const VkAccelerationStructureBuildRangeInfoKHR *const *ppBuildRangeInfos);
- VkResult vkCopyAccelerationStructureKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
- const VkCopyAccelerationStructureInfoKHR *pInfo);
- VkResult vkCopyAccelerationStructureToMemoryKHR(
- VkDevice device, VkDeferredOperationKHR deferredOperation,
- const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo);
- VkResult vkCopyMemoryToAccelerationStructureKHR(
- VkDevice device, VkDeferredOperationKHR deferredOperation,
- const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo);
IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdCopyAccelerationStructureKHR,
VkCommandBuffer commandBuffer,
const VkCopyAccelerationStructureInfoKHR *pInfo);
@@ -2848,6 +2844,14 @@ public:
VkCommandBuffer commandBuffer, uint32_t accelerationStructureCount,
const VkAccelerationStructureKHR *pAccelerationStructures, VkQueryType queryType,
VkQueryPool queryPool, uint32_t firstQuery);
+ VkResult vkCopyAccelerationStructureKHR(VkDevice device, VkDeferredOperationKHR deferredOperation,
+ const VkCopyAccelerationStructureInfoKHR *pInfo);
+ VkResult vkCopyAccelerationStructureToMemoryKHR(
+ VkDevice device, VkDeferredOperationKHR deferredOperation,
+ const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo);
+ VkResult vkCopyMemoryToAccelerationStructureKHR(
+ VkDevice device, VkDeferredOperationKHR deferredOperation,
+ const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo);
IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkCreateAccelerationStructureKHR, VkDevice device,
const VkAccelerationStructureCreateInfoKHR *pCreateInfo,
const VkAllocationCallbacks *,
diff --git a/renderdoc/driver/vulkan/vk_initstate.cpp b/renderdoc/driver/vulkan/vk_initstate.cpp
index 731c988df..82b2115f9 100644
--- a/renderdoc/driver/vulkan/vk_initstate.cpp
+++ b/renderdoc/driver/vulkan/vk_initstate.cpp
@@ -571,6 +571,26 @@ bool WrappedVulkan::Prepare_InitialState(WrappedVkRes *res)
return true;
}
+ else if(type == eResAccelerationStructureKHR)
+ {
+ VulkanAccelerationStructureManager::ASMemory result;
+ VkAccelerationStructureKHR as = ToUnwrappedHandle<VkAccelerationStructureKHR>(res);
+ if(!GetAccelerationStructureManager()->Prepare(as, m_QueueFamilyIndices, result))
+ {
+ SET_ERROR_RESULT(m_LastCaptureError, ResultCode::OutOfMemory,
+ "Couldn't allocate readback memory");
+ m_CaptureFailure = true;
+ return false;
+ }
+
+ VkInitialContents ic = VkInitialContents(type, result.alloc);
+ ic.isTLAS = result.isTLAS;
+
+ GetResourceManager()->SetInitialContents(id, ic);
+ m_PreparedNotSerialisedInitStates.push_back(id);
+
+ return true;
+ }
else
{
RDCERR("Unhandled resource type %d", type);
@@ -608,7 +628,8 @@ uint64_t WrappedVulkan::GetSize_InitialState(ResourceId id, const VkInitialConte
// buffers only have initial states when they're sparse
return ret;
}
- else if(initial.type == eResImage || initial.type == eResDeviceMemory)
+ else if(initial.type == eResImage || initial.type == eResDeviceMemory ||
+ initial.type == eResAccelerationStructureKHR)
{
// the size primarily comes from the buffer, the size of which we conveniently have stored.
return ret + uint64_t(128 + initial.mem.size + WriteSerialiser::GetChunkAlignment());
@@ -1637,6 +1658,10 @@ bool WrappedVulkan::Serialise_InitialState(SerialiserType &ser, ResourceId id, V
}
}
}
+ else if(type == eResAccelerationStructureKHR)
+ {
+ ret = GetAccelerationStructureManager()->Serialise(ser, id, initial, m_State);
+ }
else
{
RDCERR("Unhandled resource type %s", ToStr(type).c_str());
@@ -2312,6 +2337,10 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten
FlushQ();
}
}
+ else if(type == eResAccelerationStructureKHR)
+ {
+ GetAccelerationStructureManager()->Apply(id, initial);
+ }
else
{
RDCERR("Unhandled resource type %d", type);
diff --git a/renderdoc/driver/vulkan/vk_manager.cpp b/renderdoc/driver/vulkan/vk_manager.cpp
index 72c6b9b17..cf0791125 100644
--- a/renderdoc/driver/vulkan/vk_manager.cpp
+++ b/renderdoc/driver/vulkan/vk_manager.cpp
@@ -1038,7 +1038,15 @@ rdcarray VulkanResourceManager::InitialContentResources()
rdcarray resources =
ResourceManager::InitialContentResources();
std::sort(resources.begin(), resources.end(), [this](ResourceId a, ResourceId b) {
- return m_InitialContents[a].data.type < m_InitialContents[b].data.type;
+ const InitialContentData &aData = m_InitialContents[a].data;
+ const InitialContentData &bData = m_InitialContents[b].data;
+
+ // Always sort BLASs before TLASs, as a TLAS holds device addresses for its BLASs
+ // and we make sure those addresses are valid
+ if(!aData.isTLAS && bData.isTLAS)
+ return true;
+
+ return aData.type < bData.type;
});
return resources;
}
diff --git a/renderdoc/driver/vulkan/vk_manager.h b/renderdoc/driver/vulkan/vk_manager.h
index 9a39fd8a8..441720685 100644
--- a/renderdoc/driver/vulkan/vk_manager.h
+++ b/renderdoc/driver/vulkan/vk_manager.h
@@ -133,6 +133,8 @@ struct VkInitialContents
// sparse bind. Similar to the descriptors above
rdcarray *sparseTables;
SparseBinding *sparseBind;
+
+ bool isTLAS; // If the contents are an AS, this determines if it is a TLAS or BLAS
};
struct VulkanResourceManagerConfiguration
diff --git a/renderdoc/driver/vulkan/vk_memory.cpp b/renderdoc/driver/vulkan/vk_memory.cpp
index be97f64d3..9fac1ddd0 100644
--- a/renderdoc/driver/vulkan/vk_memory.cpp
+++ b/renderdoc/driver/vulkan/vk_memory.cpp
@@ -319,9 +319,16 @@ MemoryAllocation WrappedVulkan::AllocateMemoryForResource(bool buffer, VkMemoryR
break;
}
+ // if ray tracing acceleration structures are in use, then allocate memory with buffer device
+ // address support enabled
+ VkMemoryAllocateFlagsInfo flagsInfo = {
+ VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
+ NULL,
+ VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
+ };
VkMemoryAllocateInfo info = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- NULL,
+ AccelerationStructures() ? &flagsInfo : NULL,
allocSize * 1024 * 1024,
memoryTypeIndex,
};
diff --git a/renderdoc/driver/vulkan/vk_replay.cpp b/renderdoc/driver/vulkan/vk_replay.cpp
index 15c91413b..5795b1ec4 100644
--- a/renderdoc/driver/vulkan/vk_replay.cpp
+++ b/renderdoc/driver/vulkan/vk_replay.cpp
@@ -2121,6 +2121,9 @@ void VulkanReplay::SavePipelineState(uint32_t eventId)
dstel.type = BindType::InputAttachment;
break;
case DescriptorSlotType::InlineBlock: dstel.type = BindType::ConstantBuffer; break;
+ case DescriptorSlotType::AccelerationStructure:
+ dstel.type = BindType::ReadWriteBuffer;
+ break;
case DescriptorSlotType::Unwritten:
case DescriptorSlotType::Count: dstel.type = BindType::Unknown; break;
}
@@ -2272,7 +2275,8 @@ void VulkanReplay::SavePipelineState(uint32_t eventId)
else if(descriptorType == DescriptorSlotType::StorageBuffer ||
descriptorType == DescriptorSlotType::StorageBufferDynamic ||
descriptorType == DescriptorSlotType::UniformBuffer ||
- descriptorType == DescriptorSlotType::UniformBufferDynamic)
+ descriptorType == DescriptorSlotType::UniformBufferDynamic ||
+ descriptorType == DescriptorSlotType::AccelerationStructure)
{
destSlots.binds[a].viewResourceId = ResourceId();
diff --git a/renderdoc/driver/vulkan/vk_serialise.cpp b/renderdoc/driver/vulkan/vk_serialise.cpp
index 96b6c2b6f..a6dbf9642 100644
--- a/renderdoc/driver/vulkan/vk_serialise.cpp
+++ b/renderdoc/driver/vulkan/vk_serialise.cpp
@@ -11866,8 +11866,16 @@ void DoSerialise(SerialiserType &ser, VkAccelerationStructureGeometryKHR &el)
RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR);
SerialiseNext(ser, el.sType, el.pNext);
+ // el.geometry is a union so we need to determine which member it is supposed to be before
+ // serialising further
SERIALISE_MEMBER(geometryType).Important();
- SERIALISE_MEMBER(geometry);
+ if(el.geometryType == VK_GEOMETRY_TYPE_TRIANGLES_KHR)
+ ser.Serialise("geometry.triangles"_lit, el.geometry.triangles);
+ else if(el.geometryType == VK_GEOMETRY_TYPE_AABBS_KHR)
+ ser.Serialise("geometry.aabbs"_lit, el.geometry.aabbs);
+ else
+ ser.Serialise("geometry.instances"_lit, el.geometry.instances);
+
SERIALISE_MEMBER_VKFLAGS(VkGeometryFlagsKHR, flags);
}
@@ -12067,14 +12075,6 @@ void Deserialise(const VkWriteDescriptorSetAccelerationStructureKHR &el)
delete[] el.pAccelerationStructures;
}
-template
-void DoSerialise(SerialiserType &ser, VkAccelerationStructureGeometryDataKHR &el)
-{
- SERIALISE_MEMBER(triangles);
- SERIALISE_MEMBER(aabbs);
- SERIALISE_MEMBER(instances);
-}
-
template
void DoSerialise(SerialiserType &ser, VkDeviceOrHostAddressConstKHR &el)
{
@@ -12537,7 +12537,6 @@ INSTANTIATE_SERIALISE_TYPE(VkWriteDescriptorSetAccelerationStructureKHR);
// plain structs with no next chain
INSTANTIATE_SERIALISE_TYPE(VkAabbPositionsKHR);
INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureBuildRangeInfoKHR);
-INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureGeometryDataKHR);
INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureInstanceKHR);
INSTANTIATE_SERIALISE_TYPE(VkAllocationCallbacks);
INSTANTIATE_SERIALISE_TYPE(VkAttachmentDescription);
diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp
index ae1733e47..50abf8bff 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp
@@ -7633,6 +7633,16 @@ bool WrappedVulkan::Serialise_vkCmdBuildAccelerationStructuresIndirectKHR(
for(uint32_t i = 0; i < infoCount; ++i)
tmpMaxPrimitiveCounts[i] = maxPrimitives[i].data();
+ m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
+
+ if(IsActiveReplaying(m_State))
+ {
+ if(InRerecordRange(m_LastCmdBufferID))
+ commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
+ else
+ return true;
+ }
+
ObjDisp(commandBuffer)
->CmdBuildAccelerationStructuresIndirectKHR(Unwrap(commandBuffer), infoCount,
unwrappedInfos, pIndirectDeviceAddresses,
@@ -7732,10 +7742,21 @@ bool WrappedVulkan::Serialise_vkCmdBuildAccelerationStructuresKHR(
unwrappedInfos[i] = *UnwrapStructAndChain(m_State, memory, &pInfos[i]);
// Convert the rangeInfos back to a C-style array-of-arrays
- rdcarray tmpBuildRangeInfos(nullptr, infoCount);
+ rdcarray tmpBuildRangeInfos;
+ tmpBuildRangeInfos.resize(infoCount);
for(uint32_t i = 0; i < infoCount; ++i)
tmpBuildRangeInfos[i] = rangeInfos[i].data();
+ m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
+
+ if(IsActiveReplaying(m_State))
+ {
+ if(InRerecordRange(m_LastCmdBufferID))
+ commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
+ else
+ return true;
+ }
+
ObjDisp(commandBuffer)
->CmdBuildAccelerationStructuresKHR(Unwrap(commandBuffer), infoCount, unwrappedInfos,
tmpBuildRangeInfos.data());
@@ -7792,28 +7813,6 @@ void WrappedVulkan::vkCmdBuildAccelerationStructuresKHR(
}
}
-// CPU-side VK_KHR_acceleration_structure calls are not supported for now
-VkResult WrappedVulkan::vkCopyAccelerationStructureKHR(VkDevice device,
- VkDeferredOperationKHR deferredOperation,
- const VkCopyAccelerationStructureInfoKHR *pInfo)
-{
- return VK_ERROR_UNKNOWN;
-}
-
-VkResult WrappedVulkan::vkCopyAccelerationStructureToMemoryKHR(
- VkDevice device, VkDeferredOperationKHR deferredOperation,
- const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
-{
- return VK_ERROR_UNKNOWN;
-}
-
-VkResult WrappedVulkan::vkCopyMemoryToAccelerationStructureKHR(
- VkDevice device, VkDeferredOperationKHR deferredOperation,
- const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
-{
- return VK_ERROR_UNKNOWN;
-}
-
template
bool WrappedVulkan::Serialise_vkCmdCopyAccelerationStructureKHR(
SerialiserType &ser, VkCommandBuffer commandBuffer,
@@ -7963,6 +7962,28 @@ VkResult WrappedVulkan::vkWriteAccelerationStructuresPropertiesKHR(
Unwrap(device), accelerationStructureCount, unwrappedASes, queryType, dataSize, pData, stride);
}
+// CPU-side VK_KHR_acceleration_structure calls are not supported for now
+VkResult WrappedVulkan::vkCopyAccelerationStructureKHR(VkDevice device,
+ VkDeferredOperationKHR deferredOperation,
+ const VkCopyAccelerationStructureInfoKHR *pInfo)
+{
+ return VK_ERROR_UNKNOWN;
+}
+
+VkResult WrappedVulkan::vkCopyAccelerationStructureToMemoryKHR(
+ VkDevice device, VkDeferredOperationKHR deferredOperation,
+ const VkCopyAccelerationStructureToMemoryInfoKHR *pInfo)
+{
+ return VK_ERROR_UNKNOWN;
+}
+
+VkResult WrappedVulkan::vkCopyMemoryToAccelerationStructureKHR(
+ VkDevice device, VkDeferredOperationKHR deferredOperation,
+ const VkCopyMemoryToAccelerationStructureInfoKHR *pInfo)
+{
+ return VK_ERROR_UNKNOWN;
+}
+
INSTANTIATE_FUNCTION_SERIALISED(VkResult, vkCreateCommandPool, VkDevice device,
const VkCommandPoolCreateInfo *pCreateInfo,
const VkAllocationCallbacks *, VkCommandPool *pCommandPool);
diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
index 47887b187..8bb4be7ba 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
@@ -3272,6 +3272,37 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi
RDCWARN("meshShaderQueries = false, mesh shader performance counters unavailable");
}
END_PHYS_EXT_CHECK();
+
+ BEGIN_PHYS_EXT_CHECK(VkPhysicalDeviceAccelerationStructureFeaturesKHR,
+ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR);
+ {
+ CHECK_PHYS_EXT_FEATURE(accelerationStructure)
+ CHECK_PHYS_EXT_FEATURE(accelerationStructureCaptureReplay)
+ CHECK_PHYS_EXT_FEATURE(accelerationStructureIndirectBuild)
+ CHECK_PHYS_EXT_FEATURE(descriptorBindingAccelerationStructureUpdateAfterBind)
+
+ if(ext->accelerationStructure && !avail.accelerationStructureCaptureReplay)
+ {
+ SET_ERROR_RESULT(
+ m_FailedReplayResult, ResultCode::APIHardwareUnsupported,
+ "Capture requires accelerationStructure support, which is available, but "
+ "accelerationStructureCaptureReplay support is not available which is required to "
+ "replay\n"
+ "\n%s",
+ GetPhysDeviceCompatString(false, false).c_str());
+ return false;
+ }
+
+ m_AccelerationStructures = ext->accelerationStructure != VK_FALSE;
+ if(m_AccelerationStructures)
+ {
+ RDCLOG(
+ "Ray tracing acceleration structures requested, allocating all device memory with "
+ "VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT");
+ ext->accelerationStructureCaptureReplay = VK_TRUE;
+ }
+ }
+ END_PHYS_EXT_CHECK();
}
if(availFeatures.depthClamp)
@@ -4317,6 +4348,18 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
if(separateDepthStencilFeatures)
m_SeparateDepthStencil |= (separateDepthStencilFeatures->separateDepthStencilLayouts != VK_FALSE);
+ // we need to enable acceleration structure capture/replay. We verified that this is OK before
+ // whitelisting the extension
+
+ VkPhysicalDeviceAccelerationStructureFeaturesKHR *accFeatures =
+ (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)FindNextStruct(
+ &createInfo, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR);
+ if(accFeatures && accFeatures->accelerationStructure)
+ {
+ accFeatures->accelerationStructureCaptureReplay = VK_TRUE;
+ m_AccelerationStructures = true;
+ }
+
VkResult ret;
SERIALISE_TIME_CALL(ret = createFunc(Unwrap(physicalDevice), &createInfo, NULL, pDevice));
diff --git a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp
index 7a7cf4347..668ea1012 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp
@@ -852,6 +852,17 @@ void WrappedVulkan::vkGetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice
}
#undef DISABLE_EDS3_FEATURE
+
+ // we don't want to report support for acceleration structure host commands
+ VkPhysicalDeviceAccelerationStructureFeaturesKHR *accStruct =
+ (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)FindNextStruct(
+ pFeatures, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR);
+
+ if(accStruct && accStruct->accelerationStructureHostCommands)
+ {
+ RDCWARN("Disabling support for acceleration structure host commands");
+ accStruct->accelerationStructureHostCommands = VK_FALSE;
+ }
}
void WrappedVulkan::vkGetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
diff --git a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
index e845d1b0f..6caf2b5f8 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
@@ -1719,10 +1719,17 @@ VkResult WrappedVulkan::vkCreateBuffer(VkDevice device, const VkBufferCreateInfo
// effectively free as a usage bit for all sensible implementations so we just add it here.
adjusted_info.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
- // If we're using this buffer for device addresses, ensure we force on capture replay bit.
- // We ensured the physical device can support this feature before whitelisting the extension.
- if(IsCaptureMode(m_State) && (adjusted_info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT))
- adjusted_info.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
+ if(IsCaptureMode(m_State))
+ {
+ // If we're using this buffer for AS storage we need to enable BDA
+ if(adjusted_info.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
+ adjusted_info.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
+
+ // If we're using this buffer for device addresses, ensure we force on capture replay bit.
+ // We ensured the physical device can support this feature before whitelisting the extension.
+ if(adjusted_info.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT)
+ adjusted_info.flags |= VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
+ }
byte *tempMem = GetTempMemory(GetNextPatchSize(adjusted_info.pNext));
@@ -3139,7 +3146,7 @@ bool WrappedVulkan::Serialise_vkCreateAccelerationStructureKHR(
VkAccelerationStructureKHR acc = VK_NULL_HANDLE;
VkResult ret =
- ObjDisp(device)->CreateAccelerationStructureKHR(Unwrap(device), &CreateInfo, NULL, &acc);
+ ObjDisp(device)->CreateAccelerationStructureKHR(Unwrap(device), &unwrappedInfo, NULL, &acc);
if(ret != VK_SUCCESS)
{
@@ -3186,6 +3193,13 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR(
const VkAllocationCallbacks *, VkAccelerationStructureKHR *pAccelerationStructure)
{
VkAccelerationStructureCreateInfoKHR unwrappedInfo = *pCreateInfo;
+
+ // Ensure we force on capture replay bit. We ensured the physical device can support this feature
+ // before whitelisting the extension.
+ if(IsCaptureMode(m_State))
+ unwrappedInfo.createFlags |=
+ VK_ACCELERATION_STRUCTURE_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR;
+
unwrappedInfo.buffer = Unwrap(unwrappedInfo.buffer);
VkResult ret;
SERIALISE_TIME_CALL(ret = ObjDisp(device)->CreateAccelerationStructureKHR(
@@ -3197,13 +3211,27 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR(
if(IsCaptureMode(m_State))
{
+ // We're capturing, so get the device address of the created AS
+ VkAccelerationStructureCreateInfoKHR serialisedCreateInfo = *pCreateInfo;
+ serialisedCreateInfo.createFlags |=
+ VK_ACCELERATION_STRUCTURE_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT_KHR;
+
+ const VkAccelerationStructureDeviceAddressInfoKHR getInfo = {
+ VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR,
+ NULL,
+ Unwrap(*pAccelerationStructure),
+ };
+ const VkDeviceAddress addr =
+ ObjDisp(device)->GetAccelerationStructureDeviceAddressKHR(Unwrap(device), &getInfo);
+ serialisedCreateInfo.deviceAddress = addr;
+
Chunk *chunk = NULL;
{
CACHE_THREAD_SERIALISER();
SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCreateAccelerationStructureKHR);
- Serialise_vkCreateAccelerationStructureKHR(ser, device, pCreateInfo, NULL,
+ Serialise_vkCreateAccelerationStructureKHR(ser, device, &serialisedCreateInfo, NULL,
pAccelerationStructure);
chunk = scope.Get();
@@ -3223,6 +3251,16 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR(
record->storable = bufferRecord->storable;
record->memOffset = bufferRecord->memOffset + pCreateInfo->offset;
record->memSize = pCreateInfo->size;
+
+ GetResourceManager()->MarkDirtyResource(id);
+ if(pCreateInfo->type == VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR ||
+ pCreateInfo->type == VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR)
+ {
+ // We force reference BLASs as it is not feasible to track at the API level which TLASs
+ // reference them. We force ref generics too as they could be bottom or top level, so we
+ // conservatively assume they are bottom level
+ AddForcedReference(record);
+ }
}
else
{