Force on BDA unconditionally when ASs are enabled

This commit is contained in:
baldurk
2024-12-12 16:29:33 +00:00
parent 348ca307dd
commit 3a91874abb
2 changed files with 48 additions and 2 deletions
@@ -4442,6 +4442,8 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
{
accFeatures->accelerationStructureCaptureReplay = VK_TRUE;
m_AccelerationStructures = true;
RDCLOG("Acceleration structures enabled, ALL MEMORY WILL BE MARKED AS BDA");
}
VkPhysicalDeviceRayTracingPipelineFeaturesKHR *rtpFeatures =
@@ -506,6 +506,30 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
VkMemoryAllocateFlagsInfo *memFlags = (VkMemoryAllocateFlagsInfo *)FindNextStruct(
&unwrapped, VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO);
static VkMemoryAllocateFlagsInfo rtForcedFlags = {VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO};
// massive wart and oversight in RT APIs. ASs are bound to buffers which are then bound to memory.
// Buffers are not required to be BDA, but we need them to be BDA capture/replay'd in order to
// capture/replay the AS itself. However, we have no way of knowing which memory such a buffer
// will be bound against since there's no requirement for the buffer to be marked as BDA. This
// means that when RT is enabled ALL MEMORY IN THE ENTIRE PROGRAM must be marked as BDA just in
// case.
if(IsCaptureMode(m_State) && AccelerationStructures())
{
// force BDA flag when creating, by adding the struct if needed
if(memFlags)
{
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
}
else
{
rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
rtForcedFlags.pNext = unwrapped.pNext;
unwrapped.pNext = &rtForcedFlags;
}
}
// since the application must specify VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT itself, we can
// assume the struct is present and just add the capture-replay flag to allow us to specify the
// address on replay. We ensured the physical device can support this feature (and it was enabled)
@@ -665,6 +689,22 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
memFlags = (VkMemoryAllocateFlagsInfo *)FindNextStruct(
&serialisedInfo, VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO);
// see above for this gross workaround we have to do
if(AccelerationStructures())
{
if(memFlags)
{
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
}
else
{
// if we don't have memFlags here we also filled out rtForcedFlags above
memFlags = &rtForcedFlags;
rtForcedFlags.pNext = serialisedInfo.pNext;
serialisedInfo.pNext = &rtForcedFlags;
}
}
if(memFlags && (memFlags->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT))
{
VkDeviceMemoryOpaqueCaptureAddressInfo getInfo = {
@@ -1818,12 +1858,16 @@ VkResult WrappedVulkan::vkCreateBuffer(VkDevice device, const VkBufferCreateInfo
};
VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pBuffer);
record->memSize = pCreateInfo->size;
record->memSize = serialisedCreateInfo.size;
// If we're using this buffer for AS storage we need to enable BDA
if(serialisedCreateInfo.usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
serialisedCreateInfo.usage |= VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
// if we're using VK_[KHR|EXT]_buffer_device_address, we fetch the device address that's been
// allocated and insert it into the next chain and patch the flags so that it replays
// naturally.
if((pCreateInfo->usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) != 0)
if((serialisedCreateInfo.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) != 0)
{
VkBufferDeviceAddressInfo getInfo = {
VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,