Apply workaround for presumed Windows bug with BDA capture/replay

* This replaces the previous workaround disabling BDA capture/replay on
  dedicated image memory, as that is not feasible with future functionality and
  appears not to be the source of the bug seen before.
* Although this is believed to be a Windows bug, it only manifests on NV, so it
  is treated as an NV bug for now. A future NV driver should contain a
  workaround for this, so our workaround can be disabled later.
This commit is contained in:
baldurk
2025-07-29 15:32:33 +01:00
parent 3be544c51d
commit 259225cacb
4 changed files with 51 additions and 40 deletions
+6
View File
@@ -1156,6 +1156,12 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps,
RDCLOG("Enabling NV workaround for static pipeline force-bind to preserve state");
nvidiaStaticPipelineRebindStates = true;
}
// this is fixed in a windows version but we can't easily query that, so instead we are waiting
// for a driver-based workaround and apply the workaround ourselves in the meantime
if(active)
RDCLOG("Enabling NV workaround for unaligned BDA memory capture/replay");
nvidiaUnalignedBDAIssue = true;
}
if(driverProps.driverID == VK_DRIVER_ID_AMD_PROPRIETARY ||
+5
View File
@@ -374,6 +374,10 @@ public:
// On Mali there are some known issues regarding acceleration structure serialisation to device
// memory, for the affected driver versions we switch to the host command variants
bool MaliBrokenASDeviceSerialisation() const { return maliBrokenASDeviceSerialisation; }
// on NV BDA capture/replay can sometimes fail for memory on certain windows versions. Although
// this is believed to be a windows bug currently, it only manifests on NV and a workaround will
// be arriving in later NV drivers, so for now we treat this as a driver bug.
bool NVUnalignedBDAIssue() const { return nvidiaUnalignedBDAIssue; }
private:
GPUVendor m_Vendor;
@@ -390,6 +394,7 @@ private:
bool intelBrokenOcclusionQueries = false;
bool nvidiaStaticPipelineRebindStates = false;
bool maliBrokenASDeviceSerialisation = false;
bool nvidiaUnalignedBDAIssue = false;
};
struct DynamicRenderingLocalRead
@@ -348,6 +348,13 @@ bool WrappedVulkan::Serialise_vkAllocateMemory(SerialiserType &ser, VkDevice dev
return false;
}
// apply workaround for presumed windows bug
if(GetDriverInfo().NVUnalignedBDAIssue())
{
// all memory allocations must be 64kB aligned. The rest of the workaround only applies during capture
patched.allocationSize = AlignUp(patched.allocationSize, VkDeviceSize(64 * 1024));
}
VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &patched, NULL, &mem);
if(ret != VK_SUCCESS)
@@ -526,30 +533,22 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
// will be bound against since there's no requirement for the buffer to be marked as BDA. This
// means that when RT is enabled ALL MEMORY IN THE ENTIRE PROGRAM must be marked as BDA just in
// case.
//
// we don't force this on for memory allocations that are going to be used for dedicated images
bool forceBDA = false;
if(IsCaptureMode(m_State) && AccelerationStructures())
{
const VkMemoryDedicatedAllocateInfo *dedicated =
(const VkMemoryDedicatedAllocateInfo *)FindNextStruct(
pAllocateInfo, VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO);
if(dedicated == NULL || dedicated->image == VK_NULL_HANDLE)
{
// force BDA flag when creating, by adding the struct if needed
forceBDA = true;
// force BDA flag when creating, by adding the struct if needed
forceBDA = true;
if(memFlags)
{
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
}
else
{
rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
rtForcedFlags.pNext = unwrapped.pNext;
unwrapped.pNext = &rtForcedFlags;
}
if(memFlags)
{
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
}
else
{
rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
rtForcedFlags.pNext = unwrapped.pNext;
unwrapped.pNext = &rtForcedFlags;
}
}
@@ -574,6 +573,27 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
}
}
// apply workaround for presumed windows bug
if(GetDriverInfo().NVUnalignedBDAIssue())
{
const VkDeviceSize kb64 = 64 * 1024;
const VkDeviceSize mb2 = 2 * 1024 * 1024;
// all memory allocations must be 64kB aligned. We do this silently, not affecting the serialised size
unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, kb64);
// <2 MB allocations must have an extra 64kB
if(unwrapped.allocationSize < mb2)
{
unwrapped.allocationSize += kb64;
}
else
{
// >= 2MB allocations must be aligned to 2MB
unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, mb2);
}
}
VkResult ret;
SERIALISE_TIME_CALL(
ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &unwrapped, NULL, pMemory));
@@ -429,26 +429,6 @@ bool WrappedVulkan::Serialise_vkCreateSwapchainKHR(SerialiserType &ser, VkDevice
GetGPULocalMemoryIndex(mrq.memoryTypeBits),
};
VkMemoryDedicatedAllocateInfo dedicated = {
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
NULL,
Unwrap(im),
};
// if the acceleration structures feature is enabled, we _must_ be on at least vulkan 1.1 (the
// extension requires it). Vulkan 1.1 unconditionally allows the use of dedicated image
// allocations without any feature bits (the original extension didn't have any either).
//
// we do this because without it, the extra memory allocation may be promoted to BDA by
// self-capturing and cause problems with address space clashes. We don't need the dedicated
// allocation but it avoids that behaviour as it may not be legal to query the address of a
// dedicated image memory allocation and in any case we know that it can't be legally used for
// BDA or AS backing memory
if(AccelerationStructures())
{
allocInfo.pNext = &dedicated;
}
vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &allocInfo, NULL, &mem);
CHECK_VKR(this, vkr);