mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-05 17:40:39 +00:00
Apply workaround for presumed windows bug with BDA capture/replay
* This replaces the previous workaround disabling BDA capture/replay on dedicated image memory, as that is not feasible with future functionality and appears not to be the source of the bug seen before. * Although this is believed to be a windows bug, it only manifests on NV so is treated as an NV bug for now. A future NV driver should contain a workaround for this so it can be disabled later.
This commit is contained in:
@@ -1156,6 +1156,12 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps,
|
||||
RDCLOG("Enabling NV workaround for static pipeline force-bind to preserve state");
|
||||
nvidiaStaticPipelineRebindStates = true;
|
||||
}
|
||||
|
||||
// this is fixed in a windows version but we can't easily query that, so instead we are waiting
|
||||
// for a driver-based workaround and apply the workaround ourselves in the meantime
|
||||
if(active)
|
||||
RDCLOG("Enabling NV workaround for unaligned BDA memory capture/replay");
|
||||
nvidiaUnalignedBDAIssue = true;
|
||||
}
|
||||
|
||||
if(driverProps.driverID == VK_DRIVER_ID_AMD_PROPRIETARY ||
|
||||
|
||||
@@ -374,6 +374,10 @@ public:
|
||||
// On Mali there are some known issues regarding acceleration structure serialisation to device
|
||||
// memory, for the affected driver versions we switch to the host command variants
|
||||
bool MaliBrokenASDeviceSerialisation() const { return maliBrokenASDeviceSerialisation; }
|
||||
// on NV BDA capture/replay can sometimes fail for memory on certain windows versions. Although
|
||||
// this is believed to be a windows bug currently, it only manifests on NV and a workaround will
|
||||
// be arriving in later NV drivers, so for now we treat this as a driver bug.
|
||||
bool NVUnalignedBDAIssue() const { return nvidiaUnalignedBDAIssue; }
|
||||
private:
|
||||
GPUVendor m_Vendor;
|
||||
|
||||
@@ -390,6 +394,7 @@ private:
|
||||
bool intelBrokenOcclusionQueries = false;
|
||||
bool nvidiaStaticPipelineRebindStates = false;
|
||||
bool maliBrokenASDeviceSerialisation = false;
|
||||
bool nvidiaUnalignedBDAIssue = false;
|
||||
};
|
||||
|
||||
struct DynamicRenderingLocalRead
|
||||
|
||||
@@ -348,6 +348,13 @@ bool WrappedVulkan::Serialise_vkAllocateMemory(SerialiserType &ser, VkDevice dev
|
||||
return false;
|
||||
}
|
||||
|
||||
// apply workaround for presumed windows bug
|
||||
if(GetDriverInfo().NVUnalignedBDAIssue())
|
||||
{
|
||||
// all memory allocations must be 64kB aligned. The rest of the workaround only applies during capture
|
||||
patched.allocationSize = AlignUp(patched.allocationSize, VkDeviceSize(64 * 1024));
|
||||
}
|
||||
|
||||
VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &patched, NULL, &mem);
|
||||
|
||||
if(ret != VK_SUCCESS)
|
||||
@@ -526,30 +533,22 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
|
||||
// will be bound against since there's no requirement for the buffer to be marked as BDA. This
|
||||
// means that when RT is enabled ALL MEMORY IN THE ENTIRE PROGRAM must be marked as BDA just in
|
||||
// case.
|
||||
//
|
||||
// we don't force this on for memory allocations that are going to be used for dedicated images
|
||||
bool forceBDA = false;
|
||||
if(IsCaptureMode(m_State) && AccelerationStructures())
|
||||
{
|
||||
const VkMemoryDedicatedAllocateInfo *dedicated =
|
||||
(const VkMemoryDedicatedAllocateInfo *)FindNextStruct(
|
||||
pAllocateInfo, VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO);
|
||||
if(dedicated == NULL || dedicated->image == VK_NULL_HANDLE)
|
||||
{
|
||||
// force BDA flag when creating, by adding the struct if needed
|
||||
forceBDA = true;
|
||||
// force BDA flag when creating, by adding the struct if needed
|
||||
forceBDA = true;
|
||||
|
||||
if(memFlags)
|
||||
{
|
||||
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
|
||||
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
|
||||
rtForcedFlags.pNext = unwrapped.pNext;
|
||||
unwrapped.pNext = &rtForcedFlags;
|
||||
}
|
||||
if(memFlags)
|
||||
{
|
||||
memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;
|
||||
}
|
||||
else
|
||||
{
|
||||
rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
|
||||
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
|
||||
rtForcedFlags.pNext = unwrapped.pNext;
|
||||
unwrapped.pNext = &rtForcedFlags;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -574,6 +573,27 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate
|
||||
}
|
||||
}
|
||||
|
||||
// apply workaround for presumed windows bug
|
||||
if(GetDriverInfo().NVUnalignedBDAIssue())
|
||||
{
|
||||
const VkDeviceSize kb64 = 64 * 1024;
|
||||
const VkDeviceSize mb2 = 2 * 1024 * 1024;
|
||||
|
||||
// all memory allocations must be 64kB aligned. We do this silently, not affecting the serialised size
|
||||
unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, kb64);
|
||||
|
||||
// <2 MB allocations must have an extra 64kB
|
||||
if(unwrapped.allocationSize < mb2)
|
||||
{
|
||||
unwrapped.allocationSize += kb64;
|
||||
}
|
||||
else
|
||||
{
|
||||
// >= 2MB allocations must be aligned to 2MB
|
||||
unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, mb2);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult ret;
|
||||
SERIALISE_TIME_CALL(
|
||||
ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &unwrapped, NULL, pMemory));
|
||||
|
||||
@@ -429,26 +429,6 @@ bool WrappedVulkan::Serialise_vkCreateSwapchainKHR(SerialiserType &ser, VkDevice
|
||||
GetGPULocalMemoryIndex(mrq.memoryTypeBits),
|
||||
};
|
||||
|
||||
VkMemoryDedicatedAllocateInfo dedicated = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
|
||||
NULL,
|
||||
Unwrap(im),
|
||||
};
|
||||
|
||||
// if the acceleration structures feature is enabled, we _must_ be on at least vulkan 1.1 (the
|
||||
// extension requires it). Vulkan 1.1 unconditionally allows the use of dedicated image
|
||||
// allocations without any feature bits (the original extension didn't have any either).
|
||||
//
|
||||
// we do this because without it, the extra memory allocation may be promoted to BDA by
|
||||
// self-capturing and cause problems with address space clashes. We don't need the dedicated
|
||||
// allocation but it avoids that behaviour as it may not be legal to query the address of a
|
||||
// dedicated image memory allocation and in any case we know that it can't be legally used for
|
||||
// BDA or AS backing memory
|
||||
if(AccelerationStructures())
|
||||
{
|
||||
allocInfo.pNext = &dedicated;
|
||||
}
|
||||
|
||||
vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &allocInfo, NULL, &mem);
|
||||
CHECK_VKR(this, vkr);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user