diff --git a/renderdoc/driver/vulkan/vk_common.cpp b/renderdoc/driver/vulkan/vk_common.cpp index e5eb03346..4f611bcc5 100644 --- a/renderdoc/driver/vulkan/vk_common.cpp +++ b/renderdoc/driver/vulkan/vk_common.cpp @@ -1156,6 +1156,12 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps, RDCLOG("Enabling NV workaround for static pipeline force-bind to preserve state"); nvidiaStaticPipelineRebindStates = true; } + + // this is fixed in a windows version but we can't easily query that, so instead we are waiting + // for a driver-based workaround and apply the workaround ourselves in the meantime + if(active) + RDCLOG("Enabling NV workaround for unaligned BDA memory capture/replay"); + nvidiaUnalignedBDAIssue = true; } if(driverProps.driverID == VK_DRIVER_ID_AMD_PROPRIETARY || diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index 8bbfbb14d..b3e17b514 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -374,6 +374,10 @@ public: // On Mali there are some known issues regarding acceleration structure serialisation to device // memory, for the affected driver versions we switch to the host command variants bool MaliBrokenASDeviceSerialisation() const { return maliBrokenASDeviceSerialisation; } + // on NV BDA capture/replay can sometimes fail for memory on certain windows versions. Although + // this is believed to be a windows bug currently, it only manifests on NV and a workaround will + // be arriving in later NV drivers, so for now we treat this as a driver bug. + bool NVUnalignedBDAIssue() const { return nvidiaUnalignedBDAIssue; } private: GPUVendor m_Vendor; @@ -390,6 +394,7 @@ private: bool intelBrokenOcclusionQueries = false; bool nvidiaStaticPipelineRebindStates = false; bool maliBrokenASDeviceSerialisation = false; + bool nvidiaUnalignedBDAIssue = false; }; struct DynamicRenderingLocalRead diff --git a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp index 1f0b0a32e..b0bf6f58b 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp @@ -348,6 +348,13 @@ bool WrappedVulkan::Serialise_vkAllocateMemory(SerialiserType &ser, VkDevice dev return false; } + // apply workaround for presumed windows bug + if(GetDriverInfo().NVUnalignedBDAIssue()) + { + // all memory allocations must be 64kB aligned. The rest of the workaround only applies during capture + patched.allocationSize = AlignUp(patched.allocationSize, VkDeviceSize(64 * 1024)); + } + VkResult ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &patched, NULL, &mem); if(ret != VK_SUCCESS) @@ -526,30 +533,22 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate // will be bound against since there's no requirement for the buffer to be marked as BDA. This // means that when RT is enabled ALL MEMORY IN THE ENTIRE PROGRAM must be marked as BDA just in // case. - // - // we don't force this on for memory allocations that are going to be used for dedicated images bool forceBDA = false; if(IsCaptureMode(m_State) && AccelerationStructures()) { - const VkMemoryDedicatedAllocateInfo *dedicated = - (const VkMemoryDedicatedAllocateInfo *)FindNextStruct( - pAllocateInfo, VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO); - if(dedicated == NULL || dedicated->image == VK_NULL_HANDLE) - { - // force BDA flag when creating, by adding the struct if needed - forceBDA = true; + // force BDA flag when creating, by adding the struct if needed + forceBDA = true; - if(memFlags) - { - memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; - } - else - { - rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT | - VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT; - rtForcedFlags.pNext = unwrapped.pNext; - unwrapped.pNext = &rtForcedFlags; - } + if(memFlags) + { + memFlags->flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT; + } + else + { + rtForcedFlags.flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT | + VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT; + rtForcedFlags.pNext = unwrapped.pNext; + unwrapped.pNext = &rtForcedFlags; } } @@ -574,6 +573,27 @@ VkResult WrappedVulkan::vkAllocateMemory(VkDevice device, const VkMemoryAllocate } } + // apply workaround for presumed windows bug + if(GetDriverInfo().NVUnalignedBDAIssue()) + { + const VkDeviceSize kb64 = 64 * 1024; + const VkDeviceSize mb2 = 2 * 1024 * 1024; + + // all memory allocations must be 64kB aligned. We do this silently, not affecting the serialised size + unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, kb64); + + // <2 MB allocations must have an extra 64kB + if(unwrapped.allocationSize < mb2) + { + unwrapped.allocationSize += kb64; + } + else + { + // >= 2MB allocations must be aligned to 2MB + unwrapped.allocationSize = AlignUp(unwrapped.allocationSize, mb2); + } + } + VkResult ret; SERIALISE_TIME_CALL( ret = ObjDisp(device)->AllocateMemory(Unwrap(device), &unwrapped, NULL, pMemory)); diff --git a/renderdoc/driver/vulkan/wrappers/vk_wsi_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_wsi_funcs.cpp index d136120d6..7b86237c7 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_wsi_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_wsi_funcs.cpp @@ -429,26 +429,6 @@ bool WrappedVulkan::Serialise_vkCreateSwapchainKHR(SerialiserType &ser, VkDevice GetGPULocalMemoryIndex(mrq.memoryTypeBits), }; - VkMemoryDedicatedAllocateInfo dedicated = { - VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, - NULL, - Unwrap(im), - }; - - // if the acceleration structures feature is enabled, we _must_ be on at least vulkan 1.1 (the - // extension requires it). Vulkan 1.1 unconditionally allows the use of dedicated image - // allocations without any feature bits (the original extension didn't have any either). - // - // we do this because without it, the extra memory allocation may be promoted to BDA by - // self-capturing and cause problems with address space clashes. We don't need the dedicated - // allocation but it avoids that behaviour as it may not be legal to query the address of a - // dedicated image memory allocation and in any case we know that it can't be legally used for - // BDA or AS backing memory - if(AccelerationStructures()) - { - allocInfo.pNext = &dedicated; - } - vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &allocInfo, NULL, &mem); CHECK_VKR(this, vkr);