diff --git a/ReleaseNotes.md b/ReleaseNotes.md index ea0ea8f3b..756310e55 100644 --- a/ReleaseNotes.md +++ b/ReleaseNotes.md @@ -25,7 +25,6 @@ Known Issues ======== * Memory/image barriers are as yet unverified, potentially could lead to bad capture or replay. -* GPU-GPU synchronisation with events might not work correctly. * Sparse images with mips or array slices will not properly replay * Stepping inside vkCmdExecuteCommands * Only 2D non-array non-integer textures can currently be displayed. diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 3c4d0e099..f3ff6d319 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -1259,11 +1259,17 @@ void WrappedVulkan::ContextReplayLog(LogState readType, uint32_t startEventID, u std::sort(m_Events.begin(), m_Events.end(), SortEID()); m_ParentDrawcall.children.clear(); } + + ObjDisp(GetDev())->DeviceWaitIdle(Unwrap(GetDev())); + + // destroy any events we created for waiting on + for(size_t i=0; i < m_CleanupEvents.size(); i++) + ObjDisp(GetDev())->DestroyEvent(Unwrap(GetDev()), m_CleanupEvents[i]); + + m_CleanupEvents.clear(); if(m_PartialReplayData.resultPartialCmdBuffer != VK_NULL_HANDLE) { - ObjDisp(GetDev())->DeviceWaitIdle(Unwrap(m_PartialReplayData.partialDevice)); - // deliberately call our own function, so this is destroyed as a wrapped object vkDestroyCommandBuffer(m_PartialReplayData.partialDevice, m_PartialReplayData.resultPartialCmdBuffer); m_PartialReplayData.resultPartialCmdBuffer = VK_NULL_HANDLE; diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index a6666fc45..96867abfd 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -214,7 +214,8 @@ private: // -> flush/waitidle -> freecmds } m_InternalCmds; - vector m_FreeMems; + vector m_CleanupMems; + vector m_CleanupEvents; // return the pre-selected device and queue VkDevice GetDev() { RDCASSERT(m_Device != VK_NULL_HANDLE); return m_Device; } diff --git a/renderdoc/driver/vulkan/vk_initstate.cpp b/renderdoc/driver/vulkan/vk_initstate.cpp index f63e4cacc..6bde15b62 100644 --- a/renderdoc/driver/vulkan/vk_initstate.cpp +++ b/renderdoc/driver/vulkan/vk_initstate.cpp @@ -512,7 +512,7 @@ bool WrappedVulkan::Serialise_SparseInitialState(ResourceId id, WrappedVkBuffer SAFE_DELETE_ARRAY(data); - m_FreeMems.push_back(mem); + m_CleanupMems.push_back(mem); GetResourceManager()->SetInitialContents(id, VulkanResourceManager::InitialContentData(GetWrapped(buf), 0, (byte *)info)); } @@ -723,7 +723,7 @@ bool WrappedVulkan::Serialise_SparseInitialState(ResourceId id, WrappedVkImage * SAFE_DELETE_ARRAY(data); - m_FreeMems.push_back(mem); + m_CleanupMems.push_back(mem); GetResourceManager()->SetInitialContents(id, VulkanResourceManager::InitialContentData(GetWrapped(buf), eInitialContents_Sparse, blob)); } @@ -1575,7 +1575,7 @@ bool WrappedVulkan::Serialise_InitialState(WrappedVkRes *res) ObjDisp(d)->FreeMemory(Unwrap(d), uploadmem); // remember to free this memory on shutdown - m_FreeMems.push_back(mem); + m_CleanupMems.push_back(mem); GetResourceManager()->SetInitialContents(id, VulkanResourceManager::InitialContentData(GetWrapped(im), 0, NULL)); } @@ -1639,7 +1639,7 @@ bool WrappedVulkan::Serialise_InitialState(WrappedVkRes *res) SAFE_DELETE_ARRAY(data); - m_FreeMems.push_back(mem); + m_CleanupMems.push_back(mem); GetResourceManager()->SetInitialContents(id, VulkanResourceManager::InitialContentData(GetWrapped(buf), (uint32_t)dataSize, NULL)); } diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 16c4f09ca..003ccb8b8 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -166,12 +166,12 @@ void WrappedVulkan::Shutdown() // no explicit vkDestroyDevice, we destroy the device here then the instance // destroy any replay objects that aren't specifically to do with the frame capture - for(size_t i=0; i < m_FreeMems.size(); i++) + for(size_t i=0; i < m_CleanupMems.size(); i++) { - ObjDisp(m_Device)->FreeMemory(Unwrap(m_Device), Unwrap(m_FreeMems[i])); - GetResourceManager()->ReleaseWrappedResource(m_FreeMems[i]); + ObjDisp(m_Device)->FreeMemory(Unwrap(m_Device), Unwrap(m_CleanupMems[i])); + GetResourceManager()->ReleaseWrappedResource(m_CleanupMems[i]); } - m_FreeMems.clear(); + m_CleanupMems.clear(); // destroy debug manager and any objects it created SAFE_DELETE(m_DebugManager); diff --git a/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp index fcddb9cdc..a630dff90 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp @@ -53,12 +53,11 @@ * DeviceWaitIdle. * * On the GPU-side, whenever a command buffer contains a CmdWaitEvents we - * currently just ignore it, assuming any previous work that it was intended - * to wait on has completed. VKTODOMED this might not be the case. Probably we - * will need to record any command buffers that do a CmdWaitEvents and ensure - * there's a sync. E.g. on submit, submit only that command buffer alone and - * then do DeviceWaitIdle, or at least do a GPU stall by inserting an event - * of our own - not the replays - to make the GPU wait. + * create an event, reset it, and call CmdSetEvent right before the + * CmdWaitEvents. This should provide the strictest possible ordering guarantee + * for the CmdWaitEvents (since the event set it was waiting on must have + * happened at or before where we are setting the event, so our event is as or + * more conservative than the original event). * * In future it would be nice to save the state of events at the start of * the frame and restore them, via GetEventStatus/SetEvent/ResetEvent. However @@ -151,8 +150,6 @@ bool WrappedVulkan::Serialise_vkGetFenceStatus( { device = GetResourceManager()->GetLiveHandle(id); - // VKTODOLOW conservatively assume we have to wait for the device to be idle - // this could probably be smarter ObjDisp(device)->DeviceWaitIdle(Unwrap(device)); } @@ -203,9 +200,8 @@ bool WrappedVulkan::Serialise_vkResetFences( if(m_State < WRITING) { - // VKTODOMED does it even make sense to reset fences currently? - // when we wait on them, we wait for device idle (as we can't track - // and save/restore signalled status of fence). + // we don't care about fence states as we cannot record them perfectly and just + // do full waitidle flushes. } return true; @@ -264,8 +260,6 @@ bool WrappedVulkan::Serialise_vkWaitForFences( { device = GetResourceManager()->GetLiveHandle(id); - // VKTODOLOW conservatively assume we have to wait for the device to be idle - // this could probably be smarter ObjDisp(device)->DeviceWaitIdle(Unwrap(device)); } @@ -447,8 +441,6 @@ bool WrappedVulkan::Serialise_vkGetEventStatus( { device = GetResourceManager()->GetLiveHandle(id); - // VKTODOLOW conservatively assume we have to wait for the device to be idle - // this could probably be smarter ObjDisp(device)->DeviceWaitIdle(Unwrap(device)); } @@ -687,22 +679,10 @@ bool WrappedVulkan::Serialise_vkCmdWaitEvents( SERIALISE_ELEMENT(VkPipelineStageFlags, src, srcStageMask); SERIALISE_ELEMENT(VkPipelineStageFlags, dest, destStageMask); - SERIALISE_ELEMENT(uint32_t, evcount, eventCount); - - vector events; - - for(uint32_t i=0; i < evcount; i++) - { - ResourceId id; - if(m_State >= WRITING) - id = GetResID(pEvents[i]); - - localSerialiser->Serialise("pEvents[]", id); - - if(m_State < WRITING) - events.push_back(Unwrap(GetResourceManager()->GetLiveHandle(id))); - } + // we don't serialise the original events as we are going to replace this + // with our own + // we keep the original memory barriers SERIALISE_ELEMENT(uint32_t, memCount, memBarrierCount); vector mems; @@ -749,7 +729,22 @@ bool WrappedVulkan::Serialise_vkCmdWaitEvents( if(IsPartialCmd(cmdid) && InPartialRange()) { cmdBuffer = PartialCmdBuf(); - ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), evcount, &events[0], src, dest, (uint32_t)mems.size(), (const void **)&mems[0]); + + VkEventCreateInfo evInfo = { + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, NULL, 0, + }; + + VkEvent ev = VK_NULL_HANDLE; + ObjDisp(cmdBuffer)->CreateEvent(Unwrap(GetDev()), &evInfo, &ev); + // don't wrap this event + + ObjDisp(cmdBuffer)->ResetEvent(Unwrap(GetDev()), ev); + ObjDisp(cmdBuffer)->CmdSetEvent(Unwrap(cmdBuffer), ev, VK_PIPELINE_STAGE_ALL_GRAPHICS); + + ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), 1, &ev, src, dest, (uint32_t)mems.size(), (const void **)&mems[0]); + + // register to clean this event up once we're done replaying this section of the log + m_CleanupEvents.push_back(ev); ResourceId cmd = GetResID(PartialCmdBuf()); GetResourceManager()->RecordTransitions(m_BakedCmdBufferInfo[cmd].imgtransitions, m_ImageLayouts, (uint32_t)imTrans.size(), &imTrans[0]); @@ -759,7 +754,21 @@ bool WrappedVulkan::Serialise_vkCmdWaitEvents( { cmdBuffer = GetResourceManager()->GetLiveHandle(cmdid); - ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), evcount, &events[0], src, dest, (uint32_t)mems.size(), (const void **)&mems[0]); + VkEventCreateInfo evInfo = { + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, NULL, 0, + }; + + VkEvent ev = VK_NULL_HANDLE; + ObjDisp(cmdBuffer)->CreateEvent(Unwrap(GetDev()), &evInfo, &ev); + // don't wrap this event + + ObjDisp(cmdBuffer)->ResetEvent(Unwrap(GetDev()), ev); + ObjDisp(cmdBuffer)->CmdSetEvent(Unwrap(cmdBuffer), ev, VK_PIPELINE_STAGE_ALL_GRAPHICS); + + ObjDisp(cmdBuffer)->CmdWaitEvents(Unwrap(cmdBuffer), 1, &ev, src, dest, (uint32_t)mems.size(), (const void **)&mems[0]); + + // register to clean this event up once we're done replaying this section of the log + m_CleanupEvents.push_back(ev); ResourceId cmd = GetResID(cmdBuffer); GetResourceManager()->RecordTransitions(m_BakedCmdBufferInfo[cmd].imgtransitions, m_ImageLayouts, (uint32_t)imTrans.size(), &imTrans[0]);