From 64c77e917ee6b57b33758b40049d657ab4f4b772 Mon Sep 17 00:00:00 2001 From: baldurk Date: Tue, 7 Jan 2025 16:54:52 +0000 Subject: [PATCH] Be more consistent and conservative with GPU syncing on D3D12 * In some cases we were only syncing one queue when we needed to flush and sync the whole GPU. Rename functions to be more clear about what is being synced, and only sync one queue/our internal queue when we know that's the only work we need to wait on. --- .../driver/d3d12/d3d12_command_queue_wrap.cpp | 14 +-- renderdoc/driver/d3d12/d3d12_counters.cpp | 2 +- renderdoc/driver/d3d12/d3d12_debug.cpp | 6 +- renderdoc/driver/d3d12/d3d12_device.cpp | 91 ++++++++++--------- renderdoc/driver/d3d12/d3d12_device.h | 22 +++-- .../driver/d3d12/d3d12_msaa_array_conv.cpp | 4 +- renderdoc/driver/d3d12/d3d12_pixelhistory.cpp | 13 +-- renderdoc/driver/d3d12/d3d12_postvs.cpp | 22 ++--- renderdoc/driver/d3d12/d3d12_replay.cpp | 6 +- renderdoc/driver/d3d12/d3d12_shaderdebug.cpp | 6 +- renderdoc/driver/d3d12/d3d12_state.cpp | 2 +- 11 files changed, 97 insertions(+), 91 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp index aaadee300..1ab983535 100644 --- a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp @@ -474,7 +474,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se ToStr(GetResourceManager()->GetOriginalID(m_PrevQueueId)).c_str(), ToStr(GetResourceManager()->GetOriginalID(GetResID(pQueue))).c_str()); if(m_PrevQueueId != ResourceId()) - m_pDevice->GPUSync(GetResourceManager()->GetCurrentAs(m_PrevQueueId)); + m_pDevice->DeviceWaitForIdle(); m_PrevQueueId = GetResID(pQueue); } @@ -493,7 +493,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se ID3D12CommandList *list = Unwrap(ppCommandLists[i]); real->ExecuteCommandLists(1, &list); if(D3D12_Debug_SingleSubmitFlushing() || D3D12_Debug_RT_Auditing()) - m_pDevice->GPUSync(); + m_pDevice->DeviceWaitForIdle(); BakedCmdListInfo &info = m_Cmd.m_BakedCmdListInfo[cmd]; @@ -582,7 +582,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se if(!info.executeEvents.empty()) { // ensure all GPU work has finished for readback of arguments - m_pDevice->GPUSync(); + m_pDevice->DeviceWaitForIdle(); if(m_pDevice->HasFatalError()) return false; @@ -778,7 +778,7 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se for(size_t i = 0; i < rerecordedCmds.size(); i++) { real->ExecuteCommandLists(1, &rerecordedCmds[i]); - m_pDevice->GPUSync(); + m_pDevice->DeviceWaitForIdle(); } } else @@ -1092,7 +1092,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists queueReadback.list->Close(); ID3D12CommandList *listptr = Unwrap(queueReadback.list); queueReadback.unwrappedQueue->ExecuteCommandLists(1, &listptr); - m_pDevice->GPUSync(queueReadback.unwrappedQueue, Unwrap(queueReadback.fence)); + m_pDevice->QueueWaitForIdle(queueReadback.unwrappedQueue, Unwrap(queueReadback.fence)); data = queueReadback.readbackMapped; } @@ -1397,7 +1397,7 @@ bool WrappedID3D12CommandQueue::Serialise_Signal(SerialiserType &ser, ID3D12Fenc if(IsReplayingAndReading() && pFence) { m_pReal->Signal(Unwrap(pFence), Value); - m_pDevice->GPUSync(pQueue); + m_pDevice->DeviceWaitForIdle(); } return true; @@ -1435,7 +1435,7 @@ bool WrappedID3D12CommandQueue::Serialise_Wait(SerialiserType &ser, ID3D12Fence if(IsReplayingAndReading() && pFence) { - m_pDevice->GPUSync(pQueue); + m_pDevice->DeviceWaitForIdle(); } return true; diff --git a/renderdoc/driver/d3d12/d3d12_counters.cpp b/renderdoc/driver/d3d12/d3d12_counters.cpp index daf5274f1..bd7d381ff 100644 --- a/renderdoc/driver/d3d12/d3d12_counters.cpp +++ b/renderdoc/driver/d3d12/d3d12_counters.cpp @@ -706,7 +706,7 @@ rdcarray D3D12Replay::FetchCounters(const rdcarray &c m_pDevice->ExecuteLists(); m_pDevice->FlushLists(); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); D3D12_RANGE range; range.Begin = 0; diff --git a/renderdoc/driver/d3d12/d3d12_debug.cpp b/renderdoc/driver/d3d12/d3d12_debug.cpp index 939221356..42e6b2e91 100644 --- a/renderdoc/driver/d3d12/d3d12_debug.cpp +++ b/renderdoc/driver/d3d12/d3d12_debug.cpp @@ -2116,7 +2116,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u if(buffer == NULL) return; - m_pDevice->GPUSyncAllQueues(); + m_pDevice->ReplayWorkWaitForIdle(); D3D12_RESOURCE_DESC desc = buffer->GetDesc(); D3D12_HEAP_PROPERTIES heapProps = {}; @@ -2207,7 +2207,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u ID3D12CommandList *l = m_DebugList; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); m_DebugAlloc->Reset(); D3D12_RANGE range = {0, (size_t)chunkSize}; @@ -2247,7 +2247,7 @@ void D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, u ID3D12CommandList *l = m_DebugList; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); m_DebugAlloc->Reset(); } diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp b/renderdoc/driver/d3d12/d3d12_device.cpp index 85b28a905..0bec315bb 100644 --- a/renderdoc/driver/d3d12/d3d12_device.cpp +++ b/renderdoc/driver/d3d12/d3d12_device.cpp @@ -678,9 +678,9 @@ WrappedID3D12Device::WrappedID3D12Device(ID3D12Device *realDevice, D3D12InitPara m_HeaderChunk = NULL; m_Alloc = m_DataUploadAlloc = NULL; - m_GPUSyncFence = NULL; - m_GPUSyncHandle = NULL; - m_GPUSyncCounter = 0; + m_WFIFence = NULL; + m_WFIHandle = NULL; + m_WFICounter = 0; m_OverlaySyncHandle = NULL; initStateCurBatch = 0; @@ -900,12 +900,9 @@ WrappedID3D12Device::~WrappedID3D12Device() for(size_t i = 0; i < m_InternalCmds.freecmds.size(); i++) SAFE_RELEASE(m_InternalCmds.freecmds[i]); + DeviceWaitForIdle(); for(size_t i = 0; i < m_QueueFences.size(); i++) - { - GPUSync(m_Queues[i], m_QueueFences[i]); - SAFE_RELEASE(m_QueueFences[i]); - } for(auto it = m_UploadBuffers.begin(); it != m_UploadBuffers.end(); ++it) { @@ -2180,7 +2177,7 @@ bool WrappedID3D12Device::Serialise_MapDataWrite(SerialiserType &ser, ID3D12Reso m_CurDataUpload++; if(m_CurDataUpload == ARRAY_COUNT(m_DataUploadList)) { - GPUSync(); + InternalQueueWaitForIdle(); m_CurDataUpload = 0; } } @@ -2331,7 +2328,7 @@ bool WrappedID3D12Device::Serialise_WriteToSubresource(SerialiserType &ser, ID3D m_CurDataUpload++; if(m_CurDataUpload == ARRAY_COUNT(m_DataUploadList)) { - GPUSync(); + InternalQueueWaitForIdle(); m_CurDataUpload = 0; } } @@ -2728,7 +2725,7 @@ void WrappedID3D12Device::StartFrameCapture(DeviceOwnedWindow devWnd) initStateCurBatch = 0; initStateCurList = NULL; - GPUSyncAllQueues(); + DeviceWaitForIdle(); // wait until we've synced all queues to check for these GetResourceManager()->GetRTManager()->TickASManagement(); @@ -2863,7 +2860,7 @@ bool WrappedID3D12Device::EndFrameCapture(DeviceOwnedWindow devWnd) m_State = CaptureState::BackgroundCapturing; - GPUSync(); + DeviceWaitForIdle(); } rdcarray maps = GetMaps(); @@ -3217,7 +3214,7 @@ bool WrappedID3D12Device::DiscardFrameCapture(DeviceOwnedWindow devWnd) m_State = CaptureState::BackgroundCapturing; - GPUSync(); + DeviceWaitForIdle(); queues = m_Queues; } @@ -4468,10 +4465,10 @@ void WrappedID3D12Device::CreateInternalResources() CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, __uuidof(ID3D12CommandAllocator), (void **)&m_Alloc); InternalRef(); - CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_GPUSyncFence); - m_GPUSyncFence->SetName(L"m_GPUSyncFence"); + CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_WFIFence); + m_WFIFence->SetName(L"m_WFIFence"); InternalRef(); - m_GPUSyncHandle = ::CreateEvent(NULL, FALSE, FALSE, NULL); + m_WFIHandle = ::CreateEvent(NULL, FALSE, FALSE, NULL); CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_OverlayFence); m_OverlayFence->SetName(L"m_OverlayFence"); @@ -4493,7 +4490,7 @@ void WrappedID3D12Device::CreateInternalResources() } GetResourceManager()->SetInternalResource(m_Alloc); - GetResourceManager()->SetInternalResource(m_GPUSyncFence); + GetResourceManager()->SetInternalResource(m_WFIFence); CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, __uuidof(ID3D12CommandAllocator), (void **)&m_DataUploadAlloc); @@ -4542,7 +4539,7 @@ void WrappedID3D12Device::CreateInternalResources() RDCERR("Failed to create RTV heap"); } - m_GPUSyncCounter = 0; + m_WFICounter = 0; if(m_TextRenderer == NULL) m_TextRenderer = new D3D12TextRenderer(this); @@ -4555,7 +4552,7 @@ void WrappedID3D12Device::CreateInternalResources() void WrappedID3D12Device::DestroyInternalResources() { - if(m_GPUSyncHandle == NULL) + if(m_WFIHandle == NULL) return; SAFE_RELEASE(m_pAMDExtObject); @@ -4585,9 +4582,9 @@ void WrappedID3D12Device::DestroyInternalResources() } SAFE_RELEASE(m_Alloc); - SAFE_RELEASE(m_GPUSyncFence); + SAFE_RELEASE(m_WFIFence); SAFE_RELEASE(m_OverlayFence); - CloseHandle(m_GPUSyncHandle); + CloseHandle(m_WFIHandle); CloseHandle(m_OverlaySyncHandle); } @@ -4595,57 +4592,61 @@ void WrappedID3D12Device::DataUploadSync() { if(m_CurDataUpload >= 0) { - GPUSync(); + InternalQueueWaitForIdle(); m_CurDataUpload = 0; } } -void WrappedID3D12Device::GPUSync(ID3D12CommandQueue *queue, ID3D12Fence *fence) +void WrappedID3D12Device::InternalQueueWaitForIdle() { - m_GPUSyncCounter++; + QueueWaitForIdle(GetQueue(), m_WFIFence); +} + +void WrappedID3D12Device::QueueWaitForIdle(ID3D12CommandQueue *queue, ID3D12Fence *fence) +{ + m_WFICounter++; if(HasFatalError()) return; - if(queue == NULL) - queue = GetQueue(); - - if(fence == NULL) - fence = m_GPUSyncFence; - - HRESULT hr = queue->Signal(fence, m_GPUSyncCounter); + HRESULT hr = queue->Signal(fence, m_WFICounter); CHECK_HR(this, hr); RDCASSERTEQUAL(hr, S_OK); - fence->SetEventOnCompletion(m_GPUSyncCounter, m_GPUSyncHandle); + fence->SetEventOnCompletion(m_WFICounter, m_WFIHandle); // wait 10s for hardware GPUs, 100s for CPU if(m_Replay && m_Replay->GetDriverInfo().vendor == GPUVendor::Software) - WaitForSingleObject(m_GPUSyncHandle, 100000); + WaitForSingleObject(m_WFIHandle, 100000); else - WaitForSingleObject(m_GPUSyncHandle, 10000); + WaitForSingleObject(m_WFIHandle, 10000); hr = m_pDevice->GetDeviceRemovedReason(); CHECK_HR(this, hr); RDCASSERTEQUAL(hr, S_OK); } -void WrappedID3D12Device::GPUSyncAllQueues() +void WrappedID3D12Device::ReplayWorkWaitForIdle() { - if(m_GPUSynced) + if(m_WaitedForIdleAfterReplay) return; - for(size_t i = 0; i < m_QueueFences.size(); i++) - GPUSync(m_Queues[i], m_QueueFences[i]); + DeviceWaitForIdle(); - m_GPUSynced = true; + m_WaitedForIdleAfterReplay = true; +} + +void WrappedID3D12Device::DeviceWaitForIdle() +{ + for(size_t i = 0; i < m_QueueFences.size(); i++) + QueueWaitForIdle(m_Queues[i], m_QueueFences[i]); } ID3D12GraphicsCommandListX *WrappedID3D12Device::GetNewList() { ID3D12GraphicsCommandListX *ret = NULL; - m_GPUSynced = false; + m_WaitedForIdleAfterReplay = false; if(!m_InternalCmds.freecmds.empty()) { @@ -4783,7 +4784,7 @@ void WrappedID3D12Device::FlushLists(bool forceSync, ID3D12CommandQueue *queue) if(!m_InternalCmds.submittedcmds.empty() || forceSync) { - GPUSync(queue); + QueueWaitForIdle(queue, m_WFIFence); if(!m_InternalCmds.submittedcmds.empty()) m_InternalCmds.freecmds.append(m_InternalCmds.submittedcmds); @@ -5382,22 +5383,22 @@ void WrappedID3D12Device::ReplayLog(uint32_t startEventID, uint32_t endEventID, { bool partial = true; - m_GPUSynced = false; + m_WaitedForIdleAfterReplay = false; if(startEventID == 0 && (replayType == eReplay_WithoutDraw || replayType == eReplay_Full)) { startEventID = 1; partial = false; - m_GPUSyncCounter++; + m_WFICounter++; - GPUSyncAllQueues(); + DeviceWaitForIdle(); // I'm not sure the reason for this, but the debug layer warns about being unable to resubmit // command lists due to the 'previous queue fence' not being ready yet, even if no fences are // signalled or waited. So instead we just signal a dummy fence each new 'frame' for(size_t i = 0; i < m_Queues.size(); i++) - CHECK_HR(this, m_Queues[i]->Signal(m_QueueFences[i], m_GPUSyncCounter)); + CHECK_HR(this, m_Queues[i]->Signal(m_QueueFences[i], m_WFICounter)); FlushLists(true); m_CurDataUpload = 0; @@ -5423,7 +5424,7 @@ void WrappedID3D12Device::ReplayLog(uint32_t startEventID, uint32_t endEventID, ExecuteLists(); FlushLists(true); - GPUSyncAllQueues(); + DeviceWaitForIdle(); // clear any previous ray dispatch references D3D12CommandData &cmd = *m_Queue->GetCommandData(); diff --git a/renderdoc/driver/d3d12/d3d12_device.h b/renderdoc/driver/d3d12/d3d12_device.h index 0bef42478..1811fe047 100644 --- a/renderdoc/driver/d3d12/d3d12_device.h +++ b/renderdoc/driver/d3d12/d3d12_device.h @@ -595,8 +595,8 @@ private: rdcarray m_Queues; rdcarray m_QueueFences; - // if we've called GPUSyncAllQueues since the last replay - bool m_GPUSynced = false; + // if we've called ReplayWorkWaitForIdle since the last replay or internal work + bool m_WaitedForIdleAfterReplay = false; // list of queues and buffers kept alive during capture artificially even if the user destroys // them, so we can use them in the capture. Storing this separately prevents races where a @@ -634,9 +634,9 @@ private: ID3D12GraphicsCommandList *m_DataUploadList[64] = {}; size_t m_CurDataUpload = 0; ID3D12DescriptorHeap *m_RTVHeap = NULL; - ID3D12Fence *m_GPUSyncFence; - HANDLE m_GPUSyncHandle; - UINT64 m_GPUSyncCounter; + ID3D12Fence *m_WFIFence; + HANDLE m_WFIHandle; + UINT64 m_WFICounter; ID3D12Fence *m_OverlayFence = NULL; UINT64 m_CurOverlay = 0; @@ -1073,8 +1073,16 @@ public: void DataUploadSync(); - void GPUSync(ID3D12CommandQueue *queue = NULL, ID3D12Fence *fence = NULL); - void GPUSyncAllQueues(); + // Sync a single queue, by submitting the fence then waiting on it + void QueueWaitForIdle(ID3D12CommandQueue *queue, ID3D12Fence *fence); + // Sync to the internal queue - used to ensure any internal work has finished (e.g. FlushLists() above) + // or generally any internal command buffers submitted to the GetQueue() main internal queue. + void InternalQueueWaitForIdle(); + // Sync all queues - this always flushes the entire GPU + void DeviceWaitForIdle(); + // Sync all queues but only once after each replay or internal work submit. used when fetching data + // or after a replay to ensure work completes on all captured queues before doing any analysis work + void ReplayWorkWaitForIdle(); RDCDriver GetFrameCaptureDriver() { return RDCDriver::D3D12; } void StartFrameCapture(DeviceOwnedWindow devWnd); diff --git a/renderdoc/driver/d3d12/d3d12_msaa_array_conv.cpp b/renderdoc/driver/d3d12/d3d12_msaa_array_conv.cpp index fad3faa46..6a68e4d32 100644 --- a/renderdoc/driver/d3d12/d3d12_msaa_array_conv.cpp +++ b/renderdoc/driver/d3d12/d3d12_msaa_array_conv.cpp @@ -327,7 +327,7 @@ void D3D12DebugManager::CopyTex2DMSToArray(ID3D12GraphicsCommandList *list, ID3D12CommandList *l = list; m_pDevice->GetQueue()->GetReal()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(m_pDevice->GetQueue()->GetReal(), Unwrap(m_DebugFence)); + m_pDevice->QueueWaitForIdle(m_pDevice->GetQueue()->GetReal(), Unwrap(m_DebugFence)); m_DebugAlloc->Reset(); } } @@ -600,7 +600,7 @@ void D3D12DebugManager::CopyArrayToTex2DMS(ID3D12Resource *destMS, ID3D12Resourc ID3D12CommandList *l = m_DebugList; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); m_DebugAlloc->Reset(); SAFE_RELEASE(pso); diff --git a/renderdoc/driver/d3d12/d3d12_pixelhistory.cpp b/renderdoc/driver/d3d12/d3d12_pixelhistory.cpp index 8c4fb39b3..881db1c62 100644 --- a/renderdoc/driver/d3d12/d3d12_pixelhistory.cpp +++ b/renderdoc/driver/d3d12/d3d12_pixelhistory.cpp @@ -909,7 +909,6 @@ struct D3D12OcclusionCallback : public D3D12PixelHistoryCallback m_pDevice->ExecuteLists(); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); D3D12_RANGE range; range.Begin = 0; @@ -1512,7 +1511,6 @@ struct D3D12TestsFailedCallback : public D3D12PixelHistoryCallback m_pDevice->ExecuteLists(); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); D3D12_RANGE range; range.Begin = 0; @@ -2573,7 +2571,6 @@ struct D3D12PixelHistoryDiscardedFragmentsCallback : D3D12PixelHistoryCallback m_pDevice->ExecuteLists(); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); D3D12_RANGE range; range.Begin = 0; @@ -2899,7 +2896,7 @@ rdcarray D3D12Replay::PixelHistory(rdcarray event D3D12MarkerRegion occlRegion(m_pDevice->GetQueue()->GetReal(), "D3D12OcclusionCallback"); m_pDevice->ReplayLog(0, events.back().eventId, eReplay_Full); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); occlCb.FetchOcclusionResults(); SAFE_RELEASE(pOcclusionQueryHeap); } @@ -2954,7 +2951,7 @@ rdcarray D3D12Replay::PixelHistory(rdcarray event "D3D12ColorAndStencilCallback"); m_pDevice->ReplayLog(0, events.back().eventId, eReplay_Full); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); } // If there are any draw events, do another replay pass, in order to figure @@ -2976,7 +2973,7 @@ rdcarray D3D12Replay::PixelHistory(rdcarray event drawEvents); m_pDevice->ReplayLog(0, events.back().eventId, eReplay_Full); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); tfCb->FetchOcclusionResults(); SAFE_RELEASE(pTfOcclusionQueryHeap); } @@ -3133,7 +3130,7 @@ rdcarray D3D12Replay::PixelHistory(rdcarray event "D3D12PixelHistoryPerFragmentCallback"); m_pDevice->ReplayLog(0, eventsWithFrags.rbegin()->first, eReplay_Full); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); } bytebuf fragData; @@ -3187,7 +3184,7 @@ rdcarray D3D12Replay::PixelHistory(rdcarray event m_pDevice->ReplayLog(0, events.back().eventId, eReplay_Full); m_pDevice->FlushLists(true); - m_pDevice->GPUSyncAllQueues(); + m_pDevice->DeviceWaitForIdle(); discardedCb.FetchOcclusionResults(); SAFE_RELEASE(pDiscardedFragsOcclusionQueryHeap); diff --git a/renderdoc/driver/d3d12/d3d12_postvs.cpp b/renderdoc/driver/d3d12/d3d12_postvs.cpp index 2bea39590..0298c425d 100644 --- a/renderdoc/driver/d3d12/d3d12_postvs.cpp +++ b/renderdoc/driver/d3d12/d3d12_postvs.cpp @@ -2366,7 +2366,7 @@ void D3D12Replay::InitPostMSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -2414,7 +2414,7 @@ void D3D12Replay::InitPostMSBuffers(uint32_t eventId) list->Close(); m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -2555,7 +2555,7 @@ void D3D12Replay::InitPostMSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -3085,7 +3085,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) { if(recreate) { - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); uint64_t newSize = m_SOBufferSize; if(!CreateSOBuffers()) @@ -3184,7 +3184,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) if(recreate) { - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); uint64_t newSize = m_SOBufferSize; if(!CreateSOBuffers()) @@ -3314,7 +3314,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -3589,7 +3589,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); // check that things are OK, and resize up if needed D3D12_RANGE range; @@ -3700,7 +3700,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -3722,7 +3722,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); @@ -3774,7 +3774,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); // check that things are OK, and resize up if needed D3D12_RANGE range; @@ -3850,7 +3850,7 @@ void D3D12Replay::InitPostVSBuffers(uint32_t eventId) ID3D12CommandList *l = list; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); GetDebugManager()->ResetDebugAlloc(); diff --git a/renderdoc/driver/d3d12/d3d12_replay.cpp b/renderdoc/driver/d3d12/d3d12_replay.cpp index aa0ee6a3a..e335bfad7 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.cpp +++ b/renderdoc/driver/d3d12/d3d12_replay.cpp @@ -349,7 +349,7 @@ void D3D12Replay::ReplayLog(uint32_t endEventID, ReplayLogType replayType) m_pDevice->ReplayLog(0, endEventID, replayType); if(replayType == eReplay_WithoutDraw) - m_pDevice->GPUSyncAllQueues(); + m_pDevice->ReplayWorkWaitForIdle(); } SDFile *D3D12Replay::GetStructuredFile() @@ -3726,7 +3726,7 @@ void D3D12Replay::RefreshDerivedReplacements() } } - m_pDevice->GPUSync(); + m_pDevice->DeviceWaitForIdle(); for(ID3D12PipelineState *pipe : deletequeue) { @@ -3740,7 +3740,7 @@ void D3D12Replay::GetTextureData(ResourceId tex, const Subresource &sub, bool wasms = false; bool resolve = params.resolve; - m_pDevice->GPUSyncAllQueues(); + m_pDevice->ReplayWorkWaitForIdle(); ID3D12Resource *resource = NULL; diff --git a/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp b/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp index 7d2656598..2153b8d8b 100644 --- a/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp +++ b/renderdoc/driver/d3d12/d3d12_shaderdebug.cpp @@ -137,7 +137,7 @@ bool D3D12ShaderDebug::CalculateMathIntrinsic(bool dxil, WrappedID3D12Device *de { ID3D12CommandList *l = cmdList; device->GetQueue()->ExecuteCommandLists(1, &l); - device->GPUSync(); + device->InternalQueueWaitForIdle(); } D3D12_RANGE range = {0, sizeof(Vec4f) * 6}; @@ -382,7 +382,7 @@ bool D3D12ShaderDebug::CalculateSampleGather( { ID3D12CommandList *l = cmdList; device->GetQueue()->ExecuteCommandLists(1, &l); - device->GPUSync(); + device->InternalQueueWaitForIdle(); } rs = prevState; @@ -2900,7 +2900,7 @@ struct PSInitialData { ID3D12CommandList *l = cmdList; m_pDevice->GetQueue()->ExecuteCommandLists(1, &l); - m_pDevice->GPUSync(); + m_pDevice->InternalQueueWaitForIdle(); } { diff --git a/renderdoc/driver/d3d12/d3d12_state.cpp b/renderdoc/driver/d3d12/d3d12_state.cpp index 4654be3fa..52c814f5b 100644 --- a/renderdoc/driver/d3d12/d3d12_state.cpp +++ b/renderdoc/driver/d3d12/d3d12_state.cpp @@ -67,7 +67,7 @@ void D3D12RenderState::ResolvePendingIndirectState(WrappedID3D12Device *device) if(indirectState.argsBuf == NULL) return; - device->GPUSync(); + device->DeviceWaitForIdle(); D3D12_RANGE range = {0, D3D12CommandData::m_IndirectSize}; byte *mapPtr = NULL;