diff --git a/docs/credits_acknowledgements.rst b/docs/credits_acknowledgements.rst index e88e7f597..ca44acc5b 100644 --- a/docs/credits_acknowledgements.rst +++ b/docs/credits_acknowledgements.rst @@ -64,7 +64,7 @@ The following libraries and components are incorporated into RenderDoc, listed h Used to display message boxes cross-platform from the non-UI core code. -* `AMD GPUPerfAPI `_ - Copyright (c) 2016 Advanced Micro Devices, Inc., distributed under the MIT license. +* `AMD GPUPerfAPI `_ - Copyright (c) 2016-2018 Advanced Micro Devices, Inc., distributed under the MIT license. Provides hardware-specific counters over and above what individual hardware-agnostic graphics APIs are able to provide. @@ -72,7 +72,7 @@ The following libraries and components are incorporated into RenderDoc, listed h Providing higher-resolution icons than the famfamfam Silk set, these icons allow scaling to those using high-DPI displays. -* `AMD Radeon GPU Analyzer `_ - Copyright (c) 2015 Advanced Micro Devices, Inc., distributed under the MIT license. +* `AMD Radeon GPU Analyzer `_ - Copyright (c) 2015-2018 Advanced Micro Devices, Inc., distributed under the MIT license. Provides the ability to disassemble shaders from any API representation into compiled GCN ISA for lower level analysis. diff --git a/renderdoc/driver/d3d11/d3d11_counters.cpp b/renderdoc/driver/d3d11/d3d11_counters.cpp index 26ab4e66c..5b6064931 100644 --- a/renderdoc/driver/d3d11/d3d11_counters.cpp +++ b/renderdoc/driver/d3d11/d3d11_counters.cpp @@ -50,10 +50,8 @@ vector D3D11Replay::EnumerateCounters() if(m_pAMDCounters) { - for(uint32_t i = 0; i < m_pAMDCounters->GetNumCounters(); i++) - { - ret.push_back(MakeAMDCounter(i)); - } + vector amdCounters = m_pAMDCounters->GetPublicCounterIds(); + ret.insert(ret.end(), amdCounters.begin(), amdCounters.end()); } return ret; @@ -65,7 +63,7 @@ CounterDescription D3D11Replay::DescribeCounter(GPUCounter counterID) desc.counter = counterID; /////AMD////// - if(counterID >= GPUCounter::FirstAMD && counterID < GPUCounter::FirstIntel) + if(IsAMDCounter(counterID)) { if(m_pAMDCounters) { @@ -244,7 +242,7 @@ void D3D11Replay::FillTimers(D3D11CounterContext &ctx, const DrawcallDescription m_pDevice->ReplayLog(ctx.eventStart, d.eventId, eReplay_WithoutDraw); - m_pImmediateContext->Flush(); + SerializeImmediateContext(); if(timer->stats) m_pImmediateContext->Begin(timer->stats); @@ -264,6 +262,44 @@ void D3D11Replay::FillTimers(D3D11CounterContext &ctx, const DrawcallDescription } } +void D3D11Replay::SerializeImmediateContext() +{ + ID3D11Query *query = 0; + D3D11_QUERY_DESC desc = {D3D11_QUERY_EVENT}; + + HRESULT hr = m_pDevice->CreateQuery(&desc, &query); + if(FAILED(hr)) + { + return; + } + + BOOL completed = FALSE; + + m_pImmediateContext->End(query); + + m_pImmediateContext->Flush(); + + do + { + hr = m_pImmediateContext->GetData(query, &completed, sizeof(BOOL), 0); + if(hr == S_FALSE) + { + ::Sleep(0); + } + else if(SUCCEEDED(hr) && completed) + { + break; + } + else + { + // error + break; + } + } while(!completed); + + query->Release(); +} + void D3D11Replay::FillTimersAMD(uint32_t &eventStartID, uint32_t &sampleIndex, vector &eventIDs, const DrawcallDescription &drawnode) { @@ -283,7 +319,7 @@ void D3D11Replay::FillTimersAMD(uint32_t &eventStartID, uint32_t &sampleIndex, m_pDevice->ReplayLog(eventStartID, d.eventId, eReplay_WithoutDraw); - m_pImmediateContext->Flush(); + SerializeImmediateContext(); m_pAMDCounters->BeginSample(sampleIndex); @@ -307,7 +343,7 @@ vector D3D11Replay::FetchCountersAMD(const vector &co { // This function is only called internally, and violating this assertion means our // caller has invoked this method incorrectly - RDCASSERT((counters[i] >= (GPUCounter::FirstAMD)) && (counters[i] < (GPUCounter::FirstIntel))); + RDCASSERT(IsAMDCounter(counters[i])); m_pAMDCounters->EnableCounter(counters[i]); } @@ -336,83 +372,7 @@ vector D3D11Replay::FetchCountersAMD(const vector &co m_pAMDCounters->EndSesssion(); - bool isReady = false; - - do - { - isReady = m_pAMDCounters->IsSessionReady(sessionID); - } while(!isReady); - - for(uint32_t s = 0; s < sampleIndex; s++) - { - for(size_t c = 0; c < counters.size(); c++) - { - const CounterDescription desc = m_pAMDCounters->GetCounterDescription(counters[c]); - - switch(desc.resultType) - { - case CompType::UInt: - { - if(desc.resultByteWidth == sizeof(uint32_t)) - { - uint32_t value = m_pAMDCounters->GetSampleUint32(sessionID, s, counters[c]); - - if(desc.unit == CounterUnit::Percentage) - { - value = RDCCLAMP(value, 0U, 100U); - } - - ret.push_back(CounterResult(eventIDs[s], counters[c], value)); - } - else if(desc.resultByteWidth == sizeof(uint64_t)) - { - uint64_t value = m_pAMDCounters->GetSampleUint64(sessionID, s, counters[c]); - - if(desc.unit == CounterUnit::Percentage) - { - value = RDCCLAMP(value, 0ULL, 100ULL); - } - - ret.push_back( - - CounterResult(eventIDs[s], counters[c], value)); - } - else - { - RDCERR("Unexpected byte width %u", desc.resultByteWidth); - } - } - break; - case CompType::Float: - { - float value = m_pAMDCounters->GetSampleFloat32(sessionID, s, counters[c]); - - if(desc.unit == CounterUnit::Percentage) - { - value = RDCCLAMP(value, 0.0f, 100.0f); - } - - ret.push_back(CounterResult(eventIDs[s], counters[c], value)); - } - break; - case CompType::Double: - { - double value = m_pAMDCounters->GetSampleFloat64(sessionID, s, counters[c]); - - if(desc.unit == CounterUnit::Percentage) - { - value = RDCCLAMP(value, 0.0, 100.0); - } - - ret.push_back(CounterResult(eventIDs[s], counters[c], value)); - } - break; - default: RDCASSERT(0); break; - }; - } - } - - return ret; + return m_pAMDCounters->GetCounterData(sessionID, sampleIndex, eventIDs, counters); } vector D3D11Replay::FetchCounters(const vector &counters) @@ -429,15 +389,14 @@ vector D3D11Replay::FetchCounters(const vector &count vector d3dCounters; std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters), - [](const GPUCounter &c) { return c < GPUCounter::FirstAMD; }); + [](const GPUCounter &c) { return !IsAMDCounter(c); }); if(m_pAMDCounters) { // Filter out the AMD counters vector amdCounters; - std::copy_if( - counters.begin(), counters.end(), std::back_inserter(amdCounters), - [](const GPUCounter &c) { return c >= GPUCounter::FirstAMD && c < GPUCounter::FirstIntel; }); + std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters), + [](const GPUCounter &c) { return IsAMDCounter(c); }); if(!amdCounters.empty()) { diff --git a/renderdoc/driver/d3d11/d3d11_replay.cpp b/renderdoc/driver/d3d11/d3d11_replay.cpp index 9254663de..75fdfac8b 100644 --- a/renderdoc/driver/d3d11/d3d11_replay.cpp +++ b/renderdoc/driver/d3d11/d3d11_replay.cpp @@ -157,7 +157,10 @@ void D3D11Replay::CreateResources() RenderDoc::Inst().SetProgress(LoadProgress::DebugManagerInit, 0.9f); AMDCounters *counters = new AMDCounters(); - if(counters->Init((void *)m_pDevice)) + + ID3D11Device *d3dDevice = m_pDevice->GetReal(); + + if(counters->Init(AMDCounters::ApiType::Dx11, (void *)d3dDevice)) { m_pAMDCounters = counters; } diff --git a/renderdoc/driver/d3d11/d3d11_replay.h b/renderdoc/driver/d3d11/d3d11_replay.h index ee9c336c0..5ec474af1 100644 --- a/renderdoc/driver/d3d11/d3d11_replay.h +++ b/renderdoc/driver/d3d11/d3d11_replay.h @@ -248,6 +248,7 @@ private: void FillTimers(D3D11CounterContext &ctx, const DrawcallDescription &drawnode); void FillTimersAMD(uint32_t &eventStartID, uint32_t &sampleIndex, vector &eventIDs, const DrawcallDescription &drawnode); + void SerializeImmediateContext(); bool RenderTextureInternal(TextureDisplay cfg, bool blendAlpha); diff --git a/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp index 94928480f..10e711f01 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp @@ -75,6 +75,9 @@ bool WrappedID3D12GraphicsCommandList::Serialise_Close(SerialiserType &ser) for(int i = 0; i < markerCount; i++) D3D12MarkerRegion::End(list); + if(m_Cmd->m_DrawcallCallback) + m_Cmd->m_DrawcallCallback->PreCloseCommandList(list); + list->Close(); if(m_Cmd->m_Partial[D3D12CommandData::Primary].partialParent == CommandList) diff --git a/renderdoc/driver/d3d12/d3d12_commands.h b/renderdoc/driver/d3d12/d3d12_commands.h index 30cc4e34f..c5bc17c91 100644 --- a/renderdoc/driver/d3d12/d3d12_commands.h +++ b/renderdoc/driver/d3d12/d3d12_commands.h @@ -109,6 +109,8 @@ struct D3D12DrawcallCallback virtual bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) = 0; virtual void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) = 0; + // called immediately before a command list is closed + virtual void PreCloseCommandList(ID3D12GraphicsCommandList *cmd) = 0; // if a command list is recorded once and submitted N > 1 times, then the same // drawcall will have several EIDs that refer to it. We'll only do the full // callbacks above for the first EID, then call this function for the others diff --git a/renderdoc/driver/d3d12/d3d12_counters.cpp b/renderdoc/driver/d3d12/d3d12_counters.cpp index 57542159b..6a88292b1 100644 --- a/renderdoc/driver/d3d12/d3d12_counters.cpp +++ b/renderdoc/driver/d3d12/d3d12_counters.cpp @@ -22,6 +22,10 @@ * THE SOFTWARE. ******************************************************************************/ +#include +#include +#include "driver/ihv/amd/amd_counters.h" +#include "d3d12_command_list.h" #include "d3d12_command_queue.h" #include "d3d12_common.h" #include "d3d12_device.h" @@ -44,6 +48,12 @@ vector D3D12Replay::EnumerateCounters() ret.push_back(GPUCounter::PSInvocations); ret.push_back(GPUCounter::CSInvocations); + if(m_pAMDCounters) + { + vector amdCounters = m_pAMDCounters->GetPublicCounterIds(); + ret.insert(ret.end(), amdCounters.begin(), amdCounters.end()); + } + return ret; } @@ -51,6 +61,17 @@ CounterDescription D3D12Replay::DescribeCounter(GPUCounter counterID) { CounterDescription desc; desc.counter = counterID; + /////AMD////// + if(IsAMDCounter(counterID)) + { + if(m_pAMDCounters) + { + desc = m_pAMDCounters->GetCounterDescription(counterID); + + return desc; + } + } + // 0808CC9B-79DF-4549-81F7-85494E648F22 desc.uuid.words[0] = 0x0808CC9B; desc.uuid.words[1] = 0x79DF4549; @@ -167,6 +188,171 @@ CounterDescription D3D12Replay::DescribeCounter(GPUCounter counterID) return desc; } +struct D3D12AMDDrawCallback : public D3D12DrawcallCallback +{ + D3D12AMDDrawCallback(WrappedID3D12Device *dev, D3D12Replay *rp, uint32_t &sampleIndex, + vector &eventIDs) + : m_pDevice(dev), m_pReplay(rp), m_pSampleId(&sampleIndex), m_pEventIds(&eventIDs) + { + m_pDevice->GetQueue()->GetCommandData()->m_DrawcallCallback = this; + } + + virtual ~D3D12AMDDrawCallback() + { + m_pDevice->GetQueue()->GetCommandData()->m_DrawcallCallback = NULL; + } + + void PreDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + m_pEventIds->push_back(eid); + + WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd; + + if(m_begunCommandLists.find(pWrappedCmdList->GetReal()) == m_begunCommandLists.end()) + { + m_begunCommandLists.insert(pWrappedCmdList->GetReal()); + + m_pReplay->GetAMDCounters()->BeginSampleList(pWrappedCmdList->GetReal()); + } + + m_pReplay->GetAMDCounters()->BeginSampleInSampleList(*m_pSampleId, pWrappedCmdList->GetReal()); + + ++*m_pSampleId; + } + + bool PostDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd; + + m_pReplay->GetAMDCounters()->EndSampleInSampleList(pWrappedCmdList->GetReal()); + return false; + } + + void PreCloseCommandList(ID3D12GraphicsCommandList *cmd) override + { + WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd; + + auto iter = m_begunCommandLists.find(pWrappedCmdList->GetReal()); + + if(iter != m_begunCommandLists.end()) + { + m_pReplay->GetAMDCounters()->EndSampleList(*iter); + m_begunCommandLists.erase(iter); + } + } + + void PostRedraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override {} + // we don't need to distinguish, call the Draw functions + void PreDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override { PreDraw(eid, cmd); } + bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + return PostDraw(eid, cmd); + } + void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + PostRedraw(eid, cmd); + } + + void AliasEvent(uint32_t primary, uint32_t alias) override + { + m_AliasEvents.push_back(std::make_pair(primary, alias)); + } + + uint32_t *m_pSampleId; + WrappedID3D12Device *m_pDevice; + D3D12Replay *m_pReplay; + vector *m_pEventIds; + set m_begunCommandLists; + + // events which are the 'same' from being the same command buffer resubmitted + // multiple times in the frame. We will only get the full callback when we're + // recording the command buffer, and will be given the first EID. After that + // we'll just be told which other EIDs alias this event. + vector > m_AliasEvents; +}; + +void D3D12Replay::FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, + vector *eventIDs) +{ + uint32_t maxEID = m_pDevice->GetQueue()->GetMaxEID(); + + m_pAMDDrawCallback = new D3D12AMDDrawCallback(m_pDevice, this, *sampleIndex, *eventIDs); + + // replay the events to perform all the queries + m_pDevice->ReplayLog(*eventStartID, maxEID, eReplay_Full); +} + +vector D3D12Replay::FetchCountersAMD(const vector &counters) +{ + m_pAMDCounters->DisableAllCounters(); + + // enable counters it needs + for(size_t i = 0; i < counters.size(); i++) + { + // This function is only called internally, and violating this assertion means our + // caller has invoked this method incorrectly + RDCASSERT(IsAMDCounter(counters[i])); + m_pAMDCounters->EnableCounter(counters[i]); + } + + uint32_t sessionID = m_pAMDCounters->BeginSession(); + + uint32_t passCount = m_pAMDCounters->GetPassCount(); + + uint32_t sampleIndex = 0; + + vector eventIDs; + + for(uint32_t i = 0; i < passCount; i++) + { + m_pAMDCounters->BeginPass(); + + uint32_t eventStartID = 0; + + sampleIndex = 0; + + eventIDs.clear(); + + FillTimersAMD(&eventStartID, &sampleIndex, &eventIDs); + + m_pAMDCounters->EndPass(); + } + + m_pAMDCounters->EndSesssion(); + + vector ret = + m_pAMDCounters->GetCounterData(sessionID, sampleIndex, eventIDs, counters); + + for(size_t i = 0; i < m_pAMDDrawCallback->m_AliasEvents.size(); i++) + { + for(size_t c = 0; c < counters.size(); c++) + { + CounterResult search; + search.counter = counters[c]; + search.eventId = m_pAMDDrawCallback->m_AliasEvents[i].first; + + // find the result we're aliasing + auto it = std::find(ret.begin(), ret.end(), search); + if(it != ret.end()) + { + // duplicate the result and append + CounterResult aliased = *it; + aliased.eventId = m_pAMDDrawCallback->m_AliasEvents[i].second; + ret.push_back(aliased); + } + else + { + RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't", + search.eventId, search.counter); + } + } + } + + SAFE_DELETE(m_pAMDDrawCallback); + + return ret; +} + struct D3D12GPUTimerCallback : public D3D12DrawcallCallback { D3D12GPUTimerCallback(WrappedID3D12Device *dev, D3D12Replay *rp, ID3D12QueryHeap *tqh, @@ -182,7 +368,7 @@ struct D3D12GPUTimerCallback : public D3D12DrawcallCallback m_pDevice->GetQueue()->GetCommandData()->m_DrawcallCallback = this; } ~D3D12GPUTimerCallback() { m_pDevice->GetQueue()->GetCommandData()->m_DrawcallCallback = NULL; } - void PreDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) + void PreDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override { if(cmd->GetType() == D3D12_COMMAND_LIST_TYPE_DIRECT) { @@ -192,7 +378,7 @@ struct D3D12GPUTimerCallback : public D3D12DrawcallCallback cmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_NumTimestampQueries * 2 + 0); } - bool PostDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) + bool PostDraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override { cmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, m_NumTimestampQueries * 2 + 1); m_NumTimestampQueries++; @@ -209,12 +395,19 @@ struct D3D12GPUTimerCallback : public D3D12DrawcallCallback return false; } - void PostRedraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) {} + void PostRedraw(uint32_t eid, ID3D12GraphicsCommandList *cmd) override {} // we don't need to distinguish, call the Draw functions - void PreDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) { PreDraw(eid, cmd); } - bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) { return PostDraw(eid, cmd); } - void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) { PostRedraw(eid, cmd); } - void AliasEvent(uint32_t primary, uint32_t alias) + void PreDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override { PreDraw(eid, cmd); } + bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + return PostDraw(eid, cmd); + } + void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) override + { + PostRedraw(eid, cmd); + } + void PreCloseCommandList(ID3D12GraphicsCommandList *cmd) override{}; + void AliasEvent(uint32_t primary, uint32_t alias) override { m_AliasEvents.push_back(std::make_pair(primary, alias)); } @@ -241,6 +434,35 @@ vector D3D12Replay::FetchCounters(const vector &count uint32_t maxEID = m_pDevice->GetQueue()->GetMaxEID(); vector ret; + if(counters.empty()) + { + RDCERR("No counters specified to FetchCounters"); + return ret; + } + + SCOPED_TIMER("Fetch Counters, counters to fetch %u", counters.size()); + + vector d3dCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters), + [](const GPUCounter &c) { return !IsAMDCounter(c); }); + + if(m_pAMDCounters) + { + // Filter out the AMD counters + vector amdCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters), + [](const GPUCounter &c) { return IsAMDCounter(c); }); + + if(!amdCounters.empty()) + { + ret = FetchCountersAMD(amdCounters); + } + } + + if(d3dCounters.empty()) + { + return ret; + } D3D12_HEAP_PROPERTIES heapProps; heapProps.Type = D3D12_HEAP_TYPE_READBACK; diff --git a/renderdoc/driver/d3d12/d3d12_overlay.cpp b/renderdoc/driver/d3d12/d3d12_overlay.cpp index 150ad13f1..3a4571abb 100644 --- a/renderdoc/driver/d3d12/d3d12_overlay.cpp +++ b/renderdoc/driver/d3d12/d3d12_overlay.cpp @@ -236,6 +236,7 @@ struct D3D12QuadOverdrawCallback : public D3D12DrawcallCallback void PreDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) {} bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) { return false; } void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) {} + void PreCloseCommandList(ID3D12GraphicsCommandList *cmd) {} void AliasEvent(uint32_t primary, uint32_t alias) { // don't care diff --git a/renderdoc/driver/d3d12/d3d12_postvs.cpp b/renderdoc/driver/d3d12/d3d12_postvs.cpp index eacb34b3a..e839791d4 100644 --- a/renderdoc/driver/d3d12/d3d12_postvs.cpp +++ b/renderdoc/driver/d3d12/d3d12_postvs.cpp @@ -1327,6 +1327,7 @@ struct D3D12InitPostVSCallback : public D3D12DrawcallCallback void PreDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) {} bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) { return false; } void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandList *cmd) {} + void PreCloseCommandList(ID3D12GraphicsCommandList *cmd) {} void AliasEvent(uint32_t primary, uint32_t alias) { if(std::find(m_Events.begin(), m_Events.end(), primary) != m_Events.end()) diff --git a/renderdoc/driver/d3d12/d3d12_replay.cpp b/renderdoc/driver/d3d12/d3d12_replay.cpp index 60a5a5d75..e3139c48b 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.cpp +++ b/renderdoc/driver/d3d12/d3d12_replay.cpp @@ -25,6 +25,7 @@ #include "d3d12_replay.h" #include "driver/dx/official/d3dcompiler.h" #include "driver/dxgi/dxgi_common.h" +#include "driver/ihv/amd/amd_counters.h" #include "maths/camera.h" #include "maths/matrix.h" #include "serialise/rdcfile.h" @@ -114,6 +115,27 @@ void D3D12Replay::CreateResources() m_PixelPick.Init(m_pDevice, m_DebugManager); m_Histogram.Init(m_pDevice, m_DebugManager); } + + if(RenderDoc::Inst().IsReplayApp()) + { + AMDCounters *counters = new AMDCounters(); + + ID3D12Device *d3dDevice = m_pDevice->GetReal(); + + if(counters->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice)) + { + m_pAMDCounters = counters; + } + else + { + delete counters; + m_pAMDCounters = NULL; + } + } + else + { + m_pAMDCounters = NULL; + } } void D3D12Replay::DestroyResources() @@ -135,6 +157,8 @@ void D3D12Replay::DestroyResources() SAFE_RELEASE(m_CustomShaderTex); SAFE_DELETE(m_DebugManager); + + SAFE_DELETE(m_pAMDCounters); } ReplayStatus D3D12Replay::ReadLogInitialisation(RDCFile *rdc, bool storeStructuredBuffers) diff --git a/renderdoc/driver/d3d12/d3d12_replay.h b/renderdoc/driver/d3d12/d3d12_replay.h index 443cb79c3..fc21c80fb 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.h +++ b/renderdoc/driver/d3d12/d3d12_replay.h @@ -24,12 +24,15 @@ #pragma once +#include #include "api/replay/renderdoc_replay.h" #include "core/core.h" #include "replay/replay_driver.h" #include "d3d12_common.h" #include "d3d12_state.h" +class AMDCounters; +struct D3D12AMDDrawCallback; class WrappedID3D12Device; class D3D12DebugManager; @@ -184,6 +187,7 @@ public: bool IsRenderOutput(ResourceId id); void FileChanged() {} + AMDCounters *GetAMDCounters() { return m_pAMDCounters; } private: void FillRegisterSpaces(const D3D12RenderState::RootSignature &rootSig, rdcarray &spaces, @@ -392,4 +396,12 @@ private: D3D12DebugManager *m_DebugManager = NULL; IDXGIFactory4 *m_pFactory = NULL; + + AMDCounters *m_pAMDCounters = NULL; + + D3D12AMDDrawCallback *m_pAMDDrawCallback = NULL; + + void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, vector *eventIDs); + + vector FetchCountersAMD(const vector &counters); }; diff --git a/renderdoc/driver/gl/gl_counters.cpp b/renderdoc/driver/gl/gl_counters.cpp index 94841f872..51beee5cd 100644 --- a/renderdoc/driver/gl/gl_counters.cpp +++ b/renderdoc/driver/gl/gl_counters.cpp @@ -22,6 +22,9 @@ * THE SOFTWARE. ******************************************************************************/ +#include +#include +#include "driver/ihv/amd/amd_counters.h" #include "gl_driver.h" #include "gl_replay.h" #include "gl_resources.h" @@ -32,10 +35,20 @@ void GLReplay::PreContextInitCounters() void GLReplay::PostContextInitCounters() { + AMDCounters *counters = new AMDCounters(); + if(counters->Init(AMDCounters::ApiType::Ogl, m_ReplayCtx.ctx)) + { + m_pAMDCounters = counters; + } + else + { + delete counters; + m_pAMDCounters = NULL; + } } - void GLReplay::PreContextShutdownCounters() { + SAFE_DELETE(m_pAMDCounters); } void GLReplay::PostContextShutdownCounters() @@ -60,6 +73,12 @@ vector GLReplay::EnumerateCounters() ret.push_back(GPUCounter::PSInvocations); ret.push_back(GPUCounter::CSInvocations); + if(m_pAMDCounters) + { + vector amdCounters = m_pAMDCounters->GetPublicCounterIds(); + ret.insert(ret.end(), amdCounters.begin(), amdCounters.end()); + } + return ret; } @@ -68,6 +87,18 @@ CounterDescription GLReplay::DescribeCounter(GPUCounter counterID) CounterDescription desc = {}; desc.counter = counterID; + + /////AMD////// + if(IsAMDCounter(counterID)) + { + if(m_pAMDCounters) + { + desc = m_pAMDCounters->GetCounterDescription(counterID); + + return desc; + } + } + // FFBA5548-FBF8-405D-BA18-F0329DA370A0 desc.uuid.words[0] = 0xFFBA5548; desc.uuid.words[1] = 0xFBF8405D; @@ -267,11 +298,82 @@ void GLReplay::FillTimers(GLCounterContext &ctx, const DrawcallDescription &draw } } -vector GLReplay::FetchCounters(const vector &counters) +void GLReplay::FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, + vector *eventIDs, const DrawcallDescription &drawnode) +{ + if(drawnode.children.empty()) + return; + + for(size_t i = 0; i < drawnode.children.size(); i++) + { + const DrawcallDescription &d = drawnode.children[i]; + + FillTimersAMD(eventStartID, sampleIndex, eventIDs, drawnode.children[i]); + + if(d.events.empty()) + continue; + + eventIDs->push_back(d.eventId); + + m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_WithoutDraw); + + m_pAMDCounters->BeginSample(*sampleIndex); + + m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_OnlyDraw); + + m_pAMDCounters->EndSample(); + + *eventStartID = d.eventId + 1; + ++*sampleIndex; + } +} + +vector GLReplay::FetchCountersAMD(const vector &counters) +{ + m_pAMDCounters->DisableAllCounters(); + + // enable counters it needs + for(size_t i = 0; i < counters.size(); i++) + { + // This function is only called internally, and violating this assertion means our + // caller has invoked this method incorrectly + RDCASSERT(IsAMDCounter(counters[i])); + m_pAMDCounters->EnableCounter(counters[i]); + } + + uint32_t sessionID = m_pAMDCounters->BeginSession(); + + uint32_t passCount = m_pAMDCounters->GetPassCount(); + + uint32_t sampleIndex = 0; + + vector eventIDs; + + for(uint32_t p = 0; p < passCount; p++) + { + m_pAMDCounters->BeginPass(); + + uint32_t eventStartID = 0; + + sampleIndex = 0; + + eventIDs.clear(); + + FillTimersAMD(&eventStartID, &sampleIndex, &eventIDs, m_pDriver->GetRootDraw()); + + m_pAMDCounters->EndPass(); + } + + m_pAMDCounters->EndSesssion(); + + return m_pAMDCounters->GetCounterData(sessionID, sampleIndex, eventIDs, counters); +} + +vector GLReplay::FetchCounters(const vector &allCounters) { vector ret; - if(counters.empty()) + if(allCounters.empty()) { RDCERR("No counters specified to FetchCounters"); return ret; @@ -279,6 +381,28 @@ vector GLReplay::FetchCounters(const vector &counters MakeCurrentReplayContext(&m_ReplayCtx); + vector counters; + std::copy_if(allCounters.begin(), allCounters.end(), std::back_inserter(counters), + [](const GPUCounter &c) { return !IsAMDCounter(c); }); + + if(m_pAMDCounters) + { + // Filter out the AMD counters + vector amdCounters; + std::copy_if(allCounters.begin(), allCounters.end(), std::back_inserter(amdCounters), + [](const GPUCounter &c) { return IsAMDCounter(c); }); + + if(!amdCounters.empty()) + { + ret = FetchCountersAMD(amdCounters); + } + } + + if(counters.empty()) + { + return ret; + } + GLCounterContext ctx; ctx.eventStart = 0; diff --git a/renderdoc/driver/gl/gl_replay.h b/renderdoc/driver/gl/gl_replay.h index 406b82610..2b1d26a13 100644 --- a/renderdoc/driver/gl/gl_replay.h +++ b/renderdoc/driver/gl/gl_replay.h @@ -30,9 +30,10 @@ #include "replay/replay_driver.h" #include "gl_common.h" -using std::pair; using std::map; +using std::pair; +class AMDCounters; class WrappedOpenGL; struct GLCounterContext; @@ -426,6 +427,14 @@ private: D3D11Pipe::State m_D3D11State; D3D12Pipe::State m_D3D12State; VKPipe::State m_VKState; + + // AMD counter instance + AMDCounters *m_pAMDCounters = NULL; + + void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, vector *eventIDs, + const DrawcallDescription &drawnode); + + vector FetchCountersAMD(const vector &counters); }; const GLHookSet &GetRealGLFunctions(); diff --git a/renderdoc/driver/ihv/amd/CMakeLists.txt b/renderdoc/driver/ihv/amd/CMakeLists.txt index aa6ee9ef2..ff8b16b97 100644 --- a/renderdoc/driver/ihv/amd/CMakeLists.txt +++ b/renderdoc/driver/ihv/amd/CMakeLists.txt @@ -1,4 +1,6 @@ set(sources + amd_counters.cpp + amd_counters.h amd_isa.cpp amd_isa.h amd_isa_devices.cpp diff --git a/renderdoc/driver/ihv/amd/amd_counters.cpp b/renderdoc/driver/ihv/amd/amd_counters.cpp index f35c26612..ca9e7e77b 100644 --- a/renderdoc/driver/ihv/amd/amd_counters.cpp +++ b/renderdoc/driver/ihv/amd/amd_counters.cpp @@ -23,29 +23,84 @@ ******************************************************************************/ #include "amd_counters.h" -#include #include "common/common.h" +#include "common/timing.h" #include "core/plugins.h" #include "official/GPUPerfAPI/Include/GPUPerfAPI.h" #include "official/GPUPerfAPI/Include/GPUPerfAPIFunctionTypes.h" - #include "strings/string_utils.h" +inline bool AMD_FAILED(GPA_Status status) +{ + return status != GPA_STATUS_OK; +} + +inline bool AMD_SUCCEEDED(GPA_Status status) +{ + return status == GPA_STATUS_OK; +} + +static void GPA_LoggingCallback(GPA_Logging_Type messageType, const char *pMessage) +{ + if(messageType == GPA_LOGGING_ERROR) + { + RDCWARN(pMessage); + } + else + { + RDCLOG(pMessage); + } +} + AMDCounters::AMDCounters() : m_pGPUPerfAPI(NULL) { } -bool AMDCounters::Init(void *pContext) +std::string AMDCounters::FormatErrMessage(const char *operation, uint32_t status) { - const char *dllName = "GPUPerfAPIDX11-x64.dll"; + std::string err = + StringFormat::Fmt("%s. %s", operation, m_pGPUPerfAPI->GPA_GetStatusAsStr((GPA_Status)status)); + return err; +} + +bool AMDCounters::Init(ApiType apiType, void *pContext) +{ +#if DISABLED(RDOC_WIN32) && DISABLED(RDOC_LINUX) + return false; +#else + + std::string dllName("GPUPerfAPI"); + + switch(apiType) + { + case ApiType::Dx11: dllName += "DX11"; break; + case ApiType::Dx12: dllName += "DX12"; break; + case ApiType::Ogl: dllName += "GL"; break; + case ApiType::Vk: dllName += "VK"; break; + default: + RDCWARN( + "AMD GPU performance counters could not be initialized successfully. " + "Unsupported API type specified"); + return false; + } + +#if ENABLED(RDOC_WIN32) +#if ENABLED(RDOC_X64) + dllName += "-x64"; +#endif + dllName += ".dll"; +#else + dllName = "lib" + dllName; + dllName += ".so"; +#endif // first try in the plugin location it will be in distributed builds - HMODULE module = LoadLibraryA(LocatePluginFile("amd/counters", dllName).c_str()); + std::string dllPath = LocatePluginFile("amd/counters", dllName.c_str()); - // if that failed then try checking for it just in the default search path + void *module = Process::LoadModule(dllPath.c_str()); if(module == NULL) { - module = LoadLibraryA(dllName); + module = Process::LoadModule(dllName.c_str()); } if(module == NULL) @@ -53,39 +108,52 @@ bool AMDCounters::Init(void *pContext) RDCWARN( "AMD GPU performance counters could not be initialized successfully. " "Are you missing the DLLs?"); - return false; } - m_pGPUPerfAPI = new GPAApi(); GPA_GetFuncTablePtrType getFuncTable = - (GPA_GetFuncTablePtrType)GetProcAddress(module, "GPA_GetFuncTable"); + (GPA_GetFuncTablePtrType)Process::GetFunctionAddress(module, "GPA_GetFuncTable"); + m_pGPUPerfAPI = new GPAApi(); if(getFuncTable) { getFuncTable((void **)&m_pGPUPerfAPI); } else { - RDCWARN( - "GPA version is out of date, doesn't expose GPA_GetFuncTable. Make sure you have 3.0 or " - "above."); + delete m_pGPUPerfAPI; + GPA_LoggingCallback(GPA_LOGGING_ERROR, + "Failed to get GPA function table. Invalid dynamic library?"); + return false; + } + + GPA_Logging_Type loggingType = GPA_LOGGING_ERROR; +#if ENABLED(RDOC_DEVEL) + loggingType = GPA_LOGGING_ERROR_AND_MESSAGE; +#endif + GPA_Status status = m_pGPUPerfAPI->GPA_RegisterLoggingCallback(loggingType, GPA_LoggingCallback); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Failed to initialize logging", status).c_str()); + return false; + } + + status = m_pGPUPerfAPI->GPA_Initialize(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Initialization failed", status).c_str()); delete m_pGPUPerfAPI; m_pGPUPerfAPI = NULL; return false; } - if(m_pGPUPerfAPI->GPA_Initialize() != GPA_STATUS_OK) - { - delete m_pGPUPerfAPI; - m_pGPUPerfAPI = NULL; - return false; - } - - if(m_pGPUPerfAPI->GPA_OpenContext(pContext, GPA_OPENCONTEXT_HIDE_PUBLIC_COUNTERS_BIT | - GPA_OPENCONTEXT_CLOCK_MODE_PEAK_BIT) != - GPA_STATUS_OK) + status = m_pGPUPerfAPI->GPA_OpenContext( + pContext, GPA_OPENCONTEXT_HIDE_SOFTWARE_COUNTERS_BIT | GPA_OPENCONTEXT_CLOCK_MODE_PEAK_BIT); + if(AMD_FAILED(status)) { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Open context for counters failed", status).c_str()); m_pGPUPerfAPI->GPA_Destroy(); delete m_pGPUPerfAPI; m_pGPUPerfAPI = NULL; @@ -95,66 +163,127 @@ bool AMDCounters::Init(void *pContext) m_Counters = EnumerateCounters(); return true; +#endif } AMDCounters::~AMDCounters() { if(m_pGPUPerfAPI) { - GPA_Status status = GPA_STATUS_OK; + GPA_Status status = m_pGPUPerfAPI->GPA_CloseContext(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Close context failed", status).c_str()); + } - status = m_pGPUPerfAPI->GPA_CloseContext(); status = m_pGPUPerfAPI->GPA_Destroy(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Destroy failed", status).c_str()); + } delete m_pGPUPerfAPI; } } -vector AMDCounters::EnumerateCounters() +std::map AMDCounters::EnumerateCounters() { + std::map counters; + gpa_uint32 num; - m_pGPUPerfAPI->GPA_GetNumCounters(&num); - - vector counters; + GPA_Status status = m_pGPUPerfAPI->GPA_GetNumCounters(&num); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get number of counters", status).c_str()); + return counters; + } for(uint32_t i = 0; i < num; ++i) { - InternalCounterDescription internalDesc; - internalDesc.desc = InternalGetCounterDescription(i); + GPA_Usage_Type usageType; - internalDesc.internalIndex = i; - internalDesc.desc.counter = MakeAMDCounter(i); - counters.push_back(internalDesc); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(i, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return counters; + } + + // Ignore percentage counters due to aggregate roll-up support + if(usageType == GPA_USAGE_TYPE_PERCENTAGE) + { + continue; + } + + CounterDescription desc = InternalGetCounterDescription(i); + + desc.counter = MakeAMDCounter(i); + counters[i] = desc; + + m_PublicToInternalCounter[desc.counter] = i; } return counters; } -uint32_t AMDCounters::GetNumCounters() +std::vector AMDCounters::GetPublicCounterIds() const { - return (uint32_t)m_Counters.size(); + std::vector ret; + + for(const std::pair &entry : m_PublicToInternalCounter) + ret.push_back(entry.first); + + return ret; } CounterDescription AMDCounters::GetCounterDescription(GPUCounter counter) { - return m_Counters[GPUCounterToCounterIndex(counter)].desc; + return m_Counters[m_PublicToInternalCounter[counter]]; } CounterDescription AMDCounters::InternalGetCounterDescription(uint32_t internalIndex) { CounterDescription desc = {}; const char *tmp = NULL; - m_pGPUPerfAPI->GPA_GetCounterName(internalIndex, &tmp); + GPA_Status status = m_pGPUPerfAPI->GPA_GetCounterName(internalIndex, &tmp); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get counter name.", status).c_str()); + return desc; + } + desc.name = tmp; - m_pGPUPerfAPI->GPA_GetCounterDescription(internalIndex, &tmp); + status = m_pGPUPerfAPI->GPA_GetCounterDescription(internalIndex, &tmp); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter description.", status).c_str()); + return desc; + } + desc.description = tmp; - m_pGPUPerfAPI->GPA_GetCounterCategory(internalIndex, &tmp); + status = m_pGPUPerfAPI->GPA_GetCounterCategory(internalIndex, &tmp); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get counter category.", status).c_str()); + return desc; + } + desc.category = tmp; GPA_Usage_Type usageType; - m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return desc; + } switch(usageType) { @@ -184,7 +313,13 @@ CounterDescription AMDCounters::InternalGetCounterDescription(uint32_t internalI GPA_Type type; - m_pGPUPerfAPI->GPA_GetCounterDataType(internalIndex, &type); + status = m_pGPUPerfAPI->GPA_GetCounterDataType(internalIndex, &type); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter data type.", status).c_str()); + return desc; + } // results should either be float32/64 or uint32/64 as the GetSample functions only support those switch(type) @@ -227,41 +362,164 @@ CounterDescription AMDCounters::InternalGetCounterDescription(uint32_t internalI void AMDCounters::EnableCounter(GPUCounter counter) { - const uint32_t internalIndex = m_Counters[GPUCounterToCounterIndex(counter)].internalIndex; + const uint32_t internalIndex = m_PublicToInternalCounter[counter]; - m_pGPUPerfAPI->GPA_EnableCounter(internalIndex); + GPA_Status status = m_pGPUPerfAPI->GPA_EnableCounter(internalIndex); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Enable counter.", status).c_str()); + } } void AMDCounters::EnableAllCounters() { - m_pGPUPerfAPI->GPA_EnableAllCounters(); + GPA_Status status = m_pGPUPerfAPI->GPA_EnableAllCounters(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Enable all counters.", status).c_str()); + } } void AMDCounters::DisableAllCounters() { - m_pGPUPerfAPI->GPA_DisableAllCounters(); + GPA_Status status = m_pGPUPerfAPI->GPA_DisableAllCounters(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Disable all counters.", status).c_str()); + } } uint32_t AMDCounters::GetPassCount() { - gpa_uint32 numRequiredPasses; - m_pGPUPerfAPI->GPA_GetPassCount(&numRequiredPasses); + gpa_uint32 numRequiredPasses = 0; + GPA_Status status = m_pGPUPerfAPI->GPA_GetPassCount(&numRequiredPasses); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get pass count.", status).c_str()); + } return (uint32_t)numRequiredPasses; } uint32_t AMDCounters::BeginSession() { - gpa_uint32 sessionID; + gpa_uint32 sessionID = 0; - m_pGPUPerfAPI->GPA_BeginSession(&sessionID); + GPA_Status status = m_pGPUPerfAPI->GPA_BeginSession(&sessionID); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Begin session.", status).c_str()); + } return (uint32_t)sessionID; } void AMDCounters::EndSesssion() { - m_pGPUPerfAPI->GPA_EndSession(); + GPA_Status status = m_pGPUPerfAPI->GPA_EndSession(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("End session.", status).c_str()); + } +} + +std::vector AMDCounters::GetCounterData(uint32_t sessionID, uint32_t maxSampleIndex, + const std::vector &eventIDs, + const std::vector &counters) +{ + std::vector ret; + + bool isReady = false; + + const uint32_t timeoutPeriod = 10000; // ms + + PerformanceTimer timeout; + + do + { + isReady = IsSessionReady(sessionID); + if(!isReady) + { + Threading::Sleep(0); + + PerformanceTimer endTime; + + if(timeout.GetMilliseconds() > timeoutPeriod) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, "GetCounterData failed due to elapsed timeout."); + return ret; + } + } + } while(!isReady); + + for(uint32_t s = 0; s < maxSampleIndex; s++) + { + for(size_t c = 0; c < counters.size(); c++) + { + const CounterDescription desc = GetCounterDescription(counters[c]); + + switch(desc.resultType) + { + case CompType::UInt: + { + if(desc.resultByteWidth == sizeof(uint32_t)) + { + uint32_t value = GetSampleUint32(sessionID, s, counters[c]); + + if(desc.unit == CounterUnit::Percentage) + { + value = RDCCLAMP(value, 0U, 100U); + } + + ret.push_back(CounterResult(eventIDs[s], counters[c], value)); + } + else if(desc.resultByteWidth == sizeof(uint64_t)) + { + uint64_t value = GetSampleUint64(sessionID, s, counters[c]); + + if(desc.unit == CounterUnit::Percentage) + { + value = RDCCLAMP(value, (uint64_t)0, (uint64_t)100); + } + + ret.push_back(CounterResult(eventIDs[s], counters[c], value)); + } + else + { + RDCERR("Unexpected byte width %u", desc.resultByteWidth); + } + } + break; + case CompType::Float: + { + float value = GetSampleFloat32(sessionID, s, counters[c]); + + if(desc.unit == CounterUnit::Percentage) + { + value = RDCCLAMP(value, 0.0f, 100.0f); + } + + ret.push_back(CounterResult(eventIDs[s], counters[c], value)); + } + break; + case CompType::Double: + { + double value = GetSampleFloat64(sessionID, s, counters[c]); + + if(desc.unit == CounterUnit::Percentage) + { + value = RDCCLAMP(value, 0.0, 100.0); + } + + ret.push_back(CounterResult(eventIDs[s], counters[c], value)); + } + break; + default: RDCASSERT(0); break; + }; + } + } + + return ret; } bool AMDCounters::IsSessionReady(uint32_t sessionIndex) @@ -269,98 +527,212 @@ bool AMDCounters::IsSessionReady(uint32_t sessionIndex) gpa_uint8 readyResult = 0; GPA_Status status = m_pGPUPerfAPI->GPA_IsSessionReady(&readyResult, sessionIndex); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Is session ready", status).c_str()); + } return readyResult && status == GPA_STATUS_OK; } void AMDCounters::BeginPass() { - m_pGPUPerfAPI->GPA_BeginPass(); + GPA_Status status = m_pGPUPerfAPI->GPA_BeginPass(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Begin pass.", status).c_str()); + } } void AMDCounters::EndPass() { - m_pGPUPerfAPI->GPA_EndPass(); + GPA_Status status = m_pGPUPerfAPI->GPA_EndPass(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("End pass.", status).c_str()); + } } void AMDCounters::BeginSample(uint32_t index) { - m_pGPUPerfAPI->GPA_BeginSample(index); + GPA_Status status = m_pGPUPerfAPI->GPA_BeginSample(index); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Begin sample.", status).c_str()); + } } void AMDCounters::EndSample() { - m_pGPUPerfAPI->GPA_EndSample(); + GPA_Status status = m_pGPUPerfAPI->GPA_EndSample(); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("End sample.", status).c_str()); + } +} + +void AMDCounters::BeginSampleList(void *pSampleList) +{ + GPA_Status status = m_pGPUPerfAPI->GPA_BeginSampleList(pSampleList); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("BeginSampleList.", status).c_str()); + } +} + +void AMDCounters::EndSampleList(void *pSampleList) +{ + GPA_Status status = m_pGPUPerfAPI->GPA_EndSampleList(pSampleList); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("EndSampleList.", status).c_str()); + } +} + +void AMDCounters::BeginSampleInSampleList(uint32_t sampleID, void *pSampleList) +{ + GPA_Status status = m_pGPUPerfAPI->GPA_BeginSampleInSampleList(sampleID, pSampleList); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("BeginSampleInSampleList.", status).c_str()); + } +} + +void AMDCounters::EndSampleInSampleList(void *pSampleList) +{ + GPA_Status status = m_pGPUPerfAPI->GPA_EndSampleInSampleList(pSampleList); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("EndSampleInSampleList.", status).c_str()); + } } uint32_t AMDCounters::GetSampleUint32(uint32_t session, uint32_t sample, GPUCounter counter) { - const uint32_t internalIndex = m_Counters[GPUCounterToCounterIndex(counter)].internalIndex; - uint32_t value; + const uint32_t internalIndex = m_PublicToInternalCounter[counter]; + uint32_t value = 0; - m_pGPUPerfAPI->GPA_GetSampleUInt32(session, sample, internalIndex, &value); + GPA_Status status = m_pGPUPerfAPI->GPA_GetSampleUInt32(session, sample, internalIndex, &value); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get sample uint32.", status).c_str()); + return value; + } // normalise units as expected GPA_Usage_Type usageType; - m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return value; + } if(usageType == GPA_USAGE_TYPE_KILOBYTES) + { value *= 1000; + } return value; } uint64_t AMDCounters::GetSampleUint64(uint32_t session, uint32_t sample, GPUCounter counter) { - const uint32_t internalIndex = m_Counters[GPUCounterToCounterIndex(counter)].internalIndex; - gpa_uint64 value; + const uint32_t internalIndex = m_PublicToInternalCounter[counter]; + gpa_uint64 value = 0; - m_pGPUPerfAPI->GPA_GetSampleUInt64(session, sample, internalIndex, &value); + GPA_Status status = m_pGPUPerfAPI->GPA_GetSampleUInt64(session, sample, internalIndex, &value); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get sample uint64.", status).c_str()); + return value; + } // normalise units as expected GPA_Usage_Type usageType; - m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return value; + } if(usageType == GPA_USAGE_TYPE_KILOBYTES) + { value *= 1000; + } return value; } float AMDCounters::GetSampleFloat32(uint32_t session, uint32_t sample, GPUCounter counter) { - const uint32_t internalIndex = m_Counters[GPUCounterToCounterIndex(counter)].internalIndex; - float value; + const uint32_t internalIndex = m_PublicToInternalCounter[counter]; + float value = 0; - m_pGPUPerfAPI->GPA_GetSampleFloat32(session, sample, internalIndex, &value); + GPA_Status status = m_pGPUPerfAPI->GPA_GetSampleFloat32(session, sample, internalIndex, &value); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get sample float32.", status).c_str()); + return value; + } // normalise units as expected GPA_Usage_Type usageType; - m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return value; + } if(usageType == GPA_USAGE_TYPE_KILOBYTES) + { value *= 1000.0f; + } else if(usageType == GPA_USAGE_TYPE_MILLISECONDS) + { value /= 1000.0f; + } return value; } double AMDCounters::GetSampleFloat64(uint32_t session, uint32_t sample, GPUCounter counter) { - const uint32_t internalIndex = m_Counters[GPUCounterToCounterIndex(counter)].internalIndex; + const uint32_t internalIndex = m_PublicToInternalCounter[counter]; double value; - m_pGPUPerfAPI->GPA_GetSampleFloat64(session, sample, internalIndex, &value); + GPA_Status status = m_pGPUPerfAPI->GPA_GetSampleFloat64(session, sample, internalIndex, &value); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, FormatErrMessage("Get sample float64.", status).c_str()); + return value; + } // normalise units as expected GPA_Usage_Type usageType; - m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + status = m_pGPUPerfAPI->GPA_GetCounterUsageType(internalIndex, &usageType); + if(AMD_FAILED(status)) + { + GPA_LoggingCallback(GPA_LOGGING_ERROR, + FormatErrMessage("Get counter usage type.", status).c_str()); + return value; + } if(usageType == GPA_USAGE_TYPE_KILOBYTES) + { value *= 1000.0; + } else if(usageType == GPA_USAGE_TYPE_MILLISECONDS) + { value /= 1000.0; + } return value; } \ No newline at end of file diff --git a/renderdoc/driver/ihv/amd/amd_counters.h b/renderdoc/driver/ihv/amd/amd_counters.h index 820036d7c..5cc569b35 100644 --- a/renderdoc/driver/ihv/amd/amd_counters.h +++ b/renderdoc/driver/ihv/amd/amd_counters.h @@ -24,8 +24,10 @@ #pragma once +#include #include #include "api/replay/renderdoc_replay.h" +#include "driver/vulkan/official/vulkan.h" struct _GPAApi; @@ -33,15 +35,22 @@ inline constexpr GPUCounter MakeAMDCounter(int index) { return GPUCounter((int)GPUCounter::FirstAMD + index); } - class AMDCounters { public: + enum class ApiType : uint32_t + { + Dx11 = 0, + Dx12 = 1, + Ogl = 2, + Vk = 3 + }; + AMDCounters(); - bool Init(void *pContext); + bool Init(ApiType apiType, void *pContext); ~AMDCounters(); - uint32_t GetNumCounters(); + std::vector GetPublicCounterIds() const; CounterDescription GetCounterDescription(GPUCounter index); @@ -54,12 +63,30 @@ public: uint32_t BeginSession(); void EndSesssion(); + // DX11 and OGL entry points void BeginPass(); void EndPass(); void BeginSample(uint32_t index); void EndSample(); + // DX12 and VK entry points + void BeginSampleList(void *pSampleList); + + void EndSampleList(void *pSampleList); + + void BeginSampleInSampleList(uint32_t sampleID, void *pSampleList); + + void EndSampleInSampleList(void *pSampleList); + + // Session data retrieval + std::vector GetCounterData(uint32_t sessionID, uint32_t maxSampleIndex, + const std::vector &eventIDs, + const std::vector &counters); + +private: + _GPAApi *m_pGPUPerfAPI; + std::string FormatErrMessage(const char *operation, uint32_t status); bool IsSessionReady(uint32_t sessionIndex); uint32_t GetSampleUint32(uint32_t session, uint32_t sample, GPUCounter counter); @@ -70,22 +97,10 @@ public: double GetSampleFloat64(uint32_t session, uint32_t sample, GPUCounter counter); -private: - _GPAApi *m_pGPUPerfAPI; - - static uint32_t GPUCounterToCounterIndex(GPUCounter counter) - { - return (uint32_t)(counter) - (uint32_t)(GPUCounter::FirstAMD); - } - CounterDescription InternalGetCounterDescription(uint32_t index); - struct InternalCounterDescription - { - CounterDescription desc; - uint32_t internalIndex; - }; + std::map EnumerateCounters(); + std::map m_Counters; - std::vector EnumerateCounters(); - std::vector m_Counters; + std::map m_PublicToInternalCounter; }; \ No newline at end of file diff --git a/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h b/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h index 013cec5e6..2c74df8a3 100644 --- a/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h +++ b/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h @@ -11,8 +11,6 @@ #ifndef _GPUPERFAPI_VK_H_ #define _GPUPERFAPI_VK_H_ -#include - /// The struct that should be supplied to GPA_OpenContext(). /// The instance, physicalDevice, and device should be set prior to /// calling OpenContext() to reflect the Vulkan objects on which profiling diff --git a/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPITypes.h b/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPITypes.h index 91d674a7e..7194f5434 100644 --- a/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPITypes.h +++ b/renderdoc/driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPITypes.h @@ -24,9 +24,8 @@ typedef unsigned short gpa_uint16; typedef unsigned int gpa_uint32; typedef unsigned __int64 gpa_uint64; -#endif // _WIN32 -#ifdef __linux__ +#elif defined(__linux__) #ifndef GPALIB_DECL #ifdef __cplusplus @@ -57,7 +56,18 @@ #define strcat_s(dst, ndst, src) strcat(dst, src) #define strtok_s(a, b, c) strtok(a, b) -#endif // __linux__ +#else + typedef int8_t gpa_int8; + typedef int16_t gpa_int16; + typedef int32_t gpa_int32; + typedef int64_t gpa_int64; + typedef float gpa_float32; + typedef double gpa_float64; + typedef uint8_t gpa_uint8; + typedef uint16_t gpa_uint16; + typedef uint32_t gpa_uint32; + typedef uint64_t gpa_uint64; +#endif // Limit definitions /// macro for max int8 diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index 88ff06b90..3d9402a9c 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -34,8 +34,8 @@ #include "vk_replay.h" #include "vk_state.h" -using std::vector; using std::list; +using std::vector; class VulkanShaderCache; class VulkanTextRenderer; @@ -171,6 +171,9 @@ struct VulkanDrawcallCallback virtual bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) = 0; virtual void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) = 0; + // called immediately before the command buffer is ended + virtual void PreEndCommandBuffer(VkCommandBuffer cmd) = 0; + // if a command buffer is recorded once and submitted N > 1 times, then the same // drawcall will have several EIDs that refer to it. We'll only do the full // callbacks above for the first EID, then call this function for the others diff --git a/renderdoc/driver/vulkan/vk_counters.cpp b/renderdoc/driver/vulkan/vk_counters.cpp index fc0af9a01..fc78e2ac6 100644 --- a/renderdoc/driver/vulkan/vk_counters.cpp +++ b/renderdoc/driver/vulkan/vk_counters.cpp @@ -22,10 +22,15 @@ * THE SOFTWARE. ******************************************************************************/ +#include +#include #include "vk_core.h" #include "vk_replay.h" #include "vk_resources.h" +#include "driver/ihv/amd/amd_counters.h" +#include "driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h" + vector VulkanReplay::EnumerateCounters() { vector ret; @@ -55,6 +60,12 @@ vector VulkanReplay::EnumerateCounters() ret.push_back(GPUCounter::CSInvocations); } + if(m_pAMDCounters) + { + vector amdCounters = m_pAMDCounters->GetPublicCounterIds(); + ret.insert(ret.end(), amdCounters.begin(), amdCounters.end()); + } + return ret; } @@ -62,6 +73,18 @@ CounterDescription VulkanReplay::DescribeCounter(GPUCounter counterID) { CounterDescription desc = {}; desc.counter = counterID; + + /////AMD////// + if(IsAMDCounter(counterID)) + { + if(m_pAMDCounters) + { + desc = m_pAMDCounters->GetCounterDescription(counterID); + + return desc; + } + } + // 6839CB5B-FBD2-4550-B606-8C65157C684C desc.uuid.words[0] = 0x6839CB5B; desc.uuid.words[1] = 0xFBD24550; @@ -176,6 +199,172 @@ CounterDescription VulkanReplay::DescribeCounter(GPUCounter counterID) return desc; } +struct VulkanAMDDrawCallback : public VulkanDrawcallCallback +{ + VulkanAMDDrawCallback(WrappedVulkan *dev, VulkanReplay *rp, uint32_t &sampleIndex, + vector &eventIDs) + : m_pDriver(dev), m_pReplay(rp), m_pSampleId(&sampleIndex), m_pEventIds(&eventIDs) + { + m_pDriver->SetDrawcallCB(this); + } + + virtual ~VulkanAMDDrawCallback() { m_pDriver->SetDrawcallCB(NULL); } + void PreDraw(uint32_t eid, VkCommandBuffer cmd) override + { + m_pEventIds->push_back(eid); + + VkCommandBuffer realCmdBuffer = Unwrap(cmd); + + if(m_begunCommandBuffers.find(realCmdBuffer) == m_begunCommandBuffers.end()) + { + m_begunCommandBuffers.insert(realCmdBuffer); + + m_pReplay->GetAMDCounters()->BeginSampleList(realCmdBuffer); + } + + m_pReplay->GetAMDCounters()->BeginSampleInSampleList(*m_pSampleId, realCmdBuffer); + + ++*m_pSampleId; + } + + bool PostDraw(uint32_t eid, VkCommandBuffer cmd) override + { + VkCommandBuffer realCmdBuffer = Unwrap(cmd); + + m_pReplay->GetAMDCounters()->EndSampleInSampleList(realCmdBuffer); + return false; + } + + void PreEndCommandBuffer(VkCommandBuffer cmd) override + { + VkCommandBuffer realCmdBuffer = Unwrap(cmd); + + auto iter = m_begunCommandBuffers.find(realCmdBuffer); + + if(iter != m_begunCommandBuffers.end()) + { + m_pReplay->GetAMDCounters()->EndSampleList(*iter); + m_begunCommandBuffers.erase(iter); + } + } + + void PostRedraw(uint32_t eid, VkCommandBuffer cmd) override {} + // we don't need to distinguish, call the Draw functions + void PreDispatch(uint32_t eid, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) override { return PostDraw(eid, cmd); } + void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) override { PostRedraw(eid, cmd); } + void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + return PostDraw(eid, cmd); + } + void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + PostRedraw(eid, cmd); + } + + void AliasEvent(uint32_t primary, uint32_t alias) override + { + m_AliasEvents.push_back(std::make_pair(primary, alias)); + } + + uint32_t *m_pSampleId; + WrappedVulkan *m_pDriver; + VulkanReplay *m_pReplay; + vector *m_pEventIds; + set m_begunCommandBuffers; + // events which are the 'same' from being the same command buffer resubmitted + // multiple times in the frame. We will only get the full callback when we're + // recording the command buffer, and will be given the first EID. After that + // we'll just be told which other EIDs alias this event. + vector > m_AliasEvents; +}; + +void VulkanReplay::FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, + vector *eventIDs) +{ + uint32_t maxEID = m_pDriver->GetMaxEID(); + + m_pAMDDrawCallback = new VulkanAMDDrawCallback(m_pDriver, this, *sampleIndex, *eventIDs); + + // replay the events to perform all the queries + m_pDriver->ReplayLog(*eventStartID, maxEID, eReplay_Full); +} + +vector VulkanReplay::FetchCountersAMD(const vector &counters) +{ + m_pAMDCounters->DisableAllCounters(); + + // enable counters it needs + for(size_t i = 0; i < counters.size(); i++) + { + // This function is only called internally, and violating this assertion means our + // caller has invoked this method incorrectly + RDCASSERT(IsAMDCounter(counters[i])); + m_pAMDCounters->EnableCounter(counters[i]); + } + + uint32_t sessionID = m_pAMDCounters->BeginSession(); + + uint32_t passCount = m_pAMDCounters->GetPassCount(); + + uint32_t sampleIndex = 0; + + vector eventIDs; + + for(uint32_t i = 0; i < passCount; i++) + { + m_pAMDCounters->BeginPass(); + + uint32_t eventStartID = 0; + + sampleIndex = 0; + + eventIDs.clear(); + + FillTimersAMD(&eventStartID, &sampleIndex, &eventIDs); + + m_pAMDCounters->EndPass(); + } + + m_pAMDCounters->EndSesssion(); + + std::vector ret = + m_pAMDCounters->GetCounterData(sessionID, sampleIndex, eventIDs, counters); + + for(size_t i = 0; i < m_pAMDDrawCallback->m_AliasEvents.size(); i++) + { + for(size_t c = 0; c < counters.size(); c++) + { + CounterResult search; + search.counter = counters[c]; + search.eventId = m_pAMDDrawCallback->m_AliasEvents[i].first; + + // find the result we're aliasing + auto it = std::find(ret.begin(), ret.end(), search); + if(it != ret.end()) + { + // duplicate the result and append + CounterResult aliased = *it; + aliased.eventId = m_pAMDDrawCallback->m_AliasEvents[i].second; + ret.push_back(aliased); + } + else + { + RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't", + search.eventId, search.counter); + } + } + } + + SAFE_DELETE(m_pAMDDrawCallback); + + // sort so that the alias results appear in the right places + std::sort(ret.begin(), ret.end()); + + return ret; +} + struct VulkanGPUTimerCallback : public VulkanDrawcallCallback { VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool tsqp, VkQueryPool occqp, @@ -189,7 +378,7 @@ struct VulkanGPUTimerCallback : public VulkanDrawcallCallback m_pDriver->SetDrawcallCB(this); } ~VulkanGPUTimerCallback() { m_pDriver->SetDrawcallCB(NULL); } - void PreDraw(uint32_t eid, VkCommandBuffer cmd) + void PreDraw(uint32_t eid, VkCommandBuffer cmd) override { if(m_OcclusionQueryPool != VK_NULL_HANDLE) ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionQueryPool, (uint32_t)m_Results.size(), @@ -200,7 +389,7 @@ struct VulkanGPUTimerCallback : public VulkanDrawcallCallback m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 0)); } - bool PostDraw(uint32_t eid, VkCommandBuffer cmd) + bool PostDraw(uint32_t eid, VkCommandBuffer cmd) override { ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 1)); @@ -212,19 +401,26 @@ struct VulkanGPUTimerCallback : public VulkanDrawcallCallback return false; } - void PostRedraw(uint32_t eid, VkCommandBuffer cmd) {} + void PostRedraw(uint32_t eid, VkCommandBuffer cmd) override {} // we don't need to distinguish, call the Draw functions - void PreDispatch(uint32_t eid, VkCommandBuffer cmd) { PreDraw(eid, cmd); } - bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) { return PostDraw(eid, cmd); } - void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) { PostRedraw(eid, cmd); } - void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { PreDraw(eid, cmd); } - bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { return PostDraw(eid, cmd); } - void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { PostRedraw(eid, cmd); } - void AliasEvent(uint32_t primary, uint32_t alias) + void PreDispatch(uint32_t eid, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) override { return PostDraw(eid, cmd); } + void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) override { PostRedraw(eid, cmd); } + void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + return PostDraw(eid, cmd); + } + void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + PostRedraw(eid, cmd); + } + void AliasEvent(uint32_t primary, uint32_t alias) override { m_AliasEvents.push_back(std::make_pair(primary, alias)); } + void PreEndCommandBuffer(VkCommandBuffer cmd) override {} WrappedVulkan *m_pDriver; VulkanReplay *m_pReplay; VkQueryPool m_TimeStampQueryPool; @@ -242,6 +438,30 @@ vector VulkanReplay::FetchCounters(const vector &coun { uint32_t maxEID = m_pDriver->GetMaxEID(); + vector vkCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(vkCounters), + [](const GPUCounter &c) { return !IsAMDCounter(c); }); + + vector ret; + + if(m_pAMDCounters) + { + // Filter out the AMD counters + vector amdCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(amdCounters), + [](const GPUCounter &c) { return IsAMDCounter(c); }); + + if(!amdCounters.empty()) + { + ret = FetchCountersAMD(amdCounters); + } + } + + if(vkCounters.empty()) + { + return ret; + } + VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures(); VkDevice dev = m_pDriver->GetDev(); @@ -277,9 +497,9 @@ vector VulkanReplay::FetchCounters(const vector &coun bool occlNeeded = false; bool statsNeeded = false; - for(size_t c = 0; c < counters.size(); c++) + for(size_t c = 0; c < vkCounters.size(); c++) { - switch(counters[c]) + switch(vkCounters[c]) { case GPUCounter::InputVerticesRead: case GPUCounter::IAPrimitives: @@ -374,18 +594,16 @@ vector VulkanReplay::FetchCounters(const vector &coun ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pipeStatsPool, NULL); } - vector ret; - for(size_t i = 0; i < cb.m_Results.size(); i++) { - for(size_t c = 0; c < counters.size(); c++) + for(size_t c = 0; c < vkCounters.size(); c++) { CounterResult result; result.eventId = cb.m_Results[i]; - result.counter = counters[c]; + result.counter = vkCounters[c]; - switch(counters[c]) + switch(vkCounters[c]) { case GPUCounter::EventGPUDuration: { @@ -419,10 +637,10 @@ vector VulkanReplay::FetchCounters(const vector &coun for(size_t i = 0; i < cb.m_AliasEvents.size(); i++) { - for(size_t c = 0; c < counters.size(); c++) + for(size_t c = 0; c < vkCounters.size(); c++) { CounterResult search; - search.counter = counters[c]; + search.counter = vkCounters[c]; search.eventId = cb.m_AliasEvents[i].first; // find the result we're aliasing diff --git a/renderdoc/driver/vulkan/vk_debug.cpp b/renderdoc/driver/vulkan/vk_debug.cpp index 1fe7eefd9..aee26b686 100644 --- a/renderdoc/driver/vulkan/vk_debug.cpp +++ b/renderdoc/driver/vulkan/vk_debug.cpp @@ -26,6 +26,8 @@ #include #include "3rdparty/glslang/SPIRV/spirv.hpp" #include "data/glsl_shaders.h" +#include "driver/ihv/amd/amd_counters.h" +#include "driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h" #include "maths/camera.h" #include "maths/formatpacking.h" #include "maths/matrix.h" @@ -1392,6 +1394,20 @@ void VulkanReplay::CreateResources() }); CREATE_OBJECT(m_MeshFetchDescSet, m_General.DescriptorPool, m_MeshFetchDescSetLayout); + + GPA_vkContextOpenInfo context = {Unwrap(m_pDriver->GetInstance()), + Unwrap(m_pDriver->GetPhysDev()), Unwrap(m_pDriver->GetDev())}; + + AMDCounters *counters = new AMDCounters(); + if(counters->Init(AMDCounters::ApiType::Vk, (void *)&context)) + { + m_pAMDCounters = counters; + } + else + { + delete counters; + m_pAMDCounters = NULL; + } } void VulkanReplay::DestroyResources() @@ -1407,6 +1423,8 @@ void VulkanReplay::DestroyResources() m_VertexPick.Destroy(m_pDriver); m_PixelPick.Destroy(m_pDriver); m_Histogram.Destroy(m_pDriver); + + SAFE_DELETE(m_pAMDCounters); } void VulkanReplay::GeneralMisc::Init(WrappedVulkan *driver, VkDescriptorPool descriptorPool) diff --git a/renderdoc/driver/vulkan/vk_overlay.cpp b/renderdoc/driver/vulkan/vk_overlay.cpp index 94aca9180..85eab4eb2 100644 --- a/renderdoc/driver/vulkan/vk_overlay.cpp +++ b/renderdoc/driver/vulkan/vk_overlay.cpp @@ -259,6 +259,7 @@ struct VulkanQuadOverdrawCallback : public VulkanDrawcallCallback void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {} bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { return false; } void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {} + void PreEndCommandBuffer(VkCommandBuffer cmd) {} void AliasEvent(uint32_t primary, uint32_t alias) { // don't care diff --git a/renderdoc/driver/vulkan/vk_postvs.cpp b/renderdoc/driver/vulkan/vk_postvs.cpp index 5dce2ac72..708fbc8b0 100644 --- a/renderdoc/driver/vulkan/vk_postvs.cpp +++ b/renderdoc/driver/vulkan/vk_postvs.cpp @@ -1780,6 +1780,7 @@ struct VulkanInitPostVSCallback : public VulkanDrawcallCallback void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {} bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { return false; } void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {} + void PreEndCommandBuffer(VkCommandBuffer cmd) {} void AliasEvent(uint32_t primary, uint32_t alias) { if(std::find(m_Events.begin(), m_Events.end(), primary) != m_Events.end()) diff --git a/renderdoc/driver/vulkan/vk_replay.h b/renderdoc/driver/vulkan/vk_replay.h index 4c34bad55..fe37a9985 100644 --- a/renderdoc/driver/vulkan/vk_replay.h +++ b/renderdoc/driver/vulkan/vk_replay.h @@ -120,9 +120,11 @@ using std::map; #define VULKAN_MESH_VIEW_SAMPLES VK_SAMPLE_COUNT_1_BIT #endif +class AMDCounters; class WrappedVulkan; class VulkanDebugManager; class VulkanResourceManager; +struct VulkanAMDDrawCallback; struct VulkanPostVSData { @@ -317,6 +319,7 @@ public: std::vector &otherJSONs); static void InstallVulkanLayer(bool systemLevel); + AMDCounters *GetAMDCounters() { return m_pAMDCounters; } private: bool RenderTextureInternal(TextureDisplay cfg, VkRenderPassBeginInfo rpbegin, int flags); @@ -576,4 +579,12 @@ private: D3D11Pipe::State m_D3D11State; D3D12Pipe::State m_D3D12State; GLPipe::State m_GLState; + + void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, vector *eventIDs); + + vector FetchCountersAMD(const vector &counters); + + AMDCounters *m_pAMDCounters = NULL; + + VulkanAMDDrawCallback *m_pAMDDrawCallback = NULL; }; diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp index 9cfcee598..d51c5a23c 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp @@ -889,6 +889,9 @@ bool WrappedVulkan::Serialise_vkEndCommandBuffer(SerialiserType &ser, VkCommandB for(int i = 0; i < m_BakedCmdBufferInfo[BakedCommandBuffer].markerCount; i++) ObjDisp(commandBuffer)->CmdDebugMarkerEndEXT(Unwrap(commandBuffer)); + if(m_DrawcallCallback) + m_DrawcallCallback->PreEndCommandBuffer(commandBuffer); + ObjDisp(commandBuffer)->EndCommandBuffer(Unwrap(commandBuffer)); if(m_Partial[Primary].partialParent == BakedCommandBuffer) diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 58dfcd799..30a14bea3 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -982,6 +982,19 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi RDCLOG("Enabling VK_EXT_debug_marker"); } + if(supportedExtensions.find(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME) == + supportedExtensions.end()) + { + RDCWARN("Unsupported required instance extension for AMD performance counters '%s'", + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + } + else + { + if(std::find(Extensions.begin(), Extensions.end(), + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME) == Extensions.end()) + Extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + } + // enable VK_EXT_debug_marker if it's available, to fetch shader disassembly if(supportedExtensions.find(VK_AMD_SHADER_INFO_EXTENSION_NAME) != supportedExtensions.end()) {