mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-04 09:00:44 +00:00
Collect Nsight Perf SDK counters in D3D12
This adds the NVD3D12Counters class, which implements D3D12 counter collection on NVIDIA hardware via the NVIDIA Nsight Perf SDK. A helper class NVCounterEnumerator is added to provide functionality common to NVIDIA counters for other graphics APIs.
This commit is contained in:
committed by
Baldur Karlsson
parent
2b5268b898
commit
abab16f8c6
@@ -25,6 +25,7 @@
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include "driver/ihv/amd/amd_counters.h"
|
||||
#include "driver/ihv/nv/nv_d3d12_counters.h"
|
||||
#include "d3d12_command_list.h"
|
||||
#include "d3d12_command_queue.h"
|
||||
#include "d3d12_common.h"
|
||||
@@ -54,6 +55,11 @@ rdcarray<GPUCounter> D3D12Replay::EnumerateCounters()
|
||||
ret.append(m_pAMDCounters->GetPublicCounterIds());
|
||||
}
|
||||
|
||||
if(m_pNVCounters)
|
||||
{
|
||||
ret.append(m_pNVCounters->EnumerateCounters());
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -72,6 +78,13 @@ CounterDescription D3D12Replay::DescribeCounter(GPUCounter counterID)
|
||||
}
|
||||
}
|
||||
|
||||
/////NVIDIA//////
|
||||
if(m_pNVCounters && m_pNVCounters->HasCounter(counterID))
|
||||
{
|
||||
desc = m_pNVCounters->DescribeCounter(counterID);
|
||||
return desc;
|
||||
}
|
||||
|
||||
// 0808CC9B-79DF-4549-81F7-85494E648F22
|
||||
desc.uuid.words[0] = 0x0808CC9B;
|
||||
desc.uuid.words[1] = 0x79DF4549;
|
||||
@@ -497,7 +510,7 @@ rdcarray<CounterResult> D3D12Replay::FetchCounters(const rdcarray<GPUCounter> &c
|
||||
|
||||
rdcarray<GPUCounter> d3dCounters;
|
||||
std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
|
||||
[](const GPUCounter &c) { return !IsAMDCounter(c); });
|
||||
[](const GPUCounter &c) { return IsGenericCounter(c); });
|
||||
|
||||
if(m_pAMDCounters)
|
||||
{
|
||||
@@ -512,6 +525,19 @@ rdcarray<CounterResult> D3D12Replay::FetchCounters(const rdcarray<GPUCounter> &c
|
||||
}
|
||||
}
|
||||
|
||||
if(m_pNVCounters)
|
||||
{
|
||||
// Filter out the NVIDIA counters
|
||||
rdcarray<GPUCounter> nvCounters;
|
||||
std::copy_if(counters.begin(), counters.end(), std::back_inserter(nvCounters),
|
||||
[=](const GPUCounter &c) { return m_pNVCounters->HasCounter(c); });
|
||||
if(!nvCounters.empty())
|
||||
{
|
||||
rdcarray<CounterResult> results = m_pNVCounters->FetchCounters(nvCounters, *m_pDevice);
|
||||
ret.append(results);
|
||||
}
|
||||
}
|
||||
|
||||
if(d3dCounters.empty())
|
||||
{
|
||||
return ret;
|
||||
|
||||
@@ -871,6 +871,7 @@ public:
|
||||
CaptureState GetState() { return m_State; }
|
||||
D3D12Replay *GetReplay() { return m_Replay; }
|
||||
WrappedID3D12CommandQueue *GetQueue() { return m_Queue; }
|
||||
const rdcarray<WrappedID3D12CommandQueue *> &GetQueues() { return m_Queues; }
|
||||
ID3D12CommandAllocator *GetAlloc() { return m_Alloc; }
|
||||
ID3D12InfoQueue *GetInfoQueue() { return m_pInfoQueue; }
|
||||
void ApplyBarriers(rdcarray<D3D12_RESOURCE_BARRIER> &barriers);
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
#include "driver/dxgi/dxgi_common.h"
|
||||
#include "driver/ihv/amd/amd_counters.h"
|
||||
#include "driver/ihv/amd/amd_rgp.h"
|
||||
#include "driver/ihv/nv/nv_d3d12_counters.h"
|
||||
#include "maths/camera.h"
|
||||
#include "maths/formatpacking.h"
|
||||
#include "maths/matrix.h"
|
||||
@@ -151,35 +152,44 @@ void D3D12Replay::CreateResources()
|
||||
|
||||
if(!m_Proxy && D3D12_HardwareCounters())
|
||||
{
|
||||
AMDCounters *counters = NULL;
|
||||
|
||||
if(m_DriverInfo.vendor == GPUVendor::AMD || m_DriverInfo.vendor == GPUVendor::Samsung)
|
||||
{
|
||||
RDCLOG("AMD GPU detected - trying to initialise AMD counters");
|
||||
counters = new AMDCounters(m_pDevice->IsDebugLayerEnabled());
|
||||
}
|
||||
else
|
||||
{
|
||||
RDCLOG("%s GPU detected - no counters available", ToStr(m_DriverInfo.vendor).c_str());
|
||||
AMDCounters *countersAMD = new AMDCounters(m_pDevice->IsDebugLayerEnabled());
|
||||
|
||||
ID3D12Device *d3dDevice = m_pDevice->GetReal();
|
||||
|
||||
if(countersAMD && countersAMD->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice))
|
||||
{
|
||||
m_pAMDCounters = countersAMD;
|
||||
}
|
||||
else
|
||||
{
|
||||
delete countersAMD;
|
||||
}
|
||||
}
|
||||
|
||||
ID3D12Device *d3dDevice = m_pDevice->GetReal();
|
||||
if(m_DriverInfo.vendor == GPUVendor::nVidia)
|
||||
{
|
||||
RDCLOG("NVIDIA GPU detected - trying to initialise NVIDIA counters");
|
||||
|
||||
if(counters && counters->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice))
|
||||
{
|
||||
m_pAMDCounters = counters;
|
||||
}
|
||||
else
|
||||
{
|
||||
delete counters;
|
||||
m_pAMDCounters = NULL;
|
||||
NVD3D12Counters *countersNV = new NVD3D12Counters();
|
||||
|
||||
bool initSuccess = false;
|
||||
if(countersNV && countersNV->Init(m_pDevice->GetReal()))
|
||||
{
|
||||
m_pNVCounters = countersNV;
|
||||
initSuccess = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
delete countersNV;
|
||||
}
|
||||
|
||||
RDCLOG("NVIDIA D3D12 counter initialisation: %s", initSuccess ? "SUCCEEDED" : "FAILED");
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_pAMDCounters = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void D3D12Replay::DestroyResources()
|
||||
@@ -209,6 +219,8 @@ void D3D12Replay::DestroyResources()
|
||||
SAFE_DELETE(m_DebugManager);
|
||||
|
||||
SAFE_DELETE(m_pAMDCounters);
|
||||
|
||||
SAFE_DELETE(m_pNVCounters);
|
||||
}
|
||||
|
||||
RDResult D3D12Replay::ReadLogInitialisation(RDCFile *rdc, bool storeStructuredBuffers)
|
||||
|
||||
@@ -34,6 +34,8 @@ class AMDCounters;
|
||||
struct D3D12AMDActionCallback;
|
||||
class WrappedID3D12Device;
|
||||
|
||||
class NVD3D12Counters;
|
||||
|
||||
class D3D12DebugManager;
|
||||
|
||||
struct PortableHandle;
|
||||
@@ -506,6 +508,7 @@ private:
|
||||
std::map<rdcfixedarray<uint32_t, 4>, bytebuf> m_PatchedPSCache;
|
||||
|
||||
void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray<uint32_t> *eventIDs);
|
||||
|
||||
rdcarray<CounterResult> FetchCountersAMD(const rdcarray<GPUCounter> &counters);
|
||||
|
||||
NVD3D12Counters *m_pNVCounters = NULL;
|
||||
};
|
||||
|
||||
@@ -60,7 +60,7 @@
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\include;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\NvPerfUtility\include</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>RENDERDOC_EXPORTS;RENDERDOC_PLATFORM_WIN32;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
@@ -99,10 +99,14 @@
|
||||
<ClCompile Include="nvapi_hooks.cpp" />
|
||||
<ClCompile Include="nvapi_wrapper.cpp" />
|
||||
<ClCompile Include="nv_counters.cpp" />
|
||||
<ClCompile Include="nv_counter_enumerator.cpp" />
|
||||
<ClCompile Include="nv_d3d12_counters.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="nvapi_wrapper.h" />
|
||||
<ClInclude Include="nv_counters.h" />
|
||||
<ClInclude Include="nv_counter_enumerator.h" />
|
||||
<ClInclude Include="nv_d3d12_counters.h" />
|
||||
<ClInclude Include="official\nvapi\nvapi.h" />
|
||||
<ClInclude Include="official\nvapi\nvapi_interface.h" />
|
||||
<ClInclude Include="official\PerfKit\include\NvPmApi.h" />
|
||||
|
||||
@@ -0,0 +1,484 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2022 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#include "nv_counter_enumerator.h"
|
||||
|
||||
#include "common/common.h"
|
||||
#include "common/formatting.h"
|
||||
#include "os/os_specific.h"
|
||||
#include "strings/string_utils.h"
|
||||
|
||||
#if ENABLED(RDOC_WIN32)
|
||||
#include "windows-desktop-x64/nvperf_host_impl.h"
|
||||
#elif ENABLED(RDOC_LINUX)
|
||||
#include "linux-desktop-x64/nvperf_host_impl.h"
|
||||
#endif
|
||||
|
||||
#include "NvPerfCounterConfiguration.h"
|
||||
#include "NvPerfCounterData.h"
|
||||
#include "NvPerfMetricsEvaluator.h"
|
||||
|
||||
struct NVCounterEnumerator::Impl
|
||||
{
|
||||
public:
|
||||
nv::perf::MetricsEvaluator Evaluator;
|
||||
|
||||
nv::perf::CounterConfiguration
|
||||
SelectedConfiguration; // configImage etc. for the current selection
|
||||
rdcarray<GPUCounter> SelectedExternalIds;
|
||||
rdcarray<NVPW_MetricEvalRequest> SelectedEvalRequests;
|
||||
size_t SelectedNumPasses;
|
||||
|
||||
const rdcarray<GPUCounter> &ExternalIds()
|
||||
{
|
||||
InitEnumerateCounters();
|
||||
return m_ExternalIds;
|
||||
}
|
||||
const rdcarray<CounterDescription> &ExternalDescriptions()
|
||||
{
|
||||
InitEnumerateCounters();
|
||||
return m_ExternalDescriptions;
|
||||
}
|
||||
const rdcarray<NVPW_MetricEvalRequest> &AllEvalRequests()
|
||||
{
|
||||
InitEnumerateCounters();
|
||||
return m_AllEvalRequests;
|
||||
}
|
||||
|
||||
private:
|
||||
void InitEnumerateCounters();
|
||||
bool m_EnumerationDone = false;
|
||||
|
||||
rdcarray<GPUCounter> m_ExternalIds;
|
||||
rdcarray<CounterDescription> m_ExternalDescriptions;
|
||||
rdcarray<NVPW_MetricEvalRequest> m_AllEvalRequests;
|
||||
};
|
||||
|
||||
// Allocates the private implementation object (value-initialised).
NVCounterEnumerator::NVCounterEnumerator() : m_Impl(new Impl())
{
}
|
||||
|
||||
// Releases the private implementation object allocated by the constructor.
NVCounterEnumerator::~NVCounterEnumerator()
{
  Impl *owned = m_Impl;
  delete owned;
}
|
||||
|
||||
// Maps a Perf SDK dimensional-unit expression onto a RenderDoc CounterUnit.
//
// - no dim units at all          -> CounterUnit::Ratio
// - exactly one unit, exponent 1 -> the matching CounterUnit, if there is one
// - anything else                -> CounterUnit::Absolute
static CounterUnit ToCounterUnit(const std::vector<NVPW_DimUnitFactor> &dimUnits)
{
  if(dimUnits.empty())
  {
    return CounterUnit::Ratio;
  }

  if(dimUnits.size() == 1 && dimUnits[0].exponent == 1)
  {
    switch(dimUnits[0].dimUnit)
    {
      case NVPW_DIM_UNIT_BYTES: return CounterUnit::Bytes;
      case NVPW_DIM_UNIT_SECONDS: return CounterUnit::Seconds;
      case NVPW_DIM_UNIT_PERCENT: return CounterUnit::Percentage;
      // Every clock-domain cycle unit maps to Cycles. This is the same set of
      // units that counter enumeration treats as "cycles".
      case NVPW_DIM_UNIT_DRAM_CYCLES:
      case NVPW_DIM_UNIT_FBP_CYCLES:
      case NVPW_DIM_UNIT_GPC_CYCLES:
      case NVPW_DIM_UNIT_NVLRX_CYCLES:
      case NVPW_DIM_UNIT_NVLTX_CYCLES:
      case NVPW_DIM_UNIT_PCIE_CYCLES:
      case NVPW_DIM_UNIT_SYS_CYCLES: return CounterUnit::Cycles;
      // any other single unit is handled by the catch-all below
      default: break;
    }
  }

  // catch-all
  return CounterUnit::Absolute;
}
|
||||
|
||||
// Moves the supplied metrics evaluator into the implementation object.
// Always reports success.
bool NVCounterEnumerator::Init(nv::perf::MetricsEvaluator &&metricsEvaluator)
{
  Impl &impl = *m_Impl;
  impl.Evaluator = std::move(metricsEvaluator);
  return true;
}
|
||||
|
||||
void NVCounterEnumerator::Impl::InitEnumerateCounters()
|
||||
{
|
||||
// Defer counter enumeration until the first time this is called
|
||||
if(m_EnumerationDone)
|
||||
return;
|
||||
|
||||
m_EnumerationDone = true;
|
||||
|
||||
struct MetricAttribute
|
||||
{
|
||||
NVPW_MetricType metricType;
|
||||
NVPW_RollupOp rollupOp;
|
||||
NVPW_Submetric submetric;
|
||||
};
|
||||
const MetricAttribute metricAttributes[] = {
|
||||
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_SUM, NVPW_SUBMETRIC_NONE},
|
||||
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_NONE},
|
||||
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MAX, NVPW_SUBMETRIC_NONE},
|
||||
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MIN, NVPW_SUBMETRIC_NONE},
|
||||
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_RATIO},
|
||||
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_MAX_RATE},
|
||||
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_PCT},
|
||||
};
|
||||
for(size_t i = 0; i < sizeof(metricAttributes) / sizeof(metricAttributes[0]); i++)
|
||||
{
|
||||
const auto &attributes = metricAttributes[i];
|
||||
NVPW_MetricType metricType = attributes.metricType;
|
||||
NVPW_RollupOp rollupOp = attributes.rollupOp;
|
||||
NVPW_Submetric submetric = attributes.submetric;
|
||||
|
||||
for(const char *counterName : nv::perf::EnumerateMetrics(Evaluator, metricType))
|
||||
{
|
||||
if(strstr(counterName, "Triage") != NULL)
|
||||
continue; // filter out Triage counters (they are all duplicates)
|
||||
|
||||
size_t metricIndex;
|
||||
if(!nv::perf::GetMetricTypeAndIndex(Evaluator, counterName, metricType, metricIndex))
|
||||
continue;
|
||||
RDCASSERT(metricType == attributes.metricType);
|
||||
|
||||
NVPW_MetricEvalRequest evalReq = {};
|
||||
evalReq.metricIndex = metricIndex;
|
||||
evalReq.metricType = (uint8_t)metricType;
|
||||
evalReq.rollupOp = (uint8_t)rollupOp;
|
||||
evalReq.submetric = (uint16_t)submetric;
|
||||
|
||||
std::vector<NVPW_DimUnitFactor> dimUnits;
|
||||
GetMetricDimUnits(Evaluator, evalReq, dimUnits);
|
||||
|
||||
{
|
||||
//-----------------
|
||||
// Filter out metrics that count "cycles".
|
||||
// The RenderDoc replay loop is not designed for reproducing representative cycle counts.
|
||||
auto itr =
|
||||
std::find_if(dimUnits.begin(), dimUnits.end(), [](const NVPW_DimUnitFactor &factor) {
|
||||
switch(factor.dimUnit)
|
||||
{
|
||||
case NVPW_DIM_UNIT_DRAM_CYCLES:
|
||||
case NVPW_DIM_UNIT_FBP_CYCLES:
|
||||
case NVPW_DIM_UNIT_GPC_CYCLES:
|
||||
case NVPW_DIM_UNIT_NVLRX_CYCLES:
|
||||
case NVPW_DIM_UNIT_NVLTX_CYCLES:
|
||||
case NVPW_DIM_UNIT_PCIE_CYCLES:
|
||||
case NVPW_DIM_UNIT_SYS_CYCLES: return true;
|
||||
default: break;
|
||||
}
|
||||
return false;
|
||||
});
|
||||
if(itr != dimUnits.end())
|
||||
continue;
|
||||
}
|
||||
|
||||
CounterDescription desc = {};
|
||||
desc.resultType = CompType::Float;
|
||||
desc.resultByteWidth = 8;
|
||||
|
||||
//-----------------
|
||||
// Counter name, including rollup and submetric qualifiers
|
||||
desc.name = counterName;
|
||||
desc.name.append(nv::perf::ToCString((NVPW_RollupOp)evalReq.rollupOp));
|
||||
desc.name.append(nv::perf::ToCString((NVPW_Submetric)evalReq.submetric));
|
||||
|
||||
//-----------------
|
||||
// Counter description, including metric type and dim unit
|
||||
desc.description = rdcstr(GetMetricDescription(Evaluator, metricType, metricIndex));
|
||||
desc.description.append("<br/>HW Unit: <em>");
|
||||
NVPW_HwUnit hwunit = nv::perf::GetMetricHwUnit(Evaluator, metricType, metricIndex);
|
||||
desc.description.append(nv::perf::ToCString(Evaluator, hwunit));
|
||||
desc.description.append("</em>");
|
||||
desc.description.append("<br/>MetricType: <em>");
|
||||
desc.description.append(nv::perf::ToCString(metricType));
|
||||
desc.description.append("</em>");
|
||||
desc.description.append("<br/>RollupOp: <em>");
|
||||
desc.description.append(nv::perf::ToCString(rollupOp));
|
||||
desc.description.append("</em>");
|
||||
desc.description.append("<br/>Submetric: <em>");
|
||||
desc.description.append(nv::perf::ToCString(submetric));
|
||||
desc.description.append("</em>");
|
||||
desc.description.append("<br/>DimUnit: <em>");
|
||||
desc.description.append(
|
||||
nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) {
|
||||
return ToCString(Evaluator, dimUnit, plural);
|
||||
}).c_str());
|
||||
desc.description.append("</em>");
|
||||
|
||||
//-----------------
|
||||
// Categorize counter by DimUnit
|
||||
desc.category =
|
||||
rdcstr(nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) {
|
||||
return ToCString(Evaluator, dimUnit, plural);
|
||||
}).c_str());
|
||||
|
||||
//-----------------
|
||||
// Convert Perf SDK units to Renderdoc units (only works for limited subset of units)
|
||||
desc.unit = ToCounterUnit(dimUnits);
|
||||
|
||||
//-----------------
|
||||
// Assign external counter ID and UUID
|
||||
GPUCounter counterID =
|
||||
GPUCounter((uint32_t)GPUCounter::FirstNvidia + (uint32_t)m_AllEvalRequests.size());
|
||||
desc.counter = counterID;
|
||||
desc.uuid.words[0] = 0x25B624D0;
|
||||
desc.uuid.words[1] = 0x33244527;
|
||||
desc.uuid.words[2] = 0x9F71CD67;
|
||||
desc.uuid.words[3] = 0x61B37980 ^ strhash(desc.name.c_str());
|
||||
|
||||
m_ExternalIds.push_back(counterID);
|
||||
m_ExternalDescriptions.push_back(desc);
|
||||
m_AllEvalRequests.push_back(evalReq);
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------
|
||||
// Sort counter IDs by category and name so that counters appear sorted in the selection UI
|
||||
std::sort(m_ExternalIds.begin(), m_ExternalIds.end(),
|
||||
[this](const GPUCounter &a, const GPUCounter &b) {
|
||||
uint32_t a_localId = (uint32_t)a - (uint32_t)GPUCounter::FirstNvidia;
|
||||
uint32_t b_localId = (uint32_t)b - (uint32_t)GPUCounter::FirstNvidia;
|
||||
const CounterDescription &a_desc = m_ExternalDescriptions[a_localId];
|
||||
const CounterDescription &b_desc = m_ExternalDescriptions[b_localId];
|
||||
int result = strcmp(a_desc.category.c_str(), b_desc.category.c_str());
|
||||
if(result < 0)
|
||||
return true;
|
||||
if(result > 0)
|
||||
return false;
|
||||
result = strcmp(a_desc.name.c_str(), b_desc.name.c_str());
|
||||
if(result < 0)
|
||||
return true;
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
// Returns a copy of the counter ID list held by the implementation object.
rdcarray<GPUCounter> NVCounterEnumerator::GetPublicCounterIds()
{
  const rdcarray<GPUCounter> &publicIds = m_Impl->ExternalIds();
  return publicIds;
}
|
||||
|
||||
// Returns the description of the counter with the given ID.
//
// Descriptions are indexed by (counterID - GPUCounter::FirstNvidia). An ID
// outside the enumerated range (including one below FirstNvidia, which wraps
// to a very large unsigned index) logs an error and yields a
// default-constructed description instead of reading out of bounds.
CounterDescription NVCounterEnumerator::GetCounterDescription(GPUCounter counterID)
{
  const rdcarray<CounterDescription> &descriptions = m_Impl->ExternalDescriptions();
  const uint32_t LocalId = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia;
  if(LocalId >= descriptions.size())
  {
    RDCERR("Unknown NVIDIA counter ID %u", (uint32_t)counterID);
    return CounterDescription();
  }
  return descriptions[LocalId];
}
|
||||
|
||||
// Reports whether counterID falls inside the range of enumerated counters.
// The subtraction is unsigned, so an ID below FirstNvidia wraps to a large
// offset and fails the comparison.
bool NVCounterEnumerator::HasCounter(GPUCounter counterID)
{
  const uint32_t firstId = (uint32_t)GPUCounter::FirstNvidia;
  const uint32_t offset = (uint32_t)counterID - firstId;
  return offset < m_Impl->ExternalDescriptions().size();
}
|
||||
|
||||
bool NVCounterEnumerator::CreateConfig(const char *pChipName,
|
||||
NVPA_RawMetricsConfig *pRawMetricsConfig,
|
||||
const rdcarray<GPUCounter> &counters)
|
||||
{
|
||||
nv::perf::MetricsConfigBuilder metricsConfigBuilder;
|
||||
if(!metricsConfigBuilder.Initialize(m_Impl->Evaluator, pRawMetricsConfig, pChipName))
|
||||
{
|
||||
RDCERR("NvPerf failed to initialize config builder");
|
||||
return false;
|
||||
}
|
||||
|
||||
for(GPUCounter counterID : counters)
|
||||
{
|
||||
RDCASSERT(IsNvidiaCounter(counterID));
|
||||
if(!IsNvidiaCounter(counterID))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
size_t counterIndex = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia;
|
||||
const NVPW_MetricEvalRequest &evalReq = m_Impl->AllEvalRequests()[counterIndex];
|
||||
|
||||
m_Impl->SelectedExternalIds.push_back(counterID);
|
||||
m_Impl->SelectedEvalRequests.push_back(m_Impl->AllEvalRequests()[counterIndex]);
|
||||
if(!metricsConfigBuilder.AddMetrics(&evalReq, 1))
|
||||
{
|
||||
// std::string metricName = nv::perf::ToString(m_Impl->Evaluator, evalReq);
|
||||
const char *metricName = nv::perf::ToCString(
|
||||
m_Impl->Evaluator, (NVPW_MetricType)evalReq.metricType, evalReq.metricIndex);
|
||||
RDCERR("NvPerf failed to configure metric: %s", metricName);
|
||||
}
|
||||
}
|
||||
|
||||
if(!metricsConfigBuilder.PrepareConfigImage())
|
||||
{
|
||||
RDCERR("NvPerf failed to prepare config image");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t configImageSize = metricsConfigBuilder.GetConfigImageSize();
|
||||
size_t counterDataPrefixSize = metricsConfigBuilder.GetCounterDataPrefixSize();
|
||||
m_Impl->SelectedConfiguration.configImage.resize(configImageSize);
|
||||
m_Impl->SelectedConfiguration.counterDataPrefix.resize(counterDataPrefixSize);
|
||||
metricsConfigBuilder.GetConfigImage(m_Impl->SelectedConfiguration.configImage.size(),
|
||||
m_Impl->SelectedConfiguration.configImage.data());
|
||||
metricsConfigBuilder.GetCounterDataPrefix(m_Impl->SelectedConfiguration.counterDataPrefix.size(),
|
||||
m_Impl->SelectedConfiguration.counterDataPrefix.data());
|
||||
m_Impl->SelectedNumPasses = metricsConfigBuilder.GetNumPasses();
|
||||
return true;
|
||||
}
|
||||
|
||||
void NVCounterEnumerator::GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize,
|
||||
const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize)
|
||||
{
|
||||
pConfigImage = m_Impl->SelectedConfiguration.configImage.data();
|
||||
configImageSize = m_Impl->SelectedConfiguration.configImage.size();
|
||||
pCounterDataPrefix = m_Impl->SelectedConfiguration.counterDataPrefix.data();
|
||||
counterDataPrefixSize = m_Impl->SelectedConfiguration.counterDataPrefix.size();
|
||||
}
|
||||
|
||||
void NVCounterEnumerator::ClearConfig()
|
||||
{
|
||||
m_Impl->SelectedExternalIds.clear();
|
||||
m_Impl->SelectedEvalRequests.clear();
|
||||
m_Impl->SelectedConfiguration = {}; // clear the byte vectors
|
||||
m_Impl->SelectedNumPasses = 0u;
|
||||
}
|
||||
|
||||
size_t NVCounterEnumerator::GetMaxNumReplayPasses(uint16_t numNestingLevels)
|
||||
{
|
||||
// Calculate max number of replay passes
|
||||
RDCASSERT(m_Impl->SelectedNumPasses > 0u);
|
||||
return (size_t)numNestingLevels * m_Impl->SelectedNumPasses + 1u;
|
||||
}
|
||||
|
||||
bool NVCounterEnumerator::EvaluateMetrics(const uint8_t *counterDataImage,
|
||||
size_t counterDataImageSize,
|
||||
rdcarray<CounterResult> &values)
|
||||
{
|
||||
bool setDeviceSuccess = nv::perf::MetricsEvaluatorSetDeviceAttributes(
|
||||
m_Impl->Evaluator, counterDataImage, counterDataImageSize);
|
||||
if(!setDeviceSuccess)
|
||||
{
|
||||
RDCERR("NvPerf failed to determine device attributes from counter data");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t numRanges = nv::perf::CounterDataGetNumRanges(counterDataImage);
|
||||
|
||||
std::vector<double> doubleValues;
|
||||
doubleValues.resize(m_Impl->SelectedEvalRequests.size());
|
||||
for(uint32_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex)
|
||||
{
|
||||
const char *leafRangeName = NULL;
|
||||
std::string rangeName = nv::perf::profiler::CounterDataGetRangeName(
|
||||
counterDataImage, rangeIndex, '/', &leafRangeName);
|
||||
if(!leafRangeName)
|
||||
{
|
||||
RDCERR("Failed to access NvPerf range name");
|
||||
continue;
|
||||
}
|
||||
errno = 0;
|
||||
uint32_t eid = (uint32_t)strtoul(leafRangeName, NULL, 10);
|
||||
if(errno != 0)
|
||||
{
|
||||
RDCERR("Failed to parse NvPerf range name: %s", leafRangeName);
|
||||
continue;
|
||||
}
|
||||
|
||||
bool evalSuccess =
|
||||
nv::perf::EvaluateToGpuValues(m_Impl->Evaluator, counterDataImage, counterDataImageSize,
|
||||
rangeIndex, m_Impl->SelectedEvalRequests.size(),
|
||||
m_Impl->SelectedEvalRequests.data(), doubleValues.data());
|
||||
if(!evalSuccess)
|
||||
{
|
||||
RDCERR("NvPerf failed to evaluate GPU metrics for range: %s", leafRangeName);
|
||||
continue;
|
||||
}
|
||||
for(size_t counterIndex = 0; counterIndex < m_Impl->SelectedExternalIds.size(); ++counterIndex)
|
||||
{
|
||||
CounterResult counterResult(eid, m_Impl->SelectedExternalIds[counterIndex],
|
||||
doubleValues[counterIndex]);
|
||||
values.push_back(counterResult);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NVCounterEnumerator::InitializeNvPerf()
|
||||
{
|
||||
rdcstr pluginsFolder = FileIO::GetAppFolderFilename("plugins/nv");
|
||||
const char *paths[] = {
|
||||
pluginsFolder.c_str(), "./plugins/nv", ".",
|
||||
};
|
||||
NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE};
|
||||
params.numPaths = sizeof(paths) / sizeof(paths[0]);
|
||||
params.ppPaths = paths;
|
||||
NVPA_Status result = NVPW_SetLibraryLoadPaths(¶ms);
|
||||
if(result != NVPA_STATUS_SUCCESS)
|
||||
{
|
||||
RDCWARN("NvPerf could not set library search path");
|
||||
}
|
||||
return nv::perf::InitializeNvPerf();
|
||||
}
|
||||
|
||||
// Builds a placeholder counter description whose name and HTML description
// tell the user how to obtain and install the Nsight Perf SDK library.
//
// As a side effect, the parent directory of the expected library location is
// created so the user has somewhere to copy the file.
CounterDescription NVCounterEnumerator::LibraryNotFoundMessage()
{
  // Expected library location relative to the application folder, and the
  // working-directory-relative path used if that lookup returns nothing.
#if ENABLED(RDOC_WIN32)
  const char *appRelativeLibrary = "plugins\\nv\\nvperf_grfx_host.dll";
  const char *cwdRelativeLibrary = ".\\plugins\\nv\\nvperf_grfx_host.dll";
#elif ENABLED(RDOC_LINUX)
  const char *appRelativeLibrary = "plugins/nv/libnvperf_grfx_host.so";
  const char *cwdRelativeLibrary = "./plugins/nv/libnvperf_grfx_host.so";
#endif

  rdcstr libraryLocation = FileIO::GetAppFolderFilename(appRelativeLibrary);
  if(libraryLocation.empty())
    libraryLocation = cwdRelativeLibrary;

  CounterDescription message = {};
  message.counter = GPUCounter::FirstNvidia;
  message.unit = CounterUnit::Absolute;
  message.resultType = CompType::Typeless;
  message.resultByteWidth = 0;
  message.name = "ERROR: Could not find Nsight Perf SDK library";
  message.description = StringFormat::Fmt(
      "To use these counters, please:"
      "<ol>"
      "<li>download the Nsight Perf SDK from:<br/><a "
      "href=\"https://developer.nvidia.com/nsight-perf-sdk\">https://developer.nvidia.com/"
      "nsight-perf-sdk</a></li>"
      "<li>extract the SDK contents</li>"
      "<li>copy the <strong>"
#if ENABLED(RDOC_WIN32)
      "nvperf_grfx_host.dll"
#elif ENABLED(RDOC_LINUX)
      "libnvperf_grfx_host.so"
#endif
      "</strong> file to:<br/><strong>%s</strong></li>"
      "<li>reopen this capture</li>"
      "</ol>",
      libraryLocation.c_str());

  // Create the plugin directory, so user will have somewhere to place the plugin file
  FileIO::CreateParentDirectory(libraryLocation);

  return message;
}
|
||||
@@ -0,0 +1,71 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2022 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include "api/replay/data_types.h"
|
||||
#include "api/replay/rdcarray.h"
|
||||
#include "api/replay/replay_enums.h"
|
||||
#include "common/common.h"
|
||||
|
||||
struct NVPA_RawMetricsConfig;
|
||||
namespace nv
|
||||
{
|
||||
namespace perf
|
||||
{
|
||||
class MetricsEvaluator;
|
||||
}
|
||||
}
|
||||
|
||||
class NVCounterEnumerator
|
||||
{
|
||||
public:
|
||||
NVCounterEnumerator();
|
||||
~NVCounterEnumerator();
|
||||
|
||||
// This function takes ownership of metricsEvaluator.
|
||||
bool Init(nv::perf::MetricsEvaluator &&metricsEvaluator);
|
||||
|
||||
rdcarray<GPUCounter> GetPublicCounterIds();
|
||||
CounterDescription GetCounterDescription(GPUCounter counterID);
|
||||
bool HasCounter(GPUCounter counterID);
|
||||
|
||||
bool CreateConfig(const char *pChipName, NVPA_RawMetricsConfig *pRawMetricsConfig,
|
||||
const rdcarray<GPUCounter> &counters);
|
||||
void GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize,
|
||||
const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize);
|
||||
void ClearConfig();
|
||||
size_t GetMaxNumReplayPasses(uint16_t numNestingLevels);
|
||||
|
||||
bool EvaluateMetrics(const uint8_t *counterDataImage, size_t counterDataImageSize,
|
||||
rdcarray<CounterResult> &values);
|
||||
|
||||
static bool InitializeNvPerf();
|
||||
static CounterDescription LibraryNotFoundMessage();
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
Impl *m_Impl;
|
||||
};
|
||||
@@ -23,6 +23,7 @@
|
||||
******************************************************************************/
|
||||
|
||||
#include "nv_counters.h"
|
||||
|
||||
#include "common/common.h"
|
||||
#include "core/plugins.h"
|
||||
#include "os/os_specific.h"
|
||||
|
||||
@@ -0,0 +1,408 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2022 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#include "nv_d3d12_counters.h"
|
||||
|
||||
#include "nv_counter_enumerator.h"
|
||||
|
||||
#include "api/replay/shader_types.h"
|
||||
#include "driver/d3d12/d3d12_command_list.h"
|
||||
#include "driver/d3d12/d3d12_command_queue.h"
|
||||
#include "driver/d3d12/d3d12_commands.h"
|
||||
#include "driver/d3d12/d3d12_device.h"
|
||||
#include "driver/d3d12/d3d12_replay.h"
|
||||
|
||||
#include "NvPerfD3D12.h"
|
||||
#include "NvPerfRangeProfilerD3D12.h"
|
||||
#include "NvPerfScopeExitGuard.h"
|
||||
|
||||
struct NVD3D12Counters::Impl
|
||||
{
|
||||
NVCounterEnumerator *CounterEnumerator;
|
||||
bool LibraryNotFound = false;
|
||||
|
||||
Impl() : CounterEnumerator(NULL) {}
|
||||
~Impl()
|
||||
{
|
||||
delete CounterEnumerator;
|
||||
CounterEnumerator = NULL;
|
||||
}
|
||||
|
||||
bool TryInitializePerfSDK(ID3D12Device *device)
|
||||
{
|
||||
if(!NVCounterEnumerator::InitializeNvPerf())
|
||||
{
|
||||
RDCERR("NvPerf library failed to initialize");
|
||||
LibraryNotFound = true;
|
||||
|
||||
// NOTE: Return success here so that we can later show a message
|
||||
// directing the user to download the Nsight Perf SDK library.
|
||||
return true;
|
||||
}
|
||||
|
||||
if(!nv::perf::D3D12LoadDriver())
|
||||
{
|
||||
RDCERR("NvPerf failed to load D3D12 driver");
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!nv::perf::profiler::D3D12IsGpuSupported(device))
|
||||
{
|
||||
RDCERR("NvPerf does not support profiling on this GPU");
|
||||
return false;
|
||||
}
|
||||
|
||||
nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(device);
|
||||
if(!deviceIdentifiers.pChipName)
|
||||
{
|
||||
RDCERR("NvPerf could not determine chip name");
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t scratchBufferSize =
|
||||
nv::perf::D3D12CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
|
||||
if(!scratchBufferSize)
|
||||
{
|
||||
RDCERR("NvPerf could not determine the scratch buffer size for metrics evaluation");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> scratchBuffer;
|
||||
scratchBuffer.resize(scratchBufferSize);
|
||||
NVPW_MetricsEvaluator *pMetricsEvaluator = nv::perf::D3D12CreateMetricsEvaluator(
|
||||
scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
|
||||
if(!pMetricsEvaluator)
|
||||
{
|
||||
RDCERR("NvPerf could not initialize metrics evaluator");
|
||||
return false;
|
||||
}
|
||||
|
||||
nv::perf::MetricsEvaluator metricsEvaluator(pMetricsEvaluator, std::move(scratchBuffer));
|
||||
|
||||
CounterEnumerator = new NVCounterEnumerator;
|
||||
if(!CounterEnumerator->Init(std::move(metricsEvaluator)))
|
||||
{
|
||||
RDCERR("NvPerf could not initialize metrics evaluator");
|
||||
delete CounterEnumerator;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
static bool CanProfileEvent(const ActionDescription &actionnode)
|
||||
{
|
||||
if(!actionnode.children.empty())
|
||||
return false; // Only profile events for leaf nodes
|
||||
|
||||
if(actionnode.events.empty())
|
||||
return false; // Skip nodes with no events
|
||||
|
||||
if(!(actionnode.flags & (ActionFlags::Clear | ActionFlags::Drawcall | ActionFlags::Dispatch |
|
||||
ActionFlags::Present | ActionFlags::Copy | ActionFlags::Resolve)))
|
||||
return false; // Filter out events we cannot profile
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void RecurseDiscoverEvents(uint32_t &numEvents, const ActionDescription &actionnode)
|
||||
{
|
||||
for(size_t i = 0; i < actionnode.children.size(); i++)
|
||||
{
|
||||
RecurseDiscoverEvents(numEvents, actionnode.children[i]);
|
||||
}
|
||||
|
||||
if(!Impl::CanProfileEvent(actionnode))
|
||||
return;
|
||||
|
||||
numEvents++;
|
||||
}
|
||||
};
|
||||
|
||||
// Starts with no implementation object; Init() allocates it.
NVD3D12Counters::NVD3D12Counters() : m_Impl(NULL) {}
|
||||
|
||||
// Frees the implementation object, if any, and clears the pointer.
NVD3D12Counters::~NVD3D12Counters()
{
  Impl *impl = m_Impl;
  m_Impl = NULL;
  delete impl;
}
|
||||
|
||||
// Allocates the implementation object and runs Perf SDK initialization against `device`.
// On failure the implementation object is destroyed again and false is returned.
bool NVD3D12Counters::Init(ID3D12Device *device)
{
  m_Impl = new Impl;
  if(m_Impl == NULL)
    return false;

  if(m_Impl->TryInitializePerfSDK(device))
    return true;

  // initialization failed, so tear the implementation back down
  delete m_Impl;
  m_Impl = NULL;
  return false;
}
|
||||
|
||||
// Lists the counter IDs available from the counter enumerator. When the NvPerf library was not
// found, only the placeholder ID GPUCounter::FirstNvidia is reported.
rdcarray<GPUCounter> NVD3D12Counters::EnumerateCounters() const
{
  if(!m_Impl->LibraryNotFound)
    return m_Impl->CounterEnumerator->GetPublicCounterIds();

  return {GPUCounter::FirstNvidia};
}
|
||||
|
||||
// Reports whether `counterID` is handled by this class. When the NvPerf library was not found,
// only the placeholder ID GPUCounter::FirstNvidia is recognized.
bool NVD3D12Counters::HasCounter(GPUCounter counterID) const
{
  if(!m_Impl->LibraryNotFound)
    return m_Impl->CounterEnumerator->HasCounter(counterID);

  return counterID == GPUCounter::FirstNvidia;
}
|
||||
|
||||
// Returns the description for `counterID` from the counter enumerator. When the NvPerf library
// was not found, the library-not-found message description is returned instead.
CounterDescription NVD3D12Counters::DescribeCounter(GPUCounter counterID) const
{
  if(!m_Impl->LibraryNotFound)
    return m_Impl->CounterEnumerator->GetCounterDescription(counterID);

  RDCASSERT(counterID == GPUCounter::FirstNvidia);
  // The placeholder counter carries a message directing the user to download the Nsight Perf SDK
  // library
  return NVCounterEnumerator::LibraryNotFoundMessage();
}
|
||||
|
||||
struct D3D12NvidiaActionCallback final : public D3D12ActionCallback
|
||||
{
|
||||
D3D12NvidiaActionCallback(WrappedID3D12Device *dev,
|
||||
nv::perf::profiler::D3D12RangeCommands *pRangeCommands)
|
||||
: m_pDevice(dev), m_pRangeCommands(pRangeCommands)
|
||||
{
|
||||
m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = this;
|
||||
}
|
||||
|
||||
virtual ~D3D12NvidiaActionCallback()
|
||||
{
|
||||
m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = NULL;
|
||||
}
|
||||
|
||||
void PreDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
rdcstr eidName = StringFormat::Fmt("%d", eid);
|
||||
|
||||
WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
|
||||
m_pRangeCommands->PushRange(pWrappedCmdList->GetReal(), eidName.c_str());
|
||||
}
|
||||
|
||||
bool PostDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
|
||||
m_pRangeCommands->PopRange(pWrappedCmdList->GetReal());
|
||||
return false;
|
||||
}
|
||||
|
||||
void PreCloseCommandList(ID3D12GraphicsCommandListX *cmd) final {}
|
||||
void PostRedraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final {}
|
||||
void PreDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PreDraw(eid, cmd); }
|
||||
bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
return PostDraw(eid, cmd);
|
||||
}
|
||||
void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PostRedraw(eid, cmd); }
|
||||
void PreMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
if(flags & ActionFlags::PassBoundary)
|
||||
return;
|
||||
PreDraw(eid, cmd);
|
||||
}
|
||||
bool PostMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
if(flags & ActionFlags::PassBoundary)
|
||||
return false;
|
||||
return PostDraw(eid, cmd);
|
||||
}
|
||||
void PostRemisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
|
||||
{
|
||||
if(flags & ActionFlags::PassBoundary)
|
||||
return;
|
||||
PostRedraw(eid, cmd);
|
||||
}
|
||||
|
||||
void AliasEvent(uint32_t primary, uint32_t alias) final {}
|
||||
WrappedID3D12Device *m_pDevice;
|
||||
nv::perf::profiler::D3D12RangeCommands *m_pRangeCommands;
|
||||
};
|
||||
|
||||
// Collects the requested counters by replaying the capture under an NvPerf range profiler
// session, once per direct or compute queue of `device`.
//
// Returns the evaluated counter results. Returns an empty array when the NvPerf library was not
// found, when the device is not reported as an NVIDIA device, or when a queue produces no counter
// data or its metrics cannot be evaluated. Queues whose session cannot be started, or whose
// counter collection cannot be scheduled, are skipped.
rdcarray<CounterResult> NVD3D12Counters::FetchCounters(const rdcarray<GPUCounter> &counters,
                                                       WrappedID3D12Device &device)
{
  if(m_Impl->LibraryNotFound)
  {
    return {};
  }

  uint32_t maxEID = device.GetQueue()->GetMaxEID();
  ID3D12Device *d3dDevice = device.GetReal();

  nv::perf::profiler::D3D12RangeCommands rangeCommands;
  rangeCommands.Initialize(d3dDevice);
  RDCASSERT(rangeCommands.isNvidiaDevice);
  if(!rangeCommands.isNvidiaDevice)
  {
    return {};
  }

  uint32_t maxNumRanges = 0;
  {
    // walk the action tree to determine how many profile-able events there are
    FrameRecord frameRecord = device.GetReplay()->GetFrameRecord();
    for(size_t i = 0; i < frameRecord.actionList.size(); i++)
    {
      Impl::RecurseDiscoverEvents(maxNumRanges, frameRecord.actionList[i]);
    }
  }

  nv::perf::profiler::SessionOptions sessionOptions = {};
  sessionOptions.maxNumRanges = maxNumRanges;
  sessionOptions.avgRangeNameLength = 16;
  sessionOptions.numTraceBuffers = 1;

  nv::perf::profiler::RangeProfilerD3D12 rangeProfiler;

  rdcarray<CounterResult> results;
  const rdcarray<WrappedID3D12CommandQueue *> &commandQueues = device.GetQueues();
  for(WrappedID3D12CommandQueue *pWrappedQueue : commandQueues)
  {
    ID3D12CommandQueue *d3dQueue = pWrappedQueue->GetReal();

    switch(d3dQueue->GetDesc().Type)
    {
      case D3D12_COMMAND_LIST_TYPE_DIRECT:
      case D3D12_COMMAND_LIST_TYPE_COMPUTE:
        // Profiling is supported for 3D and compute queues.
        break;
      case D3D12_COMMAND_LIST_TYPE_BUNDLE:
      case D3D12_COMMAND_LIST_TYPE_COPY:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE:
        // Profiling is not supported for copy or video queues.
        continue;
    }

    if(!rangeProfiler.BeginSession(d3dQueue, sessionOptions))
    {
      RDCERR("NvPerf failed to start profiling session");
      continue;    // Try the next command queue
    }
    // Ends the session on every exit path from this loop iteration.
    auto sessionGuard = nv::perf::ScopeExitGuard([&rangeProfiler]() { rangeProfiler.EndSession(); });

    // Create counter configuration, and set it.
    {
      nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(d3dDevice);
      NVPA_RawMetricsConfig *pRawMetricsConfig =
          nv::perf::profiler::D3D12CreateRawMetricsConfig(deviceIdentifiers.pChipName);
      m_Impl->CounterEnumerator->CreateConfig(deviceIdentifiers.pChipName, pRawMetricsConfig,
                                              counters);
    }

    nv::perf::profiler::SetConfigParams setConfigParams;
    setConfigParams.numNestingLevels = 1;
    setConfigParams.numStatisticalSamples = 1;
    m_Impl->CounterEnumerator->GetConfig(
        setConfigParams.pConfigImage, setConfigParams.configImageSize,
        setConfigParams.pCounterDataPrefix, setConfigParams.counterDataPrefixSize);

    size_t maxNumReplayPasses =
        m_Impl->CounterEnumerator->GetMaxNumReplayPasses(setConfigParams.numNestingLevels);
    RDCASSERT(maxNumReplayPasses > 0u);

    if(!rangeProfiler.EnqueueCounterCollection(setConfigParams))
    {
      RDCERR("NvPerf failed to schedule counter collection");
      continue;    // Try the next command queue
    }

    // Installs itself as the queue's action callback for the rest of this loop iteration.
    D3D12NvidiaActionCallback actionCallback(&device, &rangeCommands);

    std::vector<uint8_t> counterDataImage;
    for(size_t replayPass = 0;; ++replayPass)
    {
      if(!rangeProfiler.BeginPass())
      {
        RDCERR("NvPerf failed to start counter collection pass");
        break;
      }

      // replay the events to perform all the queries
      uint32_t eventStartID = 0;
      device.ReplayLog(eventStartID, maxEID, eReplay_Full);

      if(!rangeProfiler.EndPass())
      {
        RDCERR("NvPerf failed to end counter collection pass!");
        break;
      }

      // device->GPUSync(d3dQueue);

      nv::perf::profiler::DecodeResult decodeResult;
      if(!rangeProfiler.DecodeCounters(decodeResult))
      {
        RDCERR("NvPerf failed to decode counters in collection pass");
        break;
      }

      if(decodeResult.allPassesDecoded)
      {
        counterDataImage = std::move(decodeResult.counterDataImage);
        break;    // Success!
      }

      // Compare as "replayPass + 1 >= max" rather than "replayPass >= max - 1": the latter
      // underflows when maxNumReplayPasses is 0, which would disable the pass limit entirely.
      if(replayPass + 1 >= maxNumReplayPasses)
      {
        RDCERR("NvPerf exceeded the maximum expected number of replay passes");
        break;    // Failure
      }
    }

    if(counterDataImage.empty())
    {
      RDCERR("No data found in NvPerf counter data image");
      return {};
    }

    if(!m_Impl->CounterEnumerator->EvaluateMetrics(counterDataImage.data(), counterDataImage.size(),
                                                   results))
    {
      RDCERR("NvPerf failed to evaluate metrics from counter data");
      return {};
    }
  }

  return results;
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2022 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "api/replay/data_types.h"
|
||||
#include "api/replay/rdcarray.h"
|
||||
#include "api/replay/replay_enums.h"
|
||||
|
||||
struct ID3D12Device;
|
||||
class WrappedID3D12Device;
|
||||
|
||||
class NVD3D12Counters final
|
||||
{
|
||||
public:
|
||||
NVD3D12Counters();
|
||||
~NVD3D12Counters();
|
||||
|
||||
bool Init(ID3D12Device *device);
|
||||
|
||||
rdcarray<GPUCounter> EnumerateCounters() const;
|
||||
bool HasCounter(GPUCounter counterID) const;
|
||||
CounterDescription DescribeCounter(GPUCounter counterID) const;
|
||||
rdcarray<CounterResult> FetchCounters(const rdcarray<GPUCounter> &counters,
|
||||
WrappedID3D12Device &device);
|
||||
|
||||
private:
|
||||
struct Impl;
|
||||
Impl *m_Impl;
|
||||
};
|
||||
Reference in New Issue
Block a user