Collect Nsight Perf SDK counters in D3D12

This adds the NVD3D12Counters class, which implements D3D12 counter
collection on NVIDIA hardware via the NVIDIA Nsight Perf SDK.

A helper class NVCounterEnumerator is added to provide functionality
common to NVIDIA counters for other graphics APIs.
This commit is contained in:
Jonathan Glines
2022-11-09 09:25:48 -08:00
committed by Baldur Karlsson
parent 2b5268b898
commit abab16f8c6
10 changed files with 1084 additions and 23 deletions
+27 -1
View File
@@ -25,6 +25,7 @@
#include <algorithm>
#include <iterator>
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/nv/nv_d3d12_counters.h"
#include "d3d12_command_list.h"
#include "d3d12_command_queue.h"
#include "d3d12_common.h"
@@ -54,6 +55,11 @@ rdcarray<GPUCounter> D3D12Replay::EnumerateCounters()
ret.append(m_pAMDCounters->GetPublicCounterIds());
}
if(m_pNVCounters)
{
ret.append(m_pNVCounters->EnumerateCounters());
}
return ret;
}
@@ -72,6 +78,13 @@ CounterDescription D3D12Replay::DescribeCounter(GPUCounter counterID)
}
}
/////NVIDIA//////
if(m_pNVCounters && m_pNVCounters->HasCounter(counterID))
{
desc = m_pNVCounters->DescribeCounter(counterID);
return desc;
}
// 0808CC9B-79DF-4549-81F7-85494E648F22
desc.uuid.words[0] = 0x0808CC9B;
desc.uuid.words[1] = 0x79DF4549;
@@ -497,7 +510,7 @@ rdcarray<CounterResult> D3D12Replay::FetchCounters(const rdcarray<GPUCounter> &c
rdcarray<GPUCounter> d3dCounters;
std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters),
[](const GPUCounter &c) { return !IsAMDCounter(c); });
[](const GPUCounter &c) { return IsGenericCounter(c); });
if(m_pAMDCounters)
{
@@ -512,6 +525,19 @@ rdcarray<CounterResult> D3D12Replay::FetchCounters(const rdcarray<GPUCounter> &c
}
}
if(m_pNVCounters)
{
// Filter out the NVIDIA counters
rdcarray<GPUCounter> nvCounters;
std::copy_if(counters.begin(), counters.end(), std::back_inserter(nvCounters),
[=](const GPUCounter &c) { return m_pNVCounters->HasCounter(c); });
if(!nvCounters.empty())
{
rdcarray<CounterResult> results = m_pNVCounters->FetchCounters(nvCounters, *m_pDevice);
ret.append(results);
}
}
if(d3dCounters.empty())
{
return ret;
+1
View File
@@ -871,6 +871,7 @@ public:
CaptureState GetState() { return m_State; }
D3D12Replay *GetReplay() { return m_Replay; }
WrappedID3D12CommandQueue *GetQueue() { return m_Queue; }
const rdcarray<WrappedID3D12CommandQueue *> &GetQueues() { return m_Queues; }
ID3D12CommandAllocator *GetAlloc() { return m_Alloc; }
ID3D12InfoQueue *GetInfoQueue() { return m_pInfoQueue; }
void ApplyBarriers(rdcarray<D3D12_RESOURCE_BARRIER> &barriers);
+32 -20
View File
@@ -29,6 +29,7 @@
#include "driver/dxgi/dxgi_common.h"
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/amd/amd_rgp.h"
#include "driver/ihv/nv/nv_d3d12_counters.h"
#include "maths/camera.h"
#include "maths/formatpacking.h"
#include "maths/matrix.h"
@@ -151,35 +152,44 @@ void D3D12Replay::CreateResources()
if(!m_Proxy && D3D12_HardwareCounters())
{
AMDCounters *counters = NULL;
if(m_DriverInfo.vendor == GPUVendor::AMD || m_DriverInfo.vendor == GPUVendor::Samsung)
{
RDCLOG("AMD GPU detected - trying to initialise AMD counters");
counters = new AMDCounters(m_pDevice->IsDebugLayerEnabled());
}
else
{
RDCLOG("%s GPU detected - no counters available", ToStr(m_DriverInfo.vendor).c_str());
AMDCounters *countersAMD = new AMDCounters(m_pDevice->IsDebugLayerEnabled());
ID3D12Device *d3dDevice = m_pDevice->GetReal();
if(countersAMD && countersAMD->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice))
{
m_pAMDCounters = countersAMD;
}
else
{
delete countersAMD;
}
}
ID3D12Device *d3dDevice = m_pDevice->GetReal();
if(m_DriverInfo.vendor == GPUVendor::nVidia)
{
RDCLOG("NVIDIA GPU detected - trying to initialise NVIDIA counters");
if(counters && counters->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice))
{
m_pAMDCounters = counters;
}
else
{
delete counters;
m_pAMDCounters = NULL;
NVD3D12Counters *countersNV = new NVD3D12Counters();
bool initSuccess = false;
if(countersNV && countersNV->Init(m_pDevice->GetReal()))
{
m_pNVCounters = countersNV;
initSuccess = true;
}
else
{
delete countersNV;
}
RDCLOG("NVIDIA D3D12 counter initialisation: %s", initSuccess ? "SUCCEEDED" : "FAILED");
}
}
}
else
{
m_pAMDCounters = NULL;
}
}
void D3D12Replay::DestroyResources()
@@ -209,6 +219,8 @@ void D3D12Replay::DestroyResources()
SAFE_DELETE(m_DebugManager);
SAFE_DELETE(m_pAMDCounters);
SAFE_DELETE(m_pNVCounters);
}
RDResult D3D12Replay::ReadLogInitialisation(RDCFile *rdc, bool storeStructuredBuffers)
+4 -1
View File
@@ -34,6 +34,8 @@ class AMDCounters;
struct D3D12AMDActionCallback;
class WrappedID3D12Device;
class NVD3D12Counters;
class D3D12DebugManager;
struct PortableHandle;
@@ -506,6 +508,7 @@ private:
std::map<rdcfixedarray<uint32_t, 4>, bytebuf> m_PatchedPSCache;
void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray<uint32_t> *eventIDs);
rdcarray<CounterResult> FetchCountersAMD(const rdcarray<GPUCounter> &counters);
NVD3D12Counters *m_pNVCounters = NULL;
};
+5 -1
View File
@@ -60,7 +60,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\include;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\NvPerfUtility\include</AdditionalIncludeDirectories>
<PreprocessorDefinitions>RENDERDOC_EXPORTS;RENDERDOC_PLATFORM_WIN32;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<WarningLevel>Level4</WarningLevel>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
@@ -99,10 +99,14 @@
<ClCompile Include="nvapi_hooks.cpp" />
<ClCompile Include="nvapi_wrapper.cpp" />
<ClCompile Include="nv_counters.cpp" />
<ClCompile Include="nv_counter_enumerator.cpp" />
<ClCompile Include="nv_d3d12_counters.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="nvapi_wrapper.h" />
<ClInclude Include="nv_counters.h" />
<ClInclude Include="nv_counter_enumerator.h" />
<ClInclude Include="nv_d3d12_counters.h" />
<ClInclude Include="official\nvapi\nvapi.h" />
<ClInclude Include="official\nvapi\nvapi_interface.h" />
<ClInclude Include="official\PerfKit\include\NvPmApi.h" />
@@ -0,0 +1,484 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2022 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "nv_counter_enumerator.h"
#include "common/common.h"
#include "common/formatting.h"
#include "os/os_specific.h"
#include "strings/string_utils.h"
#if ENABLED(RDOC_WIN32)
#include "windows-desktop-x64/nvperf_host_impl.h"
#elif ENABLED(RDOC_LINUX)
#include "linux-desktop-x64/nvperf_host_impl.h"
#endif
#include "NvPerfCounterConfiguration.h"
#include "NvPerfCounterData.h"
#include "NvPerfMetricsEvaluator.h"
// Private state behind NVCounterEnumerator.
//
// The "Selected*" members describe the counter selection most recently built by
// NVCounterEnumerator::CreateConfig(). The full list of counters is produced lazily: each of the
// accessors below runs InitEnumerateCounters() before returning its array.
struct NVCounterEnumerator::Impl
{
public:
  // Metrics evaluator handed over by NVCounterEnumerator::Init()
  nv::perf::MetricsEvaluator Evaluator;

  // configImage and counterDataPrefix byte arrays for the current selection
  nv::perf::CounterConfiguration SelectedConfiguration;

  // External IDs and evaluation requests of the current selection. CreateConfig() appends to both
  // together and EvaluateMetrics() indexes both with the same counter index.
  rdcarray<GPUCounter> SelectedExternalIds;
  rdcarray<NVPW_MetricEvalRequest> SelectedEvalRequests;

  // Number of passes reported by the config builder for the current selection. Explicitly zero
  // until CreateConfig() succeeds, rather than relying on how the object happens to be constructed.
  size_t SelectedNumPasses = 0;

  // Every exposed counter ID (sorted for display by InitEnumerateCounters)
  const rdcarray<GPUCounter> &ExternalIds()
  {
    InitEnumerateCounters();
    return m_ExternalIds;
  }

  // Counter descriptions, indexed by (counter ID - GPUCounter::FirstNvidia)
  const rdcarray<CounterDescription> &ExternalDescriptions()
  {
    InitEnumerateCounters();
    return m_ExternalDescriptions;
  }

  // Evaluation requests, indexed by (counter ID - GPUCounter::FirstNvidia)
  const rdcarray<NVPW_MetricEvalRequest> &AllEvalRequests()
  {
    InitEnumerateCounters();
    return m_AllEvalRequests;
  }

private:
  // Fills the three arrays below the first time it is called; a no-op afterwards
  void InitEnumerateCounters();

  bool m_EnumerationDone = false;
  rdcarray<GPUCounter> m_ExternalIds;
  rdcarray<CounterDescription> m_ExternalDescriptions;
  rdcarray<NVPW_MetricEvalRequest> m_AllEvalRequests;
};
// Allocates the private implementation object; it is released again in the destructor.
NVCounterEnumerator::NVCounterEnumerator() : m_Impl(new NVCounterEnumerator::Impl())
{
}
// Releases the private implementation object allocated by the constructor.
NVCounterEnumerator::~NVCounterEnumerator()
{
delete m_Impl;
}
// Maps an NvPerf dimensional-unit expression onto a RenderDoc CounterUnit.
//
// dimUnits: the unit factors of a metric.
//
// Returns CounterUnit::Ratio when there are no unit factors, a specific unit when the expression
// is a single factor with exponent 1 that has a direct equivalent, and CounterUnit::Absolute for
// everything else.
static CounterUnit ToCounterUnit(const std::vector<NVPW_DimUnitFactor> &dimUnits)
{
  if(dimUnits.empty())
  {
    return CounterUnit::Ratio;
  }

  if(dimUnits.size() == 1 && dimUnits[0].exponent == 1)
  {
    switch(dimUnits[0].dimUnit)
    {
      case NVPW_DIM_UNIT_BYTES: return CounterUnit::Bytes;
      case NVPW_DIM_UNIT_SECONDS: return CounterUnit::Seconds;
      case NVPW_DIM_UNIT_PERCENT: return CounterUnit::Percentage;
      // Same set of clock domains that InitEnumerateCounters() treats as "cycles"
      case NVPW_DIM_UNIT_DRAM_CYCLES:
      case NVPW_DIM_UNIT_FBP_CYCLES:
      case NVPW_DIM_UNIT_GPC_CYCLES:
      case NVPW_DIM_UNIT_NVLRX_CYCLES:
      case NVPW_DIM_UNIT_NVLTX_CYCLES:
      case NVPW_DIM_UNIT_PCIE_CYCLES:
      case NVPW_DIM_UNIT_SYS_CYCLES: return CounterUnit::Cycles;
      // any other unit is reported through the catch-all below
      default: break;
    }
  }

  // catch-all
  return CounterUnit::Absolute;
}
// Moves the caller's metrics evaluator into the implementation object.
// Always returns true.
bool NVCounterEnumerator::Init(nv::perf::MetricsEvaluator &&metricsEvaluator)
{
m_Impl->Evaluator = std::move(metricsEvaluator);
return true;
}
void NVCounterEnumerator::Impl::InitEnumerateCounters()
{
// Defer counter enumeration until the first time this is called
if(m_EnumerationDone)
return;
m_EnumerationDone = true;
struct MetricAttribute
{
NVPW_MetricType metricType;
NVPW_RollupOp rollupOp;
NVPW_Submetric submetric;
};
const MetricAttribute metricAttributes[] = {
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_SUM, NVPW_SUBMETRIC_NONE},
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_NONE},
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MAX, NVPW_SUBMETRIC_NONE},
{NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MIN, NVPW_SUBMETRIC_NONE},
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_RATIO},
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_MAX_RATE},
{NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_PCT},
};
for(size_t i = 0; i < sizeof(metricAttributes) / sizeof(metricAttributes[0]); i++)
{
const auto &attributes = metricAttributes[i];
NVPW_MetricType metricType = attributes.metricType;
NVPW_RollupOp rollupOp = attributes.rollupOp;
NVPW_Submetric submetric = attributes.submetric;
for(const char *counterName : nv::perf::EnumerateMetrics(Evaluator, metricType))
{
if(strstr(counterName, "Triage") != NULL)
continue; // filter out Triage counters (they are all duplicates)
size_t metricIndex;
if(!nv::perf::GetMetricTypeAndIndex(Evaluator, counterName, metricType, metricIndex))
continue;
RDCASSERT(metricType == attributes.metricType);
NVPW_MetricEvalRequest evalReq = {};
evalReq.metricIndex = metricIndex;
evalReq.metricType = (uint8_t)metricType;
evalReq.rollupOp = (uint8_t)rollupOp;
evalReq.submetric = (uint16_t)submetric;
std::vector<NVPW_DimUnitFactor> dimUnits;
GetMetricDimUnits(Evaluator, evalReq, dimUnits);
{
//-----------------
// Filter out metrics that count "cycles".
// The RenderDoc replay loop is not designed for reproducing representative cycle counts.
auto itr =
std::find_if(dimUnits.begin(), dimUnits.end(), [](const NVPW_DimUnitFactor &factor) {
switch(factor.dimUnit)
{
case NVPW_DIM_UNIT_DRAM_CYCLES:
case NVPW_DIM_UNIT_FBP_CYCLES:
case NVPW_DIM_UNIT_GPC_CYCLES:
case NVPW_DIM_UNIT_NVLRX_CYCLES:
case NVPW_DIM_UNIT_NVLTX_CYCLES:
case NVPW_DIM_UNIT_PCIE_CYCLES:
case NVPW_DIM_UNIT_SYS_CYCLES: return true;
default: break;
}
return false;
});
if(itr != dimUnits.end())
continue;
}
CounterDescription desc = {};
desc.resultType = CompType::Float;
desc.resultByteWidth = 8;
//-----------------
// Counter name, including rollup and submetric qualifiers
desc.name = counterName;
desc.name.append(nv::perf::ToCString((NVPW_RollupOp)evalReq.rollupOp));
desc.name.append(nv::perf::ToCString((NVPW_Submetric)evalReq.submetric));
//-----------------
// Counter description, including metric type and dim unit
desc.description = rdcstr(GetMetricDescription(Evaluator, metricType, metricIndex));
desc.description.append("<br/>HW Unit: <em>");
NVPW_HwUnit hwunit = nv::perf::GetMetricHwUnit(Evaluator, metricType, metricIndex);
desc.description.append(nv::perf::ToCString(Evaluator, hwunit));
desc.description.append("</em>");
desc.description.append("<br/>MetricType: <em>");
desc.description.append(nv::perf::ToCString(metricType));
desc.description.append("</em>");
desc.description.append("<br/>RollupOp: <em>");
desc.description.append(nv::perf::ToCString(rollupOp));
desc.description.append("</em>");
desc.description.append("<br/>Submetric: <em>");
desc.description.append(nv::perf::ToCString(submetric));
desc.description.append("</em>");
desc.description.append("<br/>DimUnit: <em>");
desc.description.append(
nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) {
return ToCString(Evaluator, dimUnit, plural);
}).c_str());
desc.description.append("</em>");
//-----------------
// Categorize counter by DimUnit
desc.category =
rdcstr(nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) {
return ToCString(Evaluator, dimUnit, plural);
}).c_str());
//-----------------
// Convert Perf SDK units to Renderdoc units (only works for limited subset of units)
desc.unit = ToCounterUnit(dimUnits);
//-----------------
// Assign external counter ID and UUID
GPUCounter counterID =
GPUCounter((uint32_t)GPUCounter::FirstNvidia + (uint32_t)m_AllEvalRequests.size());
desc.counter = counterID;
desc.uuid.words[0] = 0x25B624D0;
desc.uuid.words[1] = 0x33244527;
desc.uuid.words[2] = 0x9F71CD67;
desc.uuid.words[3] = 0x61B37980 ^ strhash(desc.name.c_str());
m_ExternalIds.push_back(counterID);
m_ExternalDescriptions.push_back(desc);
m_AllEvalRequests.push_back(evalReq);
}
}
//-----------------
// Sort counter IDs by category and name so that counters appear sorted in the selection UI
std::sort(m_ExternalIds.begin(), m_ExternalIds.end(),
[this](const GPUCounter &a, const GPUCounter &b) {
uint32_t a_localId = (uint32_t)a - (uint32_t)GPUCounter::FirstNvidia;
uint32_t b_localId = (uint32_t)b - (uint32_t)GPUCounter::FirstNvidia;
const CounterDescription &a_desc = m_ExternalDescriptions[a_localId];
const CounterDescription &b_desc = m_ExternalDescriptions[b_localId];
int result = strcmp(a_desc.category.c_str(), b_desc.category.c_str());
if(result < 0)
return true;
if(result > 0)
return false;
result = strcmp(a_desc.name.c_str(), b_desc.name.c_str());
if(result < 0)
return true;
return false;
});
}
// Returns a copy of the list of exposed counter IDs; the first call triggers the lazy counter
// enumeration inside the implementation object.
rdcarray<GPUCounter> NVCounterEnumerator::GetPublicCounterIds()
{
return m_Impl->ExternalIds();
}
// Returns the description of an exposed counter.
//
// counterID: a counter ID for which HasCounter() is true.
//
// An ID outside the exposed range (including one below GPUCounter::FirstNvidia, which wraps to a
// very large index after the unsigned subtraction) is reported as an error and produces an empty
// description instead of indexing out of bounds.
CounterDescription NVCounterEnumerator::GetCounterDescription(GPUCounter counterID)
{
  const rdcarray<CounterDescription> &descriptions = m_Impl->ExternalDescriptions();

  uint32_t LocalId = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia;
  if(LocalId >= descriptions.size())
  {
    RDCERR("Unknown NVIDIA counter ID %u", (uint32_t)counterID);
    return CounterDescription();
  }

  return descriptions[LocalId];
}
// True when counterID is one of the counters exposed by this enumerator.
// IDs below GPUCounter::FirstNvidia wrap around in the unsigned subtraction and so compare as
// out of range.
bool NVCounterEnumerator::HasCounter(GPUCounter counterID)
{
  const uint32_t firstNvidiaId = (uint32_t)GPUCounter::FirstNvidia;
  const uint32_t offset = (uint32_t)counterID - firstNvidiaId;

  const bool inRange = offset < m_Impl->ExternalDescriptions().size();
  return inRange;
}
// Builds the NvPerf configuration for a set of counters and stores it as the current selection.
//
// pChipName, pRawMetricsConfig: passed straight to the metrics config builder.
// counters: counter IDs to collect. Entries that are not NVIDIA counters, or that do not map to
//           a known evaluation request, are skipped.
//
// Returns false if the config builder cannot be initialised or the config image cannot be
// prepared. A metric that the builder rejects is logged but does not fail the call.
//
// Selected counters are appended to the current selection; nothing is cleared here, so
// ClearConfig() is what resets it.
bool NVCounterEnumerator::CreateConfig(const char *pChipName,
                                       NVPA_RawMetricsConfig *pRawMetricsConfig,
                                       const rdcarray<GPUCounter> &counters)
{
  nv::perf::MetricsConfigBuilder metricsConfigBuilder;
  if(!metricsConfigBuilder.Initialize(m_Impl->Evaluator, pRawMetricsConfig, pChipName))
  {
    RDCERR("NvPerf failed to initialize config builder");
    return false;
  }

  const rdcarray<NVPW_MetricEvalRequest> &allEvalRequests = m_Impl->AllEvalRequests();

  for(GPUCounter counterID : counters)
  {
    RDCASSERT(IsNvidiaCounter(counterID));
    if(!IsNvidiaCounter(counterID))
    {
      continue;
    }

    size_t counterIndex = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia;
    // IsNvidiaCounter() only says the ID belongs to NVIDIA; it may still be beyond the counters
    // that were actually enumerated.
    if(counterIndex >= allEvalRequests.size())
    {
      RDCERR("NvPerf counter ID %u is out of range", (uint32_t)counterID);
      continue;
    }

    const NVPW_MetricEvalRequest &evalReq = allEvalRequests[counterIndex];

    m_Impl->SelectedExternalIds.push_back(counterID);
    m_Impl->SelectedEvalRequests.push_back(evalReq);

    if(!metricsConfigBuilder.AddMetrics(&evalReq, 1))
    {
      const char *metricName = nv::perf::ToCString(
          m_Impl->Evaluator, (NVPW_MetricType)evalReq.metricType, evalReq.metricIndex);
      RDCERR("NvPerf failed to configure metric: %s", metricName);
    }
  }

  if(!metricsConfigBuilder.PrepareConfigImage())
  {
    RDCERR("NvPerf failed to prepare config image");
    return false;
  }

  // Size the destination buffers first, then let the builder fill them in
  size_t configImageSize = metricsConfigBuilder.GetConfigImageSize();
  size_t counterDataPrefixSize = metricsConfigBuilder.GetCounterDataPrefixSize();

  m_Impl->SelectedConfiguration.configImage.resize(configImageSize);
  m_Impl->SelectedConfiguration.counterDataPrefix.resize(counterDataPrefixSize);

  metricsConfigBuilder.GetConfigImage(m_Impl->SelectedConfiguration.configImage.size(),
                                      m_Impl->SelectedConfiguration.configImage.data());
  metricsConfigBuilder.GetCounterDataPrefix(m_Impl->SelectedConfiguration.counterDataPrefix.size(),
                                            m_Impl->SelectedConfiguration.counterDataPrefix.data());

  m_Impl->SelectedNumPasses = metricsConfigBuilder.GetNumPasses();

  return true;
}
void NVCounterEnumerator::GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize,
const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize)
{
pConfigImage = m_Impl->SelectedConfiguration.configImage.data();
configImageSize = m_Impl->SelectedConfiguration.configImage.size();
pCounterDataPrefix = m_Impl->SelectedConfiguration.counterDataPrefix.data();
counterDataPrefixSize = m_Impl->SelectedConfiguration.counterDataPrefix.size();
}
void NVCounterEnumerator::ClearConfig()
{
m_Impl->SelectedExternalIds.clear();
m_Impl->SelectedEvalRequests.clear();
m_Impl->SelectedConfiguration = {}; // clear the byte vectors
m_Impl->SelectedNumPasses = 0u;
}
// Upper bound on the number of replay passes for the current selection:
// numNestingLevels * (passes of the selected configuration) + 1.
size_t NVCounterEnumerator::GetMaxNumReplayPasses(uint16_t numNestingLevels)
{
  // A zero pass count means no configuration has been created
  RDCASSERT(m_Impl->SelectedNumPasses > 0u);

  const size_t nestingLevels = (size_t)numNestingLevels;
  const size_t passesPerLevel = m_Impl->SelectedNumPasses;
  return nestingLevels * passesPerLevel + 1u;
}
// Evaluates the selected counters for every range in a counter data image.
//
// counterDataImage / counterDataImageSize: the collected counter data.
// values: one CounterResult per (range, selected counter) is appended here.
//
// Each range's leaf name is expected to be a decimal event ID. Ranges whose name is missing,
// is not a pure decimal number, does not fit in 32 bits, or whose evaluation fails are logged
// and skipped.
//
// Returns false only if the device attributes cannot be read from the counter data.
bool NVCounterEnumerator::EvaluateMetrics(const uint8_t *counterDataImage,
                                          size_t counterDataImageSize,
                                          rdcarray<CounterResult> &values)
{
  bool setDeviceSuccess = nv::perf::MetricsEvaluatorSetDeviceAttributes(
      m_Impl->Evaluator, counterDataImage, counterDataImageSize);
  if(!setDeviceSuccess)
  {
    RDCERR("NvPerf failed to determine device attributes from counter data");
    return false;
  }

  size_t numRanges = nv::perf::CounterDataGetNumRanges(counterDataImage);

  // Scratch buffer reused for every range: one value per selected evaluation request
  std::vector<double> doubleValues;
  doubleValues.resize(m_Impl->SelectedEvalRequests.size());

  for(uint32_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex)
  {
    const char *leafRangeName = NULL;
    std::string rangeName = nv::perf::profiler::CounterDataGetRangeName(
        counterDataImage, rangeIndex, '/', &leafRangeName);
    if(!leafRangeName)
    {
      RDCERR("Failed to access NvPerf range name");
      continue;
    }

    // strtoul only sets errno on overflow. A name without digits returns 0 with errno untouched,
    // so the end pointer is what tells a real event ID apart from garbage.
    errno = 0;
    char *parseEnd = NULL;
    unsigned long parsedEid = strtoul(leafRangeName, &parseEnd, 10);
    bool parsedWholeName = (parseEnd != leafRangeName) && (*parseEnd == '\0');
    bool fitsInEid = ((unsigned long)(uint32_t)parsedEid == parsedEid);
    if(errno != 0 || !parsedWholeName || !fitsInEid)
    {
      RDCERR("Failed to parse NvPerf range name: %s", leafRangeName);
      continue;
    }
    uint32_t eid = (uint32_t)parsedEid;

    bool evalSuccess =
        nv::perf::EvaluateToGpuValues(m_Impl->Evaluator, counterDataImage, counterDataImageSize,
                                      rangeIndex, m_Impl->SelectedEvalRequests.size(),
                                      m_Impl->SelectedEvalRequests.data(), doubleValues.data());
    if(!evalSuccess)
    {
      RDCERR("NvPerf failed to evaluate GPU metrics for range: %s", leafRangeName);
      continue;
    }

    for(size_t counterIndex = 0; counterIndex < m_Impl->SelectedExternalIds.size(); ++counterIndex)
    {
      CounterResult counterResult(eid, m_Impl->SelectedExternalIds[counterIndex],
                                  doubleValues[counterIndex]);
      values.push_back(counterResult);
    }
  }

  return true;
}
bool NVCounterEnumerator::InitializeNvPerf()
{
rdcstr pluginsFolder = FileIO::GetAppFolderFilename("plugins/nv");
const char *paths[] = {
pluginsFolder.c_str(), "./plugins/nv", ".",
};
NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE};
params.numPaths = sizeof(paths) / sizeof(paths[0]);
params.ppPaths = paths;
NVPA_Status result = NVPW_SetLibraryLoadPaths(&params);
if(result != NVPA_STATUS_SUCCESS)
{
RDCWARN("NvPerf could not set library search path");
}
return nv::perf::InitializeNvPerf();
}
// Builds the description of the placeholder counter (GPUCounter::FirstNvidia) shown when the
// Nsight Perf SDK library could not be loaded. The description text tells the user where to
// download the library and where to copy it.
//
// Side effect: creates the parent directory of the expected library location.
CounterDescription NVCounterEnumerator::LibraryNotFoundMessage()
{
  // Where the library is expected to live, relative to the application folder
  rdcstr libraryPath = FileIO::GetAppFolderFilename(
#if ENABLED(RDOC_WIN32)
      "plugins\\nv\\nvperf_grfx_host.dll"
#elif ENABLED(RDOC_LINUX)
      "plugins/nv/libnvperf_grfx_host.so"
#endif
      );

  // Use a path relative to the working directory when no application folder path is returned
  if(libraryPath.empty())
  {
    libraryPath =
#if ENABLED(RDOC_WIN32)
        ".\\plugins\\nv\\nvperf_grfx_host.dll"
#elif ENABLED(RDOC_LINUX)
        "./plugins/nv/libnvperf_grfx_host.so"
#endif
        ;
  }

  CounterDescription message = {};
  message.counter = GPUCounter::FirstNvidia;
  message.unit = CounterUnit::Absolute;
  message.resultByteWidth = 0;
  message.resultType = CompType::Typeless;
  message.name = "ERROR: Could not find Nsight Perf SDK library";
  message.description = StringFormat::Fmt(
      "To use these counters, please:"
      "<ol>"
      "<li>download the Nsight Perf SDK from:<br/><a "
      "href=\"https://developer.nvidia.com/nsight-perf-sdk\">https://developer.nvidia.com/"
      "nsight-perf-sdk</a></li>"
      "<li>extract the SDK contents</li>"
      "<li>copy the <strong>"
#if ENABLED(RDOC_WIN32)
      "nvperf_grfx_host.dll"
#elif ENABLED(RDOC_LINUX)
      "libnvperf_grfx_host.so"
#endif
      "</strong> file to:<br/><strong>%s</strong></li>"
      "<li>reopen this capture</li>"
      "</ol>",
      libraryPath.c_str());

  // Create the plugin directory, so user will have somewhere to place the plugin file
  FileIO::CreateParentDirectory(libraryPath);

  return message;
}
@@ -0,0 +1,71 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2022 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
#include <stdint.h>
#include "api/replay/data_types.h"
#include "api/replay/rdcarray.h"
#include "api/replay/replay_enums.h"
#include "common/common.h"
struct NVPA_RawMetricsConfig;
namespace nv
{
namespace perf
{
class MetricsEvaluator;
}
}
// Enumerates the NVIDIA Nsight Perf SDK counters and builds/evaluates counter configurations.
// The NvPerf types are kept out of this header by holding all state in a private Impl object.
class NVCounterEnumerator
{
public:
  NVCounterEnumerator();
  ~NVCounterEnumerator();

  // m_Impl is a raw owning pointer released in the destructor, so a copy would free it twice.
  NVCounterEnumerator(const NVCounterEnumerator &) = delete;
  NVCounterEnumerator &operator=(const NVCounterEnumerator &) = delete;

  // This function takes ownership of metricsEvaluator.
  bool Init(nv::perf::MetricsEvaluator &&metricsEvaluator);

  // List of all exposed counter IDs.
  rdcarray<GPUCounter> GetPublicCounterIds();
  // Description of one exposed counter; counterID should satisfy HasCounter().
  CounterDescription GetCounterDescription(GPUCounter counterID);
  // Whether counterID is one of the exposed counters.
  bool HasCounter(GPUCounter counterID);

  // Builds the configuration for the given counters and stores it as the current selection.
  bool CreateConfig(const char *pChipName, NVPA_RawMetricsConfig *pRawMetricsConfig,
                    const rdcarray<GPUCounter> &counters);
  // Returns pointers/sizes of the config image and counter data prefix of the current selection.
  void GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize,
                 const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize);
  // Discards the current selection.
  void ClearConfig();

  // Upper bound on replay passes for the current selection at the given nesting depth.
  size_t GetMaxNumReplayPasses(uint16_t numNestingLevels);

  // Evaluates the selected counters for every range in the counter data image and appends the
  // results to values.
  bool EvaluateMetrics(const uint8_t *counterDataImage, size_t counterDataImageSize,
                       rdcarray<CounterResult> &values);

  // Sets the library search paths and initialises NvPerf.
  static bool InitializeNvPerf();
  // Description of the placeholder counter shown when the NvPerf library cannot be found.
  static CounterDescription LibraryNotFoundMessage();

private:
  struct Impl;
  Impl *m_Impl;
};
+1
View File
@@ -23,6 +23,7 @@
******************************************************************************/
#include "nv_counters.h"
#include "common/common.h"
#include "core/plugins.h"
#include "os/os_specific.h"
@@ -0,0 +1,408 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2022 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "nv_d3d12_counters.h"
#include "nv_counter_enumerator.h"
#include "api/replay/shader_types.h"
#include "driver/d3d12/d3d12_command_list.h"
#include "driver/d3d12/d3d12_command_queue.h"
#include "driver/d3d12/d3d12_commands.h"
#include "driver/d3d12/d3d12_device.h"
#include "driver/d3d12/d3d12_replay.h"
#include "NvPerfD3D12.h"
#include "NvPerfRangeProfilerD3D12.h"
#include "NvPerfScopeExitGuard.h"
// Private state of NVD3D12Counters: the shared counter enumerator plus a flag recording that
// the Nsight Perf SDK library could not be loaded.
struct NVD3D12Counters::Impl
{
  // Owned; stays NULL when the library was not found or initialisation failed
  NVCounterEnumerator *CounterEnumerator;
  // Set when NvPerf could not be initialised; callers then expose a single placeholder counter
  bool LibraryNotFound = false;

  Impl() : CounterEnumerator(NULL) {}
  ~Impl()
  {
    delete CounterEnumerator;
    CounterEnumerator = NULL;
  }

  // Loads NvPerf and creates the counter enumerator for the given device.
  //
  // Returns true on success, and also when the NvPerf library itself could not be initialised
  // (LibraryNotFound is set in that case and CounterEnumerator stays NULL). Returns false for
  // every other failure.
  bool TryInitializePerfSDK(ID3D12Device *device)
  {
    if(!NVCounterEnumerator::InitializeNvPerf())
    {
      RDCERR("NvPerf library failed to initialize");
      LibraryNotFound = true;
      // NOTE: Return success here so that we can later show a message
      // directing the user to download the Nsight Perf SDK library.
      return true;
    }

    if(!nv::perf::D3D12LoadDriver())
    {
      RDCERR("NvPerf failed to load D3D12 driver");
      return false;
    }

    if(!nv::perf::profiler::D3D12IsGpuSupported(device))
    {
      RDCERR("NvPerf does not support profiling on this GPU");
      return false;
    }

    nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(device);
    if(!deviceIdentifiers.pChipName)
    {
      RDCERR("NvPerf could not determine chip name");
      return false;
    }

    size_t scratchBufferSize =
        nv::perf::D3D12CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName);
    if(!scratchBufferSize)
    {
      RDCERR("NvPerf could not determine the scratch buffer size for metrics evaluation");
      return false;
    }

    std::vector<uint8_t> scratchBuffer;
    scratchBuffer.resize(scratchBufferSize);
    NVPW_MetricsEvaluator *pMetricsEvaluator = nv::perf::D3D12CreateMetricsEvaluator(
        scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName);
    if(!pMetricsEvaluator)
    {
      RDCERR("NvPerf could not initialize metrics evaluator");
      return false;
    }

    // The wrapper takes over both the evaluator and the scratch buffer it was created in
    nv::perf::MetricsEvaluator metricsEvaluator(pMetricsEvaluator, std::move(scratchBuffer));

    CounterEnumerator = new NVCounterEnumerator;
    if(!CounterEnumerator->Init(std::move(metricsEvaluator)))
    {
      RDCERR("NvPerf could not initialize metrics evaluator");
      delete CounterEnumerator;
      // Must be reset: the destructor deletes CounterEnumerator too, and would otherwise free
      // the same object a second time.
      CounterEnumerator = NULL;
      return false;
    }

    return true;
  }

  // An action gets a profiling range only if it is a leaf, has events, and is one of the
  // clear / draw / dispatch / present / copy / resolve kinds.
  static bool CanProfileEvent(const ActionDescription &actionnode)
  {
    if(!actionnode.children.empty())
      return false;    // Only profile events for leaf nodes

    if(actionnode.events.empty())
      return false;    // Skip nodes with no events

    if(!(actionnode.flags & (ActionFlags::Clear | ActionFlags::Drawcall | ActionFlags::Dispatch |
                             ActionFlags::Present | ActionFlags::Copy | ActionFlags::Resolve)))
      return false;    // Filter out events we cannot profile

    return true;
  }

  // Adds to numEvents the number of profile-able actions in the subtree rooted at actionnode.
  static void RecurseDiscoverEvents(uint32_t &numEvents, const ActionDescription &actionnode)
  {
    for(size_t i = 0; i < actionnode.children.size(); i++)
    {
      RecurseDiscoverEvents(numEvents, actionnode.children[i]);
    }

    if(!Impl::CanProfileEvent(actionnode))
      return;

    numEvents++;
  }
};
// Starts without an implementation object; Init() creates it.
NVD3D12Counters::NVD3D12Counters() : m_Impl(NULL)
{
}
// Releases the implementation object, if any (deleting NULL is a no-op).
NVD3D12Counters::~NVD3D12Counters()
{
delete m_Impl;
m_Impl = NULL;
}
// Creates the implementation object and initialises the Perf SDK for the given device.
// On failure the implementation object is destroyed again and false is returned.
bool NVD3D12Counters::Init(ID3D12Device *device)
{
  m_Impl = new Impl;
  if(m_Impl == NULL)
    return false;

  if(m_Impl->TryInitializePerfSDK(device))
    return true;

  // Initialisation failed: leave the object without an implementation
  delete m_Impl;
  m_Impl = NULL;
  return false;
}
// Lists the available counters. When the Perf SDK library was not found, the list holds only
// the placeholder counter GPUCounter::FirstNvidia.
rdcarray<GPUCounter> NVD3D12Counters::EnumerateCounters() const
{
  if(!m_Impl->LibraryNotFound)
    return m_Impl->CounterEnumerator->GetPublicCounterIds();

  return {GPUCounter::FirstNvidia};
}
// Whether counterID is handled by this object. When the Perf SDK library was not found, only
// the placeholder counter GPUCounter::FirstNvidia is.
bool NVD3D12Counters::HasCounter(GPUCounter counterID) const
{
  const bool libraryMissing = m_Impl->LibraryNotFound;

  return libraryMissing ? (counterID == GPUCounter::FirstNvidia)
                        : m_Impl->CounterEnumerator->HasCounter(counterID);
}
// Describes a counter. When the Perf SDK library was not found, the result is the placeholder
// description that tells the user how to install it.
CounterDescription NVD3D12Counters::DescribeCounter(GPUCounter counterID) const
{
  if(!m_Impl->LibraryNotFound)
    return m_Impl->CounterEnumerator->GetCounterDescription(counterID);

  // Dummy counter shows message directing user to download the Nsight Perf SDK library
  RDCASSERT(counterID == GPUCounter::FirstNvidia);
  return NVCounterEnumerator::LibraryNotFoundMessage();
}
// Action callback that wraps each replayed action in an NvPerf range named after its event ID.
//
// The constructor installs the object as the action callback on the device queue's command data
// and the destructor removes it again, so the object must outlive the replay it instruments.
struct D3D12NvidiaActionCallback final : public D3D12ActionCallback
{
  D3D12NvidiaActionCallback(WrappedID3D12Device *dev,
                            nv::perf::profiler::D3D12RangeCommands *pRangeCommands)
      : m_pDevice(dev), m_pRangeCommands(pRangeCommands)
  {
    m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = this;
  }

  virtual ~D3D12NvidiaActionCallback()
  {
    m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = NULL;
  }

  // Opens a range whose name is the decimal event ID.
  void PreDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
  {
    // eid is unsigned, so it is formatted with %u: the name is later parsed back as an unsigned
    // number and must never be printed as a negative value.
    rdcstr eidName = StringFormat::Fmt("%u", eid);
    WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
    m_pRangeCommands->PushRange(pWrappedCmdList->GetReal(), eidName.c_str());
  }

  // Closes the range opened by PreDraw. Always returns false.
  bool PostDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
  {
    WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
    m_pRangeCommands->PopRange(pWrappedCmdList->GetReal());
    return false;
  }

  void PreCloseCommandList(ID3D12GraphicsCommandListX *cmd) final {}
  void PostRedraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final {}

  // Dispatches are handled exactly like draws
  void PreDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PreDraw(eid, cmd); }
  bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
  {
    return PostDraw(eid, cmd);
  }
  void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PostRedraw(eid, cmd); }

  // Other actions are handled like draws too, except pass boundaries, which are skipped in
  // both the Pre and Post callbacks so that pushes and pops stay balanced.
  void PreMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
  {
    if(flags & ActionFlags::PassBoundary)
      return;
    PreDraw(eid, cmd);
  }
  bool PostMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
  {
    if(flags & ActionFlags::PassBoundary)
      return false;
    return PostDraw(eid, cmd);
  }
  void PostRemisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
  {
    if(flags & ActionFlags::PassBoundary)
      return;
    PostRedraw(eid, cmd);
  }

  void AliasEvent(uint32_t primary, uint32_t alias) final {}

  WrappedID3D12Device *m_pDevice;
  nv::perf::profiler::D3D12RangeCommands *m_pRangeCommands;
};
// Collects the requested counters by replaying the whole capture inside an Nsight Perf range
// profiling session, once per profiling pass, on every direct and compute queue of the device.
//
// counters - the counter IDs to collect; they are handed to the counter enumerator to build the
//            profiler configuration.
// device   - the wrapped device whose capture is replayed and whose queues are profiled.
//
// Returns the evaluated results. An empty array is returned when the Nsight Perf library was not
// found, when the device is not an NVIDIA device, when no counter data could be decoded for a
// queue, or when evaluating the metrics fails. A queue on which the session cannot be started or
// counter collection cannot be enqueued is skipped with an error message.
rdcarray<CounterResult> NVD3D12Counters::FetchCounters(const rdcarray<GPUCounter> &counters,
                                                       WrappedID3D12Device &device)
{
  if(m_Impl->LibraryNotFound)
  {
    return {};
  }

  uint32_t maxEID = device.GetQueue()->GetMaxEID();

  ID3D12Device *d3dDevice = device.GetReal();

  nv::perf::profiler::D3D12RangeCommands rangeCommands;
  rangeCommands.Initialize(d3dDevice);
  RDCASSERT(rangeCommands.isNvidiaDevice);
  if(!rangeCommands.isNvidiaDevice)
  {
    return {};
  }

  uint32_t maxNumRanges = 0;
  {
    // replay the events to determine how many profile-able events there are
    FrameRecord frameRecord = device.GetReplay()->GetFrameRecord();
    for(size_t i = 0; i < frameRecord.actionList.size(); i++)
    {
      Impl::RecurseDiscoverEvents(maxNumRanges, frameRecord.actionList[i]);
    }
  }

  nv::perf::profiler::SessionOptions sessionOptions = {};
  sessionOptions.maxNumRanges = maxNumRanges;
  sessionOptions.avgRangeNameLength = 16;
  sessionOptions.numTraceBuffers = 1;

  nv::perf::profiler::RangeProfilerD3D12 rangeProfiler;

  rdcarray<CounterResult> results;

  const rdcarray<WrappedID3D12CommandQueue *> &commandQueues = device.GetQueues();
  for(WrappedID3D12CommandQueue *pWrappedQueue : commandQueues)
  {
    ID3D12CommandQueue *d3dQueue = pWrappedQueue->GetReal();

    switch(d3dQueue->GetDesc().Type)
    {
      case D3D12_COMMAND_LIST_TYPE_DIRECT:
      case D3D12_COMMAND_LIST_TYPE_COMPUTE:
        // Profiling is supported for 3D and compute queues.
        break;
      case D3D12_COMMAND_LIST_TYPE_BUNDLE:
      case D3D12_COMMAND_LIST_TYPE_COPY:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS:
      case D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE:
        // Profiling is not supported for copy or video queues.
        continue;
    }

    if(!rangeProfiler.BeginSession(d3dQueue, sessionOptions))
    {
      RDCERR("NvPerf failed to start profiling session");
      continue;    // Try the next command queue
    }
    // Pairs the successful BeginSession with EndSession on every way out of this iteration
    // (presumably the guard runs the lambda when it goes out of scope).
    auto sessionGuard = nv::perf::ScopeExitGuard([&rangeProfiler]() { rangeProfiler.EndSession(); });

    // Create counter configuration, and set it.
    {
      nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(d3dDevice);
      NVPA_RawMetricsConfig *pRawMetricsConfig =
          nv::perf::profiler::D3D12CreateRawMetricsConfig(deviceIdentifiers.pChipName);
      m_Impl->CounterEnumerator->CreateConfig(deviceIdentifiers.pChipName, pRawMetricsConfig,
                                              counters);
    }

    nv::perf::profiler::SetConfigParams setConfigParams;
    setConfigParams.numNestingLevels = 1;
    setConfigParams.numStatisticalSamples = 1;
    m_Impl->CounterEnumerator->GetConfig(
        setConfigParams.pConfigImage, setConfigParams.configImageSize,
        setConfigParams.pCounterDataPrefix, setConfigParams.counterDataPrefixSize);

    size_t maxNumReplayPasses =
        m_Impl->CounterEnumerator->GetMaxNumReplayPasses(setConfigParams.numNestingLevels);
    RDCASSERT(maxNumReplayPasses > 0u);

    if(!rangeProfiler.EnqueueCounterCollection(setConfigParams))
    {
      RDCERR("NvPerf failed to schedule counter collection");
      continue;    // Try the next command queue
    }

    // Installs the range push/pop hooks for the lifetime of this iteration.
    D3D12NvidiaActionCallback actionCallback(&device, &rangeCommands);

    std::vector<uint8_t> counterDataImage;

    // Replay the capture once per pass until the profiler reports that all passes are decoded.
    for(size_t replayPass = 0;; ++replayPass)
    {
      if(!rangeProfiler.BeginPass())
      {
        RDCERR("NvPerf failed to start counter collection pass");
        break;
      }

      // replay the events to perform all the queries
      uint32_t eventStartID = 0;
      device.ReplayLog(eventStartID, maxEID, eReplay_Full);

      if(!rangeProfiler.EndPass())
      {
        RDCERR("NvPerf failed to end counter collection pass!");
        break;
      }

      // device->GPUSync(d3dQueue);

      nv::perf::profiler::DecodeResult decodeResult;
      if(!rangeProfiler.DecodeCounters(decodeResult))
      {
        RDCERR("NvPerf failed to decode counters in collection pass");
        break;
      }

      if(decodeResult.allPassesDecoded)
      {
        counterDataImage = std::move(decodeResult.counterDataImage);
        break;    // Success!
      }

      // Written as an addition rather than 'maxNumReplayPasses - 1': with a limit of 0 the
      // subtraction would wrap around in size_t and the loop would lose its upper bound.
      if(replayPass + 1 >= maxNumReplayPasses)
      {
        RDCERR("NvPerf exceeded the maximum expected number of replay passes");
        break;    // Failure
      }
    }

    if(counterDataImage.empty())
    {
      RDCERR("No data found in NvPerf counter data image");
      return {};
    }

    if(!m_Impl->CounterEnumerator->EvaluateMetrics(counterDataImage.data(), counterDataImage.size(),
                                                   results))
    {
      RDCERR("NvPerf failed to evaluate metrics from counter data");
      return {};
    }
  }

  return results;
}
@@ -0,0 +1,51 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2022 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
#include "api/replay/data_types.h"
#include "api/replay/rdcarray.h"
#include "api/replay/replay_enums.h"
struct ID3D12Device;
class WrappedID3D12Device;
// D3D12 counter collection on NVIDIA hardware through the NVIDIA Nsight Perf SDK.
// All state lives behind an opaque Impl so that SDK headers do not leak into this header.
class NVD3D12Counters final
{
public:
  NVD3D12Counters();
  ~NVD3D12Counters();

  // The class holds a raw Impl pointer and has a user-declared destructor (presumably releasing
  // it), so the implicit copy operations would leave two objects sharing one Impl. Forbid copying.
  NVD3D12Counters(const NVD3D12Counters &) = delete;
  NVD3D12Counters &operator=(const NVD3D12Counters &) = delete;

  // Prepares counter collection for the given device.
  // NOTE(review): presumably returns false when initialisation fails - confirm in the .cpp.
  bool Init(ID3D12Device *device);

  // Returns the IDs of the counters this object exposes.
  rdcarray<GPUCounter> EnumerateCounters() const;
  // Returns true if counterID is one of the counters handled by this object.
  bool HasCounter(GPUCounter counterID) const;
  // Returns the description of a counter handled by this object.
  CounterDescription DescribeCounter(GPUCounter counterID) const;

  // Replays the capture on the given device and returns the results for the requested counters.
  // The returned array is empty if collection was not possible.
  rdcarray<CounterResult> FetchCounters(const rdcarray<GPUCounter> &counters,
                                        WrappedID3D12Device &device);

private:
  struct Impl;
  Impl *m_Impl;
};