diff --git a/renderdoc/driver/d3d12/d3d12_counters.cpp b/renderdoc/driver/d3d12/d3d12_counters.cpp index da16c9a29..e0b18d857 100644 --- a/renderdoc/driver/d3d12/d3d12_counters.cpp +++ b/renderdoc/driver/d3d12/d3d12_counters.cpp @@ -25,6 +25,7 @@ #include #include #include "driver/ihv/amd/amd_counters.h" +#include "driver/ihv/nv/nv_d3d12_counters.h" #include "d3d12_command_list.h" #include "d3d12_command_queue.h" #include "d3d12_common.h" @@ -54,6 +55,11 @@ rdcarray D3D12Replay::EnumerateCounters() ret.append(m_pAMDCounters->GetPublicCounterIds()); } + if(m_pNVCounters) + { + ret.append(m_pNVCounters->EnumerateCounters()); + } + return ret; } @@ -72,6 +78,13 @@ CounterDescription D3D12Replay::DescribeCounter(GPUCounter counterID) } } + /////NVIDIA////// + if(m_pNVCounters && m_pNVCounters->HasCounter(counterID)) + { + desc = m_pNVCounters->DescribeCounter(counterID); + return desc; + } + // 0808CC9B-79DF-4549-81F7-85494E648F22 desc.uuid.words[0] = 0x0808CC9B; desc.uuid.words[1] = 0x79DF4549; @@ -497,7 +510,7 @@ rdcarray D3D12Replay::FetchCounters(const rdcarray &c rdcarray d3dCounters; std::copy_if(counters.begin(), counters.end(), std::back_inserter(d3dCounters), - [](const GPUCounter &c) { return !IsAMDCounter(c); }); + [](const GPUCounter &c) { return IsGenericCounter(c); }); if(m_pAMDCounters) { @@ -512,6 +525,19 @@ rdcarray D3D12Replay::FetchCounters(const rdcarray &c } } + if(m_pNVCounters) + { + // Filter out the NVIDIA counters + rdcarray nvCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(nvCounters), + [=](const GPUCounter &c) { return m_pNVCounters->HasCounter(c); }); + if(!nvCounters.empty()) + { + rdcarray results = m_pNVCounters->FetchCounters(nvCounters, *m_pDevice); + ret.append(results); + } + } + if(d3dCounters.empty()) { return ret; diff --git a/renderdoc/driver/d3d12/d3d12_device.h b/renderdoc/driver/d3d12/d3d12_device.h index dd6c57b6d..ca3f7ae09 100644 --- a/renderdoc/driver/d3d12/d3d12_device.h +++ 
b/renderdoc/driver/d3d12/d3d12_device.h @@ -871,6 +871,7 @@ public: CaptureState GetState() { return m_State; } D3D12Replay *GetReplay() { return m_Replay; } WrappedID3D12CommandQueue *GetQueue() { return m_Queue; } + const rdcarray &GetQueues() { return m_Queues; } ID3D12CommandAllocator *GetAlloc() { return m_Alloc; } ID3D12InfoQueue *GetInfoQueue() { return m_pInfoQueue; } void ApplyBarriers(rdcarray &barriers); diff --git a/renderdoc/driver/d3d12/d3d12_replay.cpp b/renderdoc/driver/d3d12/d3d12_replay.cpp index 34cc7b2de..9d07f7806 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.cpp +++ b/renderdoc/driver/d3d12/d3d12_replay.cpp @@ -29,6 +29,7 @@ #include "driver/dxgi/dxgi_common.h" #include "driver/ihv/amd/amd_counters.h" #include "driver/ihv/amd/amd_rgp.h" +#include "driver/ihv/nv/nv_d3d12_counters.h" #include "maths/camera.h" #include "maths/formatpacking.h" #include "maths/matrix.h" @@ -151,35 +152,44 @@ void D3D12Replay::CreateResources() if(!m_Proxy && D3D12_HardwareCounters()) { - AMDCounters *counters = NULL; - if(m_DriverInfo.vendor == GPUVendor::AMD || m_DriverInfo.vendor == GPUVendor::Samsung) { RDCLOG("AMD GPU detected - trying to initialise AMD counters"); - counters = new AMDCounters(m_pDevice->IsDebugLayerEnabled()); - } - else - { - RDCLOG("%s GPU detected - no counters available", ToStr(m_DriverInfo.vendor).c_str()); + AMDCounters *countersAMD = new AMDCounters(m_pDevice->IsDebugLayerEnabled()); + + ID3D12Device *d3dDevice = m_pDevice->GetReal(); + + if(countersAMD && countersAMD->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice)) + { + m_pAMDCounters = countersAMD; + } + else + { + delete countersAMD; + } } - ID3D12Device *d3dDevice = m_pDevice->GetReal(); + if(m_DriverInfo.vendor == GPUVendor::nVidia) + { + RDCLOG("NVIDIA GPU detected - trying to initialise NVIDIA counters"); - if(counters && counters->Init(AMDCounters::ApiType::Dx12, (void *)d3dDevice)) - { - m_pAMDCounters = counters; - } - else - { - delete counters; - m_pAMDCounters = 
NULL; + NVD3D12Counters *countersNV = new NVD3D12Counters(); + + bool initSuccess = false; + if(countersNV && countersNV->Init(m_pDevice->GetReal())) + { + m_pNVCounters = countersNV; + initSuccess = true; + } + else + { + delete countersNV; + } + + RDCLOG("NVIDIA D3D12 counter initialisation: %s", initSuccess ? "SUCCEEDED" : "FAILED"); } } } - else - { - m_pAMDCounters = NULL; - } } void D3D12Replay::DestroyResources() @@ -209,6 +219,8 @@ void D3D12Replay::DestroyResources() SAFE_DELETE(m_DebugManager); SAFE_DELETE(m_pAMDCounters); + + SAFE_DELETE(m_pNVCounters); } RDResult D3D12Replay::ReadLogInitialisation(RDCFile *rdc, bool storeStructuredBuffers) diff --git a/renderdoc/driver/d3d12/d3d12_replay.h b/renderdoc/driver/d3d12/d3d12_replay.h index 2a3a5273d..b78088882 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.h +++ b/renderdoc/driver/d3d12/d3d12_replay.h @@ -34,6 +34,8 @@ class AMDCounters; struct D3D12AMDActionCallback; class WrappedID3D12Device; +class NVD3D12Counters; + class D3D12DebugManager; struct PortableHandle; @@ -506,6 +508,7 @@ private: std::map, bytebuf> m_PatchedPSCache; void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray *eventIDs); - rdcarray FetchCountersAMD(const rdcarray &counters); + + NVD3D12Counters *m_pNVCounters = NULL; }; diff --git a/renderdoc/driver/ihv/nv/NV.vcxproj b/renderdoc/driver/ihv/nv/NV.vcxproj index 58f2215a8..7dd22af64 100644 --- a/renderdoc/driver/ihv/nv/NV.vcxproj +++ b/renderdoc/driver/ihv/nv/NV.vcxproj @@ -60,7 +60,7 @@ - $(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\ + $(SolutionDir)renderdoc\;$(SolutionDir)renderdoc\3rdparty\;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\include;$(SolutionDir)renderdoc\driver\ihv\nv\official\PerfSDK\redist\NvPerfUtility\include RENDERDOC_EXPORTS;RENDERDOC_PLATFORM_WIN32;WIN32;NDEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions) Level4 MultiThreadedDLL @@ -99,10 +99,14 @@ + + + + diff --git 
a/renderdoc/driver/ihv/nv/nv_counter_enumerator.cpp b/renderdoc/driver/ihv/nv/nv_counter_enumerator.cpp new file mode 100644 index 000000000..ddef6f73d --- /dev/null +++ b/renderdoc/driver/ihv/nv/nv_counter_enumerator.cpp @@ -0,0 +1,484 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2022 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#include "nv_counter_enumerator.h" + +#include "common/common.h" +#include "common/formatting.h" +#include "os/os_specific.h" +#include "strings/string_utils.h" + +#if ENABLED(RDOC_WIN32) +#include "windows-desktop-x64/nvperf_host_impl.h" +#elif ENABLED(RDOC_LINUX) +#include "linux-desktop-x64/nvperf_host_impl.h" +#endif + +#include "NvPerfCounterConfiguration.h" +#include "NvPerfCounterData.h" +#include "NvPerfMetricsEvaluator.h" + +struct NVCounterEnumerator::Impl +{ +public: + nv::perf::MetricsEvaluator Evaluator; + + nv::perf::CounterConfiguration + SelectedConfiguration; // configImage etc. for the current selection + rdcarray SelectedExternalIds; + rdcarray SelectedEvalRequests; + size_t SelectedNumPasses; + + const rdcarray &ExternalIds() + { + InitEnumerateCounters(); + return m_ExternalIds; + } + const rdcarray &ExternalDescriptions() + { + InitEnumerateCounters(); + return m_ExternalDescriptions; + } + const rdcarray &AllEvalRequests() + { + InitEnumerateCounters(); + return m_AllEvalRequests; + } + +private: + void InitEnumerateCounters(); + bool m_EnumerationDone = false; + + rdcarray m_ExternalIds; + rdcarray m_ExternalDescriptions; + rdcarray m_AllEvalRequests; +}; + +NVCounterEnumerator::NVCounterEnumerator() +{ + m_Impl = new NVCounterEnumerator::Impl(); +} + +NVCounterEnumerator::~NVCounterEnumerator() +{ + delete m_Impl; +} + +static CounterUnit ToCounterUnit(const std::vector &dimUnits) +{ + if(dimUnits.size() == 0) + { + return CounterUnit::Ratio; + } + if(dimUnits.size() == 1 && dimUnits[0].exponent == 1) + { + switch(dimUnits[0].dimUnit) + { + case NVPW_DIM_UNIT_BYTES: return CounterUnit::Bytes; + case NVPW_DIM_UNIT_SECONDS: return CounterUnit::Seconds; + case NVPW_DIM_UNIT_PERCENT: return CounterUnit::Percentage; + case NVPW_DIM_UNIT_FBP_CYCLES: return CounterUnit::Cycles; + case NVPW_DIM_UNIT_GPC_CYCLES: return CounterUnit::Cycles; + case 
NVPW_DIM_UNIT_SYS_CYCLES: return CounterUnit::Cycles; + case NVPW_DIM_UNIT_DRAM_CYCLES: return CounterUnit::Cycles; + case NVPW_DIM_UNIT_PCIE_CYCLES: + return CounterUnit::Cycles; + // fallthrough... + } + } + + // catch-all + return CounterUnit::Absolute; +} + +bool NVCounterEnumerator::Init(nv::perf::MetricsEvaluator &&metricsEvaluator) +{ + m_Impl->Evaluator = std::move(metricsEvaluator); + + return true; +} + +void NVCounterEnumerator::Impl::InitEnumerateCounters() +{ + // Defer counter enumeration until the first time this is called + if(m_EnumerationDone) + return; + + m_EnumerationDone = true; + + struct MetricAttribute + { + NVPW_MetricType metricType; + NVPW_RollupOp rollupOp; + NVPW_Submetric submetric; + }; + const MetricAttribute metricAttributes[] = { + {NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_SUM, NVPW_SUBMETRIC_NONE}, + {NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_NONE}, + {NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MAX, NVPW_SUBMETRIC_NONE}, + {NVPW_METRIC_TYPE_COUNTER, NVPW_ROLLUP_OP_MIN, NVPW_SUBMETRIC_NONE}, + {NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_RATIO}, + {NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_MAX_RATE}, + {NVPW_METRIC_TYPE_RATIO, NVPW_ROLLUP_OP_AVG, NVPW_SUBMETRIC_PCT}, + }; + for(size_t i = 0; i < sizeof(metricAttributes) / sizeof(metricAttributes[0]); i++) + { + const auto &attributes = metricAttributes[i]; + NVPW_MetricType metricType = attributes.metricType; + NVPW_RollupOp rollupOp = attributes.rollupOp; + NVPW_Submetric submetric = attributes.submetric; + + for(const char *counterName : nv::perf::EnumerateMetrics(Evaluator, metricType)) + { + if(strstr(counterName, "Triage") != NULL) + continue; // filter out Triage counters (they are all duplicates) + + size_t metricIndex; + if(!nv::perf::GetMetricTypeAndIndex(Evaluator, counterName, metricType, metricIndex)) + continue; + RDCASSERT(metricType == attributes.metricType); + + NVPW_MetricEvalRequest evalReq = {}; + 
evalReq.metricIndex = metricIndex; + evalReq.metricType = (uint8_t)metricType; + evalReq.rollupOp = (uint8_t)rollupOp; + evalReq.submetric = (uint16_t)submetric; + + std::vector dimUnits; + GetMetricDimUnits(Evaluator, evalReq, dimUnits); + + { + //----------------- + // Filter out metrics that count "cycles". + // The RenderDoc replay loop is not designed for reproducing representative cycle counts. + auto itr = + std::find_if(dimUnits.begin(), dimUnits.end(), [](const NVPW_DimUnitFactor &factor) { + switch(factor.dimUnit) + { + case NVPW_DIM_UNIT_DRAM_CYCLES: + case NVPW_DIM_UNIT_FBP_CYCLES: + case NVPW_DIM_UNIT_GPC_CYCLES: + case NVPW_DIM_UNIT_NVLRX_CYCLES: + case NVPW_DIM_UNIT_NVLTX_CYCLES: + case NVPW_DIM_UNIT_PCIE_CYCLES: + case NVPW_DIM_UNIT_SYS_CYCLES: return true; + default: break; + } + return false; + }); + if(itr != dimUnits.end()) + continue; + } + + CounterDescription desc = {}; + desc.resultType = CompType::Float; + desc.resultByteWidth = 8; + + //----------------- + // Counter name, including rollup and submetric qualifiers + desc.name = counterName; + desc.name.append(nv::perf::ToCString((NVPW_RollupOp)evalReq.rollupOp)); + desc.name.append(nv::perf::ToCString((NVPW_Submetric)evalReq.submetric)); + + //----------------- + // Counter description, including metric type and dim unit + desc.description = rdcstr(GetMetricDescription(Evaluator, metricType, metricIndex)); + desc.description.append("
HW Unit: "); + NVPW_HwUnit hwunit = nv::perf::GetMetricHwUnit(Evaluator, metricType, metricIndex); + desc.description.append(nv::perf::ToCString(Evaluator, hwunit)); + desc.description.append(""); + desc.description.append("
MetricType: "); + desc.description.append(nv::perf::ToCString(metricType)); + desc.description.append(""); + desc.description.append("
RollupOp: "); + desc.description.append(nv::perf::ToCString(rollupOp)); + desc.description.append(""); + desc.description.append("
Submetric: "); + desc.description.append(nv::perf::ToCString(submetric)); + desc.description.append(""); + desc.description.append("
DimUnit: "); + desc.description.append( + nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) { + return ToCString(Evaluator, dimUnit, plural); + }).c_str()); + desc.description.append(""); + + //----------------- + // Categorize counter by DimUnit + desc.category = + rdcstr(nv::perf::ToString(dimUnits, [this](NVPW_DimUnitName dimUnit, bool plural) { + return ToCString(Evaluator, dimUnit, plural); + }).c_str()); + + //----------------- + // Convert Perf SDK units to Renderdoc units (only works for limited subset of units) + desc.unit = ToCounterUnit(dimUnits); + + //----------------- + // Assign external counter ID and UUID + GPUCounter counterID = + GPUCounter((uint32_t)GPUCounter::FirstNvidia + (uint32_t)m_AllEvalRequests.size()); + desc.counter = counterID; + desc.uuid.words[0] = 0x25B624D0; + desc.uuid.words[1] = 0x33244527; + desc.uuid.words[2] = 0x9F71CD67; + desc.uuid.words[3] = 0x61B37980 ^ strhash(desc.name.c_str()); + + m_ExternalIds.push_back(counterID); + m_ExternalDescriptions.push_back(desc); + m_AllEvalRequests.push_back(evalReq); + } + } + + //----------------- + // Sort counter IDs by category and name so that counters appear sorted in the selection UI + std::sort(m_ExternalIds.begin(), m_ExternalIds.end(), + [this](const GPUCounter &a, const GPUCounter &b) { + uint32_t a_localId = (uint32_t)a - (uint32_t)GPUCounter::FirstNvidia; + uint32_t b_localId = (uint32_t)b - (uint32_t)GPUCounter::FirstNvidia; + const CounterDescription &a_desc = m_ExternalDescriptions[a_localId]; + const CounterDescription &b_desc = m_ExternalDescriptions[b_localId]; + int result = strcmp(a_desc.category.c_str(), b_desc.category.c_str()); + if(result < 0) + return true; + if(result > 0) + return false; + result = strcmp(a_desc.name.c_str(), b_desc.name.c_str()); + if(result < 0) + return true; + return false; + }); +} + +rdcarray NVCounterEnumerator::GetPublicCounterIds() +{ + return m_Impl->ExternalIds(); +} + +CounterDescription 
NVCounterEnumerator::GetCounterDescription(GPUCounter counterID) +{ + uint32_t LocalId = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia; + return m_Impl->ExternalDescriptions()[LocalId]; +} + +bool NVCounterEnumerator::HasCounter(GPUCounter counterID) +{ + uint32_t LocalId = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia; + return LocalId < m_Impl->ExternalDescriptions().size(); +} + +bool NVCounterEnumerator::CreateConfig(const char *pChipName, + NVPA_RawMetricsConfig *pRawMetricsConfig, + const rdcarray &counters) +{ + nv::perf::MetricsConfigBuilder metricsConfigBuilder; + if(!metricsConfigBuilder.Initialize(m_Impl->Evaluator, pRawMetricsConfig, pChipName)) + { + RDCERR("NvPerf failed to initialize config builder"); + return false; + } + + for(GPUCounter counterID : counters) + { + RDCASSERT(IsNvidiaCounter(counterID)); + if(!IsNvidiaCounter(counterID)) + { + continue; + } + size_t counterIndex = (uint32_t)counterID - (uint32_t)GPUCounter::FirstNvidia; + const NVPW_MetricEvalRequest &evalReq = m_Impl->AllEvalRequests()[counterIndex]; + + m_Impl->SelectedExternalIds.push_back(counterID); + m_Impl->SelectedEvalRequests.push_back(m_Impl->AllEvalRequests()[counterIndex]); + if(!metricsConfigBuilder.AddMetrics(&evalReq, 1)) + { + // std::string metricName = nv::perf::ToString(m_Impl->Evaluator, evalReq); + const char *metricName = nv::perf::ToCString( + m_Impl->Evaluator, (NVPW_MetricType)evalReq.metricType, evalReq.metricIndex); + RDCERR("NvPerf failed to configure metric: %s", metricName); + } + } + + if(!metricsConfigBuilder.PrepareConfigImage()) + { + RDCERR("NvPerf failed to prepare config image"); + return false; + } + + size_t configImageSize = metricsConfigBuilder.GetConfigImageSize(); + size_t counterDataPrefixSize = metricsConfigBuilder.GetCounterDataPrefixSize(); + m_Impl->SelectedConfiguration.configImage.resize(configImageSize); + m_Impl->SelectedConfiguration.counterDataPrefix.resize(counterDataPrefixSize); + 
metricsConfigBuilder.GetConfigImage(m_Impl->SelectedConfiguration.configImage.size(), + m_Impl->SelectedConfiguration.configImage.data()); + metricsConfigBuilder.GetCounterDataPrefix(m_Impl->SelectedConfiguration.counterDataPrefix.size(), + m_Impl->SelectedConfiguration.counterDataPrefix.data()); + m_Impl->SelectedNumPasses = metricsConfigBuilder.GetNumPasses(); + return true; +} + +void NVCounterEnumerator::GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize, + const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize) +{ + pConfigImage = m_Impl->SelectedConfiguration.configImage.data(); + configImageSize = m_Impl->SelectedConfiguration.configImage.size(); + pCounterDataPrefix = m_Impl->SelectedConfiguration.counterDataPrefix.data(); + counterDataPrefixSize = m_Impl->SelectedConfiguration.counterDataPrefix.size(); +} + +void NVCounterEnumerator::ClearConfig() +{ + m_Impl->SelectedExternalIds.clear(); + m_Impl->SelectedEvalRequests.clear(); + m_Impl->SelectedConfiguration = {}; // clear the byte vectors + m_Impl->SelectedNumPasses = 0u; +} + +size_t NVCounterEnumerator::GetMaxNumReplayPasses(uint16_t numNestingLevels) +{ + // Calculate max number of replay passes + RDCASSERT(m_Impl->SelectedNumPasses > 0u); + return (size_t)numNestingLevels * m_Impl->SelectedNumPasses + 1u; +} + +bool NVCounterEnumerator::EvaluateMetrics(const uint8_t *counterDataImage, + size_t counterDataImageSize, + rdcarray &values) +{ + bool setDeviceSuccess = nv::perf::MetricsEvaluatorSetDeviceAttributes( + m_Impl->Evaluator, counterDataImage, counterDataImageSize); + if(!setDeviceSuccess) + { + RDCERR("NvPerf failed to determine device attributes from counter data"); + return false; + } + + size_t numRanges = nv::perf::CounterDataGetNumRanges(counterDataImage); + + std::vector doubleValues; + doubleValues.resize(m_Impl->SelectedEvalRequests.size()); + for(uint32_t rangeIndex = 0; rangeIndex < numRanges; ++rangeIndex) + { + const char *leafRangeName = NULL; + std::string 
rangeName = nv::perf::profiler::CounterDataGetRangeName( + counterDataImage, rangeIndex, '/', &leafRangeName); + if(!leafRangeName) + { + RDCERR("Failed to access NvPerf range name"); + continue; + } + errno = 0; + uint32_t eid = (uint32_t)strtoul(leafRangeName, NULL, 10); + if(errno != 0) + { + RDCERR("Failed to parse NvPerf range name: %s", leafRangeName); + continue; + } + + bool evalSuccess = + nv::perf::EvaluateToGpuValues(m_Impl->Evaluator, counterDataImage, counterDataImageSize, + rangeIndex, m_Impl->SelectedEvalRequests.size(), + m_Impl->SelectedEvalRequests.data(), doubleValues.data()); + if(!evalSuccess) + { + RDCERR("NvPerf failed to evaluate GPU metrics for range: %s", leafRangeName); + continue; + } + for(size_t counterIndex = 0; counterIndex < m_Impl->SelectedExternalIds.size(); ++counterIndex) + { + CounterResult counterResult(eid, m_Impl->SelectedExternalIds[counterIndex], + doubleValues[counterIndex]); + values.push_back(counterResult); + } + } + + return true; +} + +bool NVCounterEnumerator::InitializeNvPerf() +{ + rdcstr pluginsFolder = FileIO::GetAppFolderFilename("plugins/nv"); + const char *paths[] = { + pluginsFolder.c_str(), "./plugins/nv", ".", + }; + NVPW_SetLibraryLoadPaths_Params params{NVPW_SetLibraryLoadPaths_Params_STRUCT_SIZE}; + params.numPaths = sizeof(paths) / sizeof(paths[0]); + params.ppPaths = paths; + NVPA_Status result = NVPW_SetLibraryLoadPaths(¶ms); + if(result != NVPA_STATUS_SUCCESS) + { + RDCWARN("NvPerf could not set library search path"); + } + return nv::perf::InitializeNvPerf(); +} + +CounterDescription NVCounterEnumerator::LibraryNotFoundMessage() +{ + rdcstr pluginPath = FileIO::GetAppFolderFilename( +#if ENABLED(RDOC_WIN32) + "plugins\\nv\\nvperf_grfx_host.dll" +#elif ENABLED(RDOC_LINUX) + "plugins/nv/libnvperf_grfx_host.so" +#endif + ); + if(pluginPath.empty()) + { + pluginPath = +#if ENABLED(RDOC_WIN32) + ".\\plugins\\nv\\nvperf_grfx_host.dll" +#elif ENABLED(RDOC_LINUX) + "./plugins/nv/libnvperf_grfx_host.so" 
+#endif + ; + } + + CounterDescription desc = {}; + desc.resultType = CompType::Typeless; + desc.resultByteWidth = 0; + desc.name = "ERROR: Could not find Nsight Perf SDK library"; + desc.description = StringFormat::Fmt( + "To use these counters, please:" + "
    " + "
  1. download the Nsight Perf SDK from:
    https://developer.nvidia.com/" + "nsight-perf-sdk
  2. " + "
  3. extract the SDK contents
  4. " + "
  5. copy the " +#if ENABLED(RDOC_WIN32) + "nvperf_grfx_host.dll" +#elif ENABLED(RDOC_LINUX) + "libnvperf_grfx_host.so" +#endif + " file to:
    %s
  6. " + "
  7. reopen this capture
  8. " + "
", + pluginPath.c_str()); + desc.unit = CounterUnit::Absolute; + desc.counter = GPUCounter::FirstNvidia; + + // Create the plugin directory, so user will have somewhere to place the plugin file + FileIO::CreateParentDirectory(pluginPath); + + return desc; +} diff --git a/renderdoc/driver/ihv/nv/nv_counter_enumerator.h b/renderdoc/driver/ihv/nv/nv_counter_enumerator.h new file mode 100644 index 000000000..58459f69f --- /dev/null +++ b/renderdoc/driver/ihv/nv/nv_counter_enumerator.h @@ -0,0 +1,71 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2022 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#pragma once + +#include +#include "api/replay/data_types.h" +#include "api/replay/rdcarray.h" +#include "api/replay/replay_enums.h" +#include "common/common.h" + +struct NVPA_RawMetricsConfig; +namespace nv +{ +namespace perf +{ +class MetricsEvaluator; +} +} + +/* Exposes the metrics of an NvPerf MetricsEvaluator as RenderDoc GPUCounter IDs (numbered from GPUCounter::FirstNvidia), builds the counter configuration for a selection of those IDs and turns collected counter data into CounterResult values. All state lives behind the private Impl pointer. */ class NVCounterEnumerator +{ +public: + NVCounterEnumerator(); + ~NVCounterEnumerator(); + + // This function takes ownership of metricsEvaluator. + bool Init(nv::perf::MetricsEvaluator &&metricsEvaluator); + + /* Every exposed counter ID, sorted by category and then by name. */ rdcarray GetPublicCounterIds(); + /* Description of counterID, looked up at index (counterID - GPUCounter::FirstNvidia). */ CounterDescription GetCounterDescription(GPUCounter counterID); + /* True when counterID lies inside the enumerated range. */ bool HasCounter(GPUCounter counterID); + + /* Selects `counters` and builds the config image and counter data prefix for them; returns false when the config builder cannot be initialised or the image cannot be prepared. */ bool CreateConfig(const char *pChipName, NVPA_RawMetricsConfig *pRawMetricsConfig, + const rdcarray &counters); + /* Outputs pointers to and sizes of the buffers built by CreateConfig; the buffers remain owned by this object. */ void GetConfig(const uint8_t *&pConfigImage, size_t &configImageSize, + const uint8_t *&pCounterDataPrefix, size_t &counterDataPrefixSize); + /* Empties the current selection, its buffers and its pass count. */ void ClearConfig(); + /* numNestingLevels times the pass count of the current configuration, plus one. */ size_t GetMaxNumReplayPasses(uint16_t numNestingLevels); + + /* Evaluates the selected metrics for each range in counterDataImage and appends the results to values; returns false when device attributes cannot be read from the counter data. */ bool EvaluateMetrics(const uint8_t *counterDataImage, size_t counterDataImageSize, + rdcarray &values); + + /* Sets the NvPerf library search paths and initialises the library. */ static bool InitializeNvPerf(); + /* Placeholder counter description that tells the user how to install the Nsight Perf SDK library. */ static CounterDescription LibraryNotFoundMessage(); + +private: + struct Impl; + /* Allocated in the constructor and deleted in the destructor. NOTE(review): no copy operations are declared, so copying an instance would presumably delete the same Impl twice - confirm instances are never copied. */ Impl *m_Impl; +}; diff --git a/renderdoc/driver/ihv/nv/nv_counters.cpp b/renderdoc/driver/ihv/nv/nv_counters.cpp index 16dbb9a09..b4f78643d 100644 --- a/renderdoc/driver/ihv/nv/nv_counters.cpp +++ b/renderdoc/driver/ihv/nv/nv_counters.cpp @@ -23,6 +23,7 @@ ******************************************************************************/ #include "nv_counters.h" + #include "common/common.h" #include "core/plugins.h" #include "os/os_specific.h" diff --git a/renderdoc/driver/ihv/nv/nv_d3d12_counters.cpp b/renderdoc/driver/ihv/nv/nv_d3d12_counters.cpp new file mode 100644 index 000000000..1a4e28728 --- /dev/null +++ b/renderdoc/driver/ihv/nv/nv_d3d12_counters.cpp @@ -0,0 +1,408 @@ 
+/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2022 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#include "nv_d3d12_counters.h" + +#include "nv_counter_enumerator.h" + +#include "api/replay/shader_types.h" +#include "driver/d3d12/d3d12_command_list.h" +#include "driver/d3d12/d3d12_command_queue.h" +#include "driver/d3d12/d3d12_commands.h" +#include "driver/d3d12/d3d12_device.h" +#include "driver/d3d12/d3d12_replay.h" + +#include "NvPerfD3D12.h" +#include "NvPerfRangeProfilerD3D12.h" +#include "NvPerfScopeExitGuard.h" + +struct NVD3D12Counters::Impl +{ + NVCounterEnumerator *CounterEnumerator; + bool LibraryNotFound = false; + + Impl() : CounterEnumerator(NULL) {} + ~Impl() + { + delete CounterEnumerator; + CounterEnumerator = NULL; + } + + bool TryInitializePerfSDK(ID3D12Device *device) + { + if(!NVCounterEnumerator::InitializeNvPerf()) + { + RDCERR("NvPerf library failed to initialize"); + LibraryNotFound = true; + + // NOTE: Return success here so that we can later show a message + // directing the user to download the Nsight Perf SDK library. 
+ return true; + } + + if(!nv::perf::D3D12LoadDriver()) + { + RDCERR("NvPerf failed to load D3D12 driver"); + return false; + } + + if(!nv::perf::profiler::D3D12IsGpuSupported(device)) + { + RDCERR("NvPerf does not support profiling on this GPU"); + return false; + } + + nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(device); + if(!deviceIdentifiers.pChipName) + { + RDCERR("NvPerf could not determine chip name"); + return false; + } + + size_t scratchBufferSize = + nv::perf::D3D12CalculateMetricsEvaluatorScratchBufferSize(deviceIdentifiers.pChipName); + if(!scratchBufferSize) + { + RDCERR("NvPerf could not determine the scratch buffer size for metrics evaluation"); + return false; + } + + std::vector scratchBuffer; + scratchBuffer.resize(scratchBufferSize); + NVPW_MetricsEvaluator *pMetricsEvaluator = nv::perf::D3D12CreateMetricsEvaluator( + scratchBuffer.data(), scratchBuffer.size(), deviceIdentifiers.pChipName); + if(!pMetricsEvaluator) + { + RDCERR("NvPerf could not initialize metrics evaluator"); + return false; + } + + nv::perf::MetricsEvaluator metricsEvaluator(pMetricsEvaluator, std::move(scratchBuffer)); + + CounterEnumerator = new NVCounterEnumerator; + if(!CounterEnumerator->Init(std::move(metricsEvaluator))) + { + RDCERR("NvPerf could not initialize metrics evaluator"); + delete CounterEnumerator; + return false; + } + return true; + }; + + static bool CanProfileEvent(const ActionDescription &actionnode) + { + if(!actionnode.children.empty()) + return false; // Only profile events for leaf nodes + + if(actionnode.events.empty()) + return false; // Skip nodes with no events + + if(!(actionnode.flags & (ActionFlags::Clear | ActionFlags::Drawcall | ActionFlags::Dispatch | + ActionFlags::Present | ActionFlags::Copy | ActionFlags::Resolve))) + return false; // Filter out events we cannot profile + + return true; + } + + static void RecurseDiscoverEvents(uint32_t &numEvents, const ActionDescription &actionnode) + { + 
for(size_t i = 0; i < actionnode.children.size(); i++)
+    {
+      RecurseDiscoverEvents(numEvents, actionnode.children[i]);
+    }
+
+    if(!Impl::CanProfileEvent(actionnode))
+      return;
+
+    numEvents++;
+  }
+};
+
+// Starts with no implementation object; Init() creates it.
+NVD3D12Counters::NVD3D12Counters() : m_Impl(NULL)
+{
+}
+
+// Releases the implementation object, if one exists.
+NVD3D12Counters::~NVD3D12Counters()
+{
+  delete m_Impl;
+  m_Impl = NULL;
+}
+
+// Creates the implementation object and calls Impl::TryInitializePerfSDK(device) on it.
+// Returns true on success. On failure the implementation object is destroyed again, m_Impl is
+// left NULL and false is returned.
+bool NVD3D12Counters::Init(ID3D12Device *device)
+{
+  // release anything left over from an earlier Init() so that re-initialising does not leak
+  delete m_Impl;
+  m_Impl = new Impl;
+
+  if(!m_Impl)
+    return false;
+
+  bool initSuccess = m_Impl->TryInitializePerfSDK(device);
+  if(!initSuccess)
+  {
+    delete m_Impl;
+    m_Impl = NULL;
+    return false;
+  }
+
+  return true;
+}
+
+// Returns the IDs of the counters this object exposes. When the implementation reports
+// LibraryNotFound, the only ID returned is the placeholder GPUCounter::FirstNvidia.
+rdcarray<GPUCounter> NVD3D12Counters::EnumerateCounters() const
+{
+  // Init() has not succeeded, so there is nothing to enumerate
+  if(!m_Impl)
+    return {};
+
+  if(m_Impl->LibraryNotFound)
+  {
+    return {GPUCounter::FirstNvidia};
+  }
+  return m_Impl->CounterEnumerator->GetPublicCounterIds();
+}
+
+// Returns whether counterID is one of the counters reported by EnumerateCounters().
+bool NVD3D12Counters::HasCounter(GPUCounter counterID) const
+{
+  // Init() has not succeeded, so no counter belongs to this object
+  if(!m_Impl)
+    return false;
+
+  if(m_Impl->LibraryNotFound)
+  {
+    return counterID == GPUCounter::FirstNvidia;
+  }
+  return m_Impl->CounterEnumerator->HasCounter(counterID);
+}
+
+// Returns the description of counterID. A default-constructed description is returned if
+// Init() has not succeeded.
+CounterDescription NVD3D12Counters::DescribeCounter(GPUCounter counterID) const
+{
+  if(!m_Impl)
+    return CounterDescription();
+
+  if(m_Impl->LibraryNotFound)
+  {
+    RDCASSERT(counterID == GPUCounter::FirstNvidia);
+    // Dummy counter shows message directing user to download the Nsight Perf SDK library
+    return NVCounterEnumerator::LibraryNotFoundMessage();
+  }
+  return m_Impl->CounterEnumerator->GetCounterDescription(counterID);
+}
+
+// Action callback that brackets every replayed draw, dispatch and non-pass-boundary misc action
+// in a profiler range named after its event ID. It installs itself as the action callback of the
+// device's queue on construction and clears that callback again on destruction.
+struct D3D12NvidiaActionCallback final : public D3D12ActionCallback
+{
+  D3D12NvidiaActionCallback(WrappedID3D12Device *dev,
+                            nv::perf::profiler::D3D12RangeCommands *pRangeCommands)
+      : m_pDevice(dev), m_pRangeCommands(pRangeCommands)
+  {
+    m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = this;
+  }
+
+  virtual ~D3D12NvidiaActionCallback()
+  {
+    m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = NULL;
+  }
+
+  // Opens a range on the real command list, using the decimal event ID as the range name.
+  void PreDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
+  {
+    // eid is unsigned, so format it with %u rather than %d
+    rdcstr eidName = StringFormat::Fmt("%u", eid);
+
+    WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
+    m_pRangeCommands->PushRange(pWrappedCmdList->GetReal(), eidName.c_str());
+  }
+
+  // Closes the range opened by PreDraw(). Always returns false.
+  bool PostDraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
+  {
+    WrappedID3D12GraphicsCommandList *pWrappedCmdList = (WrappedID3D12GraphicsCommandList *)cmd;
+    m_pRangeCommands->PopRange(pWrappedCmdList->GetReal());
+    return false;
+  }
+
+  void PreCloseCommandList(ID3D12GraphicsCommandListX *cmd) final {}
+  void PostRedraw(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final {}
+  // Dispatches are handled exactly like draws.
+  void PreDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PreDraw(eid, cmd); }
+  bool PostDispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final
+  {
+    return PostDraw(eid, cmd);
+  }
+  void PostRedispatch(uint32_t eid, ID3D12GraphicsCommandListX *cmd) final { PostRedraw(eid, cmd); }
+  // Misc actions are handled like draws, except that pass boundaries get no range at all.
+  void PreMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
+  {
+    if(flags & ActionFlags::PassBoundary)
+      return;
+    PreDraw(eid, cmd);
+  }
+  bool PostMisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
+  {
+    if(flags & ActionFlags::PassBoundary)
+      return false;
+    return PostDraw(eid, cmd);
+  }
+  void PostRemisc(uint32_t eid, ActionFlags flags, ID3D12GraphicsCommandListX *cmd) final
+  {
+    if(flags & ActionFlags::PassBoundary)
+      return;
+    PostRedraw(eid, cmd);
+  }
+
+  void AliasEvent(uint32_t primary, uint32_t alias) final {}
+  WrappedID3D12Device *m_pDevice;
+  nv::perf::profiler::D3D12RangeCommands *m_pRangeCommands;
+};
+
+// Collects the requested counters by replaying the capture under an NvPerf range profiler
+// session, once per direct/compute queue of `device`.
+//
+// counters: the counter IDs to collect.
+// device:   the wrapped device whose capture is replayed.
+//
+// Returns the results produced by the counter enumerator's EvaluateMetrics(). An empty array is
+// returned if Init() has not succeeded, the library was not found, the device is not an NVIDIA
+// device, or a queue produced no counter data or could not be evaluated.
+rdcarray<CounterResult> NVD3D12Counters::FetchCounters(const rdcarray<GPUCounter> &counters,
+                                                       WrappedID3D12Device &device)
+{
+  if(!m_Impl || m_Impl->LibraryNotFound)
+  {
+    return {};
+  }
+
+  uint32_t maxEID = device.GetQueue()->GetMaxEID();
+  ID3D12Device *d3dDevice = device.GetReal();
+
+  nv::perf::profiler::D3D12RangeCommands rangeCommands;
+  rangeCommands.Initialize(d3dDevice);
+  RDCASSERT(rangeCommands.isNvidiaDevice);
+  if(!rangeCommands.isNvidiaDevice)
+  {
+    return {};
+  }
+
+  uint32_t maxNumRanges = 0;
+  {
+    // walk the action tree to determine how many profile-able events there are
+    FrameRecord frameRecord = device.GetReplay()->GetFrameRecord();
+    for(size_t i = 0; i < frameRecord.actionList.size(); i++)
+    {
+      Impl::RecurseDiscoverEvents(maxNumRanges, frameRecord.actionList[i]);
+    }
+  }
+
+  nv::perf::profiler::SessionOptions sessionOptions = {};
+  sessionOptions.maxNumRanges = maxNumRanges;
+  sessionOptions.avgRangeNameLength = 16;
+  sessionOptions.numTraceBuffers = 1;
+
+  nv::perf::profiler::RangeProfilerD3D12 rangeProfiler;
+
+  rdcarray<CounterResult> results;
+  const rdcarray<WrappedID3D12CommandQueue *> &commandQueues = device.GetQueues();
+  for(WrappedID3D12CommandQueue *pWrappedQueue : commandQueues)
+  {
+    ID3D12CommandQueue *d3dQueue = pWrappedQueue->GetReal();
+
+    switch(d3dQueue->GetDesc().Type)
+    {
+      case D3D12_COMMAND_LIST_TYPE_DIRECT:
+      case D3D12_COMMAND_LIST_TYPE_COMPUTE:
+        // Profiling is supported for 3D and compute queues.
+        break;
+      case D3D12_COMMAND_LIST_TYPE_BUNDLE:
+      case D3D12_COMMAND_LIST_TYPE_COPY:
+      case D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE:
+      case D3D12_COMMAND_LIST_TYPE_VIDEO_PROCESS:
+      case D3D12_COMMAND_LIST_TYPE_VIDEO_ENCODE:
+        // Profiling is not supported for copy or video queues.
+        continue;
+    }
+
+    if(!rangeProfiler.BeginSession(d3dQueue, sessionOptions))
+    {
+      RDCERR("NvPerf failed to start profiling session");
+      continue;    // Try the next command queue
+    }
+    // ends the session on every exit from this loop iteration, including the early ones below
+    auto sessionGuard = nv::perf::ScopeExitGuard([&rangeProfiler]() { rangeProfiler.EndSession(); });
+
+    // Create counter configuration, and set it.
+    {
+      nv::perf::DeviceIdentifiers deviceIdentifiers = nv::perf::D3D12GetDeviceIdentifiers(d3dDevice);
+      NVPA_RawMetricsConfig *pRawMetricsConfig =
+          nv::perf::profiler::D3D12CreateRawMetricsConfig(deviceIdentifiers.pChipName);
+      m_Impl->CounterEnumerator->CreateConfig(deviceIdentifiers.pChipName, pRawMetricsConfig,
+                                              counters);
+    }
+
+    nv::perf::profiler::SetConfigParams setConfigParams;
+    setConfigParams.numNestingLevels = 1;
+    setConfigParams.numStatisticalSamples = 1;
+    m_Impl->CounterEnumerator->GetConfig(
+        setConfigParams.pConfigImage, setConfigParams.configImageSize,
+        setConfigParams.pCounterDataPrefix, setConfigParams.counterDataPrefixSize);
+
+    size_t maxNumReplayPasses =
+        m_Impl->CounterEnumerator->GetMaxNumReplayPasses(setConfigParams.numNestingLevels);
+    RDCASSERT(maxNumReplayPasses > 0u);
+
+    if(!rangeProfiler.EnqueueCounterCollection(setConfigParams))
+    {
+      RDCERR("NvPerf failed to schedule counter collection");
+      continue;    // Try the next command queue
+    }
+
+    // installed for the rest of this iteration; wraps each replayed action in a profiler range
+    D3D12NvidiaActionCallback actionCallback(&device, &rangeCommands);
+
+    std::vector<uint8_t> counterDataImage;
+    for(size_t replayPass = 0;; ++replayPass)
+    {
+      if(!rangeProfiler.BeginPass())
+      {
+        RDCERR("NvPerf failed to start counter collection pass");
+        break;
+      }
+
+      // replay the events to perform all the queries
+      uint32_t eventStartID = 0;
+      device.ReplayLog(eventStartID, maxEID, eReplay_Full);
+
+      if(!rangeProfiler.EndPass())
+      {
+        RDCERR("NvPerf failed to end counter collection pass!");
+        break;
+      }
+
+      // device->GPUSync(d3dQueue);
+
+      nv::perf::profiler::DecodeResult decodeResult;
+      if(!rangeProfiler.DecodeCounters(decodeResult))
+      {
+        RDCERR("NvPerf failed to decode counters in collection pass");
+        break;
+      }
+
+      if(decodeResult.allPassesDecoded)
+      {
+        counterDataImage = std::move(decodeResult.counterDataImage);
+        break;    // Success!
+      }
+
+      // compare as replayPass + 1 so that a maxNumReplayPasses of 0 cannot wrap around to
+      // SIZE_MAX and leave this loop without an upper bound
+      if(replayPass + 1 >= maxNumReplayPasses)
+      {
+        RDCERR("NvPerf exceeded the maximum expected number of replay passes");
+        break;    // Failure
+      }
+    }
+
+    // NOTE(review): the two failures below return an empty array, discarding results already
+    // gathered from earlier queues, whereas the failures above only skip this queue - confirm
+    // that this difference is intended.
+    if(counterDataImage.empty())
+    {
+      RDCERR("No data found in NvPerf counter data image");
+      return {};
+    }
+
+    if(!m_Impl->CounterEnumerator->EvaluateMetrics(counterDataImage.data(), counterDataImage.size(),
+                                                   results))
+    {
+      RDCERR("NvPerf failed to evaluate metrics from counter data");
+      return {};
+    }
+  }
+
+  return results;
+}
diff --git a/renderdoc/driver/ihv/nv/nv_d3d12_counters.h b/renderdoc/driver/ihv/nv/nv_d3d12_counters.h
new file mode 100644
index 000000000..f120879e3
--- /dev/null
+++ b/renderdoc/driver/ihv/nv/nv_d3d12_counters.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2022 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#pragma once
+
+#include "api/replay/data_types.h"
+#include "api/replay/rdcarray.h"
+#include "api/replay/replay_enums.h"
+
+struct ID3D12Device;
+class WrappedID3D12Device;
+
+// NVIDIA GPU counter provider for the D3D12 replay. All state lives behind the opaque Impl
+// pointer, which is NULL until Init() has returned true.
+class NVD3D12Counters final
+{
+public:
+  NVD3D12Counters();
+  ~NVD3D12Counters();
+
+  // m_Impl is an owning raw pointer that the destructor deletes, so an implicit copy would lead
+  // to a double delete - copying is disallowed.
+  NVD3D12Counters(const NVD3D12Counters &) = delete;
+  NVD3D12Counters &operator=(const NVD3D12Counters &) = delete;
+
+  // Creates the implementation for `device`. Returns false if initialisation failed.
+  bool Init(ID3D12Device *device);
+
+  // IDs of the counters this provider exposes.
+  rdcarray<GPUCounter> EnumerateCounters() const;
+  // Whether counterID is one of the counters exposed by this provider.
+  bool HasCounter(GPUCounter counterID) const;
+  // Description of a counter exposed by this provider.
+  CounterDescription DescribeCounter(GPUCounter counterID) const;
+  // Collects `counters` for the capture loaded on `device` and returns the results.
+  rdcarray<CounterResult> FetchCounters(const rdcarray<GPUCounter> &counters,
+                                        WrappedID3D12Device &device);
+
+private:
+  struct Impl;
+  Impl *m_Impl;
+};