mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-13 05:20:45 +00:00
Add stat tracking of GPU overhead during capture
This commit is contained in:
@@ -671,6 +671,7 @@ void WrappedID3D12CommandQueue::CheckAndFreeRayDispatches()
|
||||
{
|
||||
if(signalled >= ray.fenceValue)
|
||||
{
|
||||
GetResourceManager()->GetRTManager()->AddDispatchTimer(ray.query);
|
||||
ray.Release();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2444,8 +2444,9 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
|
||||
if(D3D12_Debug_RT_Overlay() && m_UsedRT)
|
||||
{
|
||||
ASStats blasStats = {}, tlasStats = {};
|
||||
RTGPUPatchingStats gpuStats = {};
|
||||
|
||||
GetResourceManager()->GetRTManager()->GatherASAgeStatistics(blasStats, tlasStats);
|
||||
GetResourceManager()->GetRTManager()->GatherRTStatistics(blasStats, tlasStats, gpuStats);
|
||||
|
||||
overlayText += " TLAS BLAS\n";
|
||||
|
||||
@@ -2467,6 +2468,15 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
|
||||
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f,
|
||||
float(blasStats.diskBytes + tlasStats.diskBytes) / 1048576.0f, blasStats.diskCached,
|
||||
tlasStats.diskCached);
|
||||
|
||||
overlayText += StringFormat::Fmt(
|
||||
"%3u BLAS input copies with %9.2f KB in %5.2f ms = %9.2f MB/s\n"
|
||||
"%2u dispatches patched in %4.2f ms\n",
|
||||
gpuStats.builds, float(gpuStats.buildBytes) / 1024.0f, gpuStats.totalBuildMS,
|
||||
gpuStats.totalBuildMS == 0.0
|
||||
? 0.0
|
||||
: (float(gpuStats.buildBytes) / 1048576.0f) / (gpuStats.totalBuildMS / 1024.0f),
|
||||
gpuStats.dispatches, gpuStats.totalDispatchesMS);
|
||||
}
|
||||
|
||||
m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText);
|
||||
|
||||
@@ -925,6 +925,75 @@ void D3D12RTManager::VerifyRecord(const uint64_t recordSize, byte *wrappedRecord
|
||||
RDCASSERT(memcmp(record.data(), unwrappedRef, record.size()) == 0);
|
||||
}
|
||||
|
||||
uint32_t D3D12RTManager::GetFreeQuery()
|
||||
{
|
||||
SCOPED_LOCK(m_TimerStatsLock);
|
||||
if(m_TimerQueryHeap == NULL)
|
||||
{
|
||||
D3D12_QUERY_HEAP_DESC timerQueryDesc;
|
||||
// allow for up to 50 dispatches per frame, 500 AS builds, and assume 5 frames before we see the results
|
||||
timerQueryDesc.Count = (50 + 500) * 5 * 2;
|
||||
timerQueryDesc.NodeMask = 1;
|
||||
timerQueryDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
|
||||
HRESULT hr = m_wrappedDevice->GetReal()->CreateQueryHeap(
|
||||
&timerQueryDesc, __uuidof(ID3D12QueryHeap), (void **)&m_TimerQueryHeap);
|
||||
CHECK_HR(m_wrappedDevice, hr);
|
||||
if(FAILED(hr))
|
||||
RDCERR("Failed to create timer query heap HRESULT: %s", ToStr(hr).c_str());
|
||||
|
||||
m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::ReadBackHeap,
|
||||
D3D12GpuBufferHeapMemoryFlag::Default,
|
||||
timerQueryDesc.Count * sizeof(UINT64), 64, &m_TimerReadbackBuffer);
|
||||
|
||||
if(m_TimerReadbackBuffer && m_TimerQueryHeap)
|
||||
{
|
||||
m_Timestamps = (uint64_t *)m_TimerReadbackBuffer->Map();
|
||||
for(uint32_t i = 0; i < timerQueryDesc.Count; i += 2)
|
||||
m_FreeQueries.push_back(i);
|
||||
}
|
||||
|
||||
m_wrappedDevice->GetQueue()->GetTimestampFrequency(&m_TimerFrequency);
|
||||
}
|
||||
|
||||
if(!m_FreeQueries.empty())
|
||||
return m_FreeQueries.takeAt(m_FreeQueries.size() - 1);
|
||||
return ~0U;
|
||||
}
|
||||
|
||||
void D3D12RTManager::AddDispatchTimer(uint32_t q)
|
||||
{
|
||||
// could track this maybe, for now drop it on the floor
|
||||
if(q == ~0U)
|
||||
return;
|
||||
|
||||
uint64_t *timestamps = m_Timestamps + q;
|
||||
|
||||
{
|
||||
SCOPED_LOCK(m_TimerStatsLock);
|
||||
m_AccumulatedStats.dispatches++;
|
||||
m_AccumulatedStats.totalDispatchesMS +=
|
||||
((timestamps[1] - timestamps[0]) / double(m_TimerFrequency)) * 1024.0;
|
||||
m_FreeQueries.push_back(q);
|
||||
}
|
||||
}
|
||||
|
||||
void D3D12RTManager::AddBuildTimer(uint32_t q, uint64_t size)
|
||||
{
|
||||
if(q == ~0U)
|
||||
return;
|
||||
|
||||
uint64_t *timestamps = m_Timestamps + q;
|
||||
|
||||
{
|
||||
SCOPED_LOCK(m_TimerStatsLock);
|
||||
m_AccumulatedStats.builds++;
|
||||
m_AccumulatedStats.buildBytes += size;
|
||||
m_AccumulatedStats.totalBuildMS +=
|
||||
((timestamps[1] - timestamps[0]) / double(m_TimerFrequency)) * 1024.0;
|
||||
m_FreeQueries.push_back(q);
|
||||
}
|
||||
}
|
||||
|
||||
void D3D12RTManager::AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
|
||||
const rdcarray<std::function<bool()>> &callbacks)
|
||||
{
|
||||
@@ -1063,10 +1132,14 @@ void D3D12RTManager::CheckPendingASBuilds()
|
||||
m_PendingASBuilds.removeIf([](const PendingASBuild &build) { return build.fence == NULL; });
|
||||
}
|
||||
|
||||
void D3D12RTManager::GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges)
|
||||
void D3D12RTManager::GatherRTStatistics(ASStats &blasAges, ASStats &tlasAges,
|
||||
RTGPUPatchingStats &gpuStats)
|
||||
{
|
||||
double now = m_Timestamp.GetMilliseconds();
|
||||
|
||||
gpuStats = m_AccumulatedStats;
|
||||
m_AccumulatedStats = {};
|
||||
|
||||
SCOPED_LOCK(m_ASBuildDataLock);
|
||||
|
||||
blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50;
|
||||
@@ -1129,6 +1202,13 @@ PatchedRayDispatch D3D12RTManager::PatchRayDispatch(ID3D12GraphicsCommandList4 *
|
||||
|
||||
D3D12MarkerRegion region(unwrappedCmd, "PatchRayDispatch");
|
||||
|
||||
ret.resources.query = GetFreeQuery();
|
||||
|
||||
if(ret.resources.query != ~0U)
|
||||
{
|
||||
unwrappedCmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret.resources.query);
|
||||
}
|
||||
|
||||
PrepareRayDispatchBuffer(NULL);
|
||||
|
||||
D3D12GpuBuffer *scratchBuffer = NULL;
|
||||
@@ -1344,6 +1424,15 @@ PatchedRayDispatch D3D12RTManager::PatchRayDispatch(ID3D12GraphicsCommandList4 *
|
||||
|
||||
ret.resources.argumentBuffer = NULL;
|
||||
|
||||
if(ret.resources.query != ~0U)
|
||||
{
|
||||
unwrappedCmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret.resources.query + 1);
|
||||
unwrappedCmd->ResolveQueryData(
|
||||
m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret.resources.query, 2,
|
||||
m_TimerReadbackBuffer->Resource(),
|
||||
m_TimerReadbackBuffer->Offset() + sizeof(uint64_t) * ret.resources.query);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1617,6 +1706,13 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
ret->timestamp = m_Timestamp.GetMilliseconds();
|
||||
ret->rtManager = this;
|
||||
|
||||
ret->query = GetFreeQuery();
|
||||
|
||||
if(ret->query != ~0U)
|
||||
{
|
||||
unwrappedCmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret->query);
|
||||
}
|
||||
|
||||
if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
|
||||
{
|
||||
ret->NumBLAS = inputs.NumDescs;
|
||||
@@ -1908,6 +2004,14 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
m_InMemASBuildDatas.push_back(ret);
|
||||
}
|
||||
|
||||
if(ret->query != ~0U)
|
||||
{
|
||||
unwrappedCmd->EndQuery(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret->query + 1);
|
||||
unwrappedCmd->ResolveQueryData(m_TimerQueryHeap, D3D12_QUERY_TYPE_TIMESTAMP, ret->query, 2,
|
||||
m_TimerReadbackBuffer->Resource(),
|
||||
m_TimerReadbackBuffer->Offset() + sizeof(uint64_t) * ret->query);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3244,6 +3348,12 @@ void D3D12GpuBuffer::Release()
|
||||
}
|
||||
}
|
||||
|
||||
void ASBuildData::MarkWorkComplete()
|
||||
{
|
||||
complete = true;
|
||||
rtManager->AddBuildTimer(query, buffer ? buffer->Size() : 0);
|
||||
}
|
||||
|
||||
void ASBuildData::AddRef()
|
||||
{
|
||||
InterlockedIncrement(&m_RefCount);
|
||||
|
||||
@@ -1034,6 +1034,8 @@ struct PatchedRayDispatch
|
||||
|
||||
D3D12GpuBuffer *readbackBuffer;
|
||||
|
||||
uint32_t query;
|
||||
|
||||
// for convenience, when these resources are referenced in a queue they get a fence value to
|
||||
// indicate when they're safe to release. These values are unset when returned from patching or
|
||||
// referenced in the list and is set in each queue's copy of the references.
|
||||
@@ -1077,6 +1079,16 @@ struct ASStats
|
||||
uint32_t diskCached;
|
||||
};
|
||||
|
||||
// Accumulated GPU timing statistics for the extra work done during capture for ray tracing.
// All members default to zero so a default-constructed instance is a valid empty accumulator.
struct RTGPUPatchingStats
{
  // number of timed AS build input copies
  uint32_t builds = 0;
  // total bytes attributed to those copies
  uint64_t buildBytes = 0;
  // total GPU time spent on those copies, in milliseconds
  double totalBuildMS = 0.0;

  // number of timed ray dispatch patches
  uint32_t dispatches = 0;
  // total GPU time spent patching those dispatches, in milliseconds
  double totalDispatchesMS = 0.0;
};
|
||||
|
||||
// this is a refcounted GPU buffer with the build data, together with the metadata
|
||||
struct ASBuildData
|
||||
{
|
||||
@@ -1144,7 +1156,7 @@ struct ASBuildData
|
||||
// geometry GPU addresses have been de-based to contain only offsets
|
||||
rdcarray<RTGeometryDesc> geoms;
|
||||
|
||||
void MarkWorkComplete() { complete = true; }
|
||||
void MarkWorkComplete();
|
||||
bool IsWorkComplete() const { return complete; }
|
||||
|
||||
void AddRef();
|
||||
@@ -1153,6 +1165,7 @@ struct ASBuildData
|
||||
D3D12GpuBuffer *buffer = NULL;
|
||||
rdcstr filename;
|
||||
uint64_t bytesOnDisk = 0;
|
||||
uint32_t query = 0;
|
||||
|
||||
std::function<bool()> cleanupCallback;
|
||||
|
||||
@@ -1205,6 +1218,7 @@ public:
|
||||
SAFE_RELEASE(m_RayPatchingData.indirectComSig);
|
||||
SAFE_RELEASE(m_RayPatchingData.indirectPrepPipe);
|
||||
SAFE_RELEASE(m_RayPatchingData.indirectPrepRootSig);
|
||||
SAFE_RELEASE(m_TimerQueryHeap);
|
||||
}
|
||||
|
||||
void InitInternalResources();
|
||||
@@ -1227,7 +1241,7 @@ public:
|
||||
m_DiskCachedASBuildDatas.removeOne(data);
|
||||
}
|
||||
|
||||
void GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges);
|
||||
void GatherRTStatistics(ASStats &blasAges, ASStats &tlasAges, RTGPUPatchingStats &gpuStats);
|
||||
|
||||
D3D12GpuBuffer *UnrollBLASInstancesList(
|
||||
ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
@@ -1267,6 +1281,9 @@ public:
|
||||
void VerifyRecord(const uint64_t recordSize, byte *table, byte *ref,
|
||||
WrappedID3D12DescriptorHeap *resHeap, WrappedID3D12DescriptorHeap *sampHeap);
|
||||
|
||||
void AddDispatchTimer(uint32_t q);
|
||||
void AddBuildTimer(uint32_t q, uint64_t size);
|
||||
|
||||
private:
|
||||
void InitRayDispatchPatchingResources();
|
||||
void InitTLASInstanceCopyingResources();
|
||||
@@ -1327,6 +1344,16 @@ private:
|
||||
ID3D12CommandSignature *indirectComSig = NULL;
|
||||
} m_RayPatchingData;
|
||||
|
||||
ID3D12QueryHeap *m_TimerQueryHeap = NULL;
|
||||
D3D12GpuBuffer *m_TimerReadbackBuffer = NULL;
|
||||
uint64_t *m_Timestamps = NULL;
|
||||
uint64_t m_TimerFrequency;
|
||||
Threading::CriticalSection m_TimerStatsLock;
|
||||
rdcarray<uint32_t> m_FreeQueries;
|
||||
RTGPUPatchingStats m_AccumulatedStats = {};
|
||||
|
||||
uint32_t GetFreeQuery();
|
||||
|
||||
struct PendingASBuild
|
||||
{
|
||||
ID3D12Fence *fence;
|
||||
|
||||
Reference in New Issue
Block a user