From 987d08c301f3bef9375b03b090d7f6935f71696a Mon Sep 17 00:00:00 2001 From: baldurk Date: Mon, 7 Oct 2024 15:59:08 +0100 Subject: [PATCH] Add some debug-tracked-only AS stats for overlay --- renderdoc/driver/d3d12/d3d12_device.cpp | 30 +++++++ renderdoc/driver/d3d12/d3d12_device.h | 1 + .../d3d12/d3d12_device_rescreate_wrap.cpp | 1 + renderdoc/driver/d3d12/d3d12_manager.cpp | 81 ++++++++++++++++++- renderdoc/driver/d3d12/d3d12_manager.h | 38 ++++++++- 5 files changed, 149 insertions(+), 2 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp b/renderdoc/driver/d3d12/d3d12_device.cpp index 3c450ee90..a8f43f1f5 100644 --- a/renderdoc/driver/d3d12/d3d12_device.cpp +++ b/renderdoc/driver/d3d12/d3d12_device.cpp @@ -50,6 +50,7 @@ RDOC_EXTERN_CONFIG(bool, Replay_Debug_SingleThreadedCompilation); RDOC_DEBUG_CONFIG(bool, D3D12_Debug_SingleSubmitFlushing, false, "Every command buffer is submitted and fully flushed to the GPU, to narrow down " "the source of problems."); +RDOC_DEBUG_CONFIG(bool, D3D12_Debug_RTOverlay, false, "Add some RT tracking to the overlay."); WRAPPED_POOL_INST(WrappedID3D12Device); @@ -2432,6 +2433,35 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL rdcstr overlayText = RenderDoc::Inst().GetOverlayText(RDCDriver::D3D12, devWnd, m_FrameCounter, 0); +#if ENABLED(RDOC_DEVEL) + if(D3D12_Debug_RTOverlay() && m_UsedRT) + { + double now = GetResourceManager()->GetRTManager()->GetCurrentASTimestamp(); + ASStats blasStats = {}, tlasStats = {}; + + ASBuildData::GatherASAgeStatistics(GetResourceManager(), now, blasStats, tlasStats); + + overlayText += " TLAS BLAS\n"; + + for(size_t i = 0; i < ARRAY_COUNT(tlasStats.bucket); i++) + { + if(tlasStats.bucket[i].msThreshold == ~0U) + overlayText += " older "; + else + overlayText += StringFormat::Fmt("<=% 4ums ", tlasStats.bucket[i].msThreshold); + + overlayText += StringFormat::Fmt( + "% 4u (% 3.2f MB) % 4u (%.2f MB)\n", tlasStats.bucket[i].count, + 
float(tlasStats.bucket[i].bytes) / 1048576.0f, blasStats.bucket[i].count, + float(blasStats.bucket[i].bytes) / 1048576.0f); + } + + overlayText += StringFormat::Fmt( + "%.2f MB overhead\n", + float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f); + } +#endif + m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText); // transition backbuffer back again diff --git a/renderdoc/driver/d3d12/d3d12_device.h b/renderdoc/driver/d3d12/d3d12_device.h index 7acca0bae..60e23b6a4 100644 --- a/renderdoc/driver/d3d12/d3d12_device.h +++ b/renderdoc/driver/d3d12/d3d12_device.h @@ -723,6 +723,7 @@ private: uint32_t m_SubmitCounter = 0; bool m_UsedDXIL = false; + bool m_UsedRT = false; DriverInformation m_DriverInfo = {}; diff --git a/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp b/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp index 5b22acf3f..c3be71aa9 100644 --- a/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp @@ -544,6 +544,7 @@ HRESULT WrappedID3D12Device::CreateResource( InitialLayout.ToStates() == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE) { wrapped->MarkAsAccelerationStructureResource(); + m_UsedRT = true; } else { diff --git a/renderdoc/driver/d3d12/d3d12_manager.cpp b/renderdoc/driver/d3d12/d3d12_manager.cpp index 2f955af76..857305bb0 100644 --- a/renderdoc/driver/d3d12/d3d12_manager.cpp +++ b/renderdoc/driver/d3d12/d3d12_manager.cpp @@ -1279,6 +1279,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( ASBuildData *ret = new ASBuildData; ret->Type = inputs.Type; ret->Flags = inputs.Flags; + ret->timestamp = m_Timestamp.GetMilliseconds(); if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL) { @@ -1335,6 +1336,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( // calculate how much data is needed. 
Add 256 bytes padding uint64_t byteSize = 0; + uint64_t bytesOverhead = 0; for(const ASBuildData::RTGeometryDesc &desc : ret->geoms) { if(desc.Type == D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS) @@ -1344,6 +1346,10 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( byteSize += (desc.AABBs.AABBCount - 1) * desc.AABBs.AABBs.StrideInBytes; byteSize += sizeof(D3D12_RAYTRACING_AABB); byteSize = AlignUp16(byteSize); + + if(desc.AABBs.AABBs.StrideInBytes > sizeof(D3D12_RAYTRACING_AABB)) + bytesOverhead += (desc.AABBs.AABBCount - 1) * + (desc.AABBs.AABBs.StrideInBytes - sizeof(D3D12_RAYTRACING_AABB)); } } else @@ -1371,10 +1377,24 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( (untrustedVertexCount / 100) * D3D12_Debug_RTMaxVertexPercentIncrease() + D3D12_Debug_RTMaxVertexIncrement(); + RDCASSERT(vbSize >= desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount); + vbSize = RDCMIN(vbSize, desc.Triangles.VertexBuffer.StrideInBytes * estimatedVertexCount); byteSize += vbSize; byteSize = AlignUp16(byteSize); + + uint64_t tightStride = GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0); + + if(desc.Triangles.VertexBuffer.StrideInBytes > tightStride) + { + bytesOverhead += vbSize - (tightStride * untrustedVertexCount); + } + else if(vbSize > desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount) + { + bytesOverhead += + vbSize - (desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount); + } } else { @@ -1382,13 +1402,21 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( { byteSize += (desc.Triangles.VertexCount - 1) * desc.Triangles.VertexBuffer.StrideInBytes; - byteSize += GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0); + uint64_t tightStride = GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0); + + if(desc.Triangles.VertexBuffer.StrideInBytes > tightStride) + bytesOverhead += (desc.Triangles.VertexCount - 1) * + (desc.Triangles.VertexBuffer.StrideInBytes - tightStride); + + byteSize += tightStride; byteSize = 
AlignUp16(byteSize); } } } } + ret->bytesOverhead = bytesOverhead; + m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::ReadBackHeap, D3D12GpuBufferHeapMemoryFlag::Default, byteSize, 256, &ret->buffer); @@ -2612,8 +2640,59 @@ void ASBuildData::Release() unsigned int ret = InterlockedDecrement(&m_RefCount); if(ret == 0) { + { +#if ENABLED(RDOC_DEVEL) + SCOPED_WRITELOCK(dataslock); + datas.removeOne(this); +#endif + } + SAFE_RELEASE(buffer); delete this; } } + +#if ENABLED(RDOC_DEVEL) +Threading::RWLock ASBuildData::dataslock; +rdcarray<ASBuildData *> ASBuildData::datas; +#endif + +void ASBuildData::GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges, + ASStats &tlasAges) +{ +#if ENABLED(RDOC_DEVEL) + SCOPED_READLOCK(dataslock); + + blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50; + blasAges.bucket[1].msThreshold = tlasAges.bucket[1].msThreshold = 500; + blasAges.bucket[2].msThreshold = tlasAges.bucket[2].msThreshold = 5000; + blasAges.bucket[3].msThreshold = tlasAges.bucket[3].msThreshold = ~0U; + + for(ASBuildData *buildData : datas) + { + if(buildData) + { + uint32_t age = uint32_t(now - buildData->timestamp); + + ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL + ? tlasAges + : blasAges; + + uint64_t size = buildData->buffer ? 
buildData->buffer->Size() : 0; + + ages.overheadBytes += buildData->bytesOverhead; + + for(size_t i = 0; i < ARRAY_COUNT(tlasAges.bucket); i++) + { + if(age <= ages.bucket[i].msThreshold) + { + ages.bucket[i].count++; + ages.bucket[i].bytes += size; + break; + } + } + } + } +#endif +} diff --git a/renderdoc/driver/d3d12/d3d12_manager.h b/renderdoc/driver/d3d12/d3d12_manager.h index 2dc5af3b1..a793a8ef3 100644 --- a/renderdoc/driver/d3d12/d3d12_manager.h +++ b/renderdoc/driver/d3d12/d3d12_manager.h @@ -1035,6 +1035,18 @@ struct PatchedRayDispatch struct D3D12ShaderExportDatabase; +struct ASStats +{ + struct + { + uint32_t msThreshold; + uint32_t count; + uint64_t bytes; + } bucket[4]; + + uint64_t overheadBytes; +}; + // this is a refcounted GPU buffer with the build data, together with the metadata struct ASBuildData { @@ -1107,13 +1119,33 @@ struct ASBuildData D3D12GpuBuffer *buffer = NULL; + static void GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges, + ASStats &tlasAges); + private: - ASBuildData() = default; + ASBuildData() + { +#if ENABLED(RDOC_DEVEL) + SCOPED_WRITELOCK(dataslock); + datas.push_back(this); +#endif + } + + // timestamp this build data was recorded on + double timestamp = 0; + + // how many bytes of overhead are currently present, due to copying with strided vertex/AABB data + uint64_t bytesOverhead = 0; unsigned int m_RefCount = 1; friend class D3D12RTManager; friend class D3D12ResourceManager; + +#if ENABLED(RDOC_DEVEL) + static Threading::RWLock dataslock; + static rdcarray<ASBuildData *> datas; +#endif }; DECLARE_REFLECTION_STRUCT(ASBuildData::RVAWithStride); @@ -1185,6 +1217,8 @@ public: // temp buffer for AS serialise copies D3D12GpuBuffer *ASSerialiseBuffer = NULL; + double GetCurrentASTimestamp() { return m_Timestamp.GetMilliseconds(); } + private: void InitRayDispatchPatchingResources(); void InitReplayBlasPatchingResources(); @@ -1195,6 +1229,8 @@ private: WrappedID3D12Device *m_wrappedDevice; D3D12GpuBufferAllocator 
&m_GPUBufferAllocator; + PerformanceTimer m_Timestamp; + ID3D12GraphicsCommandListX *m_cmdList; ID3D12CommandAllocator *m_cmdAlloc; ID3D12CommandQueue *m_cmdQueue;