Add some debug-tracked-only AS stats for overlay

This commit is contained in:
baldurk
2024-10-07 15:59:08 +01:00
parent ea762adbfa
commit 987d08c301
5 changed files with 149 additions and 2 deletions
+30
View File
@@ -50,6 +50,7 @@ RDOC_EXTERN_CONFIG(bool, Replay_Debug_SingleThreadedCompilation);
RDOC_DEBUG_CONFIG(bool, D3D12_Debug_SingleSubmitFlushing, false,
"Every command buffer is submitted and fully flushed to the GPU, to narrow down "
"the source of problems.");
RDOC_DEBUG_CONFIG(bool, D3D12_Debug_RTOverlay, false, "Add some RT tracking to the overlay.");
WRAPPED_POOL_INST(WrappedID3D12Device);
@@ -2432,6 +2433,35 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
rdcstr overlayText =
RenderDoc::Inst().GetOverlayText(RDCDriver::D3D12, devWnd, m_FrameCounter, 0);
#if ENABLED(RDOC_DEVEL)
if(D3D12_Debug_RTOverlay() && m_UsedRT)
{
double now = GetResourceManager()->GetRTManager()->GetCurrentASTimestamp();
ASStats blasStats = {}, tlasStats = {};
ASBuildData::GatherASAgeStatistics(GetResourceManager(), now, blasStats, tlasStats);
overlayText += " TLAS BLAS\n";
for(size_t i = 0; i < ARRAY_COUNT(tlasStats.bucket); i++)
{
if(tlasStats.bucket[i].msThreshold == ~0U)
overlayText += " older ";
else
overlayText += StringFormat::Fmt("<=% 4ums ", tlasStats.bucket[i].msThreshold);
overlayText += StringFormat::Fmt(
"% 4u (% 3.2f MB) % 4u (%.2f MB)\n", tlasStats.bucket[i].count,
float(tlasStats.bucket[i].bytes) / 1048576.0f, blasStats.bucket[i].count,
float(blasStats.bucket[i].bytes) / 1048576.0f);
}
overlayText += StringFormat::Fmt(
"%.2f MB overhead\n",
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f);
}
#endif
m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText);
// transition backbuffer back again
+1
View File
@@ -723,6 +723,7 @@ private:
uint32_t m_SubmitCounter = 0;
bool m_UsedDXIL = false;
bool m_UsedRT = false;
DriverInformation m_DriverInfo = {};
@@ -544,6 +544,7 @@ HRESULT WrappedID3D12Device::CreateResource(
InitialLayout.ToStates() == D3D12_RESOURCE_STATE_RAYTRACING_ACCELERATION_STRUCTURE)
{
wrapped->MarkAsAccelerationStructureResource();
m_UsedRT = true;
}
else
{
+80 -1
View File
@@ -1279,6 +1279,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
ASBuildData *ret = new ASBuildData;
ret->Type = inputs.Type;
ret->Flags = inputs.Flags;
ret->timestamp = m_Timestamp.GetMilliseconds();
if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
{
@@ -1335,6 +1336,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
// calculate how much data is needed. Add 256 bytes padding
uint64_t byteSize = 0;
uint64_t bytesOverhead = 0;
for(const ASBuildData::RTGeometryDesc &desc : ret->geoms)
{
if(desc.Type == D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS)
@@ -1344,6 +1346,10 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
byteSize += (desc.AABBs.AABBCount - 1) * desc.AABBs.AABBs.StrideInBytes;
byteSize += sizeof(D3D12_RAYTRACING_AABB);
byteSize = AlignUp16(byteSize);
if(desc.AABBs.AABBs.StrideInBytes > sizeof(D3D12_RAYTRACING_AABB))
bytesOverhead += (desc.AABBs.AABBCount - 1) *
(desc.AABBs.AABBs.StrideInBytes - sizeof(D3D12_RAYTRACING_AABB));
}
}
else
@@ -1371,10 +1377,24 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
(untrustedVertexCount / 100) * D3D12_Debug_RTMaxVertexPercentIncrease() +
D3D12_Debug_RTMaxVertexIncrement();
RDCASSERT(vbSize >= desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount);
vbSize = RDCMIN(vbSize, desc.Triangles.VertexBuffer.StrideInBytes * estimatedVertexCount);
byteSize += vbSize;
byteSize = AlignUp16(byteSize);
uint64_t tightStride = GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0);
if(desc.Triangles.VertexBuffer.StrideInBytes > tightStride)
{
bytesOverhead += vbSize - (tightStride * untrustedVertexCount);
}
else if(vbSize > desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount)
{
bytesOverhead +=
vbSize - (desc.Triangles.VertexBuffer.StrideInBytes * untrustedVertexCount);
}
}
else
{
@@ -1382,13 +1402,21 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
{
byteSize += (desc.Triangles.VertexCount - 1) * desc.Triangles.VertexBuffer.StrideInBytes;
byteSize += GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0);
uint64_t tightStride = GetByteSize(1, 1, 1, desc.Triangles.VertexFormat, 0);
if(desc.Triangles.VertexBuffer.StrideInBytes > tightStride)
bytesOverhead += (desc.Triangles.VertexCount - 1) *
(desc.Triangles.VertexBuffer.StrideInBytes - tightStride);
byteSize += tightStride;
byteSize = AlignUp16(byteSize);
}
}
}
}
ret->bytesOverhead = bytesOverhead;
m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::ReadBackHeap,
D3D12GpuBufferHeapMemoryFlag::Default, byteSize, 256, &ret->buffer);
@@ -2612,8 +2640,59 @@ void ASBuildData::Release()
unsigned int ret = InterlockedDecrement(&m_RefCount);
if(ret == 0)
{
{
#if ENABLED(RDOC_DEVEL)
SCOPED_WRITELOCK(dataslock);
datas.removeOne(this);
#endif
}
SAFE_RELEASE(buffer);
delete this;
}
}
#if ENABLED(RDOC_DEVEL)
Threading::RWLock ASBuildData::dataslock;
rdcarray<ASBuildData *> ASBuildData::datas;
#endif
// Sorts every currently tracked ASBuildData into age buckets, accumulating per-bucket counts and
// buffer sizes plus the total stride overhead, split between top-level and bottom-level builds.
//
// rm        - not used by this function.
// now       - current time in milliseconds, compared against each build data's recorded timestamp.
// blasAges  - receives statistics for everything that is not a top-level build.
// tlasAges  - receives statistics for top-level builds.
//
// The bucket thresholds are overwritten here, but counts/bytes/overheadBytes are only accumulated
// into, so callers are expected to pass in zero-initialised stats. When RDOC_DEVEL is not enabled
// this function does nothing and leaves both outputs untouched.
void ASBuildData::GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
                                        ASStats &tlasAges)
{
#if ENABLED(RDOC_DEVEL)
  // hold the tracking list for reading so entries can't be added/removed while we iterate
  SCOPED_READLOCK(dataslock);

  // inclusive upper bounds in milliseconds, the last bucket catches everything older
  blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50;
  blasAges.bucket[1].msThreshold = tlasAges.bucket[1].msThreshold = 500;
  blasAges.bucket[2].msThreshold = tlasAges.bucket[2].msThreshold = 5000;
  blasAges.bucket[3].msThreshold = tlasAges.bucket[3].msThreshold = ~0U;

  for(ASBuildData *buildData : datas)
  {
    if(!buildData)
      continue;

    // 'now' was sampled by the caller before we took the lock, so a build recorded in between can
    // have a timestamp slightly in the future. Converting a negative or too-large double directly
    // to uint32_t is undefined behaviour, so clamp into range first: anything in the future counts
    // as brand new, anything beyond the representable range lands in the final catch-all bucket.
    const double elapsedMs = now - buildData->timestamp;
    uint32_t age = 0;
    if(elapsedMs >= double(~0U))
      age = ~0U;
    else if(elapsedMs > 0.0)
      age = uint32_t(elapsedMs);

    ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL
                        ? tlasAges
                        : blasAges;

    // build data without a buffer still counts towards the bucket, just with no bytes
    uint64_t size = buildData->buffer ? buildData->buffer->Size() : 0;

    ages.overheadBytes += buildData->bytesOverhead;

    // thresholds are ascending, so the first one that fits is the tightest bucket
    for(size_t i = 0; i < ARRAY_COUNT(ages.bucket); i++)
    {
      if(age <= ages.bucket[i].msThreshold)
      {
        ages.bucket[i].count++;
        ages.bucket[i].bytes += size;
        break;
      }
    }
  }
#endif
}
+37 -1
View File
@@ -1035,6 +1035,18 @@ struct PatchedRayDispatch
struct D3D12ShaderExportDatabase;
// Age-bucketed statistics about tracked acceleration structure build data, as filled in by
// ASBuildData::GatherASAgeStatistics. Plain aggregate - value-initialise it (= {}) before use.
struct ASStats
{
  // a single age bucket
  struct Bucket
  {
    // inclusive upper bound on the age in milliseconds for this bucket. ~0U marks the final
    // catch-all bucket for anything older than the previous thresholds
    uint32_t msThreshold;
    // number of build data entries that fell into this bucket
    uint32_t count;
    // sum of the buffer sizes of those entries
    uint64_t bytes;
  };

  // buckets in ascending threshold order
  Bucket bucket[4];

  // sum of the per-build-data overhead bytes across all entries, regardless of bucket
  uint64_t overheadBytes;
};
// this is a refcounted GPU buffer with the build data, together with the metadata
struct ASBuildData
{
@@ -1107,13 +1119,33 @@ struct ASBuildData
D3D12GpuBuffer *buffer = NULL;
static void GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
ASStats &tlasAges);
private:
ASBuildData() = default;
// Constructor. In development builds (RDOC_DEVEL) every new build data object adds itself to the
// static 'datas' tracking list so that GatherASAgeStatistics can iterate over it; in other builds
// this does nothing.
ASBuildData()
{
#if ENABLED(RDOC_DEVEL)
// take the write lock as this modifies the shared list, which readers iterate under a read lock
SCOPED_WRITELOCK(dataslock);
datas.push_back(this);
#endif
}
// timestamp this build data was recorded on
double timestamp = 0;
// how many bytes of overhead are currently present, due to copying with strided vertex/AABB data
uint64_t bytesOverhead = 0;
unsigned int m_RefCount = 1;
friend class D3D12RTManager;
friend class D3D12ResourceManager;
#if ENABLED(RDOC_DEVEL)
static Threading::RWLock dataslock;
static rdcarray<ASBuildData *> datas;
#endif
};
DECLARE_REFLECTION_STRUCT(ASBuildData::RVAWithStride);
@@ -1185,6 +1217,8 @@ public:
// temp buffer for AS serialise copies
D3D12GpuBuffer *ASSerialiseBuffer = NULL;
// Reads the current value in milliseconds from m_Timestamp, the same timer that CopyBuildInputs
// uses to stamp each ASBuildData, so the result can be compared against those timestamps.
double GetCurrentASTimestamp()
{
  const double nowMs = m_Timestamp.GetMilliseconds();
  return nowMs;
}
private:
void InitRayDispatchPatchingResources();
void InitReplayBlasPatchingResources();
@@ -1195,6 +1229,8 @@ private:
WrappedID3D12Device *m_wrappedDevice;
D3D12GpuBufferAllocator &m_GPUBufferAllocator;
PerformanceTimer m_Timestamp;
ID3D12GraphicsCommandListX *m_cmdList;
ID3D12CommandAllocator *m_cmdAlloc;
ID3D12CommandQueue *m_cmdQueue;