diff --git a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp
index 178a27ed6..e35831305 100644
--- a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp
+++ b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp
@@ -786,7 +786,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
     }
 
     // check AS builds now
-    GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
+    GetResourceManager()->GetRTManager()->TickASManagement();
   }
 
   if(IsCaptureMode(m_State))
diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp b/renderdoc/driver/d3d12/d3d12_device.cpp
index c86b31f87..be55a4f23 100644
--- a/renderdoc/driver/d3d12/d3d12_device.cpp
+++ b/renderdoc/driver/d3d12/d3d12_device.cpp
@@ -2440,13 +2440,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
         rdcstr overlayText =
             RenderDoc::Inst().GetOverlayText(RDCDriver::D3D12, devWnd, m_FrameCounter, 0);
 
-#if ENABLED(RDOC_DEVEL)
         if(D3D12_Debug_RTOverlay() && m_UsedRT)
         {
-          double now = GetResourceManager()->GetRTManager()->GetCurrentASTimestamp();
 
           ASStats blasStats = {}, tlasStats = {};
-          ASBuildData::GatherASAgeStatistics(GetResourceManager(), now, blasStats, tlasStats);
+          GetResourceManager()->GetRTManager()->GatherASAgeStatistics(blasStats, tlasStats);
 
           overlayText += " TLAS BLAS\n";
 
@@ -2464,10 +2462,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
           }
 
           overlayText += StringFormat::Fmt(
-              "%.2f MB overhead\n",
-              float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f);
+              "%.2f MB overhead, %.2f MB (%u BLAS %u TLAS) cached to disk\n",
+              float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f,
+              float(blasStats.diskBytes + tlasStats.diskBytes) / 1048576.0f, blasStats.diskCached,
+              tlasStats.diskCached);
         }
-#endif
 
         m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText);
 
@@ -2698,7 +2697,7 @@ void WrappedID3D12Device::StartFrameCapture(DeviceOwnedWindow devWnd)
   GPUSyncAllQueues();
 
   // wait until we've synced all queues to check for these
-  GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
+  GetResourceManager()->GetRTManager()->TickASManagement();
 
   GetResourceManager()->PrepareInitialContents();
 
diff --git a/renderdoc/driver/d3d12/d3d12_initstate.cpp b/renderdoc/driver/d3d12/d3d12_initstate.cpp
index a9b2f32f4..a6b50a211 100644
--- a/renderdoc/driver/d3d12/d3d12_initstate.cpp
+++ b/renderdoc/driver/d3d12/d3d12_initstate.cpp
@@ -616,6 +616,8 @@ uint64_t D3D12ResourceManager::GetSize_InitialState(ResourceId id, const D3D12In
       if(buildData->buffer)
         ret += 64 + buildData->buffer->Size();
 
+      ret += 64 + buildData->bytesOnDisk;
+
       return ret;
     }
   }
@@ -1364,6 +1366,10 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
             ret = false;
           }
         }
+        else if(!initial->buildData->filename.empty())
+        {
+          ContentsLength = initial->buildData->bytesOnDisk;
+        }
 
         buildData = initial->buildData;
       }
@@ -1422,11 +1428,23 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
         BufferContents = tempAlloc = new byte[(size_t)ContentsLength];
       }
 
-      // not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
-      // directly into already allocated memory (either directly upload memory for BLAS, or
-      // temporary memory to patch for TLASs)
-      ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
-          .Important();
+      // build data that was flushed to disk has no in-memory buffer, so when writing we stream
+      // the cached file into the capture. IsWriting() and the NULL check come first so that we
+      // never dereference a NULL buildData (it is NULL-checked again below)
+      if(ser.IsWriting() && buildData && !buildData->filename.empty())
+      {
+        StreamReader reader(FileIO::fopen(buildData->filename, FileIO::ReadBinary));
+
+        ser.SerialiseStream("BufferContents"_lit, reader);
+      }
+      else
+      {
+        // not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
+        // directly into already allocated memory (either directly upload memory for BLAS, or
+        // temporary memory to patch for TLASs)
+        ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
+            .Important();
+      }
 
       if(buildData)
       {
diff --git a/renderdoc/driver/d3d12/d3d12_manager.cpp b/renderdoc/driver/d3d12/d3d12_manager.cpp
index 5920482e1..e37221a3b 100644
--- a/renderdoc/driver/d3d12/d3d12_manager.cpp
+++ b/renderdoc/driver/d3d12/d3d12_manager.cpp
@@ -27,6 +27,7 @@
 #include "core/settings.h"
 #include "driver/dx/official/d3dcompiler.h"
 #include "driver/dxgi/dxgi_common.h"
+#include "strings/string_utils.h"
 #include "d3d12_command_list.h"
 #include "d3d12_command_queue.h"
 #include "d3d12_device.h"
@@ -45,6 +46,15 @@ RDOC_CONFIG(
     uint32_t, D3D12_Debug_RTMaxVertexPercentIncrease, 10,
     "Percentage increase for the API-provided max vertex when building a BLAS with an index "
     "buffer, to account for incorrectly set values by application.");
+RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheThreshold, 5000,
+            "How many milliseconds to wait before caching an AS to disk if it has been unmodified "
+            "for that long");
+
+// batch 50 at a time, if we have one check per frame this would cache 5000 BLASs in 100 frames
+// which is a reasonable background pace
+RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheBatchSize, 50,
+            "The maximum number of ASs to cache to disk in a single batch (batch processing "
+            "happens at indeterminate intervals but no more than once per submission");
 
 void D3D12Descriptor::Init(const D3D12_SAMPLER_DESC2 *pDesc)
 {
@@ -810,6 +820,109 @@ void D3D12RTManager::AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
   }
 }
 
+// called from command list execution and from StartFrameCapture: processes pending AS builds and
+// then flushes any sufficiently old build data to disk.
+void D3D12RTManager::TickASManagement()
+{
+  CheckPendingASBuilds();
+  CheckASCaching();
+}
+
+// writes the buffers of old, completed AS build datas to temporary files on disk and releases
+// the in-memory copies, moving each from m_InMemASBuildDatas to m_DiskCachedASBuildDatas. At most
+// D3D12_Debug_RTASCacheBatchSize build datas are flushed per call.
+void D3D12RTManager::CheckASCaching()
+{
+  SCOPED_LOCK(m_ASBuildDataLock);
+
+  // sample the time only once we hold the lock, so that no build data already in the list should
+  // have a timestamp later than now and the age subtractions below can't go negative
+  double now = m_Timestamp.GetMilliseconds();
+
+  const uint32_t ageThreshold = D3D12_Debug_RTASCacheThreshold();
+  const size_t maxCacheBatch = D3D12_Debug_RTASCacheBatchSize();
+
+  // see if any AS builds are finished and old enough that we should flush them to disk.
+  // to avoid doing too much work at a time we do these in batches of up to N. They're pushed in
+  // order so the first one is oldest. We don't care too much about completion (there may be a
+  // slight gap between record/create and submission, but that will be dominated by the time
+  // between submission and it being old enough) but we don't want an AS which is built but never
+  // submitted or destroyed and stays potential forever to block caching, so we skip over any such
+  // ASs and start from the first old-enough AS.
+  size_t first = ~0U;
+  for(size_t i = 0; i < m_InMemASBuildDatas.size(); i++)
+  {
+    ASBuildData *buildData = m_InMemASBuildDatas[i];
+
+    uint32_t age = uint32_t(now - buildData->timestamp);
+
+    // if we encounter one that is too young, bail out as all later ones will be too young as well
+    if(age < ageThreshold)
+      break;
+
+    // skip any that are somehow old enough but not complete
+    if(!buildData->IsWorkComplete())
+      continue;
+
+    // this build is both complete and old enough, store it and stop searching so that we start
+    // from the oldest eligible build rather than the newest
+    first = i;
+    break;
+  }
+
+  // if we didn't find one at all, stop now.
+  if(first == ~0U)
+    return;
+
+  // the build data at [first] is both old enough to be cached and complete! we take a few more -
+  // up to a small batch at a time. end is exclusive: every build data in [first, end) has been
+  // checked for age and completion, so we never flush one that failed or skipped the check.
+  size_t end = first;
+  while(end < m_InMemASBuildDatas.size() && end - first < maxCacheBatch)
+  {
+    ASBuildData *buildData = m_InMemASBuildDatas[end];
+
+    uint32_t age = uint32_t(now - buildData->timestamp);
+
+    // as soon as we find a build which is either too new or not complete, we're finished.
+    if(age < ageThreshold || !buildData->IsWorkComplete())
+      break;
+
+    end++;
+  }
+
+  // an empty batch (e.g. a configured batch size of 0) means there is nothing to flush
+  if(end == first)
+    return;
+
+  for(size_t i = first; i < end; i++)
+  {
+    ASBuildData *buildData = m_InMemASBuildDatas[i];
+
+    RDCDEBUG("Flushing AS build data of size %llu to disk", buildData->buffer->Size());
+
+    // de-interleave positions in geoms here if their stride is greater than vertex format?
+    buildData->filename = StringFormat::Fmt(
+        "%s/rdoc_as_%llu_%llu.bin", get_dirname(RenderDoc::Inst().GetCaptureFileTemplate()).c_str(),
+        Timing::GetTick(), Threading::GetCurrentID());
+    buildData->bytesOnDisk = buildData->buffer->Size();
+    FileIO::CreateParentDirectory(buildData->filename);
+
+    {
+      StreamWriter writer(FileIO::fopen(buildData->filename, FileIO::WriteBinary), Ownership::Stream);
+      writer.Write(buildData->buffer->Map(), buildData->buffer->Size());
+    }
+
+    buildData->buffer->Unmap();
+    SAFE_RELEASE(buildData->buffer);
+
+    m_DiskCachedASBuildDatas.push_back(buildData);
+  }
+
+  // remove the build datas that we've processed
+  m_InMemASBuildDatas.erase(first, end - first);
+}
+
 void D3D12RTManager::CheckPendingASBuilds()
 {
   std::map fenceValues;
@@ -836,6 +949,65 @@ void D3D12RTManager::CheckPendingASBuilds()
   m_PendingASBuilds.removeIf([](const PendingASBuild &build) { return build.fence == NULL; });
 }
 
+// gathers statistics for the RT overlay: disk-cached build datas are totalled by count and bytes
+// on disk, in-memory ones are bucketed by age. Only adds to the counters, so the caller is
+// expected to pass in zero-initialised stats.
+void D3D12RTManager::GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges)
+{
+  SCOPED_LOCK(m_ASBuildDataLock);
+
+  // sample the time under the lock so no tracked build data has a timestamp later than now
+  double now = m_Timestamp.GetMilliseconds();
+
+  blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50;
+  blasAges.bucket[1].msThreshold = tlasAges.bucket[1].msThreshold = 250;
+  blasAges.bucket[2].msThreshold = tlasAges.bucket[2].msThreshold = 2000;
+  blasAges.bucket[3].msThreshold = tlasAges.bucket[3].msThreshold = ~0U;
+
+  for(ASBuildData *buildData : m_DiskCachedASBuildDatas)
+  {
+    if(buildData && !buildData->filename.empty())
+    {
+      ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL
+                          ? tlasAges
+                          : blasAges;
+
+      ages.diskBytes += buildData->bytesOnDisk;
+      ages.diskCached++;
+    }
+  }
+
+  for(ASBuildData *buildData : m_InMemASBuildDatas)
+  {
+    if(buildData)
+    {
+      uint32_t age = uint32_t(now - buildData->timestamp);
+
+      ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL
+                          ? tlasAges
+                          : blasAges;
+
+      // should never encounter this
+      if(!buildData->filename.empty())
+        continue;
+
+      uint64_t size = buildData->buffer ? buildData->buffer->Size() : 0;
+
+      ages.overheadBytes += buildData->bytesOverhead;
+
+      for(size_t i = 0; i < ARRAY_COUNT(tlasAges.bucket); i++)
+      {
+        if(age <= ages.bucket[i].msThreshold)
+        {
+          ages.bucket[i].count++;
+          ages.bucket[i].bytes += size;
+          break;
+        }
+      }
+    }
+  }
+}
+
 PatchedRayDispatch D3D12RTManager::PatchRayDispatch(ID3D12GraphicsCommandList4 *unwrappedCmd,
                                                     rdcarray heaps,
                                                     const D3D12_DISPATCH_RAYS_DESC &desc)
@@ -1281,6 +1453,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
   ret->Type = inputs.Type;
   ret->Flags = inputs.Flags;
   ret->timestamp = m_Timestamp.GetMilliseconds();
+  ret->rtManager = this;
 
   if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
   {
@@ -1565,6 +1738,14 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
   barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
   unwrappedCmd->ResourceBarrier(1, &barrier);
 
+  // only bother tracking build data with a buffer attached, as without the buffer there is nothing
+  // to cache and we don't care too much about missing stats for empty/degenerate ASs
+  if(ret->buffer)
+  {
+    SCOPED_LOCK(m_ASBuildDataLock);
+    m_InMemASBuildDatas.push_back(ret);
+  }
+
   return ret;
 }
 
@@ -2896,59 +3077,14 @@ void ASBuildData::Release()
   unsigned int ret = InterlockedDecrement(&m_RefCount);
   if(ret == 0)
   {
-    {
-#if ENABLED(RDOC_DEVEL)
-      SCOPED_WRITELOCK(dataslock);
-      datas.removeOne(this);
-#endif
-    }
+    if(rtManager)
+      rtManager->RemoveASBuildData(this);
 
     SAFE_RELEASE(buffer);
 
+    if(!filename.empty())
+      FileIO::Delete(filename);
+
     delete this;
   }
 }
-
-#if ENABLED(RDOC_DEVEL)
-Threading::RWLock ASBuildData::dataslock;
-rdcarray<ASBuildData *> ASBuildData::datas;
-#endif
-
-void ASBuildData::GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
-                                        ASStats &tlasAges)
-{
-#if ENABLED(RDOC_DEVEL)
-  SCOPED_READLOCK(dataslock);
-
-  blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50;
-  blasAges.bucket[1].msThreshold = tlasAges.bucket[1].msThreshold = 500;
-  blasAges.bucket[2].msThreshold = tlasAges.bucket[2].msThreshold = 5000;
-  blasAges.bucket[3].msThreshold = tlasAges.bucket[3].msThreshold = ~0U;
-
-  for(ASBuildData *buildData : datas)
-  {
-    if(buildData)
-    {
-      uint32_t age = uint32_t(now - buildData->timestamp);
-
-      ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL
-                          ? tlasAges
-                          : blasAges;
-
-      uint64_t size = buildData->buffer ? buildData->buffer->Size() : 0;
-
-      ages.overheadBytes += buildData->bytesOverhead;
-
-      for(size_t i = 0; i < ARRAY_COUNT(tlasAges.bucket); i++)
-      {
-        if(age <= ages.bucket[i].msThreshold)
-        {
-          ages.bucket[i].count++;
-          ages.bucket[i].bytes += size;
-          break;
-        }
-      }
-    }
-  }
-#endif
-}
diff --git a/renderdoc/driver/d3d12/d3d12_manager.h b/renderdoc/driver/d3d12/d3d12_manager.h
index 306049b9d..15b48c4f1 100644
--- a/renderdoc/driver/d3d12/d3d12_manager.h
+++ b/renderdoc/driver/d3d12/d3d12_manager.h
@@ -1056,6 +1056,8 @@ struct ASStats
   } bucket[4];
 
   uint64_t overheadBytes;
+  uint64_t diskBytes;
+  uint32_t diskCached;
 };
 
 // this is a refcounted GPU buffer with the build data, together with the metadata
@@ -1132,20 +1134,18 @@ struct ASBuildData
   void Release();
 
   D3D12GpuBuffer *buffer = NULL;
-
-  static void GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
-                                    ASStats &tlasAges);
+  rdcstr filename;
+  uint64_t bytesOnDisk = 0;
 
   std::function cleanupCallback;
 
 private:
-  ASBuildData()
-  {
-#if ENABLED(RDOC_DEVEL)
-    SCOPED_WRITELOCK(dataslock);
-    datas.push_back(this);
-#endif
-  }
+  ASBuildData() = default;
+
+  friend class D3D12RTManager;
+  friend class D3D12ResourceManager;
+
+  D3D12RTManager *rtManager = NULL;
 
   // timestamp this build data was recorded on
   double timestamp = 0;
@@ -1157,14 +1157,6 @@ private:
   uint64_t bytesOverhead = 0;
 
   unsigned int m_RefCount = 1;
-
-  friend class D3D12RTManager;
-  friend class D3D12ResourceManager;
-
-#if ENABLED(RDOC_DEVEL)
-  static Threading::RWLock dataslock;
-  static rdcarray<ASBuildData *> datas;
-#endif
 };
 
 DECLARE_REFLECTION_STRUCT(ASBuildData::RVAWithStride);
@@ -1218,6 +1210,18 @@ public:
 
   ASBuildData *CopyBuildInputs(ID3D12GraphicsCommandList4 *unwrappedCmd,
                                const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs);
+  // called when a build data's refcount reaches zero, to remove it from whichever tracking list
+  // it is in. A build data with no buffer is looked up in the disk-cached list.
+  void RemoveASBuildData(ASBuildData *data)
+  {
+    SCOPED_LOCK(m_ASBuildDataLock);
+    if(data->buffer)
+      m_InMemASBuildDatas.removeOne(data);
+    else
+      m_DiskCachedASBuildDatas.removeOne(data);
+  }
+
+  void GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges);
 
   D3D12GpuBuffer *UnrollBLASInstancesList(
       ID3D12GraphicsCommandList4 *unwrappedCmd,
@@ -1237,7 +1241,7 @@ public:
 
   void AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
                           const rdcarray> &callbacks);
-  void CheckPendingASBuilds();
+  void TickASManagement();
 
   void ResizeSerialisationBuffer(UINT64 ScratchDataSizeInBytes);
 
@@ -1254,6 +1258,9 @@ private:
   void InitTLASInstanceCopyingResources();
   void InitReplayBlasPatchingResources();
 
+  void CheckASCaching();
+  void CheckPendingASBuilds();
+
   void CopyFromVA(ID3D12GraphicsCommandList4 *unwrappedCmd, ID3D12Resource *dstRes,
                   uint64_t dstOffset, D3D12_GPU_VIRTUAL_ADDRESS sourceVA, uint64_t byteSize);
 
@@ -1284,6 +1291,10 @@ private:
   // export databases that are alive
   rdcarray m_ExportDatabases;
 
+  Threading::CriticalSection m_ASBuildDataLock;
+  rdcarray<ASBuildData *> m_InMemASBuildDatas;
+  rdcarray<ASBuildData *> m_DiskCachedASBuildDatas;
+
   // is the lookup buffer dirty and needs to be recreated with the latest data?
   bool m_LookupBufferDirty = true;
 
diff --git a/renderdoc/serialise/serialiser.h b/renderdoc/serialise/serialiser.h
index e88b5d541..9f7d519d0 100644
--- a/renderdoc/serialise/serialiser.h
+++ b/renderdoc/serialise/serialiser.h
@@ -1056,7 +1056,9 @@ public:
   Serialiser &SerialiseStream(const rdcstr &name, StreamReader &stream,
                               RENDERDOC_ProgressCallback progress = RENDERDOC_ProgressCallback())
   {
-    RDCCOMPILE_ASSERT(IsWriting(), "Can't read into a StreamReader");
+    // we don't make this a compile-time assert so this code can be compiled in a
+    // templated-serialisation function (but we still assert as this will not be valid to execute)
+    RDCASSERTMSG("Can't read into a StreamReader", IsWriting());
 
     uint64_t totalSize = stream.GetSize();
 