Cache ASs to disk after a number of seconds

This commit is contained in:
baldurk
2024-11-15 16:47:45 +00:00
parent 5a9f200f94
commit e19869239b
6 changed files with 235 additions and 83 deletions
@@ -786,7 +786,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
}
// check AS builds now
GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
GetResourceManager()->GetRTManager()->TickASManagement();
}
if(IsCaptureMode(m_State))
+6 -7
View File
@@ -2440,13 +2440,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
rdcstr overlayText =
RenderDoc::Inst().GetOverlayText(RDCDriver::D3D12, devWnd, m_FrameCounter, 0);
#if ENABLED(RDOC_DEVEL)
if(D3D12_Debug_RTOverlay() && m_UsedRT)
{
double now = GetResourceManager()->GetRTManager()->GetCurrentASTimestamp();
ASStats blasStats = {}, tlasStats = {};
ASBuildData::GatherASAgeStatistics(GetResourceManager(), now, blasStats, tlasStats);
GetResourceManager()->GetRTManager()->GatherASAgeStatistics(blasStats, tlasStats);
overlayText += " TLAS BLAS\n";
@@ -2464,10 +2462,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
}
overlayText += StringFormat::Fmt(
"%.2f MB overhead\n",
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f);
"%.2f MB overhead, %.2f MB (%u BLAS %u TLAS) cached to disk\n",
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f,
float(blasStats.diskBytes + tlasStats.diskBytes) / 1048576.0f, blasStats.diskCached,
tlasStats.diskCached);
}
#endif
m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText);
@@ -2698,7 +2697,7 @@ void WrappedID3D12Device::StartFrameCapture(DeviceOwnedWindow devWnd)
GPUSyncAllQueues();
// wait until we've synced all queues to check for these
GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
GetResourceManager()->GetRTManager()->TickASManagement();
GetResourceManager()->PrepareInitialContents();
+20 -5
View File
@@ -616,6 +616,8 @@ uint64_t D3D12ResourceManager::GetSize_InitialState(ResourceId id, const D3D12In
if(buildData->buffer)
ret += 64 + buildData->buffer->Size();
ret += 64 + buildData->bytesOnDisk;
return ret;
}
}
@@ -1364,6 +1366,10 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
ret = false;
}
}
else if(!initial->buildData->filename.empty())
{
ContentsLength = initial->buildData->bytesOnDisk;
}
buildData = initial->buildData;
}
@@ -1422,11 +1428,20 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
BufferContents = tempAlloc = new byte[(size_t)ContentsLength];
}
// not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
// directly into already allocated memory (either directly upload memory for BLAS, or
// temporary memory to patch for TLASs)
ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
.Important();
if(!buildData->filename.empty() && ser.IsWriting())
{
StreamReader reader(FileIO::fopen(buildData->filename, FileIO::ReadBinary));
ser.SerialiseStream("BufferContents"_lit, reader);
}
else
{
// not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
// directly into already allocated memory (either directly upload memory for BLAS, or
// temporary memory to patch for TLASs)
ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
.Important();
}
if(buildData)
{
+177 -50
View File
@@ -27,6 +27,7 @@
#include "core/settings.h"
#include "driver/dx/official/d3dcompiler.h"
#include "driver/dxgi/dxgi_common.h"
#include "strings/string_utils.h"
#include "d3d12_command_list.h"
#include "d3d12_command_queue.h"
#include "d3d12_device.h"
@@ -45,6 +46,15 @@ RDOC_CONFIG(
uint32_t, D3D12_Debug_RTMaxVertexPercentIncrease, 10,
"Percentage increase for the API-provided max vertex when building a BLAS with an index "
"buffer, to account for incorrectly set values by application.");
// age (in milliseconds since the build was recorded) after which an AS's build data becomes
// eligible to be moved from memory to disk
RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheThreshold, 5000,
            "How many milliseconds to wait before caching an AS to disk if it has been unmodified "
            "for that long");
// batch 50 at a time, if we have one check per frame this would cache 5000 BLASs in 100 frames
// which is a reasonable background pace
RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheBatchSize, 50,
            "The maximum number of ASs to cache to disk in a single batch (batch processing "
            "happens at indeterminate intervals but no more than once per submission)");
void D3D12Descriptor::Init(const D3D12_SAMPLER_DESC2 *pDesc)
{
@@ -810,6 +820,104 @@ void D3D12RTManager::AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
}
}
// Periodic entry point for AS build data housekeeping: first processes pending AS builds via
// CheckPendingASBuilds(), then runs CheckASCaching() to flush old, completed build data to disk.
void D3D12RTManager::TickASManagement()
{
CheckPendingASBuilds();
CheckASCaching();
}
void D3D12RTManager::CheckASCaching()
{
double now = m_Timestamp.GetMilliseconds();
SCOPED_LOCK(m_ASBuildDataLock);
const uint32_t ageThreshold = D3D12_Debug_RTASCacheThreshold();
const size_t maxCacheBatch = D3D12_Debug_RTASCacheBatchSize();
// see if any AS builds are finished and old enough that we should flush them to disk.
// to avoid doing too much work at a time we do these in batches of up to N. They're pushed in
// order so the first one is oldest. We don't care too much about completion (there may be a
// slight gap between record/create and submission, but that will be dominated by the time
// between submission and it being old enough) but we don't want an AS which is built but never
// submitted or destroyed and stays potential forever to block caching, so we skip over any such
// ASs and start from the first old-enough AS.
size_t first = ~0U;
for(size_t i = 0; i < m_InMemASBuildDatas.size(); i++)
{
ASBuildData *buildData = m_InMemASBuildDatas[i];
uint32_t age = uint32_t(now - buildData->timestamp);
// if we encounter one that is too young, bail out as all later ones will be too young as well
if(age < ageThreshold)
break;
// skip any that are somehow old enough but not complete
if(!buildData->IsWorkComplete())
continue;
// this build is both complete and old enough, store
first = i;
}
// if we didn't find one at all, stop now.
if(first == ~0U)
return;
// the build data at [first] is both old enough to be cached and complete! we take a few more -
// up to a small batch at a time.
size_t last;
for(last = first; last < m_InMemASBuildDatas.size() && last < first + maxCacheBatch; last++)
{
ASBuildData *buildData = m_InMemASBuildDatas[last];
uint32_t age = uint32_t(now - buildData->timestamp);
// as soon as we find a build which is either too new or not complete, we're finished.
if(age < ageThreshold || !buildData->IsWorkComplete())
{
// decrement last now so that it is inclusive of the range. We know [first] will have passed
// because it can only have gotten older
last--;
break;
}
}
// if the whole list was old then last could be pointing off the end
if(last == m_InMemASBuildDatas.size())
last--;
// whether there were more to batch or not, last is the last element (and may be equal to first)
for(size_t i = first; i <= last; i++)
{
ASBuildData *buildData = m_InMemASBuildDatas[i];
RDCDEBUG("Flushing AS build data of size %llu to disk", buildData->buffer->Size());
// de-interleave positions in geoms here if their stride is greater than vertex format?
buildData->filename = StringFormat::Fmt(
"%s/rdoc_as_%llu_%llu.bin", get_dirname(RenderDoc::Inst().GetCaptureFileTemplate()).c_str(),
Timing::GetTick(), Threading::GetCurrentID());
buildData->bytesOnDisk = buildData->buffer->Size();
FileIO::CreateParentDirectory(buildData->filename);
{
StreamWriter writer(FileIO::fopen(buildData->filename, FileIO::WriteBinary), Ownership::Stream);
writer.Write(buildData->buffer->Map(), buildData->buffer->Size());
}
buildData->buffer->Unmap();
SAFE_RELEASE(buildData->buffer);
m_DiskCachedASBuildDatas.push_back(buildData);
}
// remove the build datas that we've processed
m_InMemASBuildDatas.erase(first, last - first + 1);
}
void D3D12RTManager::CheckPendingASBuilds()
{
std::map<ID3D12Fence *, UINT64> fenceValues;
@@ -836,6 +944,61 @@ void D3D12RTManager::CheckPendingASBuilds()
m_PendingASBuilds.removeIf([](const PendingASBuild &build) { return build.fence == NULL; });
}
// Accumulates statistics about tracked AS build data into blasAges (bottom-level) and tlasAges
// (top-level). Disk-cached build data contributes to diskBytes/diskCached, in-memory build data
// contributes to overheadBytes and to the first age bucket whose threshold covers its age.
// The caller's structs are accumulated into, only the bucket thresholds are overwritten here.
void D3D12RTManager::GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges)
{
  const double now = m_Timestamp.GetMilliseconds();

  SCOPED_LOCK(m_ASBuildDataLock);

  // upper age limit in milliseconds for each bucket, the last bucket catches everything else
  const uint32_t bucketLimits[] = {50, 250, 2000, ~0U};
  for(size_t b = 0; b < ARRAY_COUNT(bucketLimits); b++)
  {
    tlasAges.bucket[b].msThreshold = bucketLimits[b];
    blasAges.bucket[b].msThreshold = bucketLimits[b];
  }

  // tally everything that has already been written out to disk
  for(size_t d = 0; d < m_DiskCachedASBuildDatas.size(); d++)
  {
    ASBuildData *cached = m_DiskCachedASBuildDatas[d];

    if(!cached || cached->filename.empty())
      continue;

    const bool topLevel = cached->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
    ASStats &stats = topLevel ? tlasAges : blasAges;

    stats.diskBytes += cached->bytesOnDisk;
    stats.diskCached++;
  }

  // then bucket everything still held in memory by its age
  for(size_t m = 0; m < m_InMemASBuildDatas.size(); m++)
  {
    ASBuildData *inMem = m_InMemASBuildDatas[m];

    if(!inMem)
      continue;

    // should never encounter this
    if(!inMem->filename.empty())
      continue;

    const bool topLevel = inMem->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
    ASStats &stats = topLevel ? tlasAges : blasAges;

    const uint32_t age = uint32_t(now - inMem->timestamp);

    uint64_t size = 0;
    if(inMem->buffer)
      size = inMem->buffer->Size();

    stats.overheadBytes += inMem->bytesOverhead;

    // only the first bucket that is large enough counts this build data
    size_t b = 0;
    while(b < ARRAY_COUNT(tlasAges.bucket) && age > stats.bucket[b].msThreshold)
      b++;

    if(b < ARRAY_COUNT(tlasAges.bucket))
    {
      stats.bucket[b].count++;
      stats.bucket[b].bytes += size;
    }
  }
}
PatchedRayDispatch D3D12RTManager::PatchRayDispatch(ID3D12GraphicsCommandList4 *unwrappedCmd,
rdcarray<ResourceId> heaps,
const D3D12_DISPATCH_RAYS_DESC &desc)
@@ -1281,6 +1444,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
ret->Type = inputs.Type;
ret->Flags = inputs.Flags;
ret->timestamp = m_Timestamp.GetMilliseconds();
ret->rtManager = this;
if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
{
@@ -1565,6 +1729,14 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
unwrappedCmd->ResourceBarrier(1, &barrier);
// only bother tracking build data with a buffer attached, as without the buffer there is nothing
// to cache and we don't care too much about missing stats for empty/degenerate ASs
if(ret->buffer)
{
SCOPED_LOCK(m_ASBuildDataLock);
m_InMemASBuildDatas.push_back(ret);
}
return ret;
}
@@ -2896,59 +3068,14 @@ void ASBuildData::Release()
unsigned int ret = InterlockedDecrement(&m_RefCount);
if(ret == 0)
{
{
#if ENABLED(RDOC_DEVEL)
SCOPED_WRITELOCK(dataslock);
datas.removeOne(this);
#endif
}
if(rtManager)
rtManager->RemoveASBuildData(this);
SAFE_RELEASE(buffer);
if(!filename.empty())
FileIO::Delete(filename);
delete this;
}
}
#if ENABLED(RDOC_DEVEL)
Threading::RWLock ASBuildData::dataslock;
rdcarray<ASBuildData *> ASBuildData::datas;
#endif
// Accumulates age statistics for every ASBuildData in the global 'datas' list into blasAges
// (bottom-level) and tlasAges (top-level), using 'now' as the reference time in milliseconds.
// Only does anything in builds with RDOC_DEVEL enabled. 'rm' is not used.
void ASBuildData::GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
                                        ASStats &tlasAges)
{
#if ENABLED(RDOC_DEVEL)
  SCOPED_READLOCK(dataslock);

  // upper age limit in milliseconds for each bucket, the last bucket catches everything else
  const uint32_t bucketLimits[] = {50, 500, 5000, ~0U};
  for(size_t b = 0; b < ARRAY_COUNT(bucketLimits); b++)
  {
    tlasAges.bucket[b].msThreshold = bucketLimits[b];
    blasAges.bucket[b].msThreshold = bucketLimits[b];
  }

  for(size_t d = 0; d < datas.size(); d++)
  {
    ASBuildData *entry = datas[d];

    if(!entry)
      continue;

    const bool topLevel = entry->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL;
    ASStats &stats = topLevel ? tlasAges : blasAges;

    const uint32_t age = uint32_t(now - entry->timestamp);

    uint64_t size = 0;
    if(entry->buffer)
      size = entry->buffer->Size();

    stats.overheadBytes += entry->bytesOverhead;

    // only the first bucket that is large enough counts this build data
    size_t b = 0;
    while(b < ARRAY_COUNT(tlasAges.bucket) && age > stats.bucket[b].msThreshold)
      b++;

    if(b < ARRAY_COUNT(tlasAges.bucket))
    {
      stats.bucket[b].count++;
      stats.bucket[b].bytes += size;
    }
  }
#endif
}
+28 -19
View File
@@ -1056,6 +1056,8 @@ struct ASStats
} bucket[4];
uint64_t overheadBytes;
uint64_t diskBytes;
uint32_t diskCached;
};
// this is a refcounted GPU buffer with the build data, together with the metadata
@@ -1132,20 +1134,18 @@ struct ASBuildData
void Release();
D3D12GpuBuffer *buffer = NULL;
static void GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
ASStats &tlasAges);
rdcstr filename;
uint64_t bytesOnDisk = 0;
std::function<bool()> cleanupCallback;
private:
ASBuildData()
{
#if ENABLED(RDOC_DEVEL)
SCOPED_WRITELOCK(dataslock);
datas.push_back(this);
#endif
}
ASBuildData() = default;
friend class D3D12RTManager;
friend class D3D12ResourceManager;
D3D12RTManager *rtManager = NULL;
// timestamp this build data was recorded on
double timestamp = 0;
@@ -1157,14 +1157,6 @@ private:
uint64_t bytesOverhead = 0;
unsigned int m_RefCount = 1;
friend class D3D12RTManager;
friend class D3D12ResourceManager;
#if ENABLED(RDOC_DEVEL)
static Threading::RWLock dataslock;
static rdcarray<ASBuildData *> datas;
#endif
};
DECLARE_REFLECTION_STRUCT(ASBuildData::RVAWithStride);
@@ -1218,6 +1210,16 @@ public:
ASBuildData *CopyBuildInputs(ID3D12GraphicsCommandList4 *unwrappedCmd,
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs);
// Stops tracking the given build data. Build data which still has its buffer is removed from the
// in-memory list, otherwise it is removed from the disk-cached list. Takes m_ASBuildDataLock.
void RemoveASBuildData(ASBuildData *data)
{
  SCOPED_LOCK(m_ASBuildDataLock);

  // the presence of a buffer decides which list the build data is looked up in
  rdcarray<ASBuildData *> &owner = data->buffer ? m_InMemASBuildDatas : m_DiskCachedASBuildDatas;
  owner.removeOne(data);
}
void GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges);
D3D12GpuBuffer *UnrollBLASInstancesList(
ID3D12GraphicsCommandList4 *unwrappedCmd,
@@ -1237,7 +1239,7 @@ public:
void AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
const rdcarray<std::function<bool()>> &callbacks);
void CheckPendingASBuilds();
void TickASManagement();
void ResizeSerialisationBuffer(UINT64 ScratchDataSizeInBytes);
@@ -1254,6 +1256,9 @@ private:
void InitTLASInstanceCopyingResources();
void InitReplayBlasPatchingResources();
void CheckASCaching();
void CheckPendingASBuilds();
void CopyFromVA(ID3D12GraphicsCommandList4 *unwrappedCmd, ID3D12Resource *dstRes,
uint64_t dstOffset, D3D12_GPU_VIRTUAL_ADDRESS sourceVA, uint64_t byteSize);
@@ -1284,6 +1289,10 @@ private:
// export databases that are alive
rdcarray<D3D12ShaderExportDatabase *> m_ExportDatabases;
Threading::CriticalSection m_ASBuildDataLock;
rdcarray<ASBuildData *> m_InMemASBuildDatas;
rdcarray<ASBuildData *> m_DiskCachedASBuildDatas;
// is the lookup buffer dirty and needs to be recreated with the latest data?
bool m_LookupBufferDirty = true;
+3 -1
View File
@@ -1056,7 +1056,9 @@ public:
Serialiser &SerialiseStream(const rdcstr &name, StreamReader &stream,
RENDERDOC_ProgressCallback progress = RENDERDOC_ProgressCallback())
{
RDCCOMPILE_ASSERT(IsWriting(), "Can't read into a StreamReader");
// we don't make this a compile-time assert so this code can be compiled in a
// templated-serialisation function (but we still assert as this will not be valid to execute)
RDCASSERTMSG("Can't read into a StreamReader", IsWriting());
uint64_t totalSize = stream.GetSize();