mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
Cache ASs to disk after a number of seconds
This commit is contained in:
@@ -786,7 +786,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
|
||||
}
|
||||
|
||||
// check AS builds now
|
||||
GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
|
||||
GetResourceManager()->GetRTManager()->TickASManagement();
|
||||
}
|
||||
|
||||
if(IsCaptureMode(m_State))
|
||||
|
||||
@@ -2440,13 +2440,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
|
||||
rdcstr overlayText =
|
||||
RenderDoc::Inst().GetOverlayText(RDCDriver::D3D12, devWnd, m_FrameCounter, 0);
|
||||
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
if(D3D12_Debug_RTOverlay() && m_UsedRT)
|
||||
{
|
||||
double now = GetResourceManager()->GetRTManager()->GetCurrentASTimestamp();
|
||||
ASStats blasStats = {}, tlasStats = {};
|
||||
|
||||
ASBuildData::GatherASAgeStatistics(GetResourceManager(), now, blasStats, tlasStats);
|
||||
GetResourceManager()->GetRTManager()->GatherASAgeStatistics(blasStats, tlasStats);
|
||||
|
||||
overlayText += " TLAS BLAS\n";
|
||||
|
||||
@@ -2464,10 +2462,11 @@ HRESULT WrappedID3D12Device::Present(ID3D12GraphicsCommandList *pOverlayCommandL
|
||||
}
|
||||
|
||||
overlayText += StringFormat::Fmt(
|
||||
"%.2f MB overhead\n",
|
||||
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f);
|
||||
"%.2f MB overhead, %.2f MB (%u BLAS %u TLAS) cached to disk\n",
|
||||
float(blasStats.overheadBytes + tlasStats.overheadBytes) / 1048576.0f,
|
||||
float(blasStats.diskBytes + tlasStats.diskBytes) / 1048576.0f, blasStats.diskCached,
|
||||
tlasStats.diskCached);
|
||||
}
|
||||
#endif
|
||||
|
||||
m_TextRenderer->RenderText(list, 0.0f, 0.0f, overlayText);
|
||||
|
||||
@@ -2698,7 +2697,7 @@ void WrappedID3D12Device::StartFrameCapture(DeviceOwnedWindow devWnd)
|
||||
GPUSyncAllQueues();
|
||||
|
||||
// wait until we've synced all queues to check for these
|
||||
GetResourceManager()->GetRTManager()->CheckPendingASBuilds();
|
||||
GetResourceManager()->GetRTManager()->TickASManagement();
|
||||
|
||||
GetResourceManager()->PrepareInitialContents();
|
||||
|
||||
|
||||
@@ -616,6 +616,8 @@ uint64_t D3D12ResourceManager::GetSize_InitialState(ResourceId id, const D3D12In
|
||||
if(buildData->buffer)
|
||||
ret += 64 + buildData->buffer->Size();
|
||||
|
||||
ret += 64 + buildData->bytesOnDisk;
|
||||
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@@ -1364,6 +1366,10 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
|
||||
ret = false;
|
||||
}
|
||||
}
|
||||
else if(!initial->buildData->filename.empty())
|
||||
{
|
||||
ContentsLength = initial->buildData->bytesOnDisk;
|
||||
}
|
||||
|
||||
buildData = initial->buildData;
|
||||
}
|
||||
@@ -1422,11 +1428,20 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
|
||||
BufferContents = tempAlloc = new byte[(size_t)ContentsLength];
|
||||
}
|
||||
|
||||
// not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
|
||||
// directly into already allocated memory (either directly upload memory for BLAS, or
|
||||
// temporary memory to patch for TLASs)
|
||||
ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
|
||||
.Important();
|
||||
if(!buildData->filename.empty() && ser.IsWriting())
|
||||
{
|
||||
StreamReader reader(FileIO::fopen(buildData->filename, FileIO::ReadBinary));
|
||||
|
||||
ser.SerialiseStream("BufferContents"_lit, reader);
|
||||
}
|
||||
else
|
||||
{
|
||||
// not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
|
||||
// directly into already allocated memory (either directly upload memory for BLAS, or
|
||||
// temporary memory to patch for TLASs)
|
||||
ser.Serialise("BufferContents"_lit, BufferContents, ContentsLength, SerialiserFlags::NoFlags)
|
||||
.Important();
|
||||
}
|
||||
|
||||
if(buildData)
|
||||
{
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "core/settings.h"
|
||||
#include "driver/dx/official/d3dcompiler.h"
|
||||
#include "driver/dxgi/dxgi_common.h"
|
||||
#include "strings/string_utils.h"
|
||||
#include "d3d12_command_list.h"
|
||||
#include "d3d12_command_queue.h"
|
||||
#include "d3d12_device.h"
|
||||
@@ -45,6 +46,15 @@ RDOC_CONFIG(
|
||||
uint32_t, D3D12_Debug_RTMaxVertexPercentIncrease, 10,
|
||||
"Percentage increase for the API-provided max vertex when building a BLAS with an index "
|
||||
"buffer, to account for incorrectly set values by application.");
|
||||
RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheThreshold, 5000,
|
||||
"How many milliseconds to wait before caching an AS to disk if it has been unmodified "
|
||||
"for that long");
|
||||
|
||||
// batch 50 at a time, if we have one check per frame this would cache 5000 BLASs in 100 frames
|
||||
// which is a reasonable background pace
|
||||
RDOC_CONFIG(uint32_t, D3D12_Debug_RTASCacheBatchSize, 50,
|
||||
"The maximum number of ASs to cache to disk in a single batch (batch processing "
|
||||
"happens at indeterminate intervals but no more than once per submission");
|
||||
|
||||
void D3D12Descriptor::Init(const D3D12_SAMPLER_DESC2 *pDesc)
|
||||
{
|
||||
@@ -810,6 +820,104 @@ void D3D12RTManager::AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
|
||||
}
|
||||
}
|
||||
|
||||
void D3D12RTManager::TickASManagement()
|
||||
{
|
||||
CheckPendingASBuilds();
|
||||
CheckASCaching();
|
||||
}
|
||||
|
||||
void D3D12RTManager::CheckASCaching()
|
||||
{
|
||||
double now = m_Timestamp.GetMilliseconds();
|
||||
|
||||
SCOPED_LOCK(m_ASBuildDataLock);
|
||||
|
||||
const uint32_t ageThreshold = D3D12_Debug_RTASCacheThreshold();
|
||||
const size_t maxCacheBatch = D3D12_Debug_RTASCacheBatchSize();
|
||||
|
||||
// see if any AS builds are finished and old enough that we should flush them to disk.
|
||||
// to avoid doing too much work at a time we do these in batches of up to N. They're pushed in
|
||||
// order so the first one is oldest. We don't care too much about completion (there may be a
|
||||
// slight gap between record/create and submission, but that will be dominated by the time
|
||||
// between submission and it being old enough) but we don't want an AS which is built but never
|
||||
// submitted or destroyed and stays potential forever to block caching, so we skip over any such
|
||||
// ASs and start from the first old-enough AS.
|
||||
size_t first = ~0U;
|
||||
for(size_t i = 0; i < m_InMemASBuildDatas.size(); i++)
|
||||
{
|
||||
ASBuildData *buildData = m_InMemASBuildDatas[i];
|
||||
|
||||
uint32_t age = uint32_t(now - buildData->timestamp);
|
||||
|
||||
// if we encounter one that is too young, bail out as all later ones will be too young as well
|
||||
if(age < ageThreshold)
|
||||
break;
|
||||
|
||||
// skip any that are somehow old enough but not complete
|
||||
if(!buildData->IsWorkComplete())
|
||||
continue;
|
||||
|
||||
// this build is both complete and old enough, store
|
||||
first = i;
|
||||
}
|
||||
|
||||
// if we didn't find one at all, stop now.
|
||||
if(first == ~0U)
|
||||
return;
|
||||
|
||||
// the build data at [first] is both old enough to be cached and complete! we take a few more -
|
||||
// up to a small batch at a time.
|
||||
size_t last;
|
||||
for(last = first; last < m_InMemASBuildDatas.size() && last < first + maxCacheBatch; last++)
|
||||
{
|
||||
ASBuildData *buildData = m_InMemASBuildDatas[last];
|
||||
|
||||
uint32_t age = uint32_t(now - buildData->timestamp);
|
||||
|
||||
// as soon as we find a build which is either too new or not complete, we're finished.
|
||||
if(age < ageThreshold || !buildData->IsWorkComplete())
|
||||
{
|
||||
// decrement last now so that it is inclusive of the range. We know [first] will have passed
|
||||
// because it can only have gotten older
|
||||
last--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if the whole list was old then last could be pointing off the end
|
||||
if(last == m_InMemASBuildDatas.size())
|
||||
last--;
|
||||
|
||||
// whether there were more to batch or not, last is the last element (and may be equal to first)
|
||||
|
||||
for(size_t i = first; i <= last; i++)
|
||||
{
|
||||
ASBuildData *buildData = m_InMemASBuildDatas[i];
|
||||
|
||||
RDCDEBUG("Flushing AS build data of size %llu to disk", buildData->buffer->Size());
|
||||
|
||||
// de-interleave positions in geoms here if their stride is greater than vertex format?
|
||||
buildData->filename = StringFormat::Fmt(
|
||||
"%s/rdoc_as_%llu_%llu.bin", get_dirname(RenderDoc::Inst().GetCaptureFileTemplate()).c_str(),
|
||||
Timing::GetTick(), Threading::GetCurrentID());
|
||||
buildData->bytesOnDisk = buildData->buffer->Size();
|
||||
FileIO::CreateParentDirectory(buildData->filename);
|
||||
|
||||
{
|
||||
StreamWriter writer(FileIO::fopen(buildData->filename, FileIO::WriteBinary), Ownership::Stream);
|
||||
writer.Write(buildData->buffer->Map(), buildData->buffer->Size());
|
||||
}
|
||||
|
||||
buildData->buffer->Unmap();
|
||||
SAFE_RELEASE(buildData->buffer);
|
||||
|
||||
m_DiskCachedASBuildDatas.push_back(buildData);
|
||||
}
|
||||
|
||||
// remove the build datas that we've processed
|
||||
m_InMemASBuildDatas.erase(first, last - first + 1);
|
||||
}
|
||||
|
||||
void D3D12RTManager::CheckPendingASBuilds()
|
||||
{
|
||||
std::map<ID3D12Fence *, UINT64> fenceValues;
|
||||
@@ -836,6 +944,61 @@ void D3D12RTManager::CheckPendingASBuilds()
|
||||
m_PendingASBuilds.removeIf([](const PendingASBuild &build) { return build.fence == NULL; });
|
||||
}
|
||||
|
||||
// Accumulates statistics about tracked AS build data into blasAges / tlasAges (selected by each
// entry's Type). Disk-cached entries add to diskBytes / diskCached; in-memory entries add to
// overheadBytes and to the first age bucket whose msThreshold is not below the entry's age.
// The bucket thresholds are (re)written here; all other fields are only added to, so callers
// are expected to pass in zeroed stats.
void D3D12RTManager::GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges)
{
  const double now = m_Timestamp.GetMilliseconds();

  SCOPED_LOCK(m_ASBuildDataLock);

  // upper bound in milliseconds of each age bucket, the last one catches everything else
  const unsigned int bucketLimits[] = {50, 250, 2000, ~0U};
  for(size_t b = 0; b < ARRAY_COUNT(bucketLimits); b++)
  {
    tlasAges.bucket[b].msThreshold = bucketLimits[b];
    blasAges.bucket[b].msThreshold = bucketLimits[b];
  }

  // entries that have been flushed to disk only contribute to the disk totals
  for(ASBuildData *cached : m_DiskCachedASBuildDatas)
  {
    if(!cached || cached->filename.empty())
      continue;

    ASStats &stats =
        cached->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL ? tlasAges : blasAges;

    stats.diskBytes += cached->bytesOnDisk;
    stats.diskCached++;
  }

  // entries still in memory are bucketed by age
  for(ASBuildData *inMem : m_InMemASBuildDatas)
  {
    if(!inMem)
      continue;

    // should never encounter this - anything with a filename belongs in the disk-cached list
    if(!inMem->filename.empty())
      continue;

    const uint32_t age = uint32_t(now - inMem->timestamp);

    ASStats &stats =
        inMem->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL ? tlasAges : blasAges;

    uint64_t size = 0;
    if(inMem->buffer)
      size = inMem->buffer->Size();

    stats.overheadBytes += inMem->bytesOverhead;

    // count the entry in the first bucket it fits into, and only that one
    for(auto &slot : stats.bucket)
    {
      if(age > slot.msThreshold)
        continue;

      slot.count++;
      slot.bytes += size;
      break;
    }
  }
}
|
||||
|
||||
PatchedRayDispatch D3D12RTManager::PatchRayDispatch(ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
rdcarray<ResourceId> heaps,
|
||||
const D3D12_DISPATCH_RAYS_DESC &desc)
|
||||
@@ -1281,6 +1444,7 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
ret->Type = inputs.Type;
|
||||
ret->Flags = inputs.Flags;
|
||||
ret->timestamp = m_Timestamp.GetMilliseconds();
|
||||
ret->rtManager = this;
|
||||
|
||||
if(inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
|
||||
{
|
||||
@@ -1565,6 +1729,14 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
|
||||
unwrappedCmd->ResourceBarrier(1, &barrier);
|
||||
|
||||
// only bother tracking build data with a buffer attached, as without the buffer there is nothing
|
||||
// to cache and we don't care too much about missing stats for empty/degenerate ASs
|
||||
if(ret->buffer)
|
||||
{
|
||||
SCOPED_LOCK(m_ASBuildDataLock);
|
||||
m_InMemASBuildDatas.push_back(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2896,59 +3068,14 @@ void ASBuildData::Release()
|
||||
unsigned int ret = InterlockedDecrement(&m_RefCount);
|
||||
if(ret == 0)
|
||||
{
|
||||
{
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
SCOPED_WRITELOCK(dataslock);
|
||||
datas.removeOne(this);
|
||||
#endif
|
||||
}
|
||||
if(rtManager)
|
||||
rtManager->RemoveASBuildData(this);
|
||||
|
||||
SAFE_RELEASE(buffer);
|
||||
|
||||
if(!filename.empty())
|
||||
FileIO::Delete(filename);
|
||||
|
||||
delete this;
|
||||
}
|
||||
}
|
||||
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
Threading::RWLock ASBuildData::dataslock;
|
||||
rdcarray<ASBuildData *> ASBuildData::datas;
|
||||
#endif
|
||||
|
||||
void ASBuildData::GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
|
||||
ASStats &tlasAges)
|
||||
{
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
SCOPED_READLOCK(dataslock);
|
||||
|
||||
blasAges.bucket[0].msThreshold = tlasAges.bucket[0].msThreshold = 50;
|
||||
blasAges.bucket[1].msThreshold = tlasAges.bucket[1].msThreshold = 500;
|
||||
blasAges.bucket[2].msThreshold = tlasAges.bucket[2].msThreshold = 5000;
|
||||
blasAges.bucket[3].msThreshold = tlasAges.bucket[3].msThreshold = ~0U;
|
||||
|
||||
for(ASBuildData *buildData : datas)
|
||||
{
|
||||
if(buildData)
|
||||
{
|
||||
uint32_t age = uint32_t(now - buildData->timestamp);
|
||||
|
||||
ASStats &ages = buildData->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL
|
||||
? tlasAges
|
||||
: blasAges;
|
||||
|
||||
uint64_t size = buildData->buffer ? buildData->buffer->Size() : 0;
|
||||
|
||||
ages.overheadBytes += buildData->bytesOverhead;
|
||||
|
||||
for(size_t i = 0; i < ARRAY_COUNT(tlasAges.bucket); i++)
|
||||
{
|
||||
if(age <= ages.bucket[i].msThreshold)
|
||||
{
|
||||
ages.bucket[i].count++;
|
||||
ages.bucket[i].bytes += size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1056,6 +1056,8 @@ struct ASStats
|
||||
} bucket[4];
|
||||
|
||||
uint64_t overheadBytes;
|
||||
uint64_t diskBytes;
|
||||
uint32_t diskCached;
|
||||
};
|
||||
|
||||
// this is a refcounted GPU buffer with the build data, together with the metadata
|
||||
@@ -1132,20 +1134,18 @@ struct ASBuildData
|
||||
void Release();
|
||||
|
||||
D3D12GpuBuffer *buffer = NULL;
|
||||
|
||||
static void GatherASAgeStatistics(D3D12ResourceManager *rm, double now, ASStats &blasAges,
|
||||
ASStats &tlasAges);
|
||||
rdcstr filename;
|
||||
uint64_t bytesOnDisk = 0;
|
||||
|
||||
std::function<bool()> cleanupCallback;
|
||||
|
||||
private:
|
||||
ASBuildData()
|
||||
{
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
SCOPED_WRITELOCK(dataslock);
|
||||
datas.push_back(this);
|
||||
#endif
|
||||
}
|
||||
ASBuildData() = default;
|
||||
|
||||
friend class D3D12RTManager;
|
||||
friend class D3D12ResourceManager;
|
||||
|
||||
D3D12RTManager *rtManager = NULL;
|
||||
|
||||
// timestamp this build data was recorded on
|
||||
double timestamp = 0;
|
||||
@@ -1157,14 +1157,6 @@ private:
|
||||
uint64_t bytesOverhead = 0;
|
||||
|
||||
unsigned int m_RefCount = 1;
|
||||
|
||||
friend class D3D12RTManager;
|
||||
friend class D3D12ResourceManager;
|
||||
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
static Threading::RWLock dataslock;
|
||||
static rdcarray<ASBuildData *> datas;
|
||||
#endif
|
||||
};
|
||||
|
||||
DECLARE_REFLECTION_STRUCT(ASBuildData::RVAWithStride);
|
||||
@@ -1218,6 +1210,16 @@ public:
|
||||
|
||||
ASBuildData *CopyBuildInputs(ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs);
|
||||
// Stops tracking the given build data, under m_ASBuildDataLock. Build data that still has a
// buffer is removed from the in-memory list; build data without one is removed from the
// disk-cached list.
void RemoveASBuildData(ASBuildData *data)
{
  SCOPED_LOCK(m_ASBuildDataLock);

  rdcarray<ASBuildData *> &trackingList =
      data->buffer ? m_InMemASBuildDatas : m_DiskCachedASBuildDatas;

  trackingList.removeOne(data);
}
|
||||
|
||||
void GatherASAgeStatistics(ASStats &blasAges, ASStats &tlasAges);
|
||||
|
||||
D3D12GpuBuffer *UnrollBLASInstancesList(
|
||||
ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
@@ -1237,7 +1239,7 @@ public:
|
||||
|
||||
void AddPendingASBuilds(ID3D12Fence *fence, UINT64 waitValue,
|
||||
const rdcarray<std::function<bool()>> &callbacks);
|
||||
void CheckPendingASBuilds();
|
||||
void TickASManagement();
|
||||
|
||||
void ResizeSerialisationBuffer(UINT64 ScratchDataSizeInBytes);
|
||||
|
||||
@@ -1254,6 +1256,9 @@ private:
|
||||
void InitTLASInstanceCopyingResources();
|
||||
void InitReplayBlasPatchingResources();
|
||||
|
||||
void CheckASCaching();
|
||||
void CheckPendingASBuilds();
|
||||
|
||||
void CopyFromVA(ID3D12GraphicsCommandList4 *unwrappedCmd, ID3D12Resource *dstRes,
|
||||
uint64_t dstOffset, D3D12_GPU_VIRTUAL_ADDRESS sourceVA, uint64_t byteSize);
|
||||
|
||||
@@ -1284,6 +1289,10 @@ private:
|
||||
// export databases that are alive
|
||||
rdcarray<D3D12ShaderExportDatabase *> m_ExportDatabases;
|
||||
|
||||
Threading::CriticalSection m_ASBuildDataLock;
|
||||
rdcarray<ASBuildData *> m_InMemASBuildDatas;
|
||||
rdcarray<ASBuildData *> m_DiskCachedASBuildDatas;
|
||||
|
||||
// is the lookup buffer dirty and needs to be recreated with the latest data?
|
||||
bool m_LookupBufferDirty = true;
|
||||
|
||||
|
||||
@@ -1056,7 +1056,9 @@ public:
|
||||
Serialiser &SerialiseStream(const rdcstr &name, StreamReader &stream,
|
||||
RENDERDOC_ProgressCallback progress = RENDERDOC_ProgressCallback())
|
||||
{
|
||||
RDCCOMPILE_ASSERT(IsWriting(), "Can't read into a StreamReader");
|
||||
// we don't make this a compile-time assert so this code can be compiled in a
|
||||
// templated-serialisation function (but we still assert as this will not be valid to execute)
|
||||
RDCASSERTMSG("Can't read into a StreamReader", IsWriting());
|
||||
|
||||
uint64_t totalSize = stream.GetSize();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user