Change boundDescs to an array of rdcpair ranges to avoid perf issues at runtime

With large bindless descriptor tables, traversing all entries to
fill the boundDescs vector was a perf bottleneck. Storing only the
first descriptor pointer and a count for each bound range eliminates
that bottleneck. Traversal during queue submit is slightly more
complicated, but the same number of descriptors is visited.
This commit is contained in:
Steve Karolewics
2020-01-16 09:27:01 -08:00
committed by Baldur Karlsson
parent f12ae2d9fa
commit 7076e674ca
3 changed files with 27 additions and 26 deletions
@@ -1687,11 +1687,8 @@ void WrappedID3D12GraphicsCommandList::SetComputeRootDescriptorTable(
if(!RenderDoc::Inst().GetCaptureOptions().refAllResources)
{
rdcarray<D3D12Descriptor *> &descs = m_ListRecord->cmdInfo->boundDescs;
descs.reserve(descs.size() + num);
for(UINT d = 0; d < num; d++)
descs.push_back(rangeStart + d);
rdcarray<rdcpair<D3D12Descriptor *, UINT>> &descs = m_ListRecord->cmdInfo->boundDescs;
descs.push_back(make_rdcpair(rangeStart, num));
}
prevTableOffset = offset + num;
@@ -2251,11 +2248,8 @@ void WrappedID3D12GraphicsCommandList::SetGraphicsRootDescriptorTable(
if(!RenderDoc::Inst().GetCaptureOptions().refAllResources)
{
rdcarray<D3D12Descriptor *> &descs = m_ListRecord->cmdInfo->boundDescs;
descs.reserve(descs.size() + num);
for(UINT d = 0; d < num; d++)
descs.push_back(rangeStart + d);
rdcarray<rdcpair<D3D12Descriptor *, UINT>> &descs = m_ListRecord->cmdInfo->boundDescs;
descs.push_back(make_rdcpair(rangeStart, num));
}
prevTableOffset = offset + num;
@@ -421,23 +421,27 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
for(auto it = record->bakedCommands->cmdInfo->boundDescs.begin();
it != record->bakedCommands->cmdInfo->boundDescs.end(); ++it)
{
D3D12Descriptor *desc = *it;
ResourceId id, id2;
FrameRefType ref = eFrameRef_Read;
desc->GetRefIDs(id, id2, ref);
if(id != ResourceId())
rdcpair<D3D12Descriptor *, UINT> &descRange = *it;
for(UINT d = 0; d < descRange.second; ++d)
{
refdIDs.insert(id);
GetResourceManager()->MarkResourceFrameReferenced(id, ref);
}
D3D12Descriptor *desc = descRange.first + d;
if(id2 != ResourceId())
{
refdIDs.insert(id2);
GetResourceManager()->MarkResourceFrameReferenced(id2, ref);
ResourceId id, id2;
FrameRefType ref = eFrameRef_Read;
desc->GetRefIDs(id, id2, ref);
if(id != ResourceId())
{
refdIDs.insert(id);
GetResourceManager()->MarkResourceFrameReferenced(id, ref);
}
if(id2 != ResourceId())
{
refdIDs.insert(id2);
GetResourceManager()->MarkResourceFrameReferenced(id2, ref);
}
}
}
+4 -1
View File
@@ -426,11 +426,14 @@ struct CmdListRecordingInfo
// a list of descriptors that are bound at any point in this command list
// used to look up all the frame refs per-descriptor and apply them on queue
// submit with latest binding refs.
// Each entry stores a pointer to the first descriptor of a bound range and
// the number of descriptors in that range. The full per-descriptor traversal
// occurs during queue submit, to avoid perf issues during regular
// application operation.
// We allow duplicates in here since it's a better tradeoff to let the vector
// expand a bit more to contain duplicates and then deal with it during frame
// capture, than to constantly be deduplicating during record (e.g. with a
// set or sorted vector).
rdcarray<D3D12Descriptor *> boundDescs;
rdcarray<rdcpair<D3D12Descriptor *, UINT>> boundDescs;
// bundles executed
rdcarray<D3D12ResourceRecord *> bundles;