Add some DX12 performance optimisations

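* Switches some contended mutexes (the capture transition lock and the GPU address range lock) to R/W locks
* Stores ResourceRecord chunks in a vector of (ID, chunk) pairs instead of a map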
This commit is contained in:
baldurk
2018-04-27 15:11:55 +01:00
parent de969105d4
commit abd67a7702
10 changed files with 67 additions and 88 deletions
+10 -7
View File
@@ -151,7 +151,10 @@ struct ResourceRecord
}
if(!dataWritten)
recordlist.insert(m_Chunks.begin(), m_Chunks.end());
{
for(auto it = m_Chunks.begin(); it != m_Chunks.end(); ++it)
recordlist[it->first] = it->second;
}
}
// Takes an additional reference on this record by bumping RefCount through Atomic::Inc32.
void AddRef()
{
  Atomic::Inc32(&RefCount);
}
@@ -175,10 +178,10 @@ struct ResourceRecord
void AddChunk(Chunk *chunk, int32_t ID = 0)
{
LockChunks();
if(ID == 0)
ID = GetID();
m_Chunks[ID] = chunk;
LockChunks();
m_Chunks.push_back(std::make_pair(ID, chunk));
UnlockChunks();
}
@@ -232,16 +235,16 @@ struct ResourceRecord
Chunk *GetLastChunk() const
{
RDCASSERT(HasChunks());
return m_Chunks.rbegin()->second;
return m_Chunks.back().second;
}
int32_t GetLastChunkID() const
{
RDCASSERT(HasChunks());
return m_Chunks.rbegin()->first;
return m_Chunks.back().first;
}
void PopChunk() { m_Chunks.erase(m_Chunks.rbegin()->first); }
// Removes the most recently added chunk entry from m_Chunks.
// Only the (ID, chunk) pair is dropped from the list; the Chunk object itself is not deleted here.
void PopChunk()
{
  // pop_back() on an empty vector is undefined behaviour, so popping with no chunks is a no-op.
  if(!m_Chunks.empty())
    m_Chunks.pop_back();
}
// Returns the record's data pointer advanced by the current data offset.
byte *GetDataPtr()
{
  byte *const base = DataPtr;
  return base + DataOffset;
}
// Reports whether a data pointer has been set, i.e. DataPtr is not NULL.
bool HasDataPtr()
{
  if(DataPtr == NULL)
    return false;

  return true;
}
// Stores the offset that GetDataPtr() adds to DataPtr.
void SetDataOffset(uint64_t offs)
{
  DataOffset = offs;
}
@@ -278,7 +281,7 @@ protected:
return Atomic::Inc32(&globalIDCounter);
}
std::map<int32_t, Chunk *> m_Chunks;
std::vector<std::pair<int32_t, Chunk *>> m_Chunks;
Threading::CriticalSection *m_ChunkLock;
map<ResourceId, FrameRefType> m_FrameRefs;
@@ -252,6 +252,10 @@ public:
IMPLEMENT_FUNCTION_SERIALISED(virtual void STDMETHODCALLTYPE, ExecuteCommandLists,
UINT NumCommandLists, ID3D12CommandList *const *ppCommandLists);
virtual void ExecuteCommandListsInternal(UINT NumCommandLists,
ID3D12CommandList *const *ppCommandLists,
bool InFrameCaptureBoundary);
IMPLEMENT_FUNCTION_SERIALISED(virtual void STDMETHODCALLTYPE, SetMarker, UINT Metadata,
const void *pData, UINT Size);
@@ -321,8 +321,15 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se
return true;
}
void STDMETHODCALLTYPE WrappedID3D12CommandQueue::ExecuteCommandLists(
UINT NumCommandLists, ID3D12CommandList *const *ppCommandLists)
// Entry point for command list submission: forwards to ExecuteCommandListsInternal with
// InFrameCaptureBoundary set to false.
void WrappedID3D12CommandQueue::ExecuteCommandLists(UINT NumCommandLists,
                                                    ID3D12CommandList *const *ppCommandLists)
{
  const bool inFrameCaptureBoundary = false;
  ExecuteCommandListsInternal(NumCommandLists, ppCommandLists, inFrameCaptureBoundary);
}
void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists,
ID3D12CommandList *const *ppCommandLists,
bool InFrameCaptureBoundary)
{
ID3D12CommandList **unwrapped = m_pDevice->GetTempArray<ID3D12CommandList *>(NumCommandLists);
for(UINT i = 0; i < NumCommandLists; i++)
@@ -342,7 +349,9 @@ void STDMETHODCALLTYPE WrappedID3D12CommandQueue::ExecuteCommandLists(
if(IsCaptureMode(m_State))
{
SCOPED_LOCK(m_Lock);
SCOPED_LOCK(m_pDevice->GetCapTransitionLock());
if(!InFrameCaptureBoundary)
m_pDevice->GetCapTransitionLock().ReadLock();
bool capframe = IsActiveCapturing(m_State);
std::set<ResourceId> refdIDs;
@@ -529,6 +538,9 @@ void STDMETHODCALLTYPE WrappedID3D12CommandQueue::ExecuteCommandLists(
m_QueueRecord->AddChunk(scope.Get());
}
}
if(!InFrameCaptureBoundary)
m_pDevice->GetCapTransitionLock().ReadUnlock();
}
}
+11 -10
View File
@@ -886,7 +886,7 @@ void WrappedID3D12Device::Unmap(ID3D12Resource *Resource, UINT Subresource, byte
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1119,7 +1119,7 @@ void WrappedID3D12Device::WriteToSubresource(ID3D12Resource *Resource, UINT Subr
{
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1316,7 +1316,7 @@ void WrappedID3D12Device::StartFrameCapture(void *dev, void *wnd)
// will check to see if they need to markdirty or markpendingdirty
// and go into the frame record.
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_WRITELOCK(m_CapTransitionLock);
initStateCurBatch = 0;
initStateCurList = NULL;
@@ -1332,7 +1332,7 @@ void WrappedID3D12Device::StartFrameCapture(void *dev, void *wnd)
initStateCurBatch = 0;
initStateCurList = NULL;
ExecuteLists();
ExecuteLists(NULL, true);
FlushLists();
RDCDEBUG("Attempting capture");
@@ -1406,7 +1406,7 @@ bool WrappedID3D12Device::EndFrameCapture(void *dev, void *wnd)
// transition back to IDLE and readback initial states atomically
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_WRITELOCK(m_CapTransitionLock);
EndCaptureFrame(backbuffer);
m_State = CaptureState::BackgroundCapturing;
@@ -1495,7 +1495,7 @@ bool WrappedID3D12Device::EndFrameCapture(void *dev, void *wnd)
list->Close();
ExecuteLists();
ExecuteLists(NULL, true);
FlushLists();
byte *data = NULL;
@@ -2392,13 +2392,14 @@ void WrappedID3D12Device::CloseInitialStateList()
initStateCurBatch = 0;
}
void WrappedID3D12Device::ExecuteList(ID3D12GraphicsCommandList2 *list, ID3D12CommandQueue *queue)
void WrappedID3D12Device::ExecuteList(ID3D12GraphicsCommandList2 *list,
WrappedID3D12CommandQueue *queue, bool InFrameCaptureBoundary)
{
if(queue == NULL)
queue = GetQueue();
ID3D12CommandList *l = list;
queue->ExecuteCommandLists(1, &l);
queue->ExecuteCommandListsInternal(1, &l, InFrameCaptureBoundary);
for(auto it = m_InternalCmds.pendingcmds.begin(); it != m_InternalCmds.pendingcmds.end(); ++it)
{
@@ -2412,7 +2413,7 @@ void WrappedID3D12Device::ExecuteList(ID3D12GraphicsCommandList2 *list, ID3D12Co
m_InternalCmds.submittedcmds.push_back(list);
}
void WrappedID3D12Device::ExecuteLists(ID3D12CommandQueue *queue)
void WrappedID3D12Device::ExecuteLists(WrappedID3D12CommandQueue *queue, bool InFrameCaptureBoundary)
{
// nothing to do
if(m_InternalCmds.pendingcmds.empty())
@@ -2426,7 +2427,7 @@ void WrappedID3D12Device::ExecuteLists(ID3D12CommandQueue *queue)
if(queue == NULL)
queue = GetQueue();
queue->ExecuteCommandLists((UINT)cmds.size(), &cmds[0]);
queue->ExecuteCommandListsInternal((UINT)cmds.size(), &cmds[0], InFrameCaptureBoundary);
m_InternalCmds.submittedcmds.insert(m_InternalCmds.submittedcmds.end(),
m_InternalCmds.pendingcmds.begin(),
+6 -5
View File
@@ -312,7 +312,7 @@ private:
bool m_AppControlledCapture;
Threading::CriticalSection m_CapTransitionLock;
Threading::RWLock m_CapTransitionLock;
CaptureState m_State;
uint32_t m_SubmitCounter = 0;
@@ -351,7 +351,7 @@ private:
{
D3D12_CPU_DESCRIPTOR_HANDLE rtvs[8];
ID3D12CommandQueue *queue;
WrappedID3D12CommandQueue *queue;
int32_t lastPresentedBuffer;
};
@@ -397,7 +397,7 @@ public:
D3D12ResourceManager *GetResourceManager() { return m_ResourceManager; }
D3D12ShaderCache *GetShaderCache() { return m_ShaderCache; }
ResourceId GetResourceID() { return m_ResourceID; }
Threading::CriticalSection &GetCapTransitionLock() { return m_CapTransitionLock; }
Threading::RWLock &GetCapTransitionLock() { return m_CapTransitionLock; }
void ReleaseSwapchainResources(IDXGISwapChain *swap, IUnknown **backbuffers, int numBackbuffers);
void FirstFrame(WrappedIDXGISwapChain4 *swap);
FrameRecord &GetFrameRecord() { return m_FrameRecord; }
@@ -485,8 +485,9 @@ public:
void AddCaptureSubmission();
void ExecuteList(ID3D12GraphicsCommandList2 *list, ID3D12CommandQueue *queue = NULL);
void ExecuteLists(ID3D12CommandQueue *queue = NULL);
void ExecuteList(ID3D12GraphicsCommandList2 *list, WrappedID3D12CommandQueue *queue = NULL,
bool InFrameCaptureBoundary = false);
void ExecuteLists(WrappedID3D12CommandQueue *queue = NULL, bool InFrameCaptureBoundary = false);
void FlushLists(bool forceSync = false, ID3D12CommandQueue *queue = NULL);
void GPUSync(ID3D12CommandQueue *queue = NULL, ID3D12Fence *fence = NULL);
+12 -36
View File
@@ -748,7 +748,7 @@ HRESULT WrappedID3D12Device::CreateDescriptorHeap(const D3D12_DESCRIPTOR_HEAP_DE
record->AddChunk(scope.Get());
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
if(IsBackgroundCapturing(m_State))
GetResourceManager()->MarkDirtyResource(wrapped->GetResourceID());
else
@@ -912,7 +912,7 @@ void WrappedID3D12Device::CreateConstantBufferView(const D3D12_CONSTANT_BUFFER_V
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -955,7 +955,7 @@ void WrappedID3D12Device::CreateShaderResourceView(ID3D12Resource *pResource,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1007,7 +1007,7 @@ void WrappedID3D12Device::CreateUnorderedAccessView(ID3D12Resource *pResource,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1053,7 +1053,7 @@ void WrappedID3D12Device::CreateRenderTargetView(ID3D12Resource *pResource,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1097,7 +1097,7 @@ void WrappedID3D12Device::CreateDepthStencilView(ID3D12Resource *pResource,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1138,7 +1138,7 @@ void WrappedID3D12Device::CreateSampler(const D3D12_SAMPLER_DESC *pDesc,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -1329,7 +1329,7 @@ HRESULT WrappedID3D12Device::CreateCommittedResource(const D3D12_HEAP_PROPERTIES
record->AddChunk(scope.Get());
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
if(IsBackgroundCapturing(m_State))
GetResourceManager()->MarkDirtyResource(wrapped->GetResourceID());
else
@@ -1576,7 +1576,7 @@ HRESULT WrappedID3D12Device::CreatePlacedResource(ID3D12Heap *pHeap, UINT64 Heap
record->AddChunk(scope.Get());
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
if(IsBackgroundCapturing(m_State))
GetResourceManager()->MarkDirtyResource(wrapped->GetResourceID());
else
@@ -1969,7 +1969,7 @@ void WrappedID3D12Device::CopyDescriptors(
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -2076,7 +2076,7 @@ void WrappedID3D12Device::CopyDescriptorsSimple(UINT NumDescriptors,
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
@@ -2172,18 +2172,6 @@ HRESULT WrappedID3D12Device::MakeResident(UINT NumObjects, ID3D12Pageable *const
}
}
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
if(capframe)
{
// serialise
}
return m_pDevice->MakeResident(NumObjects, unwrapped);
}
@@ -2211,18 +2199,6 @@ HRESULT WrappedID3D12Device::Evict(UINT NumObjects, ID3D12Pageable *const *ppObj
}
}
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
if(capframe)
{
// serialise
}
return m_pDevice->Evict(NumObjects, unwrapped);
}
@@ -2742,7 +2718,7 @@ HRESULT WrappedID3D12Device::EnqueueMakeResident(D3D12_RESIDENCY_FLAGS Flags, UI
bool capframe = false;
{
SCOPED_LOCK(m_CapTransitionLock);
SCOPED_READLOCK(m_CapTransitionLock);
capframe = IsActiveCapturing(m_State);
}
+4 -4
View File
@@ -117,7 +117,7 @@ bool D3D12ResourceManager::Prepare_InitialState(ID3D12DeviceChild *res)
{
m_Device->CloseInitialStateList();
m_Device->ExecuteLists();
m_Device->ExecuteLists(NULL, true);
m_Device->FlushLists();
m_Device->Evict(1, &pageable);
@@ -228,7 +228,7 @@ bool D3D12ResourceManager::Prepare_InitialState(ID3D12DeviceChild *res)
{
m_Device->CloseInitialStateList();
m_Device->ExecuteLists();
m_Device->ExecuteLists(NULL, true);
m_Device->FlushLists();
m_Device->Evict(1, &pageable);
@@ -374,7 +374,7 @@ bool D3D12ResourceManager::Serialise_InitialState(SerialiserType &ser, ResourceI
if(ser.IsWriting())
{
m_Device->ExecuteLists();
m_Device->ExecuteLists(NULL, true);
m_Device->FlushLists();
mappedBuffer = (ID3D12Resource *)initContents.resource;
@@ -738,7 +738,7 @@ void D3D12ResourceManager::Apply_InitialState(ID3D12DeviceChild *live, D3D12Init
#if ENABLED(SINGLE_FLUSH_VALIDATE)
m_Device->CloseInitialStateList();
m_Device->ExecuteLists();
m_Device->ExecuteLists(NULL, true);
m_Device->FlushLists(true);
#endif
}
+4 -4
View File
@@ -320,11 +320,11 @@ struct GPUAddressRangeTracker
GPUAddressRangeTracker &operator=(const GPUAddressRangeTracker &);
std::vector<GPUAddressRange> addresses;
Threading::CriticalSection addressLock;
Threading::RWLock addressLock;
void AddTo(GPUAddressRange range)
{
SCOPED_LOCK(addressLock);
SCOPED_WRITELOCK(addressLock);
auto it = std::lower_bound(addresses.begin(), addresses.end(), range.start);
RDCASSERT(it == addresses.begin() || it == addresses.end() || range.start < it->start ||
range.start >= it->end);
@@ -334,7 +334,7 @@ struct GPUAddressRangeTracker
void RemoveFrom(D3D12_GPU_VIRTUAL_ADDRESS baseAddr)
{
SCOPED_LOCK(addressLock);
SCOPED_WRITELOCK(addressLock);
auto it = std::lower_bound(addresses.begin(), addresses.end(), baseAddr);
RDCASSERT(it != addresses.end() && baseAddr >= it->start && baseAddr < it->end);
@@ -353,7 +353,7 @@ struct GPUAddressRangeTracker
// the lookup below only reads the address list, so a shared (read) lock is sufficient
{
SCOPED_LOCK(addressLock);
SCOPED_READLOCK(addressLock);
auto it = std::lower_bound(addresses.begin(), addresses.end(), addr);
if(it == addresses.end())
+1 -1
View File
@@ -446,7 +446,7 @@ HRESULT STDMETHODCALLTYPE WrappedID3D12Resource::WriteToSubresource(UINT DstSubr
void WrappedID3D12Resource::RefBuffers(D3D12ResourceManager *rm)
{
// only buffers go into m_Addresses
SCOPED_LOCK(m_Addresses.addressLock);
SCOPED_READLOCK(m_Addresses.addressLock);
for(size_t i = 0; i < m_Addresses.addresses.size(); i++)
rm->MarkResourceFrameReferenced(m_Addresses.addresses[i].id, eFrameRef_Read);
}
-18
View File
@@ -238,24 +238,6 @@ struct GLResourceRecord : public ResourceRecord
int64_t persistentMaps; // counter indicating how many coherent maps are 'live'
} Map;
template <typename ChunkFilter>
void FilterChunks(const ChunkFilter &filter)
{
LockChunks();
std::vector<std::map<int32_t, Chunk *>::iterator> deletions;
for(auto it = m_Chunks.begin(); it != m_Chunks.end(); ++it)
{
if(filter(it->second))
deletions.push_back(it);
}
for(size_t i = 0; i < deletions.size(); i++)
{
SAFE_DELETE(deletions[i]->second);
m_Chunks.erase(deletions[i]);
}
UnlockChunks();
}
void VerifyDataType(GLenum target)
{
#if ENABLED(RDOC_DEVEL)