From a59ebcbc3bfebf9ea7c61e76d724df472daacf11 Mon Sep 17 00:00:00 2001 From: baldurk Date: Wed, 18 Jan 2017 14:26:11 +0100 Subject: [PATCH] For large D3D12 upload heap buffers, allocate on default heap instead * The cost of memcpy'ing data across to upload heap buffers for initial contents and maps is significant, so for large buffers it's better to allocate them GPU-side so we can do GPU copies. * This also means that maps cannot happen directly, so instead while reading we allocate a GPU-side copy of the map source data, and perform a buffer copy instead. --- renderdoc/driver/d3d12/d3d12_device.cpp | 146 +++++++++++++++++-- renderdoc/driver/d3d12/d3d12_device.h | 4 + renderdoc/driver/d3d12/d3d12_device_wrap.cpp | 14 ++ 3 files changed, 151 insertions(+), 13 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp b/renderdoc/driver/d3d12/d3d12_device.cpp index 9eab9f495..bbb151e6e 100644 --- a/renderdoc/driver/d3d12/d3d12_device.cpp +++ b/renderdoc/driver/d3d12/d3d12_device.cpp @@ -352,6 +352,11 @@ WrappedID3D12Device::~WrappedID3D12Device() SAFE_RELEASE(m_QueueFences[i]); } + for(auto it = m_UploadBuffers.begin(); it != m_UploadBuffers.end(); ++it) + { + SAFE_RELEASE(it->second); + } + DestroyInternalResources(); if(m_DeviceRecord) @@ -534,6 +539,43 @@ HRESULT WrappedID3D12Device::QueryInterface(REFIID riid, void **ppvObject) return m_RefCounter.QueryInterface(riid, ppvObject); } +ID3D12Resource *WrappedID3D12Device::GetUploadBuffer(uint64_t chunkOffset, uint64_t byteSize) +{ + ID3D12Resource *buf = m_UploadBuffers[chunkOffset]; + + if(buf != NULL) + return buf; + + D3D12_RESOURCE_DESC soBufDesc; + soBufDesc.Alignment = 0; + soBufDesc.DepthOrArraySize = 1; + soBufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + soBufDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + soBufDesc.Format = DXGI_FORMAT_UNKNOWN; + soBufDesc.Height = 1; + soBufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + soBufDesc.MipLevels = 1; + soBufDesc.SampleDesc.Count = 1; + soBufDesc.SampleDesc.Quality = 0; + soBufDesc.Width = byteSize; + + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + HRESULT hr = CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &soBufDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, NULL, + __uuidof(ID3D12Resource), (void **)&buf); + + m_UploadBuffers[chunkOffset] = buf; + + RDCASSERT(hr == S_OK, hr, S_OK, byteSize); + return buf; +} + void WrappedID3D12Device::ApplyInitialContents() { initStateCurBatch = 0; @@ -841,20 +883,58 @@ bool WrappedID3D12Device::Serialise_MapDataWrite(Serialiser *localSerialiser, range.Begin = range.End = 0; mapPtr = NULL; - HRESULT hr = r->Map(sub, &range, (void **)&mapPtr); - if(SUCCEEDED(hr)) + if(m_UploadResourceIds.find(res) != m_UploadResourceIds.end()) { - memcpy(mapPtr + begin, data, size_t(end - begin)); + D3D12CommandData &cmd = *m_Queue->GetCommandData(); - range.Begin = (size_t)begin; - range.End = (size_t)end; + ID3D12Resource *uploadBuf = GetUploadBuffer(cmd.m_CurChunkOffset, end - begin); - r->Unmap(sub, &range); + // during reading, fill out the buffer itself + if(m_State == READING) + { + D3D12_RANGE range = {0, 0}; + void *dst = NULL; + HRESULT hr = uploadBuf->Map(sub, &range, &dst); + + if(SUCCEEDED(hr)) + { + memcpy(dst, data, end - begin); + + range.Begin = 0; + range.End = end - begin; + + uploadBuf->Unmap(sub, &range); + } + else + { + RDCERR("Failed to map resource on replay %08x", hr); + } + } + + // then afterwards just execute a list to copy the result + ID3D12GraphicsCommandList *list = GetNewList(); + list->CopyBufferRegion(r, begin, uploadBuf, 0, end - begin); + list->Close(); + ExecuteList(list); } else { - RDCERR("Failed to map resource on replay %08x", hr); + HRESULT hr = r->Map(sub, &range, (void **)&mapPtr); + + if(SUCCEEDED(hr)) + { + memcpy(mapPtr + begin, data, size_t(end - begin)); + + range.Begin = (size_t)begin; + range.End = (size_t)end; + + r->Unmap(sub, &range); + } + else + { + RDCERR("Failed to map resource on replay %08x", hr); + } } SAFE_DELETE_ARRAY(data); @@ -919,17 +999,57 @@ bool WrappedID3D12Device::Serialise_WriteToSubresource(Serialiser *localSerialis { ID3D12Resource *r = GetResourceManager()->GetLiveAs(res); - HRESULT hr = r->Map(sub, NULL, NULL); - - if(SUCCEEDED(hr)) + if(m_UploadResourceIds.find(res) != m_UploadResourceIds.end()) { - r->WriteToSubresource(sub, HasBox ? &box : NULL, data, rowPitch, depthPitch); + D3D12CommandData &cmd = *m_Queue->GetCommandData(); - r->Unmap(sub, NULL); + ID3D12Resource *uploadBuf = GetUploadBuffer(cmd.m_CurChunkOffset, dataSize); + + // during reading, fill out the buffer itself + if(m_State == READING) + { + D3D12_RANGE range = {0, 0}; + void *dst = NULL; + HRESULT hr = uploadBuf->Map(sub, &range, &dst); + + if(SUCCEEDED(hr)) + { + memcpy(dst, data, dataSize); + + range.Begin = 0; + range.End = dataSize; + + uploadBuf->Unmap(sub, &range); + } + else + { + RDCERR("Failed to map resource on replay %08x", hr); + } + } + + // then afterwards just execute a list to copy the result + ID3D12GraphicsCommandList *list = GetNewList(); + UINT64 copySize = dataSize; + if(HasBox) + copySize = RDCMIN(copySize, UINT64(box.right - box.left)); + list->CopyBufferRegion(r, HasBox ? box.left : 0, uploadBuf, 0, copySize); + list->Close(); + ExecuteList(list); } else { - RDCERR("Failed to map resource on replay %08x", hr); + HRESULT hr = r->Map(sub, NULL, NULL); + + if(SUCCEEDED(hr)) + { + r->WriteToSubresource(sub, HasBox ? &box : NULL, data, rowPitch, depthPitch); + + r->Unmap(sub, NULL); + } + else + { + RDCERR("Failed to map resource on replay %08x", hr); + } } SAFE_DELETE_ARRAY(data); diff --git a/renderdoc/driver/d3d12/d3d12_device.h b/renderdoc/driver/d3d12/d3d12_device.h index e52678863..91e708cba 100644 --- a/renderdoc/driver/d3d12/d3d12_device.h +++ b/renderdoc/driver/d3d12/d3d12_device.h @@ -249,6 +249,9 @@ private: D3D12Replay m_Replay; D3D12DebugManager *m_DebugManager; + set m_UploadResourceIds; + map m_UploadBuffers; + Threading::CriticalSection m_MapsLock; vector m_Maps; @@ -427,6 +430,7 @@ public: ID3D12GraphicsCommandList *GetNewList(); ID3D12GraphicsCommandList *GetInitialStateList(); void CloseInitialStateList(); + ID3D12Resource *GetUploadBuffer(uint64_t chunkOffset, uint64_t byteSize); void ApplyInitialContents(); void ExecuteList(ID3D12GraphicsCommandList *list, ID3D12CommandQueue *queue = NULL); diff --git a/renderdoc/driver/d3d12/d3d12_device_wrap.cpp b/renderdoc/driver/d3d12/d3d12_device_wrap.cpp index dc8cb0f61..c80ae4ef6 100644 --- a/renderdoc/driver/d3d12/d3d12_device_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_device_wrap.cpp @@ -23,6 +23,7 @@ ******************************************************************************/ #include "d3d12_device.h" +#include "driver/dxgi/dxgi_common.h" #include "d3d12_command_list.h" #include "d3d12_command_queue.h" #include "d3d12_resources.h" @@ -935,6 +936,19 @@ bool WrappedID3D12Device::Serialise_CreateCommittedResource( { pOptimizedClearValue = HasClearValue ? &clearVal : NULL; + if(props.Type == D3D12_HEAP_TYPE_UPLOAD && desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + // place large resources in local memory so that initial contents and maps can + // be cached and copied on the GPU instead of memcpy'd from the CPU every time. + // smaller resources it's better to just leave them as upload and map into them + if(desc.Width >= 1024 * 1024) + { + RDCLOG("Remapping committed resource %llu from upload to default for efficient replay", Res); + props.Type = D3D12_HEAP_TYPE_DEFAULT; + m_UploadResourceIds.insert(Res); + } + } + if(desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { GPUAddressRange range;