For large D3D12 upload heap buffers, allocate on default heap instead

* The cost of memcpy'ing data across to upload heap buffers for initial
  contents and maps is significant, so for large buffers it's better to
  allocate them GPU-side so we can do GPU copies.
* This also means that maps cannot happen directly. Instead, while
  reading, we allocate an upload buffer holding a copy of the map source
  data and perform a GPU buffer copy from it into the resource.
This commit is contained in:
baldurk
2017-01-18 14:26:11 +01:00
parent 112d750e30
commit a59ebcbc3b
3 changed files with 151 additions and 13 deletions
+133 -13
View File
@@ -352,6 +352,11 @@ WrappedID3D12Device::~WrappedID3D12Device()
SAFE_RELEASE(m_QueueFences[i]);
}
for(auto it = m_UploadBuffers.begin(); it != m_UploadBuffers.end(); ++it)
{
SAFE_RELEASE(it->second);
}
DestroyInternalResources();
if(m_DeviceRecord)
@@ -534,6 +539,43 @@ HRESULT WrappedID3D12Device::QueryInterface(REFIID riid, void **ppvObject)
return m_RefCounter.QueryInterface(riid, ppvObject);
}
// Returns the staging buffer cached in m_UploadBuffers under chunkOffset, creating it on
// first request.
//
// chunkOffset - key used to look up / store the buffer in m_UploadBuffers.
// byteSize    - width in bytes of the buffer to create when no cached one exists; it is
//               ignored when a non-NULL buffer is already cached for this key.
//
// The new buffer is a committed resource on an upload heap, created in the GENERIC_READ
// state. The result of the creation call is stored in the cache whether or not it
// succeeded; a failing HRESULT is reported through RDCASSERT. The cache keeps the
// returned pointer, so callers are handed the same object on subsequent requests.
ID3D12Resource *WrappedID3D12Device::GetUploadBuffer(uint64_t chunkOffset, uint64_t byteSize)
{
  // operator[] inserts a NULL entry on a miss, so the slot exists from this point on
  ID3D12Resource *&slot = m_UploadBuffers[chunkOffset];
  if(slot)
    return slot;

  // upload heap on node 1
  D3D12_HEAP_PROPERTIES uploadHeap;
  uploadHeap.Type = D3D12_HEAP_TYPE_UPLOAD;
  uploadHeap.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
  uploadHeap.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
  uploadHeap.CreationNodeMask = 1;
  uploadHeap.VisibleNodeMask = 1;

  // plain untyped buffer of byteSize bytes
  D3D12_RESOURCE_DESC bufDesc;
  bufDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
  bufDesc.Alignment = 0;
  bufDesc.Width = byteSize;
  bufDesc.Height = 1;
  bufDesc.DepthOrArraySize = 1;
  bufDesc.MipLevels = 1;
  bufDesc.Format = DXGI_FORMAT_UNKNOWN;
  bufDesc.SampleDesc.Count = 1;
  bufDesc.SampleDesc.Quality = 0;
  bufDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
  bufDesc.Flags = D3D12_RESOURCE_FLAG_NONE;

  ID3D12Resource *created = NULL;
  HRESULT hr = CreateCommittedResource(&uploadHeap, D3D12_HEAP_FLAG_NONE, &bufDesc,
                                       D3D12_RESOURCE_STATE_GENERIC_READ, NULL,
                                       __uuidof(ID3D12Resource), (void **)&created);

  // remember the result before asserting, matching the order callers observe
  slot = created;

  RDCASSERT(hr == S_OK, hr, S_OK, byteSize);

  return created;
}
void WrappedID3D12Device::ApplyInitialContents()
{
initStateCurBatch = 0;
@@ -841,20 +883,58 @@ bool WrappedID3D12Device::Serialise_MapDataWrite(Serialiser *localSerialiser,
range.Begin = range.End = 0;
mapPtr = NULL;
HRESULT hr = r->Map(sub, &range, (void **)&mapPtr);
if(SUCCEEDED(hr))
if(m_UploadResourceIds.find(res) != m_UploadResourceIds.end())
{
memcpy(mapPtr + begin, data, size_t(end - begin));
D3D12CommandData &cmd = *m_Queue->GetCommandData();
range.Begin = (size_t)begin;
range.End = (size_t)end;
ID3D12Resource *uploadBuf = GetUploadBuffer(cmd.m_CurChunkOffset, end - begin);
r->Unmap(sub, &range);
// during reading, fill out the buffer itself
if(m_State == READING)
{
D3D12_RANGE range = {0, 0};
void *dst = NULL;
HRESULT hr = uploadBuf->Map(sub, &range, &dst);
if(SUCCEEDED(hr))
{
memcpy(dst, data, end - begin);
range.Begin = 0;
range.End = end - begin;
uploadBuf->Unmap(sub, &range);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
}
}
// then afterwards just execute a list to copy the result
ID3D12GraphicsCommandList *list = GetNewList();
list->CopyBufferRegion(r, begin, uploadBuf, 0, end - begin);
list->Close();
ExecuteList(list);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
HRESULT hr = r->Map(sub, &range, (void **)&mapPtr);
if(SUCCEEDED(hr))
{
memcpy(mapPtr + begin, data, size_t(end - begin));
range.Begin = (size_t)begin;
range.End = (size_t)end;
r->Unmap(sub, &range);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
}
}
SAFE_DELETE_ARRAY(data);
@@ -919,17 +999,57 @@ bool WrappedID3D12Device::Serialise_WriteToSubresource(Serialiser *localSerialis
{
ID3D12Resource *r = GetResourceManager()->GetLiveAs<ID3D12Resource>(res);
HRESULT hr = r->Map(sub, NULL, NULL);
if(SUCCEEDED(hr))
if(m_UploadResourceIds.find(res) != m_UploadResourceIds.end())
{
r->WriteToSubresource(sub, HasBox ? &box : NULL, data, rowPitch, depthPitch);
D3D12CommandData &cmd = *m_Queue->GetCommandData();
r->Unmap(sub, NULL);
ID3D12Resource *uploadBuf = GetUploadBuffer(cmd.m_CurChunkOffset, dataSize);
// during reading, fill out the buffer itself
if(m_State == READING)
{
D3D12_RANGE range = {0, 0};
void *dst = NULL;
HRESULT hr = uploadBuf->Map(sub, &range, &dst);
if(SUCCEEDED(hr))
{
memcpy(dst, data, dataSize);
range.Begin = 0;
range.End = dataSize;
uploadBuf->Unmap(sub, &range);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
}
}
// then afterwards just execute a list to copy the result
ID3D12GraphicsCommandList *list = GetNewList();
UINT64 copySize = dataSize;
if(HasBox)
copySize = RDCMIN(copySize, UINT64(box.right - box.left));
list->CopyBufferRegion(r, HasBox ? box.left : 0, uploadBuf, 0, copySize);
list->Close();
ExecuteList(list);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
HRESULT hr = r->Map(sub, NULL, NULL);
if(SUCCEEDED(hr))
{
r->WriteToSubresource(sub, HasBox ? &box : NULL, data, rowPitch, depthPitch);
r->Unmap(sub, NULL);
}
else
{
RDCERR("Failed to map resource on replay %08x", hr);
}
}
SAFE_DELETE_ARRAY(data);
+4
View File
@@ -249,6 +249,9 @@ private:
D3D12Replay m_Replay;
D3D12DebugManager *m_DebugManager;
set<ResourceId> m_UploadResourceIds;
map<uint64_t, ID3D12Resource *> m_UploadBuffers;
Threading::CriticalSection m_MapsLock;
vector<MapState> m_Maps;
@@ -427,6 +430,7 @@ public:
ID3D12GraphicsCommandList *GetNewList();
ID3D12GraphicsCommandList *GetInitialStateList();
void CloseInitialStateList();
ID3D12Resource *GetUploadBuffer(uint64_t chunkOffset, uint64_t byteSize);
void ApplyInitialContents();
void ExecuteList(ID3D12GraphicsCommandList *list, ID3D12CommandQueue *queue = NULL);
@@ -23,6 +23,7 @@
******************************************************************************/
#include "d3d12_device.h"
#include "driver/dxgi/dxgi_common.h"
#include "d3d12_command_list.h"
#include "d3d12_command_queue.h"
#include "d3d12_resources.h"
@@ -935,6 +936,19 @@ bool WrappedID3D12Device::Serialise_CreateCommittedResource(
{
pOptimizedClearValue = HasClearValue ? &clearVal : NULL;
if(props.Type == D3D12_HEAP_TYPE_UPLOAD && desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{
// place large resources in local memory so that initial contents and maps can
// be cached and copied on the GPU instead of memcpy'd from the CPU every time.
// for smaller resources it's better to just leave them as upload and map into them
if(desc.Width >= 1024 * 1024)
{
RDCLOG("Remapping committed resource %llu from upload to default for efficient replay", Res);
props.Type = D3D12_HEAP_TYPE_DEFAULT;
m_UploadResourceIds.insert(Res);
}
}
if(desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{
GPUAddressRange range;