Files
renderdoc/renderdoc/driver/vulkan/vk_memory.cpp
T
2019-02-01 18:32:13 +00:00

378 lines
13 KiB
C++

/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2015-2019 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "vk_core.h"
uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex))
return m_PhysicalDeviceData.readbackMemIndex;
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
}
uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceRequiredBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex))
return m_PhysicalDeviceData.uploadMemIndex;
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
}
uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex))
return m_PhysicalDeviceData.GPULocalMemIndex;
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
}
uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequiredBitmask,
uint32_t allocRequiredProps,
uint32_t allocUndesiredProps)
{
uint32_t best = memProps.memoryTypeCount;
for(uint32_t memIndex = 0; memIndex < memProps.memoryTypeCount; memIndex++)
{
if(resourceRequiredBitmask & (1 << memIndex))
{
uint32_t memTypeFlags = memProps.memoryTypes[memIndex].propertyFlags;
if((memTypeFlags & allocRequiredProps) == allocRequiredProps)
{
if(memTypeFlags & allocUndesiredProps)
best = memIndex;
else
return memIndex;
}
}
}
if(best == memProps.memoryTypeCount)
{
RDCERR("Couldn't find any matching heap! requirements %x / %x too strict",
resourceRequiredBitmask, allocRequiredProps);
return 0;
}
return best;
}
#define CREATE_NON_COHERENT_ATTRACTIVE_MEMORY 0
void WrappedVulkan::RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps,
uint32_t **memIdxMap)
{
uint32_t *memmap = new uint32_t[VK_MAX_MEMORY_TYPES];
*memIdxMap = memmap;
m_MemIdxMaps.push_back(memmap);
for(size_t i = 0; i < VK_MAX_MEMORY_TYPES; i++)
memmap[i] = ~0U;
// basic idea here:
// We want to discourage coherent memory maps as much as possible while capturing,
// as they're painful to track. Unfortunately the spec guarantees that at least
// one such memory type will be available, and we must follow that.
//
// So, rather than removing the coherent memory type we make it as unappealing as
// possible and try and ensure that only someone looking specifically for a coherent
// memory type will find it. That way hopefully memory selection algorithms will
// pick non-coherent memory and do proper flushing as necessary.
// we want to add a new heap, hopefully there is room
#if CREATE_NON_COHERENT_ATTRACTIVE_MEMORY
RDCASSERT(memProps->memoryHeapCount < VK_MAX_MEMORY_HEAPS - 1);
uint32_t coherentHeap = memProps->memoryHeapCount;
memProps->memoryHeapCount++;
// make a new heap that's tiny. If any applications look at heap sizes to determine
// viability, they'll dislike the look of this one (the real heaps should be much
// bigger).
memProps->memoryHeaps[coherentHeap].flags = 0; // not device local
memProps->memoryHeaps[coherentHeap].size = 32 * 1024 * 1024;
#endif
// for every coherent memory type, add a non-coherent type first, then
// mark the coherent type with our crappy heap
uint32_t origCount = memProps->memoryTypeCount;
VkMemoryType origTypes[VK_MAX_MEMORY_TYPES];
memcpy(origTypes, memProps->memoryTypes, sizeof(origTypes));
uint32_t newtypeidx = 0;
for(uint32_t i = 0; i < origCount; i++)
{
#if CREATE_NON_COHERENT_ATTRACTIVE_MEMORY
if((origTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0)
{
// coherent type found.
// can we still add a new type without exceeding the max?
if(memProps->memoryTypeCount + 1 <= VK_MAX_MEMORY_TYPES)
{
// copy both types from the original type
memProps->memoryTypes[newtypeidx] = origTypes[i];
memProps->memoryTypes[newtypeidx + 1] = origTypes[i];
// mark first as non-coherent, cached
memProps->memoryTypes[newtypeidx].propertyFlags &= ~VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
memProps->memoryTypes[newtypeidx].propertyFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
// point second at bad heap
memProps->memoryTypes[newtypeidx + 1].heapIndex = coherentHeap;
// point both new types at this original type
memmap[newtypeidx++] = i;
memmap[newtypeidx++] = i;
// we added a type
memProps->memoryTypeCount++;
}
else
{
// can't add a new type, but we can at least repoint this coherent
// type at the bad heap to discourage use
memProps->memoryTypes[newtypeidx] = origTypes[i];
memProps->memoryTypes[newtypeidx].heapIndex = coherentHeap;
memmap[newtypeidx++] = i;
}
}
else
#endif
{
// non-coherent already or non-hostvisible, just copy through
memProps->memoryTypes[newtypeidx] = origTypes[i];
memmap[newtypeidx++] = i;
}
}
}
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(bool buffer, VkMemoryRequirements mrq,
MemoryScope scope, MemoryType type)
{
MemoryAllocation ret;
ret.scope = scope;
ret.type = type;
ret.buffer = buffer;
ret.size = AlignUp(mrq.size, mrq.alignment);
RDCDEBUG("Allocating 0x%llx with alignment 0x%llx in 0x%x for a %s (%s in %s)", ret.size,
mrq.alignment, mrq.memoryTypeBits, buffer ? "buffer" : "image", ToStr(type).c_str(),
ToStr(scope).c_str());
std::vector<MemoryAllocation> &blockList = m_MemoryBlocks[(size_t)scope];
// first try to find a match
int i = 0;
for(MemoryAllocation &block : blockList)
{
RDCDEBUG(
"Considering block %d: memory type %u and type %s. Total size 0x%llx, current offset "
"0x%llx, last alloc was %s",
i, block.memoryTypeIndex, ToStr(block.type).c_str(), block.size, block.offs,
block.buffer ? "buffer" : "image");
i++;
// skip this block if it's not the memory type we want
if(ret.type != block.type || (mrq.memoryTypeBits & (1 << block.memoryTypeIndex)) == 0)
{
RDCDEBUG("block type %d or memory type %d is incompatible", block.type, block.memoryTypeIndex);
continue;
}
// offs is where we can put our next sub-allocation
VkDeviceSize offs = block.offs;
// if we are on a buffer/image, account for any alignment we might have to do
if(ret.buffer != block.buffer)
offs = AlignUp(offs, m_PhysicalDeviceData.props.limits.bufferImageGranularity);
// align as required by the resource
offs = AlignUp(offs, mrq.alignment);
if(offs > block.size)
{
RDCDEBUG("Next offset 0x%llx would be off the end of the memory (size 0x%llx).", offs,
block.size);
continue;
}
VkDeviceSize avail = block.size - offs;
RDCDEBUG("At next offset 0x%llx, there's 0x%llx bytes available for 0x%llx bytes requested",
offs, avail, ret.size);
// if the allocation will fit, we've found our candidate.
if(ret.size <= avail)
{
// update the block offset and buffer/image bit
block.offs = offs + ret.size;
block.buffer = ret.buffer;
// update our return value
ret.offs = offs;
ret.mem = block.mem;
RDCDEBUG("Allocating using this block: 0x%llx -> 0x%llx", ret.offs, block.offs);
// stop searching
break;
}
}
if(ret.mem == VK_NULL_HANDLE)
{
RDCDEBUG("No available block found - allocating new block");
VkDeviceSize &allocSize = m_MemoryBlockSize[(size_t)scope];
// we start allocating 32M, then increment each time we need a new block.
switch(allocSize)
{
case 0: allocSize = 32; break;
case 32: allocSize = 64; break;
case 64: allocSize = 128; break;
case 128:
case 256: allocSize = 256; break;
default:
RDCDEBUG("Unexpected previous allocation size 0x%llx bytes, allocating 256MB", allocSize);
allocSize = 256;
break;
}
uint32_t memoryTypeIndex = 0;
switch(ret.type)
{
case MemoryType::Upload: memoryTypeIndex = GetUploadMemoryIndex(mrq.memoryTypeBits); break;
case MemoryType::GPULocal:
memoryTypeIndex = GetGPULocalMemoryIndex(mrq.memoryTypeBits);
break;
case MemoryType::Readback:
memoryTypeIndex = GetReadbackMemoryIndex(mrq.memoryTypeBits);
break;
}
VkMemoryAllocateInfo info = {
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, allocSize * 1024 * 1024, memoryTypeIndex,
};
if(ret.size > info.allocationSize)
{
// if we get an over-sized allocation, first try to immediately jump to the largest block
// size.
allocSize = 256;
info.allocationSize = allocSize * 1024 * 1024;
// if it's still over-sized, just allocate precisely enough and give it a dedicated allocation
if(ret.size > info.allocationSize)
{
RDCDEBUG("Over-sized allocation for 0x%llx bytes", ret.size);
info.allocationSize = ret.size;
}
}
RDCDEBUG("Creating new allocation of 0x%llx bytes", info.allocationSize);
MemoryAllocation chunk;
chunk.buffer = ret.buffer;
chunk.memoryTypeIndex = memoryTypeIndex;
chunk.scope = scope;
chunk.type = type;
chunk.size = info.allocationSize;
// the offset starts immediately after this allocation
chunk.offs = ret.size;
VkDevice d = GetDev();
// do the actual allocation
VkResult vkr = ObjDisp(d)->AllocateMemory(Unwrap(d), &info, NULL, &chunk.mem);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
GetResourceManager()->WrapResource(Unwrap(d), chunk.mem);
// push the new chunk
blockList.push_back(chunk);
// return the first bytes in the new chunk
ret.offs = 0;
ret.mem = chunk.mem;
}
return ret;
}
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(VkImage im, MemoryScope scope,
MemoryType type)
{
VkDevice d = GetDev();
VkMemoryRequirements mrq = {};
ObjDisp(d)->GetImageMemoryRequirements(Unwrap(d), Unwrap(im), &mrq);
return AllocateMemoryForResource(false, mrq, scope, type);
}
MemoryAllocation WrappedVulkan::AllocateMemoryForResource(VkBuffer buf, MemoryScope scope,
MemoryType type)
{
VkDevice d = GetDev();
VkMemoryRequirements mrq = {};
ObjDisp(d)->GetBufferMemoryRequirements(Unwrap(d), Unwrap(buf), &mrq);
return AllocateMemoryForResource(true, mrq, scope, type);
}
void WrappedVulkan::FreeAllMemory(MemoryScope scope)
{
std::vector<MemoryAllocation> &allocList = m_MemoryBlocks[(size_t)scope];
if(allocList.empty())
return;
VkDevice d = GetDev();
for(MemoryAllocation alloc : allocList)
{
ObjDisp(d)->FreeMemory(Unwrap(d), Unwrap(alloc.mem), NULL);
GetResourceManager()->ReleaseWrappedResource(alloc.mem);
}
allocList.clear();
}
void WrappedVulkan::FreeMemoryAllocation(MemoryAllocation alloc)
{
// don't do anything at the moment, we only support freeing the whole scope at once.
}