Modify memory properties to discourage coherent maps & remap memory idxs

This commit is contained in:
baldurk
2015-10-30 13:58:49 +01:00
parent 6bdf285166
commit 156c75b26d
9 changed files with 233 additions and 28 deletions
+3
@@ -345,6 +345,9 @@ WrappedVulkan::~WrappedVulkan()
SAFE_DELETE(m_pSerialiser);
for(size_t i=0; i < m_MemIdxMaps.size(); i++)
delete[] m_MemIdxMaps[i];
for(size_t i=0; i < m_ThreadSerialisers.size(); i++)
delete m_ThreadSerialisers[i];
+7 -15
@@ -173,6 +173,8 @@ private:
uint32_t uploadMemIndex;
uint32_t GPULocalMemIndex;
uint32_t *memIdxMap;
VkPhysicalDeviceFeatures features;
VkPhysicalDeviceProperties props;
VkPhysicalDeviceMemoryProperties memProps;
@@ -185,6 +187,11 @@ private:
uint32_t m_QueueFamilyIdx; // the family index that we've selected in CreateDevice for our queue
VkQueue m_Queue; // the queue used for our own command buffer work
vector<VkPhysicalDevice> m_PhysicalDevices;
vector<uint32_t *> m_MemIdxMaps;
void RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap);
struct
{
void Reset()
@@ -379,21 +386,6 @@ private:
vector<VkDescriptorInfo *> currentBindings;
};
struct MapState
{
MapState()
: device(VK_NULL_HANDLE), mapOffset(0), mapSize(0), mapFlags(0)
, mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL)
{ }
VkDevice device;
VkDeviceSize mapOffset, mapSize;
VkMemoryMapFlags mapFlags;
uint32_t mapFrame;
bool mapFlushed;
void *mappedPtr;
byte *refData;
};
// capture-side data
// holds the current list of mapped memory. Locked against concurrent use
+84
@@ -81,3 +81,87 @@ uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequ
}
return best;
}
void WrappedVulkan::RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap)
{
uint32_t *memmap = new uint32_t[32];
*memIdxMap = memmap;
m_MemIdxMaps.push_back(memmap);
RDCEraseMem(memmap, sizeof(uint32_t)*32);
// basic idea here:
// We want to discourage coherent memory maps as much as possible while capturing,
// as they're painful to track. Unfortunately the spec guarantees that at least
// one such memory type will be available, and we must follow that.
//
// So, rather than removing the coherent memory type we make it as unappealing as
// possible and try and ensure that only someone looking specifically for a coherent
// memory type will find it. That way hopefully memory selection algorithms will
// pick non-coherent memory and do proper flushing as necessary.
// we want to add a new heap, hopefully there is room
RDCASSERT(memProps->memoryHeapCount < VK_MAX_MEMORY_HEAPS-1);
uint32_t coherentHeap = memProps->memoryHeapCount;
memProps->memoryHeapCount++;
// make a new heap that's tiny. If any applications look at heap sizes to determine
// viability, they'll dislike the look of this one (the real heaps should be much
// bigger).
memProps->memoryHeaps[coherentHeap].flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT;
memProps->memoryHeaps[coherentHeap].size = 32*1024*1024;
// for every coherent memory type, add a non-coherent type first, then
// mark the coherent type with our crappy heap
uint32_t origCount = memProps->memoryTypeCount;
VkMemoryType origTypes[VK_MAX_MEMORY_TYPES];
memcpy(origTypes, memProps->memoryTypes, sizeof(origTypes));
uint32_t newtypeidx = 0;
for(uint32_t i=0; i < origCount; i++)
{
if((origTypes[i].propertyFlags & (VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) == VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
{
// coherent type found.
// can we still add a new type without exceeding the max?
if(memProps->memoryTypeCount+1 <= VK_MAX_MEMORY_TYPES)
{
// copy both types from the original type
memProps->memoryTypes[newtypeidx] = origTypes[i];
memProps->memoryTypes[newtypeidx+1] = origTypes[i];
// mark first as non-coherent
memProps->memoryTypes[newtypeidx].propertyFlags &= ~VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT;
memProps->memoryTypes[newtypeidx].propertyFlags |= VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT;
// point second at bad heap
memProps->memoryTypes[newtypeidx+1].heapIndex = coherentHeap;
// point both new types at this original type
memmap[newtypeidx++] = i;
memmap[newtypeidx++] = i;
// we added a type
memProps->memoryTypeCount++;
}
else
{
// can't add a new type, but we can at least repoint this coherent
// type at the bad heap to discourage use
memProps->memoryTypes[newtypeidx] = origTypes[i];
memProps->memoryTypes[newtypeidx].heapIndex = coherentHeap;
memmap[newtypeidx++] = i;
}
}
else
{
// non-coherent already or non-hostvisible, just copy through
memProps->memoryTypes[newtypeidx] = origTypes[i];
memmap[newtypeidx++] = i;
}
}
}
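To see why the reordering works, consider the canonical first-match selection loop most Vulkan applications use (an illustrative sketch, not code from this commit): walk the type list and take the first index whose bit is set in memoryTypeBits and whose flags contain everything wanted. Because RemapMemoryIndices writes the non-coherent clone at the lower index, an application asking only for VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT lands on the clone; the coherent original is reached only by a search that specifically seeks it out, and its tiny heap deters any heuristic that compares heap sizes.

// Illustrative app-side selection (not RenderDoc code): first matching type wins.
uint32_t FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties &memProps,
                             uint32_t memoryTypeBits, VkMemoryPropertyFlags wanted)
{
  for(uint32_t i = 0; i < memProps.memoryTypeCount; i++)
  {
    // the type must be allowed for this resource and carry all wanted flags
    if((memoryTypeBits & (1u << i)) != 0 &&
       (memProps.memoryTypes[i].propertyFlags & wanted) == wanted)
      return i;
  }
  return ~0U; // no suitable type found
}

An application that selects this way ends up on non-coherent memory and must issue explicit flushes after writing through its maps, which are far easier for the capture layer to track than writes through a coherent map.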
+9
@@ -54,6 +54,13 @@ WRAPPED_POOL_INST(WrappedVkCmdPool)
WRAPPED_POOL_INST(WrappedVkSwapchainKHR)
byte VkResourceRecord::markerValue[32] = {
0xaa, 0xbb, 0xcc, 0xdd,
0x88, 0x77, 0x66, 0x55,
0x01, 0x23, 0x45, 0x67,
0x98, 0x76, 0x54, 0x32,
};
bool IsDispatchableRes(WrappedVkRes *ptr)
{
return (WrappedVkPhysicalDevice::IsAlloc(ptr) || WrappedVkInstance::IsAlloc(ptr)
@@ -485,6 +492,8 @@ VkResourceRecord::~VkResourceRecord()
delete[] descBindings[i];
descBindings.clear();
SAFE_DELETE(memProps);
SAFE_DELETE(layout);
SAFE_DELETE(swapInfo);
SAFE_DELETE(cmdInfo);
+24
@@ -592,6 +592,20 @@ struct CmdBufferRecordingInfo
set<VkDescriptorSet> boundDescSets;
};
struct MapState
{
MapState()
: mapOffset(0), mapSize(0), mapFlags(0)
, mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL)
{ }
VkDeviceSize mapOffset, mapSize;
VkMemoryMapFlags mapFlags;
uint32_t mapFrame;
bool mapFlushed;
void *mappedPtr;
byte *refData;
};
struct DescSetLayout;
struct VkResourceRecord : public ResourceRecord
@@ -599,12 +613,15 @@ struct VkResourceRecord : public ResourceRecord
public:
enum { NullResource = (unsigned int)NULL };
static byte markerValue[32];
VkResourceRecord(ResourceId id) :
ResourceRecord(id, true),
bakedCommands(NULL),
pool(NULL),
mem(VK_NULL_HANDLE),
memOffset(0),
memProps(NULL),
layout(NULL),
swapInfo(NULL),
cmdInfo(NULL)
@@ -656,6 +673,13 @@ struct VkResourceRecord : public ResourceRecord
VkDeviceMemory mem;
VkDeviceSize memOffset;
VkPhysicalDeviceMemoryProperties *memProps;
// externally allocated/freed. Maps a memory index from our
// modified properties (the ones passed to the app) to the
// memory index that actually exists on the driver
uint32_t *memIdxMap;
// this points to the base resource, either memory or an image -
// ie. the resource that can be modified or changes (or can become dirty)
@@ -214,6 +214,12 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount);
SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices));
uint32_t memIdxMap[32] = {0};
if(m_State >= WRITING)
memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap));
localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap);
VkPhysicalDevice pd = VK_NULL_HANDLE;
if(m_State >= WRITING)
@@ -243,6 +249,18 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
GetResourceManager()->WrapResource(instance, pd);
GetResourceManager()->AddLiveResource(physId, pd);
if(physIndex >= m_PhysicalDevices.size())
{
m_PhysicalDevices.resize(physIndex+1);
m_MemIdxMaps.resize(physIndex+1);
}
m_PhysicalDevices[physIndex] = pd;
uint32_t *storedMap = new uint32_t[32];
memcpy(storedMap, memIdxMap, sizeof(memIdxMap));
m_MemIdxMaps[physIndex] = storedMap;
}
return true;
@@ -277,18 +295,29 @@ VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
else
{
GetResourceManager()->WrapResource(instance, devices[i]);
if(m_State >= WRITING)
{
CACHE_THREAD_SERIALISER();
SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);
// add the record first since it's used in the serialise function below to fetch
// the memory indices
VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]);
RDCASSERT(record);
record->memProps = new VkPhysicalDeviceMemoryProperties();
record->AddChunk(scope.Get());
ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);
// we remap memory indices to discourage coherent maps as much as possible
RemapMemoryIndices(record->memProps, &record->memIdxMap);
{
CACHE_THREAD_SERIALISER();
SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);
record->AddChunk(scope.Get());
}
VkResourceRecord *instrecord = GetRecord(instance);
@@ -449,6 +478,15 @@ bool WrappedVulkan::Serialise_vkCreateDevice(
m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_ONLY, 0);
for(size_t i=0; i < m_PhysicalDevices.size(); i++)
{
if(physicalDevice == m_PhysicalDevices[i])
{
m_PhysicalDeviceData.memIdxMap = m_MemIdxMaps[i];
break;
}
}
m_DebugManager = new VulkanDebugManager(this, device);
SAFE_DELETE_ARRAY(modQueues);
@@ -551,7 +589,14 @@ VkResult WrappedVulkan::vkCreateDevice(
chunk = scope.Get();
}
GetRecord(m_Instance)->AddChunk(chunk);
VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice);
RDCASSERT(record);
record->AddChunk(chunk);
record->memIdxMap = GetRecord(physicalDevice)->memIdxMap;
GetRecord(m_Instance)->AddParent(record);
}
else
{
@@ -89,6 +89,12 @@ VkResult WrappedVulkan::vkGetPhysicalDeviceMemoryProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
{
if(pMemoryProperties)
{
*pMemoryProperties = *GetRecord(physicalDevice)->memProps;
return VK_SUCCESS;
}
return ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), pMemoryProperties);
}
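From the application's side the substitution is invisible; the normal query simply returns the doctored table (a sketch of what a captured app now observes, not code from this commit):

// The app's query is answered from the cached, modified properties.
VkPhysicalDeviceMemoryProperties props;
vkGetPhysicalDeviceMemoryProperties(physicalDevice, &props);
// props now includes the tiny extra heap and the non-coherent clones that
// RemapMemoryIndices produced, rather than the driver's real table.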
@@ -106,7 +112,24 @@ VkResult WrappedVulkan::vkGetBufferMemoryRequirements(
VkBuffer buffer,
VkMemoryRequirements* pMemoryRequirements)
{
return ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements);
VkResult vkr = ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements);
// don't do remapping here on replay.
if(m_State < WRITING)
return vkr;
uint32_t bits = pMemoryRequirements->memoryTypeBits;
uint32_t *memIdxMap = GetRecord(device)->memIdxMap;
pMemoryRequirements->memoryTypeBits = 0;
// for each of our fake memory indices, check if the real
// memory type it points to is set - if so, set our fake bit
for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++)
if(bits & (1<<memIdxMap[i]) )
pMemoryRequirements->memoryTypeBits |= (1<<i);
return vkr;
}
VkResult WrappedVulkan::vkGetImageMemoryRequirements(
@@ -114,7 +137,24 @@ VkResult WrappedVulkan::vkGetImageMemoryRequirements(
VkImage image,
VkMemoryRequirements* pMemoryRequirements)
{
return ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements);
VkResult vkr = ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements);
// don't do remapping here on replay.
if(m_State < WRITING)
return vkr;
uint32_t bits = pMemoryRequirements->memoryTypeBits;
uint32_t *memIdxMap = GetRecord(device)->memIdxMap;
pMemoryRequirements->memoryTypeBits = 0;
// for each of our fake memory indices, check if the real
// memory type it points to is set - if so, set our fake bit
for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++)
if(bits & (1<<memIdxMap[i]) )
pMemoryRequirements->memoryTypeBits |= (1<<i);
return vkr;
}
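A worked example of the remapping loop, with invented values: say the driver exposes real type 0 (device-local) and real type 1 (host-visible coherent), and RemapMemoryIndices produced three fake types with memIdxMap = {0, 1, 1}. If the driver reports memoryTypeBits = 0x2, both fake indices that alias real type 1 must be advertised:

// Invented values for illustration; the map comes from RemapMemoryIndices.
uint32_t memIdxMap[VK_MAX_MEMORY_TYPES] = { 0, 1, 1 }; // fake idx -> real idx
uint32_t realBits = 0x2; // driver says: only real type 1 is usable
uint32_t fakeBits = 0;
for(uint32_t i = 0; i < VK_MAX_MEMORY_TYPES; i++)
  if(realBits & (1u << memIdxMap[i]))
    fakeBits |= (1u << i);
// fakeBits == 0x6: fake type 1 (the non-coherent clone) and fake type 2
// (the demoted coherent type) are both allowed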
VkResult WrappedVulkan::vkGetImageSparseMemoryRequirements(
@@ -451,10 +451,14 @@ VkResult WrappedVulkan::vkQueueSubmit(
if(found)
{
// VKTODOLOW won't work with multiple devices - maybe find device for the specified queue?
// we probably only want to flush maps associated with this queue anyway
VkDevice dev = GetDev();
{
RDCLOG("Persistent map flush forced for %llu (%llu -> %llu) [mapped in %u, flushed %u]", it->first, (uint64_t)diffStart, (uint64_t)diffEnd, it->second.mapFrame, it->second.mapFlushed);
VkMappedMemoryRange range = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, GetResourceManager()->GetCurrentHandle<VkDeviceMemory>(it->first), it->second.mapOffset+diffStart, diffEnd-diffStart };
vkFlushMappedMemoryRanges(it->second.device, 1, &range);
vkFlushMappedMemoryRanges(dev, 1, &range);
}
GetResourceManager()->MarkPendingDirty(it->first);
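The forced flush above is the same call an application that landed on one of the new non-coherent types is expected to make itself after writing through a mapping (a hypothetical sketch; the names and written range are invented, and the field order matches the 2015-era VkMappedMemoryRange used in this commit):

// Hypothetical app-side flush of the first writeSize bytes of a mapping.
void FlushWrittenRange(VkDevice dev, VkDeviceMemory mem, VkDeviceSize writeSize)
{
  VkMappedMemoryRange range = {
      VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL,
      mem,       // the memory object that is currently mapped
      0,         // offset of the written region within the allocation
      writeSize, // number of bytes written through the mapping
  };
  vkFlushMappedMemoryRanges(dev, 1, &range);
}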
@@ -42,8 +42,11 @@ bool WrappedVulkan::Serialise_vkAllocMemory(
device = GetResourceManager()->GetLiveHandle<VkDevice>(devId);
// the serialised memory type index is the app-visible (non-remapped) one,
// so we remap it to a real index now.
// VKTODOLOW may need to re-write info to change memory type index to the
// appropriate index on replay
info.memoryTypeIndex = m_PhysicalDeviceData.memIdxMap[info.memoryTypeIndex];
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, &mem);
if(ret != VK_SUCCESS)
@@ -67,7 +70,9 @@ VkResult WrappedVulkan::vkAllocMemory(
const VkMemoryAllocInfo* pAllocInfo,
VkDeviceMemory* pMem)
{
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), pAllocInfo, pMem);
VkMemoryAllocInfo info = *pAllocInfo;
info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, pMem);
if(ret == VK_SUCCESS)
{
@@ -138,7 +143,6 @@ VkResult WrappedVulkan::vkMapMemory(
if(m_State >= WRITING)
{
MapState state;
state.device = device;
state.mappedPtr = *ppData;
state.mapOffset = offset;
state.mapSize = size == 0 ? GetRecord(mem)->Length : size;
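The flush-forcing path in vkQueueSubmit needs the byte range that actually changed (the diffStart/diffEnd in the log message above), which can be found by comparing the live mapping against the reference copy kept in MapState::refData. A minimal sketch of that comparison, with a hypothetical helper name:

// Hypothetical helper: find the dirty byte range by walking in from both ends,
// comparing the app-visible mapping against the copy taken at map time.
static bool FindDirtyRange(const byte *mapped, const byte *ref, size_t len,
                           size_t &diffStart, size_t &diffEnd)
{
  diffStart = 0;
  while(diffStart < len && mapped[diffStart] == ref[diffStart])
    diffStart++;

  if(diffStart == len)
    return false; // nothing changed - no flush needed

  diffEnd = len;
  while(diffEnd > diffStart && mapped[diffEnd-1] == ref[diffEnd-1])
    diffEnd--;

  return true;
}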