mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
Modify memory properties to discourage coherent maps & remap memory idxs
This commit is contained in:
@@ -345,6 +345,9 @@ WrappedVulkan::~WrappedVulkan()
|
||||
|
||||
SAFE_DELETE(m_pSerialiser);
|
||||
|
||||
for(size_t i=0; i < m_MemIdxMaps.size(); i++)
|
||||
delete[] m_MemIdxMaps[i];
|
||||
|
||||
for(size_t i=0; i < m_ThreadSerialisers.size(); i++)
|
||||
delete m_ThreadSerialisers[i];
|
||||
|
||||
|
||||
@@ -173,6 +173,8 @@ private:
|
||||
uint32_t uploadMemIndex;
|
||||
uint32_t GPULocalMemIndex;
|
||||
|
||||
uint32_t *memIdxMap;
|
||||
|
||||
VkPhysicalDeviceFeatures features;
|
||||
VkPhysicalDeviceProperties props;
|
||||
VkPhysicalDeviceMemoryProperties memProps;
|
||||
@@ -185,6 +187,11 @@ private:
|
||||
uint32_t m_QueueFamilyIdx; // the family index that we've selected in CreateDevice for our queue
|
||||
VkQueue m_Queue; // the queue used for our own command buffer work
|
||||
|
||||
vector<VkPhysicalDevice> m_PhysicalDevices;
|
||||
|
||||
vector<uint32_t *> m_MemIdxMaps;
|
||||
void RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap);
|
||||
|
||||
struct
|
||||
{
|
||||
void Reset()
|
||||
@@ -379,21 +386,6 @@ private:
|
||||
vector<VkDescriptorInfo *> currentBindings;
|
||||
};
|
||||
|
||||
// Tracks a single live memory mapping made by the application while capturing.
// Default-constructed to an "empty" state (null handles, zero range).
struct MapState
{
	MapState()
		: device(VK_NULL_HANDLE), mapOffset(0), mapSize(0), mapFlags(0)
		, mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL)
	{ }
	// device the memory was mapped on, so flushes can be issued later
	VkDevice device;
	// byte offset and size of the mapped range within the memory object
	VkDeviceSize mapOffset, mapSize;
	// flags passed to vkMapMemory
	VkMemoryMapFlags mapFlags;
	// frame number when the map was made (used when logging forced flushes)
	uint32_t mapFrame;
	// whether the app has explicitly flushed this range
	bool mapFlushed;
	// CPU pointer returned by vkMapMemory
	void *mappedPtr;
	// NOTE(review): presumably a shadow copy used to diff the mapped range
	// for change detection — confirm against the flush/diff code
	byte *refData;
};
|
||||
|
||||
// capture-side data
|
||||
|
||||
// holds the current list of mapped memory. Locked against concurrent use
|
||||
|
||||
@@ -81,3 +81,87 @@ uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequ
|
||||
}
|
||||
return best;
|
||||
}
|
||||
|
||||
// Rewrites the physical device's memory properties to discourage coherent
// memory maps during capture, and builds a table mapping each (possibly
// fake) memory type index we expose to the application back to the real
// memory type index on the device.
//
// memProps is modified in place. *memIdxMap receives the new table, which
// is also stored in m_MemIdxMaps so it can be freed in ~WrappedVulkan.
void WrappedVulkan::RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap)
{
	// sized to VK_MAX_MEMORY_TYPES (not a magic 32) to match the bounds
	// checks and origTypes copy below
	uint32_t *memmap = new uint32_t[VK_MAX_MEMORY_TYPES];
	*memIdxMap = memmap;
	m_MemIdxMaps.push_back(memmap);

	RDCEraseMem(memmap, sizeof(uint32_t)*VK_MAX_MEMORY_TYPES);

	// basic idea here:
	// We want to discourage coherent memory maps as much as possible while capturing,
	// as they're painful to track. Unfortunately the spec guarantees that at least
	// one such memory type will be available, and we must follow that.
	//
	// So, rather than removing the coherent memory type we make it as unappealing as
	// possible and try and ensure that only someone looking specifically for a coherent
	// memory type will find it. That way hopefully memory selection algorithms will
	// pick non-coherent memory and do proper flushing as necessary.

	// we want to add a new heap, hopefully there is room
	RDCASSERT(memProps->memoryHeapCount < VK_MAX_MEMORY_HEAPS-1);

	uint32_t coherentHeap = memProps->memoryHeapCount;
	memProps->memoryHeapCount++;

	// make a new heap that's tiny. If any applications look at heap sizes to determine
	// viability, they'll dislike the look of this one (the real heaps should be much
	// bigger).
	memProps->memoryHeaps[coherentHeap].flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT;
	memProps->memoryHeaps[coherentHeap].size = 32*1024*1024;

	// for every coherent memory type, add a non-coherent type first, then
	// mark the coherent type with our crappy heap

	uint32_t origCount = memProps->memoryTypeCount;
	VkMemoryType origTypes[VK_MAX_MEMORY_TYPES];
	memcpy(origTypes, memProps->memoryTypes, sizeof(origTypes));

	// index of the next fake memory type to emit
	uint32_t newtypeidx = 0;

	for(uint32_t i=0; i < origCount; i++)
	{
		// host-visible and NOT non-coherent => this is a coherent type
		if((origTypes[i].propertyFlags & (VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) == VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
		{
			// coherent type found.

			// can we still add a new type without exceeding the max?
			if(memProps->memoryTypeCount+1 <= VK_MAX_MEMORY_TYPES)
			{
				// copy both types from the original type
				memProps->memoryTypes[newtypeidx] = origTypes[i];
				memProps->memoryTypes[newtypeidx+1] = origTypes[i];

				// mark first as non-coherent
				memProps->memoryTypes[newtypeidx].propertyFlags &= ~VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT;
				memProps->memoryTypes[newtypeidx].propertyFlags |= VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT;

				// point second at bad heap
				memProps->memoryTypes[newtypeidx+1].heapIndex = coherentHeap;

				// point both new types at this original type
				memmap[newtypeidx++] = i;
				memmap[newtypeidx++] = i;

				// we added a type
				memProps->memoryTypeCount++;
			}
			else
			{
				// can't add a new type, but we can at least repoint this coherent
				// type at the bad heap to discourage use
				memProps->memoryTypes[newtypeidx] = origTypes[i];
				memProps->memoryTypes[newtypeidx].heapIndex = coherentHeap;
				memmap[newtypeidx++] = i;
			}
		}
		else
		{
			// non-coherent already or non-hostvisible, just copy through
			memProps->memoryTypes[newtypeidx] = origTypes[i];
			memmap[newtypeidx++] = i;
		}
	}
}
|
||||
|
||||
@@ -54,6 +54,13 @@ WRAPPED_POOL_INST(WrappedVkCmdPool)
|
||||
|
||||
WRAPPED_POOL_INST(WrappedVkSwapchainKHR)
|
||||
|
||||
// Marker pattern used to identify VkResourceRecord allocations.
// NOTE(review): only the first 16 of the 32 bytes are explicitly
// initialised here; C++ aggregate initialisation zero-fills the
// remaining 16 — confirm the trailing zeroes are intentional.
byte VkResourceRecord::markerValue[32] = {
	0xaa, 0xbb, 0xcc, 0xdd,
	0x88, 0x77, 0x66, 0x55,
	0x01, 0x23, 0x45, 0x67,
	0x98, 0x76, 0x54, 0x32,
};
|
||||
|
||||
bool IsDispatchableRes(WrappedVkRes *ptr)
|
||||
{
|
||||
return (WrappedVkPhysicalDevice::IsAlloc(ptr) || WrappedVkInstance::IsAlloc(ptr)
|
||||
@@ -485,6 +492,8 @@ VkResourceRecord::~VkResourceRecord()
|
||||
delete[] descBindings[i];
|
||||
descBindings.clear();
|
||||
|
||||
SAFE_DELETE(memProps);
|
||||
|
||||
SAFE_DELETE(layout);
|
||||
SAFE_DELETE(swapInfo);
|
||||
SAFE_DELETE(cmdInfo);
|
||||
|
||||
@@ -592,6 +592,20 @@ struct CmdBufferRecordingInfo
|
||||
set<VkDescriptorSet> boundDescSets;
|
||||
};
|
||||
|
||||
// Tracks a single live memory mapping made by the application while
// capturing. Populated in vkMapMemory (offset, size — with size==0
// meaning "whole allocation" via the record's Length — and the mapped
// pointer), and consulted at queue submit time to force-flush
// persistent maps. Default-constructed to an empty state.
struct MapState
{
	MapState()
		: mapOffset(0), mapSize(0), mapFlags(0)
		, mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL)
	{ }
	// byte offset and size of the mapped range within the memory object
	VkDeviceSize mapOffset, mapSize;
	// flags passed to vkMapMemory
	VkMemoryMapFlags mapFlags;
	// frame number when the map was made (logged when a flush is forced)
	uint32_t mapFrame;
	// whether the app has explicitly flushed this range
	bool mapFlushed;
	// CPU pointer returned by vkMapMemory
	void *mappedPtr;
	// NOTE(review): presumably a shadow copy used to diff the mapped range
	// for change detection — confirm against the flush/diff code
	byte *refData;
};
|
||||
|
||||
struct DescSetLayout;
|
||||
|
||||
struct VkResourceRecord : public ResourceRecord
|
||||
@@ -599,12 +613,15 @@ struct VkResourceRecord : public ResourceRecord
|
||||
public:
|
||||
enum { NullResource = (unsigned int)NULL };
|
||||
|
||||
static byte markerValue[32];
|
||||
|
||||
VkResourceRecord(ResourceId id) :
|
||||
ResourceRecord(id, true),
|
||||
bakedCommands(NULL),
|
||||
pool(NULL),
|
||||
mem(VK_NULL_HANDLE),
|
||||
memOffset(0),
|
||||
memProps(NULL),
|
||||
layout(NULL),
|
||||
swapInfo(NULL),
|
||||
cmdInfo(NULL)
|
||||
@@ -656,6 +673,13 @@ struct VkResourceRecord : public ResourceRecord
|
||||
|
||||
VkDeviceMemory mem;
|
||||
VkDeviceSize memOffset;
|
||||
|
||||
VkPhysicalDeviceMemoryProperties *memProps;
|
||||
|
||||
// externally allocated/freed, a mapping from memory idx
|
||||
// in our modified properties that were passed to the app
|
||||
// to the memory indices that actually exist
|
||||
uint32_t *memIdxMap;
|
||||
|
||||
// this points to the base resource, either memory or an image -
|
||||
// ie. the resource that can be modified or changes (or can become dirty)
|
||||
|
||||
@@ -214,6 +214,12 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
|
||||
SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount);
|
||||
SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices));
|
||||
|
||||
uint32_t memIdxMap[32] = {0};
|
||||
if(m_State >= WRITING)
|
||||
memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap));
|
||||
|
||||
localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap);
|
||||
|
||||
VkPhysicalDevice pd = VK_NULL_HANDLE;
|
||||
|
||||
if(m_State >= WRITING)
|
||||
@@ -243,6 +249,18 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices(
|
||||
|
||||
GetResourceManager()->WrapResource(instance, pd);
|
||||
GetResourceManager()->AddLiveResource(physId, pd);
|
||||
|
||||
if(physIndex >= m_PhysicalDevices.size())
|
||||
{
|
||||
m_PhysicalDevices.resize(physIndex+1);
|
||||
m_MemIdxMaps.resize(physIndex+1);
|
||||
}
|
||||
|
||||
m_PhysicalDevices[physIndex] = pd;
|
||||
|
||||
uint32_t *storedMap = new uint32_t[32];
|
||||
memcpy(storedMap, memIdxMap, sizeof(memIdxMap));
|
||||
m_MemIdxMaps[physIndex] = storedMap;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -277,18 +295,29 @@ VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
|
||||
else
|
||||
{
|
||||
GetResourceManager()->WrapResource(instance, devices[i]);
|
||||
|
||||
|
||||
if(m_State >= WRITING)
|
||||
{
|
||||
CACHE_THREAD_SERIALISER();
|
||||
|
||||
SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
|
||||
Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);
|
||||
|
||||
// add the record first since it's used in the serialise function below to fetch
|
||||
// the memory indices
|
||||
VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]);
|
||||
RDCASSERT(record);
|
||||
|
||||
record->memProps = new VkPhysicalDeviceMemoryProperties();
|
||||
|
||||
record->AddChunk(scope.Get());
|
||||
ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);
|
||||
|
||||
// we remap memory indices to discourage coherent maps as much as possible
|
||||
RemapMemoryIndices(record->memProps, &record->memIdxMap);
|
||||
|
||||
{
|
||||
CACHE_THREAD_SERIALISER();
|
||||
|
||||
SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS);
|
||||
Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]);
|
||||
|
||||
record->AddChunk(scope.Get());
|
||||
}
|
||||
|
||||
VkResourceRecord *instrecord = GetRecord(instance);
|
||||
|
||||
@@ -449,6 +478,15 @@ bool WrappedVulkan::Serialise_vkCreateDevice(
|
||||
m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
|
||||
m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_ONLY, 0);
|
||||
|
||||
for(size_t i=0; i < m_PhysicalDevices.size(); i++)
|
||||
{
|
||||
if(physicalDevice == m_PhysicalDevices[i])
|
||||
{
|
||||
m_PhysicalDeviceData.memIdxMap = m_MemIdxMaps[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_DebugManager = new VulkanDebugManager(this, device);
|
||||
|
||||
SAFE_DELETE_ARRAY(modQueues);
|
||||
@@ -551,7 +589,14 @@ VkResult WrappedVulkan::vkCreateDevice(
|
||||
chunk = scope.Get();
|
||||
}
|
||||
|
||||
GetRecord(m_Instance)->AddChunk(chunk);
|
||||
VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice);
|
||||
RDCASSERT(record);
|
||||
|
||||
record->AddChunk(chunk);
|
||||
|
||||
record->memIdxMap = GetRecord(physicalDevice)->memIdxMap;
|
||||
|
||||
GetRecord(m_Instance)->AddParent(record);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -89,6 +89,12 @@ VkResult WrappedVulkan::vkGetPhysicalDeviceMemoryProperties(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
|
||||
{
|
||||
if(pMemoryProperties)
|
||||
{
|
||||
*pMemoryProperties = *GetRecord(physicalDevice)->memProps;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
return ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), pMemoryProperties);
|
||||
}
|
||||
|
||||
@@ -106,7 +112,24 @@ VkResult WrappedVulkan::vkGetBufferMemoryRequirements(
|
||||
VkBuffer buffer,
|
||||
VkMemoryRequirements* pMemoryRequirements)
|
||||
{
|
||||
return ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements);
|
||||
VkResult vkr = ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements);
|
||||
|
||||
// don't do remapping here on replay.
|
||||
if(m_State < WRITING)
|
||||
return vkr;
|
||||
|
||||
uint32_t bits = pMemoryRequirements->memoryTypeBits;
|
||||
uint32_t *memIdxMap = GetRecord(device)->memIdxMap;
|
||||
|
||||
pMemoryRequirements->memoryTypeBits = 0;
|
||||
|
||||
// for each of our fake memory indices, check if the real
|
||||
// memory type it points to is set - if so, set our fake bit
|
||||
for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++)
|
||||
if(bits & (1<<memIdxMap[i]) )
|
||||
pMemoryRequirements->memoryTypeBits |= (1<<i);
|
||||
|
||||
return vkr;
|
||||
}
|
||||
|
||||
VkResult WrappedVulkan::vkGetImageMemoryRequirements(
|
||||
@@ -114,7 +137,24 @@ VkResult WrappedVulkan::vkGetImageMemoryRequirements(
|
||||
VkImage image,
|
||||
VkMemoryRequirements* pMemoryRequirements)
|
||||
{
|
||||
return ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements);
|
||||
VkResult vkr = ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements);
|
||||
|
||||
// don't do remapping here on replay.
|
||||
if(m_State < WRITING)
|
||||
return vkr;
|
||||
|
||||
uint32_t bits = pMemoryRequirements->memoryTypeBits;
|
||||
uint32_t *memIdxMap = GetRecord(device)->memIdxMap;
|
||||
|
||||
pMemoryRequirements->memoryTypeBits = 0;
|
||||
|
||||
// for each of our fake memory indices, check if the real
|
||||
// memory type it points to is set - if so, set our fake bit
|
||||
for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++)
|
||||
if(bits & (1<<memIdxMap[i]) )
|
||||
pMemoryRequirements->memoryTypeBits |= (1<<i);
|
||||
|
||||
return vkr;
|
||||
}
|
||||
|
||||
VkResult WrappedVulkan::vkGetImageSparseMemoryRequirements(
|
||||
|
||||
@@ -451,10 +451,14 @@ VkResult WrappedVulkan::vkQueueSubmit(
|
||||
|
||||
if(found)
|
||||
{
|
||||
// VKTODOLOW won't work with multiple devices - maybe find device for the specified queue?
|
||||
// we probably only want to flush maps associated with this queue anyway
|
||||
VkDevice dev = GetDev();
|
||||
|
||||
{
|
||||
RDCLOG("Persistent map flush forced for %llu (%llu -> %llu) [mapped in %u, flushed %u]", it->first, (uint64_t)diffStart, (uint64_t)diffEnd, it->second.mapFrame, it->second.mapFlushed);
|
||||
VkMappedMemoryRange range = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, GetResourceManager()->GetCurrentHandle<VkDeviceMemory>(it->first), it->second.mapOffset+diffStart, diffEnd-diffStart };
|
||||
vkFlushMappedMemoryRanges(it->second.device, 1, &range);
|
||||
vkFlushMappedMemoryRanges(dev, 1, &range);
|
||||
}
|
||||
|
||||
GetResourceManager()->MarkPendingDirty(it->first);
|
||||
|
||||
@@ -42,8 +42,11 @@ bool WrappedVulkan::Serialise_vkAllocMemory(
|
||||
|
||||
device = GetResourceManager()->GetLiveHandle<VkDevice>(devId);
|
||||
|
||||
// serialised memory type index is non-remapped, so we remap now.
|
||||
// VKTODOLOW may need to re-write info to change memory type index to the
|
||||
// appropriate index on replay
|
||||
info.memoryTypeIndex = m_PhysicalDeviceData.memIdxMap[info.memoryTypeIndex];
|
||||
|
||||
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, &mem);
|
||||
|
||||
if(ret != VK_SUCCESS)
|
||||
@@ -67,7 +70,9 @@ VkResult WrappedVulkan::vkAllocMemory(
|
||||
const VkMemoryAllocInfo* pAllocInfo,
|
||||
VkDeviceMemory* pMem)
|
||||
{
|
||||
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), pAllocInfo, pMem);
|
||||
VkMemoryAllocInfo info = *pAllocInfo;
|
||||
info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex];
|
||||
VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, pMem);
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
{
|
||||
@@ -138,7 +143,6 @@ VkResult WrappedVulkan::vkMapMemory(
|
||||
if(m_State >= WRITING)
|
||||
{
|
||||
MapState state;
|
||||
state.device = device;
|
||||
state.mappedPtr = *ppData;
|
||||
state.mapOffset = offset;
|
||||
state.mapSize = size == 0 ? GetRecord(mem)->Length : size;
|
||||
|
||||
Reference in New Issue
Block a user