diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 6c082ebd6..38d0fd382 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -345,6 +345,9 @@ WrappedVulkan::~WrappedVulkan() SAFE_DELETE(m_pSerialiser); + for(size_t i=0; i < m_MemIdxMaps.size(); i++) + delete[] m_MemIdxMaps[i]; + for(size_t i=0; i < m_ThreadSerialisers.size(); i++) delete m_ThreadSerialisers[i]; diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index 436580c5f..d3f485f8e 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -173,6 +173,8 @@ private: uint32_t uploadMemIndex; uint32_t GPULocalMemIndex; + uint32_t *memIdxMap; + VkPhysicalDeviceFeatures features; VkPhysicalDeviceProperties props; VkPhysicalDeviceMemoryProperties memProps; @@ -185,6 +187,11 @@ private: uint32_t m_QueueFamilyIdx; // the family index that we've selected in CreateDevice for our queue VkQueue m_Queue; // the queue used for our own command buffer work + vector m_PhysicalDevices; + + vector m_MemIdxMaps; + void RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap); + struct { void Reset() @@ -379,21 +386,6 @@ private: vector currentBindings; }; - struct MapState - { - MapState() - : device(VK_NULL_HANDLE), mapOffset(0), mapSize(0), mapFlags(0) - , mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL) - { } - VkDevice device; - VkDeviceSize mapOffset, mapSize; - VkMemoryMapFlags mapFlags; - uint32_t mapFrame; - bool mapFlushed; - void *mappedPtr; - byte *refData; - }; - // capture-side data // holds the current list of mapped memory. Locked against concurrent use diff --git a/renderdoc/driver/vulkan/vk_memory.cpp b/renderdoc/driver/vulkan/vk_memory.cpp index 350745579..ed36f545b 100644 --- a/renderdoc/driver/vulkan/vk_memory.cpp +++ b/renderdoc/driver/vulkan/vk_memory.cpp @@ -81,3 +81,87 @@ uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequ } return best; } + +void WrappedVulkan::RemapMemoryIndices(VkPhysicalDeviceMemoryProperties *memProps, uint32_t **memIdxMap) +{ + uint32_t *memmap = new uint32_t[32]; + *memIdxMap = memmap; + m_MemIdxMaps.push_back(memmap); + + RDCEraseMem(memmap, sizeof(uint32_t)*32); + + // basic idea here: + // We want to discourage coherent memory maps as much as possible while capturing, + // as they're painful to track. Unfortunately the spec guarantees that at least + // one such memory type will be available, and we must follow that. + // + // So, rather than removing the coherent memory type we make it as unappealing as + // possible and try and ensure that only someone looking specifically for a coherent + // memory type will find it. That way hopefully memory selection algorithms will + // pick non-coherent memory and do proper flushing as necessary. + + // we want to add a new heap, hopefully there is room + RDCASSERT(memProps->memoryHeapCount < VK_MAX_MEMORY_HEAPS-1); + + uint32_t coherentHeap = memProps->memoryHeapCount; + memProps->memoryHeapCount++; + + // make a new heap that's tiny. If any applications look at heap sizes to determine + // viability, they'll dislike the look of this one (the real heaps should be much + // bigger). + memProps->memoryHeaps[coherentHeap].flags = VK_MEMORY_HEAP_HOST_LOCAL_BIT; + memProps->memoryHeaps[coherentHeap].size = 32*1024*1024; + + // for every coherent memory type, add a non-coherent type first, then + // mark the coherent type with our crappy heap + + uint32_t origCount = memProps->memoryTypeCount; + VkMemoryType origTypes[VK_MAX_MEMORY_TYPES]; + memcpy(origTypes, memProps->memoryTypes, sizeof(origTypes)); + + uint32_t newtypeidx = 0; + + for(uint32_t i=0; i < origCount; i++) + { + if((origTypes[i].propertyFlags & (VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT|VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) == VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + { + // coherent type found. + + // can we still add a new type without exceeding the max? + if(memProps->memoryTypeCount+1 <= VK_MAX_MEMORY_TYPES) + { + // copy both types from the original type + memProps->memoryTypes[newtypeidx] = origTypes[i]; + memProps->memoryTypes[newtypeidx+1] = origTypes[i]; + + // mark first as non-coherent + memProps->memoryTypes[newtypeidx].propertyFlags &= ~VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT; + memProps->memoryTypes[newtypeidx].propertyFlags |= VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT; + + // point second at bad heap + memProps->memoryTypes[newtypeidx+1].heapIndex = coherentHeap; + + // point both new types at this original type + memmap[newtypeidx++] = i; + memmap[newtypeidx++] = i; + + // we added a type + memProps->memoryTypeCount++; + } + else + { + // can't add a new type, but we can at least repoint this coherent + // type at the bad heap to discourage use + memProps->memoryTypes[newtypeidx] = origTypes[i]; + memProps->memoryTypes[newtypeidx].heapIndex = coherentHeap; + memmap[newtypeidx++] = i; + } + } + else + { + // non-coherent already or non-hostvisible, just copy through + memProps->memoryTypes[newtypeidx] = origTypes[i]; + memmap[newtypeidx++] = i; + } + } +} diff --git a/renderdoc/driver/vulkan/vk_resources.cpp b/renderdoc/driver/vulkan/vk_resources.cpp index e9e6a43b9..8b50d12b8 100644 --- a/renderdoc/driver/vulkan/vk_resources.cpp +++ b/renderdoc/driver/vulkan/vk_resources.cpp @@ -54,6 +54,13 @@ WRAPPED_POOL_INST(WrappedVkCmdPool) WRAPPED_POOL_INST(WrappedVkSwapchainKHR) +byte VkResourceRecord::markerValue[32] = { + 0xaa, 0xbb, 0xcc, 0xdd, + 0x88, 0x77, 0x66, 0x55, + 0x01, 0x23, 0x45, 0x67, + 0x98, 0x76, 0x54, 0x32, +}; + bool IsDispatchableRes(WrappedVkRes *ptr) { return (WrappedVkPhysicalDevice::IsAlloc(ptr) || WrappedVkInstance::IsAlloc(ptr) @@ -485,6 +492,8 @@ VkResourceRecord::~VkResourceRecord() delete[] descBindings[i]; descBindings.clear(); + SAFE_DELETE(memProps); + SAFE_DELETE(layout); SAFE_DELETE(swapInfo); SAFE_DELETE(cmdInfo); diff --git a/renderdoc/driver/vulkan/vk_resources.h b/renderdoc/driver/vulkan/vk_resources.h index fb8eb196a..f23dff536 100644 --- a/renderdoc/driver/vulkan/vk_resources.h +++ b/renderdoc/driver/vulkan/vk_resources.h @@ -592,6 +592,20 @@ struct CmdBufferRecordingInfo set boundDescSets; }; +struct MapState +{ + MapState() + : mapOffset(0), mapSize(0), mapFlags(0) + , mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL) + { } + VkDeviceSize mapOffset, mapSize; + VkMemoryMapFlags mapFlags; + uint32_t mapFrame; + bool mapFlushed; + void *mappedPtr; + byte *refData; +}; + struct DescSetLayout; struct VkResourceRecord : public ResourceRecord @@ -599,12 +613,15 @@ struct VkResourceRecord : public ResourceRecord public: enum { NullResource = (unsigned int)NULL }; + static byte markerValue[32]; + VkResourceRecord(ResourceId id) : ResourceRecord(id, true), bakedCommands(NULL), pool(NULL), mem(VK_NULL_HANDLE), memOffset(0), + memProps(NULL), layout(NULL), swapInfo(NULL), cmdInfo(NULL) @@ -656,6 +673,13 @@ struct VkResourceRecord : public ResourceRecord VkDeviceMemory mem; VkDeviceSize memOffset; + + VkPhysicalDeviceMemoryProperties *memProps; + + // externally allocated/freed, a mapping from memory idx + // in our modified properties that were passed to the app + // to the memory indices that actually exist + uint32_t *memIdxMap; // this points to the base resource, either memory or an image - // ie. the resource that can be modified or changes (or can become dirty) diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index c18d5fd3e..8702610ff 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -214,6 +214,12 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices( SERIALISE_ELEMENT(uint32_t, physIndex, *pPhysicalDeviceCount); SERIALISE_ELEMENT(ResourceId, physId, GetResID(*pPhysicalDevices)); + uint32_t memIdxMap[32] = {0}; + if(m_State >= WRITING) + memcpy(memIdxMap, GetRecord(*pPhysicalDevices)->memIdxMap, sizeof(memIdxMap)); + + localSerialiser->SerialisePODArray<32>("memIdxMap", memIdxMap); + VkPhysicalDevice pd = VK_NULL_HANDLE; if(m_State >= WRITING) @@ -243,6 +249,18 @@ bool WrappedVulkan::Serialise_vkEnumeratePhysicalDevices( GetResourceManager()->WrapResource(instance, pd); GetResourceManager()->AddLiveResource(physId, pd); + + if(physIndex >= m_PhysicalDevices.size()) + { + m_PhysicalDevices.resize(physIndex+1); + m_MemIdxMaps.resize(physIndex+1); + } + + m_PhysicalDevices[physIndex] = pd; + + uint32_t *storedMap = new uint32_t[32]; + memcpy(storedMap, memIdxMap, sizeof(memIdxMap)); + m_MemIdxMaps[physIndex] = storedMap; } return true; @@ -277,18 +295,29 @@ VkResult WrappedVulkan::vkEnumeratePhysicalDevices( else { GetResourceManager()->WrapResource(instance, devices[i]); - + if(m_State >= WRITING) { - CACHE_THREAD_SERIALISER(); - - SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS); - Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]); - + // add the record first since it's used in the serialise function below to fetch + // the memory indices VkResourceRecord *record = GetResourceManager()->AddResourceRecord(devices[i]); RDCASSERT(record); + + record->memProps = new VkPhysicalDeviceMemoryProperties(); - record->AddChunk(scope.Get()); + ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps); + + // we remap memory indices to discourage coherent maps as much as possible + RemapMemoryIndices(record->memProps, &record->memIdxMap); + + { + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CONTEXT(ENUM_PHYSICALS); + Serialise_vkEnumeratePhysicalDevices(localSerialiser, instance, &i, &devices[i]); + + record->AddChunk(scope.Get()); + } VkResourceRecord *instrecord = GetRecord(instance); @@ -449,6 +478,15 @@ bool WrappedVulkan::Serialise_vkCreateDevice( m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_ONLY, 0); + for(size_t i=0; i < m_PhysicalDevices.size(); i++) + { + if(physicalDevice == m_PhysicalDevices[i]) + { + m_PhysicalDeviceData.memIdxMap = m_MemIdxMaps[i]; + break; + } + } + m_DebugManager = new VulkanDebugManager(this, device); SAFE_DELETE_ARRAY(modQueues); @@ -551,7 +589,14 @@ VkResult WrappedVulkan::vkCreateDevice( chunk = scope.Get(); } - GetRecord(m_Instance)->AddChunk(chunk); + VkResourceRecord *record = GetResourceManager()->AddResourceRecord(*pDevice); + RDCASSERT(record); + + record->AddChunk(chunk); + + record->memIdxMap = GetRecord(physicalDevice)->memIdxMap; + + GetRecord(m_Instance)->AddParent(record); } else { diff --git a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp index 80aa5c6cf..1ff40d7f2 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_get_funcs.cpp @@ -89,6 +89,12 @@ VkResult WrappedVulkan::vkGetPhysicalDeviceMemoryProperties( VkPhysicalDevice physicalDevice, VkPhysicalDeviceMemoryProperties* pMemoryProperties) { + if(pMemoryProperties) + { + *pMemoryProperties = *GetRecord(physicalDevice)->memProps; + return VK_SUCCESS; + } + return ObjDisp(physicalDevice)->GetPhysicalDeviceMemoryProperties(Unwrap(physicalDevice), pMemoryProperties); } @@ -106,7 +112,24 @@ VkResult WrappedVulkan::vkGetBufferMemoryRequirements( VkBuffer buffer, VkMemoryRequirements* pMemoryRequirements) { - return ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements); + VkResult vkr = ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(buffer), pMemoryRequirements); + + // don't do remapping here on replay. + if(m_State < WRITING) + return vkr; + + uint32_t bits = pMemoryRequirements->memoryTypeBits; + uint32_t *memIdxMap = GetRecord(device)->memIdxMap; + + pMemoryRequirements->memoryTypeBits = 0; + + // for each of our fake memory indices, check if the real + // memory type it points to is set - if so, set our fake bit + for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++) + if(bits & (1<memoryTypeBits |= (1<GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements); + VkResult vkr = ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(image), pMemoryRequirements); + + // don't do remapping here on replay. + if(m_State < WRITING) + return vkr; + + uint32_t bits = pMemoryRequirements->memoryTypeBits; + uint32_t *memIdxMap = GetRecord(device)->memIdxMap; + + pMemoryRequirements->memoryTypeBits = 0; + + // for each of our fake memory indices, check if the real + // memory type it points to is set - if so, set our fake bit + for(uint32_t i=0; i < VK_MAX_MEMORY_TYPES; i++) + if(bits & (1<memoryTypeBits |= (1< %llu) [mapped in %u, flushed %u]", it->first, (uint64_t)diffStart, (uint64_t)diffEnd, it->second.mapFrame, it->second.mapFlushed); VkMappedMemoryRange range = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, GetResourceManager()->GetCurrentHandle(it->first), it->second.mapOffset+diffStart, diffEnd-diffStart }; - vkFlushMappedMemoryRanges(it->second.device, 1, &range); + vkFlushMappedMemoryRanges(dev, 1, &range); } GetResourceManager()->MarkPendingDirty(it->first); diff --git a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp index 6ced4e4cd..04055a292 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp @@ -42,8 +42,11 @@ bool WrappedVulkan::Serialise_vkAllocMemory( device = GetResourceManager()->GetLiveHandle(devId); + // serialised memory type index is non-remapped, so we remap now. // VKTODOLOW may need to re-write info to change memory type index to the // appropriate index on replay + info.memoryTypeIndex = m_PhysicalDeviceData.memIdxMap[info.memoryTypeIndex]; + VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, &mem); if(ret != VK_SUCCESS) @@ -67,7 +70,9 @@ VkResult WrappedVulkan::vkAllocMemory( const VkMemoryAllocInfo* pAllocInfo, VkDeviceMemory* pMem) { - VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), pAllocInfo, pMem); + VkMemoryAllocInfo info = *pAllocInfo; + info.memoryTypeIndex = GetRecord(device)->memIdxMap[info.memoryTypeIndex]; + VkResult ret = ObjDisp(device)->AllocMemory(Unwrap(device), &info, pMem); if(ret == VK_SUCCESS) { @@ -138,7 +143,6 @@ VkResult WrappedVulkan::vkMapMemory( if(m_State >= WRITING) { MapState state; - state.device = device; state.mappedPtr = *ppData; state.mapOffset = offset; state.mapSize = size == 0 ? GetRecord(mem)->Length : size;