From 929bcfc0ce6b10e9479b842a86cff6b353bff2db Mon Sep 17 00:00:00 2001 From: baldurk Date: Fri, 13 Mar 2020 10:43:49 +0000 Subject: [PATCH] Improve memory type selection on vulkan * The memory properties no longer have bits to be avoided, but instead bits to be desired - readback memory type should be cached if at all possible. --- renderdoc/driver/vulkan/vk_core.h | 10 +-- renderdoc/driver/vulkan/vk_memory.cpp | 65 +++++++++++++------ .../vulkan/wrappers/vk_device_funcs.cpp | 37 +++++++++-- 3 files changed, 80 insertions(+), 32 deletions(-) diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index e95ede805..e7e1cbaa9 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -358,8 +358,8 @@ private: struct PhysicalDeviceData { - uint32_t GetMemoryIndex(uint32_t resourceRequiredBitmask, uint32_t allocRequiredProps, - uint32_t allocUndesiredProps); + uint32_t GetMemoryIndex(uint32_t resourceCompatibleBitmask, uint32_t allocRequiredProps, + uint32_t allocPreferredProps); // store the three most common memory indices: // - memory for copying into and reading back from the GPU @@ -974,9 +974,9 @@ public: return m_DescriptorSetState[descSet].layout; } - uint32_t GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask); - uint32_t GetUploadMemoryIndex(uint32_t resourceRequiredBitmask); - uint32_t GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask); + uint32_t GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask); + uint32_t GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask); + uint32_t GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask); EventFlags GetEventFlags(uint32_t eid) { return m_EventFlags[eid]; } rdcarray GetUsage(ResourceId id) { return m_ResourceUses[id]; } diff --git a/renderdoc/driver/vulkan/vk_memory.cpp b/renderdoc/driver/vulkan/vk_memory.cpp index f56cac8bb..3bacacd96 100644 --- a/renderdoc/driver/vulkan/vk_memory.cpp +++ b/renderdoc/driver/vulkan/vk_memory.cpp @@ -24,62 +24,85 @@ #include "vk_core.h" -uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask) +uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask) { - if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex)) + if(m_PhysicalDeviceData.readbackMemIndex < 32 && + resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex)) return m_PhysicalDeviceData.readbackMemIndex; - return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); + // for readbacks we want cached + return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + VK_MEMORY_PROPERTY_HOST_CACHED_BIT); } -uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceRequiredBitmask) +uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask) { - if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex)) + if(m_PhysicalDeviceData.uploadMemIndex < 32 && + resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex)) return m_PhysicalDeviceData.uploadMemIndex; - return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); + // for upload, writing directly into device local memory is preferred + return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } -uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask) +uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask) { - if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex)) + if(m_PhysicalDeviceData.GPULocalMemIndex < 32 && + resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex)) return m_PhysicalDeviceData.GPULocalMemIndex; - return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + // we don't actually need to require device local, but it is preferred + return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask, 0, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } -uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequiredBitmask, +uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceCompatibleBitmask, uint32_t allocRequiredProps, - uint32_t allocUndesiredProps) + uint32_t allocPreferredProps) { uint32_t best = memProps.memoryTypeCount; for(uint32_t memIndex = 0; memIndex < memProps.memoryTypeCount; memIndex++) { - if(resourceRequiredBitmask & (1 << memIndex)) + if(resourceCompatibleBitmask & (1 << memIndex)) { uint32_t memTypeFlags = memProps.memoryTypes[memIndex].propertyFlags; if((memTypeFlags & allocRequiredProps) == allocRequiredProps) { - if(memTypeFlags & allocUndesiredProps) - best = memIndex; - else + // if this type has all preferred props, it is the best we can do. The driver is required to + // order memory types that are otherwise equal in order of ascending performance. + if((memTypeFlags & allocPreferredProps) == allocPreferredProps) return memIndex; + + // no best yet, this is the best we have + if(best == memProps.memoryTypeCount) + { + best = memIndex; + } + else + { + // compare to the previous best. If it has more preferred props set, this is the new best + uint32_t prevBestFlags = memProps.memoryTypes[best].propertyFlags; + if((prevBestFlags & allocPreferredProps) < (memTypeFlags & allocPreferredProps)) + { + best = memIndex; + } + } } } } if(best == memProps.memoryTypeCount) { - RDCERR("Couldn't find any matching heap! requirements %x / %x too strict", - resourceRequiredBitmask, allocRequiredProps); + RDCERR("Couldn't find any matching heap! mrq allows %x but required properties %x too strict", + resourceCompatibleBitmask, allocRequiredProps); return 0; } + return best; } diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 176967a14..620c3ad2f 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -3465,12 +3465,37 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice, ->GetPhysicalDeviceFormatProperties(Unwrap(physicalDevice), VkFormat(i), &m_PhysicalDeviceData.fmtprops[i]); - m_PhysicalDeviceData.readbackMemIndex = - m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); - m_PhysicalDeviceData.uploadMemIndex = - m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0); - m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex( - ~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT); + // we need to do this little dance because Get*MemoryIndex checks to see if the existing + // readback index is valid, and if so just returns it without doing the proper checks. + // so first we set the indices to something invalid then call the function + m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.uploadMemIndex = + m_PhysicalDeviceData.GPULocalMemIndex = ~0U; + + m_PhysicalDeviceData.readbackMemIndex = GetReadbackMemoryIndex(~0U); + m_PhysicalDeviceData.uploadMemIndex = GetUploadMemoryIndex(~0U); + m_PhysicalDeviceData.GPULocalMemIndex = GetGPULocalMemoryIndex(~0U); + + for(uint32_t i = 0; i < m_PhysicalDeviceData.memProps.memoryTypeCount; i++) + { + rdcstr selected; + + if(m_PhysicalDeviceData.GPULocalMemIndex == i) + selected += "GPULocal|"; + if(m_PhysicalDeviceData.readbackMemIndex == i) + selected += "readback|"; + if(m_PhysicalDeviceData.uploadMemIndex == i) + selected += "upload|"; + + selected.pop_back(); + + const VkMemoryType &type = m_PhysicalDeviceData.memProps.memoryTypes[i]; + const VkMemoryHeap &heap = m_PhysicalDeviceData.memProps.memoryHeaps[type.heapIndex]; + + RDCLOG(" Memory type %u: %s in heap %u (%s) (%.1f GB) [%s]", i, + ToStr((VkMemoryPropertyFlagBits)type.propertyFlags).c_str(), type.heapIndex, + ToStr((VkMemoryHeapFlagBits)heap.flags).c_str(), + float(heap.size) / (1024.0f * 1024.0f * 1024.0f), selected.c_str()); + } m_PhysicalDeviceData.queueCount = qCount; memcpy(m_PhysicalDeviceData.queueProps, props, qCount * sizeof(VkQueueFamilyProperties));