Improve memory type selection on Vulkan

* Memory type selection no longer takes property bits to avoid; it now takes
  property bits to prefer - e.g. the readback memory type should be host-cached
  if at all possible.
This commit is contained in:
baldurk
2020-03-13 10:43:49 +00:00
parent 3bfaf929e6
commit 929bcfc0ce
3 changed files with 80 additions and 32 deletions
+5 -5
View File
@@ -358,8 +358,8 @@ private:
struct PhysicalDeviceData
{
uint32_t GetMemoryIndex(uint32_t resourceRequiredBitmask, uint32_t allocRequiredProps,
uint32_t allocUndesiredProps);
uint32_t GetMemoryIndex(uint32_t resourceCompatibleBitmask, uint32_t allocRequiredProps,
uint32_t allocPreferredProps);
// store the three most common memory indices:
// - memory for copying into and reading back from the GPU
@@ -974,9 +974,9 @@ public:
return m_DescriptorSetState[descSet].layout;
}
uint32_t GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask);
uint32_t GetUploadMemoryIndex(uint32_t resourceRequiredBitmask);
uint32_t GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask);
uint32_t GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask);
uint32_t GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask);
uint32_t GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask);
EventFlags GetEventFlags(uint32_t eid) { return m_EventFlags[eid]; }
rdcarray<EventUsage> GetUsage(ResourceId id) { return m_ResourceUses[id]; }
+44 -21
View File
@@ -24,62 +24,85 @@
#include "vk_core.h"
// GetReadbackMemoryIndex: chooses the memory type index used for readback allocations.
// NOTE(review): this is a diff hunk with its +/- markers stripped, so the removed and the added
// version of each changed line sit back to back. The lines using resourceRequiredBitmask appear
// to be the removed version and the lines using resourceCompatibleBitmask the added one.
// The bitmask is tested one bit per memory type index (bit i corresponds to index i).
// Returns the cached m_PhysicalDeviceData.readbackMemIndex if its bit is set in the bitmask,
// otherwise whatever PhysicalDeviceData::GetMemoryIndex selects.
uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask)
uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex))
// the added version only shifts by the cached index when it is below 32, so a cached index that
// is not a valid bit position skips the fast path and falls through to the full search below
if(m_PhysicalDeviceData.readbackMemIndex < 32 &&
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex))
return m_PhysicalDeviceData.readbackMemIndex;
// removed version: host-visible required, nothing passed as the third (undesired) argument
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
// for readbacks we want cached
// added version: host-visible is still required, host-cached is passed as the preferred property
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
}
// GetUploadMemoryIndex: chooses the memory type index used for upload allocations.
// NOTE(review): this is a diff hunk with its +/- markers stripped, so the removed and the added
// version of each changed line sit back to back. The lines using resourceRequiredBitmask appear
// to be the removed version and the lines using resourceCompatibleBitmask the added one.
// The bitmask is tested one bit per memory type index (bit i corresponds to index i).
// Returns the cached m_PhysicalDeviceData.uploadMemIndex if its bit is set in the bitmask,
// otherwise whatever PhysicalDeviceData::GetMemoryIndex selects.
uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceRequiredBitmask)
uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex))
// the added version only shifts by the cached index when it is below 32, so a cached index that
// is not a valid bit position skips the fast path and falls through to the full search below
if(m_PhysicalDeviceData.uploadMemIndex < 32 &&
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex))
return m_PhysicalDeviceData.uploadMemIndex;
// removed version: host-visible required, nothing passed as the third (undesired) argument
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
// for upload, writing directly into device local memory is preferred
// added version: host-visible is still required, device-local is passed as the preferred property
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
// GetGPULocalMemoryIndex: chooses the memory type index used for GPU-local allocations.
// NOTE(review): this is a diff hunk with its +/- markers stripped, so the removed and the added
// version of each changed line sit back to back. The lines using resourceRequiredBitmask appear
// to be the removed version and the lines using resourceCompatibleBitmask the added one.
// The bitmask is tested one bit per memory type index (bit i corresponds to index i).
// Returns the cached m_PhysicalDeviceData.GPULocalMemIndex if its bit is set in the bitmask,
// otherwise whatever PhysicalDeviceData::GetMemoryIndex selects.
uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask)
uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask)
{
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex))
// the added version only shifts by the cached index when it is below 32, so a cached index that
// is not a valid bit position skips the fast path and falls through to the full search below
if(m_PhysicalDeviceData.GPULocalMemIndex < 32 &&
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex))
return m_PhysicalDeviceData.GPULocalMemIndex;
// removed version: device-local required, host-visible passed as the third (undesired) argument
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
// we don't actually need to require device local, but it is preferred
// added version: no required properties (0), device-local is only the preferred property
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask, 0,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
// GetMemoryIndex: scans memProps.memoryTypes in index order and returns the index of a memory
// type whose bit is set in the bitmask and whose propertyFlags contain every bit of
// allocRequiredProps.
// NOTE(review): this is a diff hunk with its +/- markers stripped, so the removed and the added
// version of each changed line sit back to back. The lines using resourceRequiredBitmask /
// allocUndesiredProps appear to be the removed version and the lines using
// resourceCompatibleBitmask / allocPreferredProps the added one.
// Added version: the first matching type that also has all of allocPreferredProps is returned
// immediately; otherwise the remembered best candidate is returned.
// If nothing matches, an error is logged with RDCERR and index 0 is returned.
uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequiredBitmask,
uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceCompatibleBitmask,
uint32_t allocRequiredProps,
uint32_t allocUndesiredProps)
uint32_t allocPreferredProps)
{
// memoryTypeCount is one past the last valid index, so it doubles as the "nothing found" marker
uint32_t best = memProps.memoryTypeCount;
for(uint32_t memIndex = 0; memIndex < memProps.memoryTypeCount; memIndex++)
{
// NOTE(review): the literal 1 is a signed int; 1U would avoid relying on signed-shift behaviour
// should memIndex ever reach 31 - confirm against the maximum memoryTypeCount
if(resourceRequiredBitmask & (1 << memIndex))
if(resourceCompatibleBitmask & (1 << memIndex))
{
uint32_t memTypeFlags = memProps.memoryTypes[memIndex].propertyFlags;
if((memTypeFlags & allocRequiredProps) == allocRequiredProps)
{
// removed version: a type with any undesired bit is only remembered as a fallback, while a type
// with none of them is returned straight away
if(memTypeFlags & allocUndesiredProps)
best = memIndex;
else
// if this type has all preferred props, it is the best we can do. The driver is required to
// order memory types that are otherwise equal in order of ascending performance.
if((memTypeFlags & allocPreferredProps) == allocPreferredProps)
return memIndex;
// no best yet, this is the best we have
if(best == memProps.memoryTypeCount)
{
best = memIndex;
}
else
{
// compare to the previous best. If it has more preferred props set, this is the new best
// NOTE(review): this compares the masked flag values numerically, not the number of set bits.
// With a single preferred bit the masked value here is always 0 (a type with the bit set has
// already been returned above), so the first candidate stays the best. With several preferred
// bits a higher-valued bit would outrank a larger count of lower-valued bits - confirm intent.
uint32_t prevBestFlags = memProps.memoryTypes[best].propertyFlags;
if((prevBestFlags & allocPreferredProps) < (memTypeFlags & allocPreferredProps))
{
best = memIndex;
}
}
}
}
}
// best still equal to memoryTypeCount means no memory type passed both the bitmask test and the
// required-properties test
if(best == memProps.memoryTypeCount)
{
RDCERR("Couldn't find any matching heap! requirements %x / %x too strict",
resourceRequiredBitmask, allocRequiredProps);
RDCERR("Couldn't find any matching heap! mrq allows %x but required properties %x too strict",
resourceCompatibleBitmask, allocRequiredProps);
// fall back to index 0 so the caller still gets an index back after the error is logged
return 0;
}
return best;
}
@@ -3465,12 +3465,37 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
->GetPhysicalDeviceFormatProperties(Unwrap(physicalDevice), VkFormat(i),
&m_PhysicalDeviceData.fmtprops[i]);
m_PhysicalDeviceData.readbackMemIndex =
m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
m_PhysicalDeviceData.uploadMemIndex =
m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(
~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
// we need to do this little dance because Get*MemoryIndex checks to see if the existing
// readback index is valid, and if so just returns it without doing the proper checks.
// so first we set the indices to something invalid then call the function
m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.uploadMemIndex =
m_PhysicalDeviceData.GPULocalMemIndex = ~0U;
m_PhysicalDeviceData.readbackMemIndex = GetReadbackMemoryIndex(~0U);
m_PhysicalDeviceData.uploadMemIndex = GetUploadMemoryIndex(~0U);
m_PhysicalDeviceData.GPULocalMemIndex = GetGPULocalMemoryIndex(~0U);
for(uint32_t i = 0; i < m_PhysicalDeviceData.memProps.memoryTypeCount; i++)
{
rdcstr selected;
if(m_PhysicalDeviceData.GPULocalMemIndex == i)
selected += "GPULocal|";
if(m_PhysicalDeviceData.readbackMemIndex == i)
selected += "readback|";
if(m_PhysicalDeviceData.uploadMemIndex == i)
selected += "upload|";
selected.pop_back();
const VkMemoryType &type = m_PhysicalDeviceData.memProps.memoryTypes[i];
const VkMemoryHeap &heap = m_PhysicalDeviceData.memProps.memoryHeaps[type.heapIndex];
RDCLOG(" Memory type %u: %s in heap %u (%s) (%.1f GB) [%s]", i,
ToStr((VkMemoryPropertyFlagBits)type.propertyFlags).c_str(), type.heapIndex,
ToStr((VkMemoryHeapFlagBits)heap.flags).c_str(),
float(heap.size) / (1024.0f * 1024.0f * 1024.0f), selected.c_str());
}
m_PhysicalDeviceData.queueCount = qCount;
memcpy(m_PhysicalDeviceData.queueProps, props, qCount * sizeof(VkQueueFamilyProperties));