mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-29 13:20:54 +00:00
Improve memory type selection on Vulkan
* Memory type selection no longer takes property bits to avoid; it now takes property bits to prefer instead - e.g. the readback memory type should be host-cached if at all possible.
This commit is contained in:
@@ -358,8 +358,8 @@ private:
|
||||
|
||||
struct PhysicalDeviceData
|
||||
{
|
||||
uint32_t GetMemoryIndex(uint32_t resourceRequiredBitmask, uint32_t allocRequiredProps,
|
||||
uint32_t allocUndesiredProps);
|
||||
uint32_t GetMemoryIndex(uint32_t resourceCompatibleBitmask, uint32_t allocRequiredProps,
|
||||
uint32_t allocPreferredProps);
|
||||
|
||||
// store the three most common memory indices:
|
||||
// - memory for copying into and reading back from the GPU
|
||||
@@ -974,9 +974,9 @@ public:
|
||||
return m_DescriptorSetState[descSet].layout;
|
||||
}
|
||||
|
||||
uint32_t GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask);
|
||||
uint32_t GetUploadMemoryIndex(uint32_t resourceRequiredBitmask);
|
||||
uint32_t GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask);
|
||||
uint32_t GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask);
|
||||
uint32_t GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask);
|
||||
uint32_t GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask);
|
||||
|
||||
EventFlags GetEventFlags(uint32_t eid) { return m_EventFlags[eid]; }
|
||||
rdcarray<EventUsage> GetUsage(ResourceId id) { return m_ResourceUses[id]; }
|
||||
|
||||
@@ -24,62 +24,85 @@
|
||||
|
||||
#include "vk_core.h"
|
||||
|
||||
uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceRequiredBitmask)
|
||||
uint32_t WrappedVulkan::GetReadbackMemoryIndex(uint32_t resourceCompatibleBitmask)
|
||||
{
|
||||
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex))
|
||||
if(m_PhysicalDeviceData.readbackMemIndex < 32 &&
|
||||
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.readbackMemIndex))
|
||||
return m_PhysicalDeviceData.readbackMemIndex;
|
||||
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
|
||||
// for readbacks we want cached
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
|
||||
}
|
||||
|
||||
uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceRequiredBitmask)
|
||||
uint32_t WrappedVulkan::GetUploadMemoryIndex(uint32_t resourceCompatibleBitmask)
|
||||
{
|
||||
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex))
|
||||
if(m_PhysicalDeviceData.uploadMemIndex < 32 &&
|
||||
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.uploadMemIndex))
|
||||
return m_PhysicalDeviceData.uploadMemIndex;
|
||||
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
|
||||
// for upload, writing directly into device local memory is preferred
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
}
|
||||
|
||||
uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceRequiredBitmask)
|
||||
uint32_t WrappedVulkan::GetGPULocalMemoryIndex(uint32_t resourceCompatibleBitmask)
|
||||
{
|
||||
if(resourceRequiredBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex))
|
||||
if(m_PhysicalDeviceData.GPULocalMemIndex < 32 &&
|
||||
resourceCompatibleBitmask & (1 << m_PhysicalDeviceData.GPULocalMemIndex))
|
||||
return m_PhysicalDeviceData.GPULocalMemIndex;
|
||||
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceRequiredBitmask,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
// we don't actually need to require device local, but it is preferred
|
||||
return m_PhysicalDeviceData.GetMemoryIndex(resourceCompatibleBitmask, 0,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
}
|
||||
|
||||
uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceRequiredBitmask,
|
||||
uint32_t WrappedVulkan::PhysicalDeviceData::GetMemoryIndex(uint32_t resourceCompatibleBitmask,
|
||||
uint32_t allocRequiredProps,
|
||||
uint32_t allocUndesiredProps)
|
||||
uint32_t allocPreferredProps)
|
||||
{
|
||||
uint32_t best = memProps.memoryTypeCount;
|
||||
|
||||
for(uint32_t memIndex = 0; memIndex < memProps.memoryTypeCount; memIndex++)
|
||||
{
|
||||
if(resourceRequiredBitmask & (1 << memIndex))
|
||||
if(resourceCompatibleBitmask & (1 << memIndex))
|
||||
{
|
||||
uint32_t memTypeFlags = memProps.memoryTypes[memIndex].propertyFlags;
|
||||
|
||||
if((memTypeFlags & allocRequiredProps) == allocRequiredProps)
|
||||
{
|
||||
if(memTypeFlags & allocUndesiredProps)
|
||||
best = memIndex;
|
||||
else
|
||||
// if this type has all preferred props, it is the best we can do. The driver is required to
|
||||
// order memory types that are otherwise equal in order of ascending performance.
|
||||
if((memTypeFlags & allocPreferredProps) == allocPreferredProps)
|
||||
return memIndex;
|
||||
|
||||
// no best yet, this is the best we have
|
||||
if(best == memProps.memoryTypeCount)
|
||||
{
|
||||
best = memIndex;
|
||||
}
|
||||
else
|
||||
{
|
||||
// compare to the previous best. If it has more preferred props set, this is the new best
|
||||
uint32_t prevBestFlags = memProps.memoryTypes[best].propertyFlags;
|
||||
if((prevBestFlags & allocPreferredProps) < (memTypeFlags & allocPreferredProps))
|
||||
{
|
||||
best = memIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(best == memProps.memoryTypeCount)
|
||||
{
|
||||
RDCERR("Couldn't find any matching heap! requirements %x / %x too strict",
|
||||
resourceRequiredBitmask, allocRequiredProps);
|
||||
RDCERR("Couldn't find any matching heap! mrq allows %x but required properties %x too strict",
|
||||
resourceCompatibleBitmask, allocRequiredProps);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
|
||||
@@ -3465,12 +3465,37 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
|
||||
->GetPhysicalDeviceFormatProperties(Unwrap(physicalDevice), VkFormat(i),
|
||||
&m_PhysicalDeviceData.fmtprops[i]);
|
||||
|
||||
m_PhysicalDeviceData.readbackMemIndex =
|
||||
m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
|
||||
m_PhysicalDeviceData.uploadMemIndex =
|
||||
m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
|
||||
m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(
|
||||
~0U, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
|
||||
// we need to do this little dance because Get*MemoryIndex checks to see if the existing
|
||||
// readback index is valid, and if so just returns it without doing the proper checks.
|
||||
// so first we set the indices to something invalid then call the function
|
||||
m_PhysicalDeviceData.readbackMemIndex = m_PhysicalDeviceData.uploadMemIndex =
|
||||
m_PhysicalDeviceData.GPULocalMemIndex = ~0U;
|
||||
|
||||
m_PhysicalDeviceData.readbackMemIndex = GetReadbackMemoryIndex(~0U);
|
||||
m_PhysicalDeviceData.uploadMemIndex = GetUploadMemoryIndex(~0U);
|
||||
m_PhysicalDeviceData.GPULocalMemIndex = GetGPULocalMemoryIndex(~0U);
|
||||
|
||||
for(uint32_t i = 0; i < m_PhysicalDeviceData.memProps.memoryTypeCount; i++)
|
||||
{
|
||||
rdcstr selected;
|
||||
|
||||
if(m_PhysicalDeviceData.GPULocalMemIndex == i)
|
||||
selected += "GPULocal|";
|
||||
if(m_PhysicalDeviceData.readbackMemIndex == i)
|
||||
selected += "readback|";
|
||||
if(m_PhysicalDeviceData.uploadMemIndex == i)
|
||||
selected += "upload|";
|
||||
|
||||
selected.pop_back();
|
||||
|
||||
const VkMemoryType &type = m_PhysicalDeviceData.memProps.memoryTypes[i];
|
||||
const VkMemoryHeap &heap = m_PhysicalDeviceData.memProps.memoryHeaps[type.heapIndex];
|
||||
|
||||
RDCLOG(" Memory type %u: %s in heap %u (%s) (%.1f GB) [%s]", i,
|
||||
ToStr((VkMemoryPropertyFlagBits)type.propertyFlags).c_str(), type.heapIndex,
|
||||
ToStr((VkMemoryHeapFlagBits)heap.flags).c_str(),
|
||||
float(heap.size) / (1024.0f * 1024.0f * 1024.0f), selected.c_str());
|
||||
}
|
||||
|
||||
m_PhysicalDeviceData.queueCount = qCount;
|
||||
memcpy(m_PhysicalDeviceData.queueProps, props, qCount * sizeof(VkQueueFamilyProperties));
|
||||
|
||||
Reference in New Issue
Block a user