Use sparse page table class to track sparse mappings on Vulkan resources

This commit is contained in:
baldurk
2021-03-05 15:28:47 +00:00
parent 4afd76b669
commit da4706d423
5 changed files with 170 additions and 272 deletions
+31 -12
View File
@@ -589,7 +589,7 @@ void VulkanResourceManager::InsertDeviceMemoryRefs(WriteSerialiser &ser)
}
}
void VulkanResourceManager::MarkSparseMapReferenced(ResourceInfo *sparse)
void VulkanResourceManager::MarkSparseMapReferenced(const ResourceInfo *sparse)
{
if(sparse == NULL)
{
@@ -597,18 +597,37 @@ void VulkanResourceManager::MarkSparseMapReferenced(ResourceInfo *sparse)
return;
}
for(size_t i = 0; i < sparse->opaquemappings.size(); i++)
MarkMemoryFrameReferenced(GetResID(sparse->opaquemappings[i].memory),
sparse->opaquemappings[i].memoryOffset,
sparse->opaquemappings[i].size, eFrameRef_Read);
for(int a = 0; a < NUM_VK_IMAGE_ASPECTS; a++)
for(size_t a = 0; a <= sparse->altSparseAspects.size(); a++)
{
VkDeviceSize totalSize =
VkDeviceSize(sparse->imgdim.width) * sparse->imgdim.height * sparse->imgdim.depth;
for(VkDeviceSize i = 0; sparse->pages[a] && i < totalSize; i++)
MarkMemoryFrameReferenced(GetResID(sparse->pages[a][i].first), 0, VK_WHOLE_SIZE,
eFrameRef_Read);
const Sparse::PageTable &table = a < sparse->altSparseAspects.size()
? sparse->altSparseAspects[a].table
: sparse->sparseTable;
uint32_t numSubs = table.getNumSubresources();
const Sparse::MipTail &mipTail = table.getMipTail();
for(uint32_t s = 0; s < numSubs + mipTail.mappings.size(); s++)
{
const Sparse::PageRangeMapping &mapping =
s < numSubs ? table.getSubresource(s) : table.getMipTail().mappings[s - numSubs];
if(mapping.hasSingleMapping())
{
MarkMemoryFrameReferenced(
mapping.singleMapping.memory, mapping.singleMapping.offset,
mapping.singlePageReused ? table.getPageByteSize() : table.getSubresourceByteSize(s),
eFrameRef_Read);
}
else
{
// this is a huge perf cliff as we've lost any batching and we perform as badly as if every
// page was mapped to a different resource, so we hope applications don't hit this often.
for(const Sparse::Page &page : mapping.pages)
{
MarkMemoryFrameReferenced(page.memory, page.offset, table.getPageByteSize(),
eFrameRef_Read);
}
}
}
}
}
+6 -6
View File
@@ -185,18 +185,18 @@ public:
// handling memory & image layouts
template <typename SrcBarrierType>
void RecordSingleBarrier(rdcarray<rdcpair<ResourceId, ImageRegionState> > &states, ResourceId id,
void RecordSingleBarrier(rdcarray<rdcpair<ResourceId, ImageRegionState>> &states, ResourceId id,
const SrcBarrierType &t, uint32_t nummips, uint32_t numslices);
void RecordBarriers(rdcarray<rdcpair<ResourceId, ImageRegionState> > &states,
void RecordBarriers(rdcarray<rdcpair<ResourceId, ImageRegionState>> &states,
const std::map<ResourceId, ImageLayouts> &layouts, uint32_t numBarriers,
const VkImageMemoryBarrier *barriers);
void MergeBarriers(rdcarray<rdcpair<ResourceId, ImageRegionState> > &dststates,
rdcarray<rdcpair<ResourceId, ImageRegionState> > &srcstates);
void MergeBarriers(rdcarray<rdcpair<ResourceId, ImageRegionState>> &dststates,
rdcarray<rdcpair<ResourceId, ImageRegionState>> &srcstates);
void ApplyBarriers(uint32_t queueFamilyIndex,
rdcarray<rdcpair<ResourceId, ImageRegionState> > &states,
rdcarray<rdcpair<ResourceId, ImageRegionState>> &states,
std::map<ResourceId, ImageLayouts> &layouts);
void RecordBarriers(rdcflatmap<ResourceId, ImageState> &states, uint32_t queueFamilyIndex,
@@ -374,7 +374,7 @@ public:
}
// helper for sparse mappings
void MarkSparseMapReferenced(ResourceInfo *sparse);
void MarkSparseMapReferenced(const ResourceInfo *sparse);
void SetInternalResource(ResourceId id);
+41 -208
View File
@@ -4031,229 +4031,62 @@ void VkResourceRecord::MarkBufferViewFrameReferenced(VkResourceRecord *bufView,
void ResourceInfo::Update(uint32_t numBindings, const VkSparseImageMemoryBind *pBindings)
{
// update image page table mappings
for(uint32_t b = 0; b < numBindings; b++)
// update texel mappings
for(uint32_t i = 0; i < numBindings; i++)
{
const VkSparseImageMemoryBind &newBind = pBindings[b];
const VkSparseImageMemoryBind &bind = pBindings[i];
// VKTODOMED handle sparse image arrays or sparse images with mips
RDCASSERT(newBind.subresource.arrayLayer == 0 && newBind.subresource.mipLevel == 0);
Sparse::PageTable &table = getSparseTableForAspect(bind.subresource.aspectMask);
rdcpair<VkDeviceMemory, VkDeviceSize> *pageTable = pages[newBind.subresource.aspectMask];
const uint32_t sub =
table.calcSubresource(bind.subresource.arrayLayer, bind.subresource.mipLevel);
VkOffset3D offsInPages = newBind.offset;
offsInPages.x /= pagedim.width;
offsInPages.y /= pagedim.height;
offsInPages.z /= pagedim.depth;
VkExtent3D extInPages = newBind.extent;
extInPages.width /= pagedim.width;
extInPages.height /= pagedim.height;
extInPages.depth /= pagedim.depth;
rdcpair<VkDeviceMemory, VkDeviceSize> mempair =
make_rdcpair(newBind.memory, newBind.memoryOffset);
for(uint32_t z = offsInPages.z; z < offsInPages.z + extInPages.depth; z++)
{
for(uint32_t y = offsInPages.y; y < offsInPages.y + extInPages.height; y++)
{
for(uint32_t x = offsInPages.x; x < offsInPages.x + extInPages.width; x++)
{
pageTable[z * imgdim.width * imgdim.height + y * imgdim.width + x] = mempair;
}
}
}
table.setImageBoxRange(
sub, {(uint32_t)bind.offset.x, (uint32_t)bind.offset.y, (uint32_t)bind.offset.z},
{bind.extent.width, bind.extent.height, bind.extent.depth}, GetResID(bind.memory),
bind.memoryOffset, false);
}
}
void ResourceInfo::Update(uint32_t numBindings, const VkSparseMemoryBind *pBindings)
{
// update opaque mappings
// update mip tail mappings
const bool isBuffer = (imageInfo.extent.width == 0);
for(uint32_t b = 0; b < numBindings; b++)
for(uint32_t i = 0; i < numBindings; i++)
{
const VkSparseMemoryBind &newRange = pBindings[b];
const VkSparseMemoryBind &bind = pBindings[i];
bool found = false;
// this could be improved to do a binary search since the vector is sorted.
// for(auto it = opaquemappings.begin(); it != opaquemappings.end(); ++it)
for(size_t i = 0; i < opaquemappings.size(); i++)
// don't need to figure out which aspect we're in if we only have one table
if(isBuffer || altSparseAspects.empty())
{
VkSparseMemoryBind &curRange = opaquemappings[i];
// the binding we're applying is after this item in the list,
// keep searching
if(curRange.resourceOffset + curRange.size <= newRange.resourceOffset)
continue;
// the binding we're applying is before this item, but doesn't
// overlap. Insert before us in the list
if(curRange.resourceOffset >= newRange.resourceOffset + newRange.size)
{
opaquemappings.insert(i, newRange);
found = true;
break;
}
// with sparse mappings it will be reasonably common to update an exact
// existing range, so check that first
if(newRange.resourceOffset == curRange.resourceOffset && newRange.size == curRange.size)
{
curRange = newRange;
found = true;
break;
}
// handle subranges within the current range
if(newRange.resourceOffset >= curRange.resourceOffset &&
newRange.resourceOffset + newRange.size <= curRange.resourceOffset + curRange.size)
{
// they start in the same place
if(newRange.resourceOffset == curRange.resourceOffset)
{
// change the current range to be the leftover second half
curRange.resourceOffset += newRange.size;
curRange.size -= newRange.size;
// insert the new mapping before our current one
opaquemappings.insert(i, newRange);
found = true;
break;
}
// they end in the same place
else if(newRange.resourceOffset + newRange.size == curRange.resourceOffset + curRange.size)
{
// save a copy
VkSparseMemoryBind first = curRange;
// set the new size of the first half
first.size = newRange.resourceOffset - curRange.resourceOffset;
// add the new range where the current iterator was
curRange = newRange;
// insert the old truncated mapping before our current position
opaquemappings.insert(i, first);
found = true;
break;
}
// the new range is a subsection
else
{
// save a copy
VkSparseMemoryBind first = curRange;
// set the new size of the first part
first.size = newRange.resourceOffset - first.resourceOffset;
// set the current range (third part) to start after the new range ends
curRange.size =
(curRange.resourceOffset + curRange.size) - (newRange.resourceOffset + newRange.size);
curRange.resourceOffset = newRange.resourceOffset + newRange.size;
// first insert the new range before our current range
opaquemappings.insert(i, newRange);
// now insert the remaining first part before that
opaquemappings.insert(i, first);
found = true;
break;
}
}
// this new range overlaps the current one and some subsequent ranges. Merge together
// find where this new range stops overlapping
size_t endi = i;
for(; endi < opaquemappings.size(); endi++)
{
if(newRange.resourceOffset + newRange.size <=
opaquemappings[endi].resourceOffset + opaquemappings[endi].size)
break;
}
VkSparseMemoryBind &endRange = opaquemappings[endi];
// see if there are any leftovers of the overlapped ranges at the start or end
bool leftoverstart = (newRange.resourceOffset < curRange.resourceOffset);
bool leftoverend = (endi < opaquemappings.size() && (endRange.resourceOffset + endRange.size >
curRange.resourceOffset + curRange.size));
// no leftovers, the new range entirely covers the current and last (if there is one)
if(!leftoverstart && !leftoverend)
{
// erase all of the ranges. If endi is a valid index, it won't be erased, so we overwrite
// it. Otherwise there was no subsequent range so we just push_back()
opaquemappings.erase(i, endi - i);
if(endi < opaquemappings.size())
endRange = newRange;
else
opaquemappings.push_back(newRange);
}
// leftover at the start, but not the end
else if(leftoverstart && !leftoverend)
{
// save the current range
VkSparseMemoryBind first = curRange;
// modify the size to reflect what's left over
first.size = newRange.resourceOffset - first.resourceOffset;
// as above, erase and either re-insert or push_back()
opaquemappings.erase(i, endi - i);
if(endi < opaquemappings.size())
{
endRange = newRange;
opaquemappings.insert(endi, first);
}
else
{
opaquemappings.push_back(first);
opaquemappings.push_back(newRange);
}
}
// leftover at the end, but not the start
else if(!leftoverstart && leftoverend)
{
// erase up to but not including endit
opaquemappings.erase(i, endi - i);
// modify the leftovers at the end
endRange.resourceOffset = newRange.resourceOffset + newRange.size;
// insert the new range before
opaquemappings.insert(i, newRange);
}
// leftovers at both ends
else
{
// save the current range
VkSparseMemoryBind first = curRange;
// modify the size to reflect what's left over
first.size = newRange.resourceOffset - first.resourceOffset;
// erase up to but not including endit
opaquemappings.erase(i, endi - i);
// modify the leftovers at the end
endRange.size =
(endRange.resourceOffset + endRange.size) - (newRange.resourceOffset + newRange.size);
endRange.resourceOffset = newRange.resourceOffset + newRange.size;
// insert the new range before
opaquemappings.insert(i, newRange);
// insert the modified leftovers before that
opaquemappings.insert(i, first);
}
found = true;
break;
sparseTable.setMipTailRange(bind.resourceOffset, GetResID(bind.memory), bind.memoryOffset,
bind.size, false);
}
else
{
bool found = false;
// if it wasn't found, this binding is after all mappings in our list
if(!found)
opaquemappings.push_back(newRange);
// ask each table if this offset is within its range
for(size_t a = 0; a <= altSparseAspects.size(); a++)
{
Sparse::PageTable &table =
a < altSparseAspects.size() ? altSparseAspects[a].table : sparseTable;
if(table.isByteOffsetInResource(bind.resourceOffset))
{
found = true;
table.setMipTailRange(bind.resourceOffset, GetResID(bind.memory), bind.memoryOffset,
bind.size, false);
}
}
// just in case, if we don't find it in any then assume it's metadata
if(!found)
getSparseTableForAspect(VK_IMAGE_ASPECT_METADATA_BIT)
.setMipTailRange(bind.resourceOffset, GetResID(bind.memory), bind.memoryOffset,
bind.size, false);
}
}
}
+32 -18
View File
@@ -955,33 +955,47 @@ struct SwapchainInfo
PresentInfo lastPresent;
};
struct AspectSparseTable
{
VkImageAspectFlags aspectMask;
Sparse::PageTable table;
};
DECLARE_REFLECTION_STRUCT(AspectSparseTable);
// these structs are allocated for images and buffers, then pointed to (non-owning) by views
struct ResourceInfo
{
ResourceInfo()
// commonly we expect only one aspect (COLOR is vastly likely and METADATA is rare) so have one
// directly accessible. If we have others (like separate DEPTH and STENCIL, or anything and
// METADATA) we put them in the array.
Sparse::PageTable sparseTable;
rdcarray<AspectSparseTable> altSparseAspects;
VkImageAspectFlags sparseAspect;
Sparse::PageTable &getSparseTableForAspect(VkImageAspectFlags aspects)
{
RDCEraseEl(imgdim);
RDCEraseEl(pagedim);
RDCEraseEl(pages);
// if we only have one table, return it
if(altSparseAspects.empty())
return sparseTable;
// or if it matches the main aspect
if(aspects == sparseAspect)
return sparseTable;
for(size_t i = 0; i < altSparseAspects.size(); i++)
if(altSparseAspects[i].aspectMask == aspects)
return altSparseAspects[i].table;
RDCERR("Unexpected aspect %s for sparse table", ToStr((VkImageAspectFlagBits)aspects).c_str());
return sparseTable;
}
// for buffers or non-sparse-resident images (bound with opaque mappings)
rdcarray<VkSparseMemoryBind> opaquemappings;
VkMemoryRequirements memreqs;
// for sparse resident images:
// total image size (in pages)
VkExtent3D imgdim;
// size of a page
VkExtent3D pagedim;
// pagetable per image aspect (some may be NULL) color, depth, stencil, metadata
// in order of width first, then height, then depth
rdcpair<VkDeviceMemory, VkDeviceSize> *pages[NUM_VK_IMAGE_ASPECTS];
VkMemoryRequirements memreqs = {};
ImageInfo imageInfo;
bool IsSparse() const { return pages[0] != NULL; }
bool IsSparse() const { return sparseTable.getPageByteSize() > 0; }
void Update(uint32_t numBindings, const VkSparseMemoryBind *pBindings);
void Update(uint32_t numBindings, const VkSparseImageMemoryBind *pBindings);
};
@@ -1706,6 +1706,11 @@ VkResult WrappedVulkan::vkCreateBuffer(VkDevice device, const VkBufferCreateInfo
ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), Unwrap(*pBuffer),
&record->resInfo->memreqs);
// initialise the sparse page table
if(isSparse)
record->resInfo->sparseTable.Initialise(pCreateInfo->size,
record->resInfo->memreqs.alignment & 0xFFFFFFFFU);
// for external buffers, try creating a non-external version and take the worst case of
// memory requirements, in case the non-external one (as we will replay it) needs more
// memory or a stricter alignment
@@ -1950,7 +1955,10 @@ bool WrappedVulkan::Serialise_vkCreateImage(SerialiserType &ser, VkDevice device
APIProps.YUVTextures |= IsYUVFormat(CreateInfo.format);
if(CreateInfo.flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT))
const bool isSparse = (CreateInfo.flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) != 0;
if(isSparse)
{
APIProps.SparseResources = true;
}
@@ -2049,6 +2057,9 @@ bool WrappedVulkan::Serialise_vkCreateImage(SerialiserType &ser, VkDevice device
state->wrappedHandle = img;
*state = state->InitialState();
}
if(isSparse)
state->isMemoryBound = true;
}
const char *prefix = "Image";
@@ -2222,6 +2233,9 @@ VkResult WrappedVulkan::vkCreateImage(VkDevice device, const VkImageCreateInfo *
{
ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pImage);
const bool isSparse = (pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) != 0;
if(IsCaptureMode(m_State))
{
Chunk *chunk = NULL;
@@ -2247,9 +2261,6 @@ VkResult WrappedVulkan::vkCreateImage(VkDevice device, const VkImageCreateInfo *
// pre-populate memory requirements
ObjDisp(device)->GetImageMemoryRequirements(Unwrap(device), Unwrap(*pImage), &resInfo.memreqs);
bool isSparse = (pCreateInfo->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)) != 0;
bool isLinear = (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR);
bool isExternal = false;
@@ -2352,45 +2363,61 @@ VkResult WrappedVulkan::vkCreateImage(VkDevice device, const VkImageCreateInfo *
if(isSparse)
{
uint32_t pageByteSize = resInfo.memreqs.alignment & 0xFFFFFFFFu;
if(pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT)
{
// must record image and page dimension, and create page tables
uint32_t numreqs = NUM_VK_IMAGE_ASPECTS;
VkSparseImageMemoryRequirements reqs[NUM_VK_IMAGE_ASPECTS];
uint32_t numreqs = 8;
VkSparseImageMemoryRequirements reqs[8];
ObjDisp(device)->GetImageSparseMemoryRequirements(Unwrap(device), Unwrap(*pImage),
&numreqs, reqs);
RDCASSERT(numreqs > 0);
// we only support at most DEPTH, STENCIL, METADATA = 3 aspects
RDCASSERT(numreqs > 0 && numreqs <= 3, numreqs);
resInfo.pagedim = reqs[0].formatProperties.imageGranularity;
resInfo.imgdim = pCreateInfo->extent;
resInfo.imgdim.width /= resInfo.pagedim.width;
resInfo.imgdim.height /= resInfo.pagedim.height;
resInfo.imgdim.depth /= resInfo.pagedim.depth;
// if we don't have just a single aspect, store the additional aspects' tables in the array
resInfo.altSparseAspects.resize(numreqs - 1);
uint32_t numpages = resInfo.imgdim.width * resInfo.imgdim.height * resInfo.imgdim.depth;
Sparse::Coord dim = {pCreateInfo->extent.width, pCreateInfo->extent.height,
pCreateInfo->extent.depth};
for(uint32_t i = 0; i < numreqs; i++)
for(uint32_t r = 0; r < numreqs; r++)
{
// assume all page sizes are the same for all aspects
RDCASSERT(resInfo.pagedim.width == reqs[i].formatProperties.imageGranularity.width &&
resInfo.pagedim.height == reqs[i].formatProperties.imageGranularity.height &&
resInfo.pagedim.depth == reqs[i].formatProperties.imageGranularity.depth);
if(r == 0)
resInfo.sparseAspect = reqs[r].formatProperties.aspectMask;
else
resInfo.altSparseAspects[r - 1].aspectMask = reqs[r].formatProperties.aspectMask;
int a = 0;
for(a = 0; a < NUM_VK_IMAGE_ASPECTS; a++)
{
if(reqs[i].formatProperties.aspectMask & (1 << a))
break;
}
Sparse::PageTable &table =
r == 0 ? resInfo.sparseTable : resInfo.altSparseAspects[r - 1].table;
resInfo.pages[a] = new rdcpair<VkDeviceMemory, VkDeviceSize>[numpages];
bool singleMipTail =
(reqs[r].formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) != 0;
const VkExtent3D &gran = reqs[r].formatProperties.imageGranularity;
Sparse::Coord pageSize = {gran.width, gran.height, gran.depth};
table.Initialise(
dim, pCreateInfo->mipLevels, pCreateInfo->arrayLayers, pageByteSize, pageSize,
// we MIN here so if the driver returns 999 we have a consistent value, so we can
// compare against it on replay
RDCMIN(reqs[r].imageMipTailFirstLod, pCreateInfo->mipLevels),
reqs[r].imageMipTailOffset,
// imageMipTailStride is the offset stride between each array layer's mip tail,
// if formatProperties.flags does not contain
// VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT (otherwise the value is undefined).
singleMipTail || pCreateInfo->arrayLayers == 0 ? 0 : reqs[r].imageMipTailStride,
// If formatProperties.flags contains VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT,
// this is the size of the whole mip tail, otherwise this is the size of the mip
// tail of a single array layer.
singleMipTail ? reqs[r].imageMipTailSize
: reqs[r].imageMipTailSize * pCreateInfo->arrayLayers);
}
}
else
{
// don't have to do anything, image is opaque and must be fully bound, just need
// to track the memory bindings.
// set page table up as if it were a buffer
resInfo.sparseTable.Initialise(resInfo.memreqs.size, pageByteSize);
}
}
}
@@ -2401,7 +2428,12 @@ VkResult WrappedVulkan::vkCreateImage(VkDevice device, const VkImageCreateInfo *
m_CreationInfo.m_Image[id].Init(GetResourceManager(), m_CreationInfo, pCreateInfo);
}
InsertImageState(*pImage, id, ImageInfo(*pCreateInfo), eFrameRef_None);
LockedImageStateRef state =
InsertImageState(*pImage, id, ImageInfo(*pCreateInfo), eFrameRef_None);
// sparse resources are always treated as if memory is bound, don't skip anything
if(isSparse)
state->isMemoryBound = true;
}
return ret;