From 473d8a8e303f63e59cc6fc289cb3c342c26de674 Mon Sep 17 00:00:00 2001 From: Benson Joeris Date: Fri, 21 Jun 2019 14:32:53 -0400 Subject: [PATCH] Vulkan: Optimize image barriers in Apply_InitialState This changes the behaviour of Apply_InitialState for images with BufferCopy type InitialState. Previously, the entire image was transitioned to DST_OPTIMAL, along with a possible queue family ownership transfer. Now, only the image subresources that are actually copied/cleared are transitioned. Change-Id: I92ab4d7160e99b81222231d1d974e707a55f7aef --- renderdoc/driver/vulkan/vk_common.cpp | 6 +- renderdoc/driver/vulkan/vk_common.h | 6 +- renderdoc/driver/vulkan/vk_core.h | 4 + renderdoc/driver/vulkan/vk_initstate.cpp | 199 +++++++++++++---------- renderdoc/driver/vulkan/vk_resources.cpp | 48 ++++++ renderdoc/driver/vulkan/vk_resources.h | 2 + 6 files changed, 173 insertions(+), 92 deletions(-) diff --git a/renderdoc/driver/vulkan/vk_common.cpp b/renderdoc/driver/vulkan/vk_common.cpp index 497ef77b3..97e84fc19 100644 --- a/renderdoc/driver/vulkan/vk_common.cpp +++ b/renderdoc/driver/vulkan/vk_common.cpp @@ -407,7 +407,7 @@ int StageIndex(VkShaderStageFlagBits stageFlag) return 0; } -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkImageMemoryBarrier *barriers) +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkImageMemoryBarrier *barriers) { RDCASSERT(cmd != VK_NULL_HANDLE); ObjDisp(cmd)->CmdPipelineBarrier(Unwrap(cmd), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, @@ -417,7 +417,7 @@ void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkImageMemoryBarrier count, barriers); // image memory barriers } -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkBufferMemoryBarrier *barriers) +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkBufferMemoryBarrier *barriers) { RDCASSERT(cmd != VK_NULL_HANDLE); ObjDisp(cmd)->CmdPipelineBarrier(Unwrap(cmd), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, @@ -427,7 +427,7 @@ void 
DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkBufferMemoryBarrie 0, NULL); // image memory barriers } -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkMemoryBarrier *barriers) +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkMemoryBarrier *barriers) { RDCASSERT(cmd != VK_NULL_HANDLE); ObjDisp(cmd)->CmdPipelineBarrier(Unwrap(cmd), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index 22486350c..b7723b19d 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -105,9 +105,9 @@ VkAccessFlags MakeAccessMask(VkImageLayout layout); void SanitiseOldImageLayout(VkImageLayout &layout); void SanitiseNewImageLayout(VkImageLayout &layout); -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkImageMemoryBarrier *barriers); -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkBufferMemoryBarrier *barriers); -void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, VkMemoryBarrier *barriers); +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkImageMemoryBarrier *barriers); +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkBufferMemoryBarrier *barriers); +void DoPipelineBarrier(VkCommandBuffer cmd, uint32_t count, const VkMemoryBarrier *barriers); int SampleCount(VkSampleCountFlagBits countFlag); int SampleIndex(VkSampleCountFlagBits countFlag); diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index 1cc5fd782..b91503c03 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -899,6 +899,10 @@ private: int32_t messageCode, const char *pLayerPrefix, const char *pMessage, void *pUserData); void AddFrameTerminator(uint64_t queueMarkerTag); + std::vector ImageInitializationBarriers(ResourceId id, WrappedVkRes *live, + bool initialized, + const ImgRefs *imgRefs) const; + void SubmitExtQBarriers(const 
std::map> &extQBarriers); public: WrappedVulkan(); diff --git a/renderdoc/driver/vulkan/vk_initstate.cpp b/renderdoc/driver/vulkan/vk_initstate.cpp index 7f43a1760..93c7b660c 100644 --- a/renderdoc/driver/vulkan/vk_initstate.cpp +++ b/renderdoc/driver/vulkan/vk_initstate.cpp @@ -1313,6 +1313,106 @@ void WrappedVulkan::Create_InitialState(ResourceId id, WrappedVkRes *live, bool } } +std::vector WrappedVulkan::ImageInitializationBarriers( + ResourceId id, WrappedVkRes *live, bool initialized, const ImgRefs *imgRefs) const +{ + std::vector barriers; + + const ImageLayouts &imageLayouts = m_ImageLayouts.at(id); + VkImageMemoryBarrier barrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + NULL, + 0, // srcAccessMask initialized below + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, // oldLayout initialized below + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + imageLayouts.queueFamilyIndex, + m_QueueFamilyIdx, + ToHandle(live), + {}, // subresourceRange initialized below + }; + + for(size_t si = 0; si < imageLayouts.subresourceStates.size(); si++) + { + barrier.subresourceRange = imageLayouts.subresourceStates[si].subresourceRange; + barrier.oldLayout = imageLayouts.subresourceStates[si].newLayout; + SanitiseOldImageLayout(barrier.oldLayout); + + barrier.srcAccessMask = VK_ACCESS_ALL_WRITE_BITS | MakeAccessMask(barrier.oldLayout); + + if(!initialized) + { + barriers.push_back(barrier); + } + else + { + auto initReqs = imgRefs->SubresourceRangeInitReqs(barrier.subresourceRange); + for(auto initIt = initReqs.begin(); initIt != initReqs.end(); ++initIt) + { + if(initIt->second == eInitReq_Reset || initIt->second == eInitReq_Clear) + { + barrier.subresourceRange = initIt->first; + barriers.push_back(barrier); + } + } + } + } + return barriers; +} + +// Reverses each barrier in place so it undoes the transition it originally described. +void InvertImageInitializationBarriers(std::vector &barriers) +{ + for(auto it = barriers.begin(); it != barriers.end(); ++it) + { + // update the live image layout back, sanitising it now that it is the new layout + std::swap(it->oldLayout, it->newLayout); + SanitiseNewImageLayout(it->newLayout); + //
make sure the apply completes before any further work + it->srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + it->dstAccessMask = VK_ACCESS_ALL_READ_BITS | MakeAccessMask(it->newLayout); + std::swap(it->srcQueueFamilyIndex, it->dstQueueFamilyIndex); + } +} + +std::map > GetExtQBarriers( + const std::vector &barriers) +{ + std::map > extQBarriers; + + for(auto barrierIt = barriers.begin(); barrierIt != barriers.end(); ++barrierIt) + { + if(barrierIt->srcQueueFamilyIndex != barrierIt->dstQueueFamilyIndex) + { + extQBarriers[barrierIt->srcQueueFamilyIndex].push_back(*barrierIt); + } + } + return extQBarriers; +} + +void WrappedVulkan::SubmitExtQBarriers( + const std::map > &extQBarriers) +{ + VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + for(auto extQBarrierIt = extQBarriers.begin(); extQBarrierIt != extQBarriers.end(); ++extQBarrierIt) + { + uint32_t queueFamilyIndex = extQBarrierIt->first; + const std::vector &queueFamilyBarriers = extQBarrierIt->second; + + VkCommandBuffer extQCmd = GetExtQueueCmd(queueFamilyIndex); + + VkResult vkr = ObjDisp(extQCmd)->BeginCommandBuffer(Unwrap(extQCmd), &beginInfo); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + DoPipelineBarrier(extQCmd, (uint32_t)queueFamilyBarriers.size(), queueFamilyBarriers.data()); + vkr = ObjDisp(extQCmd)->EndCommandBuffer(Unwrap(extQCmd)); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + SubmitAndFlushExtQueue(queueFamilyIndex); + } +} + void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialContents &initial) { VkResourceType type = initial.type; @@ -1834,21 +1934,12 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten } } - VkImageMemoryBarrier dstimBarrier = { - VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - NULL, - 0, - 0, - VK_IMAGE_LAYOUT_UNDEFINED, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - m_ImageLayouts[id].queueFamilyIndex, - m_QueueFamilyIdx, - ToHandle(live), - {aspectFlags, 0,
1, 0, (uint32_t)m_CreationInfo.m_Image[id].arrayLayers}, - }; + std::vector barriers = + ImageInitializationBarriers(id, live, initialized, imgRefs); + DoPipelineBarrier(cmd, (uint32_t)barriers.size(), barriers.data()); - if(aspectFlags == VK_IMAGE_ASPECT_DEPTH_BIT && !IsDepthOnlyFormat(fmt)) - dstimBarrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + std::map > extQBarriers = GetExtQBarriers(barriers); + SubmitExtQBarriers(extQBarriers); VkDeviceSize bufOffset = 0; @@ -1857,44 +1948,6 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten if(IsBlockFormat(fmt)) bufAlignment = (VkDeviceSize)GetByteSize(1, 1, 1, fmt, 0); - // first update the live image layout into destination optimal (the initial state - // image is always and permanently in source optimal already). - dstimBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - dstimBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - - VkCommandBuffer extQCmd = VK_NULL_HANDLE; - - if(dstimBarrier.srcQueueFamilyIndex != dstimBarrier.dstQueueFamilyIndex) - { - extQCmd = GetExtQueueCmd(dstimBarrier.srcQueueFamilyIndex); - - vkr = ObjDisp(extQCmd)->BeginCommandBuffer(Unwrap(extQCmd), &beginInfo); - RDCASSERTEQUAL(vkr, VK_SUCCESS); - } - - for(size_t si = 0; si < m_ImageLayouts[id].subresourceStates.size(); si++) - { - dstimBarrier.subresourceRange = m_ImageLayouts[id].subresourceStates[si].subresourceRange; - dstimBarrier.oldLayout = m_ImageLayouts[id].subresourceStates[si].newLayout; - - SanitiseOldImageLayout(dstimBarrier.oldLayout); - - dstimBarrier.srcAccessMask = VK_ACCESS_ALL_WRITE_BITS | MakeAccessMask(dstimBarrier.oldLayout); - - DoPipelineBarrier(cmd, 1, &dstimBarrier); - - if(extQCmd != VK_NULL_HANDLE) - DoPipelineBarrier(extQCmd, 1, &dstimBarrier); - } - - if(extQCmd != VK_NULL_HANDLE) - { - vkr = ObjDisp(extQCmd)->EndCommandBuffer(Unwrap(extQCmd)); - RDCASSERTEQUAL(vkr, VK_SUCCESS); - - SubmitAndFlushExtQueue(dstimBarrier.srcQueueFamilyIndex); - } - 
std::vector copyRegions; std::vector clearRegions; @@ -1911,7 +1964,7 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten copyRegions.push_back(region); \ else if(initReq == eInitReq_Clear) \ clearRegions.push_back(ImageRange(region.imageSubresource)); \ - } + } // copy each slice/mip individually for(int a = 0; a < m_CreationInfo.m_Image[id].arrayLayers; a++) @@ -2013,49 +2066,23 @@ void WrappedVulkan::Apply_InitialState(WrappedVkRes *live, const VkInitialConten } } - // update the live image layout back - dstimBarrier.oldLayout = dstimBarrier.newLayout; + InvertImageInitializationBarriers(barriers); - // make sure the apply completes before any further work - dstimBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - dstimBarrier.dstAccessMask = VK_ACCESS_ALL_READ_BITS; - - std::swap(dstimBarrier.srcQueueFamilyIndex, dstimBarrier.dstQueueFamilyIndex); - - if(extQCmd != VK_NULL_HANDLE) - { - vkr = ObjDisp(extQCmd)->BeginCommandBuffer(Unwrap(extQCmd), &beginInfo); - RDCASSERTEQUAL(vkr, VK_SUCCESS); - } - - for(size_t si = 0; si < m_ImageLayouts[id].subresourceStates.size(); si++) - { - dstimBarrier.subresourceRange = m_ImageLayouts[id].subresourceStates[si].subresourceRange; - dstimBarrier.newLayout = m_ImageLayouts[id].subresourceStates[si].newLayout; - - SanitiseNewImageLayout(dstimBarrier.newLayout); - - dstimBarrier.dstAccessMask |= MakeAccessMask(dstimBarrier.newLayout); - - DoPipelineBarrier(cmd, 1, &dstimBarrier); - - if(extQCmd != VK_NULL_HANDLE) - DoPipelineBarrier(extQCmd, 1, &dstimBarrier); - } + DoPipelineBarrier(cmd, (uint32_t)barriers.size(), barriers.data()); vkr = ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd)); RDCASSERTEQUAL(vkr, VK_SUCCESS); - if(extQCmd != VK_NULL_HANDLE) + if(extQBarriers.size() > 0) { + for(auto it = extQBarriers.begin(); it != extQBarriers.end(); ++it) + InvertImageInitializationBarriers(it->second); + // ensure work is completed before we pass ownership back to original queue SubmitCmds(); 
FlushQ(); - vkr = ObjDisp(extQCmd)->EndCommandBuffer(Unwrap(extQCmd)); - RDCASSERTEQUAL(vkr, VK_SUCCESS); - - SubmitAndFlushExtQueue(dstimBarrier.dstQueueFamilyIndex); + SubmitExtQBarriers(extQBarriers); } #if ENABLED(SINGLE_FLUSH_VALIDATE) diff --git a/renderdoc/driver/vulkan/vk_resources.cpp b/renderdoc/driver/vulkan/vk_resources.cpp index d765cc5d3..0b03c4596 100644 --- a/renderdoc/driver/vulkan/vk_resources.cpp +++ b/renderdoc/driver/vulkan/vk_resources.cpp @@ -2963,6 +2963,54 @@ int ImgRefs::SubresourceIndex(int aspectIndex, int level, int layer) const return (aspectIndex * splitLevelCount + level) * splitLayerCount + layer; } +// Splits `range` into sub-ranges, each paired with its SubresourceInitReq(aspect, level, layer): +// per aspect if areAspectsSplit; per level/layer if that axis is split or not fully covered. +std::vector > ImgRefs::SubresourceRangeInitReqs( + VkImageSubresourceRange range) const +{ + VkImageSubresourceRange out(range); + std::vector > res; + std::vector splitAspects; + if(areAspectsSplit) + { + for(auto aspectIt = ImageAspectFlagIter::begin(aspectMask & range.aspectMask); + aspectIt != ImageAspectFlagIter::end(); ++aspectIt) + splitAspects.push_back(*aspectIt); + } + else + { + splitAspects.push_back(range.aspectMask); + } + int splitLevelCount = 1; + if(areLevelsSplit || range.baseMipLevel != 0 || range.levelCount < (uint32_t)imageInfo.levelCount) + { + splitLevelCount = range.levelCount; + out.levelCount = 1; + } + int splitLayerCount = 1; + if(areLayersSplit || range.baseArrayLayer != 0 || range.layerCount < (uint32_t)imageInfo.layerCount) + { + splitLayerCount = range.layerCount; + out.layerCount = 1; + } + int aspectIndex = 0; // NOTE(review): counts only the aspects selected by `range`; confirm this is the index SubresourceInitReq expects + for(auto aspectIt = splitAspects.begin(); aspectIt != splitAspects.end(); ++aspectIt, ++aspectIndex) + { + out.aspectMask = *aspectIt; + // bases are absolute indices but the split counts are sizes, so each loop runs count steps from its base + for(int level = range.baseMipLevel; level < (int)range.baseMipLevel + splitLevelCount; ++level) + { + out.baseMipLevel = level; + for(int layer = range.baseArrayLayer; layer < (int)range.baseArrayLayer + splitLayerCount; ++layer) + { + out.baseArrayLayer = layer; + res.push_back(make_rdcpair(out, SubresourceInitReq(aspectIndex, level, layer))); + } + } + } + return res; +} + void ImgRefs::Split(bool
splitAspects, bool splitLevels, bool splitLayers) { int newSplitAspectCount = 1; diff --git a/renderdoc/driver/vulkan/vk_resources.h b/renderdoc/driver/vulkan/vk_resources.h index c1692829c..645465f9d 100644 --- a/renderdoc/driver/vulkan/vk_resources.h +++ b/renderdoc/driver/vulkan/vk_resources.h @@ -1143,6 +1143,8 @@ struct ImgRefs { return InitReq(SubresourceRef(aspectIndex, level, layer)); } + std::vector > SubresourceRangeInitReqs( + VkImageSubresourceRange range) const; void Split(bool splitAspects, bool splitLevels, bool splitLayers); template FrameRefType Update(ImageRange range, FrameRefType refType, Compose comp);