Add capture/replay support for VK_KHR_draw_indirect_count: vkCmdDrawIndirectCountKHR and
vkCmdDrawIndexedIndirectCountKHR are hooked, serialised as new chunks, replayed (fully or
partially) and their reserved sub-draws are trimmed to the real count after submission.

Review note: InsertDrawsAndRefreshIDs now actually clamps a fetched draw count that is larger
than maxDrawCount instead of only logging that it does.

diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h
index 5890e3e14..18c81971c 100644
--- a/renderdoc/driver/vulkan/vk_common.h
+++ b/renderdoc/driver/vulkan/vk_common.h
@@ -463,6 +463,8 @@ enum class VulkanChunk : uint32_t
   vkCmdSetDeviceMask,
   vkCmdDispatchBase,
   vkGetDeviceQueue2,
+  vkCmdDrawIndirectCountKHR,
+  vkCmdDrawIndexedIndirectCountKHR,
   Max,
 };
 
diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp
index 78afc5307..dbab296e8 100644
--- a/renderdoc/driver/vulkan/vk_core.cpp
+++ b/renderdoc/driver/vulkan/vk_core.cpp
@@ -691,6 +691,9 @@ static const VkExtensionProperties supportedExtensions[] = {
         VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME, VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION,
     },
 #endif
+    {
+        VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, VK_KHR_DRAW_INDIRECT_COUNT_SPEC_VERSION,
+    },
     {
         VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DRIVER_PROPERTIES_SPEC_VERSION,
     },
@@ -2593,6 +2596,15 @@ bool WrappedVulkan::ProcessChunk(ReadSerialiser &ser, VulkanChunk chunk)
       return Serialise_vkGetDeviceQueue2(ser, VK_NULL_HANDLE, NULL, NULL);
       break;
 
+    case VulkanChunk::vkCmdDrawIndirectCountKHR:
+      return Serialise_vkCmdDrawIndirectCountKHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0,
+                                                 VK_NULL_HANDLE, 0, 0, 0);
+      break;
+    case VulkanChunk::vkCmdDrawIndexedIndirectCountKHR:
+      return Serialise_vkCmdDrawIndexedIndirectCountKHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0,
+                                                        VK_NULL_HANDLE, 0, 0, 0);
+      break;
+
     default:
     {
       SystemChunk system = (SystemChunk)chunk;
diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h
index 016dcbb47..1c32e862e 100644
--- a/renderdoc/driver/vulkan/vk_core.h
+++ b/renderdoc/driver/vulkan/vk_core.h
@@ -1886,4 +1886,15 @@ public:
   VkResult vkGetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physicalDevice,
                                              const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
                                              VkDisplayPlaneCapabilities2KHR *pCapabilities);
+
+  // VK_KHR_draw_indirect_count
+  IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer commandBuffer,
+                                VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
+                                VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+                                uint32_t stride);
+
+  IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdDrawIndexedIndirectCountKHR,
+                                VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+                                VkBuffer countBuffer, VkDeviceSize countBufferOffset,
+                                uint32_t maxDrawCount, uint32_t stride);
 };
diff --git a/renderdoc/driver/vulkan/vk_hookset_defs.h b/renderdoc/driver/vulkan/vk_hookset_defs.h
index fdd7e5579..971f21b5e 100644
--- a/renderdoc/driver/vulkan/vk_hookset_defs.h
+++ b/renderdoc/driver/vulkan/vk_hookset_defs.h
@@ -348,7 +348,8 @@
   CheckExt(EXT_sampler_filter_minmax, VKXX); \
   CheckExt(KHR_sampler_ycbcr_conversion, VK11); \
   CheckExt(KHR_device_group, VK11); \
-  CheckExt(MVK_moltenvk, VKXX);
+  CheckExt(MVK_moltenvk, VKXX); \
+  CheckExt(KHR_draw_indirect_count, VKXX);
 
 #define HookInitVulkanInstanceExts() \
   HookInitExtension(KHR_surface, DestroySurfaceKHR); \
@@ -458,6 +459,8 @@
   HookInitExtension(KHR_device_group &&KHR_surface, GetDeviceGroupSurfacePresentModesKHR); \
   HookInitExtension(KHR_device_group &&KHR_swapchain, AcquireNextImage2KHR); \
   HookInitExtension(protected_memory, GetDeviceQueue2); \
+  HookInitExtension(KHR_draw_indirect_count, CmdDrawIndirectCountKHR); \
+  HookInitExtension(KHR_draw_indirect_count, CmdDrawIndexedIndirectCountKHR); \
   HookInitDevice_PlatformSpecific()
 
 #define DefineHooks() \
@@ -968,6 +971,12 @@
   HookDefine3(VkResult, vkGetDisplayPlaneCapabilities2KHR, VkPhysicalDevice, physicalDevice, \
               const VkDisplayPlaneInfo2KHR *, pDisplayPlaneInfo, VkDisplayPlaneCapabilities2KHR *, \
               pCapabilities); \
+  HookDefine7(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer, commandBuffer, VkBuffer, buffer, \
+              VkDeviceSize, offset, VkBuffer, countBuffer, VkDeviceSize, countBufferOffset, \
+              uint32_t, maxDrawCount, uint32_t, stride); \
+  HookDefine7(void, vkCmdDrawIndexedIndirectCountKHR, VkCommandBuffer, commandBuffer, VkBuffer, \
+              buffer, VkDeviceSize, offset, VkBuffer, countBuffer, VkDeviceSize, \
+              countBufferOffset, uint32_t, maxDrawCount, uint32_t, stride); \
   HookDefine_PlatformSpecific()
 
 struct VkLayerInstanceDispatchTableExtended : VkLayerInstanceDispatchTable
diff --git a/renderdoc/driver/vulkan/vk_stringise.cpp b/renderdoc/driver/vulkan/vk_stringise.cpp
index 998cebc2e..ccaa225ae 100644
--- a/renderdoc/driver/vulkan/vk_stringise.cpp
+++ b/renderdoc/driver/vulkan/vk_stringise.cpp
@@ -28,7 +28,7 @@
 template <>
 std::string DoStringise(const VulkanChunk &el)
 {
-  RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1116, "Chunks changed without updating names");
+  RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1118, "Chunks changed without updating names");
 
   BEGIN_ENUM_STRINGISE(VulkanChunk)
   {
@@ -148,6 +148,8 @@ std::string DoStringise(const VulkanChunk &el)
     STRINGISE_ENUM_CLASS(vkCmdSetDeviceMask);
     STRINGISE_ENUM_CLASS(vkCmdDispatchBase);
     STRINGISE_ENUM_CLASS(vkGetDeviceQueue2);
+    STRINGISE_ENUM_CLASS(vkCmdDrawIndirectCountKHR);
+    STRINGISE_ENUM_CLASS(vkCmdDrawIndexedIndirectCountKHR);
     STRINGISE_ENUM_CLASS_NAMED(Max, "Max Chunk");
   }
   END_ENUM_STRINGISE()
diff --git a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp
index 97b500f20..91ee6d680 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp
@@ -2403,6 +2403,633 @@ void WrappedVulkan::vkCmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t ba
   }
 }
 
+template <typename SerialiserType>
+bool WrappedVulkan::Serialise_vkCmdDrawIndirectCountKHR(
+    SerialiserType &ser, VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+    VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
+{
+  SERIALISE_ELEMENT(commandBuffer);
+  SERIALISE_ELEMENT(buffer);
+  SERIALISE_ELEMENT(offset);
+  SERIALISE_ELEMENT(countBuffer);
+  SERIALISE_ELEMENT(countBufferOffset);
+  SERIALISE_ELEMENT(maxDrawCount);
+  SERIALISE_ELEMENT(stride);
+
+  Serialise_DebugMessages(ser);
+
+  SERIALISE_CHECK_READ_ERRORS();
+
+  if(IsReplayingAndReading())
+  {
+    m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
+
+    // do execution (possibly partial)
+    if(IsActiveReplaying(m_State))
+    {
+      // this count is wrong if we're not re-recording and fetching the actual count below, but it's
+      // impossible without having a particular submission in mind because without a specific
+      // instance we can't know what the actual count was (it could vary between submissions).
+      // Fortunately when we're not in the re-recording command buffer the EID tracking isn't
+      // needed.
+      uint32_t count = maxDrawCount;
+
+      if(InRerecordRange(m_LastCmdBufferID))
+      {
+        commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
+
+        uint32_t curEID = m_RootEventID;
+
+        if(m_FirstEventID <= 1)
+        {
+          curEID = m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID;
+
+          if(m_Partial[Primary].partialParent == m_LastCmdBufferID)
+            curEID += m_Partial[Primary].baseEvent;
+          else if(m_Partial[Secondary].partialParent == m_LastCmdBufferID)
+            curEID += m_Partial[Secondary].baseEvent;
+        }
+
+        DrawcallUse use(m_CurChunkOffset, 0);
+        auto it = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use);
+
+        if(it == m_DrawcallUses.end() || GetDrawcall(it->eventId) == NULL)
+        {
+          RDCERR("Unexpected drawcall not found in uses vector, offset %llu", m_CurChunkOffset);
+        }
+        else
+        {
+          uint32_t baseEventID = it->eventId;
+
+          // get the number of draws by looking at how many children the parent drawcall has.
+          count = (uint32_t)GetDrawcall(it->eventId)->children.size();
+
+          // when we have a callback, submit every drawcall individually to the callback
+          if(m_DrawcallCallback && IsDrawInRenderPass())
+          {
+            for(uint32_t i = 0; i < count; i++)
+            {
+              uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1);
+
+              ObjDisp(commandBuffer)
+                  ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
+
+              if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
+              {
+                ObjDisp(commandBuffer)
+                    ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
+                m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
+              }
+
+              offset += stride;
+            }
+          }
+          // To add the multidraw, we made an event N that is the 'parent' marker, then
+          // N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected
+          // then we'll replay up to N but not N+1, so just do nothing - we DON'T want to draw
+          // the first sub-draw in that range.
+          else if(m_LastEventID > baseEventID)
+          {
+            uint32_t drawidx = 0;
+
+            if(m_FirstEventID <= 1)
+            {
+              // if we're replaying part-way into a multidraw, we can replay the first part
+              // 'easily'
+              // by just reducing the Count parameter to however many we want to replay. This only
+              // works if we're replaying from the first multidraw to the nth (n less than Count)
+              count = RDCMIN(count, m_LastEventID - baseEventID);
+            }
+            else
+            {
+              // otherwise we do the 'hard' case, draw only one multidraw
+              // note we'll never be asked to do e.g. 3rd-7th of a multidraw. Only ever 0th-nth or
+              // a single draw.
+              //
+              // We also need to draw the same number of draws so that DrawIndex is faithful. In
+              // order to preserve the draw index we write a custom indirect buffer that has zeros
+              // for the parameters of all previous draws.
+              drawidx = (curEID - baseEventID - 1);
+
+              offset += stride * drawidx;
+
+              // ensure the custom buffer is large enough
+              VkDeviceSize bufLength = sizeof(VkDrawIndirectCommand) * (drawidx + 1);
+
+              RDCASSERT(bufLength <= m_IndirectBufferSize, bufLength, m_IndirectBufferSize);
+
+              VkBufferMemoryBarrier bufBarrier = {
+                  VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                  NULL,
+                  VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
+                  VK_ACCESS_TRANSFER_WRITE_BIT,
+                  VK_QUEUE_FAMILY_IGNORED,
+                  VK_QUEUE_FAMILY_IGNORED,
+                  Unwrap(m_IndirectBuffer.buf),
+                  0,
+                  VK_WHOLE_SIZE,
+              };
+
+              VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
+                                                    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->BeginCommandBuffer(Unwrap(m_IndirectCommandBuffer), &beginInfo);
+
+              // wait for any previous indirect draws to complete before filling/transferring
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              // initialise to 0 so all other draws don't draw anything
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->CmdFillBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(m_IndirectBuffer.buf), 0,
+                                  m_IndirectBufferSize, 0);
+
+              // wait for fill to complete before copy
+              bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+              bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              // copy over the actual parameter set into the right place
+              VkBufferCopy region = {offset, bufLength - sizeof(VkDrawIndirectCommand),
+                                     sizeof(VkDrawIndirectCommand)};
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->CmdCopyBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(buffer),
+                                  Unwrap(m_IndirectBuffer.buf), 1, &region);
+
+              // finally wait for copy to complete before drawing from it
+              bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+              bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              ObjDisp(m_IndirectCommandBuffer)->EndCommandBuffer(Unwrap(m_IndirectCommandBuffer));
+
+              // draw from our custom buffer
+              m_IndirectDraw = true;
+              buffer = m_IndirectBuffer.buf;
+              offset = 0;
+              count = drawidx + 1;
+              stride = sizeof(VkDrawIndirectCommand);
+            }
+
+            if(IsDrawInRenderPass())
+            {
+              uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, drawidx + 1);
+
+              ObjDisp(commandBuffer)
+                  ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count, stride);
+
+              if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
+              {
+                ObjDisp(commandBuffer)
+                    ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count, stride);
+                m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
+              }
+            }
+          }
+        }
+      }
+
+      // multidraws skip the event ID past the whole thing
+      m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID += count + 1;
+    }
+    else
+    {
+      VkIndirectPatchData indirectPatch =
+          FetchIndirectData(VkIndirectPatchType::DrawIndirectCount, commandBuffer, buffer, offset,
+                            maxDrawCount, stride, countBuffer, countBufferOffset);
+
+      ObjDisp(commandBuffer)
+          ->CmdDrawIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
+                                    Unwrap(countBuffer), countBufferOffset, maxDrawCount, stride);
+
+      // add on the size we'll need for an indirect buffer in the worst case.
+      // Note that we'll only ever be partially replaying one draw at a time, so we only need the
+      // worst case.
+      m_IndirectBufferSize =
+          RDCMAX(m_IndirectBufferSize,
+                 sizeof(VkDrawIndirectCommand) + (maxDrawCount > 0 ? maxDrawCount - 1 : 0) * stride);
+
+      string name = "vkCmdDrawIndirectCountKHR";
+
+      if(!IsDrawInRenderPass())
+      {
+        AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
+                        MessageSource::IncorrectAPIUse,
+                        "Drawcall in happening outside of render pass, or in secondary command "
+                        "buffer without RENDER_PASS_CONTINUE_BIT");
+      }
+
+      SDChunk *baseChunk = m_StructuredFile->chunks.back();
+
+      DrawcallDescription draw;
+      draw.name = name;
+      draw.flags = DrawFlags::MultiDraw | DrawFlags::PushMarker;
+      AddEvent();
+      AddDrawcall(draw, true);
+
+      VulkanDrawcallTreeNode &drawNode = GetDrawcallStack().back()->children.back();
+
+      drawNode.indirectPatch = indirectPatch;
+
+      drawNode.resourceUsage.push_back(std::make_pair(
+          GetResID(buffer), EventUsage(drawNode.draw.eventId, ResourceUsage::Indirect)));
+
+      m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
+
+      for(uint32_t i = 0; i < maxDrawCount; i++)
+      {
+        DrawcallDescription multi;
+        multi.drawIndex = i;
+
+        multi.name = name;
+
+        multi.flags |= DrawFlags::Drawcall | DrawFlags::Instanced | DrawFlags::Indirect;
+
+        // add a fake chunk for this individual indirect draw
+        SDChunk *fakeChunk = new SDChunk("Indirect sub-command");
+        fakeChunk->metadata = baseChunk->metadata;
+        fakeChunk->metadata.chunkID = (uint32_t)VulkanChunk::vkCmdIndirectSubCommand;
+
+        {
+          StructuredSerialiser structuriser(fakeChunk, ser.GetChunkLookup());
+
+          structuriser.Serialise("drawIndex", 0U);
+          structuriser.Serialise("offset", offset);
+          structuriser.Serialise("command", VkDrawIndirectCommand());
+        }
+
+        m_StructuredFile->chunks.push_back(fakeChunk);
+
+        AddEvent();
+        AddDrawcall(multi, true);
+
+        m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
+      }
+
+      draw.name = name;
+      draw.flags = DrawFlags::PopMarker;
+      AddDrawcall(draw, false);
+    }
+  }
+
+  return true;
+}
+
+void WrappedVulkan::vkCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer,
+                                              VkDeviceSize offset, VkBuffer countBuffer,
+                                              VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+                                              uint32_t stride)
+{
+  SCOPED_DBG_SINK();
+
+  SERIALISE_TIME_CALL(ObjDisp(commandBuffer)
+                          ->CmdDrawIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
+                                                    Unwrap(countBuffer), countBufferOffset,
+                                                    maxDrawCount, stride));
+
+  if(IsCaptureMode(m_State))
+  {
+    VkResourceRecord *record = GetRecord(commandBuffer);
+
+    CACHE_THREAD_SERIALISER();
+
+    ser.SetDrawChunk();
+    SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdDrawIndirectCountKHR);
+    Serialise_vkCmdDrawIndirectCountKHR(ser, commandBuffer, buffer, offset, countBuffer,
+                                        countBufferOffset, maxDrawCount, stride);
+
+    record->AddChunk(scope.Get());
+
+    record->MarkResourceFrameReferenced(GetResID(buffer), eFrameRef_Read);
+    record->MarkResourceFrameReferenced(GetRecord(buffer)->baseResource, eFrameRef_Read);
+    if(GetRecord(buffer)->sparseInfo)
+      record->cmdInfo->sparse.insert(GetRecord(buffer)->sparseInfo);
+
+    record->MarkResourceFrameReferenced(GetResID(countBuffer), eFrameRef_Read);
+    record->MarkResourceFrameReferenced(GetRecord(countBuffer)->baseResource, eFrameRef_Read);
+    if(GetRecord(countBuffer)->sparseInfo)
+      record->cmdInfo->sparse.insert(GetRecord(countBuffer)->sparseInfo);
+  }
+}
+
+template <typename SerialiserType>
+bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirectCountKHR(
+    SerialiserType &ser, VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+    VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
+{
+  SERIALISE_ELEMENT(commandBuffer);
+  SERIALISE_ELEMENT(buffer);
+  SERIALISE_ELEMENT(offset);
+  SERIALISE_ELEMENT(countBuffer);
+  SERIALISE_ELEMENT(countBufferOffset);
+  SERIALISE_ELEMENT(maxDrawCount);
+  SERIALISE_ELEMENT(stride);
+
+  Serialise_DebugMessages(ser);
+
+  SERIALISE_CHECK_READ_ERRORS();
+
+  if(IsReplayingAndReading())
+  {
+    m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
+
+    // do execution (possibly partial)
+    if(IsActiveReplaying(m_State))
+    {
+      // this count is wrong if we're not re-recording and fetching the actual count below, but it's
+      // impossible without having a particular submission in mind because without a specific
+      // instance we can't know what the actual count was (it could vary between submissions).
+      // Fortunately when we're not in the re-recording command buffer the EID tracking isn't
+      // needed.
+      uint32_t count = maxDrawCount;
+
+      if(InRerecordRange(m_LastCmdBufferID))
+      {
+        commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
+
+        uint32_t curEID = m_RootEventID;
+
+        if(m_FirstEventID <= 1)
+        {
+          curEID = m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID;
+
+          if(m_Partial[Primary].partialParent == m_LastCmdBufferID)
+            curEID += m_Partial[Primary].baseEvent;
+          else if(m_Partial[Secondary].partialParent == m_LastCmdBufferID)
+            curEID += m_Partial[Secondary].baseEvent;
+        }
+
+        DrawcallUse use(m_CurChunkOffset, 0);
+        auto it = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use);
+
+        if(it == m_DrawcallUses.end() || GetDrawcall(it->eventId) == NULL)
+        {
+          RDCERR("Unexpected drawcall not found in uses vector, offset %llu", m_CurChunkOffset);
+        }
+        else
+        {
+          uint32_t baseEventID = it->eventId;
+
+          // get the number of draws by looking at how many children the parent drawcall has.
+          count = (uint32_t)GetDrawcall(it->eventId)->children.size();
+
+          // when we have a callback, submit every drawcall individually to the callback
+          if(m_DrawcallCallback && IsDrawInRenderPass())
+          {
+            for(uint32_t i = 0; i < count; i++)
+            {
+              uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1);
+
+              ObjDisp(commandBuffer)
+                  ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
+
+              if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
+              {
+                ObjDisp(commandBuffer)
+                    ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1,
+                                             stride);
+                m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
+              }
+
+              offset += stride;
+            }
+          }
+          // To add the multidraw, we made an event N that is the 'parent' marker, then
+          // N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected
+          // then we'll replay up to N but not N+1, so just do nothing - we DON'T want to draw
+          // the first sub-draw in that range.
+          else if(m_LastEventID > baseEventID)
+          {
+            uint32_t drawidx = 0;
+
+            if(m_FirstEventID <= 1)
+            {
+              // if we're replaying part-way into a multidraw, we can replay the first part
+              // 'easily'
+              // by just reducing the Count parameter to however many we want to replay. This only
+              // works if we're replaying from the first multidraw to the nth (n less than Count)
+              count = RDCMIN(count, m_LastEventID - baseEventID);
+            }
+            else
+            {
+              // otherwise we do the 'hard' case, draw only one multidraw
+              // note we'll never be asked to do e.g. 3rd-7th of a multidraw. Only ever 0th-nth or
+              // a single draw.
+              //
+              // We also need to draw the same number of draws so that DrawIndex is faithful. In
+              // order to preserve the draw index we write a custom indirect buffer that has zeros
+              // for the parameters of all previous draws.
+              drawidx = (curEID - baseEventID - 1);
+
+              offset += stride * drawidx;
+
+              // ensure the custom buffer is large enough
+              VkDeviceSize bufLength = sizeof(VkDrawIndexedIndirectCommand) * (drawidx + 1);
+
+              RDCASSERT(bufLength <= m_IndirectBufferSize, bufLength, m_IndirectBufferSize);
+
+              VkBufferMemoryBarrier bufBarrier = {
+                  VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                  NULL,
+                  VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
+                  VK_ACCESS_TRANSFER_WRITE_BIT,
+                  VK_QUEUE_FAMILY_IGNORED,
+                  VK_QUEUE_FAMILY_IGNORED,
+                  Unwrap(m_IndirectBuffer.buf),
+                  0,
+                  VK_WHOLE_SIZE,
+              };
+
+              VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
+                                                    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->BeginCommandBuffer(Unwrap(m_IndirectCommandBuffer), &beginInfo);
+
+              // wait for any previous indirect draws to complete before filling/transferring
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              // initialise to 0 so all other draws don't draw anything
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->CmdFillBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(m_IndirectBuffer.buf), 0,
+                                  m_IndirectBufferSize, 0);
+
+              // wait for fill to complete before copy
+              bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+              bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              // copy over the actual parameter set into the right place
+              VkBufferCopy region = {offset, bufLength - sizeof(VkDrawIndexedIndirectCommand),
+                                     sizeof(VkDrawIndexedIndirectCommand)};
+              ObjDisp(m_IndirectCommandBuffer)
+                  ->CmdCopyBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(buffer),
+                                  Unwrap(m_IndirectBuffer.buf), 1, &region);
+
+              // finally wait for copy to complete before drawing from it
+              bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+              bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
+
+              DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
+
+              ObjDisp(m_IndirectCommandBuffer)->EndCommandBuffer(Unwrap(m_IndirectCommandBuffer));
+
+              // draw from our custom buffer
+              m_IndirectDraw = true;
+              buffer = m_IndirectBuffer.buf;
+              offset = 0;
+              count = drawidx + 1;
+              stride = sizeof(VkDrawIndexedIndirectCommand);
+            }
+
+            if(IsDrawInRenderPass())
+            {
+              uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, drawidx + 1);
+
+              ObjDisp(commandBuffer)
+                  ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count,
+                                           stride);
+
+              if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
+              {
+                ObjDisp(commandBuffer)
+                    ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count,
+                                             stride);
+                m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
+              }
+            }
+          }
+        }
+      }
+
+      // multidraws skip the event ID past the whole thing
+      m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID += count + 1;
+    }
+    else
+    {
+      VkIndirectPatchData indirectPatch =
+          FetchIndirectData(VkIndirectPatchType::DrawIndexedIndirectCount, commandBuffer, buffer,
+                            offset, maxDrawCount, stride, countBuffer, countBufferOffset);
+
+      ObjDisp(commandBuffer)
+          ->CmdDrawIndexedIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
+                                           Unwrap(countBuffer), countBufferOffset, maxDrawCount,
+                                           stride);
+
+      // add on the size we'll need for an indirect buffer in the worst case.
+      // Note that we'll only ever be partially replaying one draw at a time, so we only need the
+      // worst case.
+      m_IndirectBufferSize =
+          RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndexedIndirectCommand) +
+                                           (maxDrawCount > 0 ? maxDrawCount - 1 : 0) * stride);
+
+      string name = "vkCmdDrawIndexedIndirectCountKHR";
+
+      if(!IsDrawInRenderPass())
+      {
+        AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
+                        MessageSource::IncorrectAPIUse,
+                        "Drawcall in happening outside of render pass, or in secondary command "
+                        "buffer without RENDER_PASS_CONTINUE_BIT");
+      }
+
+      SDChunk *baseChunk = m_StructuredFile->chunks.back();
+
+      DrawcallDescription draw;
+      draw.name = name;
+      draw.flags = DrawFlags::MultiDraw | DrawFlags::PushMarker;
+      AddEvent();
+      AddDrawcall(draw, true);
+
+      VulkanDrawcallTreeNode &drawNode = GetDrawcallStack().back()->children.back();
+
+      drawNode.indirectPatch = indirectPatch;
+
+      drawNode.resourceUsage.push_back(std::make_pair(
+          GetResID(buffer), EventUsage(drawNode.draw.eventId, ResourceUsage::Indirect)));
+
+      m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
+
+      for(uint32_t i = 0; i < maxDrawCount; i++)
+      {
+        DrawcallDescription multi;
+        multi.drawIndex = i;
+
+        multi.name = name;
+
+        multi.flags |=
+            DrawFlags::Drawcall | DrawFlags::Instanced | DrawFlags::Indexed | DrawFlags::Indirect;
+
+        // add a fake chunk for this individual indirect draw
+        SDChunk *fakeChunk = new SDChunk("Indirect sub-command");
+        fakeChunk->metadata = baseChunk->metadata;
+        fakeChunk->metadata.chunkID = (uint32_t)VulkanChunk::vkCmdIndirectSubCommand;
+
+        {
+          StructuredSerialiser structuriser(fakeChunk, ser.GetChunkLookup());
+
+          structuriser.Serialise("drawIndex", 0U);
+          structuriser.Serialise("offset", offset);
+          structuriser.Serialise("command", VkDrawIndexedIndirectCommand());
+        }
+
+        m_StructuredFile->chunks.push_back(fakeChunk);
+
+        AddEvent();
+        AddDrawcall(multi, true);
+
+        m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
+      }
+
+      draw.name = name;
+      draw.flags = DrawFlags::PopMarker;
+      AddDrawcall(draw, false);
+    }
+  }
+
+  return true;
+}
+
+void WrappedVulkan::vkCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer,
+                                                     VkDeviceSize offset, VkBuffer countBuffer,
+                                                     VkDeviceSize countBufferOffset,
+                                                     uint32_t maxDrawCount, uint32_t stride)
+{
+  SCOPED_DBG_SINK();
+
+  SERIALISE_TIME_CALL(ObjDisp(commandBuffer)
+                          ->CmdDrawIndexedIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer),
+                                                           offset, Unwrap(countBuffer),
+                                                           countBufferOffset, maxDrawCount, stride));
+
+  if(IsCaptureMode(m_State))
+  {
+    VkResourceRecord *record = GetRecord(commandBuffer);
+
+    CACHE_THREAD_SERIALISER();
+
+    ser.SetDrawChunk();
+    SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdDrawIndexedIndirectCountKHR);
+    Serialise_vkCmdDrawIndexedIndirectCountKHR(ser, commandBuffer, buffer, offset, countBuffer,
+                                               countBufferOffset, maxDrawCount, stride);
+
+    record->AddChunk(scope.Get());
+
+    record->MarkResourceFrameReferenced(GetResID(buffer), eFrameRef_Read);
+    record->MarkResourceFrameReferenced(GetRecord(buffer)->baseResource, eFrameRef_Read);
+    if(GetRecord(buffer)->sparseInfo)
+      record->cmdInfo->sparse.insert(GetRecord(buffer)->sparseInfo);
+
+    record->MarkResourceFrameReferenced(GetResID(countBuffer), eFrameRef_Read);
+    record->MarkResourceFrameReferenced(GetRecord(countBuffer)->baseResource, eFrameRef_Read);
+    if(GetRecord(countBuffer)->sparseInfo)
+      record->cmdInfo->sparse.insert(GetRecord(countBuffer)->sparseInfo);
+  }
+}
+
 INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDraw, VkCommandBuffer commandBuffer,
                                 uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex,
                                 uint32_t firstInstance);
@@ -2467,4 +3094,14 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdResolveImage, VkCommandBuffer command
 
 INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDispatchBase, VkCommandBuffer commandBuffer,
                                 uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
-                                uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
\ No newline at end of file
+                                uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
+
+INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer commandBuffer,
+                                VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
+                                VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+                                uint32_t stride);
+
+INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndexedIndirectCountKHR,
+                                VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+                                VkBuffer countBuffer, VkDeviceSize countBufferOffset,
+                                uint32_t maxDrawCount, uint32_t stride);
\ No newline at end of file
diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
index ba2e27b56..0a6a6f3ce 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
@@ -541,14 +541,63 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector<VulkanDrawcallTreeNode> &cmd
       n.draw.dispatchDimension[2] = args->z;
     }
     else if(n.indirectPatch.type == VkIndirectPatchType::DrawIndirect ||
-            n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirect)
+            n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirect ||
+            n.indirectPatch.type == VkIndirectPatchType::DrawIndirectCount ||
+            n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirectCount)
     {
+      bool hasCount = (n.indirectPatch.type == VkIndirectPatchType::DrawIndirectCount ||
+                       n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirectCount);
       bytebuf argbuf;
       GetDebugManager()->GetBufferData(GetResID(n.indirectPatch.buf), 0, 0, argbuf);
 
       byte *ptr = argbuf.begin(), *end = argbuf.end();
 
-      if(n.indirectPatch.count == 1)
+      uint32_t indirectCount = n.indirectPatch.count;
+      if(hasCount)
+      {
+        if(argbuf.size() >= 16)
+        {
+          uint32_t *count = (uint32_t *)end;
+          count -= 4;
+          indirectCount = *count;
+        }
+        else
+        {
+          RDCERR("Couldn't get indirect draw count");
+        }
+      }
+
+      if(indirectCount > n.indirectPatch.count)
+      {
+        RDCERR("Indirect count higher than maxCount, clamping");
+        // only maxCount sub-draw nodes were reserved (and at most maxCount draws execute), so
+        // clamp here - otherwise the patch loop below would run on into the pop marker and beyond
+        indirectCount = n.indirectPatch.count;
+      }
+      else if(indirectCount < n.indirectPatch.count)
+      {
+        // need to remove any draws we reserved that didn't actually happen, and shift any
+        // subsequent event and draw Ids
+        uint32_t shiftCount = n.indirectPatch.count - indirectCount;
+
+        // i is the pushmarker, so i + 1 is the first of the sub draws.
+        // i + 1 + n.indirectPatch.count is the last of the draws, we don't want to erase the next
+        // one (the popmarker)
+        cmdBufNodes.erase(cmdBufNodes.begin() + i + 1 + indirectCount,
+                          cmdBufNodes.begin() + i + 1 + n.indirectPatch.count);
+        for(size_t j = i + 1 + indirectCount; j < cmdBufNodes.size(); j++)
+        {
+          cmdBufNodes[j].draw.eventId -= shiftCount;
+          cmdBufNodes[j].draw.drawcallId -= shiftCount;
+
+          for(APIEvent &ev : cmdBufNodes[j].draw.events)
+            ev.eventId -= shiftCount;
+        }
+      }
+
+      // indirect count versions always have a multidraw marker region, but static count of 1 would
+      // be in-lined as a single draw, so we patch in-place
+      if(!hasCount && indirectCount == 1)
       {
         bool valid = PatchIndirectDraw(n.indirectPatch.type, n.draw, ptr, end);
 
@@ -563,10 +612,16 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector<VulkanDrawcallTreeNode> &cmd
       else
       {
         // we should have N draws immediately following this one, check that that's the case
-        RDCASSERT(i + n.indirectPatch.count < cmdBufNodes.size(), i, n.indirectPatch.count,
+        RDCASSERT(i + indirectCount < cmdBufNodes.size(), i, indirectCount, n.indirectPatch.count,
                   cmdBufNodes.size());
 
-        for(size_t j = 0; j < (size_t)n.indirectPatch.count && i + j + 1 < cmdBufNodes.size(); j++)
+        // if there was a count, patch that onto the root drawcall name
+        if(hasCount)
+        {
+          n.draw.name = StringFormat::Fmt("%s(<%u>)", n.draw.name.c_str(), indirectCount);
+        }
+
+        for(size_t j = 0; j < (size_t)indirectCount && i + j + 1 < cmdBufNodes.size(); j++)
         {
           VulkanDrawcallTreeNode &n2 = cmdBufNodes[i + j + 1];
 