diff --git a/renderdoc/driver/vulkan/vk_bindless_feedback.cpp b/renderdoc/driver/vulkan/vk_bindless_feedback.cpp index 72e296794..945b23cb2 100644 --- a/renderdoc/driver/vulkan/vk_bindless_feedback.cpp +++ b/renderdoc/driver/vulkan/vk_bindless_feedback.cpp @@ -884,17 +884,7 @@ void VulkanReplay::FetchShaderFeedback(uint32_t eventId) { modifiedstate.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics); - if(drawcall->flags & DrawFlags::Indexed) - { - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - } - else - { - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); - } + m_pDriver->ReplayDraw(cmd, *drawcall); modifiedstate.EndRenderPass(cmd); } diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index b94a8d386..79b03b047 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -2434,7 +2434,7 @@ ReplayStatus WrappedVulkan::ReadLogInitialisation(RDCFile *rdc, bool storeStruct // create indirect draw buffer m_IndirectBufferSize = AlignUp(m_IndirectBufferSize + 63, (size_t)64); - m_IndirectBuffer.Create(this, GetDev(), m_IndirectBufferSize, 1, + m_IndirectBuffer.Create(this, GetDev(), m_IndirectBufferSize * 2, 1, GPUBuffer::eGPUBufferGPULocal | GPUBuffer::eGPUBufferIndirectBuffer); m_IndirectCommandBuffer = GetNextCmd(); @@ -4442,6 +4442,106 @@ void WrappedVulkan::UpdateImageStates(const rdcflatmap & } } +void WrappedVulkan::ReplayDraw(VkCommandBuffer cmd, const DrawcallDescription &drawcall) +{ + // if this isn't a multidraw (or it's the first draw in a multidraw), it's fairly easy: issue the draw directly + if(drawcall.drawIndex == 0) + { + if(drawcall.flags & DrawFlags::Indexed) + ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall.numIndices, drawcall.numInstances, + drawcall.indexOffset, 
drawcall.baseVertex, + drawcall.instanceOffset); + else + ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall.numIndices, drawcall.numInstances, + drawcall.vertexOffset, drawcall.instanceOffset); + } + else + { + // otherwise it's a bit more complex, we need to set up a multidraw with the first N draws nop'd + // out and the parameters added into the last one + + VkMarkerRegion::Begin(StringFormat::Fmt("ReplayDraw(drawIndex=%u)", drawcall.drawIndex), cmd); + + bytebuf params; + + if(drawcall.flags & DrawFlags::Indexed) + { + VkDrawIndexedIndirectCommand drawParams; + drawParams.indexCount = drawcall.numIndices; + drawParams.instanceCount = drawcall.numInstances; + drawParams.firstIndex = drawcall.indexOffset; + drawParams.vertexOffset = drawcall.baseVertex; + drawParams.firstInstance = drawcall.instanceOffset; + + params.resize(sizeof(drawParams)); + memcpy(params.data(), &drawParams, sizeof(drawParams)); + } + else + { + VkDrawIndirectCommand drawParams; + + drawParams.vertexCount = drawcall.numIndices; + drawParams.instanceCount = drawcall.numInstances; + drawParams.firstVertex = drawcall.vertexOffset; + drawParams.firstInstance = drawcall.instanceOffset; + + params.resize(sizeof(drawParams)); + memcpy(params.data(), &drawParams, sizeof(drawParams)); + } + + // ensure the custom buffer is large enough: one tightly-packed parameter struct per draw, up to and including drawIndex + VkDeviceSize bufLength = params.size() * (drawcall.drawIndex + 1); + + RDCASSERT(bufLength <= m_IndirectBufferSize, bufLength, m_IndirectBufferSize); + + VkBufferMemoryBarrier bufBarrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + Unwrap(m_IndirectBuffer.buf), + m_IndirectBufferSize, + m_IndirectBufferSize, + }; + + // wait for any previous indirect draws to complete before filling/transferring (the barrier covers the region at offset m_IndirectBufferSize, of size m_IndirectBufferSize) + DoPipelineBarrier(cmd, 1, &bufBarrier); + + // initialise to 0 so all other draws don't draw anything. NOTE(review): the visible callers invoke ReplayDraw between BeginRenderPassAndApplyState and EndRenderPass, and vkCmdFillBuffer/vkCmdUpdateBuffer are specified as outside-render-pass commands - confirm this path is valid + 
ObjDisp(cmd)->CmdFillBuffer(Unwrap(cmd), Unwrap(m_IndirectBuffer.buf), m_IndirectBufferSize, + m_IndirectBufferSize, 0); + + // wait for fill to complete before update + bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + DoPipelineBarrier(cmd, 1, &bufBarrier); + + // upload the parameters for the draw we want into slot drawIndex; the slots before it stay zeroed + ObjDisp(cmd)->CmdUpdateBuffer(Unwrap(cmd), Unwrap(m_IndirectBuffer.buf), + m_IndirectBufferSize + params.size() * drawcall.drawIndex, + params.size(), params.data()); + + // finally wait for the update to complete before drawing from it + bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + + DoPipelineBarrier(cmd, 1, &bufBarrier); + + if(drawcall.flags & DrawFlags::Indexed) + ObjDisp(cmd)->CmdDrawIndexedIndirect(Unwrap(cmd), Unwrap(m_IndirectBuffer.buf), + m_IndirectBufferSize, drawcall.drawIndex + 1, + (uint32_t)params.size()); + else + ObjDisp(cmd)->CmdDrawIndirect(Unwrap(cmd), Unwrap(m_IndirectBuffer.buf), m_IndirectBufferSize, + drawcall.drawIndex + 1, (uint32_t)params.size()); + + VkMarkerRegion::End(cmd); + } +} + #if ENABLED(ENABLE_UNIT_TESTS) #undef None diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index 4d6f8a7b7..cda4ce24c 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -1030,6 +1030,7 @@ public: } void Shutdown(); void ReplayLog(uint32_t startEventID, uint32_t endEventID, ReplayLogType replayType); + void ReplayDraw(VkCommandBuffer cmd, const DrawcallDescription &drawcall); ReplayStatus ReadLogInitialisation(RDCFile *rdc, bool storeStructuredBuffers); SDFile &GetStructuredFile() { return *m_StructuredFile; } diff --git a/renderdoc/driver/vulkan/vk_overlay.cpp b/renderdoc/driver/vulkan/vk_overlay.cpp index 79fc471b8..fa8861fe7 100644 --- a/renderdoc/driver/vulkan/vk_overlay.cpp +++ b/renderdoc/driver/vulkan/vk_overlay.cpp @@ -1036,8 
+1036,11 @@ ResourceId VulkanReplay::RenderOverlay(ResourceId texid, FloatVector clearCol, D // do single draw m_pDriver->m_RenderState.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics); - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), patchedIndexCount, mainDraw->numInstances, 0, 0, - mainDraw->instanceOffset); + DrawcallDescription draw = *mainDraw; + draw.numIndices = patchedIndexCount; + draw.baseVertex = 0; + draw.indexOffset = 0; + m_pDriver->ReplayDraw(cmd, draw); m_pDriver->m_RenderState.EndRenderPass(cmd); vkr = ObjDisp(cmd)->EndCommandBuffer(Unwrap(cmd)); diff --git a/renderdoc/driver/vulkan/vk_pixelhistory.cpp b/renderdoc/driver/vulkan/vk_pixelhistory.cpp index 9e5d1a07b..6e66fbcbe 100644 --- a/renderdoc/driver/vulkan/vk_pixelhistory.cpp +++ b/renderdoc/driver/vulkan/vk_pixelhistory.cpp @@ -1177,13 +1177,7 @@ private: uint32_t occlIndex = (uint32_t)m_OcclusionQueries.size(); ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionPool, occlIndex, m_QueryFlags); - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); + m_pDriver->ReplayDraw(cmd, *drawcall); ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_OcclusionPool, occlIndex); m_OcclusionQueries.insert(std::make_pair(eventId, occlIndex)); @@ -1602,13 +1596,7 @@ private: } const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); + 
m_pDriver->ReplayDraw(cmd, *drawcall); m_pDriver->GetCmdRenderState().EndRenderPass(cmd); } @@ -2141,13 +2129,7 @@ private: ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionPool, index, m_QueryFlags); const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); + m_pDriver->ReplayDraw(cmd, *drawcall); ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_OcclusionPool, index); } @@ -2368,13 +2350,7 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback ObjDisp(cmd)->CmdSetStencilWriteMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, f); const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eid); - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); + m_pDriver->ReplayDraw(cmd, *drawcall); state.EndRenderPass(cmd); if(i == 1) @@ -2465,13 +2441,7 @@ struct VulkanPixelHistoryPerFragmentCallback : VulkanPixelHistoryCallback ObjDisp(cmd)->CmdSetStencilWriteMask(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, 0xff); ObjDisp(cmd)->CmdSetStencilReference(Unwrap(cmd), VK_STENCIL_FACE_FRONT_AND_BACK, f); const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eid); - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, 
drawcall->baseVertex, - drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); + m_pDriver->ReplayDraw(cmd, *drawcall); state.EndRenderPass(cmd); CopyImagePixel(cmd, colourCopyParams, (fragsProcessed + f) * sizeof(PerFragmentInfo) + @@ -2736,20 +2706,15 @@ struct VulkanPixelHistoryDiscardedFragmentsCallback : VulkanPixelHistoryCallback ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionPool, queryId, m_QueryFlags); const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eid); uint32_t primId = primIds[i]; + DrawcallDescription draw = *drawcall; + draw.numIndices = RENDERDOC_NumVerticesPerPrimitive(drawcall->topology); + draw.indexOffset += RENDERDOC_VertexOffset(drawcall->topology, primId); + draw.vertexOffset += RENDERDOC_VertexOffset(drawcall->topology, primId); // TODO once pixel history distinguishes between instances, draw only the instance for - // this fragment - if(drawcall->flags & DrawFlags::Indexed) - ObjDisp(cmd)->CmdDrawIndexed( - Unwrap(cmd), RENDERDOC_NumVerticesPerPrimitive(drawcall->topology), - RDCMAX(1U, drawcall->numInstances), - drawcall->indexOffset + RENDERDOC_VertexOffset(drawcall->topology, primId), - drawcall->baseVertex, drawcall->instanceOffset); - else - ObjDisp(cmd)->CmdDraw( - Unwrap(cmd), RENDERDOC_NumVerticesPerPrimitive(drawcall->topology), - RDCMAX(1U, drawcall->numInstances), - drawcall->vertexOffset + RENDERDOC_VertexOffset(drawcall->topology, primId), - drawcall->instanceOffset); + // this fragment. + // TODO replay with a dummy index buffer so that all primitives other than the target one are + // degenerate - that way the vertex index etc is still the same as it should be. 
+ m_pDriver->ReplayDraw(cmd, draw); ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_OcclusionPool, queryId); m_OcclusionIndices[make_rdcpair(eid, primId)] = queryId; diff --git a/renderdoc/driver/vulkan/vk_postvs.cpp b/renderdoc/driver/vulkan/vk_postvs.cpp index ac71716e2..2b26086df 100644 --- a/renderdoc/driver/vulkan/vk_postvs.cpp +++ b/renderdoc/driver/vulkan/vk_postvs.cpp @@ -2676,17 +2676,7 @@ void VulkanReplay::FetchTessGSOut(uint32_t eventId, VulkanRenderState &state) ObjDisp(cmd)->CmdBeginTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); - if(drawcall->flags & DrawFlags::Indexed) - { - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->indexOffset, drawcall->baseVertex, - drawcall->instanceOffset); - } - else - { - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, - drawcall->vertexOffset, drawcall->instanceOffset); - } + m_pDriver->ReplayDraw(cmd, *drawcall); ObjDisp(cmd)->CmdEndTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); @@ -2731,6 +2721,8 @@ void VulkanReplay::FetchTessGSOut(uint32_t eventId, VulkanRenderState &state) state.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics); + DrawcallDescription draw = *drawcall; + // do incremental draws to get the output size. We have to do this O(N^2) style because // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... 
N // instances and count the total number of verts each time, then we can see from the @@ -2745,16 +2737,8 @@ void VulkanReplay::FetchTessGSOut(uint32_t eventId, VulkanRenderState &state) ObjDisp(cmd)->CmdBeginTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); - if(drawcall->flags & DrawFlags::Indexed) - { - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, inst, drawcall->indexOffset, - drawcall->baseVertex, drawcall->instanceOffset); - } - else - { - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, inst, drawcall->vertexOffset, - drawcall->instanceOffset); - } + draw.numInstances = inst; + m_pDriver->ReplayDraw(cmd, draw); ObjDisp(cmd)->CmdEndTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); diff --git a/renderdoc/driver/vulkan/vk_shaderdebug.cpp b/renderdoc/driver/vulkan/vk_shaderdebug.cpp index 0ed450759..90ced18e8 100644 --- a/renderdoc/driver/vulkan/vk_shaderdebug.cpp +++ b/renderdoc/driver/vulkan/vk_shaderdebug.cpp @@ -4084,16 +4084,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_ modifiedstate.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics); - if(draw->flags & DrawFlags::Indexed) - { - ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), draw->numIndices, draw->numInstances, - draw->indexOffset, draw->baseVertex, draw->instanceOffset); - } - else - { - ObjDisp(cmd)->CmdDraw(Unwrap(cmd), draw->numIndices, draw->numInstances, draw->vertexOffset, - draw->instanceOffset); - } + m_pDriver->ReplayDraw(cmd, *draw); modifiedstate.EndRenderPass(cmd); diff --git a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp index 12313726a..595d0762e 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp @@ -73,7 +73,7 @@ VkIndirectPatchData WrappedVulkan::FetchIndirectData(VkIndirectPatchType type, VkBufferMemoryBarrier buf = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, NULL, - 
VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_ALL_WRITE_BITS, VK_ACCESS_TRANSFER_READ_BIT, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, @@ -461,22 +461,68 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndirect(SerialiserType &ser, VkCommandBu // when we have a callback, submit every drawcall individually to the callback if(m_DrawcallCallback && IsDrawInRenderPass()) { + VkMarkerRegion::Begin( + StringFormat::Fmt("Drawcall callback replay (drawCount=%u)", count), commandBuffer); + + // first copy off the buffer segment to our indirect draw buffer; the segment spans count commands at the given stride, with the last one counted at its struct size + VkBufferMemoryBarrier bufBarrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + Unwrap(buffer), + offset, + (count > 0 ? stride * (count - 1) : 0) + sizeof(VkDrawIndirectCommand), + }; + + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + VkBufferCopy region = {offset, 0, bufBarrier.size}; + ObjDisp(commandBuffer) + ->CmdCopyBuffer(Unwrap(commandBuffer), Unwrap(buffer), + Unwrap(m_IndirectBuffer.buf), 1, &region); + + // wait for the copy to finish. NOTE(review): this barrier reuses src=INDIRECT_COMMAND_READ / dst=TRANSFER_WRITE from the initializer; a copy-write to indirect-read dependency would normally be src=TRANSFER_WRITE / dst=INDIRECT_COMMAND_READ - confirm + bufBarrier.buffer = Unwrap(m_IndirectBuffer.buf); + bufBarrier.offset = 0; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + + bufBarrier.size = sizeof(VkDrawIndirectCommand); + for(uint32_t i = 0; i < count; i++) { uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1); + // draw up to and including i. 
The previous draws will be nop'd out ObjDisp(commandBuffer) - ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride); + ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(m_IndirectBuffer.buf), 0, i + 1, + stride); if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer)) { ObjDisp(commandBuffer) - ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride); + ->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(m_IndirectBuffer.buf), 0, + i + 1, stride); m_DrawcallCallback->PostRedraw(eventId, commandBuffer); } - offset += stride; + // now that we're done, nop out this draw so that the next time around we only draw + // the next draw. + bufBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + ObjDisp(commandBuffer) + ->CmdFillBuffer(Unwrap(commandBuffer), bufBarrier.buffer, bufBarrier.offset, + bufBarrier.size, 0); + bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + + bufBarrier.offset += stride; } + + VkMarkerRegion::End(commandBuffer); } // To add the multidraw, we made an event N that is the 'parent' marker, then // N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected @@ -521,7 +567,7 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndirect(SerialiserType &ser, VkCommandBu VK_QUEUE_FAMILY_IGNORED, Unwrap(m_IndirectBuffer.buf), 0, - VK_WHOLE_SIZE, + m_IndirectBufferSize, }; VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, @@ -601,8 +647,8 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndirect(SerialiserType &ser, VkCommandBu // add on the size we'll need for an indirect buffer in the worst case. // Note that we'll only ever be partially replaying one draw at a time, so we only need the // worst case. 
- m_IndirectBufferSize = RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndirectCommand) + - (count > 0 ? count - 1 : 0) * stride); + m_IndirectBufferSize = + RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndirectCommand) + count * stride); rdcstr name = "vkCmdDrawIndirect"; @@ -902,7 +948,7 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirect(SerialiserType &ser, VK_QUEUE_FAMILY_IGNORED, Unwrap(m_IndirectBuffer.buf), 0, - VK_WHOLE_SIZE, + m_IndirectBufferSize, }; VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, @@ -984,8 +1030,8 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirect(SerialiserType &ser, // add on the size we'll need for an indirect buffer in the worst case. // Note that we'll only ever be partially replaying one draw at a time, so we only need the // worst case. - m_IndirectBufferSize = RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndexedIndirectCommand) + - (count > 0 ? count - 1 : 0) * stride); + m_IndirectBufferSize = + RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndexedIndirectCommand) + count * stride); rdcstr name = "vkCmdDrawIndexedIndirect"; @@ -2775,7 +2821,7 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndirectCount(SerialiserType &ser, VK_QUEUE_FAMILY_IGNORED, Unwrap(m_IndirectBuffer.buf), 0, - VK_WHOLE_SIZE, + m_IndirectBufferSize, }; VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, @@ -3029,23 +3075,68 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirectCount( // when we have a callback, submit every drawcall individually to the callback if(m_DrawcallCallback && IsDrawInRenderPass()) { + VkMarkerRegion::Begin( + StringFormat::Fmt("Drawcall callback replay (drawCount=%u)", count), commandBuffer); + + // first copy off the buffer segment to our indirect draw buffer + VkBufferMemoryBarrier bufBarrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_INDIRECT_COMMAND_READ_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_QUEUE_FAMILY_IGNORED, + 
VK_QUEUE_FAMILY_IGNORED, + Unwrap(buffer), + offset, + (count > 0 ? stride * (count - 1) : 0) + sizeof(VkDrawIndexedIndirectCommand), + }; + + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + VkBufferCopy region = {offset, 0, bufBarrier.size}; + ObjDisp(commandBuffer) + ->CmdCopyBuffer(Unwrap(commandBuffer), Unwrap(buffer), Unwrap(m_IndirectBuffer.buf), + 1, &region); + + // wait for the copy to finish. NOTE(review): this barrier reuses src=INDIRECT_COMMAND_READ / dst=TRANSFER_WRITE from the initializer; a copy-write to indirect-read dependency would normally be src=TRANSFER_WRITE / dst=INDIRECT_COMMAND_READ - confirm + bufBarrier.buffer = Unwrap(m_IndirectBuffer.buf); + bufBarrier.offset = 0; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + + bufBarrier.size = sizeof(VkDrawIndexedIndirectCommand); + for(uint32_t i = 0; i < count; i++) { uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1); + // draw up to and including i. The previous draws will be nop'd out ObjDisp(commandBuffer) - ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride); + ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(m_IndirectBuffer.buf), 0, + i + 1, stride); if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer)) { ObjDisp(commandBuffer) - ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, - stride); + ->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(m_IndirectBuffer.buf), 0, + i + 1, stride); m_DrawcallCallback->PostRedraw(eventId, commandBuffer); } - offset += stride; + // now that we're done, nop out this draw so that the next time around we only draw + // the next draw. 
+ bufBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + ObjDisp(commandBuffer) + ->CmdFillBuffer(Unwrap(commandBuffer), bufBarrier.buffer, bufBarrier.offset, + bufBarrier.size, 0); + bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT; + DoPipelineBarrier(commandBuffer, 1, &bufBarrier); + + bufBarrier.offset += stride; } + + VkMarkerRegion::End(commandBuffer); } // To add the multidraw, we made an event N that is the 'parent' marker, then // N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected @@ -3090,7 +3181,7 @@ bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirectCount( VK_QUEUE_FAMILY_IGNORED, Unwrap(m_IndirectBuffer.buf), 0, - VK_WHOLE_SIZE, + m_IndirectBufferSize, }; VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,