diff --git a/docs/behind_scenes/vulkan_support.rst b/docs/behind_scenes/vulkan_support.rst index 8d8e41112..db4f2e1aa 100644 --- a/docs/behind_scenes/vulkan_support.rst +++ b/docs/behind_scenes/vulkan_support.rst @@ -38,7 +38,6 @@ RenderDoc has support for Vulkan version 1.1, as well as a number of extensions, * Sparse textures are only supported for non-arrayed 2D textures with no mips. * Pixel history is not implemented. * Shader debugging is not currently supported. -* Mesh output of geometry/tessellation shader stages is not available. Android ------- diff --git a/qrenderdoc/Code/pyrenderdoc/renderdoc.i b/qrenderdoc/Code/pyrenderdoc/renderdoc.i index 2a46ab254..fd5c0a437 100644 --- a/qrenderdoc/Code/pyrenderdoc/renderdoc.i +++ b/qrenderdoc/Code/pyrenderdoc/renderdoc.i @@ -291,6 +291,7 @@ TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, DescriptorSet) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, ImageData) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, ImageLayout) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, SpecializationConstant) +TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, XFBBuffer) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, VertexBuffer) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, VertexAttribute) TEMPLATE_NAMESPACE_ARRAY_INSTANTIATE(rdcarray, VKPipe, VertexBinding) diff --git a/qrenderdoc/Windows/BufferViewer.cpp b/qrenderdoc/Windows/BufferViewer.cpp index 88b435148..c805e0a7c 100644 --- a/qrenderdoc/Windows/BufferViewer.cpp +++ b/qrenderdoc/Windows/BufferViewer.cpp @@ -2449,7 +2449,7 @@ void BufferViewer::configureMeshColumns() uint numComps = sig.format.compCount; uint elemSize = sig.format.compType == CompType::Double ? 
8U : 4U; - if(m_Ctx.CurPipelineState().HasAlignedPostVSData()) + if(m_Ctx.CurPipelineState().HasAlignedPostVSData(MeshDataStage::VSOut)) { if(numComps == 2) offset = AlignUp(offset, 2U * elemSize); @@ -2516,7 +2516,7 @@ void BufferViewer::configureMeshColumns() uint numComps = sig.format.compCount; uint elemSize = sig.format.compType == CompType::Double ? 8U : 4U; - if(m_Ctx.CurPipelineState().HasAlignedPostVSData()) + if(m_Ctx.CurPipelineState().HasAlignedPostVSData(MeshDataStage::GSOut)) { if(numComps == 2) offset = AlignUp(offset, 2U * elemSize); diff --git a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.cpp b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.cpp index 2ebc9b78d..0845d364b 100644 --- a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.cpp +++ b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.cpp @@ -149,6 +149,9 @@ VulkanPipelineStateViewer::VulkanPipelineStateViewer(ICaptureContext &ctx, QObject::connect(ui->viBuffers, &RDTreeWidget::leave, this, &VulkanPipelineStateViewer::vertex_leave); + QObject::connect(ui->xfbBuffers, &RDTreeWidget::itemActivated, this, + &VulkanPipelineStateViewer::resource_itemActivated); + QObject::connect(ui->fbAttach, &RDTreeWidget::itemActivated, this, &VulkanPipelineStateViewer::resource_itemActivated); @@ -219,6 +222,21 @@ VulkanPipelineStateViewer::VulkanPipelineStateViewer(ICaptureContext &ctx, ubo->setInstantTooltips(true); } + { + RDHeaderView *header = new RDHeaderView(Qt::Horizontal, this); + ui->xfbBuffers->setHeader(header); + + ui->xfbBuffers->setColumns({tr("Slot"), tr("Active"), tr("Data Buffer"), tr("Byte Offset"), + tr("Byte Length"), tr("Written Count Buffer"), + tr("Written Count Offset"), tr("Go")}); + header->setColumnStretchHints({1, 1, 4, 2, 3, 4, 2, -1}); + header->setMinimumSectionSize(40); + + ui->xfbBuffers->setHoverIconColumn(7, action, action_hover); + ui->xfbBuffers->setClearSelectionOnFocusLoss(true); + ui->xfbBuffers->setInstantTooltips(true); + 
} + { RDHeaderView *header = new RDHeaderView(Qt::Horizontal, this); ui->viewports->setHeader(header); @@ -331,6 +349,7 @@ VulkanPipelineStateViewer::VulkanPipelineStateViewer(ICaptureContext &ctx, ui->csShader->setFont(Formatter::PreferredFont()); ui->csResources->setFont(Formatter::PreferredFont()); ui->csUBOs->setFont(Formatter::PreferredFont()); + ui->xfbBuffers->setFont(Formatter::PreferredFont()); ui->viewports->setFont(Formatter::PreferredFont()); ui->scissors->setFont(Formatter::PreferredFont()); ui->renderpass->setFont(Formatter::PreferredFont()); @@ -1818,6 +1837,51 @@ void VulkanPipelineStateViewer::setState() m_Common.SetupShaderEditButton(b, pipe, stage->resourceId, shaderDetails); } + bool xfbSet = false; + vs = ui->xfbBuffers->verticalScrollBar()->value(); + ui->xfbBuffers->beginUpdate(); + ui->xfbBuffers->clear(); + for(int i = 0; i < state.transformFeedback.buffers.count(); i++) + { + const VKPipe::XFBBuffer &s = state.transformFeedback.buffers[i]; + + bool filledSlot = (s.bufferResourceId != ResourceId()); + bool usedSlot = (s.active); + + if(showNode(usedSlot, filledSlot)) + { + qulonglong length = s.byteSize; + + BufferDescription *buf = m_Ctx.GetBuffer(s.bufferResourceId); + + if(buf && length == UINT64_MAX) + length = buf->length - s.byteOffset; + + RDTreeWidgetItem *node = new RDTreeWidgetItem( + {i, s.active ? 
tr("Active") : tr("Inactive"), s.bufferResourceId, (qulonglong)s.byteOffset, + length, s.counterBufferResourceId, (qulonglong)s.counterBufferOffset, QString()}); + + node->setTag(QVariant::fromValue( + VulkanBufferTag(false, ~0U, s.bufferResourceId, s.byteOffset, length))); + + if(!filledSlot) + setEmptyRow(node); + + if(!usedSlot) + setInactiveRow(node); + + xfbSet = true; + + ui->xfbBuffers->addTopLevelItem(node); + } + } + ui->xfbBuffers->verticalScrollBar()->setValue(vs); + ui->xfbBuffers->clearSelection(); + ui->xfbBuffers->endUpdate(); + + ui->xfbBuffers->setVisible(xfbSet); + ui->xfbGroup->setVisible(xfbSet); + //////////////////////////////////////////////// // Rasterizer @@ -2145,9 +2209,20 @@ void VulkanPipelineStateViewer::setState() } else { + bool xfbActive = !state.transformFeedback.buffers.isEmpty(); + + if(state.geometryShader.resourceId == ResourceId() && xfbActive) + { + ui->pipeFlow->setStageName(4, lit("XFB"), tr("Transform Feedback")); + } + else + { + ui->pipeFlow->setStageName(4, lit("GS"), tr("Geometry Shader")); + } + ui->pipeFlow->setStagesEnabled({true, true, state.tessControlShader.resourceId != ResourceId(), state.tessEvalShader.resourceId != ResourceId(), - state.geometryShader.resourceId != ResourceId(), true, + state.geometryShader.resourceId != ResourceId() || xfbActive, true, state.fragmentShader.resourceId != ResourceId(), true, false}); } } @@ -2229,44 +2304,51 @@ void VulkanPipelineStateViewer::resource_itemActivated(RDTreeWidgetItem *item, i { VulkanBufferTag buf = tag.value(); - const ShaderResource &shaderRes = buf.rwRes - ? stage->reflection->readWriteResources[buf.bindPoint] - : stage->reflection->readOnlyResources[buf.bindPoint]; + QString format; - QString format = lit("// struct %1\n").arg(shaderRes.variableType.descriptor.name); - - if(shaderRes.variableType.members.count() > 1) + if(stage->reflection && + buf.bindPoint < (buf.rwRes ? 
stage->reflection->readWriteResources.size() + : stage->reflection->readOnlyResources.size())) { - format += lit("// members skipped as they are fixed size:\n"); - for(int i = 0; i < shaderRes.variableType.members.count() - 1; i++) - format += QFormatStr("// %1 %2;\n") - .arg(shaderRes.variableType.members[i].type.descriptor.name) - .arg(shaderRes.variableType.members[i].name); - } + const ShaderResource &shaderRes = buf.rwRes + ? stage->reflection->readWriteResources[buf.bindPoint] + : stage->reflection->readOnlyResources[buf.bindPoint]; - if(!shaderRes.variableType.members.isEmpty()) - { - format += lit("{\n") + formatMembers(1, QString(), shaderRes.variableType.members) + lit("}"); - } - else - { - const auto &desc = shaderRes.variableType.descriptor; + format = lit("// struct %1\n").arg(shaderRes.variableType.descriptor.name); - format = QString(); - if(desc.rowMajorStorage) - format += lit("row_major "); + if(shaderRes.variableType.members.count() > 1) + { + format += lit("// members skipped as they are fixed size:\n"); + for(int i = 0; i < shaderRes.variableType.members.count() - 1; i++) + format += QFormatStr("// %1 %2;\n") + .arg(shaderRes.variableType.members[i].type.descriptor.name) + .arg(shaderRes.variableType.members[i].name); + } - format += ToQStr(desc.type); - if(desc.rows > 1 && desc.columns > 1) - format += QFormatStr("%1x%2").arg(desc.rows).arg(desc.columns); - else if(desc.columns > 1) - format += QString::number(desc.columns); + if(!shaderRes.variableType.members.isEmpty()) + { + format += lit("{\n") + formatMembers(1, QString(), shaderRes.variableType.members) + lit("}"); + } + else + { + const auto &desc = shaderRes.variableType.descriptor; - if(!desc.name.isEmpty()) - format += lit(" ") + desc.name; + format = QString(); + if(desc.rowMajorStorage) + format += lit("row_major "); - if(desc.elements > 1) - format += QFormatStr("[%1]").arg(desc.elements); + format += ToQStr(desc.type); + if(desc.rows > 1 && desc.columns > 1) + format += 
QFormatStr("%1x%2").arg(desc.rows).arg(desc.columns); + else if(desc.columns > 1) + format += QString::number(desc.columns); + + if(!desc.name.isEmpty()) + format += lit(" ") + desc.name; + + if(desc.elements > 1) + format += QFormatStr("[%1]").arg(desc.elements); + } } if(buf.ID != ResourceId()) @@ -2914,6 +2996,50 @@ void VulkanPipelineStateViewer::exportHTML(QXmlStreamWriter &xml, const VKPipe:: } } +void VulkanPipelineStateViewer::exportHTML(QXmlStreamWriter &xml, const VKPipe::TransformFeedback &xfb) +{ + { + xml.writeStartElement(lit("h3")); + xml.writeCharacters(tr("Transform Feedback Bindings")); + xml.writeEndElement(); + + QList rows; + + int i = 0; + for(const VKPipe::XFBBuffer &b : xfb.buffers) + { + QString name = m_Ctx.GetResourceName(b.bufferResourceId); + uint64_t length = b.byteSize; + QString counterName = m_Ctx.GetResourceName(b.counterBufferResourceId); + + if(b.bufferResourceId == ResourceId()) + { + name = tr("Empty"); + } + else + { + BufferDescription *buf = m_Ctx.GetBuffer(b.bufferResourceId); + if(buf && length == UINT64_MAX) + length = buf->length - b.byteOffset; + } + + if(b.counterBufferResourceId == ResourceId()) + { + counterName = tr("Empty"); + } + + rows.push_back({i, name, (qulonglong)b.byteOffset, (qulonglong)length, counterName, + (qulonglong)b.counterBufferOffset}); + + i++; + } + + m_Common.exportHTMLTable(xml, {tr("Slot"), tr("Buffer"), tr("Byte Offset"), tr("Byte Length"), + tr("Counter Buffer"), tr("Counter Offset")}, + rows); + } +} + void VulkanPipelineStateViewer::exportHTML(QXmlStreamWriter &xml, const VKPipe::Rasterizer &rs) { { @@ -3248,7 +3374,10 @@ void VulkanPipelineStateViewer::on_exportHTML_clicked() case 1: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->vertexShader); break; case 2: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->tessControlShader); break; case 3: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->tessEvalShader); break; - case 4: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->geometryShader); 
break; + case 4: + exportHTML(xml, m_Ctx.CurVulkanPipelineState()->geometryShader); + exportHTML(xml, m_Ctx.CurVulkanPipelineState()->transformFeedback); + break; case 5: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->rasterizer); break; case 6: exportHTML(xml, m_Ctx.CurVulkanPipelineState()->fragmentShader); break; case 7: diff --git a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.h b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.h index 751550bca..cef7877a8 100644 --- a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.h +++ b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.h @@ -117,6 +117,7 @@ private: void exportHTML(QXmlStreamWriter &xml, const VKPipe::VertexInput &vi); void exportHTML(QXmlStreamWriter &xml, const VKPipe::InputAssembly &ia); void exportHTML(QXmlStreamWriter &xml, const VKPipe::Shader &sh); + void exportHTML(QXmlStreamWriter &xml, const VKPipe::TransformFeedback &rs); void exportHTML(QXmlStreamWriter &xml, const VKPipe::Rasterizer &rs); void exportHTML(QXmlStreamWriter &xml, const VKPipe::ColorBlendState &cb); void exportHTML(QXmlStreamWriter &xml, const VKPipe::DepthStencil &ds); diff --git a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.ui b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.ui index 3a5842e97..34d15d95e 100644 --- a/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.ui +++ b/qrenderdoc/Windows/PipelineState/VulkanPipelineStateViewer.ui @@ -545,7 +545,7 @@ :/page_white_edit.png:/page_white_edit.png - + QToolButton::MenuButtonPopup @@ -1585,6 +1585,61 @@ + + + + + 0 + 1 + + + + Transform Feedback + + + + 2 + + + 2 + + + 2 + + + 2 + + + + + QFrame::Box + + + QFrame::Plain + + + QAbstractItemView::NoEditTriggers + + + false + + + 0 + + + false + + + true + + + false + + + + + + diff --git a/renderdoc/api/replay/pipestate.h b/renderdoc/api/replay/pipestate.h index 26993938b..265168ea8 100644 --- a/renderdoc/api/replay/pipestate.h +++ 
b/renderdoc/api/replay/pipestate.h @@ -153,10 +153,14 @@ vectors not crossing ``float4`` boundaries). APIs that use stream-out or transfo tightly packed data, but APIs that rewrite shaders to dump data might have these alignment requirements. +:param MeshDataStage stage: The mesh data stage for the output data. :return: A boolean indicating if post-VS data is aligned. :rtype: ``bool`` )"); - bool HasAlignedPostVSData() const { return IsCaptureLoaded() && IsCaptureVK(); } + bool HasAlignedPostVSData(MeshDataStage stage) const + { + return IsCaptureLoaded() && IsCaptureVK() && stage == MeshDataStage::VSOut; + } DOCUMENT(R"(For APIs that have explicit barriers, retrieves the current layout of a resource. :return: The name of the current resource layout. diff --git a/renderdoc/api/replay/vk_pipestate.h b/renderdoc/api/replay/vk_pipestate.h index 79a8abe75..5418db076 100644 --- a/renderdoc/api/replay/vk_pipestate.h +++ b/renderdoc/api/replay/vk_pipestate.h @@ -483,6 +483,67 @@ struct Tessellation bool domainOriginUpperLeft = true; }; +DOCUMENT("Describes a single transform feedback binding."); +struct XFBBuffer +{ + DOCUMENT(""); + XFBBuffer() = default; + XFBBuffer(const XFBBuffer &) = default; + + bool operator==(const XFBBuffer &o) const + { + return active == o.active && bufferResourceId == o.bufferResourceId && + byteOffset == o.byteOffset && byteSize == o.byteSize && + counterBufferResourceId == o.counterBufferResourceId && + counterBufferOffset == o.counterBufferOffset; + } + bool operator<(const XFBBuffer &o) const + { + if(!(active == o.active)) + return active < o.active; + if(!(bufferResourceId == o.bufferResourceId)) + return bufferResourceId < o.bufferResourceId; + if(!(byteOffset == o.byteOffset)) + return byteOffset < o.byteOffset; + if(!(byteSize == o.byteSize)) + return byteSize < o.byteSize; + if(!(counterBufferResourceId == o.counterBufferResourceId)) + return counterBufferResourceId < o.counterBufferResourceId; + if(!(counterBufferOffset == 
o.counterBufferOffset)) + return counterBufferOffset < o.counterBufferOffset; + return false; + } + + DOCUMENT("A flag indicating if this buffer is active or not."); + bool active = false; + + DOCUMENT("The :class:`ResourceId` of the bound data buffer."); + ResourceId bufferResourceId; + + DOCUMENT("The offset in bytes to the start of the data in the :data:`bufferResourceId`."); + uint64_t byteOffset = 0; + + DOCUMENT("The size in bytes of the data buffer."); + uint64_t byteSize = 0; + + DOCUMENT("The :class:`ResourceId` of the buffer storing the counter value (if set)."); + ResourceId counterBufferResourceId; + + DOCUMENT("The offset in bytes to the counter in the :data:`counterBufferResourceId`."); + uint64_t counterBufferOffset = 0; +}; + +DOCUMENT("Describes the state of the fixed-function transform feedback."); +struct TransformFeedback +{ + DOCUMENT(""); + TransformFeedback() = default; + TransformFeedback(const TransformFeedback &) = default; + + DOCUMENT("The bound transform feedback buffers."); + rdcarray buffers; +}; + DOCUMENT("Describes a combined viewport and scissor region."); struct ViewportScissor { @@ -864,6 +925,9 @@ struct State DOCUMENT("A :class:`VKTessellation` describing the tessellation stage."); Tessellation tessellation; + DOCUMENT("A :class:`VKTransformFeedback` describing the transform feedback stage."); + TransformFeedback transformFeedback; + DOCUMENT("A :class:`VKViewState` describing the viewport setup."); ViewState viewportScissor; DOCUMENT("A :class:`VKRasterizer` describing rasterization."); @@ -898,6 +962,8 @@ DECLARE_REFLECTION_STRUCT(VKPipe::VertexInput); DECLARE_REFLECTION_STRUCT(VKPipe::SpecializationConstant); DECLARE_REFLECTION_STRUCT(VKPipe::Shader); DECLARE_REFLECTION_STRUCT(VKPipe::Tessellation); +DECLARE_REFLECTION_STRUCT(VKPipe::XFBBuffer); +DECLARE_REFLECTION_STRUCT(VKPipe::TransformFeedback); DECLARE_REFLECTION_STRUCT(VKPipe::ViewportScissor); DECLARE_REFLECTION_STRUCT(VKPipe::ViewState); 
DECLARE_REFLECTION_STRUCT(VKPipe::Rasterizer); diff --git a/renderdoc/driver/shaders/spirv/spirv_common.h b/renderdoc/driver/shaders/spirv/spirv_common.h index 7daebd1b1..56e7aea51 100644 --- a/renderdoc/driver/shaders/spirv/spirv_common.h +++ b/renderdoc/driver/shaders/spirv/spirv_common.h @@ -84,6 +84,9 @@ struct SPIRVPatchData // ID of the base variable uint32_t ID; + // ID of the struct parent of this variable + uint32_t structID; + // the access chain of indices std::vector accessChain; @@ -95,6 +98,9 @@ struct SPIRVPatchData // SPIR-V. std::vector inputs; std::vector outputs; + + // the output topology for tessellation and geometry shaders + Topology outTopo = Topology::Unknown; }; struct SPVModule diff --git a/renderdoc/driver/shaders/spirv/spirv_disassemble.cpp b/renderdoc/driver/shaders/spirv/spirv_disassemble.cpp index 45ccbc05e..3a9930282 100644 --- a/renderdoc/driver/shaders/spirv/spirv_disassemble.cpp +++ b/renderdoc/driver/shaders/spirv/spirv_disassemble.cpp @@ -454,6 +454,8 @@ struct SPVTypeData SPVTypeData *baseType; + uint32_t id; + string name; bool IsBasicInt() const { return type == eUInt || type == eSInt; } @@ -3771,10 +3773,10 @@ struct bindpair typedef bindpair cblockpair; typedef bindpair shaderrespair; -void AddSignatureParameter(bool isInput, ShaderStage stage, uint32_t id, uint32_t &regIndex, - std::vector accessChain, string varName, SPVTypeData *type, - const vector &decorations, vector &sigarray, - SPIRVPatchData &patchData) +void AddSignatureParameter(bool isInput, ShaderStage stage, uint32_t id, uint32_t structID, + uint32_t &regIndex, std::vector accessChain, string varName, + SPVTypeData *type, const vector &decorations, + vector &sigarray, SPIRVPatchData &patchData) { SigParameter sig; @@ -3783,6 +3785,7 @@ void AddSignatureParameter(bool isInput, ShaderStage stage, uint32_t id, uint32_ SPIRVPatchData::InterfaceAccess patch; patch.accessChain = accessChain; patch.ID = id; + patch.structID = structID; bool rowmajor = true; @@ -3867,7 
+3870,7 @@ void AddSignatureParameter(bool isInput, ShaderStage stage, uint32_t id, uint32_ string baseName = isArray ? StringFormat::Fmt("%s[%u]", varName.c_str(), a) : varName; - AddSignatureParameter(isInput, stage, id, regIndex, patch.accessChain, + AddSignatureParameter(isInput, stage, id, type->id, regIndex, patch.accessChain, baseName + "." + type->children[c].second, type->children[c].first, type->childDecorations[c], sigarray, patchData); @@ -4056,7 +4059,7 @@ void SPVModule::MakeReflection(ShaderStage stage, const string &entryPoint, nm = StringFormat::Fmt("sig%u", inst->id); uint32_t dummy = 0; - AddSignatureParameter(isInput, stage, inst->id, dummy, std::vector(), nm, + AddSignatureParameter(isInput, stage, inst->id, 0, dummy, std::vector(), nm, inst->var->type, inst->decorations, *sigarray, patchData); // eliminate any members of gl_PerVertex that are actually unused and just came along @@ -4509,7 +4512,27 @@ void SPVModule::MakeReflection(ShaderStage stage, const string &entryPoint, { for(const SPVExecutionMode &mode : inst->entry->modes) { - if(mode.mode == spv::ExecutionModeDepthGreater) + if(mode.mode == spv::ExecutionModeTriangles) + { + patchData.outTopo = Topology::TriangleList; + } + else if(mode.mode == spv::ExecutionModeIsolines) + { + patchData.outTopo = Topology::LineList; + } + else if(mode.mode == spv::ExecutionModeOutputPoints) + { + patchData.outTopo = Topology::PointList; + } + else if(mode.mode == spv::ExecutionModeOutputLineStrip) + { + patchData.outTopo = Topology::LineStrip; + } + else if(mode.mode == spv::ExecutionModeOutputTriangleStrip) + { + patchData.outTopo = Topology::TriangleStrip; + } + else if(mode.mode == spv::ExecutionModeDepthGreater) { for(SigParameter &sig : outputs) { @@ -4518,7 +4541,7 @@ void SPVModule::MakeReflection(ShaderStage stage, const string &entryPoint, } break; } - if(mode.mode == spv::ExecutionModeDepthLess) + else if(mode.mode == spv::ExecutionModeDepthLess) { for(SigParameter &sig : outputs) { @@ 
-5019,6 +5042,7 @@ void ParseSPIRV(uint32_t *spirv, size_t spirvLength, SPVModule &module) { op.type = new SPVTypeData(); op.type->type = SPVTypeData::eStruct; + op.type->id = spirv[it + 1]; for(int i = 2; i < WordCount; i++) { diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index 5002da8df..54146e04e 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -469,6 +469,12 @@ enum class VulkanChunk : uint32_t vkCmdBeginRenderPass2KHR, vkCmdNextSubpass2KHR, vkCmdEndRenderPass2KHR, + vkCmdBindTransformFeedbackBuffersEXT, + vkCmdBeginTransformFeedbackEXT, + vkCmdEndTransformFeedbackEXT, + vkCmdBeginQueryIndexedEXT, + vkCmdEndQueryIndexedEXT, + vkCmdDrawIndirectByteCountEXT, Max, }; @@ -668,6 +674,7 @@ DECLARE_REFLECTION_STRUCT(VkSubpassEndInfoKHR); DECLARE_REFLECTION_STRUCT(VkDispatchIndirectCommand); DECLARE_REFLECTION_STRUCT(VkDrawIndirectCommand); DECLARE_REFLECTION_STRUCT(VkDrawIndexedIndirectCommand); +DECLARE_REFLECTION_STRUCT(VkPipelineRasterizationStateStreamCreateInfoEXT); DECLARE_DESERIALISE_TYPE(VkDeviceCreateInfo); DECLARE_DESERIALISE_TYPE(VkBufferCreateInfo); @@ -747,6 +754,7 @@ DECLARE_DESERIALISE_TYPE(VkImageFormatListCreateInfoKHR); DECLARE_DESERIALISE_TYPE(VkRenderPassCreateInfo2KHR); DECLARE_DESERIALISE_TYPE(VkSubpassBeginInfoKHR); DECLARE_DESERIALISE_TYPE(VkSubpassEndInfoKHR); +DECLARE_DESERIALISE_TYPE(VkPipelineRasterizationStateStreamCreateInfoEXT); #if defined(VK_KHR_external_memory_win32) || defined(VK_NV_external_memory_win32) DECLARE_REFLECTION_STRUCT(VkImportMemoryWin32HandleInfoNV); diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index dbe5ac31b..17d014f1b 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -674,6 +674,9 @@ static const VkExtensionProperties supportedExtensions[] = { { VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION, }, + { + 
VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, VK_EXT_TRANSFORM_FEEDBACK_SPEC_VERSION, + }, { VK_EXT_VALIDATION_CACHE_EXTENSION_NAME, VK_EXT_VALIDATION_CACHE_SPEC_VERSION, }, @@ -2691,6 +2694,21 @@ bool WrappedVulkan::ProcessChunk(ReadSerialiser &ser, VulkanChunk chunk) case VulkanChunk::vkCmdEndRenderPass2KHR: return Serialise_vkCmdEndRenderPass2KHR(ser, VK_NULL_HANDLE, NULL); + case VulkanChunk::vkCmdBindTransformFeedbackBuffersEXT: + return Serialise_vkCmdBindTransformFeedbackBuffersEXT(ser, VK_NULL_HANDLE, 0, 0, NULL, NULL, + NULL); + case VulkanChunk::vkCmdBeginTransformFeedbackEXT: + return Serialise_vkCmdBeginTransformFeedbackEXT(ser, VK_NULL_HANDLE, 0, 0, NULL, NULL); + case VulkanChunk::vkCmdEndTransformFeedbackEXT: + return Serialise_vkCmdEndTransformFeedbackEXT(ser, VK_NULL_HANDLE, 0, 0, NULL, NULL); + case VulkanChunk::vkCmdBeginQueryIndexedEXT: + return Serialise_vkCmdBeginQueryIndexedEXT(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0, 0, 0); + case VulkanChunk::vkCmdEndQueryIndexedEXT: + return Serialise_vkCmdEndQueryIndexedEXT(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0, 0); + case VulkanChunk::vkCmdDrawIndirectByteCountEXT: + return Serialise_vkCmdDrawIndirectByteCountEXT(ser, VK_NULL_HANDLE, 0, 0, VK_NULL_HANDLE, 0, + 0, 0); + default: { SystemChunk system = (SystemChunk)chunk; @@ -2912,6 +2930,10 @@ void WrappedVulkan::ReplayLog(uint32_t startEventID, uint32_t endEventID, Replay { VkCommandBuffer cmd = m_OutsideCmdBuffer; + // end any active XFB + if(!m_RenderState.xfbcounters.empty()) + m_RenderState.EndTransformFeedback(cmd); + // check if the render pass is active - it could have become active // even if it wasn't before (if the above event was a CmdBeginRenderPass) if(m_Partial[Primary].renderPassActive) @@ -3320,6 +3342,16 @@ void WrappedVulkan::AddUsage(VulkanDrawcallTreeNode &drawNode, vector vbuffers; + std::vector xfbbuffers; + uint32_t xfbfirst = 0; + uint32_t xfbcount = 0; ResourceId renderPass; ResourceId framebuffer; @@ -711,7 +716,7 @@ private: { T 
*ret = GetTempArray(count); for(uint32_t i = 0; i < count; i++) - ret[i] = Unwrap(wrapped[i]); + ret[i] = wrapped ? Unwrap(wrapped[i]) : VK_NULL_HANDLE; return ret; } @@ -1926,4 +1931,28 @@ public: const VkSubpassEndInfoKHR *pSubpassEndInfo); IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdEndRenderPass2KHR, VkCommandBuffer commandBuffer, const VkSubpassEndInfoKHR *pSubpassEndInfo); + + // VK_EXT_transform_feedback + + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdBindTransformFeedbackBuffersEXT, + VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes); + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdBeginTransformFeedbackEXT, VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets); + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdEndTransformFeedbackEXT, VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets); + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdBeginQueryIndexedEXT, VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags, + uint32_t index); + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdEndQueryIndexedEXT, VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, uint32_t index); + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdDrawIndirectByteCountEXT, VkCommandBuffer commandBuffer, + uint32_t instanceCount, uint32_t firstInstance, + VkBuffer counterBuffer, VkDeviceSize counterBufferOffset, + uint32_t counterOffset, uint32_t vertexStride); }; diff --git a/renderdoc/driver/vulkan/vk_debug.cpp b/renderdoc/driver/vulkan/vk_debug.cpp index d0356bcac..96deb134d 100644 --- a/renderdoc/driver/vulkan/vk_debug.cpp +++ b/renderdoc/driver/vulkan/vk_debug.cpp @@ -1463,6 +1463,7 @@ void VulkanReplay::DestroyResources() 
m_VertexPick.Destroy(m_pDriver); m_PixelPick.Destroy(m_pDriver); m_Histogram.Destroy(m_pDriver); + m_PostVS.Destroy(m_pDriver); SAFE_DELETE(m_pAMDCounters); } @@ -2412,3 +2413,9 @@ void VulkanReplay::HistogramMinMax::Destroy(WrappedVulkan *driver) m_HistogramReadback.Destroy(); m_HistogramUBO.Destroy(); } + +void VulkanReplay::PostVS::Destroy(WrappedVulkan *driver) +{ + if(XFBQueryPool != VK_NULL_HANDLE) + driver->vkDestroyQueryPool(driver->GetDev(), XFBQueryPool, NULL); +} \ No newline at end of file diff --git a/renderdoc/driver/vulkan/vk_hookset_defs.h b/renderdoc/driver/vulkan/vk_hookset_defs.h index 3e6dc423e..b78df2307 100644 --- a/renderdoc/driver/vulkan/vk_hookset_defs.h +++ b/renderdoc/driver/vulkan/vk_hookset_defs.h @@ -352,7 +352,8 @@ CheckExt(KHR_draw_indirect_count, VKXX); \ CheckExt(EXT_validation_cache, VKXX); \ CheckExt(KHR_shared_presentable_image, VKXX); \ - CheckExt(KHR_create_renderpass2, VKXX); + CheckExt(KHR_create_renderpass2, VKXX); \ + CheckExt(EXT_transform_feedback, VKXX); #define HookInitVulkanInstanceExts() \ HookInitExtension(KHR_surface, DestroySurfaceKHR); \ @@ -473,6 +474,12 @@ HookInitExtension(KHR_create_renderpass2, CmdBeginRenderPass2KHR); \ HookInitExtension(KHR_create_renderpass2, CmdNextSubpass2KHR); \ HookInitExtension(KHR_create_renderpass2, CmdEndRenderPass2KHR); \ + HookInitExtension(EXT_transform_feedback, CmdBindTransformFeedbackBuffersEXT); \ + HookInitExtension(EXT_transform_feedback, CmdBeginTransformFeedbackEXT); \ + HookInitExtension(EXT_transform_feedback, CmdEndTransformFeedbackEXT); \ + HookInitExtension(EXT_transform_feedback, CmdBeginQueryIndexedEXT); \ + HookInitExtension(EXT_transform_feedback, CmdEndQueryIndexedEXT); \ + HookInitExtension(EXT_transform_feedback, CmdDrawIndirectByteCountEXT); \ HookInitDevice_PlatformSpecific() #define DefineHooks() \ @@ -1010,6 +1017,22 @@ pSubpassEndInfo); \ HookDefine2(void, vkCmdEndRenderPass2KHR, VkCommandBuffer, commandBuffer, \ const VkSubpassEndInfoKHR *, 
pSubpassEndInfo); \ + HookDefine6(void, vkCmdBindTransformFeedbackBuffersEXT, VkCommandBuffer, commandBuffer, \ + uint32_t, firstBinding, uint32_t, bindingCount, const VkBuffer *, pBuffers, \ + const VkDeviceSize *, pOffsets, const VkDeviceSize *, pSizes); \ + HookDefine5(void, vkCmdBeginTransformFeedbackEXT, VkCommandBuffer, commandBuffer, uint32_t, \ + firstBuffer, uint32_t, bufferCount, const VkBuffer *, pCounterBuffers, \ + const VkDeviceSize *, pCounterBufferOffsets); \ + HookDefine5(void, vkCmdEndTransformFeedbackEXT, VkCommandBuffer, commandBuffer, uint32_t, \ + firstBuffer, uint32_t, bufferCount, const VkBuffer *, pCounterBuffers, \ + const VkDeviceSize *, pCounterBufferOffsets); \ + HookDefine5(void, vkCmdBeginQueryIndexedEXT, VkCommandBuffer, commandBuffer, VkQueryPool, \ + queryPool, uint32_t, query, VkQueryControlFlags, flags, uint32_t, index); \ + HookDefine4(void, vkCmdEndQueryIndexedEXT, VkCommandBuffer, commandBuffer, VkQueryPool, \ + queryPool, uint32_t, query, uint32_t, index); \ + HookDefine7(void, vkCmdDrawIndirectByteCountEXT, VkCommandBuffer, commandBuffer, uint32_t, \ + instanceCount, uint32_t, firstInstance, VkBuffer, counterBuffer, VkDeviceSize, \ + counterBufferOffset, uint32_t, counterOffset, uint32_t, vertexStride); \ HookDefine_PlatformSpecific() struct VkLayerInstanceDispatchTableExtended : VkLayerInstanceDispatchTable diff --git a/renderdoc/driver/vulkan/vk_info.cpp b/renderdoc/driver/vulkan/vk_info.cpp index 684b4922e..14cdf599b 100644 --- a/renderdoc/driver/vulkan/vk_info.cpp +++ b/renderdoc/driver/vulkan/vk_info.cpp @@ -281,6 +281,16 @@ void VulkanCreationInfo::Pipeline::Init(VulkanResourceManager *resourceMan, Vulk depthBiasSlopeFactor = pCreateInfo->pRasterizationState->depthBiasSlopeFactor; lineWidth = pCreateInfo->pRasterizationState->lineWidth; + // VkPipelineRasterizationStateStreamCreateInfoEXT + rasterizationStream = 0; + + const VkPipelineRasterizationStateStreamCreateInfoEXT *rastStream = + (const 
VkPipelineRasterizationStateStreamCreateInfoEXT *)FindNextStruct( + pCreateInfo->pRasterizationState, + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + if(rastStream) + rasterizationStream = rastStream->rasterizationStream; + // VkPipelineRasterizationConservativeStateCreateInfoEXT conservativeRasterizationMode = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT; extraPrimitiveOverestimationSize = 0.0f; diff --git a/renderdoc/driver/vulkan/vk_info.h b/renderdoc/driver/vulkan/vk_info.h index 10a74c02b..916767618 100644 --- a/renderdoc/driver/vulkan/vk_info.h +++ b/renderdoc/driver/vulkan/vk_info.h @@ -190,6 +190,9 @@ struct VulkanCreationInfo float depthBiasSlopeFactor; float lineWidth; + // VkPipelineRasterizationStateStreamCreateInfoEXT + uint32_t rasterizationStream; + // VkPipelineRasterizationConservativeStateCreateInfoEXT VkConservativeRasterizationModeEXT conservativeRasterizationMode; float extraPrimitiveOverestimationSize; diff --git a/renderdoc/driver/vulkan/vk_postvs.cpp b/renderdoc/driver/vulkan/vk_postvs.cpp index 540ceee53..be331e3d1 100644 --- a/renderdoc/driver/vulkan/vk_postvs.cpp +++ b/renderdoc/driver/vulkan/vk_postvs.cpp @@ -31,6 +31,12 @@ #include "vk_debug.h" #include "vk_shader_cache.h" +struct VkXfbQueryResult +{ + uint64_t numPrimitivesWritten; + uint64_t numPrimitivesGenerated; +}; + static const char *PatchedMeshOutputEntryPoint = "rdc"; static const uint32_t MeshOutputDispatchWidth = 128; static const uint32_t MeshOutputTBufferArraySize = 16; @@ -1070,11 +1076,161 @@ static void ConvertToMeshOutputCompute(const ShaderReflection &refl, const SPIRV editor.StripNops(); } +static void AddXFBAnnotations(const ShaderReflection &refl, const SPIRVPatchData &patchData, + const char *entryName, std::vector &modSpirv, + uint32_t &xfbStride) +{ + SPIRVEditor editor(modSpirv); + + rdcarray outsig = refl.outputSignature; + std::vector outpatch = patchData.outputs; + + uint32_t entryid = 0; + for(const SPIRVEntry &entry : 
editor.GetEntries()) + { + if(entry.name == entryName) + { + entryid = entry.id; + break; + } + } + + bool hasXFB = false; + + for(SPIRVIterator it = editor.EndEntries(); it < editor.BeginDebug(); ++it) + { + if(it.opcode() == spv::OpExecutionMode && it.word(1) == entryid && + it.word(2) == spv::ExecutionModeXfb) + { + hasXFB = true; + break; + } + } + + if(hasXFB) + { + for(SPIRVIterator it = editor.BeginDecorations(); it < editor.EndDecorations(); ++it) + { + // remove any existing xfb decorations + if(it.opcode() == spv::OpDecorate && + (it.word(2) == spv::DecorationXfbBuffer || it.word(2) == spv::DecorationXfbStride)) + { + editor.PreModify(it); + + SPIRVOperation op(it); + + // invalid to have a nop here, but it will be stripped out later + op.nopRemove(1); + op[0] = SPV_NOP; + + editor.PostModify(it); + } + + // offset is trickier, need to see if it'll match one we want later + if((it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationOffset) || + (it.opcode() == spv::OpMemberDecorate && it.word(3) == spv::DecorationOffset)) + { + for(size_t i = 0; i < outsig.size(); i++) + { + if(outpatch[i].structID && !outpatch[i].accessChain.empty()) + { + if(it.opcode() == spv::OpMemberDecorate && it.word(1) == outpatch[i].structID && + it.word(2) == outpatch[i].accessChain.back()) + { + editor.PreModify(it); + + SPIRVOperation op(it); + + op.nopRemove(1); + op[0] = SPV_NOP; + + editor.PostModify(it); + } + } + else + { + if(it.opcode() == spv::OpDecorate && it.word(1) == outpatch[i].ID) + { + editor.PreModify(it); + + SPIRVOperation op(it); + + op.nopRemove(1); + op[0] = SPV_NOP; + + editor.PostModify(it); + } + } + } + } + } + } + else + { + editor.AddOperation(editor.EndEntries(), + SPIRVOperation(spv::OpExecutionMode, {entryid, spv::ExecutionModeXfb})); + } + + editor.AddCapability(spv::CapabilityTransformFeedback); + + // find the position output and move it to the front + for(size_t i = 0; i < outsig.size(); i++) + { + if(outsig[i].systemValue == 
ShaderBuiltin::Position) + { + outsig.insert(0, outsig[i]); + outsig.erase(i + 1); + + outpatch.insert(outpatch.begin(), outpatch[i]); + outpatch.erase(outpatch.begin() + i + 1); + break; + } + } + + for(size_t i = 0; i < outsig.size(); i++) + { + if(outpatch[i].structID && !outpatch[i].accessChain.empty()) + { + editor.AddDecoration(SPIRVOperation( + spv::OpMemberDecorate, + {outpatch[i].structID, outpatch[i].accessChain.back(), spv::DecorationOffset, xfbStride})); + } + else + { + editor.AddDecoration(SPIRVOperation( + spv::OpDecorate, {outpatch[i].ID, (uint32_t)spv::DecorationOffset, xfbStride})); + } + + uint32_t compByteSize = 4; + + if(outsig[i].compType == CompType::Double) + compByteSize = 8; + + xfbStride += outsig[i].compCount * compByteSize; + } + + std::set vars; + + for(size_t i = 0; i < outpatch.size(); i++) + { + if(vars.find(outpatch[i].ID) == vars.end()) + { + editor.AddDecoration( + SPIRVOperation(spv::OpDecorate, {outpatch[i].ID, (uint32_t)spv::DecorationXfbBuffer, 0})); + editor.AddDecoration(SPIRVOperation( + spv::OpDecorate, {outpatch[i].ID, (uint32_t)spv::DecorationXfbStride, xfbStride})); + vars.insert(outpatch[i].ID); + } + } + + editor.StripNops(); +} + void VulkanReplay::ClearPostVSCache() { VkDevice dev = m_Device; - for(auto it = m_PostVSData.begin(); it != m_PostVSData.end(); ++it) + for(auto it = m_PostVS.Data.begin(); it != m_PostVS.Data.end(); ++it) { if(it->second.vsout.idxbuf != VK_NULL_HANDLE) { @@ -1085,28 +1241,17 @@ void VulkanReplay::ClearPostVSCache() m_pDriver->vkFreeMemory(dev, it->second.vsout.bufmem, NULL); } - m_PostVSData.clear(); + m_PostVS.Data.clear(); } -void VulkanReplay::InitPostVSBuffers(uint32_t eventId) +void VulkanReplay::FetchVSOut(uint32_t eventId) { - // go through any aliasing - if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end()) - eventId = m_PostVSAlias[eventId]; - - if(m_PostVSData.find(eventId) != m_PostVSData.end()) - return; - const VulkanRenderState &state = m_pDriver->m_RenderState; 
VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo; - if(state.graphics.pipeline == ResourceId() || state.renderPass == ResourceId()) - return; - const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[state.graphics.pipeline]; - if(pipeInfo.shaders[0].module == ResourceId()) - return; + const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); const VulkanCreationInfo::ShaderModule &moduleInfo = creationInfo.m_ShaderModule[pipeInfo.shaders[0].module]; @@ -1118,29 +1263,24 @@ void VulkanReplay::InitPostVSBuffers(uint32_t eventId) if(refl->outputSignature.empty()) { // empty vertex output signature - m_PostVSData[eventId].vsin.topo = pipeInfo.topology; - m_PostVSData[eventId].vsout.buf = VK_NULL_HANDLE; - m_PostVSData[eventId].vsout.bufmem = VK_NULL_HANDLE; - m_PostVSData[eventId].vsout.instStride = 0; - m_PostVSData[eventId].vsout.vertStride = 0; - m_PostVSData[eventId].vsout.numViews = 1; - m_PostVSData[eventId].vsout.nearPlane = 0.0f; - m_PostVSData[eventId].vsout.farPlane = 0.0f; - m_PostVSData[eventId].vsout.useIndices = false; - m_PostVSData[eventId].vsout.hasPosOut = false; - m_PostVSData[eventId].vsout.idxbuf = VK_NULL_HANDLE; - m_PostVSData[eventId].vsout.idxbufmem = VK_NULL_HANDLE; + m_PostVS.Data[eventId].vsin.topo = pipeInfo.topology; + m_PostVS.Data[eventId].vsout.buf = VK_NULL_HANDLE; + m_PostVS.Data[eventId].vsout.bufmem = VK_NULL_HANDLE; + m_PostVS.Data[eventId].vsout.instStride = 0; + m_PostVS.Data[eventId].vsout.vertStride = 0; + m_PostVS.Data[eventId].vsout.numViews = 1; + m_PostVS.Data[eventId].vsout.nearPlane = 0.0f; + m_PostVS.Data[eventId].vsout.farPlane = 0.0f; + m_PostVS.Data[eventId].vsout.useIndices = false; + m_PostVS.Data[eventId].vsout.hasPosOut = false; + m_PostVS.Data[eventId].vsout.idxbuf = VK_NULL_HANDLE; + m_PostVS.Data[eventId].vsout.idxbufmem = VK_NULL_HANDLE; - m_PostVSData[eventId].vsout.topo = pipeInfo.topology; + m_PostVS.Data[eventId].vsout.topo = pipeInfo.topology; return; } - const 
DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); - - if(drawcall == NULL || drawcall->numIndices == 0 || drawcall->numInstances == 0) - return; - // we go through the driver for all these creations since they need to be properly // registered in order to be put in the partial replay state VkResult vkr = VK_SUCCESS; @@ -2108,7 +2248,7 @@ void VulkanReplay::InitPostVSBuffers(uint32_t eventId) RDCASSERTEQUAL(vkr, VK_SUCCESS); // fill destination buffer with 0s to ensure unwritten vertices have sane data - ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(meshBuffer), 0, bufInfo.size, 0xbaadf00d); + ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(meshBuffer), 0, bufInfo.size, 0); VkBufferMemoryBarrier meshbufbarrier = { VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -2280,36 +2420,36 @@ void VulkanReplay::InitPostVSBuffers(uint32_t eventId) m_pDriver->vkDestroyBufferView(m_Device, uniqIdxBufView, NULL); } - // fill out m_PostVSData - m_PostVSData[eventId].vsin.topo = pipeCreateInfo.pInputAssemblyState->topology; - m_PostVSData[eventId].vsout.topo = pipeCreateInfo.pInputAssemblyState->topology; - m_PostVSData[eventId].vsout.buf = meshBuffer; - m_PostVSData[eventId].vsout.bufmem = meshMem; + // fill out m_PostVS.Data + m_PostVS.Data[eventId].vsin.topo = pipeCreateInfo.pInputAssemblyState->topology; + m_PostVS.Data[eventId].vsout.topo = pipeCreateInfo.pInputAssemblyState->topology; + m_PostVS.Data[eventId].vsout.buf = meshBuffer; + m_PostVS.Data[eventId].vsout.bufmem = meshMem; - m_PostVSData[eventId].vsout.baseVertex = 0; + m_PostVS.Data[eventId].vsout.baseVertex = 0; - m_PostVSData[eventId].vsout.numViews = numViews; + m_PostVS.Data[eventId].vsout.numViews = numViews; - m_PostVSData[eventId].vsout.vertStride = bufStride; - m_PostVSData[eventId].vsout.nearPlane = nearp; - m_PostVSData[eventId].vsout.farPlane = farp; + m_PostVS.Data[eventId].vsout.vertStride = bufStride; + m_PostVS.Data[eventId].vsout.nearPlane = nearp; + m_PostVS.Data[eventId].vsout.farPlane = 
farp; - m_PostVSData[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::Indexed); - m_PostVSData[eventId].vsout.numVerts = drawcall->numIndices; + m_PostVS.Data[eventId].vsout.useIndices = bool(drawcall->flags & DrawFlags::Indexed); + m_PostVS.Data[eventId].vsout.numVerts = drawcall->numIndices; - m_PostVSData[eventId].vsout.instStride = 0; + m_PostVS.Data[eventId].vsout.instStride = 0; if(drawcall->flags & DrawFlags::Instanced) - m_PostVSData[eventId].vsout.instStride = uint32_t(bufSize / (drawcall->numInstances * numViews)); + m_PostVS.Data[eventId].vsout.instStride = uint32_t(bufSize / (drawcall->numInstances * numViews)); - m_PostVSData[eventId].vsout.idxbuf = VK_NULL_HANDLE; - if(m_PostVSData[eventId].vsout.useIndices && state.ibuffer.buf != ResourceId()) + m_PostVS.Data[eventId].vsout.idxbuf = VK_NULL_HANDLE; + if(m_PostVS.Data[eventId].vsout.useIndices && state.ibuffer.buf != ResourceId()) { - m_PostVSData[eventId].vsout.idxbuf = rebasedIdxBuf; - m_PostVSData[eventId].vsout.idxbufmem = rebasedIdxBufMem; - m_PostVSData[eventId].vsout.idxFmt = idxsize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; + m_PostVS.Data[eventId].vsout.idxbuf = rebasedIdxBuf; + m_PostVS.Data[eventId].vsout.idxbufmem = rebasedIdxBufMem; + m_PostVS.Data[eventId].vsout.idxFmt = idxsize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32; } - m_PostVSData[eventId].vsout.hasPosOut = + m_PostVS.Data[eventId].vsout.hasPosOut = refl->outputSignature[0].systemValue == ShaderBuiltin::Position; // delete descriptors. 
Technically we don't have to free the descriptor sets, but our tracking on @@ -2330,6 +2470,543 @@ void VulkanReplay::InitPostVSBuffers(uint32_t eventId) // delete shader/shader module m_pDriver->vkDestroyShaderModule(dev, module, NULL); } + +void VulkanReplay::FetchTessGSOut(uint32_t eventId) +{ + VulkanRenderState state = m_pDriver->m_RenderState; + VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo; + + const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[state.graphics.pipeline]; + + const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); + + // first try geometry stage + int stageIndex = 3; + + // if there is no such shader bound, try tessellation + if(!pipeInfo.shaders[stageIndex].refl) + stageIndex = 2; + + // if still nothing, do vertex + if(!pipeInfo.shaders[stageIndex].refl) + stageIndex = 0; + + ShaderReflection *lastRefl = pipeInfo.shaders[stageIndex].refl; + + RDCASSERT(lastRefl); + + uint32_t primitiveMultiplier = 1; + + // transform feedback expands strips to lists + switch(pipeInfo.shaders[stageIndex].patchData->outTopo) + { + case Topology::PointList: + m_PostVS.Data[eventId].gsout.topo = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case Topology::LineList: + case Topology::LineStrip: + m_PostVS.Data[eventId].gsout.topo = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + primitiveMultiplier = 2; + break; + default: + RDCERR("Unexpected output topology %s", + ToStr(pipeInfo.shaders[stageIndex].patchData->outTopo).c_str()); + // deliberate fallthrough + case Topology::TriangleList: + case Topology::TriangleStrip: + m_PostVS.Data[eventId].gsout.topo = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + primitiveMultiplier = 3; + break; + } + + if(lastRefl->outputSignature.empty()) + { + // empty vertex output signature + m_PostVS.Data[eventId].gsout.buf = VK_NULL_HANDLE; + m_PostVS.Data[eventId].gsout.bufmem = VK_NULL_HANDLE; + m_PostVS.Data[eventId].gsout.instStride = 0; + m_PostVS.Data[eventId].gsout.vertStride = 0; + 
m_PostVS.Data[eventId].gsout.numViews = 1; + m_PostVS.Data[eventId].gsout.nearPlane = 0.0f; + m_PostVS.Data[eventId].gsout.farPlane = 0.0f; + m_PostVS.Data[eventId].gsout.useIndices = false; + m_PostVS.Data[eventId].gsout.hasPosOut = false; + m_PostVS.Data[eventId].gsout.idxbuf = VK_NULL_HANDLE; + m_PostVS.Data[eventId].gsout.idxbufmem = VK_NULL_HANDLE; + return; + } + + if(!ObjDisp(m_Device)->CmdBeginTransformFeedbackEXT) + { + RDCLOG( + "VK_EXT_transform_feedback_extension not available, can't fetch tessellation/geometry " + "output"); + return; + } + + const VulkanCreationInfo::ShaderModule &moduleInfo = + creationInfo.m_ShaderModule[pipeInfo.shaders[stageIndex].module]; + + std::vector modSpirv = moduleInfo.spirv.spirv; + + uint32_t xfbStride = 0; + + // adds XFB annotations in order of the output signature (with the position first) + AddXFBAnnotations(*lastRefl, *pipeInfo.shaders[stageIndex].patchData, + pipeInfo.shaders[stageIndex].entryPoint.c_str(), modSpirv, xfbStride); + + // create vertex shader with modified code + VkShaderModuleCreateInfo moduleCreateInfo = { + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, NULL, 0, + modSpirv.size() * sizeof(uint32_t), &modSpirv[0], + }; + + VkResult vkr = VK_SUCCESS; + VkDevice dev = m_Device; + + VkShaderModule module; + vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &module); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + VkGraphicsPipelineCreateInfo pipeCreateInfo; + + // get pipeline create info + m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(pipeCreateInfo, state.graphics.pipeline); + + VkPipelineRasterizationStateCreateInfo *rs = + (VkPipelineRasterizationStateCreateInfo *)pipeCreateInfo.pRasterizationState; + rs->rasterizerDiscardEnable = true; + + for(uint32_t i = 0; i < pipeCreateInfo.stageCount; i++) + { + VkPipelineShaderStageCreateInfo &stage = + (VkPipelineShaderStageCreateInfo &)pipeCreateInfo.pStages[i]; + + if(StageIndex(stage.stage) == stageIndex) + { + stage.module = module; + 
break; + } + } + + // create an empty renderpass and framebuffer so we can draw + VkFramebuffer fb = VK_NULL_HANDLE; + VkRenderPass rp = VK_NULL_HANDLE; + + VkSubpassDescription sub = {0, VK_PIPELINE_BIND_POINT_GRAPHICS}; + VkRenderPassCreateInfo rpinfo = { + VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, NULL, 0, 0, NULL, 1, &sub, + }; + + vkr = m_pDriver->vkCreateRenderPass(m_Device, &rpinfo, NULL, &rp); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + VkFramebufferCreateInfo fbinfo = { + VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, NULL, 0, rp, 0, NULL, 16U, 16U, 1, + }; + + vkr = m_pDriver->vkCreateFramebuffer(m_Device, &fbinfo, NULL, &fb); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + pipeCreateInfo.renderPass = rp; + pipeCreateInfo.subpass = 0; + + VkPipeline pipe = VK_NULL_HANDLE; + vkr = m_pDriver->vkCreateGraphicsPipelines(m_Device, VK_NULL_HANDLE, 1, &pipeCreateInfo, NULL, + &pipe); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + state.graphics.pipeline = GetResID(pipe); + state.framebuffer = GetResID(fb); + state.renderPass = GetResID(rp); + state.renderArea.offset.x = 0; + state.renderArea.offset.y = 0; + state.renderArea.extent.width = 16; + state.renderArea.extent.height = 16; + + // disable any existing XFB + state.xfbbuffers.clear(); + state.xfbcounters.clear(); + + if(m_PostVS.XFBQueryPoolSize < drawcall->numInstances) + { + if(m_PostVS.XFBQueryPool != VK_NULL_HANDLE) + m_pDriver->vkDestroyQueryPool(m_Device, m_PostVS.XFBQueryPool, NULL); + + VkQueryPoolCreateInfo info = { + VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + NULL, + 0, + VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT, + drawcall->numInstances, + 0, + }; + + vkr = m_pDriver->vkCreateQueryPool(m_Device, &info, NULL, &m_PostVS.XFBQueryPool); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + m_PostVS.XFBQueryPoolSize = drawcall->numInstances; + } + + VkBuffer meshBuffer = VK_NULL_HANDLE; + VkDeviceMemory meshMem = VK_NULL_HANDLE; + + // start with bare minimum size, which might be enough if no expansion happens + VkDeviceSize
bufferSize = 0; + VkDeviceSize dataSize = + uint64_t(drawcall->numIndices) * uint64_t(drawcall->numInstances) * uint64_t(xfbStride); + + VkXfbQueryResult queryResult = {}; + + while(bufferSize < dataSize) + { + bufferSize = dataSize; + + if(meshBuffer != VK_NULL_HANDLE) + { + m_pDriver->vkDestroyBuffer(dev, meshBuffer, NULL); + m_pDriver->vkFreeMemory(dev, meshMem, NULL); + + meshBuffer = VK_NULL_HANDLE; + meshMem = VK_NULL_HANDLE; + } + + VkBufferCreateInfo bufInfo = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO}; + + bufInfo.size = bufferSize; + + bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + bufInfo.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT; + bufInfo.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + + vkr = m_pDriver->vkCreateBuffer(dev, &bufInfo, NULL, &meshBuffer); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + VkMemoryRequirements mrq = {0}; + m_pDriver->vkGetBufferMemoryRequirements(dev, meshBuffer, &mrq); + + VkMemoryAllocateInfo allocInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size, + m_pDriver->GetGPULocalMemoryIndex(mrq.memoryTypeBits), + }; + + vkr = m_pDriver->vkAllocateMemory(dev, &allocInfo, NULL, &meshMem); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + vkr = m_pDriver->vkBindBufferMemory(dev, meshBuffer, meshMem, 0); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + VkCommandBuffer cmd = m_pDriver->GetNextCmd(); + + VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + + vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), 0, 1); + + // fill destination buffer with 0s to ensure unwritten vertices have sane data + ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(meshBuffer), 0, bufInfo.size, 0); + + VkBufferMemoryBarrier meshbufbarrier = { +
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + Unwrap(meshBuffer), + 0, + bufInfo.size, + }; + + // wait for the above fill to finish. + DoPipelineBarrier(cmd, 1, &meshbufbarrier); + + state.BeginRenderPassAndApplyState(cmd, VulkanRenderState::BindGraphics); + + ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), 0, 0); + + ObjDisp(cmd)->CmdBindTransformFeedbackBuffersEXT(Unwrap(cmd), 0, 1, UnwrapPtr(meshBuffer), + &meshbufbarrier.offset, &meshbufbarrier.size); + + ObjDisp(cmd)->CmdBeginTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); + + if(drawcall->flags & DrawFlags::Indexed) + { + ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, + drawcall->indexOffset, drawcall->baseVertex, + drawcall->instanceOffset); + } + else + { + ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, drawcall->numInstances, + drawcall->vertexOffset, drawcall->instanceOffset); + } + + ObjDisp(cmd)->CmdEndTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); + + ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), 0); + + state.EndRenderPass(cmd); + + vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd)); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + m_pDriver->SubmitCmds(); + m_pDriver->FlushQ(); + + vkr = ObjDisp(dev)->GetQueryPoolResults( + Unwrap(dev), Unwrap(m_PostVS.XFBQueryPool), 0, 1, sizeof(VkXfbQueryResult), &queryResult, + sizeof(VkXfbQueryResult), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + VkDeviceSize generatedSize = queryResult.numPrimitivesGenerated * 3 * xfbStride; + + // output buffer isn't big enough, delete it and re-run so we recreate it larger + if(generatedSize > dataSize) + dataSize = generatedSize; + } + + std::vector instData; + + // instanced draws must be replayed one at a time so we can record the number of 
primitives from + // each drawcall, as due to expansion this can vary per-instance. + if(drawcall->flags & DrawFlags::Instanced && drawcall->numInstances > 1) + { + VkCommandBuffer cmd = m_pDriver->GetNextCmd(); + + VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + + vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), 0, + drawcall->numInstances); + + state.BeginRenderPassAndApplyState(cmd, VulkanRenderState::BindGraphics); + + // do incremental draws to get the output size. We have to do this O(N^2) style because + // there's no way to replay only a single instance. We have to replay 1, 2, 3, ... N + // instances and count the total number of verts each time, then we can see from the + // difference how much each instance wrote. + for(uint32_t inst = 1; inst <= drawcall->numInstances; inst++) + { + ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), inst - 1, 0); + + VkDeviceSize offset = 0; + ObjDisp(cmd)->CmdBindTransformFeedbackBuffersEXT(Unwrap(cmd), 0, 1, UnwrapPtr(meshBuffer), + &offset, &bufferSize); + + ObjDisp(cmd)->CmdBeginTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); + + if(drawcall->flags & DrawFlags::Indexed) + { + ObjDisp(cmd)->CmdDrawIndexed(Unwrap(cmd), drawcall->numIndices, inst, drawcall->indexOffset, + drawcall->baseVertex, drawcall->instanceOffset); + } + else + { + ObjDisp(cmd)->CmdDraw(Unwrap(cmd), drawcall->numIndices, inst, drawcall->vertexOffset, + drawcall->instanceOffset); + } + + ObjDisp(cmd)->CmdEndTransformFeedbackEXT(Unwrap(cmd), 0, 1, NULL, NULL); + + ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), Unwrap(m_PostVS.XFBQueryPool), inst - 1); + } + + state.EndRenderPass(cmd); + + vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd)); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + m_pDriver->SubmitCmds(); + 
m_pDriver->FlushQ(); + + std::vector queryResults; + queryResults.resize(drawcall->numInstances); + vkr = ObjDisp(dev)->GetQueryPoolResults( + Unwrap(dev), Unwrap(m_PostVS.XFBQueryPool), 0, drawcall->numInstances, + sizeof(VkXfbQueryResult) * drawcall->numInstances, queryResults.data(), + sizeof(VkXfbQueryResult), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + RDCASSERTEQUAL(vkr, VK_SUCCESS); + + uint64_t prevVertCount = 0; + + for(uint32_t inst = 0; inst < drawcall->numInstances; inst++) + { + uint64_t vertCount = queryResults[inst].numPrimitivesWritten * primitiveMultiplier; + + VulkanPostVSData::InstData d; + d.numVerts = uint32_t(vertCount - prevVertCount); + d.bufOffset = uint32_t(xfbStride * prevVertCount); + prevVertCount = vertCount; + + instData.push_back(d); + } + } + + float nearp = 0.1f; + float farp = 100.0f; + + Vec4f pos0; + + bool found = false; + + // we read back the buffer in chunks, since we're likely to find a match in the first few + // vertices. + + VkDeviceSize readbackoffset = 0; + const VkDeviceSize readbacksize = 1024 * 1024; + + while(readbackoffset < bufferSize) + { + bytebuf data; + GetBufferData(GetResID(meshBuffer), readbackoffset, readbacksize, data); + + if(data.empty()) + break; + + if(readbackoffset == 0) + memcpy(&pos0, data.data(), sizeof(pos0)); + + for(uint32_t i = 0; i < data.size() / xfbStride; i++) + { + ////////////////////////////////////////////////////////////////////////////////// + // derive near/far, assuming a standard perspective matrix + // + // the transformation from from pre-projection {Z,W} to post-projection {Z,W} + // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1 + // and we know Wpost = Zpre from the perspective matrix. + // we can then see from the perspective matrix that + // m = F/(F-N) + // c = -(F*N)/(F-N) + // + // with re-arranging and substitution, we then get: + // N = -c/m + // F = c/(1-m) + // + // so if we can derive m and c then we can determine N and F. 
We can do this with + // two points, and we pick them reasonably distinct on z to reduce floating-point + // error + + Vec4f *pos = (Vec4f *)(data.data() + xfbStride * i); + + // skip invalid vertices (w=0) + if(pos->w != 0.0f && fabs(pos->w - pos0.w) > 0.01f && fabs(pos->z - pos0.z) > 0.01f) + { + Vec2f A(pos0.w, pos0.z); + Vec2f B(pos->w, pos->z); + + float m = (B.y - A.y) / (B.x - A.x); + float c = B.y - B.x * m; + + if(m == 1.0f) + continue; + + if(-c / m <= 0.000001f) + continue; + + nearp = -c / m; + farp = c / (1 - m); + + found = true; + + break; + } + } + + if(found) + break; + + // read the next segment + readbackoffset += readbacksize; + } + + // if we didn't find anything, all z's and w's were identical. + // If the z is positive and w greater for the first element then + // we detect this projection as reversed z with infinite far plane + if(!found && pos0.z > 0.0f && pos0.w > pos0.z) + { + nearp = pos0.z; + farp = FLT_MAX; + } + + // fill out m_PostVS.Data + m_PostVS.Data[eventId].gsout.buf = meshBuffer; + m_PostVS.Data[eventId].gsout.bufmem = meshMem; + + m_PostVS.Data[eventId].gsout.baseVertex = 0; + + m_PostVS.Data[eventId].gsout.numViews = 1; + + m_PostVS.Data[eventId].gsout.vertStride = xfbStride; + m_PostVS.Data[eventId].gsout.nearPlane = nearp; + m_PostVS.Data[eventId].gsout.farPlane = farp; + + m_PostVS.Data[eventId].gsout.useIndices = false; + + m_PostVS.Data[eventId].gsout.numVerts = + uint32_t(queryResult.numPrimitivesWritten) * primitiveMultiplier; + + // set instance stride to 0. 
If there's any stride needed, it will be calculated using instData + m_PostVS.Data[eventId].gsout.instStride = 0; + m_PostVS.Data[eventId].gsout.instData = instData; + + m_PostVS.Data[eventId].gsout.idxbuf = VK_NULL_HANDLE; + m_PostVS.Data[eventId].gsout.idxbufmem = VK_NULL_HANDLE; + + m_PostVS.Data[eventId].gsout.hasPosOut = true; + + // delete framebuffer and renderpass + m_pDriver->vkDestroyFramebuffer(dev, fb, NULL); + m_pDriver->vkDestroyRenderPass(dev, rp, NULL); + + // delete pipeline + m_pDriver->vkDestroyPipeline(dev, pipe, NULL); + + // delete shader/shader module + m_pDriver->vkDestroyShaderModule(dev, module, NULL); +} + +void VulkanReplay::InitPostVSBuffers(uint32_t eventId) +{ + // go through any aliasing + if(m_PostVS.Alias.find(eventId) != m_PostVS.Alias.end()) + eventId = m_PostVS.Alias[eventId]; + + if(m_PostVS.Data.find(eventId) != m_PostVS.Data.end()) + return; + + const VulkanRenderState &state = m_pDriver->m_RenderState; + VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo; + + if(state.graphics.pipeline == ResourceId() || state.renderPass == ResourceId()) + return; + + const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[state.graphics.pipeline]; + + if(pipeInfo.shaders[0].module == ResourceId()) + return; + + const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); + + if(drawcall == NULL || drawcall->numIndices == 0 || drawcall->numInstances == 0) + return; + + FetchVSOut(eventId); + + // if there's no tessellation or geometry shader active, bail out now + if(pipeInfo.shaders[2].module == ResourceId() && pipeInfo.shaders[3].module == ResourceId()) + return; + + FetchTessGSOut(eventId); +} + struct VulkanInitPostVSCallback : public VulkanDrawcallCallback { VulkanInitPostVSCallback(WrappedVulkan *vk, const vector &events) @@ -2384,14 +3061,14 @@ MeshFormat VulkanReplay::GetPostVSBuffers(uint32_t eventId, uint32_t instID, uin MeshDataStage stage) { // go through any aliasing - 
if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end()) - eventId = m_PostVSAlias[eventId]; + if(m_PostVS.Alias.find(eventId) != m_PostVS.Alias.end()) + eventId = m_PostVS.Alias[eventId]; VulkanPostVSData postvs; RDCEraseEl(postvs); - if(m_PostVSData.find(eventId) != m_PostVSData.end()) - postvs = m_PostVSData[eventId]; + if(m_PostVS.Data.find(eventId) != m_PostVS.Data.end()) + postvs = m_PostVS.Data[eventId]; const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId); @@ -2445,5 +3122,13 @@ MeshFormat VulkanReplay::GetPostVSBuffers(uint32_t eventId, uint32_t instID, uin ret.nearPlane = s.nearPlane; ret.farPlane = s.farPlane; + if(instID < s.instData.size()) + { + VulkanPostVSData::InstData inst = s.instData[instID]; + + ret.vertexByteOffset = inst.bufOffset; + ret.numIndices = inst.numVerts; + } + return ret; } diff --git a/renderdoc/driver/vulkan/vk_replay.cpp b/renderdoc/driver/vulkan/vk_replay.cpp index 655a80e7c..a3e851a0f 100644 --- a/renderdoc/driver/vulkan/vk_replay.cpp +++ b/renderdoc/driver/vulkan/vk_replay.cpp @@ -934,6 +934,33 @@ void VulkanReplay::SavePipelineState() m_VulkanPipelineState.tessellation.domainOriginUpperLeft = p.tessellationDomainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; + // Transform feedback + m_VulkanPipelineState.transformFeedback.buffers.resize(state.xfbbuffers.size()); + for(size_t i = 0; i < state.xfbbuffers.size(); i++) + { + m_VulkanPipelineState.transformFeedback.buffers[i].bufferResourceId = + rm->GetOriginalID(state.xfbbuffers[i].buf); + m_VulkanPipelineState.transformFeedback.buffers[i].byteOffset = state.xfbbuffers[i].offs; + m_VulkanPipelineState.transformFeedback.buffers[i].byteSize = state.xfbbuffers[i].size; + + m_VulkanPipelineState.transformFeedback.buffers[i].active = false; + m_VulkanPipelineState.transformFeedback.buffers[i].counterBufferResourceId = ResourceId(); + m_VulkanPipelineState.transformFeedback.buffers[i].counterBufferOffset = 0; + + if(i >= state.firstxfbcounter) + { + size_t xfb = 
i - state.firstxfbcounter; + if(xfb < state.xfbcounters.size()) + { + m_VulkanPipelineState.transformFeedback.buffers[i].active = true; + m_VulkanPipelineState.transformFeedback.buffers[i].counterBufferResourceId = + rm->GetOriginalID(state.xfbcounters[xfb].buf); + m_VulkanPipelineState.transformFeedback.buffers[i].counterBufferOffset = + state.xfbcounters[xfb].offs; + } + } + } + // Viewport/Scissors size_t numViewScissors = p.viewportCount; m_VulkanPipelineState.viewportScissor.viewportScissors.resize(numViewScissors); diff --git a/renderdoc/driver/vulkan/vk_replay.h b/renderdoc/driver/vulkan/vk_replay.h index e45627bac..4ae99aab5 100644 --- a/renderdoc/driver/vulkan/vk_replay.h +++ b/renderdoc/driver/vulkan/vk_replay.h @@ -128,6 +128,12 @@ struct VulkanAMDDrawCallback; struct VulkanPostVSData { + struct InstData + { + uint32_t numVerts = 0; + uint32_t bufOffset = 0; + }; + struct StageData { VkBuffer buf; @@ -140,6 +146,9 @@ struct VulkanPostVSData uint32_t vertStride; uint32_t instStride; + // complex case - expansion per instance + std::vector instData; + uint32_t numViews; bool useIndices; @@ -249,8 +258,9 @@ public: void InitPostVSBuffers(uint32_t eventId); void InitPostVSBuffers(const std::vector &passEvents); + // indicates that EID alias is the same as eventId - void AliasPostVSBuffers(uint32_t eventId, uint32_t alias) { m_PostVSAlias[alias] = eventId; } + void AliasPostVSBuffers(uint32_t eventId, uint32_t alias) { m_PostVS.Alias[alias] = eventId; } void ClearPostVSCache(); MeshFormat GetPostVSBuffers(uint32_t eventId, uint32_t instID, uint32_t viewID, @@ -331,6 +341,9 @@ public: AMDCounters *GetAMDCounters() { return m_pAMDCounters; } private: + void FetchVSOut(uint32_t eventId); + void FetchTessGSOut(uint32_t eventId); + bool RenderTextureInternal(TextureDisplay cfg, VkRenderPassBeginInfo rpbegin, int flags); void CreateTexImageView(VkImageAspectFlags aspectFlags, VkImage liveIm, @@ -573,8 +586,16 @@ private: VkPipeline m_MinMaxResultPipe[3] = 
{VK_NULL_HANDLE}; } m_Histogram; - std::map m_PostVSData; - std::map m_PostVSAlias; + struct PostVS + { + void Destroy(WrappedVulkan *driver); + + VkQueryPool XFBQueryPool = VK_NULL_HANDLE; + uint32_t XFBQueryPoolSize = 0; + + std::map Data; + std::map Alias; + } m_PostVS; std::vector m_Resources; std::map m_ResourceIdx; diff --git a/renderdoc/driver/vulkan/vk_serialise.cpp b/renderdoc/driver/vulkan/vk_serialise.cpp index 86bba6e2b..343b2c249 100644 --- a/renderdoc/driver/vulkan/vk_serialise.cpp +++ b/renderdoc/driver/vulkan/vk_serialise.cpp @@ -262,7 +262,11 @@ SERIALISE_VK_HANDLES(); \ /* VK_EXT_validation_cache */ \ PNEXT_STRUCT(VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT, \ - VkShaderModuleValidationCacheCreateInfoEXT) + VkShaderModuleValidationCacheCreateInfoEXT) \ + \ + /* VK_EXT_transform_feedback */ \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT, \ + VkPipelineRasterizationStateStreamCreateInfoEXT) template static void SerialiseNext(SerialiserType &ser, VkStructureType &sType, const void *&pNext) @@ -2560,6 +2564,23 @@ void DoSerialise(SerialiserType &ser, VkShaderModuleValidationCacheCreateInfoEXT // SERIALISE_MEMBER(validationCache); } +template +void DoSerialise(SerialiserType &ser, VkPipelineRasterizationStateStreamCreateInfoEXT &el) +{ + RDCASSERT(ser.IsReading() || + el.sType == VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_TYPED(VkFlagWithNoBits, flags); + SERIALISE_MEMBER(rasterizationStream); +} + +template <> +void Deserialise(const VkPipelineRasterizationStateStreamCreateInfoEXT &el) +{ + DeserialiseNext(el.pNext); +} + template void DoSerialise(SerialiserType &ser, VkAttachmentDescription2KHR &el) { diff --git a/renderdoc/driver/vulkan/vk_state.cpp b/renderdoc/driver/vulkan/vk_state.cpp index ed92d1160..8f0f8def7 100644 --- a/renderdoc/driver/vulkan/vk_state.cpp +++ 
b/renderdoc/driver/vulkan/vk_state.cpp @@ -123,6 +123,50 @@ void VulkanRenderState::BeginRenderPassAndApplyState(VkCommandBuffer cmd, Pipeli UnwrapPtr(GetResourceManager()->GetCurrentHandle(vbuffers[i].buf)), &vbuffers[i].offs); } + + for(size_t i = 0; i < xfbbuffers.size(); i++) + { + if(xfbbuffers[i].buf == ResourceId()) + continue; + + ObjDisp(cmd)->CmdBindTransformFeedbackBuffersEXT( + Unwrap(cmd), (uint32_t)i, 1, + UnwrapPtr(GetResourceManager()->GetCurrentHandle(xfbbuffers[i].buf)), + &xfbbuffers[i].offs, &xfbbuffers[i].size); + } + + if(!xfbcounters.empty()) + { + std::vector buffers; + std::vector offsets; + + for(size_t i = 0; i < xfbcounters.size(); i++) + { + buffers.push_back(Unwrap(GetResourceManager()->GetCurrentHandle(xfbcounters[i].buf))); + offsets.push_back(xfbcounters[i].offs); + } + + ObjDisp(cmd)->CmdBeginTransformFeedbackEXT( + Unwrap(cmd), firstxfbcounter, (uint32_t)xfbcounters.size(), buffers.data(), offsets.data()); + } +} + +void VulkanRenderState::EndTransformFeedback(VkCommandBuffer cmd) +{ + if(!xfbcounters.empty()) + { + std::vector buffers; + std::vector offsets; + + for(size_t i = 0; i < xfbcounters.size(); i++) + { + buffers.push_back(Unwrap(GetResourceManager()->GetCurrentHandle(xfbcounters[i].buf))); + offsets.push_back(xfbcounters[i].offs); + } + + ObjDisp(cmd)->CmdEndTransformFeedbackEXT( + Unwrap(cmd), firstxfbcounter, (uint32_t)xfbcounters.size(), buffers.data(), offsets.data()); + } } void VulkanRenderState::BindPipeline(VkCommandBuffer cmd, PipelineBinding binding, bool subpass0) diff --git a/renderdoc/driver/vulkan/vk_state.h b/renderdoc/driver/vulkan/vk_state.h index 4005070b4..a364c5b40 100644 --- a/renderdoc/driver/vulkan/vk_state.h +++ b/renderdoc/driver/vulkan/vk_state.h @@ -44,6 +44,7 @@ struct VulkanRenderState VulkanRenderState(WrappedVulkan *driver, VulkanCreationInfo *createInfo); VulkanRenderState &operator=(const VulkanRenderState &o); void BeginRenderPassAndApplyState(VkCommandBuffer cmd, PipelineBinding 
binding); + void EndTransformFeedback(VkCommandBuffer cmd); void EndRenderPass(VkCommandBuffer cmd); void BindPipeline(VkCommandBuffer cmd, PipelineBinding binding, bool subpass0); @@ -103,6 +104,22 @@ struct VulkanRenderState }; vector vbuffers; + struct XFBBuffer + { + ResourceId buf; + VkDeviceSize offs; + VkDeviceSize size; + }; + vector xfbbuffers; + + struct XFBCounter + { + ResourceId buf; + VkDeviceSize offs; + }; + uint32_t firstxfbcounter = 0; + vector xfbcounters; + VulkanResourceManager *GetResourceManager(); VulkanCreationInfo *m_CreationInfo; WrappedVulkan *m_pDriver; diff --git a/renderdoc/driver/vulkan/vk_stringise.cpp b/renderdoc/driver/vulkan/vk_stringise.cpp index c40d23f53..95a5bd6ed 100644 --- a/renderdoc/driver/vulkan/vk_stringise.cpp +++ b/renderdoc/driver/vulkan/vk_stringise.cpp @@ -28,7 +28,7 @@ template <> std::string DoStringise(const VulkanChunk &el) { - RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1122, "Chunks changed without updating names"); + RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1128, "Chunks changed without updating names"); BEGIN_ENUM_STRINGISE(VulkanChunk) { @@ -154,6 +154,12 @@ std::string DoStringise(const VulkanChunk &el) STRINGISE_ENUM_CLASS(vkCmdBeginRenderPass2KHR); STRINGISE_ENUM_CLASS(vkCmdNextSubpass2KHR); STRINGISE_ENUM_CLASS(vkCmdEndRenderPass2KHR); + STRINGISE_ENUM_CLASS(vkCmdBindTransformFeedbackBuffersEXT) + STRINGISE_ENUM_CLASS(vkCmdBeginTransformFeedbackEXT) + STRINGISE_ENUM_CLASS(vkCmdEndTransformFeedbackEXT) + STRINGISE_ENUM_CLASS(vkCmdBeginQueryIndexedEXT) + STRINGISE_ENUM_CLASS(vkCmdEndQueryIndexedEXT) + STRINGISE_ENUM_CLASS(vkCmdDrawIndirectByteCountEXT) STRINGISE_ENUM_CLASS_NAMED(Max, "Max Chunk"); } END_ENUM_STRINGISE() diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp index 0bfadc438..7fb30cf5d 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp @@ -661,6 +661,7 
@@ bool WrappedVulkan::Serialise_vkBeginCommandBuffer(SerialiserType &ser, VkComman m_Partial[p].partialParent = BakedCommandBuffer; m_Partial[p].baseEvent = it->baseEvent; m_Partial[p].renderPassActive = false; + m_RenderState.xfbcounters.clear(); rerecord = true; partial = true; @@ -902,6 +903,12 @@ bool WrappedVulkan::Serialise_vkEndCommandBuffer(SerialiserType &ser, VkCommandB BakedCommandBuffer); #endif + if(m_Partial[Primary].partialParent == BakedCommandBuffer && + !m_RenderState.xfbcounters.empty()) + { + m_RenderState.EndTransformFeedback(commandBuffer); + } + // finish any render pass that was still active in the primary partial parent if(m_Partial[Primary].partialParent == BakedCommandBuffer && m_Partial[Primary].renderPassActive) @@ -4276,6 +4283,388 @@ void WrappedVulkan::vkCmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t d } } +template +bool WrappedVulkan::Serialise_vkCmdBindTransformFeedbackBuffersEXT( + SerialiserType &ser, VkCommandBuffer commandBuffer, uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(firstBinding); + SERIALISE_ELEMENT(bindingCount); + SERIALISE_ELEMENT_ARRAY(pBuffers, bindingCount); + SERIALISE_ELEMENT_ARRAY(pOffsets, bindingCount); + SERIALISE_ELEMENT_ARRAY(pSizes, bindingCount); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + { + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + ObjDisp(commandBuffer) + ->CmdBindTransformFeedbackBuffersEXT(Unwrap(commandBuffer), firstBinding, bindingCount, + UnwrapArray(pBuffers, bindingCount), pOffsets, + pSizes); + + if(ShouldUpdateRenderState(m_LastCmdBufferID)) + { + if(m_RenderState.xfbbuffers.size() < firstBinding + 
bindingCount) + m_RenderState.xfbbuffers.resize(firstBinding + bindingCount); + + for(uint32_t i = 0; i < bindingCount; i++) + { + m_RenderState.xfbbuffers[firstBinding + i].buf = GetResID(pBuffers[i]); + m_RenderState.xfbbuffers[firstBinding + i].offs = pOffsets[i]; + m_RenderState.xfbbuffers[firstBinding + i].size = pSizes[i]; + } + } + } + } + else + { + // track while reading, as we need to track resource usage + if(m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbbuffers.size() < firstBinding + bindingCount) + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbbuffers.resize(firstBinding + bindingCount); + + for(uint32_t i = 0; i < bindingCount; i++) + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbbuffers[firstBinding + i] = + GetResID(pBuffers[i]); + + ObjDisp(commandBuffer) + ->CmdBindTransformFeedbackBuffersEXT(Unwrap(commandBuffer), firstBinding, bindingCount, + UnwrapArray(pBuffers, bindingCount), pOffsets, pSizes); + } + } + + return true; +} + +void WrappedVulkan::vkCmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, + uint32_t firstBinding, uint32_t bindingCount, + const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, + const VkDeviceSize *pSizes) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL(ObjDisp(commandBuffer) + ->CmdBindTransformFeedbackBuffersEXT( + Unwrap(commandBuffer), firstBinding, bindingCount, + UnwrapArray(pBuffers, bindingCount), pOffsets, pSizes)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdBindTransformFeedbackBuffersEXT); + Serialise_vkCmdBindTransformFeedbackBuffersEXT(ser, commandBuffer, firstBinding, bindingCount, + pBuffers, pOffsets, pSizes); + + record->AddChunk(scope.Get()); + for(uint32_t i = 0; i < bindingCount; i++) + { + record->MarkResourceFrameReferenced(GetResID(pBuffers[i]), eFrameRef_Read); + record->MarkResourceFrameReferenced(GetRecord(pBuffers[i])->baseResource, 
eFrameRef_Read); + if(GetRecord(pBuffers[i])->sparseInfo) + record->cmdInfo->sparse.insert(GetRecord(pBuffers[i])->sparseInfo); + } + } +} + +template +bool WrappedVulkan::Serialise_vkCmdBeginTransformFeedbackEXT( + SerialiserType &ser, VkCommandBuffer commandBuffer, uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(firstBuffer); + SERIALISE_ELEMENT(bufferCount); + SERIALISE_ELEMENT_ARRAY(pCounterBuffers, bufferCount); + SERIALISE_ELEMENT_ARRAY(pCounterBufferOffsets, bufferCount); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + { + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + + // only if we're partially recording do we update this state + if(ShouldUpdateRenderState(m_LastCmdBufferID)) + { + m_RenderState.firstxfbcounter = firstBuffer; + m_RenderState.xfbcounters.resize(bufferCount); + + for(uint32_t i = 0; i < bufferCount; i++) + { + m_RenderState.xfbcounters[i].buf = + pCounterBuffers ? GetResID(pCounterBuffers[i]) : ResourceId(); + m_RenderState.xfbcounters[i].offs = pCounterBufferOffsets ? 
pCounterBufferOffsets[i] : 0; + } + } + + ObjDisp(commandBuffer) + ->CmdBeginTransformFeedbackEXT(Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), + pCounterBufferOffsets); + } + } + else + { + ObjDisp(commandBuffer) + ->CmdBeginTransformFeedbackEXT(Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), + pCounterBufferOffsets); + + // track while reading, for fetching the right set of outputs in AddDrawcall + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbfirst = firstBuffer; + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbcount = bufferCount; + } + } + + return true; +} + +void WrappedVulkan::vkCmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL(ObjDisp(commandBuffer) + ->CmdBeginTransformFeedbackEXT( + Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), pCounterBufferOffsets)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + ser.SetDrawChunk(); + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdBeginTransformFeedbackEXT); + Serialise_vkCmdBeginTransformFeedbackEXT(ser, commandBuffer, firstBuffer, bufferCount, + pCounterBuffers, pCounterBufferOffsets); + + record->AddChunk(scope.Get()); + } +} + +template +bool WrappedVulkan::Serialise_vkCmdEndTransformFeedbackEXT( + SerialiserType &ser, VkCommandBuffer commandBuffer, uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, const VkDeviceSize *pCounterBufferOffsets) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(firstBuffer); + SERIALISE_ELEMENT(bufferCount); + SERIALISE_ELEMENT_ARRAY(pCounterBuffers, bufferCount); + SERIALISE_ELEMENT_ARRAY(pCounterBufferOffsets, bufferCount); + + 
Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + { + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + + // only if we're partially recording do we update this state + if(ShouldUpdateRenderState(m_LastCmdBufferID)) + { + m_RenderState.firstxfbcounter = 0; + m_RenderState.xfbcounters.clear(); + } + + ObjDisp(commandBuffer) + ->CmdEndTransformFeedbackEXT(Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), + pCounterBufferOffsets); + } + } + else + { + ObjDisp(commandBuffer) + ->CmdEndTransformFeedbackEXT(Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), + pCounterBufferOffsets); + + // track while reading, for fetching the right set of outputs in AddDrawcall + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbfirst = 0; + m_BakedCmdBufferInfo[m_LastCmdBufferID].state.xfbcount = 0; + } + } + + return true; +} + +void WrappedVulkan::vkCmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL(ObjDisp(commandBuffer) + ->CmdEndTransformFeedbackEXT( + Unwrap(commandBuffer), firstBuffer, bufferCount, + UnwrapArray(pCounterBuffers, bufferCount), pCounterBufferOffsets)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + ser.SetDrawChunk(); + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdEndTransformFeedbackEXT); + Serialise_vkCmdEndTransformFeedbackEXT(ser, commandBuffer, firstBuffer, bufferCount, + pCounterBuffers, pCounterBufferOffsets); + + record->AddChunk(scope.Get()); + } +} + +template +bool 
WrappedVulkan::Serialise_vkCmdBeginQueryIndexedEXT(SerialiserType &ser, + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, + VkQueryControlFlags flags, uint32_t index) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(queryPool); + SERIALISE_ELEMENT(query); + SERIALISE_ELEMENT_TYPED(VkQueryControlFlagBits, flags); + SERIALISE_ELEMENT(index); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + ObjDisp(commandBuffer) + ->CmdBeginQueryIndexedEXT(Unwrap(commandBuffer), Unwrap(queryPool), query, flags, index); + } + + return true; +} + +void WrappedVulkan::vkCmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t query, VkQueryControlFlags flags, + uint32_t index) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer) + ->CmdBeginQueryIndexedEXT(Unwrap(commandBuffer), Unwrap(queryPool), query, flags, index)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdBeginQueryIndexedEXT); + Serialise_vkCmdBeginQueryIndexedEXT(ser, commandBuffer, queryPool, query, flags, index); + + record->AddChunk(scope.Get()); + record->MarkResourceFrameReferenced(GetResID(queryPool), eFrameRef_Read); + } +} + +template +bool WrappedVulkan::Serialise_vkCmdEndQueryIndexedEXT(SerialiserType &ser, + VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, + uint32_t index) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(queryPool); + SERIALISE_ELEMENT(query); + SERIALISE_ELEMENT(index); + + Serialise_DebugMessages(ser); + + 
SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + ObjDisp(commandBuffer) + ->CmdEndQueryIndexedEXT(Unwrap(commandBuffer), Unwrap(queryPool), query, index); + } + + return true; +} + +void WrappedVulkan::vkCmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, + uint32_t query, uint32_t index) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer) + ->CmdEndQueryIndexedEXT(Unwrap(commandBuffer), Unwrap(queryPool), query, index)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdEndQueryIndexedEXT); + Serialise_vkCmdEndQueryIndexedEXT(ser, commandBuffer, queryPool, query, index); + + record->AddChunk(scope.Get()); + record->MarkResourceFrameReferenced(GetResID(queryPool), eFrameRef_Read); + } +} + INSTANTIATE_FUNCTION_SERIALISED(VkResult, vkCreateCommandPool, VkDevice device, const VkCommandPoolCreateInfo *pCreateInfo, const VkAllocationCallbacks *pAllocator, VkCommandPool *pCommandPool); @@ -4396,4 +4785,22 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdInsertDebugUtilsLabelEXT, VkCommandBu const VkDebugUtilsLabelEXT *pLabelInfo); INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdSetDeviceMask, VkCommandBuffer commandBuffer, - uint32_t deviceMask); \ No newline at end of file + uint32_t deviceMask); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdBindTransformFeedbackBuffersEXT, + VkCommandBuffer commandBuffer, uint32_t firstBinding, + uint32_t bindingCount, const VkBuffer *pBuffers, + const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes); +INSTANTIATE_FUNCTION_SERIALISED(void, 
vkCmdBeginTransformFeedbackEXT, VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets); +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdEndTransformFeedbackEXT, VkCommandBuffer commandBuffer, + uint32_t firstBuffer, uint32_t bufferCount, + const VkBuffer *pCounterBuffers, + const VkDeviceSize *pCounterBufferOffsets); +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdBeginQueryIndexedEXT, VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, VkQueryControlFlags flags, + uint32_t index); +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdEndQueryIndexedEXT, VkCommandBuffer commandBuffer, + VkQueryPool queryPool, uint32_t query, uint32_t index); \ No newline at end of file diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 43ab9df43..3aa32fcbc 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -1080,6 +1080,20 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi RDCLOG("Enabling VK_MVK_moltenvk"); } + // enable VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME if it's available, to fetch mesh output in + // tessellation/geometry stages + if(supportedExtensions.find(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME) != supportedExtensions.end()) + { + Extensions.push_back(VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME); + RDCLOG("Enabling VK_EXT_transform_feedback_extension"); + } + else + { + RDCWARN( + "VK_EXT_transform_feedback_extension not available, mesh output from " + "geometry/tessellation stages will not be available"); + } + createInfo.enabledLayerCount = (uint32_t)Layers.size(); const char **layerArray = NULL; diff --git a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp index 5487a42ba..0d6ed5655 100644 --- 
a/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_draw_funcs.cpp @@ -52,6 +52,7 @@ VkIndirectPatchData WrappedVulkan::FetchIndirectData(VkIndirectPatchType type, case VkIndirectPatchType::DrawIndexedIndirectCount: dataSize = sizeof(VkDrawIndexedIndirectCommand) + (count > 0 ? count - 1 : 0) * stride; break; + case VkIndirectPatchType::DrawIndirectByteCount: dataSize = 4; break; } bufInfo.size = AlignUp16(dataSize); @@ -80,6 +81,9 @@ VkIndirectPatchData WrappedVulkan::FetchIndirectData(VkIndirectPatchType type, dataSize, }; + if(type == VkIndirectPatchType::DrawIndirectByteCount) + buf.srcAccessMask |= VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT; + ObjDisp(commandBuffer) ->CmdPipelineBarrier(Unwrap(commandBuffer), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 1, &buf, 0, NULL); @@ -3069,6 +3073,134 @@ void WrappedVulkan::vkCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuff } } +template +bool WrappedVulkan::Serialise_vkCmdDrawIndirectByteCountEXT( + SerialiserType &ser, VkCommandBuffer commandBuffer, uint32_t instanceCount, + uint32_t firstInstance, VkBuffer counterBuffer, VkDeviceSize counterBufferOffset, + uint32_t counterOffset, uint32_t vertexStride) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(instanceCount); + SERIALISE_ELEMENT(firstInstance); + SERIALISE_ELEMENT(counterBuffer); + SERIALISE_ELEMENT(counterBufferOffset); + SERIALISE_ELEMENT(counterOffset); + SERIALISE_ELEMENT(vertexStride); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + // do execution (possibly partial) + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID) && IsDrawInRenderPass()) + { + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + + uint32_t eventId = HandlePreCallback(commandBuffer); + + ObjDisp(commandBuffer) 
+ ->CmdDrawIndirectByteCountEXT(Unwrap(commandBuffer), instanceCount, firstInstance, + Unwrap(counterBuffer), counterBufferOffset, counterOffset, + vertexStride); + + if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer)) + { + ObjDisp(commandBuffer) + ->CmdDrawIndirectByteCountEXT(Unwrap(commandBuffer), instanceCount, firstInstance, + Unwrap(counterBuffer), counterBufferOffset, + counterOffset, vertexStride); + m_DrawcallCallback->PostRedraw(eventId, commandBuffer); + } + } + } + else + { + VkIndirectPatchData indirectPatch = + FetchIndirectData(VkIndirectPatchType::DrawIndirectByteCount, commandBuffer, + counterBuffer, counterBufferOffset, 1, vertexStride); + indirectPatch.vertexoffset = counterOffset; + + ObjDisp(commandBuffer) + ->CmdDrawIndirectByteCountEXT(Unwrap(commandBuffer), instanceCount, firstInstance, + Unwrap(counterBuffer), counterBufferOffset, counterOffset, + vertexStride); + + string name = "vkCmdDrawIndirectByteCountEXT"; + + if(!IsDrawInRenderPass()) + { + AddDebugMessage(MessageCategory::Execution, MessageSeverity::High, + MessageSource::IncorrectAPIUse, + "Drawcall in happening outside of render pass, or in secondary command " + "buffer without RENDER_PASS_CONTINUE_BIT"); + } + + DrawcallDescription draw; + + AddEvent(); + + draw.name = name; + draw.instanceOffset = firstInstance; + draw.numInstances = instanceCount; + draw.flags = DrawFlags::Drawcall | DrawFlags::Instanced | DrawFlags::Indirect; + + AddDrawcall(draw, true); + + VulkanDrawcallTreeNode &drawNode = GetDrawcallStack().back()->children.back(); + + drawNode.indirectPatch = indirectPatch; + + drawNode.resourceUsage.push_back(std::make_pair( + GetResID(counterBuffer), EventUsage(drawNode.draw.eventId, ResourceUsage::Indirect))); + + return true; + } + } + + return true; +} + +void WrappedVulkan::vkCmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, + uint32_t instanceCount, uint32_t firstInstance, + VkBuffer counterBuffer, + VkDeviceSize counterBufferOffset, + 
uint32_t counterOffset, uint32_t vertexStride) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL(ObjDisp(commandBuffer) + ->CmdDrawIndirectByteCountEXT(Unwrap(commandBuffer), instanceCount, + firstInstance, Unwrap(counterBuffer), + counterBufferOffset, counterOffset, + vertexStride)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + ser.SetDrawChunk(); + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdDrawIndirectByteCountEXT); + Serialise_vkCmdDrawIndirectByteCountEXT(ser, commandBuffer, instanceCount, firstInstance, + counterBuffer, counterBufferOffset, counterOffset, + vertexStride); + + record->AddChunk(scope.Get()); + + record->MarkResourceFrameReferenced(GetResID(counterBuffer), eFrameRef_Read); + record->MarkResourceFrameReferenced(GetRecord(counterBuffer)->baseResource, eFrameRef_Read); + if(GetRecord(counterBuffer)->sparseInfo) + record->cmdInfo->sparse.insert(GetRecord(counterBuffer)->sparseInfo); + } +} + INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDraw, VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance); @@ -3143,4 +3275,9 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndexedIndirectCountKHR, VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, - uint32_t maxDrawCount, uint32_t stride); \ No newline at end of file + uint32_t maxDrawCount, uint32_t stride); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndirectByteCountEXT, VkCommandBuffer commandBuffer, + uint32_t instanceCount, uint32_t firstInstance, + VkBuffer counterBuffer, VkDeviceSize counterBufferOffset, + uint32_t counterOffset, uint32_t vertexStride); \ No newline at end of file diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp index 
0a6a6f3ce..774197ea9 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp @@ -449,6 +449,17 @@ bool WrappedVulkan::PatchIndirectDraw(VkIndirectPatchType type, DrawcallDescript valid = true; } } + else if(type == VkIndirectPatchType::DrawIndirectByteCount) + { + if(argptr && argptr + 4 <= argend) + { + uint32_t *arg = (uint32_t *)argptr; + + draw.numIndices = *arg; + + valid = true; + } + } else if(type == VkIndirectPatchType::DrawIndexedIndirect || type == VkIndirectPatchType::DrawIndexedIndirectCount) { @@ -540,7 +551,8 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector &cmd n.draw.dispatchDimension[1] = args->y; n.draw.dispatchDimension[2] = args->z; } - else if(n.indirectPatch.type == VkIndirectPatchType::DrawIndirect || + else if(n.indirectPatch.type == VkIndirectPatchType::DrawIndirectByteCount || + n.indirectPatch.type == VkIndirectPatchType::DrawIndirect || n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirect || n.indirectPatch.type == VkIndirectPatchType::DrawIndirectCount || n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirectCount) @@ -598,6 +610,16 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector &cmd { bool valid = PatchIndirectDraw(n.indirectPatch.type, n.draw, ptr, end); + if(n.indirectPatch.type == VkIndirectPatchType::DrawIndirectByteCount) + { + if(n.draw.numIndices > n.indirectPatch.vertexoffset) + n.draw.numIndices -= n.indirectPatch.vertexoffset; + else + n.draw.numIndices = 0; + + n.draw.numIndices /= n.indirectPatch.stride; + } + if(valid) n.draw.name = StringFormat::Fmt("%s(%u) => <%u, %u>", n.draw.name.c_str(), n.indirectPatch.count, diff --git a/renderdoc/replay/renderdoc_serialise.inl b/renderdoc/replay/renderdoc_serialise.inl index e1c63a287..6d3db0ddf 100644 --- a/renderdoc/replay/renderdoc_serialise.inl +++ b/renderdoc/replay/renderdoc_serialise.inl @@ -1924,6 +1924,27 @@ void DoSerialise(SerialiserType &ser, VKPipe::Tessellation &el) 
SIZE_CHECK(8); } +template +void DoSerialise(SerialiserType &ser, VKPipe::XFBBuffer &el) +{ + SERIALISE_MEMBER(active); + SERIALISE_MEMBER(bufferResourceId); + SERIALISE_MEMBER(byteOffset); + SERIALISE_MEMBER(byteSize); + SERIALISE_MEMBER(counterBufferResourceId); + SERIALISE_MEMBER(counterBufferOffset); + + SIZE_CHECK(16); +} + +template +void DoSerialise(SerialiserType &ser, VKPipe::TransformFeedback &el) +{ + SERIALISE_MEMBER(buffers); + + SIZE_CHECK(16); +} + template void DoSerialise(SerialiserType &ser, VKPipe::ViewportScissor &el) { @@ -2118,7 +2139,7 @@ void DoSerialise(SerialiserType &ser, VKPipe::State &el) SERIALISE_MEMBER(images); - SIZE_CHECK(1344); + SIZE_CHECK(1360); } #pragma endregion Vulkan pipeline state