diff --git a/renderdoc/driver/vulkan/vk_common.cpp b/renderdoc/driver/vulkan/vk_common.cpp index 665424ac8..470960140 100644 --- a/renderdoc/driver/vulkan/vk_common.cpp +++ b/renderdoc/driver/vulkan/vk_common.cpp @@ -380,27 +380,6 @@ bool VkInitParams::IsSupportedVersion(uint64_t ver) return false; } -VkAccessFlags MakeAccessMask(VkImageLayout layout) -{ - switch(layout) - { - case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - return VkAccessFlags(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT); - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: - return VkAccessFlags(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT); - case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: return VkAccessFlags(VK_ACCESS_TRANSFER_WRITE_BIT); - case VK_IMAGE_LAYOUT_PREINITIALIZED: return VkAccessFlags(VK_ACCESS_HOST_WRITE_BIT); - case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL: - return VkAccessFlags(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT); - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - return VkAccessFlags(VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_SHADER_READ_BIT); - case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: return VkAccessFlags(VK_ACCESS_TRANSFER_READ_BIT); - default: break; - } - - return VkAccessFlags(0); -} void SanitiseReplayImageLayout(VkImageLayout &layout) { // we don't replay with present layouts since we don't create actual swapchains. 
So change any diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index a1de1096f..6e080bb90 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -89,9 +89,6 @@ BlendOperation MakeBlendOp(VkBlendOp op); StencilOperation MakeStencilOp(VkStencilOp op); rdcstr HumanDriverName(VkDriverId driverId); -// set conservative access bits for this image layout -VkAccessFlags MakeAccessMask(VkImageLayout layout); - void SanitiseOldImageLayout(VkImageLayout &layout); void SanitiseNewImageLayout(VkImageLayout &layout); void SanitiseReplayImageLayout(VkImageLayout &layout); @@ -297,6 +294,28 @@ void UnwrapNextChain(CaptureState state, const char *structName, byte *&tempMem, VkBaseInStructure *infoStruct); void CopyNextChainForPatching(const char *structName, byte *&tempMem, VkBaseInStructure *infoStruct); +template <typename VkStruct> +VkStruct *UnwrapStructAndChain(CaptureState state, byte *&tempMem, const VkStruct *base) +{ + VkBaseInStructure dummy; + dummy.pNext = (const VkBaseInStructure *)base; + + UnwrapNextChain(state, TypeName<VkStruct>().c_str(), tempMem, &dummy); + + return (VkStruct *)dummy.pNext; +} + +template <typename VkStruct> +void AppendNextStruct(VkStruct &base, void *newStruct) +{ + VkBaseOutStructure *next = (VkBaseOutStructure *)&base; + + while(next->pNext) + next = next->pNext; + + next->pNext = (VkBaseOutStructure *)newStruct; +} + template <typename VkStruct> const VkBaseInStructure *FindNextStruct(const VkStruct *haystack, VkStructureType needle) { @@ -679,6 +698,13 @@ enum class VulkanChunk : uint32_t vkCmdCopyImageToBuffer2KHR, vkCmdBlitImage2KHR, vkCmdResolveImage2KHR, + vkCmdSetEvent2KHR, + vkCmdResetEvent2KHR, + vkCmdWaitEvents2KHR, + vkCmdPipelineBarrier2KHR, + vkCmdWriteTimestamp2KHR, + vkQueueSubmit2KHR, + vkCmdWriteBufferMarker2AMD, Max, }; @@ -748,6 +774,7 @@ DECLARE_REFLECTION_STRUCT(VkBufferDeviceAddressCreateInfoEXT); DECLARE_REFLECTION_STRUCT(VkBufferDeviceAddressInfo); DECLARE_REFLECTION_STRUCT(VkBufferImageCopy2KHR); 
DECLARE_REFLECTION_STRUCT(VkBufferMemoryBarrier); +DECLARE_REFLECTION_STRUCT(VkBufferMemoryBarrier2KHR); DECLARE_REFLECTION_STRUCT(VkBufferMemoryRequirementsInfo2); DECLARE_REFLECTION_STRUCT(VkBufferOpaqueCaptureAddressCreateInfo); DECLARE_REFLECTION_STRUCT(VkBufferViewCreateInfo); @@ -756,6 +783,7 @@ DECLARE_REFLECTION_STRUCT(VkCommandBufferAllocateInfo); DECLARE_REFLECTION_STRUCT(VkCommandBufferBeginInfo); DECLARE_REFLECTION_STRUCT(VkCommandBufferInheritanceConditionalRenderingInfoEXT); DECLARE_REFLECTION_STRUCT(VkCommandBufferInheritanceInfo); +DECLARE_REFLECTION_STRUCT(VkCommandBufferSubmitInfoKHR); DECLARE_REFLECTION_STRUCT(VkCommandPoolCreateInfo); DECLARE_REFLECTION_STRUCT(VkComputePipelineCreateInfo); DECLARE_REFLECTION_STRUCT(VkConditionalRenderingBeginInfoEXT); @@ -776,6 +804,7 @@ DECLARE_REFLECTION_STRUCT(VkDebugUtilsObjectTagInfoEXT); DECLARE_REFLECTION_STRUCT(VkDedicatedAllocationBufferCreateInfoNV); DECLARE_REFLECTION_STRUCT(VkDedicatedAllocationImageCreateInfoNV); DECLARE_REFLECTION_STRUCT(VkDedicatedAllocationMemoryAllocateInfoNV); +DECLARE_REFLECTION_STRUCT(VkDependencyInfoKHR); DECLARE_REFLECTION_STRUCT(VkDescriptorPoolCreateInfo); DECLARE_REFLECTION_STRUCT(VkDescriptorPoolInlineUniformBlockCreateInfoEXT); DECLARE_REFLECTION_STRUCT(VkDescriptorSetAllocateInfo); @@ -836,6 +865,7 @@ DECLARE_REFLECTION_STRUCT(VkImageCreateInfo); DECLARE_REFLECTION_STRUCT(VkImageFormatListCreateInfo); DECLARE_REFLECTION_STRUCT(VkImageFormatProperties2); DECLARE_REFLECTION_STRUCT(VkImageMemoryBarrier); +DECLARE_REFLECTION_STRUCT(VkImageMemoryBarrier2KHR); DECLARE_REFLECTION_STRUCT(VkImageMemoryRequirementsInfo2); DECLARE_REFLECTION_STRUCT(VkImagePlaneMemoryRequirementsInfo); DECLARE_REFLECTION_STRUCT(VkImageResolve2KHR); @@ -855,6 +885,7 @@ DECLARE_REFLECTION_STRUCT(VkMappedMemoryRange); DECLARE_REFLECTION_STRUCT(VkMemoryAllocateFlagsInfo); DECLARE_REFLECTION_STRUCT(VkMemoryAllocateInfo); DECLARE_REFLECTION_STRUCT(VkMemoryBarrier); 
+DECLARE_REFLECTION_STRUCT(VkMemoryBarrier2KHR); DECLARE_REFLECTION_STRUCT(VkMemoryDedicatedAllocateInfo); DECLARE_REFLECTION_STRUCT(VkMemoryDedicatedRequirements); DECLARE_REFLECTION_STRUCT(VkMemoryFdPropertiesKHR); @@ -947,6 +978,7 @@ DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceSubgroupProperties); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceSurfaceInfo2KHR); +DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceSynchronization2FeaturesKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceTimelineSemaphoreFeatures); @@ -1022,6 +1054,7 @@ DECLARE_REFLECTION_STRUCT(VkSamplerYcbcrConversionInfo); DECLARE_REFLECTION_STRUCT(VkSemaphoreCreateInfo); DECLARE_REFLECTION_STRUCT(VkSemaphoreGetFdInfoKHR); DECLARE_REFLECTION_STRUCT(VkSemaphoreSignalInfo); +DECLARE_REFLECTION_STRUCT(VkSemaphoreSubmitInfoKHR); DECLARE_REFLECTION_STRUCT(VkSemaphoreTypeCreateInfo); DECLARE_REFLECTION_STRUCT(VkSemaphoreWaitInfo); DECLARE_REFLECTION_STRUCT(VkShaderModuleCreateInfo); @@ -1030,6 +1063,7 @@ DECLARE_REFLECTION_STRUCT(VkSharedPresentSurfaceCapabilitiesKHR); DECLARE_REFLECTION_STRUCT(VkSparseImageFormatProperties2); DECLARE_REFLECTION_STRUCT(VkSparseImageMemoryRequirements2); DECLARE_REFLECTION_STRUCT(VkSubmitInfo); +DECLARE_REFLECTION_STRUCT(VkSubmitInfo2KHR); DECLARE_REFLECTION_STRUCT(VkSubpassBeginInfo); DECLARE_REFLECTION_STRUCT(VkSubpassDependency2); DECLARE_REFLECTION_STRUCT(VkSubpassDescription2); @@ -1071,6 +1105,7 @@ DECLARE_DESERIALISE_TYPE(VkBufferCopy2KHR); DECLARE_DESERIALISE_TYPE(VkBufferCreateInfo); DECLARE_DESERIALISE_TYPE(VkBufferImageCopy2KHR); DECLARE_DESERIALISE_TYPE(VkBufferMemoryBarrier); +DECLARE_DESERIALISE_TYPE(VkBufferMemoryBarrier2KHR); 
DECLARE_DESERIALISE_TYPE(VkBufferMemoryRequirementsInfo2); DECLARE_DESERIALISE_TYPE(VkBufferOpaqueCaptureAddressCreateInfo); DECLARE_DESERIALISE_TYPE(VkBufferViewCreateInfo); @@ -1079,6 +1114,7 @@ DECLARE_DESERIALISE_TYPE(VkCommandBufferAllocateInfo); DECLARE_DESERIALISE_TYPE(VkCommandBufferBeginInfo); DECLARE_DESERIALISE_TYPE(VkCommandBufferInheritanceConditionalRenderingInfoEXT); DECLARE_DESERIALISE_TYPE(VkCommandBufferInheritanceInfo); +DECLARE_DESERIALISE_TYPE(VkCommandBufferSubmitInfoKHR); DECLARE_DESERIALISE_TYPE(VkCommandPoolCreateInfo); DECLARE_DESERIALISE_TYPE(VkComputePipelineCreateInfo); DECLARE_DESERIALISE_TYPE(VkConditionalRenderingBeginInfoEXT); @@ -1099,6 +1135,7 @@ DECLARE_DESERIALISE_TYPE(VkDebugUtilsObjectTagInfoEXT); DECLARE_DESERIALISE_TYPE(VkDedicatedAllocationBufferCreateInfoNV); DECLARE_DESERIALISE_TYPE(VkDedicatedAllocationImageCreateInfoNV); DECLARE_DESERIALISE_TYPE(VkDedicatedAllocationMemoryAllocateInfoNV); +DECLARE_DESERIALISE_TYPE(VkDependencyInfoKHR); DECLARE_DESERIALISE_TYPE(VkDescriptorPoolCreateInfo); DECLARE_DESERIALISE_TYPE(VkDescriptorPoolInlineUniformBlockCreateInfoEXT); DECLARE_DESERIALISE_TYPE(VkDescriptorSetAllocateInfo); @@ -1158,6 +1195,7 @@ DECLARE_DESERIALISE_TYPE(VkImageCreateInfo); DECLARE_DESERIALISE_TYPE(VkImageFormatListCreateInfo); DECLARE_DESERIALISE_TYPE(VkImageFormatProperties2); DECLARE_DESERIALISE_TYPE(VkImageMemoryBarrier); +DECLARE_DESERIALISE_TYPE(VkImageMemoryBarrier2KHR); DECLARE_DESERIALISE_TYPE(VkImageMemoryRequirementsInfo2); DECLARE_DESERIALISE_TYPE(VkImagePlaneMemoryRequirementsInfo); DECLARE_DESERIALISE_TYPE(VkImageResolve2KHR); @@ -1177,6 +1215,7 @@ DECLARE_DESERIALISE_TYPE(VkMappedMemoryRange); DECLARE_DESERIALISE_TYPE(VkMemoryAllocateFlagsInfo); DECLARE_DESERIALISE_TYPE(VkMemoryAllocateInfo); DECLARE_DESERIALISE_TYPE(VkMemoryBarrier); +DECLARE_DESERIALISE_TYPE(VkMemoryBarrier2KHR); DECLARE_DESERIALISE_TYPE(VkMemoryDedicatedAllocateInfo); DECLARE_DESERIALISE_TYPE(VkMemoryDedicatedRequirements); 
DECLARE_DESERIALISE_TYPE(VkMemoryFdPropertiesKHR); @@ -1266,6 +1305,7 @@ DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceSubgroupProperties); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceSurfaceInfo2KHR); +DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceSynchronization2FeaturesKHR); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT); DECLARE_DESERIALISE_TYPE(VkPhysicalDeviceTimelineSemaphoreFeatures); @@ -1338,6 +1378,7 @@ DECLARE_DESERIALISE_TYPE(VkSamplerYcbcrConversionInfo); DECLARE_DESERIALISE_TYPE(VkSemaphoreCreateInfo); DECLARE_DESERIALISE_TYPE(VkSemaphoreGetFdInfoKHR); DECLARE_DESERIALISE_TYPE(VkSemaphoreSignalInfo); +DECLARE_DESERIALISE_TYPE(VkSemaphoreSubmitInfoKHR); DECLARE_DESERIALISE_TYPE(VkSemaphoreTypeCreateInfo); DECLARE_DESERIALISE_TYPE(VkSemaphoreWaitInfo); DECLARE_DESERIALISE_TYPE(VkShaderModuleCreateInfo); @@ -1346,6 +1387,7 @@ DECLARE_DESERIALISE_TYPE(VkSharedPresentSurfaceCapabilitiesKHR); DECLARE_DESERIALISE_TYPE(VkSparseImageFormatProperties2); DECLARE_DESERIALISE_TYPE(VkSparseImageMemoryRequirements2); DECLARE_DESERIALISE_TYPE(VkSubmitInfo); +DECLARE_DESERIALISE_TYPE(VkSubmitInfo2KHR); DECLARE_DESERIALISE_TYPE(VkSubpassBeginInfo); DECLARE_DESERIALISE_TYPE(VkSubpassDependency2); DECLARE_DESERIALISE_TYPE(VkSubpassDescription2); @@ -1518,9 +1560,20 @@ DECLARE_REFLECTION_STRUCT(VkPresentFrameTokenGGP); DECLARE_DESERIALISE_TYPE(VkPresentFrameTokenGGP); #endif +// we add these fake enums so we have a type for type-dispatch in the serialiser. 
Due to C ABI rules +// the vulkan API doesn't define native 64-bit enums itself +enum VkAccessFlagBits2KHR +{ +}; + +enum VkPipelineStageFlagBits2KHR +{ +}; + // enums DECLARE_REFLECTION_ENUM(VkAccessFlagBits); +DECLARE_REFLECTION_ENUM(VkAccessFlagBits2KHR); DECLARE_REFLECTION_ENUM(VkAcquireProfilingLockFlagBitsKHR); DECLARE_REFLECTION_ENUM(VkAttachmentDescriptionFlagBits); DECLARE_REFLECTION_ENUM(VkAttachmentLoadOp); @@ -1603,6 +1656,7 @@ DECLARE_REFLECTION_ENUM(VkPipelineCreationFeedbackFlagBitsEXT); DECLARE_REFLECTION_ENUM(VkPipelineExecutableStatisticFormatKHR); DECLARE_REFLECTION_ENUM(VkPipelineShaderStageCreateFlagBits); DECLARE_REFLECTION_ENUM(VkPipelineStageFlagBits); +DECLARE_REFLECTION_ENUM(VkPipelineStageFlagBits2KHR); DECLARE_REFLECTION_ENUM(VkPointClippingBehavior); DECLARE_REFLECTION_ENUM(VkPolygonMode); DECLARE_REFLECTION_ENUM(VkPresentModeKHR); @@ -1637,6 +1691,7 @@ DECLARE_REFLECTION_ENUM(VkStencilFaceFlagBits); DECLARE_REFLECTION_ENUM(VkStencilOp); DECLARE_REFLECTION_ENUM(VkStructureType); DECLARE_REFLECTION_ENUM(VkSubgroupFeatureFlagBits); +DECLARE_REFLECTION_ENUM(VkSubmitFlagBitsKHR); DECLARE_REFLECTION_ENUM(VkSubpassContents); DECLARE_REFLECTION_ENUM(VkSubpassDescriptionFlagBits); DECLARE_REFLECTION_ENUM(VkSurfaceCounterFlagBitsEXT); diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 6ef1e10e4..fd4369431 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -1195,6 +1195,9 @@ static const VkExtensionProperties supportedExtensions[] = { { VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_EXTENSION_NAME, VK_KHR_SWAPCHAIN_MUTABLE_FORMAT_SPEC_VERSION, }, + { + VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, VK_KHR_SYNCHRONIZATION_2_SPEC_VERSION, + }, { VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_KHR_TIMELINE_SEMAPHORE_SPEC_VERSION, }, @@ -3278,6 +3281,24 @@ bool WrappedVulkan::ProcessChunk(ReadSerialiser &ser, VulkanChunk chunk) case VulkanChunk::vkCmdResolveImage2KHR: return 
Serialise_vkCmdResolveImage2KHR(ser, VK_NULL_HANDLE, NULL); + case VulkanChunk::vkCmdSetEvent2KHR: + return Serialise_vkCmdSetEvent2KHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, NULL); + case VulkanChunk::vkCmdResetEvent2KHR: + return Serialise_vkCmdResetEvent2KHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, + VK_PIPELINE_STAGE_2_NONE_KHR); + case VulkanChunk::vkCmdWaitEvents2KHR: + return Serialise_vkCmdWaitEvents2KHR(ser, VK_NULL_HANDLE, 0, NULL, NULL); + case VulkanChunk::vkCmdPipelineBarrier2KHR: + return Serialise_vkCmdPipelineBarrier2KHR(ser, VK_NULL_HANDLE, NULL); + case VulkanChunk::vkCmdWriteTimestamp2KHR: + return Serialise_vkCmdWriteTimestamp2KHR(ser, VK_NULL_HANDLE, VK_PIPELINE_STAGE_2_NONE_KHR, + VK_NULL_HANDLE, 0); + case VulkanChunk::vkQueueSubmit2KHR: + return Serialise_vkQueueSubmit2KHR(ser, VK_NULL_HANDLE, 1, NULL, VK_NULL_HANDLE); + case VulkanChunk::vkCmdWriteBufferMarker2AMD: + return Serialise_vkCmdWriteBufferMarker2AMD(ser, VK_NULL_HANDLE, VK_PIPELINE_STAGE_2_NONE_KHR, + VK_NULL_HANDLE, 0, 0); + // chunks that are reserved but not yet serialised case VulkanChunk::vkResetCommandPool: case VulkanChunk::vkCreateDepthTargetView: diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index f922074fc..a2f7e72ea 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -927,6 +927,10 @@ private: bool PatchIndirectDraw(size_t drawIndex, uint32_t paramStride, VkIndirectPatchType type, DrawcallDescription &draw, byte *&argptr, byte *argend); void InsertDrawsAndRefreshIDs(BakedCmdBufferInfo &cmdBufInfo); + void CaptureQueueSubmit(VkQueue queue, const rdcarray &commandBuffers, + VkFence fence); + void ReplayQueueSubmit(VkQueue queue, VkSubmitInfo2KHR submitInfo); + void DoSubmit(VkQueue queue, VkSubmitInfo2KHR submitInfo); rdcarray m_DrawcallStack; @@ -2021,6 +2025,10 @@ public: IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdWriteBufferMarkerAMD, VkCommandBuffer commandBuffer, VkPipelineStageFlagBits 
pipelineStage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); + // VK_KHR_synchronization2 interaction + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdWriteBufferMarker2AMD, VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkBuffer dstBuffer, + VkDeviceSize dstOffset, uint32_t marker); // VK_EXT_debug_utils IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkSetDebugUtilsObjectNameEXT, VkDevice device, @@ -2405,4 +2413,25 @@ public: const VkBlitImageInfo2KHR *pBlitImageInfo); IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdResolveImage2KHR, VkCommandBuffer commandBuffer, const VkResolveImageInfo2KHR *pResolveImageInfo); + + // VK_KHR_synchronization2 + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdSetEvent2KHR, VkCommandBuffer commandBuffer, + VkEvent event, const VkDependencyInfoKHR *pDependencyInfo); + + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdResetEvent2KHR, VkCommandBuffer commandBuffer, + VkEvent event, VkPipelineStageFlags2KHR stageMask); + + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdWaitEvents2KHR, VkCommandBuffer commandBuffer, + uint32_t eventCount, const VkEvent *pEvents, + const VkDependencyInfoKHR *pDependencyInfos); + + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdPipelineBarrier2KHR, VkCommandBuffer commandBuffer, + const VkDependencyInfoKHR *pDependencyInfo); + + IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdWriteTimestamp2KHR, VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkQueryPool queryPool, + uint32_t query); + + IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkQueueSubmit2KHR, VkQueue queue, uint32_t submitCount, + const VkSubmitInfo2KHR *pSubmits, VkFence fence); }; diff --git a/renderdoc/driver/vulkan/vk_hookset_defs.h b/renderdoc/driver/vulkan/vk_hookset_defs.h index bd4597df9..6b9b54f77 100644 --- a/renderdoc/driver/vulkan/vk_hookset_defs.h +++ b/renderdoc/driver/vulkan/vk_hookset_defs.h @@ -526,7 +526,8 @@ DeclExt(EXT_pipeline_creation_cache_control); \ DeclExt(EXT_private_data); \ DeclExt(EXT_extended_dynamic_state); \ - 
DeclExt(KHR_copy_commands2); + DeclExt(KHR_copy_commands2); \ + DeclExt(KHR_synchronization2); // for simplicity and since the check itself is platform agnostic, // these aren't protected in platform defines @@ -625,7 +626,8 @@ CheckExt(EXT_pipeline_creation_cache_control, VKXX); \ CheckExt(EXT_private_data, VKXX); \ CheckExt(EXT_extended_dynamic_state, VKXX); \ - CheckExt(KHR_copy_commands2, VKXX); + CheckExt(KHR_copy_commands2, VKXX); \ + CheckExt(KHR_synchronization2, VKXX); #define HookInitVulkanInstanceExts_PhysDev() \ HookInitExtension(KHR_surface, GetPhysicalDeviceSurfaceSupportKHR); \ @@ -859,6 +861,14 @@ HookInitExtension(KHR_copy_commands2, CmdCopyImageToBuffer2KHR); \ HookInitExtension(KHR_copy_commands2, CmdBlitImage2KHR); \ HookInitExtension(KHR_copy_commands2, CmdResolveImage2KHR); \ + HookInitExtension(KHR_synchronization2, CmdSetEvent2KHR); \ + HookInitExtension(KHR_synchronization2, CmdResetEvent2KHR); \ + HookInitExtension(KHR_synchronization2, CmdWaitEvents2KHR); \ + HookInitExtension(KHR_synchronization2, CmdPipelineBarrier2KHR); \ + HookInitExtension(KHR_synchronization2, CmdWriteTimestamp2KHR); \ + HookInitExtension(KHR_synchronization2, QueueSubmit2KHR); \ + HookInitExtension(KHR_synchronization2 &&AMD_buffer_marker, CmdWriteBufferMarker2AMD); \ + /* No GetQueueCheckpointData2NV without VK_NV_device_diagnostic_checkpoints */ \ HookInitExtension_Device_Win32(); \ HookInitExtension_Device_Linux(); \ HookInitExtension_Device_GGP(); \ @@ -1527,6 +1537,21 @@ const VkBlitImageInfo2KHR *, pBlitImageInfo); \ HookDefine2(void, vkCmdResolveImage2KHR, VkCommandBuffer, commandBuffer, \ const VkResolveImageInfo2KHR *, pResolveImageInfo); \ + HookDefine3(void, vkCmdSetEvent2KHR, VkCommandBuffer, commandBuffer, VkEvent, event, \ + const VkDependencyInfoKHR *, pDependencyInfo); \ + HookDefine3(void, vkCmdResetEvent2KHR, VkCommandBuffer, commandBuffer, VkEvent, event, \ + VkPipelineStageFlags2KHR, stageMask); \ + HookDefine4(void, vkCmdWaitEvents2KHR, 
VkCommandBuffer, commandBuffer, uint32_t, eventCount, \ + const VkEvent *, pEvents, const VkDependencyInfoKHR *, pDependencyInfos); \ + HookDefine2(void, vkCmdPipelineBarrier2KHR, VkCommandBuffer, commandBuffer, \ + const VkDependencyInfoKHR *, pDependencyInfo); \ + HookDefine4(void, vkCmdWriteTimestamp2KHR, VkCommandBuffer, commandBuffer, \ + VkPipelineStageFlags2KHR, stage, VkQueryPool, queryPool, uint32_t, query); \ + HookDefine4(VkResult, vkQueueSubmit2KHR, VkQueue, queue, uint32_t, submitCount, \ + const VkSubmitInfo2KHR *, pSubmits, VkFence, fence); \ + HookDefine5(void, vkCmdWriteBufferMarker2AMD, VkCommandBuffer, commandBuffer, \ + VkPipelineStageFlags2KHR, stage, VkBuffer, dstBuffer, VkDeviceSize, dstOffset, \ + uint32_t, marker); \ HookDefine_Win32(); \ HookDefine_Linux(); \ HookDefine_GGP(); \ diff --git a/renderdoc/driver/vulkan/vk_manager.cpp b/renderdoc/driver/vulkan/vk_manager.cpp index 4c860ae6e..b01b235f0 100644 --- a/renderdoc/driver/vulkan/vk_manager.cpp +++ b/renderdoc/driver/vulkan/vk_manager.cpp @@ -213,6 +213,11 @@ void VulkanResourceManager::RecordBarriers(rdcarrayid : GetResID(t.image); if(id == ResourceId()) @@ -248,6 +253,30 @@ void VulkanResourceManager::RecordBarriers(rdcarray &states, + uint32_t queueFamilyIndex, uint32_t numBarriers, + const VkImageMemoryBarrier2KHR *barriers) +{ + rdcarray<VkImageMemoryBarrier> downcast; + downcast.reserve(numBarriers); + VkImageMemoryBarrier b = {}; + b.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + for(uint32_t i = 0; i < numBarriers; i++) + { + // just truncate, the lower bits all match + b.srcAccessMask = uint32_t(barriers[i].srcAccessMask); + b.dstAccessMask = uint32_t(barriers[i].dstAccessMask); + b.oldLayout = barriers[i].oldLayout; + b.newLayout = barriers[i].newLayout; + b.srcQueueFamilyIndex = barriers[i].srcQueueFamilyIndex; + b.dstQueueFamilyIndex = barriers[i].dstQueueFamilyIndex; + b.image = barriers[i].image; + b.subresourceRange = barriers[i].subresourceRange; + downcast.push_back(b); + 
RecordBarriers(states, queueFamilyIndex, (uint32_t)downcast.size(), downcast.data()); +} + void VulkanResourceManager::MergeBarriers(rdcarray> &dststates, rdcarray> &srcstates) { @@ -792,6 +821,11 @@ void VulkanResourceManager::RecordBarriers(rdcflatmap &s { const VkImageMemoryBarrier &t = barriers[ti]; + // ignore barriers that are do-nothing. Best case this doesn't change our tracking at all and + // worst case this is a KHR_synchronization2 barrier that should not change the layout. + if(t.oldLayout == t.newLayout) + continue; + ResourceId id = IsReplayMode(m_State) ? GetNonDispWrapper(t.image)->id : GetResID(t.image); if(id == ResourceId()) diff --git a/renderdoc/driver/vulkan/vk_manager.h b/renderdoc/driver/vulkan/vk_manager.h index 01a8fb461..116dd5f24 100644 --- a/renderdoc/driver/vulkan/vk_manager.h +++ b/renderdoc/driver/vulkan/vk_manager.h @@ -274,6 +274,13 @@ public: void RecordBarriers(rdcflatmap &states, uint32_t queueFamilyIndex, uint32_t numBarriers, const VkImageMemoryBarrier *barriers); + // we "downcast" to VkImageMemoryBarrier since we don't care about access bits or pipeline stages, + // only layouts, and to date the VkImageMemoryBarrier can represent everything in + // VkImageMemoryBarrier2KHR. This includes new image layouts added (which should only be used if + // the extension is supported). 
+ void RecordBarriers(rdcflatmap &states, uint32_t queueFamilyIndex, + uint32_t numBarriers, const VkImageMemoryBarrier2KHR *barriers); + template void SerialiseImageStates(SerialiserType &ser, std::map &states); diff --git a/renderdoc/driver/vulkan/vk_next_chains.cpp b/renderdoc/driver/vulkan/vk_next_chains.cpp index 1e824e48e..26b73389a 100644 --- a/renderdoc/driver/vulkan/vk_next_chains.cpp +++ b/renderdoc/driver/vulkan/vk_next_chains.cpp @@ -178,6 +178,7 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, VkMemoryAllocateFlagsInfo); \ COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, VkMemoryAllocateInfo); \ COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_BARRIER, VkMemoryBarrier); \ + COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR, VkMemoryBarrier2KHR); \ COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, VkMemoryDedicatedRequirements); \ COPY_STRUCT(VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO, \ VkMemoryOpaqueCaptureAddressAllocateInfo); \ @@ -337,6 +338,8 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, VkPhysicalDeviceSubgroupSizeControlFeaturesEXT) \ COPY_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT, \ VkPhysicalDeviceSubgroupSizeControlPropertiesEXT) \ + COPY_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, \ + VkPhysicalDeviceSynchronization2FeaturesKHR); \ COPY_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT, \ VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT); \ COPY_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT, \ @@ -510,6 +513,8 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, UnwrapInPlace(out->image), UnwrapInPlace(out->memory)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, VkBufferMemoryBarrier, \ UnwrapInPlace(out->buffer)); \ + 
UNWRAP_STRUCT(VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR, VkBufferMemoryBarrier2KHR, \ + UnwrapInPlace(out->buffer)); \ /* VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT aliased by KHR */ \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, VkBufferDeviceAddressInfo, \ UnwrapInPlace(out->buffer)); \ @@ -521,6 +526,8 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, UnwrapInPlace(out->commandPool)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO, VkCommandBufferInheritanceInfo, \ UnwrapInPlace(out->renderPass), UnwrapInPlace(out->framebuffer)); \ + UNWRAP_STRUCT(VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, VkCommandBufferSubmitInfoKHR, \ + UnwrapInPlace(out->commandBuffer)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT, \ VkConditionalRenderingBeginInfoEXT, UnwrapInPlace(out->buffer)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, VkCopyDescriptorSet, \ @@ -535,6 +542,8 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, VkDeviceMemoryOpaqueCaptureAddressInfo, UnwrapInPlace(out->memory)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, VkImageMemoryBarrier, \ UnwrapInPlace(out->image)); \ + UNWRAP_STRUCT(VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR, VkImageMemoryBarrier2KHR, \ + UnwrapInPlace(out->image)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2, \ VkImageMemoryRequirementsInfo2, UnwrapInPlace(out->image)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2, \ @@ -555,6 +564,8 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, UnwrapInPlace(out->conversion)); \ UNWRAP_STRUCT(VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, VkSemaphoreSignalInfo, \ UnwrapInPlace(out->semaphore)); \ + UNWRAP_STRUCT(VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR, VkSemaphoreSubmitInfoKHR, \ + UnwrapInPlace(out->semaphore)); \ 
UNWRAP_STRUCT_CAPTURE_ONLY(VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, \ VkAcquireNextImageInfoKHR, UnwrapInPlace(out->swapchain), \ UnwrapInPlace(out->semaphore), UnwrapInPlace(out->fence)); \ @@ -613,6 +624,7 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, case VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR: \ case VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV: \ case VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV: \ + case VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV: \ case VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDER_PASS_TRANSFORM_INFO_QCOM: \ case VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV: \ case VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR: \ @@ -712,6 +724,7 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, case VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV: \ case VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL: \ case VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV: \ + case VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV: \ case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR: \ case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV: \ case VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_INTERFACE_CREATE_INFO_KHR: \ @@ -723,17 +736,7 @@ static void AppendModifiedChainedStruct(byte *&tempMem, VkStruct *outputStruct, case VK_STRUCTURE_TYPE_SURFACE_FULL_SCREEN_EXCLUSIVE_WIN32_INFO_EXT: \ case VK_STRUCTURE_TYPE_SWAPCHAIN_DISPLAY_NATIVE_HDR_CREATE_INFO_AMD: \ case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR: \ - case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV: \ - case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR: \ - case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR: \ - case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR: \ - case VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR: \ - case VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR: \ - case 
VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR: \ - case VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR: \ - case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR: \ - case VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV: \ - case VK_STRUCTURE_TYPE_CHECKPOINT_DATA_2_NV: + case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV: size_t GetNextPatchSize(const void *pNext) { @@ -837,6 +840,26 @@ size_t GetNextPatchSize(const void *pNext) memSize += GetNextPatchSize(info->pRegions[i].pNext); break; } + case VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR: + { + memSize += sizeof(VkDependencyInfoKHR); + + VkDependencyInfoKHR *info = (VkDependencyInfoKHR *)next; + + memSize += info->memoryBarrierCount * sizeof(VkMemoryBarrier2KHR); + for(uint32_t i = 0; i < info->memoryBarrierCount; i++) + memSize += GetNextPatchSize(info->pMemoryBarriers[i].pNext); + + memSize += info->bufferMemoryBarrierCount * sizeof(VkBufferMemoryBarrier2KHR); + for(uint32_t i = 0; i < info->bufferMemoryBarrierCount; i++) + memSize += GetNextPatchSize(info->pBufferMemoryBarriers[i].pNext); + + memSize += info->imageMemoryBarrierCount * sizeof(VkImageMemoryBarrier2KHR); + for(uint32_t i = 0; i < info->imageMemoryBarrierCount; i++) + memSize += GetNextPatchSize(info->pImageMemoryBarriers[i].pNext); + + break; + } case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO: { memSize += sizeof(VkDescriptorSetAllocateInfo); @@ -955,6 +978,25 @@ size_t GetNextPatchSize(const void *pNext) memSize += info->signalSemaphoreCount * sizeof(VkSemaphore); break; } + case VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR: + { + memSize += sizeof(VkSubmitInfo2KHR); + + VkSubmitInfo2KHR *info = (VkSubmitInfo2KHR *)next; + + memSize += info->waitSemaphoreInfoCount * sizeof(VkSemaphoreSubmitInfoKHR); + for(uint32_t i = 0; i < info->waitSemaphoreInfoCount; i++) + memSize += GetNextPatchSize(info->pWaitSemaphoreInfos[i].pNext); + + memSize += info->commandBufferInfoCount * 
sizeof(VkCommandBufferSubmitInfoKHR); + for(uint32_t i = 0; i < info->commandBufferInfoCount; i++) + memSize += GetNextPatchSize(info->pCommandBufferInfos[i].pNext); + + memSize += info->signalSemaphoreInfoCount * sizeof(VkSemaphoreSubmitInfoKHR); + for(uint32_t i = 0; i < info->signalSemaphoreInfoCount; i++) + memSize += GetNextPatchSize(info->pSignalSemaphoreInfos[i].pNext); + break; + } case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET: { memSize += sizeof(VkWriteDescriptorSet); @@ -1426,6 +1468,52 @@ void UnwrapNextChain(CaptureState state, const char *structName, byte *&tempMem, break; } + case VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR: + { + const VkDependencyInfoKHR *in = (const VkDependencyInfoKHR *)nextInput; + VkDependencyInfoKHR *out = (VkDependencyInfoKHR *)tempMem; + + // append immediately so tempMem is incremented + AppendModifiedChainedStruct(tempMem, out, nextChainTail); + + // allocate unwrapped arrays + VkMemoryBarrier2KHR *outMemoryBarriers = (VkMemoryBarrier2KHR *)tempMem; + tempMem += sizeof(VkMemoryBarrier2KHR) * in->memoryBarrierCount; + VkBufferMemoryBarrier2KHR *outBufferBarriers = (VkBufferMemoryBarrier2KHR *)tempMem; + tempMem += sizeof(VkBufferMemoryBarrier2KHR) * in->bufferMemoryBarrierCount; + VkImageMemoryBarrier2KHR *outImageBarriers = (VkImageMemoryBarrier2KHR *)tempMem; + tempMem += sizeof(VkImageMemoryBarrier2KHR) * in->imageMemoryBarrierCount; + + *out = *in; + out->pMemoryBarriers = outMemoryBarriers; + out->pBufferMemoryBarriers = outBufferBarriers; + out->pImageMemoryBarriers = outImageBarriers; + + for(uint32_t i = 0; i < in->memoryBarrierCount; i++) + { + outMemoryBarriers[i] = in->pMemoryBarriers[i]; + UnwrapNextChain(state, "VkMemoryBarrier2KHR", tempMem, + (VkBaseInStructure *)&outMemoryBarriers[i]); + } + + for(uint32_t i = 0; i < in->bufferMemoryBarrierCount; i++) + { + outBufferBarriers[i] = in->pBufferMemoryBarriers[i]; + UnwrapInPlace(outBufferBarriers[i].buffer); + UnwrapNextChain(state, "VkBufferMemoryBarrier2KHR", tempMem, 
+ (VkBaseInStructure *)&outBufferBarriers[i]); + } + + for(uint32_t i = 0; i < in->imageMemoryBarrierCount; i++) + { + outImageBarriers[i] = in->pImageMemoryBarriers[i]; + UnwrapInPlace(outImageBarriers[i].image); + UnwrapNextChain(state, "VkImageMemoryBarrier2KHR", tempMem, + (VkBaseInStructure *)&outImageBarriers[i]); + } + + break; + } case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO: { const VkDescriptorSetAllocateInfo *in = (const VkDescriptorSetAllocateInfo *)nextInput; @@ -1730,6 +1818,51 @@ void UnwrapNextChain(CaptureState state, const char *structName, byte *&tempMem, break; } + case VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR: + { + const VkSubmitInfo2KHR *in = (const VkSubmitInfo2KHR *)nextInput; + VkSubmitInfo2KHR *out = (VkSubmitInfo2KHR *)tempMem; + + // append immediately so tempMem is incremented + AppendModifiedChainedStruct(tempMem, out, nextChainTail); + + // allocate unwrapped arrays + VkSemaphoreSubmitInfoKHR *outWaitSemaphores = (VkSemaphoreSubmitInfoKHR *)tempMem; + tempMem += sizeof(VkSemaphoreSubmitInfoKHR) * in->waitSemaphoreInfoCount; + VkCommandBufferSubmitInfoKHR *outCmdBuffers = (VkCommandBufferSubmitInfoKHR *)tempMem; + tempMem += sizeof(VkCommandBufferSubmitInfoKHR) * in->commandBufferInfoCount; + VkSemaphoreSubmitInfoKHR *outSignalSemaphores = (VkSemaphoreSubmitInfoKHR *)tempMem; + tempMem += sizeof(VkSemaphoreSubmitInfoKHR) * in->signalSemaphoreInfoCount; + + *out = *in; + out->pWaitSemaphoreInfos = outWaitSemaphores; + out->pCommandBufferInfos = outCmdBuffers; + out->pSignalSemaphoreInfos = outSignalSemaphores; + + for(uint32_t i = 0; i < in->waitSemaphoreInfoCount; i++) + { + outWaitSemaphores[i] = in->pWaitSemaphoreInfos[i]; + UnwrapInPlace(outWaitSemaphores[i].semaphore); + UnwrapNextChain(state, "VkSemaphoreSubmitInfoKHR", tempMem, + (VkBaseInStructure *)&outWaitSemaphores[i]); + } + for(uint32_t i = 0; i < in->commandBufferInfoCount; i++) + { + outCmdBuffers[i] = in->pCommandBufferInfos[i]; + 
UnwrapInPlace(outCmdBuffers[i].commandBuffer); + UnwrapNextChain(state, "VkCommandBufferSubmitInfoKHR", tempMem, + (VkBaseInStructure *)&outCmdBuffers[i]); + } + for(uint32_t i = 0; i < in->signalSemaphoreInfoCount; i++) + { + outSignalSemaphores[i] = in->pSignalSemaphoreInfos[i]; + UnwrapInPlace(outSignalSemaphores[i].semaphore); + UnwrapNextChain(state, "VkSemaphoreSubmitInfoKHR", tempMem, + (VkBaseInStructure *)&outSignalSemaphores[i]); + } + + break; + } case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET: { const VkWriteDescriptorSet *in = (const VkWriteDescriptorSet *)nextInput; @@ -2023,6 +2156,9 @@ void CopyNextChainForPatching(const char *structName, byte *&tempMem, VkBaseInSt case VK_STRUCTURE_TYPE_COPY_IMAGE_INFO_2_KHR: CopyNextChainedStruct(sizeof(VkCopyImageInfo2KHR), tempMem, nextInput, nextChainTail); break; + case VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR: + CopyNextChainedStruct(sizeof(VkDependencyInfoKHR), tempMem, nextInput, nextChainTail); + break; case VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO: CopyNextChainedStruct(sizeof(VkDescriptorSetAllocateInfo), tempMem, nextInput, nextChainTail); break; @@ -2063,6 +2199,9 @@ void CopyNextChainForPatching(const char *structName, byte *&tempMem, VkBaseInSt case VK_STRUCTURE_TYPE_SUBMIT_INFO: CopyNextChainedStruct(sizeof(VkSubmitInfo), tempMem, nextInput, nextChainTail); break; + case VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR: + CopyNextChainedStruct(sizeof(VkSubmitInfo2KHR), tempMem, nextInput, nextChainTail); + break; case VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET: CopyNextChainedStruct(sizeof(VkWriteDescriptorSet), tempMem, nextInput, nextChainTail); break; diff --git a/renderdoc/driver/vulkan/vk_serialise.cpp b/renderdoc/driver/vulkan/vk_serialise.cpp index 3ddfa5e8d..c509b19f9 100644 --- a/renderdoc/driver/vulkan/vk_serialise.cpp +++ b/renderdoc/driver/vulkan/vk_serialise.cpp @@ -158,6 +158,9 @@ DECL_VKFLAG_EXT(VkSwapchainCreate, KHR); DECL_VKFLAG_EMPTY_EXT(VkValidationCacheCreate, EXT); 
DECL_VKFLAG_EMPTY_EXT(VkPipelineRasterizationDepthClipStateCreate, EXT); DECL_VKFLAG_EXT(VkToolPurpose, EXT); +DECL_VKFLAG_EXT(VkSubmit, KHR); +DECL_VKFLAG_EXT(VkPipelineStage, 2KHR); +DECL_VKFLAG_EXT(VkAccess, 2KHR); // serialise a member as flags - cast to the Bits enum for serialisation so the stringification // picks up the bitfield and doesn't treat it as uint32_t. Then we rename the type back to the base @@ -1053,6 +1056,17 @@ SERIALISE_VK_HANDLES(); PNEXT_STRUCT(VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, VkPresentInfoKHR) \ PNEXT_STRUCT(VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR, VkAcquireNextImageInfoKHR) \ \ + /* VK_KHR_synchronization2 */ \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR, VkMemoryBarrier2KHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR, VkBufferMemoryBarrier2KHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR, VkImageMemoryBarrier2KHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR, VkDependencyInfoKHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR, VkSubmitInfo2KHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR, VkSemaphoreSubmitInfoKHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, VkCommandBufferSubmitInfoKHR) \ + PNEXT_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, \ + VkPhysicalDeviceSynchronization2FeaturesKHR) \ + \ /* VK_KHR_timeline_semaphore */ \ PNEXT_STRUCT(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, \ VkPhysicalDeviceTimelineSemaphoreFeatures) \ @@ -1224,16 +1238,6 @@ SERIALISE_VK_HANDLES(); /* VK_KHR_ray_query */ \ PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR) \ \ - /* VK_KHR_synchronization2 */ \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR) \ - 
PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR) \ - PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR) \ - \ /* VK_NV_clip_space_w_scaling */ \ PNEXT_UNSUPPORTED(VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV) \ \ @@ -8178,6 +8182,173 @@ void Deserialise(const VkDeviceGroupRenderPassBeginInfo &el) delete[] el.pDeviceRenderAreas; } +template +void DoSerialise(SerialiserType &ser, VkMemoryBarrier2KHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_MEMORY_BARRIER_2_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, srcStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, srcAccessMask); + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, dstStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, dstAccessMask); +} + +template <> +void Deserialise(const VkMemoryBarrier2KHR &el) +{ + DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkBufferMemoryBarrier2KHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, srcStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, srcAccessMask); + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, dstStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, dstAccessMask); + // serialise as signed because then QUEUE_FAMILY_IGNORED is -1 and queue + // family index won't be legitimately larger than 2 billion + SERIALISE_MEMBER_TYPED(int32_t, srcQueueFamilyIndex); + SERIALISE_MEMBER_TYPED(int32_t, dstQueueFamilyIndex); + SERIALISE_MEMBER(buffer); + SERIALISE_MEMBER(offset); + SERIALISE_MEMBER(size); +} + +template <> +void Deserialise(const VkBufferMemoryBarrier2KHR &el) +{ + 
DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkImageMemoryBarrier2KHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, srcStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, srcAccessMask); + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, dstStageMask); + SERIALISE_MEMBER_VKFLAGS(VkAccessFlags2KHR, dstAccessMask); + SERIALISE_MEMBER(oldLayout); + SERIALISE_MEMBER(newLayout); + // serialise as signed because then QUEUE_FAMILY_IGNORED is -1 and queue + // family index won't be legitimately larger than 2 billion + SERIALISE_MEMBER_TYPED(int32_t, srcQueueFamilyIndex); + SERIALISE_MEMBER_TYPED(int32_t, dstQueueFamilyIndex); + SERIALISE_MEMBER(image); + SERIALISE_MEMBER(subresourceRange); +} + +template <> +void Deserialise(const VkImageMemoryBarrier2KHR &el) +{ + DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkPhysicalDeviceSynchronization2FeaturesKHR &el) +{ + RDCASSERT(ser.IsReading() || + el.sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER(synchronization2); +} + +template <> +void Deserialise(const VkPhysicalDeviceSynchronization2FeaturesKHR &el) +{ + DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkSemaphoreSubmitInfoKHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER(semaphore); + SERIALISE_MEMBER(value); + SERIALISE_MEMBER_VKFLAGS(VkPipelineStageFlags2KHR, stageMask); + SERIALISE_MEMBER(deviceIndex); +} + +template <> +void Deserialise(const VkSemaphoreSubmitInfoKHR &el) +{ + DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkCommandBufferSubmitInfoKHR &el) +{ + 
RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER(commandBuffer); + SERIALISE_MEMBER(deviceMask); +} + +template <> +void Deserialise(const VkCommandBufferSubmitInfoKHR &el) +{ + DeserialiseNext(el.pNext); +} + +template +void DoSerialise(SerialiserType &ser, VkSubmitInfo2KHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_VKFLAGS(VkSubmitFlagsKHR, flags); + SERIALISE_MEMBER(waitSemaphoreInfoCount); + SERIALISE_MEMBER_ARRAY(pWaitSemaphoreInfos, waitSemaphoreInfoCount); + SERIALISE_MEMBER(commandBufferInfoCount); + SERIALISE_MEMBER_ARRAY(pCommandBufferInfos, commandBufferInfoCount); + SERIALISE_MEMBER(signalSemaphoreInfoCount); + SERIALISE_MEMBER_ARRAY(pSignalSemaphoreInfos, signalSemaphoreInfoCount); +} + +template <> +void Deserialise(const VkSubmitInfo2KHR &el) +{ + DeserialiseNext(el.pNext); + delete[] el.pWaitSemaphoreInfos; + delete[] el.pCommandBufferInfos; + delete[] el.pSignalSemaphoreInfos; +} + +template +void DoSerialise(SerialiserType &ser, VkDependencyInfoKHR &el) +{ + RDCASSERT(ser.IsReading() || el.sType == VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR); + SerialiseNext(ser, el.sType, el.pNext); + + SERIALISE_MEMBER_VKFLAGS(VkDependencyFlags, dependencyFlags); + SERIALISE_MEMBER(memoryBarrierCount); + SERIALISE_MEMBER_ARRAY(pMemoryBarriers, memoryBarrierCount); + SERIALISE_MEMBER(bufferMemoryBarrierCount); + SERIALISE_MEMBER_ARRAY(pBufferMemoryBarriers, bufferMemoryBarrierCount); + SERIALISE_MEMBER(imageMemoryBarrierCount); + SERIALISE_MEMBER_ARRAY(pImageMemoryBarriers, imageMemoryBarrierCount); +} + +template <> +void Deserialise(const VkDependencyInfoKHR &el) +{ + DeserialiseNext(el.pNext); + delete[] el.pMemoryBarriers; + delete[] el.pBufferMemoryBarriers; + delete[] el.pImageMemoryBarriers; +} + template void DoSerialise(SerialiserType &ser, 
VkHdrMetadataEXT &el) { @@ -8914,6 +9085,7 @@ INSTANTIATE_SERIALISE_TYPE(VkBufferDeviceAddressCreateInfoEXT); INSTANTIATE_SERIALISE_TYPE(VkBufferDeviceAddressInfo); INSTANTIATE_SERIALISE_TYPE(VkBufferImageCopy2KHR); INSTANTIATE_SERIALISE_TYPE(VkBufferMemoryBarrier); +INSTANTIATE_SERIALISE_TYPE(VkBufferMemoryBarrier2KHR); INSTANTIATE_SERIALISE_TYPE(VkBufferMemoryRequirementsInfo2); INSTANTIATE_SERIALISE_TYPE(VkBufferOpaqueCaptureAddressCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkBufferViewCreateInfo); @@ -8922,6 +9094,7 @@ INSTANTIATE_SERIALISE_TYPE(VkCommandBufferAllocateInfo); INSTANTIATE_SERIALISE_TYPE(VkCommandBufferBeginInfo); INSTANTIATE_SERIALISE_TYPE(VkCommandBufferInheritanceConditionalRenderingInfoEXT); INSTANTIATE_SERIALISE_TYPE(VkCommandBufferInheritanceInfo); +INSTANTIATE_SERIALISE_TYPE(VkCommandBufferSubmitInfoKHR); INSTANTIATE_SERIALISE_TYPE(VkCommandPoolCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkComputePipelineCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkConditionalRenderingBeginInfoEXT); @@ -8942,6 +9115,7 @@ INSTANTIATE_SERIALISE_TYPE(VkDebugUtilsObjectTagInfoEXT); INSTANTIATE_SERIALISE_TYPE(VkDedicatedAllocationBufferCreateInfoNV); INSTANTIATE_SERIALISE_TYPE(VkDedicatedAllocationImageCreateInfoNV); INSTANTIATE_SERIALISE_TYPE(VkDedicatedAllocationMemoryAllocateInfoNV); +INSTANTIATE_SERIALISE_TYPE(VkDependencyInfoKHR); INSTANTIATE_SERIALISE_TYPE(VkDescriptorPoolCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkDescriptorSetAllocateInfo); INSTANTIATE_SERIALISE_TYPE(VkDescriptorSetLayoutBindingFlagsCreateInfo) @@ -9001,6 +9175,7 @@ INSTANTIATE_SERIALISE_TYPE(VkImageCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkImageFormatListCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkImageFormatProperties2); INSTANTIATE_SERIALISE_TYPE(VkImageMemoryBarrier); +INSTANTIATE_SERIALISE_TYPE(VkImageMemoryBarrier2KHR); INSTANTIATE_SERIALISE_TYPE(VkImageMemoryRequirementsInfo2); INSTANTIATE_SERIALISE_TYPE(VkImagePlaneMemoryRequirementsInfo); INSTANTIATE_SERIALISE_TYPE(VkImageResolve2KHR); @@ 
-9020,6 +9195,7 @@ INSTANTIATE_SERIALISE_TYPE(VkMappedMemoryRange); INSTANTIATE_SERIALISE_TYPE(VkMemoryAllocateFlagsInfo); INSTANTIATE_SERIALISE_TYPE(VkMemoryAllocateInfo); INSTANTIATE_SERIALISE_TYPE(VkMemoryBarrier); +INSTANTIATE_SERIALISE_TYPE(VkMemoryBarrier2KHR); INSTANTIATE_SERIALISE_TYPE(VkMemoryDedicatedAllocateInfo); INSTANTIATE_SERIALISE_TYPE(VkMemoryDedicatedRequirements); INSTANTIATE_SERIALISE_TYPE(VkMemoryFdPropertiesKHR); @@ -9094,8 +9270,8 @@ INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSamplerFilterMinmaxProperties); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSamplerYcbcrConversionFeatures); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceScalarBlockLayoutFeatures); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR); -INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderAtomicInt64Features); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderAtomicFloatFeaturesEXT); +INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderAtomicInt64Features); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderClockFeaturesKHR); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderCorePropertiesAMD); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT); @@ -9110,6 +9286,7 @@ INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSubgroupProperties); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSurfaceInfo2KHR); +INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceSynchronization2FeaturesKHR); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT); INSTANTIATE_SERIALISE_TYPE(VkPhysicalDeviceTimelineSemaphoreFeatures); @@ -9185,6 +9362,7 @@ INSTANTIATE_SERIALISE_TYPE(VkSamplerYcbcrConversionInfo); INSTANTIATE_SERIALISE_TYPE(VkSemaphoreCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkSemaphoreGetFdInfoKHR); 
INSTANTIATE_SERIALISE_TYPE(VkSemaphoreSignalInfo); +INSTANTIATE_SERIALISE_TYPE(VkSemaphoreSubmitInfoKHR); INSTANTIATE_SERIALISE_TYPE(VkSemaphoreTypeCreateInfo); INSTANTIATE_SERIALISE_TYPE(VkSemaphoreWaitInfo); INSTANTIATE_SERIALISE_TYPE(VkShaderModuleCreateInfo); @@ -9193,6 +9371,7 @@ INSTANTIATE_SERIALISE_TYPE(VkSharedPresentSurfaceCapabilitiesKHR); INSTANTIATE_SERIALISE_TYPE(VkSparseImageFormatProperties2); INSTANTIATE_SERIALISE_TYPE(VkSparseImageMemoryRequirements2); INSTANTIATE_SERIALISE_TYPE(VkSubmitInfo); +INSTANTIATE_SERIALISE_TYPE(VkSubmitInfo2KHR); INSTANTIATE_SERIALISE_TYPE(VkSubpassBeginInfo); INSTANTIATE_SERIALISE_TYPE(VkSubpassDependency2); INSTANTIATE_SERIALISE_TYPE(VkSubpassDescription2); diff --git a/renderdoc/driver/vulkan/vk_stringise.cpp b/renderdoc/driver/vulkan/vk_stringise.cpp index b09d5c948..8e8f94cd6 100644 --- a/renderdoc/driver/vulkan/vk_stringise.cpp +++ b/renderdoc/driver/vulkan/vk_stringise.cpp @@ -28,7 +28,7 @@ template <> rdcstr DoStringise(const VulkanChunk &el) { - RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1159, "Chunks changed without updating names"); + RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1166, "Chunks changed without updating names"); BEGIN_ENUM_STRINGISE(VulkanChunk) { @@ -191,6 +191,13 @@ rdcstr DoStringise(const VulkanChunk &el) STRINGISE_ENUM_CLASS(vkCmdCopyImageToBuffer2KHR); STRINGISE_ENUM_CLASS(vkCmdBlitImage2KHR); STRINGISE_ENUM_CLASS(vkCmdResolveImage2KHR); + STRINGISE_ENUM_CLASS(vkCmdSetEvent2KHR); + STRINGISE_ENUM_CLASS(vkCmdResetEvent2KHR); + STRINGISE_ENUM_CLASS(vkCmdWaitEvents2KHR); + STRINGISE_ENUM_CLASS(vkCmdPipelineBarrier2KHR); + STRINGISE_ENUM_CLASS(vkCmdWriteTimestamp2KHR); + STRINGISE_ENUM_CLASS(vkQueueSubmit2KHR); + STRINGISE_ENUM_CLASS(vkCmdWriteBufferMarker2AMD); STRINGISE_ENUM_CLASS_NAMED(Max, "Max Chunk"); } END_ENUM_STRINGISE() @@ -2972,6 +2979,107 @@ rdcstr DoStringise(const VkToolPurposeFlagBitsEXT &el) END_BITFIELD_STRINGISE(); } +template <> +rdcstr DoStringise(const 
VkSubmitFlagBitsKHR &el) +{ + BEGIN_BITFIELD_STRINGISE(VkSubmitFlagBitsKHR); + { + STRINGISE_BITFIELD_BIT(VK_SUBMIT_PROTECTED_BIT_KHR); + } + END_BITFIELD_STRINGISE(); +} + +template <> +rdcstr DoStringise(const VkPipelineStageFlagBits2KHR &el) +{ + BEGIN_BITFIELD_STRINGISE(VkPipelineStageFlagBits2KHR); + { + STRINGISE_BITFIELD_VALUE(VK_PIPELINE_STAGE_2_NONE_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_HOST_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_COPY_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_RESOLVE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_BLIT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_CLEAR_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT_KHR); + 
STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_CONDITIONAL_RENDERING_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_COMMAND_PREPROCESS_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_SHADING_RATE_IMAGE_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV); + } + END_BITFIELD_STRINGISE(); +} + +template <> +rdcstr DoStringise(const VkAccessFlagBits2KHR &el) +{ + BEGIN_BITFIELD_STRINGISE(VkAccessFlagBits2KHR); + { + STRINGISE_BITFIELD_VALUE(VK_ACCESS_2_NONE_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_INDEX_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_UNIFORM_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADER_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADER_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT_KHR); + 
STRINGISE_BITFIELD_BIT(VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_TRANSFER_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_HOST_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_HOST_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_MEMORY_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_MEMORY_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADER_SAMPLED_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADER_STORAGE_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_CONDITIONAL_RENDERING_READ_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_COMMAND_PREPROCESS_READ_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_COMMAND_PREPROCESS_WRITE_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_SHADING_RATE_IMAGE_READ_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_NV); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_FRAGMENT_DENSITY_MAP_READ_BIT_EXT); + STRINGISE_BITFIELD_BIT(VK_ACCESS_2_COLOR_ATTACHMENT_READ_NONCOHERENT_BIT_EXT); + } + END_BITFIELD_STRINGISE(); +} + template <> rdcstr DoStringise(const VkExtent3D &el) { diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp index ec6c4cdf9..dc2058291 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp +++ 
b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp @@ -789,6 +789,8 @@ bool WrappedVulkan::Serialise_vkBeginCommandBuffer(SerialiserType &ser, VkComman m_BakedCmdBufferInfo[m_LastCmdBufferID].beginFlags = m_BakedCmdBufferInfo[BakedCommandBuffer].beginFlags = BeginInfo.flags; m_BakedCmdBufferInfo[m_LastCmdBufferID].markerCount = 0; + m_BakedCmdBufferInfo[m_LastCmdBufferID].imageStates.clear(); + m_BakedCmdBufferInfo[BakedCommandBuffer].imageStates.clear(); VkCommandBufferBeginInfo unwrappedBeginInfo = BeginInfo; VkCommandBufferInheritanceInfo unwrappedInheritInfo; @@ -3640,6 +3642,243 @@ void WrappedVulkan::vkCmdWriteTimestamp(VkCommandBuffer commandBuffer, } } +template +bool WrappedVulkan::Serialise_vkCmdPipelineBarrier2KHR(SerialiserType &ser, + VkCommandBuffer commandBuffer, + const VkDependencyInfoKHR *pDependencyInfo) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT_LOCAL(DependencyInfo, *pDependencyInfo); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + rdcarray imgBarriers; + rdcarray bufBarriers; + + // it's possible for buffer or image to be NULL if it refers to a resource that is otherwise + // not in the log (barriers do not mark resources referenced). If the resource in question does + // not exist, then it's safe to skip this barrier. + // + // Since it's a convenient place, we unwrap at the same time. 
+ if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + for(uint32_t i = 0; i < DependencyInfo.bufferMemoryBarrierCount; i++) + { + if(DependencyInfo.pBufferMemoryBarriers[i].buffer != VK_NULL_HANDLE) + { + bufBarriers.push_back(DependencyInfo.pBufferMemoryBarriers[i]); + bufBarriers.back().buffer = Unwrap(bufBarriers.back().buffer); + + RemapQueueFamilyIndices(bufBarriers.back().srcQueueFamilyIndex, + bufBarriers.back().dstQueueFamilyIndex); + + if(IsLoading(m_State)) + { + m_BakedCmdBufferInfo[m_LastCmdBufferID].resourceUsage.push_back(make_rdcpair( + GetResID(DependencyInfo.pBufferMemoryBarriers[i].buffer), + EventUsage(m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID, ResourceUsage::Barrier))); + } + } + } + + for(uint32_t i = 0; i < DependencyInfo.imageMemoryBarrierCount; i++) + { + if(DependencyInfo.pImageMemoryBarriers[i].image != VK_NULL_HANDLE) + { + imgBarriers.push_back(DependencyInfo.pImageMemoryBarriers[i]); + imgBarriers.back().image = Unwrap(imgBarriers.back().image); + + RemapQueueFamilyIndices(imgBarriers.back().srcQueueFamilyIndex, + imgBarriers.back().dstQueueFamilyIndex); + + if(IsLoading(m_State)) + { + m_BakedCmdBufferInfo[m_LastCmdBufferID].resourceUsage.push_back(make_rdcpair( + GetResID(DependencyInfo.pImageMemoryBarriers[i].image), + EventUsage(m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID, ResourceUsage::Barrier))); + } + } + } + + VkDependencyInfoKHR UnwrappedDependencyInfo = DependencyInfo; + + UnwrappedDependencyInfo.pBufferMemoryBarriers = bufBarriers.data(); + UnwrappedDependencyInfo.bufferMemoryBarrierCount = (uint32_t)bufBarriers.size(); + UnwrappedDependencyInfo.pImageMemoryBarriers = imgBarriers.data(); + UnwrappedDependencyInfo.imageMemoryBarrierCount = (uint32_t)imgBarriers.size(); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } 
+  else + { + for(uint32_t i = 0; i < DependencyInfo.imageMemoryBarrierCount; i++) + { + const VkImageMemoryBarrier2KHR &b = DependencyInfo.pImageMemoryBarriers[i]; + if(b.image != VK_NULL_HANDLE && b.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + b.newLayout != VK_IMAGE_LAYOUT_UNDEFINED) + { + m_BakedCmdBufferInfo[m_LastCmdBufferID].resourceUsage.push_back(make_rdcpair( + GetResID(b.image), EventUsage(m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID, + ResourceUsage::Discard))); + } + } + } + + if(commandBuffer != VK_NULL_HANDLE) + { + GetResourceManager()->RecordBarriers(m_BakedCmdBufferInfo[m_LastCmdBufferID].imageStates, + FindCommandQueueFamily(m_LastCmdBufferID), + (uint32_t)imgBarriers.size(), imgBarriers.data()); + + // now sanitise layouts before passing to vulkan + for(VkImageMemoryBarrier2KHR &barrier : imgBarriers) + { + if(barrier.oldLayout == barrier.newLayout) + { + barrier.oldLayout = barrier.newLayout = VK_IMAGE_LAYOUT_UNDEFINED; + continue; + } + + if(!IsLoading(m_State) && barrier.oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) + { + // This is a transition from PREINITIALIZED, but we've already done this barrier once (when + // loading); since we couldn't transition back to PREINITIALIZED, we instead left the + // image in GENERAL.
+ barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + SanitiseReplayImageLayout(barrier.oldLayout); + } + SanitiseReplayImageLayout(barrier.newLayout); + } + + ObjDisp(commandBuffer)->CmdPipelineBarrier2KHR(Unwrap(commandBuffer), &UnwrappedDependencyInfo); + + if(IsActiveReplaying(m_State) && + m_ReplayOptions.optimisation != ReplayOptimisationLevel::Fastest) + { + for(uint32_t i = 0; i < DependencyInfo.imageMemoryBarrierCount; i++) + { + const VkImageMemoryBarrier2KHR &b = DependencyInfo.pImageMemoryBarriers[i]; + if(b.image != VK_NULL_HANDLE && b.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + b.newLayout != VK_IMAGE_LAYOUT_UNDEFINED) + { + GetDebugManager()->FillWithDiscardPattern( + commandBuffer, DiscardType::UndefinedTransition, b.image, b.newLayout, + b.subresourceRange, {{0, 0}, {65536, 65536}}); + } + } + } + } + } + + return true; +} + +void WrappedVulkan::vkCmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer, + const VkDependencyInfoKHR *pDependencyInfo) +{ + SCOPED_DBG_SINK(); + + byte *tempMem = GetTempMemory(GetNextPatchSize(pDependencyInfo)); + VkDependencyInfoKHR *unwrappedInfo = UnwrapStructAndChain(m_State, tempMem, pDependencyInfo); + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer)->CmdPipelineBarrier2KHR(Unwrap(commandBuffer), unwrappedInfo)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdPipelineBarrier2KHR); + Serialise_vkCmdPipelineBarrier2KHR(ser, commandBuffer, pDependencyInfo); + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + + if(pDependencyInfo->imageMemoryBarrierCount > 0) + { + GetResourceManager()->RecordBarriers( + record->cmdInfo->imageStates, record->pool->cmdPoolInfo->queueFamilyIndex, + pDependencyInfo->imageMemoryBarrierCount, pDependencyInfo->pImageMemoryBarriers); + } + } +} + +template +bool WrappedVulkan::Serialise_vkCmdWriteTimestamp2KHR(SerialiserType &ser, + VkCommandBuffer 
commandBuffer, + VkPipelineStageFlags2KHR stage, + VkQueryPool queryPool, uint32_t query) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT_TYPED(VkPipelineStageFlagBits2KHR, stage) + .TypedAs("VkPipelineStageFlags2KHR"_lit); + SERIALISE_ELEMENT(queryPool); + SERIALISE_ELEMENT(query); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + { + ObjDisp(commandBuffer) + ->CmdWriteTimestamp2KHR(Unwrap(commandBuffer), stage, Unwrap(queryPool), query); + } + } + + return true; +} + +void WrappedVulkan::vkCmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkQueryPool queryPool, + uint32_t query) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer) + ->CmdWriteTimestamp2KHR(Unwrap(commandBuffer), stage, Unwrap(queryPool), query)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdWriteTimestamp2KHR); + Serialise_vkCmdWriteTimestamp2KHR(ser, commandBuffer, stage, queryPool, query); + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + + record->MarkResourceFrameReferenced(GetResID(queryPool), eFrameRef_Read); + } +} + template bool WrappedVulkan::Serialise_vkCmdCopyQueryPoolResults( SerialiserType &ser, VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery, @@ -5150,6 +5389,72 @@ void WrappedVulkan::vkCmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer, } } +template +bool WrappedVulkan::Serialise_vkCmdWriteBufferMarker2AMD(SerialiserType &ser, + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, + VkBuffer 
dstBuffer, VkDeviceSize dstOffset, + uint32_t marker) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT_TYPED(VkPipelineStageFlagBits2KHR, stage) + .TypedAs("VkPipelineStageFlags2KHR"_lit); + SERIALISE_ELEMENT(dstBuffer); + SERIALISE_ELEMENT(dstOffset); + SERIALISE_ELEMENT(marker); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + { + ObjDisp(commandBuffer) + ->CmdWriteBufferMarker2AMD(Unwrap(commandBuffer), stage, Unwrap(dstBuffer), dstOffset, + marker); + } + } + + return true; +} + +void WrappedVulkan::vkCmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkBuffer dstBuffer, + VkDeviceSize dstOffset, uint32_t marker) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL(ObjDisp(commandBuffer) + ->CmdWriteBufferMarker2AMD(Unwrap(commandBuffer), stage, + Unwrap(dstBuffer), dstOffset, marker)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdWriteBufferMarker2AMD); + Serialise_vkCmdWriteBufferMarker2AMD(ser, commandBuffer, stage, dstBuffer, dstOffset, marker); + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + + record->MarkBufferFrameReferenced(GetRecord(dstBuffer), dstOffset, 4, eFrameRef_PartialWrite); + } +} + template bool WrappedVulkan::Serialise_vkCmdBeginDebugUtilsLabelEXT(SerialiserType &ser, VkCommandBuffer commandBuffer, @@ -6053,6 +6358,10 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdWriteBufferMarkerAMD, VkCommandBuffer VkPipelineStageFlagBits pipelineStage, VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker); 
+INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdWriteBufferMarker2AMD, VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkBuffer dstBuffer, + VkDeviceSize dstOffset, uint32_t marker); + INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdBeginDebugUtilsLabelEXT, VkCommandBuffer commandBuffer, const VkDebugUtilsLabelEXT *pLabelInfo); @@ -6091,3 +6400,10 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdBindVertexBuffers2EXT, VkCommandBuffe uint32_t firstBinding, uint32_t bindingCount, const VkBuffer *pBuffers, const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, const VkDeviceSize *pStrides); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdPipelineBarrier2KHR, VkCommandBuffer commandBuffer, + const VkDependencyInfoKHR *pDependencyInfo); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdWriteTimestamp2KHR, VkCommandBuffer commandBuffer, + VkPipelineStageFlags2KHR stage, VkQueryPool queryPool, + uint32_t query); diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index d6b1bdf65..374f0a07e 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -2697,6 +2697,13 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi CHECK_PHYS_EXT_FEATURE(workgroupMemoryExplicitLayout16BitAccess); } END_PHYS_EXT_CHECK(); + + BEGIN_PHYS_EXT_CHECK(VkPhysicalDeviceSynchronization2FeaturesKHR, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR); + { + CHECK_PHYS_EXT_FEATURE(synchronization2); + } + END_PHYS_EXT_CHECK(); } if(availFeatures.depthClamp) diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp index ea91300b4..a5a361657 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp @@ -175,319 +175,299 @@ void WrappedVulkan::vkGetDeviceQueue(VkDevice device, uint32_t 
queueFamilyIndex, } } -static void appendChain(VkBaseInStructure *chain, void *item) +void WrappedVulkan::DoSubmit(VkQueue queue, VkSubmitInfo2KHR submitInfo) { - while(chain->pNext != NULL) - chain = (VkBaseInStructure *)chain->pNext; - chain->pNext = (VkBaseInStructure *)item; + if(GetExtensions(NULL).ext_KHR_synchronization2) + { + // if we have KHR_sync2 this is easy! unwrap, add our submit chain, and do it + + byte *tempMem = GetTempMemory(GetNextPatchSize(&submitInfo)); + VkSubmitInfo2KHR *unwrapped = UnwrapStructAndChain(m_State, tempMem, &submitInfo); + AppendNextStruct(*unwrapped, m_SubmitChain); + + // don't submit the fence, since we have nothing to wait on it being signalled, and we + // might not have it correctly in the unsignalled state. + ObjDisp(queue)->QueueSubmit2KHR(Unwrap(queue), 1, unwrapped, VK_NULL_HANDLE); + } + else + { + // otherwise we need to decompose into an original submit + + VkSubmitInfo info = {VK_STRUCTURE_TYPE_SUBMIT_INFO}; + rdcarray commandBuffers; + rdcarray groupMasks; + + VkProtectedSubmitInfo prot = {VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO, NULL, VK_TRUE}; + VkDeviceGroupSubmitInfo group = {VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO}; + + // we expect the pNext chain to be NULL, as there's nothing we should be replaying that we can + // represent in this decomposed version + RDCASSERTEQUAL((void *)submitInfo.pNext, NULL); + + if(submitInfo.flags & VK_SUBMIT_PROTECTED_BIT_KHR) + { + // we created the protected structure with the flag as TRUE since otherwise we just don't + // chain it on at all + AppendNextStruct(info, &prot); + } + + commandBuffers.resize(submitInfo.commandBufferInfoCount); + for(uint32_t i = 0; i < submitInfo.commandBufferInfoCount; i++) + { + commandBuffers[i] = submitInfo.pCommandBufferInfos[i].commandBuffer; + + if(submitInfo.pCommandBufferInfos[i].deviceMask != 0) + { + groupMasks.resize(submitInfo.commandBufferInfoCount); + groupMasks[i] = submitInfo.pCommandBufferInfos[i].deviceMask; + } + } + + 
info.commandBufferCount = submitInfo.commandBufferInfoCount; + info.pCommandBuffers = commandBuffers.data(); + + if(!groupMasks.empty()) + { + group.commandBufferCount = info.commandBufferCount; + group.pCommandBufferDeviceMasks = groupMasks.data(); + // if we set up group masks, chain on the struct + AppendNextStruct(info, &group); + } + + byte *tempMem = GetTempMemory(GetNextPatchSize(&info)); + VkSubmitInfo *unwrapped = UnwrapStructAndChain(m_State, tempMem, &info); + AppendNextStruct(*unwrapped, m_SubmitChain); + + // don't submit the fence, since we have nothing to wait on it being signalled, and we + // might not have it correctly in the unsignalled state. + ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, unwrapped, VK_NULL_HANDLE); + } } -template -bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, uint32_t submitCount, - const VkSubmitInfo *pSubmits, VkFence fence) +void WrappedVulkan::ReplayQueueSubmit(VkQueue queue, VkSubmitInfo2KHR submitInfo) { - SERIALISE_ELEMENT(queue); - SERIALISE_ELEMENT(submitCount); - SERIALISE_ELEMENT_ARRAY(pSubmits, submitCount); - SERIALISE_ELEMENT(fence); - - Serialise_DebugMessages(ser); - - SERIALISE_CHECK_READ_ERRORS(); - - if(IsReplayingAndReading()) + if(IsLoading(m_State)) { - // if there are multiple queue submissions in flight, wait for the previous queue to finish - // before executing this, as we don't have the sync information to properly sync. 
- if(m_PrevQueue != queue) + DoSubmit(queue, submitInfo); + + AddEvent(); + + // we're adding multiple events, need to increment ourselves + m_RootEventID++; + + rdcstr basename = StringFormat::Fmt("vkQueueSubmit(%u)", submitInfo.commandBufferInfoCount); + + for(uint32_t c = 0; c < submitInfo.commandBufferInfoCount; c++) { - RDCDEBUG("Previous queue execution was on queue %s, now executing %s, syncing GPU", - ToStr(GetResID(m_PrevQueue)).c_str(), ToStr(GetResID(queue)).c_str()); - if(m_PrevQueue != VK_NULL_HANDLE) - ObjDisp(m_PrevQueue)->QueueWaitIdle(Unwrap(m_PrevQueue)); + ResourceId cmd = GetResourceManager()->GetOriginalID( + GetResID(submitInfo.pCommandBufferInfos[c].commandBuffer)); - m_PrevQueue = queue; - } + BakedCmdBufferInfo &cmdBufInfo = m_BakedCmdBufferInfo[cmd]; - // if we ever waited on any semaphores, wait for idle here. - bool doWait = false; - for(uint32_t i = 0; i < submitCount; i++) - if(pSubmits[i].waitSemaphoreCount > 0) - doWait = true; + UpdateImageStates(m_BakedCmdBufferInfo[cmd].imageStates); - if(doWait) - ObjDisp(queue)->QueueWaitIdle(Unwrap(queue)); + rdcstr name = StringFormat::Fmt("=> %s[%u]: vkBeginCommandBuffer(%s)", basename.c_str(), c, + ToStr(cmd).c_str()); - // add a drawcall use for this submission, to tally up with any debug messages that come from it - if(IsLoading(m_State)) - { - DrawcallUse use(m_CurChunkOffset, m_RootEventID); - - // insert in sorted location - auto drawit = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use); - m_DrawcallUses.insert(drawit - m_DrawcallUses.begin(), use); - } - - for(uint32_t sub = 0; sub < submitCount; sub++) - { - VkSubmitInfo submitInfo = pSubmits[sub]; - submitInfo.pWaitSemaphores = NULL; - submitInfo.waitSemaphoreCount = 0; - submitInfo.pSignalSemaphores = NULL; - submitInfo.signalSemaphoreCount = 0; - - if(IsLoading(m_State)) + DrawcallDescription draw; + if(!Vulkan_HideCommandBoundaries()) { - // don't submit the fence, since we have nothing to wait on it being 
signalled, and we might - // not have it correctly in the unsignalled state. - VkSubmitInfo unwrapped = submitInfo; - - size_t tempMemSize = unwrapped.commandBufferCount * sizeof(VkCommandBuffer) + - GetNextPatchSize(unwrapped.pNext); - - byte *tempMem = GetTempMemory(tempMemSize); - - VkCommandBuffer *unwrappedCmds = (VkCommandBuffer *)tempMem; - unwrapped.pCommandBuffers = unwrappedCmds; - for(uint32_t i = 0; i < unwrapped.commandBufferCount; i++) - unwrappedCmds[i] = Unwrap(submitInfo.pCommandBuffers[i]); - - tempMem += unwrapped.commandBufferCount * sizeof(VkCommandBuffer); - - UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&unwrapped); - appendChain((VkBaseInStructure *)&unwrapped, m_SubmitChain); - - ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &unwrapped, VK_NULL_HANDLE); - + // add a fake marker + draw.name = name; + draw.flags |= DrawFlags::PassBoundary | DrawFlags::BeginPass; AddEvent(); - // we're adding multiple events, need to increment ourselves + m_RootEvents.back().chunkIndex = cmdBufInfo.beginChunk; + m_Events.back().chunkIndex = cmdBufInfo.beginChunk; + + AddDrawcall(draw, true); m_RootEventID++; - - rdcstr basename = StringFormat::Fmt("vkQueueSubmit(%u)", submitInfo.commandBufferCount); - - for(uint32_t c = 0; c < submitInfo.commandBufferCount; c++) - { - ResourceId cmd = - GetResourceManager()->GetOriginalID(GetResID(submitInfo.pCommandBuffers[c])); - - BakedCmdBufferInfo &cmdBufInfo = m_BakedCmdBufferInfo[cmd]; - - UpdateImageStates(m_BakedCmdBufferInfo[cmd].imageStates); - - rdcstr name = StringFormat::Fmt("=> %s[%u]: vkBeginCommandBuffer(%s)", basename.c_str(), - c, ToStr(cmd).c_str()); - - DrawcallDescription draw; - if(!Vulkan_HideCommandBoundaries()) - { - // add a fake marker - draw.name = name; - draw.flags |= DrawFlags::PassBoundary | DrawFlags::BeginPass; - AddEvent(); - - m_RootEvents.back().chunkIndex = cmdBufInfo.beginChunk; - m_Events.back().chunkIndex = cmdBufInfo.beginChunk; - - AddDrawcall(draw, true); - 
m_RootEventID++; - } - - // insert the baked command buffer in-line into this list of notes, assigning new event - // and drawIDs - InsertDrawsAndRefreshIDs(cmdBufInfo); - - for(size_t e = 0; e < cmdBufInfo.draw->executedCmds.size(); e++) - { - rdcarray &submits = - m_Partial[Secondary].cmdBufferSubmits[cmdBufInfo.draw->executedCmds[e]]; - - for(size_t s = 0; s < submits.size(); s++) - { - if(!submits[s].rebased) - { - submits[s].baseEvent += m_RootEventID; - submits[s].rebased = true; - } - } - } - - for(size_t i = 0; i < cmdBufInfo.debugMessages.size(); i++) - { - m_DebugMessages.push_back(cmdBufInfo.debugMessages[i]); - m_DebugMessages.back().eventId += m_RootEventID; - } - - // only primary command buffers can be submitted - m_Partial[Primary].cmdBufferSubmits[cmd].push_back(Submission(m_RootEventID)); - - m_RootEventID += cmdBufInfo.eventCount; - m_RootDrawcallID += cmdBufInfo.drawCount; - - if(!Vulkan_HideCommandBoundaries()) - { - // non-marker events would have been gathered into an APICalls draw, but markers can - // still - // be dangling. Add them here. - uint32_t i = 0; - for(APIEvent &apievent : cmdBufInfo.curEvents) - { - apievent.eventId = m_RootEventID - cmdBufInfo.curEvents.count() + i; - - m_RootEvents.push_back(apievent); - m_Events.resize(apievent.eventId + 1); - m_Events[apievent.eventId] = apievent; - - i++; - } - - cmdBufInfo.curEvents.clear(); - - name = StringFormat::Fmt("=> %s[%u]: vkEndCommandBuffer(%s)", basename.c_str(), c, - ToStr(cmd).c_str()); - draw.name = name; - draw.flags = DrawFlags::PassBoundary | DrawFlags::EndPass; - AddEvent(); - - m_RootEvents.back().chunkIndex = cmdBufInfo.endChunk; - m_Events.back().chunkIndex = cmdBufInfo.endChunk; - - AddDrawcall(draw, true); - m_RootEventID++; - } - } - - // account for the outer loop thinking we've added one event and incrementing, - // since we've done all the handling ourselves this will be off by one. 
- m_RootEventID--; } - else + + // insert the baked command buffer in-line into this list of notes, assigning new event + // and drawIDs + InsertDrawsAndRefreshIDs(cmdBufInfo); + + for(size_t e = 0; e < cmdBufInfo.draw->executedCmds.size(); e++) { - // account for the queue submit event - m_RootEventID++; + rdcarray &submits = + m_Partial[Secondary].cmdBufferSubmits[cmdBufInfo.draw->executedCmds[e]]; - uint32_t startEID = m_RootEventID; - - // advance m_CurEventID to match the events added when reading - for(uint32_t c = 0; c < submitInfo.commandBufferCount; c++) + for(size_t s = 0; s < submits.size(); s++) { - ResourceId cmd = - GetResourceManager()->GetOriginalID(GetResID(submitInfo.pCommandBuffers[c])); - - m_RootEventID += m_BakedCmdBufferInfo[cmd].eventCount; - m_RootDrawcallID += m_BakedCmdBufferInfo[cmd].drawCount; - - // 2 extra for the virtual labels around the command buffer - if(!Vulkan_HideCommandBoundaries()) + if(!submits[s].rebased) { - m_RootEventID += 2; - m_RootDrawcallID += 2; + submits[s].baseEvent += m_RootEventID; + submits[s].rebased = true; } } + } - // same accounting for the outer loop as above - m_RootEventID--; + for(size_t i = 0; i < cmdBufInfo.debugMessages.size(); i++) + { + m_DebugMessages.push_back(cmdBufInfo.debugMessages[i]); + m_DebugMessages.back().eventId += m_RootEventID; + } - if(submitInfo.commandBufferCount == 0) + // only primary command buffers can be submitted + m_Partial[Primary].cmdBufferSubmits[cmd].push_back(Submission(m_RootEventID)); + + m_RootEventID += cmdBufInfo.eventCount; + m_RootDrawcallID += cmdBufInfo.drawCount; + + if(!Vulkan_HideCommandBoundaries()) + { + // non-marker events would have been gathered into an APICalls draw, but markers can + // still + // be dangling. Add them here. 
+ uint32_t i = 0; + for(APIEvent &apievent : cmdBufInfo.curEvents) { - // do nothing, don't bother with the logic below + apievent.eventId = m_RootEventID - cmdBufInfo.curEvents.count() + i; + + m_RootEvents.push_back(apievent); + m_Events.resize(apievent.eventId + 1); + m_Events[apievent.eventId] = apievent; + + i++; } - else if(m_LastEventID <= startEID) - { + + cmdBufInfo.curEvents.clear(); + + name = StringFormat::Fmt("=> %s[%u]: vkEndCommandBuffer(%s)", basename.c_str(), c, + ToStr(cmd).c_str()); + draw.name = name; + draw.flags = DrawFlags::PassBoundary | DrawFlags::EndPass; + AddEvent(); + + m_RootEvents.back().chunkIndex = cmdBufInfo.endChunk; + m_Events.back().chunkIndex = cmdBufInfo.endChunk; + + AddDrawcall(draw, true); + m_RootEventID++; + } + } + + // account for the outer loop thinking we've added one event and incrementing, + // since we've done all the handling ourselves this will be off by one. + m_RootEventID--; + } + else + { + // account for the queue submit event + m_RootEventID++; + + uint32_t startEID = m_RootEventID; + + // advance m_CurEventID to match the events added when reading + for(uint32_t c = 0; c < submitInfo.commandBufferInfoCount; c++) + { + ResourceId cmd = GetResourceManager()->GetOriginalID( + GetResID(submitInfo.pCommandBufferInfos[c].commandBuffer)); + + m_RootEventID += m_BakedCmdBufferInfo[cmd].eventCount; + m_RootDrawcallID += m_BakedCmdBufferInfo[cmd].drawCount; + + // 2 extra for the virtual labels around the command buffer + if(!Vulkan_HideCommandBoundaries()) + { + m_RootEventID += 2; + m_RootDrawcallID += 2; + } + } + + // same accounting for the outer loop as above + m_RootEventID--; + + if(submitInfo.commandBufferInfoCount == 0) + { + // do nothing, don't bother with the logic below + } + else if(m_LastEventID <= startEID) + { #if ENABLED(VERBOSE_PARTIAL_REPLAY) - RDCDEBUG("Queue Submit no replay %u == %u", m_LastEventID, startEID); + RDCDEBUG("Queue Submit no replay %u == %u", m_LastEventID, startEID); #endif + } 
+ else + { +#if ENABLED(VERBOSE_PARTIAL_REPLAY) + RDCDEBUG("Queue Submit from re-recorded commands, root EID %u last EID", m_RootEventID, + m_LastEventID); +#endif + + uint32_t eid = startEID; + + rdcarray rerecordedCmds; + + for(uint32_t c = 0; c < submitInfo.commandBufferInfoCount; c++) + { + VkCommandBufferSubmitInfoKHR info = submitInfo.pCommandBufferInfos[c]; + ResourceId cmdId = GetResourceManager()->GetOriginalID(GetResID(info.commandBuffer)); + + // account for the virtual vkBeginCommandBuffer label at the start of the events here + // so it matches up to baseEvent + if(!Vulkan_HideCommandBoundaries()) + { + eid++; + } + +#if ENABLED(VERBOSE_PARTIAL_REPLAY) + uint32_t end = eid + m_BakedCmdBufferInfo[cmdId].eventCount; +#endif + + if(eid <= m_LastEventID) + { + VkCommandBuffer cmd = RerecordCmdBuf(cmdId); +#if ENABLED(VERBOSE_PARTIAL_REPLAY) + ResourceId rerecord = GetResID(cmd); + RDCDEBUG("Queue Submit re-recorded replay of %s, using %s (%u -> %u <= %u)", + ToStr(cmdId).c_str(), ToStr(rerecord).c_str(), eid, end, m_LastEventID); +#endif + info.commandBuffer = cmd; + rerecordedCmds.push_back(info); + + UpdateImageStates(m_BakedCmdBufferInfo[cmdId].imageStates); } else { #if ENABLED(VERBOSE_PARTIAL_REPLAY) - RDCDEBUG("Queue Submit from re-recorded commands, root EID %u last EID", m_RootEventID, - m_LastEventID); + RDCDEBUG("Queue not submitting %s", ToStr(cmdId).c_str()); #endif + } - uint32_t eid = startEID; + eid += m_BakedCmdBufferInfo[cmdId].eventCount; - rdcarray rerecordedCmds; - - for(uint32_t c = 0; c < submitInfo.commandBufferCount; c++) - { - ResourceId cmdId = - GetResourceManager()->GetOriginalID(GetResID(submitInfo.pCommandBuffers[c])); - - // account for the virtual vkBeginCommandBuffer label at the start of the events here - // so it matches up to baseEvent - if(!Vulkan_HideCommandBoundaries()) - { - eid++; - } - -#if ENABLED(VERBOSE_PARTIAL_REPLAY) - uint32_t end = eid + m_BakedCmdBufferInfo[cmdId].eventCount; -#endif - - if(eid <= 
m_LastEventID) - { - VkCommandBuffer cmd = RerecordCmdBuf(cmdId); -#if ENABLED(VERBOSE_PARTIAL_REPLAY) - ResourceId rerecord = GetResID(cmd); - RDCDEBUG("Queue Submit re-recorded replay of %s, using %s (%u -> %u <= %u)", - ToStr(cmdId).c_str(), ToStr(rerecord).c_str(), eid, end, m_LastEventID); -#endif - rerecordedCmds.push_back(Unwrap(cmd)); - - UpdateImageStates(m_BakedCmdBufferInfo[cmdId].imageStates); - } - else - { -#if ENABLED(VERBOSE_PARTIAL_REPLAY) - RDCDEBUG("Queue not submitting %s", ToStr(cmdId).c_str()); -#endif - } - - eid += m_BakedCmdBufferInfo[cmdId].eventCount; - - // 1 extra to account for the virtual end command buffer label (begin is accounted for - // above) - if(!Vulkan_HideCommandBoundaries()) - { - eid++; - } - } - - VkSubmitInfo rerecordedSubmit = submitInfo; - - byte *tempMem = GetTempMemory(GetNextPatchSize(rerecordedSubmit.pNext)); - - UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&rerecordedSubmit); - appendChain((VkBaseInStructure *)&rerecordedSubmit, m_SubmitChain); - - rerecordedSubmit.commandBufferCount = (uint32_t)rerecordedCmds.size(); - rerecordedSubmit.pCommandBuffers = &rerecordedCmds[0]; - -#if ENABLED(SINGLE_FLUSH_VALIDATE) - rerecordedSubmit.commandBufferCount = 1; - for(size_t i = 0; i < rerecordedCmds.size(); i++) - { - ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &rerecordedSubmit, VK_NULL_HANDLE); - rerecordedSubmit.pCommandBuffers++; - - FlushQ(); - } -#else - // don't submit the fence, since we have nothing to wait on it being signalled, and we - // might not have it correctly in the unsignalled state. 
- ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &rerecordedSubmit, VK_NULL_HANDLE); -#endif + // 1 extra to account for the virtual end command buffer label (begin is accounted for + // above) + if(!Vulkan_HideCommandBoundaries()) + { + eid++; } } + submitInfo.pCommandBufferInfos = rerecordedCmds.data(); + #if ENABLED(SINGLE_FLUSH_VALIDATE) - FlushQ(); + submitInfo.commandBufferInfoCount = 1; + for(size_t i = 0; i < rerecordedCmds.size(); i++) + { + DoSubmit(queue, submitInfo); + submitInfo.pCommandBuffers++; + + FlushQ(); + } +#else + submitInfo.commandBufferInfoCount = (uint32_t)rerecordedCmds.size(); + + DoSubmit(queue, submitInfo); #endif } } - return true; +#if ENABLED(SINGLE_FLUSH_VALIDATE) + FlushQ(); +#endif } bool WrappedVulkan::PatchIndirectDraw(size_t drawIndex, uint32_t paramStride, @@ -854,6 +834,428 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(BakedCmdBufferInfo &cmdBufInfo) } } +void WrappedVulkan::CaptureQueueSubmit(VkQueue queue, + const rdcarray &commandBuffers, VkFence fence) +{ + bool capframe = IsActiveCapturing(m_State); + bool backframe = IsBackgroundCapturing(m_State); + + std::set refdIDs; + + std::set descriptorSets; + + // pull in any copy sources, conservatively + if(capframe) + { + SCOPED_LOCK(m_CapDescriptorsLock); + descriptorSets.swap(m_CapDescriptors); + } + + for(size_t i = 0; i < commandBuffers.size(); i++) + { + ResourceId cmd = GetResID(commandBuffers[i]); + + VkResourceRecord *record = GetRecord(commandBuffers[i]); + + UpdateImageStates(record->bakedCommands->cmdInfo->imageStates); + + if(Vulkan_Debug_VerboseCommandRecording()) + { + RDCLOG("vkQueueSubmit() to queue %s, cmd %zu of %zu: %s baked to %s", + ToStr(GetResID(queue)).c_str(), i, commandBuffers.size(), + ToStr(record->GetResourceID()).c_str(), + ToStr(record->bakedCommands->GetResourceID()).c_str()); + } + + if(capframe) + { + // add the bound descriptor sets + for(auto it = record->bakedCommands->cmdInfo->boundDescSets.begin(); + it != 
record->bakedCommands->cmdInfo->boundDescSets.end(); ++it) + { + descriptorSets.insert(*it); + } + + for(auto it = record->bakedCommands->cmdInfo->sparse.begin(); + it != record->bakedCommands->cmdInfo->sparse.end(); ++it) + GetResourceManager()->MarkSparseMapReferenced(*it); + + // pull in frame refs from this baked command buffer + record->bakedCommands->AddResourceReferences(GetResourceManager()); + record->bakedCommands->AddReferencedIDs(refdIDs); + + GetResourceManager()->MergeReferencedMemory(record->bakedCommands->cmdInfo->memFrameRefs); + + // ref the parent command buffer's alloc record, this will pull in the cmd buffer pool + GetResourceManager()->MarkResourceFrameReferenced( + record->cmdInfo->allocRecord->GetResourceID(), eFrameRef_Read); + + const rdcarray &subcmds = record->bakedCommands->cmdInfo->subcmds; + + for(size_t sub = 0; sub < subcmds.size(); sub++) + { + VkResourceRecord *bakedSubcmds = subcmds[sub]->bakedCommands; + bakedSubcmds->AddResourceReferences(GetResourceManager()); + bakedSubcmds->AddReferencedIDs(refdIDs); + UpdateImageStates(bakedSubcmds->cmdInfo->imageStates); + GetResourceManager()->MergeReferencedMemory(bakedSubcmds->cmdInfo->memFrameRefs); + GetResourceManager()->MarkResourceFrameReferenced( + subcmds[sub]->cmdInfo->allocRecord->GetResourceID(), eFrameRef_Read); + + bakedSubcmds->AddRef(); + } + + { + SCOPED_LOCK(m_CmdBufferRecordsLock); + m_CmdBufferRecords.push_back(record->bakedCommands); + for(size_t sub = 0; sub < subcmds.size(); sub++) + m_CmdBufferRecords.push_back(subcmds[sub]->bakedCommands); + } + + record->bakedCommands->AddRef(); + } + } + + if(backframe) + { + rdcarray maps; + { + SCOPED_LOCK(m_CoherentMapsLock); + maps = m_CoherentMaps; + } + + for(auto it = maps.begin(); it != maps.end(); ++it) + { + VkResourceRecord *record = *it; + GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), + eFrameRef_ReadBeforeWrite); + } + + // pull in frame refs while background capturing too + for(size_t i 
= 0; i < commandBuffers.size(); i++) + { + VkResourceRecord *record = GetRecord(commandBuffers[i]); + + record->bakedCommands->AddResourceReferences(GetResourceManager()); + + for(VkResourceRecord *sub : record->bakedCommands->cmdInfo->subcmds) + sub->bakedCommands->AddResourceReferences(GetResourceManager()); + } + + // every 20 submits clean background references, in case the application isn't presenting. + if((Atomic::Inc64(&m_QueueCounter) % 20) == 0) + { + GetResourceManager()->CleanBackgroundFrameReferences(); + } + } + + if(capframe) + { + VulkanResourceManager *rm = GetResourceManager(); + + // for each descriptor set, mark it referenced as well as all resources currently bound to it + for(auto it = descriptorSets.begin(); it != descriptorSets.end(); ++it) + { + rm->MarkResourceFrameReferenced(GetResID(*it), eFrameRef_Read); + + VkResourceRecord *setrecord = GetRecord(*it); + + DescriptorBindRefs refs; + + DescSetLayout *layout = setrecord->descInfo->layout; + + for(size_t b = 0, num = layout->bindings.size(); b < num; b++) + { + const DescSetLayout::Binding &bind = layout->bindings[b]; + + // skip empty bindings + if(bind.descriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM || + bind.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) + continue; + + uint32_t count = bind.descriptorCount; + if(bind.variableSize) + count = setrecord->descInfo->data.variableDescriptorCount; + + FrameRefType ref = GetRefType(bind.descriptorType); + + for(uint32_t a = 0; a < count; a++) + setrecord->descInfo->data.binds[b][a].AccumulateBindRefs(refs, rm, ref); + } + + for(auto refit = refs.bindFrameRefs.begin(); refit != refs.bindFrameRefs.end(); ++refit) + { + refdIDs.insert(refit->first); + GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second); + } + + for(auto refit = refs.sparseRefs.begin(); refit != refs.sparseRefs.end(); ++refit) + { + GetResourceManager()->MarkSparseMapReferenced((*refit)->resInfo); + } + + 
UpdateImageStates(refs.bindImageStates); + GetResourceManager()->MergeReferencedMemory(refs.bindMemRefs); + } + + GetResourceManager()->MarkResourceFrameReferenced(GetResID(queue), eFrameRef_Read); + + if(fence != VK_NULL_HANDLE) + GetResourceManager()->MarkResourceFrameReferenced(GetResID(fence), eFrameRef_Read); + + rdcarray maps; + { + SCOPED_LOCK(m_CoherentMapsLock); + maps = m_CoherentMaps; + } + + for(auto it = maps.begin(); it != maps.end(); ++it) + { + VkResourceRecord *record = *it; + MemMapState &state = *record->memMapState; + + SCOPED_LOCK(state.mrLock); + + // potential persistent map + if(state.mapCoherent && state.mappedPtr && !state.mapFlushed) + { + // only need to flush memory that could affect this submitted batch of work + if(refdIDs.find(record->GetResourceID()) == refdIDs.end()) + { + RDCDEBUG("Map of memory %s not referenced in this queue - not flushing", + ToStr(record->GetResourceID()).c_str()); + continue; + } + + size_t diffStart = 0, diffEnd = 0; + bool found = true; + + // this causes vkFlushMappedMemoryRanges call to allocate and copy to refData + // from serialised buffer. We want to copy *precisely* the serialised data, + // otherwise there is a gap in time between serialising out a snapshot of + // the buffer and whenever we then copy into the ref data, e.g. below. + // during this time, data could be written to the buffer and it won't have + // been caught in the serialised snapshot, and if it doesn't change then + // it *also* won't be caught in any future FindDiffRange() calls. + // + // Likewise once refData is allocated, the call below will also update it + // with the data serialised out for the same reason. + // + // Note: it's still possible that data is being written to by the + // application while it's being serialised out in the snapshot below. That + // is OK, since the application is responsible for ensuring it's not writing + // data that would be needed by the GPU in this submit. 
As long as the + // refdata we use for future use is identical to what was serialised, we + // shouldn't miss anything + state.needRefData = true; + + if(state.readbackOnGPU) + { + RDCDEBUG("Reading back %s with GPU for comparison", ToStr(record->GetResourceID()).c_str()); + + GetDebugManager()->InitReadbackBuffer(state.mapOffset + state.mapSize); + + // immediately issue a command buffer to copy back the data. We do that on this queue to + // avoid complexity with synchronising with another queue, but the transfer queue if + // available would be better for this purpose. + VkCommandBuffer copycmd = GetNextCmd(); + + VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; + + ObjDisp(copycmd)->BeginCommandBuffer(Unwrap(copycmd), &beginInfo); + + VkBufferCopy region = {state.mapOffset, state.mapOffset, state.mapSize}; + + ObjDisp(copycmd)->CmdCopyBuffer(Unwrap(copycmd), Unwrap(state.wholeMemBuf), + Unwrap(GetDebugManager()->GetReadbackBuffer()), 1, ®ion); + + // wait for transfer to finish before reading on CPU + VkBufferMemoryBarrier bufBarrier = { + VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + NULL, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_ACCESS_HOST_READ_BIT, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + Unwrap(GetDebugManager()->GetReadbackBuffer()), + 0, + VK_WHOLE_SIZE, + }; + + DoPipelineBarrier(copycmd, 1, &bufBarrier); + + ObjDisp(copycmd)->EndCommandBuffer(Unwrap(copycmd)); + + VkSubmitInfo submit = { + VK_STRUCTURE_TYPE_SUBMIT_INFO, NULL, 0, NULL, NULL, 1, UnwrapPtr(copycmd), + }; + VkResult copyret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &submit, VK_NULL_HANDLE); + RDCASSERTEQUAL(copyret, VK_SUCCESS); + + ObjDisp(queue)->QueueWaitIdle(Unwrap(queue)); + + VkMappedMemoryRange range = { + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + NULL, + Unwrap(GetDebugManager()->GetReadbackMemory()), + 0, + VK_WHOLE_SIZE, + }; + + copyret = 
ObjDisp(queue)->InvalidateMappedMemoryRanges(Unwrap(m_Device), 1, &range); + RDCASSERTEQUAL(copyret, VK_SUCCESS); + + RemovePendingCommandBuffer(copycmd); + AddFreeCommandBuffer(copycmd); + + state.cpuReadPtr = GetDebugManager()->GetReadbackPtr(); + } + else + { + state.cpuReadPtr = state.mappedPtr; + } + + // if we have a previous set of data, compare. + // otherwise just serialise it all + if(state.refData) + found = FindDiffRange(((byte *)state.cpuReadPtr) + state.mapOffset, state.refData, + (size_t)state.mapSize, diffStart, diffEnd); + else + diffEnd = (size_t)state.mapSize; + + // sanitise diff start/end. Since the mapped pointer might be written on another thread + // (or even the GPU) this could cause a difference to appear and disappear transiently. In + // this case FindDiffRange could find the difference when locating the start but not find + // it when locating the end. In this case we don't need to write the difference (the + // application is responsible for ensuring it's not writing to memory the GPU might need) + if(diffEnd <= diffStart) + found = false; + + if(found) + { + // MULTIDEVICE should find the device for this queue. 
+ // MULTIDEVICE only want to flush maps associated with this queue + VkDevice dev = GetDev(); + + { + RDCLOG("Persistent map flush forced for %s (%llu -> %llu)", + ToStr(record->GetResourceID()).c_str(), (uint64_t)diffStart, (uint64_t)diffEnd); + VkMappedMemoryRange range = { + VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, + &internalMemoryFlushMarker, + (VkDeviceMemory)(uint64_t)record->Resource, + state.mapOffset + diffStart, + diffEnd - diffStart, + }; + vkFlushMappedMemoryRanges(dev, 1, &range); + } + } + else + { + RDCDEBUG("Persistent map flush not needed for %s", ToStr(record->GetResourceID()).c_str()); + } + + // restore this just in case + state.cpuReadPtr = state.mappedPtr; + } + } + } +} + +template +bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, uint32_t submitCount, + const VkSubmitInfo *pSubmits, VkFence fence) +{ + SERIALISE_ELEMENT(queue); + SERIALISE_ELEMENT(submitCount); + SERIALISE_ELEMENT_ARRAY(pSubmits, submitCount); + SERIALISE_ELEMENT(fence); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + // if there are multiple queue submissions in flight, wait for the previous queue to finish + // before executing this, as we don't have the sync information to properly sync. + if(m_PrevQueue != queue) + { + RDCDEBUG("Previous queue execution was on queue %s, now executing %s, syncing GPU", + ToStr(GetResID(m_PrevQueue)).c_str(), ToStr(GetResID(queue)).c_str()); + if(m_PrevQueue != VK_NULL_HANDLE) + ObjDisp(m_PrevQueue)->QueueWaitIdle(Unwrap(m_PrevQueue)); + + m_PrevQueue = queue; + } + + // if we ever waited on any semaphores, wait for idle here. 
+ bool doWait = false; + for(uint32_t i = 0; i < submitCount; i++) + if(pSubmits[i].waitSemaphoreCount > 0) + doWait = true; + + if(doWait) + ObjDisp(queue)->QueueWaitIdle(Unwrap(queue)); + + // add a drawcall use for this submission, to tally up with any debug messages that come from it + if(IsLoading(m_State)) + { + DrawcallUse use(m_CurChunkOffset, m_RootEventID); + + // insert in sorted location + auto drawit = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use); + m_DrawcallUses.insert(drawit - m_DrawcallUses.begin(), use); + } + + rdcarray cmds; + + for(uint32_t sub = 0; sub < submitCount; sub++) + { + // make a fake VkSubmitInfo2KHR. If KHR_synchronization2 isn't supported this may then decay + // back down into separate structs but it keeps a lot of the processing the same in both paths + // and it's easier to promote this then decay if necessary (knowing no unsupported features + // will be used) + VkSubmitInfo2KHR submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR; + + const VkProtectedSubmitInfo *prot = (const VkProtectedSubmitInfo *)FindNextStruct( + &pSubmits[sub], VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO); + const VkDeviceGroupSubmitInfo *group = (const VkDeviceGroupSubmitInfo *)FindNextStruct( + &pSubmits[sub], VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO); + + cmds.resize(pSubmits[sub].commandBufferCount); + for(uint32_t c = 0; c < pSubmits[sub].commandBufferCount; c++) + { + VkCommandBufferSubmitInfoKHR &cmd = cmds[c]; + cmd.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR; + cmd.commandBuffer = pSubmits[sub].pCommandBuffers[c]; + + if(group && c < group->commandBufferCount) + cmd.deviceMask = group->pCommandBufferDeviceMasks[c]; + } + + submitInfo.commandBufferInfoCount = (uint32_t)cmds.size(); + submitInfo.pCommandBufferInfos = cmds.data(); + + if(prot && prot->protectedSubmit) + submitInfo.flags |= VK_SUBMIT_PROTECTED_BIT_KHR; + + // don't replay any semaphores, this means we don't have to care about + 
// VkD3D12FenceSubmitInfoKHR or VkTimelineSemaphoreSubmitInfo. + // we unwrap VkProtectedSubmitInfo and VkDeviceGroupSubmitInfo above. + // VkWin32KeyedMutexAcquireReleaseInfoKHR and VkWin32KeyedMutexAcquireReleaseInfoNV we + // deliberately don't replay + // VkPerformanceQuerySubmitInfoKHR we don't replay since we don't replay perf counter work + + ReplayQueueSubmit(queue, submitInfo); + } + } + + return true; +} + VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence) { @@ -874,22 +1276,11 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, m_SubmitCounter++; } - size_t tempmemSize = sizeof(VkSubmitInfo) * submitCount; - - // need to count how many semaphore and command buffer arrays to allocate for - for(uint32_t i = 0; i < submitCount; i++) - { - tempmemSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer); - tempmemSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore); - tempmemSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore); - - tempmemSize += GetNextPatchSize(pSubmits[i].pNext); - } - VkResult ret = VK_SUCCESS; bool present = false; bool beginCapture = false; bool endCapture = false; + rdcarray commandBuffers; for(uint32_t s = 0; s < submitCount; s++) { @@ -899,6 +1290,8 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, present |= record->bakedCommands->cmdInfo->present; beginCapture |= record->bakedCommands->cmdInfo->beginCapture; endCapture |= record->bakedCommands->cmdInfo->endCapture; + + commandBuffers.push_back(pSubmits[s].pCommandBuffers[i]); } } @@ -911,337 +1304,14 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, SCOPED_READLOCK(m_CapTransitionLock); bool capframe = IsActiveCapturing(m_State); - bool backframe = IsBackgroundCapturing(m_State); - std::set refdIDs; + CaptureQueueSubmit(queue, commandBuffers, fence); - std::set descriptorSets; + size_t tempmemSize = 
sizeof(VkSubmitInfo) * submitCount; - // pull in any copy sources, conservatively - if(capframe) - { - SCOPED_LOCK(m_CapDescriptorsLock); - descriptorSets.swap(m_CapDescriptors); - } - - for(uint32_t s = 0; s < submitCount; s++) - { - for(uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++) - { - ResourceId cmd = GetResID(pSubmits[s].pCommandBuffers[i]); - - VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBuffers[i]); - - UpdateImageStates(record->bakedCommands->cmdInfo->imageStates); - - if(Vulkan_Debug_VerboseCommandRecording()) - { - RDCLOG("vkQueueSubmit() to queue %s, submit %u / cmd %u: %s baked to %s", - ToStr(GetResID(queue)).c_str(), s, i, ToStr(record->GetResourceID()).c_str(), - ToStr(record->bakedCommands->GetResourceID()).c_str()); - } - - if(capframe) - { - // add the bound descriptor sets - for(auto it = record->bakedCommands->cmdInfo->boundDescSets.begin(); - it != record->bakedCommands->cmdInfo->boundDescSets.end(); ++it) - { - descriptorSets.insert(*it); - } - - for(auto it = record->bakedCommands->cmdInfo->sparse.begin(); - it != record->bakedCommands->cmdInfo->sparse.end(); ++it) - GetResourceManager()->MarkSparseMapReferenced(*it); - - // pull in frame refs from this baked command buffer - record->bakedCommands->AddResourceReferences(GetResourceManager()); - record->bakedCommands->AddReferencedIDs(refdIDs); - - GetResourceManager()->MergeReferencedMemory(record->bakedCommands->cmdInfo->memFrameRefs); - - // ref the parent command buffer's alloc record, this will pull in the cmd buffer pool - GetResourceManager()->MarkResourceFrameReferenced( - record->cmdInfo->allocRecord->GetResourceID(), eFrameRef_Read); - - const rdcarray &subcmds = record->bakedCommands->cmdInfo->subcmds; - - for(size_t sub = 0; sub < subcmds.size(); sub++) - { - VkResourceRecord *bakedSubcmds = subcmds[sub]->bakedCommands; - bakedSubcmds->AddResourceReferences(GetResourceManager()); - bakedSubcmds->AddReferencedIDs(refdIDs); - 
UpdateImageStates(bakedSubcmds->cmdInfo->imageStates); - GetResourceManager()->MergeReferencedMemory(bakedSubcmds->cmdInfo->memFrameRefs); - GetResourceManager()->MarkResourceFrameReferenced( - subcmds[sub]->cmdInfo->allocRecord->GetResourceID(), eFrameRef_Read); - - bakedSubcmds->AddRef(); - } - - { - SCOPED_LOCK(m_CmdBufferRecordsLock); - m_CmdBufferRecords.push_back(record->bakedCommands); - for(size_t sub = 0; sub < subcmds.size(); sub++) - m_CmdBufferRecords.push_back(subcmds[sub]->bakedCommands); - } - - record->bakedCommands->AddRef(); - } - } - } - - if(backframe) - { - rdcarray maps; - { - SCOPED_LOCK(m_CoherentMapsLock); - maps = m_CoherentMaps; - } - - for(auto it = maps.begin(); it != maps.end(); ++it) - { - VkResourceRecord *record = *it; - GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), - eFrameRef_ReadBeforeWrite); - } - - // pull in frame refs while background capturing too - for(uint32_t s = 0; s < submitCount; s++) - { - for(uint32_t i = 0; i < pSubmits[s].commandBufferCount; i++) - { - VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBuffers[i]); - - record->bakedCommands->AddResourceReferences(GetResourceManager()); - - for(VkResourceRecord *sub : record->bakedCommands->cmdInfo->subcmds) - sub->bakedCommands->AddResourceReferences(GetResourceManager()); - } - } - - // every 20 submits clean background references, in case the application isn't presenting. 
- if((Atomic::Inc64(&m_QueueCounter) % 20) == 0) - { - GetResourceManager()->CleanBackgroundFrameReferences(); - } - } - - if(capframe) - { - VulkanResourceManager *rm = GetResourceManager(); - - // for each descriptor set, mark it referenced as well as all resources currently bound to it - for(auto it = descriptorSets.begin(); it != descriptorSets.end(); ++it) - { - rm->MarkResourceFrameReferenced(GetResID(*it), eFrameRef_Read); - - VkResourceRecord *setrecord = GetRecord(*it); - - DescriptorBindRefs refs; - - DescSetLayout *layout = setrecord->descInfo->layout; - - for(size_t b = 0, num = layout->bindings.size(); b < num; b++) - { - const DescSetLayout::Binding &bind = layout->bindings[b]; - - // skip empty bindings - if(bind.descriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM || - bind.descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) - continue; - - uint32_t count = bind.descriptorCount; - if(bind.variableSize) - count = setrecord->descInfo->data.variableDescriptorCount; - - FrameRefType ref = GetRefType(bind.descriptorType); - - for(uint32_t a = 0; a < count; a++) - setrecord->descInfo->data.binds[b][a].AccumulateBindRefs(refs, rm, ref); - } - - for(auto refit = refs.bindFrameRefs.begin(); refit != refs.bindFrameRefs.end(); ++refit) - { - refdIDs.insert(refit->first); - GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second); - } - - for(auto refit = refs.sparseRefs.begin(); refit != refs.sparseRefs.end(); ++refit) - { - GetResourceManager()->MarkSparseMapReferenced((*refit)->resInfo); - } - - UpdateImageStates(refs.bindImageStates); - GetResourceManager()->MergeReferencedMemory(refs.bindMemRefs); - } - - GetResourceManager()->MarkResourceFrameReferenced(GetResID(queue), eFrameRef_Read); - - if(fence != VK_NULL_HANDLE) - GetResourceManager()->MarkResourceFrameReferenced(GetResID(fence), eFrameRef_Read); - - rdcarray maps; - { - SCOPED_LOCK(m_CoherentMapsLock); - maps = m_CoherentMaps; - } - - for(auto it = maps.begin(); it != 
maps.end(); ++it) - { - VkResourceRecord *record = *it; - MemMapState &state = *record->memMapState; - - SCOPED_LOCK(state.mrLock); - - // potential persistent map - if(state.mapCoherent && state.mappedPtr && !state.mapFlushed) - { - // only need to flush memory that could affect this submitted batch of work - if(refdIDs.find(record->GetResourceID()) == refdIDs.end()) - { - RDCDEBUG("Map of memory %s not referenced in this queue - not flushing", - ToStr(record->GetResourceID()).c_str()); - continue; - } - - size_t diffStart = 0, diffEnd = 0; - bool found = true; - - // this causes vkFlushMappedMemoryRanges call to allocate and copy to refData - // from serialised buffer. We want to copy *precisely* the serialised data, - // otherwise there is a gap in time between serialising out a snapshot of - // the buffer and whenever we then copy into the ref data, e.g. below. - // during this time, data could be written to the buffer and it won't have - // been caught in the serialised snapshot, and if it doesn't change then - // it *also* won't be caught in any future FindDiffRange() calls. - // - // Likewise once refData is allocated, the call below will also update it - // with the data serialised out for the same reason. - // - // Note: it's still possible that data is being written to by the - // application while it's being serialised out in the snapshot below. That - // is OK, since the application is responsible for ensuring it's not writing - // data that would be needed by the GPU in this submit. As long as the - // refdata we use for future use is identical to what was serialised, we - // shouldn't miss anything - state.needRefData = true; - - if(state.readbackOnGPU) - { - RDCDEBUG("Reading back %s with GPU for comparison", - ToStr(record->GetResourceID()).c_str()); - - GetDebugManager()->InitReadbackBuffer(state.mapOffset + state.mapSize); - - // immediately issue a command buffer to copy back the data. 
We do that on this queue to - // avoid complexity with synchronising with another queue, but the transfer queue if - // available would be better for this purpose. - VkCommandBuffer copycmd = GetNextCmd(); - - VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL, - VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT}; - - ObjDisp(copycmd)->BeginCommandBuffer(Unwrap(copycmd), &beginInfo); - - VkBufferCopy region = {state.mapOffset, state.mapOffset, state.mapSize}; - - ObjDisp(copycmd)->CmdCopyBuffer(Unwrap(copycmd), Unwrap(state.wholeMemBuf), - Unwrap(GetDebugManager()->GetReadbackBuffer()), 1, - ®ion); - - // wait for transfer to finish before reading on CPU - VkBufferMemoryBarrier bufBarrier = { - VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, - NULL, - VK_ACCESS_TRANSFER_WRITE_BIT, - VK_ACCESS_HOST_READ_BIT, - VK_QUEUE_FAMILY_IGNORED, - VK_QUEUE_FAMILY_IGNORED, - Unwrap(GetDebugManager()->GetReadbackBuffer()), - 0, - VK_WHOLE_SIZE, - }; - - DoPipelineBarrier(copycmd, 1, &bufBarrier); - - ObjDisp(copycmd)->EndCommandBuffer(Unwrap(copycmd)); - - VkSubmitInfo submit = { - VK_STRUCTURE_TYPE_SUBMIT_INFO, NULL, 0, NULL, NULL, 1, UnwrapPtr(copycmd), - }; - VkResult copyret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &submit, VK_NULL_HANDLE); - RDCASSERTEQUAL(copyret, VK_SUCCESS); - - ObjDisp(queue)->QueueWaitIdle(Unwrap(queue)); - - VkMappedMemoryRange range = { - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - NULL, - Unwrap(GetDebugManager()->GetReadbackMemory()), - 0, - VK_WHOLE_SIZE, - }; - - copyret = ObjDisp(queue)->InvalidateMappedMemoryRanges(Unwrap(m_Device), 1, &range); - RDCASSERTEQUAL(copyret, VK_SUCCESS); - - RemovePendingCommandBuffer(copycmd); - AddFreeCommandBuffer(copycmd); - - state.cpuReadPtr = GetDebugManager()->GetReadbackPtr(); - } - else - { - state.cpuReadPtr = state.mappedPtr; - } - - // if we have a previous set of data, compare. 
- // otherwise just serialise it all - if(state.refData) - found = FindDiffRange(((byte *)state.cpuReadPtr) + state.mapOffset, state.refData, - (size_t)state.mapSize, diffStart, diffEnd); - else - diffEnd = (size_t)state.mapSize; - - // sanitise diff start/end. Since the mapped pointer might be written on another thread - // (or even the GPU) this could cause a difference to appear and disappear transiently. In - // this case FindDiffRange could find the difference when locating the start but not find - // it when locating the end. In this case we don't need to write the difference (the - // application is responsible for ensuring it's not writing to memory the GPU might need) - if(diffEnd <= diffStart) - found = false; - - if(found) - { - // MULTIDEVICE should find the device for this queue. - // MULTIDEVICE only want to flush maps associated with this queue - VkDevice dev = GetDev(); - - { - RDCLOG("Persistent map flush forced for %s (%llu -> %llu)", - ToStr(record->GetResourceID()).c_str(), (uint64_t)diffStart, (uint64_t)diffEnd); - VkMappedMemoryRange range = { - VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - &internalMemoryFlushMarker, - (VkDeviceMemory)(uint64_t)record->Resource, - state.mapOffset + diffStart, - diffEnd - diffStart, - }; - vkFlushMappedMemoryRanges(dev, 1, &range); - } - } - else - { - RDCDEBUG("Persistent map flush not needed for %s", - ToStr(record->GetResourceID()).c_str()); - } - - // restore this just in case - state.cpuReadPtr = state.mappedPtr; - } - } - } + // because we pass the base struct this will calculate the patch size including it + for(uint32_t i = 0; i < submitCount; i++) + tempmemSize += GetNextPatchSize(&pSubmits[i]); byte *memory = GetTempMemory(tempmemSize); @@ -1249,38 +1319,7 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, memory += sizeof(VkSubmitInfo) * submitCount; for(uint32_t i = 0; i < submitCount; i++) - { - RDCASSERT(pSubmits[i].sType == VK_STRUCTURE_TYPE_SUBMIT_INFO); - 
unwrappedSubmits[i] = pSubmits[i]; - - VkSemaphore *unwrappedWaitSems = (VkSemaphore *)memory; - memory += sizeof(VkSemaphore) * unwrappedSubmits[i].waitSemaphoreCount; - - unwrappedSubmits[i].pWaitSemaphores = - unwrappedSubmits[i].waitSemaphoreCount ? unwrappedWaitSems : NULL; - for(uint32_t o = 0; o < unwrappedSubmits[i].waitSemaphoreCount; o++) - unwrappedWaitSems[o] = Unwrap(pSubmits[i].pWaitSemaphores[o]); - - VkCommandBuffer *unwrappedCommandBuffers = (VkCommandBuffer *)memory; - memory += sizeof(VkCommandBuffer) * unwrappedSubmits[i].commandBufferCount; - - unwrappedSubmits[i].pCommandBuffers = - unwrappedSubmits[i].commandBufferCount ? unwrappedCommandBuffers : NULL; - for(uint32_t o = 0; o < unwrappedSubmits[i].commandBufferCount; o++) - unwrappedCommandBuffers[o] = Unwrap(pSubmits[i].pCommandBuffers[o]); - unwrappedCommandBuffers += unwrappedSubmits[i].commandBufferCount; - - VkSemaphore *unwrappedSignalSems = (VkSemaphore *)memory; - memory += sizeof(VkSemaphore) * unwrappedSubmits[i].signalSemaphoreCount; - - unwrappedSubmits[i].pSignalSemaphores = - unwrappedSubmits[i].signalSemaphoreCount ? 
unwrappedSignalSems : NULL; - for(uint32_t o = 0; o < unwrappedSubmits[i].signalSemaphoreCount; o++) - unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]); - - UnwrapNextChain(m_State, "VkSubmitInfo", memory, (VkBaseInStructure *)&unwrappedSubmits[i]); - appendChain((VkBaseInStructure *)&unwrappedSubmits[i], m_SubmitChain); - } + unwrappedSubmits[i] = *UnwrapStructAndChain(m_State, memory, &pSubmits[i]); SERIALISE_TIME_CALL(ret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), submitCount, unwrappedSubmits, Unwrap(fence))); @@ -1324,6 +1363,167 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, return ret; } +template +bool WrappedVulkan::Serialise_vkQueueSubmit2KHR(SerialiserType &ser, VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2KHR *pSubmits, VkFence fence) +{ + SERIALISE_ELEMENT(queue); + SERIALISE_ELEMENT(submitCount); + SERIALISE_ELEMENT_ARRAY(pSubmits, submitCount); + SERIALISE_ELEMENT(fence); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + // if there are multiple queue submissions in flight, wait for the previous queue to finish + // before executing this, as we don't have the sync information to properly sync. + if(m_PrevQueue != queue) + { + RDCDEBUG("Previous queue execution was on queue %s, now executing %s, syncing GPU", + ToStr(GetResID(m_PrevQueue)).c_str(), ToStr(GetResID(queue)).c_str()); + if(m_PrevQueue != VK_NULL_HANDLE) + ObjDisp(m_PrevQueue)->QueueWaitIdle(Unwrap(m_PrevQueue)); + + m_PrevQueue = queue; + } + + // if we ever waited on any semaphores, wait for idle here. 
+ bool doWait = false; + for(uint32_t i = 0; i < submitCount; i++) + if(pSubmits[i].waitSemaphoreInfoCount > 0) + doWait = true; + + if(doWait) + ObjDisp(queue)->QueueWaitIdle(Unwrap(queue)); + + // add a drawcall use for this submission, to tally up with any debug messages that come from it + if(IsLoading(m_State)) + { + DrawcallUse use(m_CurChunkOffset, m_RootEventID); + + // insert in sorted location + auto drawit = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use); + m_DrawcallUses.insert(drawit - m_DrawcallUses.begin(), use); + } + + for(uint32_t sub = 0; sub < submitCount; sub++) + ReplayQueueSubmit(queue, pSubmits[sub]); + } + + return true; +} + +VkResult WrappedVulkan::vkQueueSubmit2KHR(VkQueue queue, uint32_t submitCount, + const VkSubmitInfo2KHR *pSubmits, VkFence fence) +{ + SCOPED_DBG_SINK(); + + if(!m_MarkedActive) + { + m_MarkedActive = true; + RenderDoc::Inst().AddActiveDriver(RDCDriver::Vulkan, false); + } + + if(IsActiveCapturing(m_State)) + { + // 15 is quite a lot of submissions. 
+ const int expectedMaxSubmissions = 15; + + RenderDoc::Inst().SetProgress(CaptureProgress::FrameCapture, FakeProgress(m_SubmitCounter, 15)); + m_SubmitCounter++; + } + + VkResult ret = VK_SUCCESS; + bool present = false; + bool beginCapture = false; + bool endCapture = false; + rdcarray commandBuffers; + + for(uint32_t s = 0; s < submitCount; s++) + { + for(uint32_t i = 0; i < pSubmits[s].commandBufferInfoCount; i++) + { + VkResourceRecord *record = GetRecord(pSubmits[s].pCommandBufferInfos[i].commandBuffer); + present |= record->bakedCommands->cmdInfo->present; + beginCapture |= record->bakedCommands->cmdInfo->beginCapture; + endCapture |= record->bakedCommands->cmdInfo->endCapture; + + commandBuffers.push_back(pSubmits[s].pCommandBufferInfos[i].commandBuffer); + } + } + + if(beginCapture) + { + RenderDoc::Inst().StartFrameCapture(LayerDisp(m_Instance), NULL); + } + + { + SCOPED_READLOCK(m_CapTransitionLock); + + bool capframe = IsActiveCapturing(m_State); + + CaptureQueueSubmit(queue, commandBuffers, fence); + + size_t tempmemSize = sizeof(VkSubmitInfo2KHR) * submitCount; + + // because we pass the base struct this will calculate the patch size including it + for(uint32_t i = 0; i < submitCount; i++) + tempmemSize += GetNextPatchSize(&pSubmits[i]); + + byte *memory = GetTempMemory(tempmemSize); + + VkSubmitInfo2KHR *unwrappedSubmits = (VkSubmitInfo2KHR *)memory; + memory += sizeof(VkSubmitInfo2KHR) * submitCount; + + for(uint32_t i = 0; i < submitCount; i++) + unwrappedSubmits[i] = *UnwrapStructAndChain(m_State, memory, &pSubmits[i]); + + SERIALISE_TIME_CALL(ret = ObjDisp(queue)->QueueSubmit2KHR(Unwrap(queue), submitCount, + unwrappedSubmits, Unwrap(fence))); + + if(capframe) + { + { + CACHE_THREAD_SERIALISER(); + + ser.SetDrawChunk(); + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkQueueSubmit2KHR); + Serialise_vkQueueSubmit2KHR(ser, queue, submitCount, pSubmits, fence); + + m_FrameCaptureRecord->AddChunk(scope.Get()); + } + + for(uint32_t s = 0; s < submitCount; s++) 
+ { + for(uint32_t sem = 0; sem < pSubmits[s].waitSemaphoreInfoCount; sem++) + GetResourceManager()->MarkResourceFrameReferenced( + GetResID(pSubmits[s].pWaitSemaphoreInfos[sem].semaphore), eFrameRef_Read); + + for(uint32_t sem = 0; sem < pSubmits[s].signalSemaphoreInfoCount; sem++) + GetResourceManager()->MarkResourceFrameReferenced( + GetResID(pSubmits[s].pSignalSemaphoreInfos[sem].semaphore), eFrameRef_Read); + } + } + } + + if(endCapture) + { + RenderDoc::Inst().EndFrameCapture(LayerDisp(m_Instance), NULL); + } + + if(present) + { + AdvanceFrame(); + Present(LayerDisp(m_Instance), NULL); + } + + return ret; +} + template bool WrappedVulkan::Serialise_vkQueueBindSparse(SerialiserType &ser, VkQueue queue, uint32_t bindInfoCount, @@ -1939,3 +2139,6 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkQueueInsertDebugUtilsLabelEXT, VkQueue q INSTANTIATE_FUNCTION_SERIALISED(void, vkGetDeviceQueue2, VkDevice device, const VkDeviceQueueInfo2 *pQueueInfo, VkQueue *pQueue); + +INSTANTIATE_FUNCTION_SERIALISED(VkResult, vkQueueSubmit2KHR, VkQueue queue, uint32_t submitCount, + const VkSubmitInfo2KHR *pSubmits, VkFence fence); diff --git a/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp index dbae5ef7e..4324d1775 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_sync_funcs.cpp @@ -1139,6 +1139,340 @@ VkResult WrappedVulkan::vkSignalSemaphore(VkDevice device, const VkSemaphoreSign return ret; } +template +bool WrappedVulkan::Serialise_vkCmdSetEvent2KHR(SerialiserType &ser, VkCommandBuffer commandBuffer, + VkEvent event, + const VkDependencyInfoKHR *pDependencyInfo) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(event); + SERIALISE_ELEMENT_LOCAL(DependencyInfo, *pDependencyInfo); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = 
GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + // see top of this file for current event/fence handling + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + ObjDisp(commandBuffer)->CmdSetEvent2KHR(Unwrap(commandBuffer), Unwrap(event), &DependencyInfo); + } + + return true; +} + +void WrappedVulkan::vkCmdSetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event, + const VkDependencyInfoKHR *pDependencyInfo) +{ + SCOPED_DBG_SINK(); + + VkDependencyInfoKHR unwrappedInfo = *pDependencyInfo; + + byte *tempMem = GetTempMemory(GetNextPatchSize(&unwrappedInfo)); + + { + VkBaseInStructure dummy = {}; + dummy.pNext = (const VkBaseInStructure *)&unwrappedInfo; + UnwrapNextChain(m_State, "VkDependencyInfoKHR", tempMem, &dummy); + } + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer)->CmdSetEvent2KHR(Unwrap(commandBuffer), Unwrap(event), &unwrappedInfo)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdSetEvent2KHR); + Serialise_vkCmdSetEvent2KHR(ser, commandBuffer, event, pDependencyInfo); + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + record->MarkResourceFrameReferenced(GetResID(event), eFrameRef_Read); + } +} + +template +bool WrappedVulkan::Serialise_vkCmdResetEvent2KHR(SerialiserType &ser, VkCommandBuffer commandBuffer, + VkEvent event, VkPipelineStageFlags2KHR stageMask) +{ + SERIALISE_ELEMENT(commandBuffer); + SERIALISE_ELEMENT(event); + SERIALISE_ELEMENT_TYPED(VkPipelineStageFlagBits2KHR, stageMask) + .TypedAs("VkPipelineStageFlags2KHR"_lit); + + Serialise_DebugMessages(ser); + + SERIALISE_CHECK_READ_ERRORS(); + + if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + // see top of this file for 
current event/fence handling + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + } + + if(commandBuffer != VK_NULL_HANDLE) + { + // ObjDisp(commandBuffer)->CmdResetEvent2KHR(Unwrap(commandBuffer), Unwrap(event), stageMask); + } + } + + return true; +} + +void WrappedVulkan::vkCmdResetEvent2KHR(VkCommandBuffer commandBuffer, VkEvent event, + VkPipelineStageFlags2KHR stageMask) +{ + SCOPED_DBG_SINK(); + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer)->CmdResetEvent2KHR(Unwrap(commandBuffer), Unwrap(event), stageMask)); + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); + + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdResetEvent2KHR); + Serialise_vkCmdResetEvent2KHR(ser, commandBuffer, event, stageMask); + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + record->MarkResourceFrameReferenced(GetResID(event), eFrameRef_Read); + } +} + +template +bool WrappedVulkan::Serialise_vkCmdWaitEvents2KHR(SerialiserType &ser, VkCommandBuffer commandBuffer, + uint32_t eventCount, const VkEvent *pEvents, + const VkDependencyInfoKHR *pDependencyInfos) +{ + SERIALISE_ELEMENT(commandBuffer); + + // we serialise the original events even though we are going to replace them with our own + SERIALISE_ELEMENT(eventCount); + SERIALISE_ELEMENT_ARRAY(pEvents, eventCount); + SERIALISE_ELEMENT_ARRAY(pDependencyInfos, eventCount); + + SERIALISE_CHECK_READ_ERRORS(); + + // it's possible for buffer or image to be NULL if it refers to a resource that is otherwise + // not in the log (barriers do not mark resources referenced). If the resource in question does + // not exist, then it's safe to skip this barrier. + // + // Since it's a convenient place, we unwrap at the same time. 
+ if(IsReplayingAndReading()) + { + m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer)); + + rdcarray imgBarriers; + rdcarray bufBarriers; + + for(uint32_t evIdx = 0; evIdx < eventCount; evIdx++) + { + imgBarriers.clear(); + bufBarriers.clear(); + + const VkDependencyInfoKHR &depInfo = pDependencyInfos[evIdx]; + + for(uint32_t i = 0; i < depInfo.bufferMemoryBarrierCount; i++) + { + if(depInfo.pBufferMemoryBarriers[i].buffer != VK_NULL_HANDLE) + { + bufBarriers.push_back(depInfo.pBufferMemoryBarriers[i]); + bufBarriers.back().buffer = Unwrap(bufBarriers.back().buffer); + + RemapQueueFamilyIndices(bufBarriers.back().srcQueueFamilyIndex, + bufBarriers.back().dstQueueFamilyIndex); + } + } + + for(uint32_t i = 0; i < depInfo.imageMemoryBarrierCount; i++) + { + if(depInfo.pImageMemoryBarriers[i].image != VK_NULL_HANDLE) + { + imgBarriers.push_back(depInfo.pImageMemoryBarriers[i]); + imgBarriers.back().image = Unwrap(imgBarriers.back().image); + + RemapQueueFamilyIndices(imgBarriers.back().srcQueueFamilyIndex, + imgBarriers.back().dstQueueFamilyIndex); + } + } + + // see top of this file for current event/fence handling + + VkEventCreateInfo evInfo = { + VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, NULL, 0, + }; + + VkEvent ev = VK_NULL_HANDLE; + ObjDisp(commandBuffer)->CreateEvent(Unwrap(GetDev()), &evInfo, NULL, &ev); + // don't wrap this event + + ObjDisp(commandBuffer)->ResetEvent(Unwrap(GetDev()), ev); + + VkDependencyInfoKHR UnwrappedDependencyInfo = depInfo; + + UnwrappedDependencyInfo.pBufferMemoryBarriers = bufBarriers.data(); + UnwrappedDependencyInfo.bufferMemoryBarrierCount = (uint32_t)bufBarriers.size(); + UnwrappedDependencyInfo.pImageMemoryBarriers = imgBarriers.data(); + UnwrappedDependencyInfo.imageMemoryBarrierCount = (uint32_t)imgBarriers.size(); + + if(IsActiveReplaying(m_State)) + { + if(InRerecordRange(m_LastCmdBufferID)) + commandBuffer = RerecordCmdBuf(m_LastCmdBufferID); + else + commandBuffer = VK_NULL_HANDLE; + + // register 
to clean this event up once we're done replaying this section of the log + m_CleanupEvents.push_back(ev); + } + else + { + // since we cache and replay this command buffer we can't clean up this event just when + // we're done replaying this section. We have to keep this event until shutdown + m_PersistentEvents.push_back(ev); + + for(uint32_t i = 0; i < depInfo.imageMemoryBarrierCount; i++) + { + const VkImageMemoryBarrier2KHR &b = depInfo.pImageMemoryBarriers[i]; + if(b.image != VK_NULL_HANDLE && b.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + b.newLayout != VK_IMAGE_LAYOUT_UNDEFINED) + { + m_BakedCmdBufferInfo[m_LastCmdBufferID].resourceUsage.push_back(make_rdcpair( + GetResID(b.image), EventUsage(m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID, + ResourceUsage::Discard))); + } + } + } + + GetResourceManager()->RecordBarriers(m_BakedCmdBufferInfo[m_LastCmdBufferID].imageStates, + m_commandQueueFamilies[m_LastCmdBufferID], + (uint32_t)imgBarriers.size(), &imgBarriers[0]); + + if(commandBuffer != VK_NULL_HANDLE) + { + // now sanitise layouts before passing to vulkan + for(VkImageMemoryBarrier2KHR &barrier : imgBarriers) + { + if(barrier.oldLayout == barrier.newLayout) + { + barrier.oldLayout = barrier.newLayout = VK_IMAGE_LAYOUT_UNDEFINED; + continue; + } + + if(!IsLoading(m_State) && barrier.oldLayout == VK_IMAGE_LAYOUT_PREINITIALIZED) + { + // This is a transition from PRENITIALIZED, but we've already done this barrier once + // (when loading); Since we couldn't transition back to PREINITIALIZED, we instead left + // the image in GENERAL. 
+ barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + } + else + { + SanitiseReplayImageLayout(barrier.oldLayout); + } + SanitiseReplayImageLayout(barrier.newLayout); + } + + ObjDisp(commandBuffer)->CmdSetEvent2KHR(Unwrap(commandBuffer), ev, &UnwrappedDependencyInfo); + ObjDisp(commandBuffer)->CmdWaitEvents2KHR(Unwrap(commandBuffer), 1, &ev, &UnwrappedDependencyInfo); + + if(m_ReplayOptions.optimisation != ReplayOptimisationLevel::Fastest) + { + for(uint32_t i = 0; i < depInfo.imageMemoryBarrierCount; i++) + { + const VkImageMemoryBarrier2KHR &b = depInfo.pImageMemoryBarriers[i]; + if(b.image != VK_NULL_HANDLE && b.oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + b.newLayout != VK_IMAGE_LAYOUT_UNDEFINED) + { + GetDebugManager()->FillWithDiscardPattern( + commandBuffer, DiscardType::UndefinedTransition, b.image, b.newLayout, + b.subresourceRange, {{0, 0}, {~0U, ~0U}}); + } + } + } + } + } + } + + return true; +} + +void WrappedVulkan::vkCmdWaitEvents2KHR(VkCommandBuffer commandBuffer, uint32_t eventCount, + const VkEvent *pEvents, + const VkDependencyInfoKHR *pDependencyInfos) +{ + { + size_t memSize = sizeof(VkEvent) * eventCount + sizeof(VkDependencyInfoKHR) * eventCount; + + // because we pass in the base struct, this includes the size for the VkDependencyInfoKHR itself + for(uint32_t i = 0; i < eventCount; i++) + memSize += GetNextPatchSize((const void *)&pDependencyInfos[i]); + + byte *tempMem = GetTempMemory(memSize); + + VkEvent *ev = (VkEvent *)tempMem; + VkDependencyInfoKHR *depInfo = (VkDependencyInfoKHR *)(ev + eventCount); + tempMem = (byte *)(depInfo + eventCount); + + for(uint32_t i = 0; i < eventCount; i++) + { + ev[i] = Unwrap(pEvents[i]); + depInfo[i] = *UnwrapStructAndChain(m_State, tempMem, &pDependencyInfos[i]); + } + + SERIALISE_TIME_CALL( + ObjDisp(commandBuffer)->CmdWaitEvents2KHR(Unwrap(commandBuffer), eventCount, ev, depInfo)); + } + + if(IsCaptureMode(m_State)) + { + VkResourceRecord *record = GetRecord(commandBuffer); + + CACHE_THREAD_SERIALISER(); 
+ + SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdWaitEvents2KHR); + Serialise_vkCmdWaitEvents2KHR(ser, commandBuffer, eventCount, pEvents, pDependencyInfos); + + for(uint32_t i = 0; i < eventCount; i++) + { + if(pDependencyInfos[i].imageMemoryBarrierCount > 0) + { + GetResourceManager()->RecordBarriers( + record->cmdInfo->imageStates, record->pool->cmdPoolInfo->queueFamilyIndex, + pDependencyInfos[i].imageMemoryBarrierCount, pDependencyInfos[i].pImageMemoryBarriers); + } + } + + record->AddChunk(scope.Get(&record->cmdInfo->alloc)); + for(uint32_t i = 0; i < eventCount; i++) + record->MarkResourceFrameReferenced(GetResID(pEvents[i]), eFrameRef_Read); + } +} + #if defined(VK_USE_PLATFORM_WIN32_KHR) VkResult WrappedVulkan::vkImportSemaphoreWin32HandleKHR( @@ -1226,3 +1560,13 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkWaitSemaphores, VkDevice device, INSTANTIATE_FUNCTION_SERIALISED(void, vkSignalSemaphore, VkDevice device, const VkSemaphoreSignalInfo *pSignalInfo); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdSetEvent2KHR, VkCommandBuffer commandBuffer, + VkEvent event, const VkDependencyInfoKHR *pDependencyInfo); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdResetEvent2KHR, VkCommandBuffer commandBuffer, + VkEvent event, VkPipelineStageFlags2KHR stageMask); + +INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdWaitEvents2KHR, VkCommandBuffer commandBuffer, + uint32_t eventCount, const VkEvent *pEvents, + const VkDependencyInfoKHR *pDependencyInfos); diff --git a/util/test/demos/CMakeLists.txt b/util/test/demos/CMakeLists.txt index e13d71ef1..a0aa7b333 100644 --- a/util/test/demos/CMakeLists.txt +++ b/util/test/demos/CMakeLists.txt @@ -49,6 +49,7 @@ set(VULKAN_SRC vk/vk_spec_constants.cpp vk/vk_spirv_13_shaders.cpp vk/vk_structured_buffer_nested.cpp + vk/vk_sync2.cpp vk/vk_texture_zoo.cpp vk/vk_triangle_fan.cpp vk/vk_validation_use.cpp diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj index f66f1d3d8..6ddaa2a09 100644 --- a/util/test/demos/demos.vcxproj 
+++ b/util/test/demos/demos.vcxproj @@ -301,6 +301,7 @@ + diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters index 4924dccf9..6a83f9b77 100644 --- a/util/test/demos/demos.vcxproj.filters +++ b/util/test/demos/demos.vcxproj.filters @@ -589,6 +589,9 @@ Vulkan\demos + + Vulkan\demos + diff --git a/util/test/demos/vk/vk_helpers.h b/util/test/demos/vk/vk_helpers.h index 6bb710e41..bf34b4af8 100644 --- a/util/test/demos/vk/vk_helpers.h +++ b/util/test/demos/vk/vk_helpers.h @@ -370,11 +370,11 @@ struct FenceCreateInfo : public VkFenceCreateInfo struct EventCreateInfo : public VkEventCreateInfo { - EventCreateInfo() : VkEventCreateInfo() + EventCreateInfo(VkEventCreateFlags flags = 0) : VkEventCreateInfo() { sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO; pNext = NULL; - flags = 0; + this->flags = flags; } operator const VkEventCreateInfo *() const { return this; } diff --git a/util/test/demos/vk/vk_sync2.cpp b/util/test/demos/vk/vk_sync2.cpp new file mode 100644 index 000000000..d79a99f49 --- /dev/null +++ b/util/test/demos/vk/vk_sync2.cpp @@ -0,0 +1,310 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2021 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#include "vk_test.h" + +RD_TEST(VK_Synchronization_2, VulkanGraphicsTest) +{ + static constexpr const char *Description = "Tests use of KHR_VK_Synchronization2."; + + void Prepare(int argc, char **argv) + { + instExts.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + devExts.push_back(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME); + devExts.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME); + + VulkanGraphicsTest::Prepare(argc, argv); + + if(!Avail.empty()) + return; + + static VkPhysicalDeviceSynchronization2FeaturesKHR sync2Features = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES_KHR, + }; + + getPhysFeatures2(&sync2Features); + + if(!sync2Features.synchronization2) + Avail = "'synchronization2' not available"; + + devInfoNext = &sync2Features; + } + + int main() + { + // initialise, create window, create context, etc + if(!Init()) + return 3; + + VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo()); + + vkh::GraphicsPipelineCreateInfo pipeCreateInfo; + + pipeCreateInfo.layout = layout; + pipeCreateInfo.renderPass = mainWindow->rp; + + pipeCreateInfo.vertexInputState.vertexBindingDescriptions = {vkh::vertexBind(0, DefaultA2V)}; + pipeCreateInfo.vertexInputState.vertexAttributeDescriptions = { + vkh::vertexAttr(0, 0, DefaultA2V, pos), vkh::vertexAttr(1, 0, DefaultA2V, col), + vkh::vertexAttr(2, 0, DefaultA2V, uv), + }; + + 
pipeCreateInfo.stages = { + CompileShaderModule(VKDefaultVertex, ShaderLang::glsl, ShaderStage::vert, "main"), + CompileShaderModule(VKDefaultPixel, ShaderLang::glsl, ShaderStage::frag, "main"), + }; + + VkPipeline pipe = createGraphicsPipeline(pipeCreateInfo); + + AllocatedBuffer vb( + this, vkh::BufferCreateInfo(sizeof(DefaultTri), VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT), + VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU})); + + vb.upload(DefaultTri); + + vkh::ImageCreateInfo preinitInfo(4, 4, 0, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT); + preinitInfo.tiling = VK_IMAGE_TILING_LINEAR; + preinitInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + + const VkPhysicalDeviceMemoryProperties *props = NULL; + vmaGetMemoryProperties(allocator, &props); + + VkImage unboundImg = VK_NULL_HANDLE; + vkCreateImage(device, preinitInfo, NULL, &unboundImg); + setName(unboundImg, "Unbound image"); + + VkEvent ev = VK_NULL_HANDLE; + CHECK_VKR(vkCreateEvent(device, vkh::EventCreateInfo(VK_EVENT_CREATE_DEVICE_ONLY_BIT_KHR), NULL, + &ev)); + + VkQueryPoolCreateInfo poolInfo = {VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO}; + poolInfo.queryType = VK_QUERY_TYPE_TIMESTAMP; + poolInfo.queryCount = 4; + + VkQueryPool pool; + vkCreateQueryPool(device, &poolInfo, NULL, &pool); + + int queryIdx = 0; + + while(Running()) + { + VkImage preinitImg = VK_NULL_HANDLE; + VkDeviceMemory preinitMem = VK_NULL_HANDLE; + + vkCreateImage(device, preinitInfo, NULL, &preinitImg); + + setName(preinitImg, "Image:Preinitialised"); + + AllocatedImage undefImg( + this, vkh::ImageCreateInfo(4, 4, 0, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT), + VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY})); + + setName(undefImg.image, "Image:Undefined"); + + { + VkMemoryRequirements mrq; + vkGetImageMemoryRequirements(device, preinitImg, &mrq); + + VkMemoryAllocateInfo info = {}; + info.sType = 
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + info.allocationSize = mrq.size; + info.memoryTypeIndex = 100; + + for(uint32_t i = 0; i < props->memoryTypeCount; i++) + { + if(mrq.memoryTypeBits & (1 << i) && + (props->memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) + { + info.memoryTypeIndex = i; + break; + } + } + + TEST_ASSERT(info.memoryTypeIndex != 100, "Couldn't find compatible memory type"); + + vkAllocateMemory(device, &info, NULL, &preinitMem); + vkBindImageMemory(device, preinitImg, preinitMem, 0); + + void *data = NULL; + vkMapMemory(device, preinitMem, 0, mrq.size, 0, &data); + memset(data, 0x40, (size_t)mrq.size); + vkUnmapMemory(device, preinitMem); + } + + VkCommandBuffer cmd = GetCommandBuffer(); + + vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo()); + + vkCmdResetQueryPool(cmd, pool, queryIdx % 4, 1); + + vkCmdWriteTimestamp2KHR(cmd, VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR, pool, queryIdx % 4); + + queryIdx++; + + VkImage swapimg = mainWindow->GetImage(); + if((size_t)curFrame <= mainWindow->GetCount()) + setName(swapimg, "Image:Swapchain"); + + setMarker(cmd, "Before Transition"); + + // after the first N frames, we expect the swapchain to be in PRESENT_SRC + vkh::cmdPipelineBarrier(cmd, + { + vkh::ImageMemoryBarrier(0, VK_ACCESS_TRANSFER_WRITE_BIT, + (size_t)curFrame <= mainWindow->GetCount() + ? 
VK_IMAGE_LAYOUT_UNDEFINED + : VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_IMAGE_LAYOUT_GENERAL, swapimg), + }); + + VkDependencyInfoKHR dependency = {VK_STRUCTURE_TYPE_DEPENDENCY_INFO_KHR}; + + VkBufferMemoryBarrier2KHR bufBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2_KHR}; + bufBarrier.buffer = vb.buffer; + bufBarrier.srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR; + bufBarrier.dstAccessMask = + VK_ACCESS_2_TRANSFER_READ_BIT_KHR | VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT_KHR; + bufBarrier.srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + bufBarrier.dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT_KHR | + VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT_KHR; + bufBarrier.size = VK_WHOLE_SIZE; + + dependency.bufferMemoryBarrierCount = 1; + dependency.pBufferMemoryBarriers = &bufBarrier; + + VkImageMemoryBarrier2KHR imgBarrier[2] = {}; + imgBarrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR; + imgBarrier[0].subresourceRange = vkh::ImageSubresourceRange(); + imgBarrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2_KHR; + imgBarrier[1].subresourceRange = vkh::ImageSubresourceRange(); + + imgBarrier[0].srcAccessMask = VK_ACCESS_2_NONE_KHR; + imgBarrier[0].srcStageMask = VK_PIPELINE_STAGE_2_NONE_KHR; + imgBarrier[0].dstAccessMask = + VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR | VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT_KHR; + imgBarrier[0].dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT_KHR; + imgBarrier[0].oldLayout = (size_t)curFrame <= mainWindow->GetCount() + ? 
VK_IMAGE_LAYOUT_UNDEFINED + : VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + imgBarrier[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; + imgBarrier[0].image = swapimg; + + dependency.imageMemoryBarrierCount = 1; + dependency.pImageMemoryBarriers = imgBarrier; + + vkCmdPipelineBarrier2KHR(cmd, &dependency); + + // the manual images are transitioned into general for copying, from pre-initialised and + // undefined + vkh::cmdPipelineBarrier( + cmd, { + vkh::ImageMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_PREINITIALIZED, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, preinitImg), + vkh::ImageMemoryBarrier(0, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, undefImg.image), + }); + + // do two barriers that don't do anything useful but define no layout transition and don't + // discard + imgBarrier[0].srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR; + imgBarrier[0].srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + imgBarrier[0].dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR; + imgBarrier[0].dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR; + imgBarrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgBarrier[0].newLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgBarrier[0].image = swapimg; + + imgBarrier[1].srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT_KHR; + imgBarrier[1].srcStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT_KHR; + imgBarrier[1].dstAccessMask = VK_ACCESS_2_SHADER_READ_BIT_KHR; + imgBarrier[1].dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT_KHR; + imgBarrier[1].oldLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + imgBarrier[1].newLayout = VK_IMAGE_LAYOUT_PREINITIALIZED; + imgBarrier[1].image = preinitImg; + + dependency.bufferMemoryBarrierCount = 0; + + vkCmdResetEvent2KHR(cmd, ev, VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT_KHR); + + vkCmdSetEvent2KHR(cmd, ev, &dependency); + + vkCmdClearColorImage(cmd, swapimg, VK_IMAGE_LAYOUT_GENERAL, + vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f), 1, + 
vkh::ImageSubresourceRange()); + + VkImageCopy region = { + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, + {0, 0, 0}, + {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, + {0, 0, 0}, + {4, 4, 1}, + }; + + vkCmdCopyImage(cmd, preinitImg, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, undefImg.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + vkCmdWaitEvents2KHR(cmd, 1, &ev, &dependency); + + vkCmdBeginRenderPass( + cmd, vkh::RenderPassBeginInfo(mainWindow->rp, mainWindow->GetFB(), mainWindow->scissor), + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe); + vkCmdSetViewport(cmd, 0, 1, &mainWindow->viewport); + vkCmdSetScissor(cmd, 0, 1, &mainWindow->scissor); + vkh::cmdBindVertexBuffers(cmd, 0, {vb.buffer}, {0}); + vkCmdDraw(cmd, 3, 1, 0, 0); + + vkCmdEndRenderPass(cmd); + + FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL); + + vkEndCommandBuffer(cmd); + + Submit(0, 1, {cmd}, {}, NULL, VK_NULL_HANDLE, true); + + Present(); + + vkDeviceWaitIdle(device); + + vkDestroyImage(device, preinitImg, NULL); + vkFreeMemory(device, preinitMem, NULL); + + undefImg.free(); + } + + vkDestroyQueryPool(device, pool, NULL); + vkDestroyEvent(device, ev, NULL); + + return 0; + } +}; + +REGISTER_TEST(); diff --git a/util/test/demos/vk/vk_test.cpp b/util/test/demos/vk/vk_test.cpp index 8fb4b68b7..3265ed23a 100644 --- a/util/test/demos/vk/vk_test.cpp +++ b/util/test/demos/vk/vk_test.cpp @@ -767,7 +767,7 @@ void VulkanGraphicsTest::FinishUsingBackbuffer(VkCommandBuffer cmd, VkAccessFlag void VulkanGraphicsTest::Submit(int index, int totalSubmits, const std::vector &cmds, const std::vector &seccmds, VulkanWindow *window, - VkQueue q) + VkQueue q, bool sync2) { if(window == NULL) window = mainWindow; @@ -775,7 +775,7 @@ void VulkanGraphicsTest::Submit(int index, int totalSubmits, const std::vectorSubmit(index, totalSubmits, cmds, seccmds, q); + window->Submit(index, totalSubmits, cmds, seccmds, q, sync2); } void 
VulkanGraphicsTest::Present(VulkanWindow *window, VkQueue q) @@ -1295,37 +1295,75 @@ void VulkanWindow::Acquire() } void VulkanWindow::Submit(int index, int totalSubmits, const std::vector &cmds, - const std::vector &seccmds, VkQueue q) + const std::vector &seccmds, VkQueue q, bool sync2) { - VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; - - VkSubmitInfo submit = vkh::SubmitInfo(cmds); - - if(index == 0) - { - submit.waitSemaphoreCount = 1; - submit.pWaitDstStageMask = &waitStage; - submit.pWaitSemaphores = &renderStartSemaphore; - } - - if(index == totalSubmits - 1) - { - submit.signalSemaphoreCount = 1; - submit.pSignalSemaphores = &renderEndSemaphore; - } - VkFence fence; CHECK_VKR(vkCreateFence(m_Test->device, vkh::FenceCreateInfo(), NULL, &fence)); fences.insert(fence); + if(sync2) + { + VkSubmitInfo2KHR submit = {VK_STRUCTURE_TYPE_SUBMIT_INFO_2_KHR}; + + std::vector cmdSubmits; + for(VkCommandBuffer cmd : cmds) + cmdSubmits.push_back({VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO_KHR, NULL, cmd, 0}); + + submit.commandBufferInfoCount = (uint32_t)cmdSubmits.size(); + submit.pCommandBufferInfos = cmdSubmits.data(); + + VkSemaphoreSubmitInfoKHR renderStart = {}, renderEnd = {}; + + if(index == 0) + { + renderStart.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + renderStart.semaphore = renderStartSemaphore; + renderStart.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR; + + submit.waitSemaphoreInfoCount = 1; + submit.pWaitSemaphoreInfos = &renderStart; + } + + if(index == totalSubmits - 1) + { + renderEnd.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO_KHR; + renderEnd.semaphore = renderEndSemaphore; + renderEnd.stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT_KHR; + + submit.signalSemaphoreInfoCount = 1; + submit.pSignalSemaphoreInfos = &renderEnd; + } + + CHECK_VKR(vkQueueSubmit2KHR(q, 1, &submit, fence)); + } + else + { + VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + VkSubmitInfo submit = 
vkh::SubmitInfo(cmds); + + if(index == 0) + { + submit.waitSemaphoreCount = 1; + submit.pWaitDstStageMask = &waitStage; + submit.pWaitSemaphores = &renderStartSemaphore; + } + + if(index == totalSubmits - 1) + { + submit.signalSemaphoreCount = 1; + submit.pSignalSemaphores = &renderEndSemaphore; + } + + CHECK_VKR(vkQueueSubmit(q, 1, &submit, fence)); + } + for(const VkCommandBuffer &cmd : cmds) pendingCommandBuffers[0].push_back(std::make_pair(cmd, fence)); for(const VkCommandBuffer &cmd : seccmds) pendingCommandBuffers[1].push_back(std::make_pair(cmd, fence)); - - CHECK_VKR(vkQueueSubmit(q, 1, &submit, fence)); } void VulkanWindow::Present(VkQueue queue) diff --git a/util/test/demos/vk/vk_test.h b/util/test/demos/vk/vk_test.h index c9e0b3b7a..aa538fb98 100644 --- a/util/test/demos/vk/vk_test.h +++ b/util/test/demos/vk/vk_test.h @@ -141,7 +141,7 @@ struct VulkanWindow : public GraphicsWindow bool Initialised() { return swap != VK_NULL_HANDLE; } VkCommandBuffer GetCommandBuffer(VkCommandBufferLevel level); void Submit(int index, int totalSubmits, const std::vector &cmds, - const std::vector &seccmds, VkQueue q); + const std::vector &seccmds, VkQueue q, bool sync2); void Present(VkQueue q); void Acquire(); @@ -187,7 +187,7 @@ struct VulkanGraphicsTest : public GraphicsTest VulkanWindow *window = NULL); void Submit(int index, int totalSubmits, const std::vector &cmds, const std::vector &seccmds = {}, VulkanWindow *window = NULL, - VkQueue q = VK_NULL_HANDLE); + VkQueue q = VK_NULL_HANDLE, bool sync2 = false); void Present(VulkanWindow *window = NULL, VkQueue q = VK_NULL_HANDLE); VkPipelineShaderStageCreateInfo CompileShaderModule( diff --git a/util/test/tests/Vulkan/VK_Synchronization_2.py b/util/test/tests/Vulkan/VK_Synchronization_2.py new file mode 100644 index 000000000..94a6a9a34 --- /dev/null +++ b/util/test/tests/Vulkan/VK_Synchronization_2.py @@ -0,0 +1,93 @@ +import rdtest +import renderdoc as rd + + +class VK_Synchronization_2(rdtest.TestCase): + 
demos_test_name = 'VK_Synchronization_2' + + def get_capture_options(self): + opts = rd.CaptureOptions() + + # Ref all resources to pull in the image with unbound data + opts.refAllResources = True + + return opts + + def check_capture(self): + self.controller.SetFrameEvent(0, False) + + pipe: rd.VKState = self.controller.GetVulkanPipelineState() + + # Check that the layout is reported correctly at the start of the frame + for img in pipe.images: + img: rd.VKImageData + res = self.get_resource(img.resourceId) + if res.name == "Image:Preinitialised": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_PREINITIALIZED": + raise rdtest.TestFailureException("Pre-initialised image is in {} layout".format(img.layouts[0].name)) + elif res.name == "Image:Undefined": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_UNDEFINED": + raise rdtest.TestFailureException("Undefined image is in {} layout".format(img.layouts[0].name)) + elif res.name == "Image:Swapchain": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_PRESENT_SRC_KHR": + raise rdtest.TestFailureException("Swapchain image is in {} layout".format(img.layouts[0].name)) + + draw = self.find_draw("Before Transition") + + self.check(draw is not None) + + self.controller.SetFrameEvent(draw.eventId, False) + + pipe: rd.VKState = self.controller.GetVulkanPipelineState() + + pre_init = rd.ResourceId() + undef_img = rd.ResourceId() + + # Check that the layout is reported correctly before transitions still + for img in pipe.images: + img: rd.VKImageData + res = self.get_resource(img.resourceId) + if res.name == "Image:Preinitialised": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_PREINITIALIZED": + raise rdtest.TestFailureException("Pre-initialised image is in {} layout".format(img.layouts[0].name)) + pre_init = img.resourceId + elif res.name == "Image:Undefined": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_UNDEFINED": + raise rdtest.TestFailureException("Undefined image is in {} layout".format(img.layouts[0].name)) + undef_img = 
img.resourceId + elif res.name == "Image:Swapchain": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_PRESENT_SRC_KHR": + raise rdtest.TestFailureException("Swapchain image is in {} layout".format(img.layouts[0].name)) + + draw = self.find_draw("vkCmdDraw") + + self.check(draw is not None) + + self.controller.SetFrameEvent(draw.eventId, False) + + # Check that the backbuffer didn't get discarded + self.check_triangle(out=draw.outputs[0]) + + col = [float(0x40) / 255.0] * 4 + + # The pre-initialised image should have the correct data still also + self.check_triangle(out=pre_init, back=col, fore=col) + + # we copied its contents into the undefined image so it should also have the right colour + self.check_triangle(out=undef_img, back=col, fore=col) + + pipe: rd.VKState = self.controller.GetVulkanPipelineState() + + # Check that after transitions, the images are in the right state + for img in pipe.images: + img: rd.VKImageData + res = self.get_resource(img.resourceId) + if res.name == "Image:Preinitialised": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL": + raise rdtest.TestFailureException("Pre-initialised image is in {} layout".format(img.layouts[0].name)) + elif res.name == "Image:Undefined": + if img.layouts[0].name != "VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL": + raise rdtest.TestFailureException("Undefined image is in {} layout".format(img.layouts[0].name)) + elif img.resourceId == pipe.currentPass.framebuffer.attachments[0].imageResourceId: + if img.layouts[0].name != "VK_IMAGE_LAYOUT_GENERAL": + raise rdtest.TestFailureException("Rendered swapchain image is in {} layout".format(img.layouts[0].name))