From cfefa5d5ef7127b87e9439e5dc3f626695debb5f Mon Sep 17 00:00:00 2001 From: baldurk Date: Mon, 10 Feb 2025 17:51:36 +0000 Subject: [PATCH] Add a test for new group ops --- util/test/demos/CMakeLists.txt | 1 + util/test/demos/demos.vcxproj | 1 + util/test/demos/demos.vcxproj.filters | 3 + util/test/demos/vk/vk_helpers.h | 26 + util/test/demos/vk/vk_subgroup_zoo.cpp | 451 ++++++++++++++++++ util/test/rdtest/runner.py | 2 +- util/test/rdtest/testcase.py | 4 +- .../D3D12/D3D12_Multi_Wait_Before_Signal.py | 2 +- util/test/tests/D3D12/D3D12_RGP_Capture.py | 4 +- util/test/tests/Vulkan/VK_Subgroup_Zoo.py | 233 +++++++++ 10 files changed, 721 insertions(+), 6 deletions(-) create mode 100644 util/test/demos/vk/vk_subgroup_zoo.cpp create mode 100644 util/test/tests/Vulkan/VK_Subgroup_Zoo.py diff --git a/util/test/demos/CMakeLists.txt b/util/test/demos/CMakeLists.txt index 580496043..b0679e5de 100644 --- a/util/test/demos/CMakeLists.txt +++ b/util/test/demos/CMakeLists.txt @@ -155,6 +155,7 @@ set(VULKAN_SRC vk/vk_simple_triangle.cpp vk/vk_spec_constants.cpp vk/vk_spirv_13_shaders.cpp + vk/vk_subgroup_zoo.cpp vk/vk_structured_buffer_nested.cpp vk/vk_sync2.cpp vk/vk_texture_zoo.cpp diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj index 71c4b0ff9..3aea004e8 100644 --- a/util/test/demos/demos.vcxproj +++ b/util/test/demos/demos.vcxproj @@ -341,6 +341,7 @@ + diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters index 35bd94d99..95bb97011 100644 --- a/util/test/demos/demos.vcxproj.filters +++ b/util/test/demos/demos.vcxproj.filters @@ -697,6 +697,9 @@ Vulkan\demos + + Vulkan\demos + diff --git a/util/test/demos/vk/vk_helpers.h b/util/test/demos/vk/vk_helpers.h index 40469574f..ac0eea9da 100644 --- a/util/test/demos/vk/vk_helpers.h +++ b/util/test/demos/vk/vk_helpers.h @@ -179,6 +179,19 @@ void cmdPushDescriptorSets(VkCommandBuffer cmd, VkPipelineBindPoint pipelineBind VkPipelineLayout layout, uint32_t set, std::vector 
writes); +template +void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, VkShaderStageFlags stages, + const T &val) +{ + vkCmdPushConstants(cmd, layout, stages, 0, sizeof(T), &val); +} + +template +void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, const T &val) +{ + cmdPushConstants(cmd, layout, VK_SHADER_STAGE_ALL, val); +} + struct ApplicationInfo : public VkApplicationInfo { ApplicationInfo() : VkApplicationInfo() { sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; } @@ -474,6 +487,19 @@ struct BufferMemoryBarrier : public VkBufferMemoryBarrier } }; +#undef MemoryBarrier + +struct MemoryBarrier : public VkMemoryBarrier +{ + MemoryBarrier(VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask) + { + sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; + pNext = NULL; + this->srcAccessMask = srcAccessMask; + this->dstAccessMask = dstAccessMask; + } +}; + struct CommandBufferAllocateInfo : public VkCommandBufferAllocateInfo { CommandBufferAllocateInfo(VkCommandPool commandPool, uint32_t commandBufferCount, diff --git a/util/test/demos/vk/vk_subgroup_zoo.cpp b/util/test/demos/vk/vk_subgroup_zoo.cpp new file mode 100644 index 000000000..94682f894 --- /dev/null +++ b/util/test/demos/vk/vk_subgroup_zoo.cpp @@ -0,0 +1,451 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2019-2024 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + ******************************************************************************/ + +#include "3rdparty/fmt/core.h" +#include "vk_test.h" + +RD_TEST(VK_Subgroup_Zoo, VulkanGraphicsTest) +{ + static constexpr const char *Description = + "Test of behaviour around subgroup operations in shaders."; + + const std::string common = R"EOSHADER( + +#version 460 core +#extension GL_KHR_shader_subgroup_basic : enable +#extension GL_KHR_shader_subgroup_ballot : enable +#extension GL_KHR_shader_subgroup_vote : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +#if FEAT_SHUFFLE +#extension GL_KHR_shader_subgroup_shuffle : enable +#endif + +#if FEAT_SHUFFLE_RELATIVE +#extension GL_KHR_shader_subgroup_shuffle_relative : enable +#endif + +#if FEAT_CLUSTERED +#extension GL_KHR_shader_subgroup_clustered : enable +#endif + +#if FEAT_QUAD +#extension GL_KHR_shader_subgroup_quad : enable +#endif + +#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED +#extension GL_KHR_shader_subgroup_rotate : enable +#endif + +layout(push_constant) uniform PushData +{ + uint test; +} push; + +#define IsTest(x) (push.test == x) + +)EOSHADER"; + + const std::string vertex = common + R"EOSHADER( + +layout(location = 0) out vec4 vertdata; + +void main() +{ + vec2 positions[] = { + vec2(-1.0f, 1.0f), + vec2( 1.0f, 1.0f), + vec2(-1.0f, -1.0f), + vec2( 1.0f, -1.0f), + }; + + float scale = 1.0f; + if(IsTest(2)) + scale = 0.2f; + + gl_Position = vec4(positions[gl_VertexIndex]*vec2(scale,scale), 0, 1); + + 
vertdata = vec4(0); + + if(IsTest(0)) + vertdata = vec4(gl_SubgroupInvocationID, 0, 0, 1); + else if(IsTest(3)) + vertdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0); +} + +)EOSHADER"; + + const std::string pixel = common + R"EOSHADER( + +layout(location = 0) in vec4 vertdata; + +layout(location = 0, index = 0) out vec4 Color; + +void main() +{ + vec4 fragdata = vec4(0); + + if(IsTest(1) || IsTest(2)) + fragdata = vec4(gl_SubgroupInvocationID, 0, 0, 1); + else if(IsTest(4)) + fragdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0); + + Color = vertdata + fragdata; +} + +)EOSHADER"; + + const std::string comp = common + R"EOSHADER( + +struct Output +{ + vec4 vals[1024]; +}; + +layout(binding = 0, std430) buffer outbuftype { + Output data[COMP_TESTS]; +} outbuf; + +layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in; + +void main() +{ + vec4 data = vec4(0); + + if(IsTest(0)) + data = vec4(gl_SubgroupInvocationID, 0, 0, 0); + else if(IsTest(1)) + data = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0); + + outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data; +} + +)EOSHADER"; + + VkSubgroupFeatureFlags ops = 0; + + void Prepare(int argc, char **argv) + { + VulkanGraphicsTest::Prepare(argc, argv); + + if(!Avail.empty()) + return; + + if(devVersion < VK_API_VERSION_1_1) + Avail = "Vulkan device version isn't 1.1"; + + static VkPhysicalDeviceSubgroupProperties subProps = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, + }; + + getPhysProperties2(&subProps); + + if(subProps.subgroupSize < 16) + Avail = "Subgroup size is less than 16"; + + // require at least a few ops so we only have a few conditional compilations + const VkSubgroupFeatureFlags requiredOps = + VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT | + VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT; + + ops = subProps.supportedOperations; + + 
if((subProps.supportedOperations & requiredOps) != requiredOps) + Avail = "Missing ops support"; + + // require all stages for simplicity + if((subProps.supportedStages & VK_SHADER_STAGE_VERTEX_BIT) == 0) + Avail = "Missing vertex subgroup support"; + + if((subProps.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0) + Avail = "Missing pixel subgroup support"; + + if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0) + Avail = "Missing compute subgroup support"; + } + + int main() + { + // initialise, create window, create context, etc + if(!Init()) + return 3; + + VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({ + {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT}, + })); + + VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo( + {setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 4)})); + + const uint32_t imgDim = 128; + + AllocatedImage img( + this, + vkh::ImageCreateInfo(imgDim, imgDim, 0, VK_FORMAT_R32G32B32A32_SFLOAT, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), + VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY})); + + VkImageView imgview = createImageView( + vkh::ImageViewCreateInfo(img.image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32G32B32A32_SFLOAT)); + + vkh::RenderPassCreator renderPassCreateInfo; + + renderPassCreateInfo.attachments.push_back( + vkh::AttachmentDescription(VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_GENERAL, VK_ATTACHMENT_LOAD_OP_CLEAR)); + + renderPassCreateInfo.addSubpass({VkAttachmentReference({0, VK_IMAGE_LAYOUT_GENERAL})}); + + VkRenderPass renderPass = createRenderPass(renderPassCreateInfo); + + VkFramebuffer framebuffer = + createFramebuffer(vkh::FramebufferCreateInfo(renderPass, {imgview}, {imgDim, imgDim})); + + vkh::GraphicsPipelineCreateInfo pipeCreateInfo; + + pipeCreateInfo.renderPass = renderPass; + pipeCreateInfo.layout = layout; + 
pipeCreateInfo.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + + std::map macros; + + int vertTests = 0, pixTests = 0; + int numCompTests = 0; + + { + size_t pos = 0; + while(pos != std::string::npos) + { + pos = pixel.find("IsTest(", pos); + if(pos == std::string::npos) + break; + pos += sizeof("IsTest(") - 1; + pixTests = std::max(pixTests, atoi(pixel.c_str() + pos) + 1); + } + + pos = 0; + while(pos != std::string::npos) + { + pos = vertex.find("IsTest(", pos); + if(pos == std::string::npos) + break; + pos += sizeof("IsTest(") - 1; + vertTests = std::max(vertTests, atoi(vertex.c_str() + pos) + 1); + } + + pos = 0; + while(pos != std::string::npos) + { + pos = comp.find("IsTest(", pos); + if(pos == std::string::npos) + break; + pos += sizeof("IsTest(") - 1; + numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1); + } + } + + const uint32_t numGraphicsTests = std::max(vertTests, pixTests); + + if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) + macros["FEAT_SHUFFLE"] = "1"; + else + macros["FEAT_SHUFFLE"] = "0"; + if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) + macros["FEAT_SHUFFLE_RELATIVE"] = "1"; + else + macros["FEAT_SHUFFLE_RELATIVE"] = "0"; + if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) + macros["FEAT_CLUSTERED"] = "1"; + else + macros["FEAT_CLUSTERED"] = "0"; + if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT) + macros["FEAT_QUAD"] = "1"; + else + macros["FEAT_QUAD"] = "0"; + if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR) + macros["FEAT_ROTATE"] = "1"; + else + macros["FEAT_ROTATE"] = "0"; + if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR) + macros["FEAT_ROTATE_CLUSTERED"] = "1"; + else + macros["FEAT_ROTATE_CLUSTERED"] = "0"; + + pipeCreateInfo.stages = { + CompileShaderModule(vertex, ShaderLang::glsl, ShaderStage::vert, "main", macros, + SPIRVTarget::vulkan11), + CompileShaderModule(pixel, ShaderLang::glsl, ShaderStage::frag, "main", macros, + SPIRVTarget::vulkan11), + }; + + VkPipeline pipe = 
createGraphicsPipeline(pipeCreateInfo); + + std::string comppipe_name[4]; + VkPipeline comppipe[4]; + + macros["COMP_TESTS"] = fmt::format("{}", numCompTests); + + macros["GROUP_SIZE_X"] = "256"; + macros["GROUP_SIZE_Y"] = "1"; + comppipe_name[0] = "256x1"; + comppipe[0] = createComputePipeline(vkh::ComputePipelineCreateInfo( + layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros, + SPIRVTarget::vulkan11))); + + macros["GROUP_SIZE_X"] = "128"; + macros["GROUP_SIZE_Y"] = "2"; + comppipe_name[1] = "128x2"; + comppipe[1] = createComputePipeline(vkh::ComputePipelineCreateInfo( + layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros, + SPIRVTarget::vulkan11))); + + macros["GROUP_SIZE_X"] = "8"; + macros["GROUP_SIZE_Y"] = "128"; + comppipe_name[2] = "8x128"; + comppipe[2] = createComputePipeline(vkh::ComputePipelineCreateInfo( + layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros, + SPIRVTarget::vulkan11))); + + macros["GROUP_SIZE_X"] = "150"; + macros["GROUP_SIZE_Y"] = "1"; + comppipe_name[3] = "150x1"; + comppipe[3] = createComputePipeline(vkh::ComputePipelineCreateInfo( + layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros, + SPIRVTarget::vulkan11))); + + AllocatedBuffer bufout( + this, + vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT), + VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU})); + + setName(bufout.buffer, "bufout"); + + VkDescriptorSet set = allocateDescriptorSet(setlayout); + + vkh::updateDescriptorSets( + device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + {vkh::DescriptorBufferInfo(bufout.buffer)})}); + + while(Running()) + { + VkCommandBuffer cmd = GetCommandBuffer(); + + vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo()); + + VkImage swapimg = + StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, 
VK_IMAGE_LAYOUT_GENERAL); + + vkh::cmdPipelineBarrier( + cmd, {vkh::ImageMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_GENERAL, img.image)}); + + vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f)); + + vkh::cmdPipelineBarrier( + cmd, + {vkh::ImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, img.image)}); + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe); + + VkViewport v = {}; + v.maxDepth = 1.0f; + v.width = v.height = (float)imgDim; + + VkRect2D s = {}; + s.extent.width = s.extent.height = imgDim; + + vkCmdSetViewport(cmd, 0, 1, &v); + vkCmdSetScissor(cmd, 0, 1, &s); + + // separate render passes with a fat barrier before each to avoid subgroups crossing draws + + pushMarker(cmd, "Graphics Tests"); + + for(uint32_t i = 0; i < numGraphicsTests; i++) + { + vkh::cmdPipelineBarrier( + cmd, {}, {}, + {vkh::MemoryBarrier(VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT)}); + + vkCmdBeginRenderPass( + cmd, + vkh::RenderPassBeginInfo(renderPass, framebuffer, s, + {vkh::ClearValue(123456.0f, 789.0f, 101112.0f, 0.0f)}), + VK_SUBPASS_CONTENTS_INLINE); + + vkh::cmdPushConstants(cmd, layout, i); + vkCmdDraw(cmd, 4, 1, 0, 0); + vkCmdEndRenderPass(cmd); + } + + popMarker(cmd); + + pushMarker(cmd, "Compute Tests"); + + for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++) + { + vkh::cmdPipelineBarrier( + cmd, {}, + {vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, + bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)}); + + vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0); + + vkh::cmdPipelineBarrier( + cmd, {}, + {vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT, + bufout.buffer, 0, sizeof(Vec4f) * 1024 * 
numCompTests)}); + + pushMarker(cmd, comppipe_name[p]); + + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comppipe[p]); + vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {}); + + for(int i = 0; i < numCompTests; i++) + { + vkh::cmdPushConstants(cmd, layout, i); + vkCmdDispatch(cmd, 1, 1, 1); + } + + popMarker(cmd); + } + + popMarker(cmd); + + FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL); + + vkEndCommandBuffer(cmd); + + SubmitAndPresent({cmd}); + } + + return 0; + } +}; + +REGISTER_TEST(); diff --git a/util/test/rdtest/runner.py b/util/test/rdtest/runner.py index 8907981d9..43f83426e 100644 --- a/util/test/rdtest/runner.py +++ b/util/test/rdtest/runner.py @@ -313,7 +313,7 @@ def run_tests(test_include: str, test_exclude: str, in_process: bool, slow_tests instance = testclass() - supported, unsupported_reason = instance.check_support() + supported, unsupported_reason = instance.check_support(test_include=test_include) if not supported: log.print("Skipping {} as {}".format(name, unsupported_reason)) diff --git a/util/test/rdtest/testcase.py b/util/test/rdtest/testcase.py index 3d38611c6..6edd1d0ac 100644 --- a/util/test/rdtest/testcase.py +++ b/util/test/rdtest/testcase.py @@ -167,7 +167,7 @@ class TestCase: def set_test_list(tests): TestCase._test_list = tests - def check_support(self): + def check_support(self, **kwargs): if self.demos_test_name != '': if self.demos_test_name not in TestCase._test_list: return False,'Test {} not in compiled tests'.format(self.demos_test_name) @@ -680,7 +680,7 @@ class TestCase: raise KeyError("Couldn't parse path {}".format(path)) - def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars): + def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars) -> rd.ShaderVariable: debugged = rd.ShaderVariable() debugged.name = sourceVar.name debugged.type = sourceVar.type diff --git 
a/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py b/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
index 61c833b94..5a73141cd 100644
--- a/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
+++ b/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
@@ -4,7 +4,7 @@ import rdtest
 class D3D12_Multi_Wait_Before_Signal(rdtest.TestCase):
     demos_test_name = 'D3D12_Multi_Wait_Before_Signal'
 
-    def check_support(self):
+    def check_support(self, **kwargs):
         # TODO: Enable this if/when rdoc can reorder from the original submission
         # order, which blocks multiple queues with waits that get signalled by
         # later submissions to other queues.
diff --git a/util/test/tests/D3D12/D3D12_RGP_Capture.py b/util/test/tests/D3D12/D3D12_RGP_Capture.py
index d87bb5b69..780dcd8e1 100644
--- a/util/test/tests/D3D12/D3D12_RGP_Capture.py
+++ b/util/test/tests/D3D12/D3D12_RGP_Capture.py
@@ -10,11 +10,11 @@ except ImportError as ex:
 class D3D12_RGP_Capture(rdtest.TestCase):
     demos_test_name = 'D3D12_Simple_Triangle'
 
-    def check_support(self):
+    def check_support(self, **kwargs):
         if tkinter is None:
             return False, 'tkinter is required but not available'
 
-        return super().check_support()
+        return super().check_support(**kwargs)
 
     def check_capture(self):
         apiprops: rd.APIProperties = self.controller.GetAPIProperties()
diff --git a/util/test/tests/Vulkan/VK_Subgroup_Zoo.py b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
new file mode 100644
index 000000000..9047886a5
--- /dev/null
+++ b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
@@ -0,0 +1,248 @@
+import renderdoc as rd
+import struct
+import rdtest
+
+
+class VK_Subgroup_Zoo(rdtest.TestCase):
+    """Checks shader debugging of the subgroup tests in the VK_Subgroup_Zoo demo.
+
+    Debugged vertex, pixel and compute thread values are compared against the
+    values read back from the capture.
+    """
+
+    demos_test_name = 'VK_Subgroup_Zoo'
+
+    def check_support(self, **kwargs):
+        """Return (supported, reason). Only supported when explicitly selected."""
+        # Only allow this if explicitly run. Use get() so a caller that does not
+        # pass test_include gets 'Disabled test' instead of a KeyError.
+        if kwargs.get('test_include') == 'VK_Subgroup_Zoo':
+            return True, ''
+        return False, 'Disabled test'
+
+    def check_capture(self):
+        """Debug each graphics and compute test and compare with real output."""
+        graphics_tests = [a for a in self.find_action(
+            "Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]
+        compute_dims = [a for a in self.find_action(
+            "Compute Tests").children if 'x' in a.customName]
+
+        rdtest.log.begin_section("Graphics tests")
+
+        # instances to check in instanced draws
+        inst_checks = [0, 1, 5, 10]
+        # pixels to check
+        pixel_checks = [
+            # top quad
+            (0, 0), (1, 0), (0, 1), (1, 1),
+            # middle quad (away from triangle border)
+            (64, 56), (65, 56), (64, 57), (65, 57),
+            # middle quad (on triangle border)
+            (64, 64), (65, 64), (64, 65), (65, 65),
+            # middle quad on other triangle
+            (56, 64), (57, 64), (56, 65), (57, 65),
+        ]
+        # threads to check. largest dimension only (all small dim checked)
+        thread_checks = [
+            # first few
+            0, 1, 2,
+            # near end of 32-subgroup and boundary
+            30, 31, 32,
+            # near end of 64-subgroup and boundary
+            62, 63, 64,
+            # large values spaced out with one near the end of our unaligned size
+            100, 110, 120, 140, 149, 150, 160, 200, 250,
+        ]
+        clear_col = (123456.0, 789.0, 101112.0, 0.0)
+
+        for idx, action in enumerate(graphics_tests):
+            self.controller.SetFrameEvent(action.eventId, False)
+
+            pipe = self.controller.GetPipelineState()
+
+            # check vertex output for every vertex
+            for inst in [inst for inst in inst_checks if inst < action.numInstances]:
+                for view in range(pipe.MultiviewBroadcastCount()):
+
+                    postvs = self.get_postvs(
+                        action, rd.MeshDataStage.VSOut, first_index=0, num_indices=action.numIndices, instance=inst)
+
+                    for vtx in range(action.numIndices):
+                        trace = self.controller.DebugVertex(
+                            vtx, inst, vtx, view)
+
+                        if trace.debugger is None:
+                            self.controller.FreeTrace(trace)
+
+                            # report the graphics test index; 'test' is only
+                            # bound later by the compute loop
+                            rdtest.log.error(
+                                f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
+                            return
+
+                        _, variables = self.process_trace(trace)
+
+                        for var in trace.sourceVars:
+                            if var.name == 'vertdata':
+                                name = var.name
+
+                                if var.name not in postvs[vtx].keys():
+                                    rdtest.log.error(
+                                        f"Don't have expected output for {var.name}")
+                                    continue
+
+                                real = postvs[vtx][name]
+                                debugged = self.evaluate_source_var(
+                                    var, variables)
+
+                                if debugged.columns != 4 or len(real) != 4:
+                                    rdtest.log.error(
+                                        f"Vertex output is not the right size ({len(real)} vs {debugged.columns})")
+                                    continue
+
+                                if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
+                                    rdtest.log.error(
+                                        f"Test {idx} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
+
+                        self.controller.FreeTrace(trace)
+
+            # check some assorted pixel outputs
+            target = pipe.GetOutputTargets()[0].resource
+
+            for pixel in pixel_checks:
+                for view in range(pipe.MultiviewBroadcastCount()):
+                    x, y = pixel
+
+                    picked = self.controller.PickPixel(
+                        target, x, y, rd.Subresource(0, 0, 0), rd.CompType.Float)
+
+                    real = picked.floatValue
+
+                    # silently skip pixels that weren't written to
+                    if real == clear_col:
+                        continue
+
+                    inputs = rd.DebugPixelInputs()
+                    inputs.sample = 0
+                    inputs.primitive = rd.ReplayController.NoPreference
+                    inputs.view = view
+                    trace = self.controller.DebugPixel(x, y, inputs)
+
+                    if trace.debugger is None:
+                        self.controller.FreeTrace(trace)
+
+                        rdtest.log.error(
+                            f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
+                        continue
+
+                    _, variables = self.process_trace(trace)
+
+                    output_sourcevar = self.find_output_source_var(
+                        trace, rd.ShaderBuiltin.ColorOutput, 0)
+
+                    if output_sourcevar is None:
+                        # don't leak the trace on this early-out
+                        self.controller.FreeTrace(trace)
+                        rdtest.log.error("No output variable found")
+                        continue
+
+                    debugged = self.evaluate_source_var(
+                        output_sourcevar, variables)
+
+                    self.controller.FreeTrace(trace)
+
+                    debuggedValue = list(debugged.value.f32v[0:4])
+
+                    if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                        rdtest.log.error(
+                            f"Test {idx} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
+
+            rdtest.log.success(f"Test {idx} successful")
+
+        rdtest.log.end_section("Graphics tests")
+
+        for comp_dim in compute_dims:
+            rdtest.log.begin_section(
+                f"Compute tests with {comp_dim.customName} workgroup")
+
+            compute_tests = [
+                a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
+
+            for test, action in enumerate(compute_tests):
+                self.controller.SetFrameEvent(action.eventId, False)
+
+                pipe = self.controller.GetPipelineState()
+                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
+
+                dim = csrefl.dispatchThreadsDimension
+
+                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
+
+                if len(rw) != 1:
+                    rdtest.log.error("Unexpected number of RW resources")
+                    continue
+
+                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
+                bufdata = self.controller.GetBufferData(
+                    rw[0].descriptor.resource, test*16*1024, 16*1024)
+
+                for t in thread_checks:
+                    xrange = 1
+                    yrange = dim[1]
+                    xbase = t
+                    ybase = 0
+
+                    # vertical orientation
+                    if dim[1] > dim[0]:
+                        xrange = dim[0]
+                        yrange = 1
+                        xbase = 0
+                        ybase = t
+
+                    for x in range(xbase, xbase+xrange):
+                        for y in range(ybase, ybase+yrange):
+                            z = 0
+
+                            if x >= dim[0] or y >= dim[1]:
+                                continue
+
+                            real = struct.unpack_from(
+                                "4f", bufdata, 16*y*dim[0] + 16*x)
+
+                            trace = self.controller.DebugThread(
+                                (0, 0, 0), (x, y, z))
+
+                            # check for a failed debug before stepping the trace
+                            if trace.debugger is None:
+                                self.controller.FreeTrace(trace)
+
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
+                                continue
+
+                            _, variables = self.process_trace(trace)
+
+                            sourceVars = [
+                                v for v in trace.instInfo[-1].sourceVars if v.name == 'data']
+
+                            if len(sourceVars) != 1:
+                                self.controller.FreeTrace(trace)
+                                rdtest.log.error(
+                                    "Couldn't find compute data variable")
+                                continue
+
+                            debugged = self.evaluate_source_var(
+                                sourceVars[0], variables)
+
+                            self.controller.FreeTrace(trace)
+
+                            debuggedValue = list(debugged.value.f32v[0:4])
+
+                            if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} debugged thread value {debuggedValue} at {x},{y},{z} does not match output {real}")
+
+                rdtest.log.success(f"Test {test} successful")
+
+            rdtest.log.end_section(
+                f"Compute tests with {comp_dim.customName} workgroup")