diff --git a/util/test/demos/CMakeLists.txt b/util/test/demos/CMakeLists.txt
index 580496043..b0679e5de 100644
--- a/util/test/demos/CMakeLists.txt
+++ b/util/test/demos/CMakeLists.txt
@@ -155,6 +155,7 @@ set(VULKAN_SRC
vk/vk_simple_triangle.cpp
vk/vk_spec_constants.cpp
vk/vk_spirv_13_shaders.cpp
+ vk/vk_subgroup_zoo.cpp
vk/vk_structured_buffer_nested.cpp
vk/vk_sync2.cpp
vk/vk_texture_zoo.cpp
diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj
index 71c4b0ff9..3aea004e8 100644
--- a/util/test/demos/demos.vcxproj
+++ b/util/test/demos/demos.vcxproj
@@ -341,6 +341,7 @@
+    <ClCompile Include="vk\vk_subgroup_zoo.cpp" />
diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters
index 35bd94d99..95bb97011 100644
--- a/util/test/demos/demos.vcxproj.filters
+++ b/util/test/demos/demos.vcxproj.filters
@@ -697,6 +697,9 @@
Vulkan\demos
+    <ClCompile Include="vk\vk_subgroup_zoo.cpp">
+      <Filter>Vulkan\demos</Filter>
+    </ClCompile>
diff --git a/util/test/demos/vk/vk_helpers.h b/util/test/demos/vk/vk_helpers.h
index 40469574f..ac0eea9da 100644
--- a/util/test/demos/vk/vk_helpers.h
+++ b/util/test/demos/vk/vk_helpers.h
@@ -179,6 +179,19 @@ void cmdPushDescriptorSets(VkCommandBuffer cmd, VkPipelineBindPoint pipelineBind
VkPipelineLayout layout, uint32_t set,
std::vector writes);
+// Records a push constant update that uploads `val` at offset 0 for the given shader stages.
+// The number of bytes pushed is sizeof(T).
+template <typename T>
+void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, VkShaderStageFlags stages,
+                      const T &val)
+{
+  // vkCmdPushConstants requires the size to be a multiple of 4, so reject other types up front
+  static_assert(sizeof(T) % 4 == 0, "push constant data size must be a multiple of 4 bytes");
+  vkCmdPushConstants(cmd, layout, stages, 0, sizeof(T), &val);
+}
+
+// Overload of cmdPushConstants that pushes `val` for all shader stages (VK_SHADER_STAGE_ALL).
+template <typename T>
+void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, const T &val)
+{
+  cmdPushConstants(cmd, layout, VK_SHADER_STAGE_ALL, val);
+}
+
struct ApplicationInfo : public VkApplicationInfo
{
ApplicationInfo() : VkApplicationInfo() { sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; }
@@ -474,6 +487,19 @@ struct BufferMemoryBarrier : public VkBufferMemoryBarrier
}
};
+// drop any macro called MemoryBarrier (presumably defined by a platform header - confirm) so the
+// name can be used for the struct below
+#undef MemoryBarrier
+
+// VkMemoryBarrier with sType filled in, pNext left NULL and the access masks taken from the
+// constructor arguments.
+struct MemoryBarrier : public VkMemoryBarrier
+{
+  MemoryBarrier(VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask) : VkMemoryBarrier()
+  {
+    // the base was value-initialised above, so pNext is already NULL
+    sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    this->srcAccessMask = srcAccessMask;
+    this->dstAccessMask = dstAccessMask;
+  }
+};
+
struct CommandBufferAllocateInfo : public VkCommandBufferAllocateInfo
{
CommandBufferAllocateInfo(VkCommandPool commandPool, uint32_t commandBufferCount,
diff --git a/util/test/demos/vk/vk_subgroup_zoo.cpp b/util/test/demos/vk/vk_subgroup_zoo.cpp
new file mode 100644
index 000000000..94682f894
--- /dev/null
+++ b/util/test/demos/vk/vk_subgroup_zoo.cpp
@@ -0,0 +1,451 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "3rdparty/fmt/core.h"
+#include "vk_test.h"
+
+RD_TEST(VK_Subgroup_Zoo, VulkanGraphicsTest)
+{
+ // human readable summary of what this demo exercises
+ static constexpr const char *Description =
+ "Test of behaviour around subgroup operations in shaders.";
+
+ // GLSL preamble prepended to the vertex, pixel and compute sources below. It enables the
+ // subgroup extensions (the optional ones only when the matching FEAT_* macro is non-zero; those
+ // macros are set in main() from the supported subgroup operations), declares the push constant
+ // selecting the active test, and defines IsTest(). main() scans each source for "IsTest(" to
+ // work out how many tests there are.
+ const std::string common = R"EOSHADER(
+
+#version 460 core
+#extension GL_KHR_shader_subgroup_basic : enable
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_vote : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+
+#if FEAT_SHUFFLE
+#extension GL_KHR_shader_subgroup_shuffle : enable
+#endif
+
+#if FEAT_SHUFFLE_RELATIVE
+#extension GL_KHR_shader_subgroup_shuffle_relative : enable
+#endif
+
+#if FEAT_CLUSTERED
+#extension GL_KHR_shader_subgroup_clustered : enable
+#endif
+
+#if FEAT_QUAD
+#extension GL_KHR_shader_subgroup_quad : enable
+#endif
+
+#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
+#extension GL_KHR_shader_subgroup_rotate : enable
+#endif
+
+layout(push_constant) uniform PushData
+{
+ uint test;
+} push;
+
+#define IsTest(x) (push.test == x)
+
+)EOSHADER";
+
+ // Vertex shader: picks one of four hard-coded corner positions by gl_VertexIndex (scaled down
+ // to 0.2 for test 2) and writes the per-test subgroup value to the vertdata output. Tests that
+ // don't write vertex data leave vertdata as zero.
+ const std::string vertex = common + R"EOSHADER(
+
+layout(location = 0) out vec4 vertdata;
+
+void main()
+{
+ vec2 positions[] = {
+ vec2(-1.0f, 1.0f),
+ vec2( 1.0f, 1.0f),
+ vec2(-1.0f, -1.0f),
+ vec2( 1.0f, -1.0f),
+ };
+
+ float scale = 1.0f;
+ if(IsTest(2))
+ scale = 0.2f;
+
+ gl_Position = vec4(positions[gl_VertexIndex]*vec2(scale,scale), 0, 1);
+
+ vertdata = vec4(0);
+
+ if(IsTest(0))
+ vertdata = vec4(gl_SubgroupInvocationID, 0, 0, 1);
+ else if(IsTest(3))
+ vertdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
+}
+
+)EOSHADER";
+
+ // Pixel shader: computes the per-test fragment subgroup value (zero for tests that don't
+ // produce one) and outputs it added to the incoming vertdata.
+ const std::string pixel = common + R"EOSHADER(
+
+layout(location = 0) in vec4 vertdata;
+
+layout(location = 0, index = 0) out vec4 Color;
+
+void main()
+{
+ vec4 fragdata = vec4(0);
+
+ if(IsTest(1) || IsTest(2))
+ fragdata = vec4(gl_SubgroupInvocationID, 0, 0, 1);
+ else if(IsTest(4))
+ fragdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
+
+ Color = vertdata + fragdata;
+}
+
+)EOSHADER";
+
+ // Compute shader: each invocation writes one vec4 result into the slot for the active test
+ // (outbuf.data[push.test]) at its flattened local invocation index. COMP_TESTS, GROUP_SIZE_X
+ // and GROUP_SIZE_Y are supplied as macros when main() compiles the shader.
+ const std::string comp = common + R"EOSHADER(
+
+struct Output
+{
+ vec4 vals[1024];
+};
+
+layout(binding = 0, std430) buffer outbuftype {
+ Output data[COMP_TESTS];
+} outbuf;
+
+layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in;
+
+void main()
+{
+ vec4 data = vec4(0);
+
+ if(IsTest(0))
+ data = vec4(gl_SubgroupInvocationID, 0, 0, 0);
+ else if(IsTest(1))
+ data = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
+
+ outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
+}
+
+)EOSHADER";
+
+  // subgroup operations supported by the device. Filled in by Prepare() and read in main() to
+  // decide which optional FEAT_* shader macros are enabled
+  VkSubgroupFeatureFlags ops = 0;
+
+  // Runs the base class preparation, then checks the device's subgroup support. If a requirement
+  // is not met, Avail is set to a string describing the first one that failed.
+  void Prepare(int argc, char **argv)
+  {
+    VulkanGraphicsTest::Prepare(argc, argv);
+
+    // the base class already recorded a reason in Avail, keep it
+    if(!Avail.empty())
+      return;
+
+    if(devVersion < VK_API_VERSION_1_1)
+    {
+      // stop here: without the return the checks below would run against an unqueried struct
+      // and replace this reason with a misleading one
+      Avail = "Vulkan device version isn't 1.1";
+      return;
+    }
+
+    static VkPhysicalDeviceSubgroupProperties subProps = {
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
+    };
+
+    getPhysProperties2(&subProps);
+
+    ops = subProps.supportedOperations;
+
+    // require at least a few ops so we only have a few conditional compilations
+    const VkSubgroupFeatureFlags requiredOps =
+        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+        VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
+
+    // report the first requirement that isn't met. All stages are required for simplicity
+    if(subProps.subgroupSize < 16)
+      Avail = "Subgroup size is less than 16";
+    else if((subProps.supportedOperations & requiredOps) != requiredOps)
+      Avail = "Missing ops support";
+    else if((subProps.supportedStages & VK_SHADER_STAGE_VERTEX_BIT) == 0)
+      Avail = "Missing vertex subgroup support";
+    else if((subProps.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
+      Avail = "Missing pixel subgroup support";
+    else if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
+      Avail = "Missing compute subgroup support";
+  }
+
+  // Creates the render target, the graphics pipeline and one compute pipeline per workgroup
+  // size, then every frame records one draw per graphics test and, for each compute pipeline,
+  // one dispatch per compute test. Returns 3 if initialisation fails and 0 otherwise.
+  int main()
+  {
+    // initialise, create window, create context, etc
+    if(!Init())
+      return 3;
+
+    VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
+        {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+    }));
+
+    VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
+        {setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 4)}));
+
+    const uint32_t imgDim = 128;
+
+    AllocatedImage img(
+        this,
+        vkh::ImageCreateInfo(imgDim, imgDim, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
+                             VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
+        VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY}));
+
+    VkImageView imgview = createImageView(
+        vkh::ImageViewCreateInfo(img.image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32G32B32A32_SFLOAT));
+
+    vkh::RenderPassCreator renderPassCreateInfo;
+
+    renderPassCreateInfo.attachments.push_back(
+        vkh::AttachmentDescription(VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED,
+                                   VK_IMAGE_LAYOUT_GENERAL, VK_ATTACHMENT_LOAD_OP_CLEAR));
+
+    renderPassCreateInfo.addSubpass({VkAttachmentReference({0, VK_IMAGE_LAYOUT_GENERAL})});
+
+    VkRenderPass renderPass = createRenderPass(renderPassCreateInfo);
+
+    VkFramebuffer framebuffer =
+        createFramebuffer(vkh::FramebufferCreateInfo(renderPass, {imgview}, {imgDim, imgDim}));
+
+    vkh::GraphicsPipelineCreateInfo pipeCreateInfo;
+
+    pipeCreateInfo.renderPass = renderPass;
+    pipeCreateInfo.layout = layout;
+    pipeCreateInfo.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
+
+    // the number of tests in a shader is one more than the largest index passed to IsTest() in
+    // its source, found by parsing the number that follows each "IsTest(" occurrence
+    auto countTests = [](const std::string &source) -> int {
+      const std::string marker = "IsTest(";
+      int count = 0;
+      for(size_t pos = source.find(marker); pos != std::string::npos;
+          pos = source.find(marker, pos))
+      {
+        pos += marker.size();
+        count = std::max(count, atoi(source.c_str() + pos) + 1);
+      }
+      return count;
+    };
+
+    const int vertTests = countTests(vertex);
+    const int pixTests = countTests(pixel);
+    const int numCompTests = countTests(comp);
+
+    // every graphics test index is drawn with both shaders
+    const uint32_t numGraphicsTests = std::max(vertTests, pixTests);
+
+    std::map<std::string, std::string> macros;
+
+    // each optional subgroup feature gets a macro that is "1" when the device supports it and
+    // "0" otherwise, so the shaders only enable extensions that are available
+    const struct
+    {
+      VkSubgroupFeatureFlags bit;
+      const char *macro;
+    } optionalFeatures[] = {
+        {VK_SUBGROUP_FEATURE_SHUFFLE_BIT, "FEAT_SHUFFLE"},
+        {VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT, "FEAT_SHUFFLE_RELATIVE"},
+        {VK_SUBGROUP_FEATURE_CLUSTERED_BIT, "FEAT_CLUSTERED"},
+        {VK_SUBGROUP_FEATURE_QUAD_BIT, "FEAT_QUAD"},
+        {VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR, "FEAT_ROTATE"},
+        {VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR, "FEAT_ROTATE_CLUSTERED"},
+    };
+
+    for(const auto &feat : optionalFeatures)
+      macros[feat.macro] = (ops & feat.bit) ? "1" : "0";
+
+    pipeCreateInfo.stages = {
+        CompileShaderModule(vertex, ShaderLang::glsl, ShaderStage::vert, "main", macros,
+                            SPIRVTarget::vulkan11),
+        CompileShaderModule(pixel, ShaderLang::glsl, ShaderStage::frag, "main", macros,
+                            SPIRVTarget::vulkan11),
+    };
+
+    VkPipeline pipe = createGraphicsPipeline(pipeCreateInfo);
+
+    // the compute shader is compiled once per workgroup size. The name is used as the marker
+    // label around that pipeline's dispatches
+    struct ComputeVariant
+    {
+      const char *name;
+      const char *groupSizeX;
+      const char *groupSizeY;
+      VkPipeline pipe;
+    };
+
+    ComputeVariant computeVariants[] = {
+        {"256x1", "256", "1", VK_NULL_HANDLE},
+        {"128x2", "128", "2", VK_NULL_HANDLE},
+        {"8x128", "8", "128", VK_NULL_HANDLE},
+        {"150x1", "150", "1", VK_NULL_HANDLE},
+    };
+
+    macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
+
+    for(ComputeVariant &variant : computeVariants)
+    {
+      macros["GROUP_SIZE_X"] = variant.groupSizeX;
+      macros["GROUP_SIZE_Y"] = variant.groupSizeY;
+      variant.pipe = createComputePipeline(vkh::ComputePipelineCreateInfo(
+          layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
+                                      SPIRVTarget::vulkan11)));
+    }
+
+    // one vec4 per thread for up to 1024 threads, for each compute test. This matches the
+    // vals[1024] array per test declared in the compute shader
+    const VkDeviceSize bufSize = sizeof(Vec4f) * 1024 * numCompTests;
+
+    AllocatedBuffer bufout(
+        this,
+        vkh::BufferCreateInfo(bufSize,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
+        VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
+
+    setName(bufout.buffer, "bufout");
+
+    VkDescriptorSet set = allocateDescriptorSet(setlayout);
+
+    vkh::updateDescriptorSets(
+        device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                         {vkh::DescriptorBufferInfo(bufout.buffer)})});
+
+    while(Running())
+    {
+      VkCommandBuffer cmd = GetCommandBuffer();
+
+      vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
+
+      VkImage swapimg =
+          StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+      vkh::cmdPipelineBarrier(
+          cmd, {vkh::ImageMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+                                        VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
+                                        VK_IMAGE_LAYOUT_GENERAL, img.image)});
+
+      vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
+
+      vkh::cmdPipelineBarrier(
+          cmd,
+          {vkh::ImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+                                   VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, img.image)});
+
+      vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
+
+      VkViewport v = {};
+      v.maxDepth = 1.0f;
+      v.width = v.height = (float)imgDim;
+
+      VkRect2D s = {};
+      s.extent.width = s.extent.height = imgDim;
+
+      vkCmdSetViewport(cmd, 0, 1, &v);
+      vkCmdSetScissor(cmd, 0, 1, &s);
+
+      // separate render passes with a fat barrier before each to avoid subgroups crossing draws
+
+      pushMarker(cmd, "Graphics Tests");
+
+      for(uint32_t i = 0; i < numGraphicsTests; i++)
+      {
+        vkh::cmdPipelineBarrier(
+            cmd, {}, {},
+            {vkh::MemoryBarrier(VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+                                VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT)});
+
+        // the clear colour is a recognisable value that marks pixels no draw has written
+        vkCmdBeginRenderPass(
+            cmd,
+            vkh::RenderPassBeginInfo(renderPass, framebuffer, s,
+                                     {vkh::ClearValue(123456.0f, 789.0f, 101112.0f, 0.0f)}),
+            VK_SUBPASS_CONTENTS_INLINE);
+
+        // the push constant selects which test the shaders run
+        vkh::cmdPushConstants(cmd, layout, i);
+        vkCmdDraw(cmd, 4, 1, 0, 0);
+        vkCmdEndRenderPass(cmd);
+      }
+
+      popMarker(cmd);
+
+      pushMarker(cmd, "Compute Tests");
+
+      for(const ComputeVariant &variant : computeVariants)
+      {
+        // zero the output buffer before each workgroup size, with barriers either side of the fill
+        vkh::cmdPipelineBarrier(
+            cmd, {},
+            {vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+                                      bufout.buffer, 0, bufSize)});
+
+        vkCmdFillBuffer(cmd, bufout.buffer, 0, bufSize, 0);
+
+        vkh::cmdPipelineBarrier(
+            cmd, {},
+            {vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
+                                      bufout.buffer, 0, bufSize)});
+
+        pushMarker(cmd, variant.name);
+
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, variant.pipe);
+        vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
+
+        // one single-workgroup dispatch per test, each writing to its own slot in the buffer
+        for(int i = 0; i < numCompTests; i++)
+        {
+          vkh::cmdPushConstants(cmd, layout, i);
+          vkCmdDispatch(cmd, 1, 1, 1);
+        }
+
+        popMarker(cmd);
+      }
+
+      popMarker(cmd);
+
+      FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+      vkEndCommandBuffer(cmd);
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/rdtest/runner.py b/util/test/rdtest/runner.py
index 8907981d9..43f83426e 100644
--- a/util/test/rdtest/runner.py
+++ b/util/test/rdtest/runner.py
@@ -313,7 +313,7 @@ def run_tests(test_include: str, test_exclude: str, in_process: bool, slow_tests
instance = testclass()
- supported, unsupported_reason = instance.check_support()
+ supported, unsupported_reason = instance.check_support(test_include=test_include)
if not supported:
log.print("Skipping {} as {}".format(name, unsupported_reason))
diff --git a/util/test/rdtest/testcase.py b/util/test/rdtest/testcase.py
index 3d38611c6..6edd1d0ac 100644
--- a/util/test/rdtest/testcase.py
+++ b/util/test/rdtest/testcase.py
@@ -167,7 +167,7 @@ class TestCase:
def set_test_list(tests):
TestCase._test_list = tests
- def check_support(self):
+ def check_support(self, **kwargs):
if self.demos_test_name != '':
if self.demos_test_name not in TestCase._test_list:
return False,'Test {} not in compiled tests'.format(self.demos_test_name)
@@ -680,7 +680,7 @@ class TestCase:
raise KeyError("Couldn't parse path {}".format(path))
- def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars):
+ def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars) -> rd.ShaderVariable:
debugged = rd.ShaderVariable()
debugged.name = sourceVar.name
debugged.type = sourceVar.type
diff --git a/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py b/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
index 61c833b94..5a73141cd 100644
--- a/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
+++ b/util/test/tests/D3D12/D3D12_Multi_Wait_Before_Signal.py
@@ -4,7 +4,7 @@ import rdtest
class D3D12_Multi_Wait_Before_Signal(rdtest.TestCase):
demos_test_name = 'D3D12_Multi_Wait_Before_Signal'
- def check_support(self):
+ def check_support(self, **kwargs):
# TODO: Enable this if/when rdoc can reorder from the original submission
# order, which blocks multiple queues with waits that get signalled by
# later submissions to other queues.
diff --git a/util/test/tests/D3D12/D3D12_RGP_Capture.py b/util/test/tests/D3D12/D3D12_RGP_Capture.py
index d87bb5b69..780dcd8e1 100644
--- a/util/test/tests/D3D12/D3D12_RGP_Capture.py
+++ b/util/test/tests/D3D12/D3D12_RGP_Capture.py
@@ -10,11 +10,11 @@ except ImportError as ex:
class D3D12_RGP_Capture(rdtest.TestCase):
demos_test_name = 'D3D12_Simple_Triangle'
- def check_support(self):
+ def check_support(self, **kwargs):
if tkinter is None:
return False, 'tkinter is required but not available'
- return super().check_support()
+ return super().check_support(**kwargs)
def check_capture(self):
apiprops: rd.APIProperties = self.controller.GetAPIProperties()
diff --git a/util/test/tests/Vulkan/VK_Subgroup_Zoo.py b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
new file mode 100644
index 000000000..9047886a5
--- /dev/null
+++ b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
@@ -0,0 +1,233 @@
+import renderdoc as rd
+import struct
+import rdtest
+
+
+class VK_Subgroup_Zoo(rdtest.TestCase):
+ demos_test_name = 'VK_Subgroup_Zoo'
+
+    def check_support(self, **kwargs):
+        """Report this test as supported only when it was explicitly requested.
+
+        Returns ``(True, '')`` when the ``test_include`` keyword argument is
+        exactly 'VK_Subgroup_Zoo'. In every other case, including when
+        ``test_include`` is not passed at all, returns
+        ``(False, 'Disabled test')``.
+        """
+        # Only allow this if explicitly run. Use .get() so a caller that does
+        # not pass test_include gets the 'Disabled test' result, not a KeyError
+        if kwargs.get('test_include') == 'VK_Subgroup_Zoo':
+            return True, ''
+        return False, 'Disabled test'
+
+    def check_capture(self):
+        """Compare shader debugging results against the values the capture produced.
+
+        For each draw under the "Graphics Tests" marker, the debugged
+        'vertdata' output of every vertex is compared with the post-VS data,
+        and the debugged colour output of a fixed set of pixels is compared
+        with the value picked from the output target. For each dispatch under
+        the "Compute Tests" marker, the debugged 'data' variable of a selection
+        of threads is compared with what the dispatch wrote to its read-write
+        buffer. Mismatches are reported through rdtest.log.error.
+        """
+        graphics_tests = [a for a in self.find_action(
+            "Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]
+        compute_dims = [a for a in self.find_action(
+            "Compute Tests").children if 'x' in a.customName]
+
+        rdtest.log.begin_section("Graphics tests")
+
+        # instances to check in instanced draws
+        inst_checks = [0, 1, 5, 10]
+        # pixels to check
+        pixel_checks = [
+            # top quad
+            (0, 0), (1, 0), (0, 1), (1, 1),
+            # middle quad (away from triangle border)
+            (64, 56), (65, 56), (64, 57), (65, 57),
+            # middle quad (on triangle border)
+            (64, 64), (65, 64), (64, 65), (65, 65),
+            # middle quad on other triangle
+            (56, 64), (57, 64), (56, 65), (57, 65),
+        ]
+        # threads to check. largest dimension only (all small dim checked)
+        thread_checks = [
+            # first few
+            0, 1, 2,
+            # near end of 32-subgroup and boundary
+            30, 31, 32,
+            # near end of 64-subgroup and boundary
+            62, 63, 64,
+            # large values spaced out with one near the end of our unaligned size
+            100, 110, 120, 140, 149, 150, 160, 200, 250,
+        ]
+        # value the render target is cleared to, marks pixels no draw wrote to
+        clear_col = (123456.0, 789.0, 101112.0, 0.0)
+
+        for idx, action in enumerate(graphics_tests):
+            self.controller.SetFrameEvent(action.eventId, False)
+
+            pipe = self.controller.GetPipelineState()
+
+            # check vertex output for every vertex
+            for inst in [inst for inst in inst_checks if inst < action.numInstances]:
+                # the post-VS data does not depend on the view, fetch it once
+                # per instance
+                postvs = self.get_postvs(
+                    action, rd.MeshDataStage.VSOut, first_index=0, num_indices=action.numIndices, instance=inst)
+
+                for view in range(pipe.MultiviewBroadcastCount()):
+                    for vtx in range(action.numIndices):
+                        trace = self.controller.DebugVertex(
+                            vtx, inst, vtx, view)
+
+                        if trace.debugger is None:
+                            self.controller.FreeTrace(trace)
+
+                            rdtest.log.error(
+                                f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
+                            return
+
+                        _, variables = self.process_trace(trace)
+
+                        for var in trace.sourceVars:
+                            if var.name == 'vertdata':
+                                name = var.name
+
+                                if name not in postvs[vtx]:
+                                    rdtest.log.error(
+                                        f"Don't have expected output for {var.name}")
+                                    continue
+
+                                real = postvs[vtx][name]
+                                debugged = self.evaluate_source_var(
+                                    var, variables)
+
+                                if debugged.columns != 4 or len(real) != 4:
+                                    rdtest.log.error(
+                                        f"Vertex output is not the right size ({len(real)} vs {debugged.columns})")
+                                    continue
+
+                                if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
+                                    rdtest.log.error(
+                                        f"Test {idx} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
+
+                        self.controller.FreeTrace(trace)
+
+            # check some assorted pixel outputs
+            target = pipe.GetOutputTargets()[0].resource
+
+            for pixel in pixel_checks:
+                for view in range(pipe.MultiviewBroadcastCount()):
+                    x, y = pixel
+
+                    picked = self.controller.PickPixel(
+                        target, x, y, rd.Subresource(0, 0, 0), rd.CompType.Float)
+
+                    real = picked.floatValue
+
+                    # silently skip pixels that weren't written to
+                    if real == clear_col:
+                        continue
+
+                    inputs = rd.DebugPixelInputs()
+                    inputs.sample = 0
+                    inputs.primitive = rd.ReplayController.NoPreference
+                    inputs.view = view
+                    trace = self.controller.DebugPixel(x, y, inputs)
+
+                    if trace.debugger is None:
+                        self.controller.FreeTrace(trace)
+
+                        rdtest.log.error(
+                            f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
+                        continue
+
+                    _, variables = self.process_trace(trace)
+
+                    output_sourcevar = self.find_output_source_var(
+                        trace, rd.ShaderBuiltin.ColorOutput, 0)
+
+                    if output_sourcevar is None:
+                        # don't leak the trace when bailing out of this pixel
+                        self.controller.FreeTrace(trace)
+
+                        rdtest.log.error("No output variable found")
+                        continue
+
+                    debugged = self.evaluate_source_var(
+                        output_sourcevar, variables)
+
+                    self.controller.FreeTrace(trace)
+
+                    debuggedValue = list(debugged.value.f32v[0:4])
+
+                    if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                        rdtest.log.error(
+                            f"Test {idx} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
+
+            rdtest.log.success(f"Test {idx} successful")
+
+        rdtest.log.end_section("Graphics tests")
+
+        for comp_dim in compute_dims:
+            rdtest.log.begin_section(
+                f"Compute tests with {comp_dim.customName} workgroup")
+
+            compute_tests = [
+                a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
+
+            for test, action in enumerate(compute_tests):
+                self.controller.SetFrameEvent(action.eventId, False)
+
+                pipe = self.controller.GetPipelineState()
+                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
+
+                dim = csrefl.dispatchThreadsDimension
+
+                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
+
+                if len(rw) != 1:
+                    rdtest.log.error("Unexpected number of RW resources")
+                    continue
+
+                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
+                bufdata = self.controller.GetBufferData(
+                    rw[0].descriptor.resource, test*16*1024, 16*1024)
+
+                for t in thread_checks:
+                    xrange = 1
+                    yrange = dim[1]
+                    xbase = t
+                    ybase = 0
+
+                    # vertical orientation
+                    if dim[1] > dim[0]:
+                        xrange = dim[0]
+                        yrange = 1
+                        xbase = 0
+                        ybase = t
+
+                    for x in range(xbase, xbase+xrange):
+                        for y in range(ybase, ybase+yrange):
+                            z = 0
+
+                            # checks past the end of this workgroup size are skipped
+                            if x >= dim[0] or y >= dim[1]:
+                                continue
+
+                            real = struct.unpack_from(
+                                "4f", bufdata, 16*y*dim[0] + 16*x)
+
+                            trace = self.controller.DebugThread(
+                                (0, 0, 0), (x, y, z))
+
+                            # check for a missing debugger before trying to
+                            # step through the trace
+                            if trace.debugger is None:
+                                self.controller.FreeTrace(trace)
+
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
+                                continue
+
+                            _, variables = self.process_trace(trace)
+
+                            sourceVars = [
+                                v for v in trace.instInfo[-1].sourceVars if v.name == 'data']
+
+                            if len(sourceVars) != 1:
+                                self.controller.FreeTrace(trace)
+
+                                rdtest.log.error(
+                                    "Couldn't find compute data variable")
+                                continue
+
+                            debugged = self.evaluate_source_var(
+                                sourceVars[0], variables)
+
+                            # the value has been evaluated, release the trace as
+                            # the graphics checks do
+                            self.controller.FreeTrace(trace)
+
+                            debuggedValue = list(debugged.value.f32v[0:4])
+
+                            if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} debugged thread value {debuggedValue} at {x},{y},{z} does not match output {real}")
+
+                rdtest.log.success(f"Test {test} successful")
+
+            rdtest.log.end_section(
+                f"Compute tests with {comp_dim.customName} workgroup")