Files
renderdoc/util/test/demos/vk/vk_workgroup_zoo.cpp
baldurk 856c838def Update copyright years to 2026 and fix copyright ranges
* In a previous update in 2021 many copyright ranges were truncated
  accidentally, and some files have been copy-pasted with wrong years. These
  dates have been fixed based on git history and original copyright messages.
2026-01-05 14:17:28 +00:00

556 lines
14 KiB
C++

/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2025-2026 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "3rdparty/fmt/core.h"
#include "vk_test.h"
RD_TEST(VK_Workgroup_Zoo, VulkanGraphicsTest)
{
static constexpr const char *Description =
"Test of behaviour around workgroup operations in shaders.";
const std::string common = R"EOSHADER(
#version 460 core
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_vote : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#if FEAT_SHUFFLE
#extension GL_KHR_shader_subgroup_shuffle : enable
#endif
#if FEAT_SHUFFLE_RELATIVE
#extension GL_KHR_shader_subgroup_shuffle_relative : enable
#endif
#if FEAT_CLUSTERED
#extension GL_KHR_shader_subgroup_clustered : enable
#endif
#if FEAT_QUAD
#extension GL_KHR_shader_subgroup_quad : enable
#endif
#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
#extension GL_KHR_shader_subgroup_rotate : enable
#endif
layout(push_constant) uniform PushData
{
uint test;
uint two;
} push;
uint GetTest() { return push.test; }
#define IsTest(x) (GetTest() == x)
)EOSHADER";
const std::string compCommon = common + R"EOSHADER(
uvec3 tid;
uint flatId;
shared uvec4 gsmUint4[1024];
struct Output
{
vec4 vals[1024];
};
layout(binding = 0, std430) buffer outbuftype {
Output data[COMP_TESTS];
} outbuf;
layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = GROUP_SIZE_Z) in;
void SetOutput(vec4 val)
{
outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = val;
}
void Init(vec4 val)
{
tid = gl_LocalInvocationID;
flatId = tid.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + tid.y * gl_WorkGroupSize.x + tid.x;
gsmUint4[flatId].xyz = tid;
gsmUint4[flatId].z = tid.x;
SetOutput(val);
}
)EOSHADER";
const std::string testShader = compCommon + R"EOSHADER(
vec4 funcD(uint id)
{
return vec4(subgroupAdd(id/2));
}
vec4 nestedFunc(uint id)
{
vec4 ret = funcD(id/3);
ret.w = subgroupAdd(id);
return ret;
}
vec4 funcA(uint id)
{
return nestedFunc(id*2);
}
vec4 funcB(uint id)
{
return nestedFunc(id*4);
}
vec4 funcTest(uint id)
{
if ((id % 2) == 0)
{
return vec4(0);
}
else
{
float value = subgroupAdd(id);
if (id < 10)
{
return vec4(value);
}
value += subgroupAdd(id/2);
return vec4(value);
}
}
void main()
{
vec4 testResult = vec4(0);
Init(testResult);
uint id = flatId;
if(IsTest(0))
{
testResult.x = id;
barrier();
}
else if(IsTest(1))
{
testResult.x = subgroupAdd(id);
barrier();
}
else if(IsTest(2))
{
// Diverged threads which reconverge
if (id < 10)
{
// active threads 0-9
testResult.x = subgroupAdd(id);
if ((id % 2) == 0)
testResult.y = subgroupAdd(id);
else
testResult.y = subgroupAdd(id);
testResult.x += subgroupAdd(id);
}
else
{
// active threads 10...
testResult.x = subgroupAdd(id);
}
testResult.y = subgroupAdd(id);
barrier();
}
else if(IsTest(3))
{
// Converged threads calling a function
testResult = funcTest(id);
testResult.y = subgroupAdd(id);
barrier();
}
else if(IsTest(4))
{
// Converged threads calling a function which has a nested function call in it
testResult = nestedFunc(id);
testResult.y = subgroupAdd(id);
barrier();
}
else if(IsTest(5))
{
// Diverged threads calling the same function
if (id < 10)
{
testResult = funcD(id);
}
else
{
testResult = funcD(id);
}
testResult.y = subgroupAdd(id);
barrier();
}
else if(IsTest(6))
{
// Diverged threads calling the same function which has a nested function call in it
if (id < 10)
{
testResult = funcA(id);
}
else
{
testResult = funcB(id);
}
testResult.y = subgroupAdd(id);
barrier();
}
else if(IsTest(7))
{
// Diverged threads which early exit
if (id < 10)
{
testResult.x = subgroupAdd(id+10);
SetOutput(testResult);
return;
}
testResult.x = subgroupAdd(id);
}
else if(IsTest(8))
{
// Loops with different number of iterations per thread
for (uint i = 0; i < id; i++)
{
testResult.x += subgroupAdd(id);
}
barrier();
}
else if(IsTest(9))
{
// Query functions : unit tests
testResult.x = float(gl_SubgroupSize);
testResult.y = float(gl_SubgroupInvocationID);
testResult.z = float(subgroupElect());
barrier();
}
SetOutput(testResult);
}
)EOSHADER";
const std::string perfShader = compCommon + R"EOSHADER(
void main()
{
vec4 testResult = vec4(0);
Init(testResult);
uint id = flatId;
// TEST CASES:
// 0: GPU math : loops 100
// 1: CPU math : loops 100
// 2: GPU math : loops 200
// 3: CPU math : loops 200
// 4: GPU math : loops 400
// 5: CPU math : loops 400
// 6: GPU math : loops 5000
// 7: CPU math : loops 5000
bool useCpu = ((GetTest() & 1) == 1) ? true : false;
uint count = 0;
{
uint temp = GetTest() >> 1;
if(temp == 0)
count = 100U;
if(temp == 1)
count = 200U;
if(temp == 2)
count = 400U;
if(temp == 3)
count = 5000U;
}
if(useCpu)
{
for (uint i = 0; i < count; ++i)
{
gsmUint4[id].x += i;
gsmUint4[id].y += i * i;
testResult.x = testResult.x * testResult.x;
testResult.x += dot(gsmUint4[id], gsmUint4[id]);
}
}
else
{
for (uint i = 0; i < count; ++i)
{
gsmUint4[id].x += i;
gsmUint4[id].y += i * i;
testResult.x = pow(testResult.x, float(push.two));
testResult.x += dot(gsmUint4[id], gsmUint4[id]);
}
}
barrier();
SetOutput(testResult);
}
)EOSHADER";
VkSubgroupFeatureFlags ops = 0;
void Prepare(int argc, char **argv)
{
VulkanGraphicsTest::Prepare(argc, argv);
if(!Avail.empty())
return;
if(devVersion < VK_API_VERSION_1_1)
Avail = "Vulkan device version isn't 1.1";
static VkPhysicalDeviceSubgroupProperties subProps = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
};
getPhysProperties2(&subProps);
if(subProps.subgroupSize < 16)
Avail = "Subgroup size is less than 16";
// require at least a few ops so we only have a few conditional compilations
const VkSubgroupFeatureFlags requiredOps =
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
ops = subProps.supportedOperations;
if((subProps.supportedOperations & requiredOps) != requiredOps)
Avail = "Missing ops support";
if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
Avail = "Missing compute subgroup support";
}
int main()
{
// initialise, create window, create context, etc
if(!Init())
return 3;
VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
}));
VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
{setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 8)}));
std::map<std::string, std::string> macros;
int numCompTests = 0;
size_t pos = 0;
while(pos != std::string::npos)
{
pos = testShader.find("IsTest(", pos);
if(pos == std::string::npos)
break;
pos += sizeof("IsTest(") - 1;
numCompTests = std::max(numCompTests, atoi(testShader.c_str() + pos) + 1);
}
const int32_t countPerfTests = 8;
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)
macros["FEAT_SHUFFLE"] = "1";
else
macros["FEAT_SHUFFLE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
macros["FEAT_SHUFFLE_RELATIVE"] = "1";
else
macros["FEAT_SHUFFLE_RELATIVE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT)
macros["FEAT_CLUSTERED"] = "1";
else
macros["FEAT_CLUSTERED"] = "0";
if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT)
macros["FEAT_QUAD"] = "1";
else
macros["FEAT_QUAD"] = "0";
if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR)
macros["FEAT_ROTATE"] = "1";
else
macros["FEAT_ROTATE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR)
macros["FEAT_ROTATE_CLUSTERED"] = "1";
else
macros["FEAT_ROTATE_CLUSTERED"] = "0";
std::string comppipe_name[1];
VkPipeline compPipes[1];
uint32_t countPipes = 0;
VkPipeline perfPipes[1];
uint32_t countPerfPipes = 0;
macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
macros["GROUP_SIZE_X"] = "70";
macros["GROUP_SIZE_Y"] = "1";
macros["GROUP_SIZE_Z"] = "1";
comppipe_name[countPipes] = fmt::format("{}x{}x{}", macros["GROUP_SIZE_X"],
macros["GROUP_SIZE_Y"], macros["GROUP_SIZE_Z"]);
compPipes[countPipes] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(testShader, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
++countPipes;
perfPipes[0] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(perfShader, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
++countPerfPipes;
AllocatedBuffer bufout(
this,
vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
setName(bufout.buffer, "bufout");
VkDescriptorSet set = allocateDescriptorSet(setlayout);
vkh::updateDescriptorSets(
device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
{vkh::DescriptorBufferInfo(bufout.buffer)})});
while(Running())
{
VkCommandBuffer cmd = GetCommandBuffer();
vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
VkImage swapimg =
StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
pushMarker(cmd, "Compute Tests");
for(size_t p = 0; p < countPipes; p++)
{
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
pushMarker(cmd, comppipe_name[p]);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compPipes[p]);
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
for(int i = 0; i < numCompTests; i++)
{
vkh::cmdPushConstants(cmd, layout, i);
vkCmdDispatch(cmd, 2, 1, 1);
}
popMarker(cmd);
}
popMarker(cmd);
pushMarker(cmd, "Perf Tests");
for(size_t p = 0; p < countPerfPipes; p++)
{
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
pushMarker(cmd, comppipe_name[p]);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, perfPipes[p]);
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
for(int i = 0; i < countPerfTests; i++)
{
bool useCpu = (i & 0x1);
int count = 0;
{
int temp = i >> 1;
if(temp == 0)
count = 100U;
if(temp == 1)
count = 200U;
if(temp == 2)
count = 400U;
if(temp == 3)
count = 5000U;
}
std::string perfTestName =
fmt::format("{} Iterations {} Math", count, useCpu ? "CPU" : "GPU");
pushMarker(cmd, perfTestName);
vkh::cmdPushConstants(cmd, layout, i);
vkCmdDispatch(cmd, 2, 1, 1);
popMarker(cmd);
}
popMarker(cmd);
}
popMarker(cmd);
FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
vkEndCommandBuffer(cmd);
SubmitAndPresent({cmd});
}
return 0;
}
};
REGISTER_TEST();