mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-29 13:20:54 +00:00
856c838def
* In a previous update in 2021 many copyright ranges were truncated accidentally, and some files have been copy-pasted with wrong years. These dates have been fixed based on git history and original copyright messages.
556 lines
14 KiB
C++
556 lines
14 KiB
C++
/******************************************************************************
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2025-2026 Baldur Karlsson
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
******************************************************************************/
|
|
|
|
#include "3rdparty/fmt/core.h"
|
|
#include "vk_test.h"
|
|
|
|
RD_TEST(VK_Workgroup_Zoo, VulkanGraphicsTest)
|
|
{
|
|
static constexpr const char *Description =
|
|
"Test of behaviour around workgroup operations in shaders.";
|
|
|
|
const std::string common = R"EOSHADER(
|
|
|
|
#version 460 core
|
|
#extension GL_KHR_shader_subgroup_basic : enable
|
|
#extension GL_KHR_shader_subgroup_ballot : enable
|
|
#extension GL_KHR_shader_subgroup_vote : enable
|
|
#extension GL_KHR_shader_subgroup_arithmetic : enable
|
|
|
|
#if FEAT_SHUFFLE
|
|
#extension GL_KHR_shader_subgroup_shuffle : enable
|
|
#endif
|
|
|
|
#if FEAT_SHUFFLE_RELATIVE
|
|
#extension GL_KHR_shader_subgroup_shuffle_relative : enable
|
|
#endif
|
|
|
|
#if FEAT_CLUSTERED
|
|
#extension GL_KHR_shader_subgroup_clustered : enable
|
|
#endif
|
|
|
|
#if FEAT_QUAD
|
|
#extension GL_KHR_shader_subgroup_quad : enable
|
|
#endif
|
|
|
|
#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
|
|
#extension GL_KHR_shader_subgroup_rotate : enable
|
|
#endif
|
|
|
|
layout(push_constant) uniform PushData
|
|
{
|
|
uint test;
|
|
uint two;
|
|
} push;
|
|
|
|
uint GetTest() { return push.test; }
|
|
|
|
#define IsTest(x) (GetTest() == x)
|
|
|
|
)EOSHADER";
|
|
|
|
const std::string compCommon = common + R"EOSHADER(
|
|
|
|
uvec3 tid;
|
|
uint flatId;
|
|
|
|
shared uvec4 gsmUint4[1024];
|
|
|
|
struct Output
|
|
{
|
|
vec4 vals[1024];
|
|
};
|
|
|
|
layout(binding = 0, std430) buffer outbuftype {
|
|
Output data[COMP_TESTS];
|
|
} outbuf;
|
|
|
|
layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = GROUP_SIZE_Z) in;
|
|
|
|
void SetOutput(vec4 val)
|
|
{
|
|
outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = val;
|
|
}
|
|
|
|
void Init(vec4 val)
|
|
{
|
|
tid = gl_LocalInvocationID;
|
|
flatId = tid.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + tid.y * gl_WorkGroupSize.x + tid.x;
|
|
gsmUint4[flatId].xyz = tid;
|
|
gsmUint4[flatId].z = tid.x;
|
|
SetOutput(val);
|
|
}
|
|
|
|
)EOSHADER";
|
|
|
|
const std::string testShader = compCommon + R"EOSHADER(
|
|
|
|
vec4 funcD(uint id)
|
|
{
|
|
return vec4(subgroupAdd(id/2));
|
|
}
|
|
|
|
vec4 nestedFunc(uint id)
|
|
{
|
|
vec4 ret = funcD(id/3);
|
|
ret.w = subgroupAdd(id);
|
|
return ret;
|
|
}
|
|
|
|
vec4 funcA(uint id)
|
|
{
|
|
return nestedFunc(id*2);
|
|
}
|
|
|
|
vec4 funcB(uint id)
|
|
{
|
|
return nestedFunc(id*4);
|
|
}
|
|
|
|
vec4 funcTest(uint id)
|
|
{
|
|
if ((id % 2) == 0)
|
|
{
|
|
return vec4(0);
|
|
}
|
|
else
|
|
{
|
|
float value = subgroupAdd(id);
|
|
if (id < 10)
|
|
{
|
|
return vec4(value);
|
|
}
|
|
value += subgroupAdd(id/2);
|
|
return vec4(value);
|
|
}
|
|
}
|
|
|
|
void main()
|
|
{
|
|
vec4 testResult = vec4(0);
|
|
Init(testResult);
|
|
uint id = flatId;
|
|
|
|
if(IsTest(0))
|
|
{
|
|
testResult.x = id;
|
|
barrier();
|
|
}
|
|
else if(IsTest(1))
|
|
{
|
|
testResult.x = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(2))
|
|
{
|
|
// Diverged threads which reconverge
|
|
if (id < 10)
|
|
{
|
|
// active threads 0-9
|
|
testResult.x = subgroupAdd(id);
|
|
|
|
if ((id % 2) == 0)
|
|
testResult.y = subgroupAdd(id);
|
|
else
|
|
testResult.y = subgroupAdd(id);
|
|
|
|
testResult.x += subgroupAdd(id);
|
|
}
|
|
else
|
|
{
|
|
// active threads 10...
|
|
testResult.x = subgroupAdd(id);
|
|
}
|
|
testResult.y = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(3))
|
|
{
|
|
// Converged threads calling a function
|
|
testResult = funcTest(id);
|
|
testResult.y = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(4))
|
|
{
|
|
// Converged threads calling a function which has a nested function call in it
|
|
testResult = nestedFunc(id);
|
|
testResult.y = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(5))
|
|
{
|
|
// Diverged threads calling the same function
|
|
if (id < 10)
|
|
{
|
|
testResult = funcD(id);
|
|
}
|
|
else
|
|
{
|
|
testResult = funcD(id);
|
|
}
|
|
testResult.y = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(6))
|
|
{
|
|
// Diverged threads calling the same function which has a nested function call in it
|
|
if (id < 10)
|
|
{
|
|
testResult = funcA(id);
|
|
}
|
|
else
|
|
{
|
|
testResult = funcB(id);
|
|
}
|
|
testResult.y = subgroupAdd(id);
|
|
barrier();
|
|
}
|
|
else if(IsTest(7))
|
|
{
|
|
// Diverged threads which early exit
|
|
if (id < 10)
|
|
{
|
|
testResult.x = subgroupAdd(id+10);
|
|
SetOutput(testResult);
|
|
return;
|
|
}
|
|
testResult.x = subgroupAdd(id);
|
|
}
|
|
else if(IsTest(8))
|
|
{
|
|
// Loops with different number of iterations per thread
|
|
for (uint i = 0; i < id; i++)
|
|
{
|
|
testResult.x += subgroupAdd(id);
|
|
}
|
|
barrier();
|
|
}
|
|
else if(IsTest(9))
|
|
{
|
|
// Query functions : unit tests
|
|
testResult.x = float(gl_SubgroupSize);
|
|
testResult.y = float(gl_SubgroupInvocationID);
|
|
testResult.z = float(subgroupElect());
|
|
|
|
barrier();
|
|
}
|
|
|
|
SetOutput(testResult);
|
|
}
|
|
|
|
)EOSHADER";
|
|
|
|
const std::string perfShader = compCommon + R"EOSHADER(
|
|
|
|
void main()
|
|
{
|
|
vec4 testResult = vec4(0);
|
|
Init(testResult);
|
|
uint id = flatId;
|
|
|
|
// TEST CASES:
|
|
// 0: GPU math : loops 100
|
|
// 1: CPU math : loops 100
|
|
// 2: GPU math : loops 200
|
|
// 3: CPU math : loops 200
|
|
// 4: GPU math : loops 400
|
|
// 5: CPU math : loops 400
|
|
// 6: GPU math : loops 5000
|
|
// 7: CPU math : loops 5000
|
|
bool useCpu = ((GetTest() & 1) == 1) ? true : false;
|
|
|
|
uint count = 0;
|
|
{
|
|
uint temp = GetTest() >> 1;
|
|
if(temp == 0)
|
|
count = 100U;
|
|
if(temp == 1)
|
|
count = 200U;
|
|
if(temp == 2)
|
|
count = 400U;
|
|
if(temp == 3)
|
|
count = 5000U;
|
|
}
|
|
|
|
if(useCpu)
|
|
{
|
|
for (uint i = 0; i < count; ++i)
|
|
{
|
|
gsmUint4[id].x += i;
|
|
gsmUint4[id].y += i * i;
|
|
testResult.x = testResult.x * testResult.x;
|
|
testResult.x += dot(gsmUint4[id], gsmUint4[id]);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (uint i = 0; i < count; ++i)
|
|
{
|
|
gsmUint4[id].x += i;
|
|
gsmUint4[id].y += i * i;
|
|
testResult.x = pow(testResult.x, float(push.two));
|
|
testResult.x += dot(gsmUint4[id], gsmUint4[id]);
|
|
}
|
|
}
|
|
|
|
barrier();
|
|
|
|
SetOutput(testResult);
|
|
}
|
|
|
|
)EOSHADER";
|
|
|
|
VkSubgroupFeatureFlags ops = 0;
|
|
|
|
void Prepare(int argc, char **argv)
|
|
{
|
|
VulkanGraphicsTest::Prepare(argc, argv);
|
|
|
|
if(!Avail.empty())
|
|
return;
|
|
|
|
if(devVersion < VK_API_VERSION_1_1)
|
|
Avail = "Vulkan device version isn't 1.1";
|
|
|
|
static VkPhysicalDeviceSubgroupProperties subProps = {
|
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
|
|
};
|
|
|
|
getPhysProperties2(&subProps);
|
|
|
|
if(subProps.subgroupSize < 16)
|
|
Avail = "Subgroup size is less than 16";
|
|
|
|
// require at least a few ops so we only have a few conditional compilations
|
|
const VkSubgroupFeatureFlags requiredOps =
|
|
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
|
|
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
|
|
|
|
ops = subProps.supportedOperations;
|
|
|
|
if((subProps.supportedOperations & requiredOps) != requiredOps)
|
|
Avail = "Missing ops support";
|
|
|
|
if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
|
|
Avail = "Missing compute subgroup support";
|
|
}
|
|
|
|
int main()
|
|
{
|
|
// initialise, create window, create context, etc
|
|
if(!Init())
|
|
return 3;
|
|
|
|
VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
|
|
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
|
}));
|
|
|
|
VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
|
|
{setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 8)}));
|
|
|
|
std::map<std::string, std::string> macros;
|
|
|
|
int numCompTests = 0;
|
|
|
|
size_t pos = 0;
|
|
while(pos != std::string::npos)
|
|
{
|
|
pos = testShader.find("IsTest(", pos);
|
|
if(pos == std::string::npos)
|
|
break;
|
|
pos += sizeof("IsTest(") - 1;
|
|
numCompTests = std::max(numCompTests, atoi(testShader.c_str() + pos) + 1);
|
|
}
|
|
|
|
const int32_t countPerfTests = 8;
|
|
|
|
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)
|
|
macros["FEAT_SHUFFLE"] = "1";
|
|
else
|
|
macros["FEAT_SHUFFLE"] = "0";
|
|
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
|
|
macros["FEAT_SHUFFLE_RELATIVE"] = "1";
|
|
else
|
|
macros["FEAT_SHUFFLE_RELATIVE"] = "0";
|
|
if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT)
|
|
macros["FEAT_CLUSTERED"] = "1";
|
|
else
|
|
macros["FEAT_CLUSTERED"] = "0";
|
|
if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT)
|
|
macros["FEAT_QUAD"] = "1";
|
|
else
|
|
macros["FEAT_QUAD"] = "0";
|
|
if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR)
|
|
macros["FEAT_ROTATE"] = "1";
|
|
else
|
|
macros["FEAT_ROTATE"] = "0";
|
|
if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR)
|
|
macros["FEAT_ROTATE_CLUSTERED"] = "1";
|
|
else
|
|
macros["FEAT_ROTATE_CLUSTERED"] = "0";
|
|
|
|
std::string comppipe_name[1];
|
|
VkPipeline compPipes[1];
|
|
uint32_t countPipes = 0;
|
|
VkPipeline perfPipes[1];
|
|
uint32_t countPerfPipes = 0;
|
|
|
|
macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
|
|
|
|
macros["GROUP_SIZE_X"] = "70";
|
|
macros["GROUP_SIZE_Y"] = "1";
|
|
macros["GROUP_SIZE_Z"] = "1";
|
|
comppipe_name[countPipes] = fmt::format("{}x{}x{}", macros["GROUP_SIZE_X"],
|
|
macros["GROUP_SIZE_Y"], macros["GROUP_SIZE_Z"]);
|
|
|
|
compPipes[countPipes] = createComputePipeline(vkh::ComputePipelineCreateInfo(
|
|
layout, CompileShaderModule(testShader, ShaderLang::glsl, ShaderStage::comp, "main", macros,
|
|
SPIRVTarget::vulkan11)));
|
|
++countPipes;
|
|
|
|
perfPipes[0] = createComputePipeline(vkh::ComputePipelineCreateInfo(
|
|
layout, CompileShaderModule(perfShader, ShaderLang::glsl, ShaderStage::comp, "main", macros,
|
|
SPIRVTarget::vulkan11)));
|
|
++countPerfPipes;
|
|
|
|
AllocatedBuffer bufout(
|
|
this,
|
|
vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests,
|
|
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
|
|
VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
|
|
|
|
setName(bufout.buffer, "bufout");
|
|
|
|
VkDescriptorSet set = allocateDescriptorSet(setlayout);
|
|
|
|
vkh::updateDescriptorSets(
|
|
device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
|
{vkh::DescriptorBufferInfo(bufout.buffer)})});
|
|
|
|
while(Running())
|
|
{
|
|
VkCommandBuffer cmd = GetCommandBuffer();
|
|
|
|
vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
|
|
|
|
VkImage swapimg =
|
|
StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
|
|
|
|
vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
|
|
|
|
pushMarker(cmd, "Compute Tests");
|
|
|
|
for(size_t p = 0; p < countPipes; p++)
|
|
{
|
|
vkh::cmdPipelineBarrier(
|
|
cmd, {},
|
|
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
|
|
|
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
|
|
|
|
vkh::cmdPipelineBarrier(
|
|
cmd, {},
|
|
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
|
|
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
|
|
|
pushMarker(cmd, comppipe_name[p]);
|
|
|
|
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compPipes[p]);
|
|
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
|
|
|
|
for(int i = 0; i < numCompTests; i++)
|
|
{
|
|
vkh::cmdPushConstants(cmd, layout, i);
|
|
vkCmdDispatch(cmd, 2, 1, 1);
|
|
}
|
|
|
|
popMarker(cmd);
|
|
}
|
|
|
|
popMarker(cmd);
|
|
|
|
pushMarker(cmd, "Perf Tests");
|
|
|
|
for(size_t p = 0; p < countPerfPipes; p++)
|
|
{
|
|
vkh::cmdPipelineBarrier(
|
|
cmd, {},
|
|
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
|
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
|
|
|
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
|
|
|
|
vkh::cmdPipelineBarrier(
|
|
cmd, {},
|
|
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
|
|
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
|
|
|
pushMarker(cmd, comppipe_name[p]);
|
|
|
|
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, perfPipes[p]);
|
|
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
|
|
|
|
for(int i = 0; i < countPerfTests; i++)
|
|
{
|
|
bool useCpu = (i & 0x1);
|
|
int count = 0;
|
|
{
|
|
int temp = i >> 1;
|
|
if(temp == 0)
|
|
count = 100U;
|
|
if(temp == 1)
|
|
count = 200U;
|
|
if(temp == 2)
|
|
count = 400U;
|
|
if(temp == 3)
|
|
count = 5000U;
|
|
}
|
|
std::string perfTestName =
|
|
fmt::format("{} Iterations {} Math", count, useCpu ? "CPU" : "GPU");
|
|
pushMarker(cmd, perfTestName);
|
|
vkh::cmdPushConstants(cmd, layout, i);
|
|
vkCmdDispatch(cmd, 2, 1, 1);
|
|
popMarker(cmd);
|
|
}
|
|
|
|
popMarker(cmd);
|
|
}
|
|
|
|
popMarker(cmd);
|
|
|
|
FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
|
|
|
|
vkEndCommandBuffer(cmd);
|
|
|
|
SubmitAndPresent({cmd});
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
};
|
|
|
|
REGISTER_TEST();
|