Add a test for new group ops

This commit is contained in:
baldurk
2025-02-10 17:51:36 +00:00
parent da04916a77
commit cfefa5d5ef
10 changed files with 721 additions and 6 deletions
+1
View File
@@ -155,6 +155,7 @@ set(VULKAN_SRC
vk/vk_simple_triangle.cpp
vk/vk_spec_constants.cpp
vk/vk_spirv_13_shaders.cpp
vk/vk_subgroup_zoo.cpp
vk/vk_structured_buffer_nested.cpp
vk/vk_sync2.cpp
vk/vk_texture_zoo.cpp
+1
View File
@@ -341,6 +341,7 @@
<ClCompile Include="vk\vk_shader_printf.cpp" />
<ClCompile Include="vk\vk_spec_constants.cpp" />
<ClCompile Include="vk\vk_spirv_13_shaders.cpp" />
<ClCompile Include="vk\vk_subgroup_zoo.cpp" />
<ClCompile Include="vk\vk_sync2.cpp" />
<ClCompile Include="vk\vk_template.cpp" />
<ClCompile Include="vk\vk_texture_zoo.cpp" />
+3
View File
@@ -697,6 +697,9 @@
<ClCompile Include="vk\vk_ray_query.cpp">
<Filter>Vulkan\demos</Filter>
</ClCompile>
<ClCompile Include="vk\vk_subgroup_zoo.cpp">
<Filter>Vulkan\demos</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="D3D11">
+26
View File
@@ -179,6 +179,19 @@ void cmdPushDescriptorSets(VkCommandBuffer cmd, VkPipelineBindPoint pipelineBind
VkPipelineLayout layout, uint32_t set,
std::vector<VkWriteDescriptorSet> writes);
// Records a vkCmdPushConstants call that uploads the whole of `val` at offset 0 of the push
// constant range.
//
// cmd:    command buffer to record into.
// layout: pipeline layout passed through to vkCmdPushConstants.
// stages: shader stages that will read the push constants.
// val:    object whose bytes are uploaded; sizeof(T) bytes are read starting at &val.
template <typename T>
void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, VkShaderStageFlags stages,
                      const T &val)
{
  // Vulkan requires the size passed to vkCmdPushConstants to be a multiple of 4, so reject
  // badly-sized types at compile time instead of relying on validation layers at runtime.
  static_assert(sizeof(T) % 4 == 0, "push constant data size must be a multiple of 4 bytes");

  // the API takes a uint32_t size, make the narrowing from size_t explicit
  vkCmdPushConstants(cmd, layout, stages, 0, static_cast<uint32_t>(sizeof(T)), &val);
}
// Convenience overload of cmdPushConstants for callers that don't care about stage masks: uploads
// `val` with the push constants made visible to every shader stage (VK_SHADER_STAGE_ALL).
template <typename T>
void cmdPushConstants(VkCommandBuffer cmd, VkPipelineLayout layout, const T &val)
{
  const VkShaderStageFlags everyStage = VK_SHADER_STAGE_ALL;
  cmdPushConstants(cmd, layout, everyStage, val);
}
struct ApplicationInfo : public VkApplicationInfo
{
ApplicationInfo() : VkApplicationInfo() { sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; }
@@ -474,6 +487,19 @@ struct BufferMemoryBarrier : public VkBufferMemoryBarrier
}
};
#undef MemoryBarrier
struct MemoryBarrier : public VkMemoryBarrier
{
MemoryBarrier(VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask)
{
sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
pNext = NULL;
this->srcAccessMask = srcAccessMask;
this->dstAccessMask = dstAccessMask;
}
};
struct CommandBufferAllocateInfo : public VkCommandBufferAllocateInfo
{
CommandBufferAllocateInfo(VkCommandPool commandPool, uint32_t commandBufferCount,
+451
View File
@@ -0,0 +1,451 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "3rdparty/fmt/core.h"
#include "vk_test.h"
RD_TEST(VK_Subgroup_Zoo, VulkanGraphicsTest)
{
// One-line human readable summary of what this demo exercises.
// NOTE(review): presumably picked up by the test registration macros for listing - confirm.
static constexpr const char *Description =
"Test of behaviour around subgroup operations in shaders.";
// GLSL preamble shared by every shader in this test: it is prepended to the vertex, pixel and
// compute sources below. It enables the basic/ballot/vote/arithmetic subgroup extensions
// unconditionally and the remaining ones only when the matching FEAT_* macro is non-zero, and
// declares the push constant `test` which selects the test a draw or dispatch runs. Shaders check
// that selector through the IsTest() macro.
const std::string common = R"EOSHADER(
#version 460 core
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_vote : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable
#if FEAT_SHUFFLE
#extension GL_KHR_shader_subgroup_shuffle : enable
#endif
#if FEAT_SHUFFLE_RELATIVE
#extension GL_KHR_shader_subgroup_shuffle_relative : enable
#endif
#if FEAT_CLUSTERED
#extension GL_KHR_shader_subgroup_clustered : enable
#endif
#if FEAT_QUAD
#extension GL_KHR_shader_subgroup_quad : enable
#endif
#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
#extension GL_KHR_shader_subgroup_rotate : enable
#endif
layout(push_constant) uniform PushData
{
uint test;
} push;
#define IsTest(x) (push.test == x)
)EOSHADER";
// Vertex shader. Picks the position for gl_VertexIndex out of a 4-entry table of quad corners,
// shrunk to 0.2 scale when running test 2. The `vertdata` output carries the vertex-stage test
// results: the subgroup invocation ID for test 0, subgroupAdd() of the invocation IDs for test 3,
// and zero for every other test.
const std::string vertex = common + R"EOSHADER(
layout(location = 0) out vec4 vertdata;
void main()
{
vec2 positions[] = {
vec2(-1.0f, 1.0f),
vec2( 1.0f, 1.0f),
vec2(-1.0f, -1.0f),
vec2( 1.0f, -1.0f),
};
float scale = 1.0f;
if(IsTest(2))
scale = 0.2f;
gl_Position = vec4(positions[gl_VertexIndex]*vec2(scale,scale), 0, 1);
vertdata = vec4(0);
if(IsTest(0))
vertdata = vec4(gl_SubgroupInvocationID, 0, 0, 1);
else if(IsTest(3))
vertdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
}
)EOSHADER";
// Pixel shader. Outputs the incoming `vertdata` plus data generated per fragment: tests 1 and 2
// write the fragment's subgroup invocation ID, test 4 writes subgroupAdd() of the invocation IDs,
// and all other tests add zero so the vertex-stage result passes through unchanged.
const std::string pixel = common + R"EOSHADER(
layout(location = 0) in vec4 vertdata;
layout(location = 0, index = 0) out vec4 Color;
void main()
{
vec4 fragdata = vec4(0);
if(IsTest(1) || IsTest(2))
fragdata = vec4(gl_SubgroupInvocationID, 0, 0, 1);
else if(IsTest(4))
fragdata = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
Color = vertdata + fragdata;
}
)EOSHADER";
// Compute shader. Every invocation writes one vec4 into the output buffer, into the 1024-entry
// slot belonging to the current test (push.test) and at an index formed by flattening its 2D local
// invocation ID as y * GROUP_SIZE_X + x. COMP_TESTS, GROUP_SIZE_X and GROUP_SIZE_Y are not defined
// here and must be provided as macros when the shader is compiled. Test 0 writes the subgroup
// invocation ID and test 1 writes subgroupAdd() of the invocation IDs.
const std::string comp = common + R"EOSHADER(
struct Output
{
vec4 vals[1024];
};
layout(binding = 0, std430) buffer outbuftype {
Output data[COMP_TESTS];
} outbuf;
layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in;
void main()
{
vec4 data = vec4(0);
if(IsTest(0))
data = vec4(gl_SubgroupInvocationID, 0, 0, 0);
else if(IsTest(1))
data = vec4(subgroupAdd(gl_SubgroupInvocationID), 0, 0, 0);
outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
}
)EOSHADER";
// Subgroup operations supported by the device. Filled in by Prepare() from the queried
// VkPhysicalDeviceSubgroupProperties::supportedOperations and read in main() to decide the value
// of each FEAT_* shader macro. Stays 0 if Prepare() bails out before the query.
VkSubgroupFeatureFlags ops = 0;
// Decides whether this test can run on the current device, on top of the base class checks.
//
// On return `Avail` is left empty if the test can run, otherwise it holds the reason it can't.
// Requirements are checked in order from most to least fundamental and the first one that fails
// is the one reported, so a later check never hides the root cause. `ops` is set to the device's
// supported subgroup operations as soon as they have been queried.
void Prepare(int argc, char **argv)
{
  VulkanGraphicsTest::Prepare(argc, argv);

  // the base class already found a reason we can't run, keep that
  if(!Avail.empty())
    return;

  // subgroup properties are only queried on Vulkan 1.1 devices, so don't go any further without it
  if(devVersion < VK_API_VERSION_1_1)
  {
    Avail = "Vulkan device version isn't 1.1";
    return;
  }

  // NOTE(review): kept static as in the original code - presumably getPhysProperties2() may hold
  // on to the pointer, confirm before making this a plain local.
  static VkPhysicalDeviceSubgroupProperties subProps = {
      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
  };

  getPhysProperties2(&subProps);

  ops = subProps.supportedOperations;

  if(subProps.subgroupSize < 16)
  {
    Avail = "Subgroup size is less than 16";
    return;
  }

  // require at least a few ops so we only have a few conditional compilations
  const VkSubgroupFeatureFlags requiredOps =
      VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
      VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;

  if((subProps.supportedOperations & requiredOps) != requiredOps)
  {
    Avail = "Missing ops support";
    return;
  }

  // require all stages for simplicity
  if((subProps.supportedStages & VK_SHADER_STAGE_VERTEX_BIT) == 0)
  {
    Avail = "Missing vertex subgroup support";
    return;
  }

  if((subProps.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
  {
    Avail = "Missing pixel subgroup support";
    return;
  }

  if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
  {
    Avail = "Missing compute subgroup support";
    return;
  }
}
int main()
{
// initialise, create window, create context, etc
if(!Init())
return 3;
VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
}));
VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
{setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 4)}));
const uint32_t imgDim = 128;
AllocatedImage img(
this,
vkh::ImageCreateInfo(imgDim, imgDim, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT),
VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY}));
VkImageView imgview = createImageView(
vkh::ImageViewCreateInfo(img.image, VK_IMAGE_VIEW_TYPE_2D, VK_FORMAT_R32G32B32A32_SFLOAT));
vkh::RenderPassCreator renderPassCreateInfo;
renderPassCreateInfo.attachments.push_back(
vkh::AttachmentDescription(VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_GENERAL, VK_ATTACHMENT_LOAD_OP_CLEAR));
renderPassCreateInfo.addSubpass({VkAttachmentReference({0, VK_IMAGE_LAYOUT_GENERAL})});
VkRenderPass renderPass = createRenderPass(renderPassCreateInfo);
VkFramebuffer framebuffer =
createFramebuffer(vkh::FramebufferCreateInfo(renderPass, {imgview}, {imgDim, imgDim}));
vkh::GraphicsPipelineCreateInfo pipeCreateInfo;
pipeCreateInfo.renderPass = renderPass;
pipeCreateInfo.layout = layout;
pipeCreateInfo.inputAssemblyState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
std::map<std::string, std::string> macros;
int vertTests = 0, pixTests = 0;
int numCompTests = 0;
{
size_t pos = 0;
while(pos != std::string::npos)
{
pos = pixel.find("IsTest(", pos);
if(pos == std::string::npos)
break;
pos += sizeof("IsTest(") - 1;
pixTests = std::max(pixTests, atoi(pixel.c_str() + pos) + 1);
}
pos = 0;
while(pos != std::string::npos)
{
pos = vertex.find("IsTest(", pos);
if(pos == std::string::npos)
break;
pos += sizeof("IsTest(") - 1;
vertTests = std::max(vertTests, atoi(vertex.c_str() + pos) + 1);
}
pos = 0;
while(pos != std::string::npos)
{
pos = comp.find("IsTest(", pos);
if(pos == std::string::npos)
break;
pos += sizeof("IsTest(") - 1;
numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
}
}
const uint32_t numGraphicsTests = std::max(vertTests, pixTests);
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)
macros["FEAT_SHUFFLE"] = "1";
else
macros["FEAT_SHUFFLE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
macros["FEAT_SHUFFLE_RELATIVE"] = "1";
else
macros["FEAT_SHUFFLE_RELATIVE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT)
macros["FEAT_CLUSTERED"] = "1";
else
macros["FEAT_CLUSTERED"] = "0";
if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT)
macros["FEAT_QUAD"] = "1";
else
macros["FEAT_QUAD"] = "0";
if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR)
macros["FEAT_ROTATE"] = "1";
else
macros["FEAT_ROTATE"] = "0";
if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR)
macros["FEAT_ROTATE_CLUSTERED"] = "1";
else
macros["FEAT_ROTATE_CLUSTERED"] = "0";
pipeCreateInfo.stages = {
CompileShaderModule(vertex, ShaderLang::glsl, ShaderStage::vert, "main", macros,
SPIRVTarget::vulkan11),
CompileShaderModule(pixel, ShaderLang::glsl, ShaderStage::frag, "main", macros,
SPIRVTarget::vulkan11),
};
VkPipeline pipe = createGraphicsPipeline(pipeCreateInfo);
std::string comppipe_name[4];
VkPipeline comppipe[4];
macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
macros["GROUP_SIZE_X"] = "256";
macros["GROUP_SIZE_Y"] = "1";
comppipe_name[0] = "256x1";
comppipe[0] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
macros["GROUP_SIZE_X"] = "128";
macros["GROUP_SIZE_Y"] = "2";
comppipe_name[1] = "128x2";
comppipe[1] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
macros["GROUP_SIZE_X"] = "8";
macros["GROUP_SIZE_Y"] = "128";
comppipe_name[2] = "8x128";
comppipe[2] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
macros["GROUP_SIZE_X"] = "150";
macros["GROUP_SIZE_Y"] = "1";
comppipe_name[3] = "150x1";
comppipe[3] = createComputePipeline(vkh::ComputePipelineCreateInfo(
layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
SPIRVTarget::vulkan11)));
AllocatedBuffer bufout(
this,
vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests,
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
setName(bufout.buffer, "bufout");
VkDescriptorSet set = allocateDescriptorSet(setlayout);
vkh::updateDescriptorSets(
device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
{vkh::DescriptorBufferInfo(bufout.buffer)})});
while(Running())
{
VkCommandBuffer cmd = GetCommandBuffer();
vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
VkImage swapimg =
StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
vkh::cmdPipelineBarrier(
cmd, {vkh::ImageMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_GENERAL, img.image)});
vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
vkh::cmdPipelineBarrier(
cmd,
{vkh::ImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, img.image)});
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipe);
VkViewport v = {};
v.maxDepth = 1.0f;
v.width = v.height = (float)imgDim;
VkRect2D s = {};
s.extent.width = s.extent.height = imgDim;
vkCmdSetViewport(cmd, 0, 1, &v);
vkCmdSetScissor(cmd, 0, 1, &s);
// separate render passes with a fat barrier before each to avoid subgroups crossing draws
pushMarker(cmd, "Graphics Tests");
for(uint32_t i = 0; i < numGraphicsTests; i++)
{
vkh::cmdPipelineBarrier(
cmd, {}, {},
{vkh::MemoryBarrier(VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT)});
vkCmdBeginRenderPass(
cmd,
vkh::RenderPassBeginInfo(renderPass, framebuffer, s,
{vkh::ClearValue(123456.0f, 789.0f, 101112.0f, 0.0f)}),
VK_SUBPASS_CONTENTS_INLINE);
vkh::cmdPushConstants(cmd, layout, i);
vkCmdDraw(cmd, 4, 1, 0, 0);
vkCmdEndRenderPass(cmd);
}
popMarker(cmd);
pushMarker(cmd, "Compute Tests");
for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++)
{
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
vkh::cmdPipelineBarrier(
cmd, {},
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
pushMarker(cmd, comppipe_name[p]);
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comppipe[p]);
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
for(int i = 0; i < numCompTests; i++)
{
vkh::cmdPushConstants(cmd, layout, i);
vkCmdDispatch(cmd, 1, 1, 1);
}
popMarker(cmd);
}
popMarker(cmd);
FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
vkEndCommandBuffer(cmd);
SubmitAndPresent({cmd});
}
return 0;
}
};
REGISTER_TEST();
+1 -1
View File
@@ -313,7 +313,7 @@ def run_tests(test_include: str, test_exclude: str, in_process: bool, slow_tests
instance = testclass()
supported, unsupported_reason = instance.check_support()
supported, unsupported_reason = instance.check_support(test_include=test_include)
if not supported:
log.print("Skipping {} as {}".format(name, unsupported_reason))
+2 -2
View File
@@ -167,7 +167,7 @@ class TestCase:
def set_test_list(tests):
TestCase._test_list = tests
def check_support(self):
def check_support(self, **kwargs):
if self.demos_test_name != '':
if self.demos_test_name not in TestCase._test_list:
return False,'Test {} not in compiled tests'.format(self.demos_test_name)
@@ -680,7 +680,7 @@ class TestCase:
raise KeyError("Couldn't parse path {}".format(path))
def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars):
def evaluate_source_var(self, sourceVar: rd.SourceVariableMapping, debugVars) -> rd.ShaderVariable:
debugged = rd.ShaderVariable()
debugged.name = sourceVar.name
debugged.type = sourceVar.type
@@ -4,7 +4,7 @@ import rdtest
class D3D12_Multi_Wait_Before_Signal(rdtest.TestCase):
demos_test_name = 'D3D12_Multi_Wait_Before_Signal'
def check_support(self):
def check_support(self, **kwargs):
# TODO: Enable this if/when rdoc can reorder from the original submission
# order, which blocks multiple queues with waits that get signalled by
# later submissions to other queues.
+2 -2
View File
@@ -10,11 +10,11 @@ except ImportError as ex:
class D3D12_RGP_Capture(rdtest.TestCase):
demos_test_name = 'D3D12_Simple_Triangle'
def check_support(self):
def check_support(self, **kwargs):
if tkinter is None:
return False, 'tkinter is required but not available'
return super().check_support()
return super().check_support(**kwargs)
def check_capture(self):
apiprops: rd.APIProperties = self.controller.GetAPIProperties()
+233
View File
@@ -0,0 +1,233 @@
import renderdoc as rd
import struct
import rdtest
class VK_Subgroup_Zoo(rdtest.TestCase):
demos_test_name = 'VK_Subgroup_Zoo'
def check_support(self, **kwargs):
    """
    Report whether this test should run.

    The test is opt-in: it only runs when the ``test_include`` keyword argument is exactly
    ``'VK_Subgroup_Zoo'``. Any other value, or no ``test_include`` at all, disables it.

    :param kwargs: Optional keyword arguments from the test runner. Only ``test_include`` is read.
    :return: A ``(supported, reason)`` tuple. ``reason`` is empty when supported.
    """
    # Only allow this if explicitly run. Use .get() so that a caller which passes no
    # test_include gets a clean "disabled" result instead of a KeyError.
    if kwargs.get('test_include') == 'VK_Subgroup_Zoo':
        return True, ''
    return False, 'Disabled test'
def check_capture(self):
    """
    Compare shader debugging results against the values the GPU actually produced.

    For every draw under the "Graphics Tests" marker each vertex is debugged and its
    ``vertdata`` output compared with the post-VS data, then a fixed set of pixels is debugged
    and compared with the value picked from the first output target. For every dispatch under
    each workgroup-size marker in "Compute Tests" a selection of threads is debugged and the
    final value of the shader's ``data`` variable is compared with what was written to the
    read-write buffer.

    Mismatches and missing debug results are reported with ``rdtest.log.error`` and checking
    carries on with the next vertex/pixel/thread. Every trace is freed on every path.
    """
    graphics_tests = [a for a in self.find_action("Graphics Tests").children
                      if a.flags & rd.ActionFlags.Drawcall]
    compute_dims = [a for a in self.find_action("Compute Tests").children
                    if 'x' in a.customName]

    rdtest.log.begin_section("Graphics tests")

    # instances to check in instanced draws
    inst_checks = [0, 1, 5, 10]

    # pixels to check
    pixel_checks = [
        # top quad
        (0, 0), (1, 0), (0, 1), (1, 1),
        # middle quad (away from triangle border)
        (64, 56), (65, 56), (64, 57), (65, 57),
        # middle quad (on triangle border)
        (64, 64), (65, 64), (64, 65), (65, 65),
        # middle quad on other triangle
        (56, 64), (57, 64), (56, 65), (57, 65),
    ]

    # threads to check. largest dimension only (all small dim checked)
    thread_checks = [
        # first few
        0, 1, 2,
        # near end of 32-subgroup and boundary
        30, 31, 32,
        # near end of 64-subgroup and boundary
        62, 63, 64,
        # large values spaced out with one near the end of our unaligned size
        100, 110, 120, 140, 149, 150, 160, 200, 250,
    ]

    # must match the clear colour the demo uses for its render passes
    clear_col = (123456.0, 789.0, 101112.0, 0.0)

    for idx, action in enumerate(graphics_tests):
        self.controller.SetFrameEvent(action.eventId, False)
        pipe = self.controller.GetPipelineState()

        # check vertex output for every vertex
        for inst in [inst for inst in inst_checks if inst < action.numInstances]:
            for view in range(pipe.MultiviewBroadcastCount()):
                postvs = self.get_postvs(
                    action, rd.MeshDataStage.VSOut, first_index=0,
                    num_indices=action.numIndices, instance=inst)

                for vtx in range(action.numIndices):
                    trace = self.controller.DebugVertex(vtx, inst, vtx, view)

                    if trace.debugger is None:
                        self.controller.FreeTrace(trace)
                        rdtest.log.error(
                            f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
                        # keep going like the pixel and compute paths do, so the remaining
                        # tests still run and the log section is closed properly
                        continue

                    _, variables = self.process_trace(trace)

                    for var in trace.sourceVars:
                        if var.name != 'vertdata':
                            continue

                        if var.name not in postvs[vtx]:
                            rdtest.log.error(
                                f"Don't have expected output for {var.name}")
                            continue

                        real = postvs[vtx][var.name]
                        debugged = self.evaluate_source_var(var, variables)

                        if debugged.columns != 4 or len(real) != 4:
                            rdtest.log.error(
                                f"Vertex output is not the right size ({len(real)} vs {debugged.columns})")
                            continue

                        if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
                            rdtest.log.error(
                                f"Test {idx} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")

                    self.controller.FreeTrace(trace)

        # check some assorted pixel outputs
        target = pipe.GetOutputTargets()[0].resource

        for pixel in pixel_checks:
            for view in range(pipe.MultiviewBroadcastCount()):
                x, y = pixel

                picked = self.controller.PickPixel(
                    target, x, y, rd.Subresource(0, 0, 0), rd.CompType.Float)

                real = picked.floatValue

                # silently skip pixels that weren't written to
                if real == clear_col:
                    continue

                inputs = rd.DebugPixelInputs()
                inputs.sample = 0
                inputs.primitive = rd.ReplayController.NoPreference
                inputs.view = view

                trace = self.controller.DebugPixel(x, y, inputs)

                if trace.debugger is None:
                    self.controller.FreeTrace(trace)
                    rdtest.log.error(
                        f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
                    continue

                _, variables = self.process_trace(trace)

                output_sourcevar = self.find_output_source_var(
                    trace, rd.ShaderBuiltin.ColorOutput, 0)

                if output_sourcevar is None:
                    # don't leak the trace when bailing out of this pixel
                    self.controller.FreeTrace(trace)
                    rdtest.log.error("No output variable found")
                    continue

                debugged = self.evaluate_source_var(output_sourcevar, variables)

                self.controller.FreeTrace(trace)

                debuggedValue = list(debugged.value.f32v[0:4])

                if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
                    rdtest.log.error(
                        f"Test {idx} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")

        rdtest.log.success(f"Test {idx} successful")

    rdtest.log.end_section("Graphics tests")

    for comp_dim in compute_dims:
        rdtest.log.begin_section(
            f"Compute tests with {comp_dim.customName} workgroup")

        compute_tests = [
            a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]

        for test, action in enumerate(compute_tests):
            self.controller.SetFrameEvent(action.eventId, False)
            pipe = self.controller.GetPipelineState()
            csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)

            dim = csrefl.dispatchThreadsDimension

            rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)

            if len(rw) != 1:
                rdtest.log.error("Unexpected number of RW resources")
                continue

            # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
            bufdata = self.controller.GetBufferData(
                rw[0].descriptor.resource, test*16*1024, 16*1024)

            for t in thread_checks:
                # t indexes the larger workgroup dimension, and every thread along the smaller
                # dimension is checked. Default to a horizontal workgroup
                x_base, x_count = t, 1
                y_base, y_count = 0, dim[1]

                # vertical orientation
                if dim[1] > dim[0]:
                    x_base, x_count = 0, dim[0]
                    y_base, y_count = t, 1

                for x in range(x_base, x_base + x_count):
                    for y in range(y_base, y_base + y_count):
                        z = 0

                        # thread is outside this workgroup size
                        if x >= dim[0] or y >= dim[1]:
                            continue

                        # one 16-byte vec4 per thread, rows of dim[0] threads
                        real = struct.unpack_from(
                            "4f", bufdata, 16*y*dim[0] + 16*x)

                        trace = self.controller.DebugThread(
                            (0, 0, 0), (x, y, z))

                        # check for a failed debug before trying to step through the trace
                        if trace.debugger is None:
                            self.controller.FreeTrace(trace)
                            rdtest.log.error(
                                f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
                            continue

                        _, variables = self.process_trace(trace)

                        # the value of 'data' at the last instruction is what was written out
                        sourceVars = [
                            v for v in trace.instInfo[-1].sourceVars if v.name == 'data']

                        if len(sourceVars) != 1:
                            self.controller.FreeTrace(trace)
                            rdtest.log.error(
                                "Couldn't find compute data variable")
                            continue

                        debugged = self.evaluate_source_var(
                            sourceVars[0], variables)

                        self.controller.FreeTrace(trace)

                        debuggedValue = list(debugged.value.f32v[0:4])

                        if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
                            rdtest.log.error(
                                f"Test {test} at {action.eventId} debugged thread value {debuggedValue} at {x},{y},{z} does not match output {real}")

            rdtest.log.success(f"Test {test} successful")

        rdtest.log.end_section(
            f"Compute tests with {comp_dim.customName} workgroup")