Add D3D12_Subgroup_Zoo test

This commit is contained in:
baldurk
2025-03-18 16:13:17 +00:00
parent 083af09397
commit 9bdb5bc134
5 changed files with 547 additions and 4 deletions
@@ -0,0 +1,306 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2025 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "3rdparty/fmt/core.h"
#include "d3d12_test.h"
// Demo that renders a handful of draws and compute dispatches whose shaders use wave (subgroup)
// intrinsics. Each draw/dispatch selects one numbered test via a root constant, so that a capture
// of this program contains one action per test for every shader stage / workgroup size.
RD_TEST(D3D12_Subgroup_Zoo, D3D12GraphicsTest)
{
  static constexpr const char *Description =
      "Test of behaviour around subgroup operations in shaders.";

  // HLSL prefix shared by every stage: the root constant selecting the active test, and the
  // IsTest() macro that the host code below also scans for to count tests.
  const std::string common = R"EOSHADER(
cbuffer rootconsts : register(b0)
{
uint root_test;
}
#define IsTest(x) (root_test == x)
)EOSHADER";

  // Vertex shader: emits a 4-vertex quad from SV_VertexID (scaled down for test 2) and writes
  // wave data into the DATA output for the vertex-stage tests.
  const std::string vertex = common + R"EOSHADER(
struct OUT
{
float4 pos : SV_Position;
float4 data : DATA;
};
OUT main(uint vert : SV_VertexID)
{
OUT ret = (OUT)0;
float2 positions[] = {
float2(-1.0f, 1.0f),
float2( 1.0f, 1.0f),
float2(-1.0f, -1.0f),
float2( 1.0f, -1.0f),
};
float scale = 1.0f;
if(IsTest(2))
scale = 0.2f;
ret.pos = float4(positions[vert]*float2(scale,scale), 0, 1);
ret.data = 0.0f.xxxx;
uint wave = WaveGetLaneIndex();
if(IsTest(0))
ret.data = float4(wave, 0, 0, 1);
else if(IsTest(3))
ret.data = float4(WaveActiveSum(wave), 0, 0, 0);
return ret;
}
)EOSHADER";

  // Pixel shader: adds pixel-stage wave data (for the pixel-stage tests) on top of whatever the
  // vertex shader passed through in DATA.
  const std::string pixel = common + R"EOSHADER(
struct IN
{
float4 pos : SV_Position;
float4 data : DATA;
};
float4 main(IN input) : SV_Target0
{
uint wave = WaveGetLaneIndex();
float4 pixdata = 0.0f.xxxx;
if(IsTest(1) || IsTest(2))
pixdata = float4(wave, 0, 0, 1);
else if(IsTest(4))
pixdata = float4(WaveActiveSum(wave), 0, 0, 0);
return input.data + pixdata;
}
)EOSHADER";

  // Compute shader: GROUP_SIZE_X / GROUP_SIZE_Y are prepended per pipeline in main(). Every
  // thread writes one float4, and each test owns a 1024-element slice of outbuf.
  const std::string comp = common + R"EOSHADER(
RWStructuredBuffer<float4> outbuf : register(u0);
[numthreads(GROUP_SIZE_X, GROUP_SIZE_Y, 1)]
void main(uint3 tid : SV_DispatchThreadID)
{
float4 data = 0.0f.xxxx;
uint wave = WaveGetLaneIndex();
if(IsTest(0))
data = float4(wave, 0, 0, 0);
else if(IsTest(1))
data = float4(WaveActiveSum(wave), 0, 0, 0);
outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;
}
)EOSHADER";

  // Returns the number of tests a shader source defines: one more than the largest integer that
  // directly follows any "IsTest(" occurrence in src.
  //
  // Non-numeric arguments parse as 0 via atoi(). Since every shader here is prefixed with `common`,
  // whose `#define IsTest(x)` also matches, the result for those shaders is always at least 1.
  static int CountTests(const std::string &src)
  {
    static const std::string marker = "IsTest(";

    int count = 0;
    for(size_t pos = src.find(marker); pos != std::string::npos; pos = src.find(marker, pos))
    {
      // step over the marker so atoi() sees the argument and the next find() makes progress
      pos += marker.size();
      count = std::max(count, atoi(src.c_str() + pos) + 1);
    }
    return count;
  }

  // Runs the base-class preparation, then marks the test unavailable when the reported maximum
  // wave lane count is below 16.
  void Prepare(int argc, char **argv)
  {
    D3D12GraphicsTest::Prepare(argc, argv);

    // if the base class already recorded why the test can't run, keep that reason instead of
    // replacing it with a lane-count message
    if(!Avail.empty())
      return;

    if(opts1.WaveLaneCountMax < 16)
      Avail = "Subgroup size is less than 16";
  }

  // Entry point: builds the pipelines, then each frame records one draw per graphics test and one
  // dispatch per compute test for every workgroup size. Returns 3 if initialisation fails, else 0.
  int main()
  {
    // initialise, create window, create device, etc
    if(!Init())
      return 3;

    // root parameter 0: the 32-bit test index constant, root parameter 1: the output UAV
    ID3D12RootSignaturePtr sig = MakeSig({constParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0, 1),
                                          uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0)});

    const uint32_t imgDim = 128;

    ID3D12ResourcePtr fltTex = MakeTexture(DXGI_FORMAT_R32G32B32A32_FLOAT, imgDim, imgDim)
                                   .RTV()
                                   .InitialState(D3D12_RESOURCE_STATE_RENDER_TARGET);
    fltTex->SetName(L"fltTex");
    D3D12_CPU_DESCRIPTOR_HANDLE fltRTV = MakeRTV(fltTex).CreateCPU(0);
    // NOTE(review): fltSRV is not referenced again in this function - kept so the descriptor is
    // still created exactly as before.
    D3D12_GPU_DESCRIPTOR_HANDLE fltSRV = MakeSRV(fltTex).CreateGPU(8);

    // the test counts come from the shader text itself, so adding an IsTest(N) branch to a shader
    // automatically adds a draw/dispatch for it
    const int vertTests = CountTests(vertex);
    const int pixTests = CountTests(pixel);
    const int numCompTests = CountTests(comp);

    // a draw exercises both stages, so run as many draws as the stage with the most tests needs
    const uint32_t numGraphicsTests = (uint32_t)std::max(vertTests, pixTests);

    // workgroup dimensions to build a compute pipeline for
    struct
    {
      int x, y;
    } compsize[] = {
        {256, 1},
        {128, 2},
        {8, 128},
        {150, 1},
    };
    std::string comppipe_name[ARRAY_COUNT(compsize)];
    ID3D12PipelineStatePtr comppipe[ARRAY_COUNT(compsize)];

    std::string defines;
    defines += fmt::format("#define COMP_TESTS {}\n", numCompTests);
    defines += "\n";

    ID3D12PipelineStatePtr graphics = MakePSO()
                                          .RootSig(sig)
                                          .VS(Compile(defines + vertex, "main", "vs_6_0"))
                                          .PS(Compile(defines + pixel, "main", "ps_6_0"))
                                          .RTVs({DXGI_FORMAT_R32G32B32A32_FLOAT});

    for(size_t i = 0; i < ARRAY_COUNT(comppipe); i++)
    {
      const std::string sizedefine =
          fmt::format("#define GROUP_SIZE_X {}\n#define GROUP_SIZE_Y {}\n", compsize[i].x,
                      compsize[i].y);

      // the "WxH" name is used both as the pipeline's debug name and as the marker label below
      comppipe_name[i] = fmt::format("{}x{}", compsize[i].x, compsize[i].y);

      comppipe[i] =
          MakePSO().RootSig(sig).CS(Compile(defines + sizedefine + comp, "main", "cs_6_0"));
      comppipe[i]->SetName(UTF82Wide(comppipe_name[i]).c_str());
    }

    // one float4 per thread, 1024 slots per test - matches the indexing in the compute shader
    ID3D12ResourcePtr bufOut = MakeBuffer().Size(sizeof(Vec4f) * 1024 * numCompTests).UAV();
    D3D12ViewCreator uavView =
        MakeUAV(bufOut).Format(DXGI_FORMAT_R32_UINT).NumElements(4 * 1024 * numCompTests);
    D3D12_CPU_DESCRIPTOR_HANDLE uavcpu = uavView.CreateClearCPU(10);
    D3D12_GPU_DESCRIPTOR_HANDLE uavgpu = uavView.CreateGPU(10);

    bufOut->SetName(L"bufOut");

    while(Running())
    {
      ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();

      Reset(cmd);

      cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());

      ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);

      ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});

      cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);

      cmd->SetPipelineState(graphics);
      cmd->SetGraphicsRootSignature(sig);

      RSSetViewport(cmd, {0.0f, 0.0f, (float)imgDim, (float)imgDim, 0.0f, 1.0f});
      RSSetScissorRect(cmd, {0, 0, imgDim, imgDim});

      pushMarker(cmd, "Graphics Tests");

      for(uint32_t i = 0; i < numGraphicsTests; i++)
      {
        ResourceBarrier(cmd);

        // clear to a recognisable colour before each draw so untouched pixels can be identified
        OMSetRenderTargets(cmd, {fltRTV}, {});
        ClearRenderTargetView(cmd, fltRTV, {123456.0f, 789.0f, 101112.0f, 0.0f});

        cmd->SetGraphicsRoot32BitConstant(0, i, 0);
        cmd->DrawInstanced(4, 1, 0, 0);
      }

      popMarker(cmd);

      pushMarker(cmd, "Compute Tests");

      for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++)
      {
        ResourceBarrier(cmd);

        // zero the whole output buffer before running this workgroup size's tests
        UINT zero[4] = {};
        cmd->ClearUnorderedAccessViewUint(uavgpu, uavcpu, bufOut, zero, 0, NULL);

        ResourceBarrier(cmd);

        pushMarker(cmd, comppipe_name[p]);

        cmd->SetPipelineState(comppipe[p]);
        cmd->SetComputeRootSignature(sig);
        cmd->SetComputeRootUnorderedAccessView(1, bufOut->GetGPUVirtualAddress());

        // one single-group dispatch per test, selected by the root constant
        for(int i = 0; i < numCompTests; i++)
        {
          cmd->SetComputeRoot32BitConstant(0, i, 0);
          cmd->Dispatch(1, 1, 1);
        }

        popMarker(cmd);
      }

      popMarker(cmd);

      FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);

      cmd->Close();

      SubmitAndPresent({cmd});
    }

    return 0;
  }
};

REGISTER_TEST();
+1
View File
@@ -222,6 +222,7 @@
<ClCompile Include="d3d12\d3d12_sharing.cpp" />
<ClCompile Include="d3d12\d3d12_simple_dispatch.cpp" />
<ClCompile Include="d3d12\d3d12_simple_triangle.cpp" />
<ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp" />
<ClCompile Include="d3d12\d3d12_swapchain_zoo.cpp" />
<ClCompile Include="d3d12\d3d12_template.cpp" />
<ClCompile Include="d3d12\d3d12_test.cpp" />
+3
View File
@@ -712,6 +712,9 @@
<ClCompile Include="d3d12\d3d12_shader_debugdata_zoo.cpp">
<Filter>D3D12\demos</Filter>
</ClCompile>
<ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp">
<Filter>D3D12\demos</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="D3D11">
+233
View File
@@ -0,0 +1,233 @@
import renderdoc as rd
import struct
import rdtest
class D3D12_Subgroup_Zoo(rdtest.TestCase):
    """Compares shader-debugger results against real GPU output for the
    D3D12_Subgroup_Zoo demo.

    For every draw under the "Graphics Tests" marker the debugged vertex and
    pixel outputs are compared with the post-VS data and the picked pixel
    values. For every dispatch under each "Compute Tests" child marker the
    debugged value of ``data`` is compared with what the thread wrote to the
    output buffer.
    """
    demos_test_name = 'D3D12_Subgroup_Zoo'

    def check_support(self, **kwargs):
        """Return ``(supported, reason)``; the test only runs when it is the
        explicitly requested ``test_include``."""
        # Only allow this if explicitly run
        if kwargs['test_include'] == 'D3D12_Subgroup_Zoo':
            return True, ''
        return False, 'Disabled test'

    def check_capture(self):
        """Debug a selection of vertices, pixels and compute threads and log
        an error for every result that does not match the captured output."""
        graphics_tests = [a for a in self.find_action(
            "Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]

        # the per-workgroup-size markers are named "<x>x<y>"
        compute_dims = [a for a in self.find_action(
            "Compute Tests").children if 'x' in a.customName]

        rdtest.log.begin_section("Graphics tests")

        # instances to check in instanced draws
        inst_checks = [0, 1, 5, 10]

        # pixels to check
        pixel_checks = [
            # top quad
            (0, 0), (1, 0), (0, 1), (1, 1),
            # middle quad (away from triangle border)
            (64, 56), (65, 56), (64, 57), (65, 57),
            # middle quad (on triangle border)
            (64, 64), (65, 64), (64, 65), (65, 65),
            # middle quad on other triangle
            (56, 64), (57, 64), (56, 65), (57, 65),
        ]

        # threads to check. largest dimension only (all small dim checked)
        thread_checks = [
            # first few
            0, 1, 2,
            # near end of 32-subgroup and boundary
            30, 31, 32,
            # near end of 64-subgroup and boundary
            62, 63, 64,
            # large values spaced out with one near the end of our unaligned size
            100, 110, 120, 140, 149, 150, 160, 200, 250,
        ]

        # must match the colour the demo clears the float target to
        clear_col = (123456.0, 789.0, 101112.0, 0.0)

        for idx, action in enumerate(graphics_tests):
            self.controller.SetFrameEvent(action.eventId, False)

            pipe = self.controller.GetPipelineState()

            # check vertex output for every vertex
            for inst in [inst for inst in inst_checks if inst < action.numInstances]:
                for view in range(pipe.MultiviewBroadcastCount()):
                    postvs = self.get_postvs(
                        action, rd.MeshDataStage.VSOut, first_index=0, num_indices=action.numIndices, instance=inst)

                    for vtx in range(action.numIndices):
                        trace = self.controller.DebugVertex(
                            vtx, inst, vtx, view)

                        if trace.debugger is None:
                            self.controller.FreeTrace(trace)
                            rdtest.log.error(
                                f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
                            return

                        _, variables = self.process_trace(trace)

                        for var in trace.sourceVars:
                            # NOTE(review): the D3D12 vertex shader in this commit names its
                            # output `data : DATA`; confirm 'vertdata' is ever reported here,
                            # otherwise this comparison is silently skipped.
                            if var.name == 'vertdata':
                                name = var.name

                                if var.name not in postvs[vtx].keys():
                                    rdtest.log.error(
                                        f"Don't have expected output for {var.name}")
                                    continue

                                real = postvs[vtx][name]
                                debugged = self.evaluate_source_var(
                                    var, variables)

                                if debugged.columns != 4 or len(real) != 4:
                                    rdtest.log.error(
                                        f"Vertex output is not the right size ({len(real)} vs {debugged.columns})")
                                    continue

                                if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
                                    rdtest.log.error(
                                        f"Test {idx} at {action.eventId} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")

                        self.controller.FreeTrace(trace)

            # check some assorted pixel outputs
            target = pipe.GetOutputTargets()[0].resource

            for pixel in pixel_checks:
                for view in range(pipe.MultiviewBroadcastCount()):
                    x, y = pixel

                    picked = self.controller.PickPixel(
                        target, x, y, rd.Subresource(0, 0, 0), rd.CompType.Float)

                    real = picked.floatValue

                    # silently skip pixels that weren't written to
                    if real == clear_col:
                        continue

                    inputs = rd.DebugPixelInputs()
                    inputs.sample = 0
                    inputs.primitive = rd.ReplayController.NoPreference
                    inputs.view = view
                    trace = self.controller.DebugPixel(x, y, inputs)

                    if trace.debugger is None:
                        self.controller.FreeTrace(trace)
                        rdtest.log.error(
                            f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
                        continue

                    _, variables = self.process_trace(trace)

                    output_sourcevar = self.find_output_source_var(
                        trace, rd.ShaderBuiltin.ColorOutput, 0)

                    if output_sourcevar is None:
                        # release the trace on this early-out too, like every other path
                        self.controller.FreeTrace(trace)
                        rdtest.log.error("No output variable found")
                        continue

                    debugged = self.evaluate_source_var(
                        output_sourcevar, variables)

                    self.controller.FreeTrace(trace)

                    debuggedValue = list(debugged.value.f32v[0:4])

                    if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
                        rdtest.log.error(
                            f"Test {idx} at {action.eventId} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")

            rdtest.log.success(f"Test {idx} successful")

        rdtest.log.end_section("Graphics tests")

        for comp_dim in compute_dims:
            rdtest.log.begin_section(
                f"Compute tests with {comp_dim.customName} workgroup")

            compute_tests = [
                a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]

            for test, action in enumerate(compute_tests):
                self.controller.SetFrameEvent(action.eventId, False)

                pipe = self.controller.GetPipelineState()
                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)

                dim = csrefl.dispatchThreadsDimension

                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)

                if len(rw) != 1:
                    rdtest.log.error("Unexpected number of RW resources")
                    continue

                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
                bufdata = self.controller.GetBufferData(
                    rw[0].descriptor.resource, test*16*1024, 16*1024)

                for t in thread_checks:
                    # horizontal orientation: t picks the x coordinate, every y is checked
                    xrange = 1
                    yrange = dim[1]
                    xbase = t
                    ybase = 0

                    # vertical orientation
                    if dim[1] > dim[0]:
                        xrange = dim[0]
                        yrange = 1
                        xbase = 0
                        ybase = t

                    for x in range(xbase, xbase+xrange):
                        for y in range(ybase, ybase+yrange):
                            z = 0

                            # skip checks that fall outside this workgroup size
                            if x >= dim[0] or y >= dim[1]:
                                continue

                            # 16 bytes per thread, laid out row-major by the shader
                            real = struct.unpack_from(
                                "4f", bufdata, 16*y*dim[0] + 16*x)

                            trace = self.controller.DebugThread(
                                (0, 0, 0), (x, y, z))

                            # bail out before processing a trace that has no debugger,
                            # matching the vertex and pixel paths above
                            if trace.debugger is None:
                                self.controller.FreeTrace(trace)
                                rdtest.log.error(
                                    f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
                                continue

                            _, variables = self.process_trace(trace)

                            # the shader's `data` local as visible at the last instruction
                            sourceVars = [
                                v for v in trace.instInfo[-1].sourceVars if v.name == 'data']

                            if len(sourceVars) != 1:
                                self.controller.FreeTrace(trace)
                                rdtest.log.error(
                                    "Couldn't find compute data variable")
                                continue

                            debugged = self.evaluate_source_var(
                                sourceVars[0], variables)

                            # every debugged thread allocates a trace, so release it once evaluated
                            self.controller.FreeTrace(trace)

                            debuggedValue = list(debugged.value.f32v[0:4])

                            if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
                                rdtest.log.error(
                                    f"Test {test} at {action.eventId} debugged thread value {debuggedValue} at {x},{y},{z} does not match output {real}")

                rdtest.log.success(f"Test {test} successful")

            rdtest.log.end_section(
                f"Compute tests with {comp_dim.customName} workgroup")
+4 -4
View File
@@ -68,7 +68,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
self.controller.FreeTrace(trace)
rdtest.log.error(
f"Test {test} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
return
_, variables = self.process_trace(trace)
@@ -93,7 +93,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
rdtest.log.error(
f"Test {idx} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
f"{idx} at {action.eventId} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
self.controller.FreeTrace(trace)
@@ -123,7 +123,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
self.controller.FreeTrace(trace)
rdtest.log.error(
f"Test {test} at {action.eventId} got no debug result at {x},{y}")
f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
continue
_, variables = self.process_trace(trace)
@@ -144,7 +144,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
rdtest.log.error(
f"Test {idx} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
f"Test {idx} at {action.eventId} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
rdtest.log.success(f"Test {idx} successful")