From 9bdb5bc1346b441d4e9a31e65047581507e5ac3e Mon Sep 17 00:00:00 2001
From: baldurk
Date: Tue, 18 Mar 2025 16:13:17 +0000
Subject: [PATCH] Add D3D12_Subgroup_Zoo test

* The demo draws and dispatches with shaders that use wave intrinsics
  (WaveGetLaneIndex, WaveActiveSum), selecting the variant under test with a
  root constant. Graphics tests render to a float target, compute tests write
  one float4 per thread into a UAV buffer.
* The python test debugs a sample of vertices, pixels and compute threads and
  compares the debugged value against the real output. Traces are always
  checked for a missing debugger before being stepped, and freed afterwards.
* VK_Subgroup_Zoo failure messages now reference the right loop variable and
  include the event ID.

---
 util/test/demos/d3d12/d3d12_subgroup_zoo.cpp | 306 +++++++++++++++++++
 util/test/demos/demos.vcxproj                |   1 +
 util/test/demos/demos.vcxproj.filters        |   3 +
 util/test/tests/D3D12/D3D12_Subgroup_Zoo.py  | 235 ++++++++++++++
 util/test/tests/Vulkan/VK_Subgroup_Zoo.py    |   8 +-
 5 files changed, 549 insertions(+), 4 deletions(-)
 create mode 100644 util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
 create mode 100644 util/test/tests/D3D12/D3D12_Subgroup_Zoo.py

diff --git a/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp b/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
new file mode 100644
index 000000000..147439353
--- /dev/null
+++ b/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
@@ -0,0 +1,306 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2025 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "3rdparty/fmt/core.h"
+#include "d3d12_test.h"
+
+RD_TEST(D3D12_Subgroup_Zoo, D3D12GraphicsTest)
+{
+  static constexpr const char *Description =
+      "Test of behaviour around subgroup operations in shaders.";
+
+  const std::string common = R"EOSHADER(
+
+cbuffer rootconsts : register(b0)
+{
+  uint root_test;
+}
+
+#define IsTest(x) (root_test == x)
+
+)EOSHADER";
+
+  const std::string vertex = common + R"EOSHADER(
+
+struct OUT
+{
+  float4 pos : SV_Position;
+  float4 data : DATA;
+};
+
+OUT main(uint vert : SV_VertexID)
+{
+  OUT ret = (OUT)0;
+
+  float2 positions[] = {
+    float2(-1.0f,  1.0f),
+    float2( 1.0f,  1.0f),
+    float2(-1.0f, -1.0f),
+    float2( 1.0f, -1.0f),
+  };
+
+  float scale = 1.0f;
+  if(IsTest(2))
+    scale = 0.2f;
+
+  ret.pos = float4(positions[vert]*float2(scale,scale), 0, 1);
+
+  ret.data = 0.0f.xxxx;
+
+  uint wave = WaveGetLaneIndex();
+
+  if(IsTest(0))
+    ret.data = float4(wave, 0, 0, 1);
+  else if(IsTest(3))
+    ret.data = float4(WaveActiveSum(wave), 0, 0, 0);
+
+  return ret;
+}
+
+)EOSHADER";
+
+  const std::string pixel = common + R"EOSHADER(
+
+struct IN
+{
+  float4 pos : SV_Position;
+  float4 data : DATA;
+};
+
+float4 main(IN input) : SV_Target0
+{
+  uint wave = WaveGetLaneIndex();
+
+  float4 pixdata = 0.0f.xxxx;
+
+  if(IsTest(1) || IsTest(2))
+    pixdata = float4(wave, 0, 0, 1);
+  else if(IsTest(4))
+    pixdata = float4(WaveActiveSum(wave), 0, 0, 0);
+
+  return input.data + pixdata;
+}
+
+)EOSHADER";
+
+  const std::string comp = common + R"EOSHADER(
+
+RWStructuredBuffer<float4> outbuf : register(u0);
+
+[numthreads(GROUP_SIZE_X, GROUP_SIZE_Y, 1)]
+void main(uint3 tid : SV_DispatchThreadID)
+{
+  float4 data = 0.0f.xxxx;
+
+  uint wave = WaveGetLaneIndex();
+
+  if(IsTest(0))
+    data = float4(wave, 0, 0, 0);
+  else if(IsTest(1))
+    data = float4(WaveActiveSum(wave), 0, 0, 0);
+
+  outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;
+}
+
+)EOSHADER";
+
+  void Prepare(int argc, char **argv)
+  {
+    D3D12GraphicsTest::Prepare(argc, argv);
+
+    if(opts1.WaveLaneCountMax < 16)
+      Avail = "Subgroup size is less than 16";
+  }
+
+  int main()
+  {
+    // initialise, create window, create device, etc
+    if(!Init())
+      return 3;
+
+    ID3D12RootSignaturePtr sig = MakeSig({constParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0, 1),
+                                          uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0)});
+
+    const uint32_t imgDim = 128;
+
+    ID3D12ResourcePtr fltTex = MakeTexture(DXGI_FORMAT_R32G32B32A32_FLOAT, imgDim, imgDim)
+                                   .RTV()
+                                   .InitialState(D3D12_RESOURCE_STATE_RENDER_TARGET);
+    fltTex->SetName(L"fltTex");
+    D3D12_CPU_DESCRIPTOR_HANDLE fltRTV = MakeRTV(fltTex).CreateCPU(0);
+    D3D12_GPU_DESCRIPTOR_HANDLE fltSRV = MakeSRV(fltTex).CreateGPU(8);
+
+    int vertTests = 0, pixTests = 0;
+    int numCompTests = 0;
+
+    {
+      size_t pos = 0;
+      while(pos != std::string::npos)
+      {
+        pos = pixel.find("IsTest(", pos);
+        if(pos == std::string::npos)
+          break;
+        pos += sizeof("IsTest(") - 1;
+        pixTests = std::max(pixTests, atoi(pixel.c_str() + pos) + 1);
+      }
+
+      pos = 0;
+      while(pos != std::string::npos)
+      {
+        pos = vertex.find("IsTest(", pos);
+        if(pos == std::string::npos)
+          break;
+        pos += sizeof("IsTest(") - 1;
+        vertTests = std::max(vertTests, atoi(vertex.c_str() + pos) + 1);
+      }
+
+      pos = 0;
+      while(pos != std::string::npos)
+      {
+        pos = comp.find("IsTest(", pos);
+        if(pos == std::string::npos)
+          break;
+        pos += sizeof("IsTest(") - 1;
+        numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
+      }
+    }
+
+    const uint32_t numGraphicsTests = std::max(vertTests, pixTests);
+
+    struct
+    {
+      int x, y;
+    } compsize[] = {
+        {256, 1},
+        {128, 2},
+        {8, 128},
+        {150, 1},
+    };
+    std::string comppipe_name[ARRAY_COUNT(compsize)];
+    ID3D12PipelineStatePtr comppipe[ARRAY_COUNT(compsize)];
+
+    std::string defines;
+    defines += fmt::format("#define COMP_TESTS {}\n", numCompTests);
+    defines += "\n";
+
+    ID3D12PipelineStatePtr graphics = MakePSO()
+                                          .RootSig(sig)
+                                          .VS(Compile(defines + vertex, "main", "vs_6_0"))
+                                          .PS(Compile(defines + pixel, "main", "ps_6_0"))
+                                          .RTVs({DXGI_FORMAT_R32G32B32A32_FLOAT});
+
+    for(size_t i = 0; i < ARRAY_COUNT(comppipe); i++)
+    {
+      std::string sizedefine;
+      sizedefine = fmt::format("#define GROUP_SIZE_X {}\n#define GROUP_SIZE_Y {}\n", compsize[i].x,
+                               compsize[i].y);
+      comppipe_name[i] = fmt::format("{}x{}", compsize[i].x, compsize[i].y);
+
+      comppipe[i] =
+          MakePSO().RootSig(sig).CS(Compile(defines + sizedefine + comp, "main", "cs_6_0"));
+      comppipe[i]->SetName(UTF82Wide(comppipe_name[i]).c_str());
+    }
+
+    ID3D12ResourcePtr bufOut = MakeBuffer().Size(sizeof(Vec4f) * 1024 * numCompTests).UAV();
+    D3D12ViewCreator uavView =
+        MakeUAV(bufOut).Format(DXGI_FORMAT_R32_UINT).NumElements(4 * 1024 * numCompTests);
+    D3D12_CPU_DESCRIPTOR_HANDLE uavcpu = uavView.CreateClearCPU(10);
+    D3D12_GPU_DESCRIPTOR_HANDLE uavgpu = uavView.CreateGPU(10);
+
+    bufOut->SetName(L"bufOut");
+
+    while(Running())
+    {
+      ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
+
+      Reset(cmd);
+
+      cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());
+
+      ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});
+
+      cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+      cmd->SetPipelineState(graphics);
+      cmd->SetGraphicsRootSignature(sig);
+
+      RSSetViewport(cmd, {0.0f, 0.0f, (float)imgDim, (float)imgDim, 0.0f, 1.0f});
+      RSSetScissorRect(cmd, {0, 0, imgDim, imgDim});
+
+      pushMarker(cmd, "Graphics Tests");
+
+      for(uint32_t i = 0; i < numGraphicsTests; i++)
+      {
+        ResourceBarrier(cmd);
+
+        OMSetRenderTargets(cmd, {fltRTV}, {});
+        ClearRenderTargetView(cmd, fltRTV, {123456.0f, 789.0f, 101112.0f, 0.0f});
+
+        cmd->SetGraphicsRoot32BitConstant(0, i, 0);
+        cmd->DrawInstanced(4, 1, 0, 0);
+      }
+
+      popMarker(cmd);
+
+      pushMarker(cmd, "Compute Tests");
+
+      for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++)
+      {
+        ResourceBarrier(cmd);
+
+        UINT zero[4] = {};
+        cmd->ClearUnorderedAccessViewUint(uavgpu, uavcpu, bufOut, zero, 0, NULL);
+
+        ResourceBarrier(cmd);
+        pushMarker(cmd, comppipe_name[p]);
+
+        cmd->SetPipelineState(comppipe[p]);
+        cmd->SetComputeRootSignature(sig);
+        cmd->SetComputeRootUnorderedAccessView(1, bufOut->GetGPUVirtualAddress());
+
+        for(int i = 0; i < numCompTests; i++)
+        {
+          cmd->SetComputeRoot32BitConstant(0, i, 0);
+          cmd->Dispatch(1, 1, 1);
+        }
+
+        popMarker(cmd);
+      }
+
+      popMarker(cmd);
+
+      FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      cmd->Close();
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj
index bf3d1358b..91ca55a7f 100644
--- a/util/test/demos/demos.vcxproj
+++ b/util/test/demos/demos.vcxproj
@@ -222,6 +222,7 @@
 
 
 
+    <ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp" />
 
 
 
diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters
index 26838c3d3..9d7afe106 100644
--- a/util/test/demos/demos.vcxproj.filters
+++ b/util/test/demos/demos.vcxproj.filters
@@ -712,6 +712,9 @@
 
       <Filter>D3D12\demos</Filter>
 
+    <ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp">
+      <Filter>D3D12\demos</Filter>
+    </ClCompile>
 
 
 
diff --git a/util/test/tests/D3D12/D3D12_Subgroup_Zoo.py b/util/test/tests/D3D12/D3D12_Subgroup_Zoo.py
new file mode 100644
index 000000000..e4ae84c9b
--- /dev/null
+++ b/util/test/tests/D3D12/D3D12_Subgroup_Zoo.py
@@ -0,0 +1,235 @@
+import renderdoc as rd
+import struct
+import rdtest
+
+
+class D3D12_Subgroup_Zoo(rdtest.TestCase):
+    demos_test_name = 'D3D12_Subgroup_Zoo'
+
+    def check_support(self, **kwargs):
+        # Only allow this if explicitly run
+        if kwargs['test_include'] == 'D3D12_Subgroup_Zoo':
+            return True, ''
+        return False, 'Disabled test'
+
+    def check_capture(self):
+        graphics_tests = [a for a in self.find_action(
+            "Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]
+        compute_dims = [a for a in self.find_action(
+            "Compute Tests").children if 'x' in a.customName]
+
+        rdtest.log.begin_section("Graphics tests")
+
+        # instances to check in instanced draws
+        inst_checks = [0, 1, 5, 10]
+        # pixels to check
+        pixel_checks = [
+            # top quad
+            (0, 0), (1, 0), (0, 1), (1, 1),
+            # middle quad (away from triangle border)
+            (64, 56), (65, 56), (64, 57), (65, 57),
+            # middle quad (on triangle border)
+            (64, 64), (65, 64), (64, 65), (65, 65),
+            # middle quad on other triangle
+            (56, 64), (57, 64), (56, 65), (57, 65),
+        ]
+        # threads to check. largest dimension only (all small dim checked)
+        thread_checks = [
+            # first few
+            0, 1, 2,
+            # near end of 32-subgroup and boundary
+            30, 31, 32,
+            # near end of 64-subgroup and boundary
+            62, 63, 64,
+            # large values spaced out with one near the end of our unaligned size
+            100, 110, 120, 140, 149, 150, 160, 200, 250,
+        ]
+        clear_col = (123456.0, 789.0, 101112.0, 0.0)
+
+        for idx, action in enumerate(graphics_tests):
+            self.controller.SetFrameEvent(action.eventId, False)
+
+            pipe = self.controller.GetPipelineState()
+
+            # check vertex output for every vertex
+            for inst in [inst for inst in inst_checks if inst < action.numInstances]:
+                for view in range(pipe.MultiviewBroadcastCount()):
+
+                    postvs = self.get_postvs(
+                        action, rd.MeshDataStage.VSOut, first_index=0, num_indices=action.numIndices, instance=inst)
+
+                    for vtx in range(action.numIndices):
+                        trace = self.controller.DebugVertex(
+                            vtx, inst, vtx, view)
+
+                        if trace.debugger is None:
+                            self.controller.FreeTrace(trace)
+
+                            rdtest.log.error(
+                                f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
+                            return
+
+                        _, variables = self.process_trace(trace)
+
+                        for var in trace.sourceVars:
+                            if var.name == 'vertdata':
+                                name = var.name
+
+                                if var.name not in postvs[vtx].keys():
+                                    rdtest.log.error(
+                                        f"Don't have expected output for {var.name}")
+                                    continue
+
+                                real = postvs[vtx][name]
+                                debugged = self.evaluate_source_var(
+                                    var, variables)
+
+                                if debugged.columns != 4 or len(real) != 4:
+                                    rdtest.log.error(
+                                        f"Vertex output is not the right size ({len(real)} vs {debugged.columns})")
+                                    continue
+
+                                if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
+                                    rdtest.log.error(
+                                        f"Test {idx} at {action.eventId} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
+
+                        self.controller.FreeTrace(trace)
+
+            # check some assorted pixel outputs
+            target = pipe.GetOutputTargets()[0].resource
+
+            for pixel in pixel_checks:
+                for view in range(pipe.MultiviewBroadcastCount()):
+                    x, y = pixel
+
+                    picked = self.controller.PickPixel(
+                        target, x, y, rd.Subresource(0, 0, 0), rd.CompType.Float)
+
+                    real = picked.floatValue
+
+                    # silently skip pixels that weren't written to
+                    if real == clear_col:
+                        continue
+
+                    inputs = rd.DebugPixelInputs()
+                    inputs.sample = 0
+                    inputs.primitive = rd.ReplayController.NoPreference
+                    inputs.view = view
+                    trace = self.controller.DebugPixel(x, y, inputs)
+
+                    if trace.debugger is None:
+                        self.controller.FreeTrace(trace)
+
+                        rdtest.log.error(
+                            f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
+                        continue
+
+                    _, variables = self.process_trace(trace)
+
+                    output_sourcevar = self.find_output_source_var(
+                        trace, rd.ShaderBuiltin.ColorOutput, 0)
+
+                    if output_sourcevar is None:
+                        self.controller.FreeTrace(trace)
+                        rdtest.log.error("No output variable found")
+                        continue
+
+                    debugged = self.evaluate_source_var(
+                        output_sourcevar, variables)
+
+                    self.controller.FreeTrace(trace)
+
+                    debuggedValue = list(debugged.value.f32v[0:4])
+
+                    if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                        rdtest.log.error(
+                            f"Test {idx} at {action.eventId} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
+
+            rdtest.log.success(f"Test {idx} successful")
+
+        rdtest.log.end_section("Graphics tests")
+
+        for comp_dim in compute_dims:
+            rdtest.log.begin_section(
+                f"Compute tests with {comp_dim.customName} workgroup")
+
+            compute_tests = [
+                a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
+
+            for test, action in enumerate(compute_tests):
+                self.controller.SetFrameEvent(action.eventId, False)
+
+                pipe = self.controller.GetPipelineState()
+                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
+
+                dim = csrefl.dispatchThreadsDimension
+
+                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
+
+                if len(rw) != 1:
+                    rdtest.log.error("Unexpected number of RW resources")
+                    continue
+
+                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
+                bufdata = self.controller.GetBufferData(
+                    rw[0].descriptor.resource, test*16*1024, 16*1024)
+
+                for t in thread_checks:
+                    xrange = 1
+                    yrange = dim[1]
+                    xbase = t
+                    ybase = 0
+
+                    # vertical orientation
+                    if dim[1] > dim[0]:
+                        xrange = dim[0]
+                        yrange = 1
+                        xbase = 0
+                        ybase = t
+
+                    for x in range(xbase, xbase+xrange):
+                        for y in range(ybase, ybase+yrange):
+                            z = 0
+
+                            if x >= dim[0] or y >= dim[1]:
+                                continue
+
+                            real = struct.unpack_from(
+                                "4f", bufdata, 16*y*dim[0] + 16*x)
+
+                            trace = self.controller.DebugThread(
+                                (0, 0, 0), (x, y, z))
+
+                            if trace.debugger is None:
+                                self.controller.FreeTrace(trace)
+
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
+                                continue
+
+                            _, variables = self.process_trace(trace)
+
+                            sourceVars = [
+                                v for v in trace.instInfo[-1].sourceVars if v.name == 'data']
+
+                            if len(sourceVars) != 1:
+                                self.controller.FreeTrace(trace)
+                                rdtest.log.error(
+                                    "Couldn't find compute data variable")
+                                continue
+
+                            debugged = self.evaluate_source_var(
+                                sourceVars[0], variables)
+
+                            self.controller.FreeTrace(trace)
+
+                            debuggedValue = list(debugged.value.f32v[0:4])
+
+                            if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                                rdtest.log.error(
+                                    f"Test {test} at {action.eventId} debugged thread value {debuggedValue} at {x},{y},{z} does not match output {real}")
+
+                rdtest.log.success(f"Test {test} successful")
+
+            rdtest.log.end_section(
+                f"Compute tests with {comp_dim.customName} workgroup")
diff --git a/util/test/tests/Vulkan/VK_Subgroup_Zoo.py b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
index 9047886a5..9771a9348 100644
--- a/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
+++ b/util/test/tests/Vulkan/VK_Subgroup_Zoo.py
@@ -68,7 +68,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
                             self.controller.FreeTrace(trace)
 
                             rdtest.log.error(
-                                f"Test {test} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
+                                f"Test {idx} at {action.eventId} got no debug result at {vtx} inst {inst} view {view}")
                             return
 
                         _, variables = self.process_trace(trace)
@@ -93,7 +93,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
 
                                 if not rdtest.value_compare(real, debugged.value.f32v[0:4], eps=5.0E-06):
                                     rdtest.log.error(
-                                        f"Test {idx} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
+                                        f"Test {idx} at {action.eventId} debugged vertex value {debugged.value.f32v[0:4]} at {vtx} instance {inst} view {view} does not match output {real}")
 
                         self.controller.FreeTrace(trace)
 
@@ -123,7 +123,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
                         self.controller.FreeTrace(trace)
 
                         rdtest.log.error(
-                            f"Test {test} at {action.eventId} got no debug result at {x},{y}")
+                            f"Test {idx} at {action.eventId} got no debug result at {x},{y}")
                         continue
 
                     _, variables = self.process_trace(trace)
@@ -144,7 +144,7 @@ class VK_Subgroup_Zoo(rdtest.TestCase):
 
                     if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
                         rdtest.log.error(
-                            f"Test {idx} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
+                            f"Test {idx} at {action.eventId} debugged pixel value {debuggedValue} at {x},{y} in {view} does not match output {real}")
 
             rdtest.log.success(f"Test {idx} successful")
 