mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-04 17:10:47 +00:00
Added VK_Workgroup_Zoo, D3D12_Workgroup_Zoo tests
Tests specifically aimed at workgroup debugging, i.e. GSM and non-aligned subgroups. Not focused on unit tests of subgroup/quad instructions; those are handled by *_Subgroup_Zoo.
This commit is contained in:
@@ -165,7 +165,8 @@ set(VULKAN_SRC
|
||||
vk/vk_validation_use.cpp
|
||||
vk/vk_vertex_attr_zoo.cpp
|
||||
vk/vk_video_textures.cpp
|
||||
vk/vk_vs_max_desc_set.cpp)
|
||||
vk/vk_vs_max_desc_set.cpp
|
||||
vk/vk_workgroup_zoo.cpp)
|
||||
|
||||
set(OPENGL_SRC
|
||||
3rdparty/glad/glad.c
|
||||
|
||||
@@ -57,7 +57,7 @@ RWStructuredBuffer<float4> outbuf : register(u0);
|
||||
|
||||
static uint3 tid;
|
||||
|
||||
void SetOuput(float4 data)
|
||||
void SetOutput(float4 data)
|
||||
{
|
||||
outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;
|
||||
}
|
||||
@@ -221,7 +221,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
|
||||
uint id = WaveGetLaneIndex();
|
||||
|
||||
SetOuput(id);
|
||||
SetOutput(id);
|
||||
|
||||
if(IsTest(0))
|
||||
{
|
||||
@@ -297,7 +297,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
if (id < 10)
|
||||
{
|
||||
data.x = WaveActiveSum(id+10);
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
return;
|
||||
}
|
||||
data.x = WaveActiveSum(id);
|
||||
@@ -402,7 +402,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
data.w = float(WaveActiveAllEqual(test4).w);
|
||||
}
|
||||
}
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
}
|
||||
|
||||
)EOSHADER";
|
||||
@@ -417,7 +417,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
|
||||
uint id = WaveGetLaneIndex();
|
||||
|
||||
SetOuput(id);
|
||||
SetOutput(id);
|
||||
|
||||
if(IsTest(0))
|
||||
{
|
||||
@@ -439,7 +439,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
data.z = WaveMultiPrefixBitOr(id, mask);
|
||||
data.w = WaveMultiPrefixBitXor(id, mask);
|
||||
}
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
}
|
||||
|
||||
)EOSHADER";
|
||||
@@ -548,12 +548,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
|
||||
ID3D12PipelineStatePtr comppipe65[ARRAY_COUNT(compsize)];
|
||||
|
||||
std::string defines60;
|
||||
defines60 += fmt::format("#define COMP_TESTS {}\n", numCompTests60);
|
||||
defines60 += "\n";
|
||||
|
||||
std::string defines65;
|
||||
defines65 += fmt::format("#define COMP_TESTS {}\n", numCompTests65);
|
||||
defines65 += "\n";
|
||||
|
||||
bool supportSM65 = (m_HighestShaderModel >= D3D_SHADER_MODEL_6_5) && m_DXILSupport;
|
||||
bool supportSM67 = (m_HighestShaderModel >= D3D_SHADER_MODEL_6_7) && m_DXILSupport;
|
||||
|
||||
@@ -0,0 +1,418 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2019-2025 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#include "3rdparty/fmt/core.h"
|
||||
#include "d3d12_test.h"
|
||||
|
||||
RD_TEST(D3D12_Workgroup_Zoo, D3D12GraphicsTest)
|
||||
{
|
||||
// Short human-readable summary of what this demo exercises.
static constexpr const char *Description =
    "Test of behaviour around workgroup operations in shaders.";

// HLSL prelude prepended to the shaders in this test. It declares the constant buffer at b0
// holding `root_test` and the IsTest(x) macro, which compares `root_test` against a test index.
const std::string common = R"EOSHADER(

cbuffer rootconsts : register(b0)
{
  uint root_test;
}

#define IsTest(x) (root_test == x)

)EOSHADER";

// Compute-shader prelude layered on top of `common`. It declares:
//  - outbuf:   the float4 output buffer at u0,
//  - tid:      a static copy of the thread ID (assigned by the shader entry point),
//  - gsmUint4: a 1024-entry groupshared uint4 array,
//  - SetOutput(): stores a float4 at index root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x.
// GROUP_SIZE_X is not defined in this text; it has to be #defined before it when compiling.
const std::string compCommon = common + R"EOSHADER(

RWStructuredBuffer<float4> outbuf : register(u0);

static uint3 tid;

groupshared uint4 gsmUint4[1024];

void SetOutput(float4 data)
{
  outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;
}

)EOSHADER";
|
||||
|
||||
// Compute shader source for this test, appended to `compCommon`.
//
// The entry point records the dispatch thread ID, writes the lane index into gsmUint4 and writes
// an initial zero result through SetOutput(). It then runs exactly one branch selected by
// IsTest(N) and stores the resulting float4 through SetOutput() again. The branches cover lane
// index output (0), a plain wave sum (1), diverging/reconverging control flow (2), function and
// nested function calls from converged and diverged threads (3-6), early exit (7), loops with a
// per-thread trip count (8), and wave query/vote/broadcast/prefix/reduction intrinsics (9-15).
//
// main() scans this text for "IsTest(" to work out how many tests exist, so the highest index
// used here determines the number of dispatches. GROUP_SIZE_X and GROUP_SIZE_Y must be #defined
// ahead of this text.
const std::string comp = compCommon + R"EOSHADER(

float4 funcD(uint id)
{
  return WaveActiveSum(id/2).xxxx;
}

float4 nestedFunc(uint id)
{
  float4 ret = funcD(id/3);
  ret.w = WaveActiveSum(id);
  return ret;
}

float4 funcA(uint id)
{
  return nestedFunc(id*2);
}

float4 funcB(uint id)
{
  return nestedFunc(id*4);
}

float4 funcTest(uint id)
{
  if ((id % 2) == 0)
  {
    return 0.xxxx;
  }
  else
  {
    float value = WaveActiveSum(id);
    if (id < 10)
    {
      return value.xxxx;
    }
    value += WaveActiveSum(id/2);
    return value.xxxx;
  }
}

[numthreads(GROUP_SIZE_X, GROUP_SIZE_Y, 1)]
void main(uint3 inTid : SV_DispatchThreadID)
{
  tid = inTid;
  float4 data = 0.0f.xxxx;
  uint id = WaveGetLaneIndex();
  gsmUint4[id] = id;
  SetOutput(data);

  if(IsTest(0))
  {
    data.x = id;
  }
  else if(IsTest(1))
  {
    data.x = WaveActiveSum(id);
  }
  else if(IsTest(2))
  {
    // Diverged threads which reconverge
    if (id < 10)
    {
      // active threads 0-9
      data.x = WaveActiveSum(id);

      if ((id % 2) == 0)
        data.y = WaveActiveSum(id);
      else
        data.y = WaveActiveSum(id);

      data.x += WaveActiveSum(id);
    }
    else
    {
      // active threads 10...
      data.x = WaveActiveSum(id);
    }
    data.y = WaveActiveSum(id);
  }
  else if(IsTest(3))
  {
    // Converged threads calling a function
    data = funcTest(id);
    data.y = WaveActiveSum(id);
  }
  else if(IsTest(4))
  {
    // Converged threads calling a function which has a nested function call in it
    data = nestedFunc(id);
    data.y = WaveActiveSum(id);
  }
  else if(IsTest(5))
  {
    // Diverged threads calling the same function
    if (id < 10)
    {
      data = funcD(id);
    }
    else
    {
      data = funcD(id);
    }
    data.y = WaveActiveSum(id);
  }
  else if(IsTest(6))
  {
    // Diverged threads calling the same function which has a nested function call in it
    if (id < 10)
    {
      data = funcA(id);
    }
    else
    {
      data = funcB(id);
    }
    data.y = WaveActiveSum(id);
  }
  else if(IsTest(7))
  {
    // Diverged threads which early exit
    if (id < 10)
    {
      data.x = WaveActiveSum(id+10);
      SetOutput(data);
      return;
    }
    data.x = WaveActiveSum(id);
  }
  else if(IsTest(8))
  {
    // Loops with different number of iterations per thread
    for (uint i = 0; i < id; i++)
    {
      data.x += WaveActiveSum(id);
    }
  }
  else if(IsTest(9))
  {
    // Query functions : unit tests
    data.x = float(WaveGetLaneCount());
    data.y = float(WaveGetLaneIndex());
    data.z = float(WaveIsFirstLane());
  }
  else if(IsTest(10))
  {
    // Vote functions : unit tests
    data.x = float(WaveActiveAnyTrue(id*2 > id+10));
    data.y = float(WaveActiveAllTrue(id < WaveGetLaneCount()));
    if (id > 10)
    {
      data.z = float(WaveActiveAllTrue(id > 10));
      uint4 ballot = WaveActiveBallot(id > 20);
      data.w = countbits(ballot.x) + countbits(ballot.y) + countbits(ballot.z) + countbits(ballot.w);
    }
    else
    {
      data.z = float(WaveActiveAllTrue(id > 3));
      uint4 ballot = WaveActiveBallot(id > 4);
      data.w = countbits(ballot.x) + countbits(ballot.y) + countbits(ballot.z) + countbits(ballot.w);
    }
  }
  else if(IsTest(11))
  {
    // Broadcast functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = WaveReadLaneFirst(id);
      data.y = WaveReadLaneAt(id, 5);
      data.z = WaveReadLaneAt(id, id);
      data.w = WaveReadLaneAt(data.x, 2+id%3);
    }
  }
  else if(IsTest(12))
  {
    // Scan and Prefix functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = WavePrefixCountBits(id > 4);
      data.y = WavePrefixCountBits(id > 10);
      data.z = WavePrefixSum(data.x);
      data.w = WavePrefixProduct(1 + data.y);
    }
    else
    {
      data.x = WavePrefixCountBits(id > 23);
      data.y = WavePrefixCountBits(id < 1);
      data.z = WavePrefixSum(data.x);
      data.w = WavePrefixSum(data.y);
    }
  }
  else if(IsTest(13))
  {
    // Reduction functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = float(WaveActiveMax(id));
      data.y = float(WaveActiveMin(id));
      data.z = float(WaveActiveProduct(id));
      data.w = float(WaveActiveSum(id));
    }
  }
  else if(IsTest(14))
  {
    // Reduction functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = float(WaveActiveCountBits(id > 23));
      data.y = float(WaveActiveBitAnd(id));
      data.z = float(WaveActiveBitOr(id));
      data.w = float(WaveActiveBitXor(id));
    }
  }
  else if(IsTest(15))
  {
    // Reduction functions : unit tests
    if (id > 13)
    {
      bool test1 = (id > 15).x;
      bool2 test2 = bool2(test1, (id < 23));
      bool3 test3 = bool3(test1, (id < 23), (id >= 25));
      bool4 test4 = bool4(test1, (id < 23), (id >= 25), (id >= 28));

      data.x = float(WaveActiveAllEqual(test1).x);
      data.y = float(WaveActiveAllEqual(test2).y);
      data.z = float(WaveActiveAllEqual(test3).z);
      data.w = float(WaveActiveAllEqual(test4).w);
    }
  }

  SetOutput(data);
}

)EOSHADER";
|
||||
|
||||
void Prepare(int argc, char **argv)
|
||||
{
|
||||
D3D12GraphicsTest::Prepare(argc, argv);
|
||||
|
||||
if(opts1.WaveLaneCountMax < 16)
|
||||
Avail = "Subgroup size is less than 16";
|
||||
|
||||
bool supportSM60 = (m_HighestShaderModel >= D3D_SHADER_MODEL_6_0) && m_DXILSupport;
|
||||
if(!supportSM60)
|
||||
Avail = "SM 6.0 not supported";
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// initialise, create window, create device, etc
|
||||
if(!Init())
|
||||
return 3;
|
||||
|
||||
ID3D12RootSignaturePtr sig = MakeSig({constParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0, 1),
|
||||
uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0)});
|
||||
|
||||
const uint32_t imgDim = 128;
|
||||
|
||||
ID3D12ResourcePtr fltTex = MakeTexture(DXGI_FORMAT_R32G32B32A32_FLOAT, imgDim, imgDim)
|
||||
.RTV()
|
||||
.InitialState(D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
fltTex->SetName(L"fltTex");
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE fltRTV = MakeRTV(fltTex).CreateCPU(0);
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE fltSRV = MakeSRV(fltTex).CreateGPU(8);
|
||||
|
||||
int32_t numCompTests = 0;
|
||||
|
||||
size_t pos = 0;
|
||||
while(pos != std::string::npos)
|
||||
{
|
||||
pos = comp.find("IsTest(", pos);
|
||||
if(pos == std::string::npos)
|
||||
break;
|
||||
pos += sizeof("IsTest(") - 1;
|
||||
numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
|
||||
}
|
||||
|
||||
struct
|
||||
{
|
||||
int x, y;
|
||||
} compsize[] = {
|
||||
{70, 1},
|
||||
};
|
||||
std::string comppipe_name[ARRAY_COUNT(compsize)];
|
||||
ID3D12PipelineStatePtr comppipe[ARRAY_COUNT(compsize)];
|
||||
|
||||
std::string defines;
|
||||
|
||||
for(int i = 0; i < ARRAY_COUNT(comppipe); i++)
|
||||
{
|
||||
std::string sizedefine;
|
||||
sizedefine = fmt::format("#define GROUP_SIZE_X {}\n#define GROUP_SIZE_Y {}\n", compsize[i].x,
|
||||
compsize[i].y);
|
||||
comppipe_name[i] = fmt::format("{}x{}", compsize[i].x, compsize[i].y);
|
||||
|
||||
comppipe[i] =
|
||||
MakePSO().RootSig(sig).CS(Compile(defines + sizedefine + comp, "main", "cs_6_0"));
|
||||
comppipe[i]->SetName(UTF82Wide(comppipe_name[i]).c_str());
|
||||
}
|
||||
|
||||
ID3D12ResourcePtr bufOut = MakeBuffer().Size(sizeof(Vec4f) * 1024 * numCompTests).UAV();
|
||||
D3D12ViewCreator uavView =
|
||||
MakeUAV(bufOut).Format(DXGI_FORMAT_R32_UINT).NumElements(4 * 1024 * numCompTests);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE uavcpu = uavView.CreateClearCPU(10);
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE uavgpu = uavView.CreateGPU(10);
|
||||
|
||||
bufOut->SetName(L"bufOut");
|
||||
|
||||
while(Running())
|
||||
{
|
||||
ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
|
||||
|
||||
Reset(cmd);
|
||||
|
||||
cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());
|
||||
|
||||
ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});
|
||||
|
||||
pushMarker(cmd, "Compute Tests");
|
||||
|
||||
for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++)
|
||||
{
|
||||
ResourceBarrier(cmd);
|
||||
|
||||
UINT zero[4] = {};
|
||||
cmd->ClearUnorderedAccessViewUint(uavgpu, uavcpu, bufOut, zero, 0, NULL);
|
||||
|
||||
ResourceBarrier(cmd);
|
||||
pushMarker(cmd, comppipe_name[p]);
|
||||
|
||||
cmd->SetPipelineState(comppipe[p]);
|
||||
cmd->SetComputeRootSignature(sig);
|
||||
cmd->SetComputeRootUnorderedAccessView(1, bufOut->GetGPUVirtualAddress());
|
||||
|
||||
for(int i = 0; i < numCompTests; i++)
|
||||
{
|
||||
cmd->SetComputeRoot32BitConstant(0, i, 0);
|
||||
cmd->Dispatch(1, 1, 1);
|
||||
}
|
||||
|
||||
popMarker(cmd);
|
||||
}
|
||||
|
||||
popMarker(cmd);
|
||||
|
||||
FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
cmd->Close();
|
||||
|
||||
SubmitAndPresent({cmd});
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_TEST();
|
||||
@@ -232,6 +232,7 @@
|
||||
<ClCompile Include="d3d12\d3d12_vertex_uav.cpp" />
|
||||
<ClCompile Include="d3d12\d3d12_video_textures.cpp" />
|
||||
<ClCompile Include="d3d12\d3d12_vrs.cpp" />
|
||||
<ClCompile Include="d3d12\d3d12_workgroup_zoo.cpp" />
|
||||
<ClCompile Include="d3d12\d3d12_write_subresource.cpp" />
|
||||
<ClCompile Include="dx\d3d_helpers.cpp" />
|
||||
<ClCompile Include="3rdparty\glad\glad.c" />
|
||||
@@ -374,6 +375,7 @@
|
||||
<ClCompile Include="vk\vk_simple_triangle.cpp" />
|
||||
<ClCompile Include="vk\vk_test.cpp" />
|
||||
<ClCompile Include="3rdparty\volk\volk.c" />
|
||||
<ClCompile Include="vk\vk_workgroup_zoo.cpp" />
|
||||
<ClCompile Include="win32\win32_platform.cpp" />
|
||||
<ClCompile Include="win32\win32_window.cpp" />
|
||||
</ItemGroup>
|
||||
|
||||
@@ -718,6 +718,12 @@
|
||||
<ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp">
|
||||
<Filter>D3D12\demos</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="vk\vk_workgroup_zoo.cpp">
|
||||
<Filter>Vulkan\demos</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="d3d12\d3d12_workgroup_zoo.cpp">
|
||||
<Filter>D3D12\demos</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="D3D11">
|
||||
|
||||
@@ -191,7 +191,7 @@ vec4 funcTest(uint id)
|
||||
}
|
||||
}
|
||||
|
||||
void SetOuput(vec4 data)
|
||||
void SetOutput(vec4 data)
|
||||
{
|
||||
outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
|
||||
}
|
||||
@@ -199,7 +199,7 @@ void main()
|
||||
{
|
||||
vec4 data = vec4(0);
|
||||
uint id = gl_SubgroupInvocationID;
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
|
||||
if(IsTest(0))
|
||||
{
|
||||
@@ -275,7 +275,7 @@ void main()
|
||||
if (id < 10)
|
||||
{
|
||||
data.x = subgroupAdd(id+10);
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
return;
|
||||
}
|
||||
data.x = subgroupAdd(id);
|
||||
@@ -380,7 +380,7 @@ void main()
|
||||
data.w = float(subgroupAllEqual(id >= 28));
|
||||
}
|
||||
}
|
||||
SetOuput(data);
|
||||
SetOutput(data);
|
||||
}
|
||||
|
||||
)EOSHADER";
|
||||
|
||||
@@ -0,0 +1,489 @@
|
||||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2019-2025 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#include "3rdparty/fmt/core.h"
|
||||
#include "vk_test.h"
|
||||
|
||||
RD_TEST(VK_Workgroup_Zoo, VulkanGraphicsTest)
|
||||
{
|
||||
// Short human-readable summary of what this demo exercises.
static constexpr const char *Description =
    "Test of behaviour around workgroup operations in shaders.";

// GLSL prelude prepended to the shaders in this test. It always enables the basic, ballot, vote
// and arithmetic subgroup extensions. The shuffle, shuffle-relative, clustered, quad and rotate
// extensions are enabled only when the matching FEAT_* macro evaluates to non-zero; those macros
// are not defined here and must be supplied when compiling. It also declares the push constant
// `push.test` and the IsTest(x) macro that compares it against a test index.
const std::string common = R"EOSHADER(

#version 460 core
#extension GL_KHR_shader_subgroup_basic : enable
#extension GL_KHR_shader_subgroup_ballot : enable
#extension GL_KHR_shader_subgroup_vote : enable
#extension GL_KHR_shader_subgroup_arithmetic : enable

#if FEAT_SHUFFLE
#extension GL_KHR_shader_subgroup_shuffle : enable
#endif

#if FEAT_SHUFFLE_RELATIVE
#extension GL_KHR_shader_subgroup_shuffle_relative : enable
#endif

#if FEAT_CLUSTERED
#extension GL_KHR_shader_subgroup_clustered : enable
#endif

#if FEAT_QUAD
#extension GL_KHR_shader_subgroup_quad : enable
#endif

#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
#extension GL_KHR_shader_subgroup_rotate : enable
#endif

layout(push_constant) uniform PushData
{
  uint test;
} push;

#define IsTest(x) (push.test == x)

)EOSHADER";
|
||||
|
||||
// Compute shader source for this test, appended to `common`.
//
// The entry point writes the subgroup invocation index into the shared array gsmUint4 and writes
// an initial zero result through SetOutput(). It then runs exactly one branch selected by
// IsTest(N) and stores the resulting vec4 through SetOutput() again, at
// outbuf.data[push.test].vals[local invocation index].
//
// main() scans this text for "IsTest(" to work out how many tests exist, so the highest index
// used here determines the number of dispatches. COMP_TESTS, GROUP_SIZE_X and GROUP_SIZE_Y are
// supplied as macros at compile time.
//
// Notes on the shared array and test 11:
//  - gsmUint4 is indexed by gl_SubgroupInvocationID, so it must not be sized by COMP_TESTS (the
//    number of tests): any subgroup wider than that would write out of bounds. It uses a fixed
//    1024 entries, the same as the per-test output array.
//  - The store uses an explicit uvec4() constructor because GLSL has no implicit scalar to
//    vector conversion.
//  - subgroupShuffle is only declared when GL_KHR_shader_subgroup_shuffle is enabled, which the
//    prelude does under FEAT_SHUFFLE, so its uses are guarded by the same macro.
const std::string comp = common + R"EOSHADER(

shared uvec4 gsmUint4[1024];

struct Output
{
  vec4 vals[1024];
};

layout(binding = 0, std430) buffer outbuftype {
  Output data[COMP_TESTS];
} outbuf;

layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in;

vec4 funcD(uint id)
{
  return vec4(subgroupAdd(id/2));
}

vec4 nestedFunc(uint id)
{
  vec4 ret = funcD(id/3);
  ret.w = subgroupAdd(id);
  return ret;
}

vec4 funcA(uint id)
{
  return nestedFunc(id*2);
}

vec4 funcB(uint id)
{
  return nestedFunc(id*4);
}

vec4 funcTest(uint id)
{
  if ((id % 2) == 0)
  {
    return vec4(0);
  }
  else
  {
    float value = subgroupAdd(id);
    if (id < 10)
    {
      return vec4(value);
    }
    value += subgroupAdd(id/2);
    return vec4(value);
  }
}

void SetOutput(vec4 data)
{
  outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
}
void main()
{
  vec4 data = vec4(0);
  uint id = gl_SubgroupInvocationID;
  gsmUint4[id] = uvec4(id);
  SetOutput(data);

  if(IsTest(0))
  {
    data.x = id;
  }
  else if(IsTest(1))
  {
    data.x = subgroupAdd(id);
  }
  else if(IsTest(2))
  {
    // Diverged threads which reconverge
    if (id < 10)
    {
      // active threads 0-9
      data.x = subgroupAdd(id);

      if ((id % 2) == 0)
        data.y = subgroupAdd(id);
      else
        data.y = subgroupAdd(id);

      data.x += subgroupAdd(id);
    }
    else
    {
      // active threads 10...
      data.x = subgroupAdd(id);
    }
    data.y = subgroupAdd(id);
  }
  else if(IsTest(3))
  {
    // Converged threads calling a function
    data = funcTest(id);
    data.y = subgroupAdd(id);
  }
  else if(IsTest(4))
  {
    // Converged threads calling a function which has a nested function call in it
    data = nestedFunc(id);
    data.y = subgroupAdd(id);
  }
  else if(IsTest(5))
  {
    // Diverged threads calling the same function
    if (id < 10)
    {
      data = funcD(id);
    }
    else
    {
      data = funcD(id);
    }
    data.y = subgroupAdd(id);
  }
  else if(IsTest(6))
  {
    // Diverged threads calling the same function which has a nested function call in it
    if (id < 10)
    {
      data = funcA(id);
    }
    else
    {
      data = funcB(id);
    }
    data.y = subgroupAdd(id);
  }
  else if(IsTest(7))
  {
    // Diverged threads which early exit
    if (id < 10)
    {
      data.x = subgroupAdd(id+10);
      SetOutput(data);
      return;
    }
    data.x = subgroupAdd(id);
  }
  else if(IsTest(8))
  {
    // Loops with different number of iterations per thread
    for (uint i = 0; i < id; i++)
    {
      data.x += subgroupAdd(id);
    }
  }
  else if(IsTest(9))
  {
    // Query functions : unit tests
    data.x = float(gl_SubgroupSize);
    data.y = float(gl_SubgroupInvocationID);
    data.z = float(subgroupElect());
  }
  else if(IsTest(10))
  {
    // Vote functions : unit tests
    data.x = float(subgroupAny(id*2 > id+10));
    data.y = float(subgroupAll(id < gl_SubgroupSize));
    if (id > 10)
    {
      data.z = float(subgroupAll(id > 10));
      uvec4 ballot = subgroupBallot(id > 20);
      data.w = bitCount(ballot.x) + bitCount(ballot.y) + bitCount(ballot.z) + bitCount(ballot.w);
    }
    else
    {
      data.z = float(subgroupAll(id > 3));
      uvec4 ballot = subgroupBallot(id > 4);
      data.w = bitCount(ballot.x) + bitCount(ballot.y) + bitCount(ballot.z) + bitCount(ballot.w);
    }
  }
  else if(IsTest(11))
  {
    // Broadcast functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = subgroupBroadcastFirst(id);
      data.y = subgroupBroadcast(id, 5);
#if FEAT_SHUFFLE
      data.z = subgroupShuffle(id, id);
      data.w = subgroupShuffle(data.x, 2+id%3);
#endif
    }
  }
  else if(IsTest(12))
  {
    // Scan and Prefix functions : unit tests
    if (id >= 2 && id <= 20)
    {
      uvec4 bits = subgroupBallot(id > 4);
      data.x = subgroupBallotExclusiveBitCount(bits);
      bits = subgroupBallot(id > 10);
      data.y = subgroupBallotExclusiveBitCount(bits);
      data.z = subgroupExclusiveAdd(data.x);
      data.w = subgroupExclusiveMul(1 + data.y);
    }
    else
    {
      uvec4 bits = subgroupBallot(id > 23);
      data.x = subgroupBallotExclusiveBitCount(bits);
      bits = subgroupBallot(id < 1);
      data.y = subgroupBallotExclusiveBitCount(bits);
      data.z = subgroupExclusiveAdd(data.x);
      data.w = subgroupExclusiveAdd(data.y);
    }
  }
  else if(IsTest(13))
  {
    // Reduction functions : unit tests
    if (id >= 2 && id <= 20)
    {
      data.x = float(subgroupMax(id));
      data.y = float(subgroupMin(id));
      data.z = float(subgroupMul(id));
      data.w = float(subgroupAdd(id));
    }
  }
  else if(IsTest(14))
  {
    // Reduction functions : unit tests
    if (id >= 2 && id <= 20)
    {
      uvec4 bits = subgroupBallot(id > 23);
      data.x = float(subgroupBallotBitCount(bits));
      data.y = float(subgroupAnd(id));
      data.z = float(subgroupOr(id));
      data.w = float(subgroupXor(id));
    }
  }
  else if(IsTest(15))
  {
    // Reduction functions : unit tests
    if (id > 13)
    {
      data.x = float(subgroupAllEqual(id > 15));
      data.y = float(subgroupAllEqual(id < 23));
      data.z = float(subgroupAllEqual(id >= 25));
      data.w = float(subgroupAllEqual(id >= 28));
    }
  }
  SetOutput(data);
}

)EOSHADER";
|
||||
|
||||
VkSubgroupFeatureFlags ops = 0;
|
||||
|
||||
void Prepare(int argc, char **argv)
|
||||
{
|
||||
VulkanGraphicsTest::Prepare(argc, argv);
|
||||
|
||||
if(!Avail.empty())
|
||||
return;
|
||||
|
||||
if(devVersion < VK_API_VERSION_1_1)
|
||||
Avail = "Vulkan device version isn't 1.1";
|
||||
|
||||
static VkPhysicalDeviceSubgroupProperties subProps = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
|
||||
};
|
||||
|
||||
getPhysProperties2(&subProps);
|
||||
|
||||
if(subProps.subgroupSize < 16)
|
||||
Avail = "Subgroup size is less than 16";
|
||||
|
||||
// require at least a few ops so we only have a few conditional compilations
|
||||
const VkSubgroupFeatureFlags requiredOps =
|
||||
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
|
||||
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
|
||||
|
||||
ops = subProps.supportedOperations;
|
||||
|
||||
if((subProps.supportedOperations & requiredOps) != requiredOps)
|
||||
Avail = "Missing ops support";
|
||||
|
||||
if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
|
||||
Avail = "Missing compute subgroup support";
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
// initialise, create window, create context, etc
|
||||
if(!Init())
|
||||
return 3;
|
||||
|
||||
VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
|
||||
{0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
|
||||
}));
|
||||
|
||||
VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
|
||||
{setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 4)}));
|
||||
|
||||
std::map<std::string, std::string> macros;
|
||||
|
||||
int numCompTests = 0;
|
||||
|
||||
size_t pos = 0;
|
||||
while(pos != std::string::npos)
|
||||
{
|
||||
pos = comp.find("IsTest(", pos);
|
||||
if(pos == std::string::npos)
|
||||
break;
|
||||
pos += sizeof("IsTest(") - 1;
|
||||
numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
|
||||
}
|
||||
|
||||
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)
|
||||
macros["FEAT_SHUFFLE"] = "1";
|
||||
else
|
||||
macros["FEAT_SHUFFLE"] = "0";
|
||||
if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
|
||||
macros["FEAT_SHUFFLE_RELATIVE"] = "1";
|
||||
else
|
||||
macros["FEAT_SHUFFLE_RELATIVE"] = "0";
|
||||
if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT)
|
||||
macros["FEAT_CLUSTERED"] = "1";
|
||||
else
|
||||
macros["FEAT_CLUSTERED"] = "0";
|
||||
if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT)
|
||||
macros["FEAT_QUAD"] = "1";
|
||||
else
|
||||
macros["FEAT_QUAD"] = "0";
|
||||
if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR)
|
||||
macros["FEAT_ROTATE"] = "1";
|
||||
else
|
||||
macros["FEAT_ROTATE"] = "0";
|
||||
if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR)
|
||||
macros["FEAT_ROTATE_CLUSTERED"] = "1";
|
||||
else
|
||||
macros["FEAT_ROTATE_CLUSTERED"] = "0";
|
||||
|
||||
std::string comppipe_name[1];
|
||||
VkPipeline comppipe[1];
|
||||
uint32_t countPipes = 0;
|
||||
|
||||
macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
|
||||
|
||||
macros["GROUP_SIZE_X"] = "70";
|
||||
macros["GROUP_SIZE_Y"] = "1";
|
||||
comppipe_name[countPipes] = "70x1";
|
||||
comppipe[countPipes] = createComputePipeline(vkh::ComputePipelineCreateInfo(
|
||||
layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
|
||||
SPIRVTarget::vulkan11)));
|
||||
++countPipes;
|
||||
|
||||
AllocatedBuffer bufout(
|
||||
this,
|
||||
vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
|
||||
VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
|
||||
|
||||
setName(bufout.buffer, "bufout");
|
||||
|
||||
VkDescriptorSet set = allocateDescriptorSet(setlayout);
|
||||
|
||||
vkh::updateDescriptorSets(
|
||||
device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
{vkh::DescriptorBufferInfo(bufout.buffer)})});
|
||||
|
||||
while(Running())
|
||||
{
|
||||
VkCommandBuffer cmd = GetCommandBuffer();
|
||||
|
||||
vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
|
||||
|
||||
VkImage swapimg =
|
||||
StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
|
||||
|
||||
vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
|
||||
|
||||
pushMarker(cmd, "Compute Tests");
|
||||
|
||||
for(size_t p = 0; p < countPipes; p++)
|
||||
{
|
||||
vkh::cmdPipelineBarrier(
|
||||
cmd, {},
|
||||
{vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
||||
|
||||
vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
|
||||
|
||||
vkh::cmdPipelineBarrier(
|
||||
cmd, {},
|
||||
{vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
|
||||
bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
|
||||
|
||||
pushMarker(cmd, comppipe_name[p]);
|
||||
|
||||
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comppipe[p]);
|
||||
vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
|
||||
|
||||
for(int i = 0; i < numCompTests; i++)
|
||||
{
|
||||
vkh::cmdPushConstants(cmd, layout, i);
|
||||
vkCmdDispatch(cmd, 1, 1, 1);
|
||||
}
|
||||
|
||||
popMarker(cmd);
|
||||
}
|
||||
|
||||
popMarker(cmd);
|
||||
|
||||
FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
|
||||
|
||||
vkEndCommandBuffer(cmd);
|
||||
|
||||
SubmitAndPresent({cmd});
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_TEST();
|
||||
@@ -10,3 +10,4 @@ from .shared.Overlay_Test import *
|
||||
from .shared.Buffer_Truncation import *
|
||||
from .shared.Discard_Zoo import *
|
||||
from .shared.Subgroup_Zoo import *
|
||||
from .shared.Workgroup_Zoo import *
|
||||
|
||||
@@ -13,11 +13,116 @@ class Subgroup_Zoo(rdtest.TestCase):
|
||||
return True, ''
|
||||
return False, 'Disabled test'
|
||||
|
||||
def check_compute_thread_result(self, test, action, x, y, z, dim, bufdata):
    """
    Debug a single compute thread and compare its final ``data`` value against
    the value read back from the output buffer.

    :param test: Index of the test; only used in log and failure messages.
    :param action: The dispatch being checked; its ``eventId`` is used in messages.
    :param x: X coordinate of the thread to debug.
    :param y: Y coordinate of the thread to debug.
    :param z: Z coordinate of the thread to debug.
    :param dim: Thread dimensions; ``dim[0]`` is used as the row pitch (in
      threads) when indexing ``bufdata``.
    :param bufdata: Output bytes for this test, read as one 4-float (16 byte)
      entry per thread.
    :return: ``True`` when the debugged value matches the buffer contents,
      ``False`` when a ``rdtest.TestFailureException`` was raised (it is
      logged here rather than propagated).
    """
    # Bind the name up front so the finally clause is safe even if reading the
    # buffer or starting the debug session raises before a trace exists.
    trace = None
    try:
        real = struct.unpack_from(
            "4f", bufdata, 16*y*dim[0] + 16*x)

        trace = self.controller.DebugThread(
            (0, 0, 0), (x, y, z))

        # Check for a usable debugger before stepping the trace, there is
        # nothing to process without one.
        if trace.debugger is None:
            raise rdtest.TestFailureException(f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")

        _, variables = self.process_trace(trace)

        # Find the source variable 'data' at the highest instruction index
        name = 'data'
        debugged = None
        countInst = len(trace.instInfo)
        for instIdx in range(countInst - 1, -1, -1):
            sourceVars = trace.instInfo[instIdx].sourceVars
            try:
                dataVars = [v for v in sourceVars if v.name == name]
                if len(dataVars) == 0:
                    continue
                debugged = self.evaluate_source_var(dataVars[0], variables)
            except (KeyError, rdtest.TestFailureException):
                # The variable could not be evaluated at this instruction,
                # keep walking backwards to an earlier one.
                continue
            break

        if debugged is None:
            raise rdtest.TestFailureException(f"Couldn't find source variable {name} at {x},{y},{z}")

        debuggedValue = list(debugged.value.f32v[0:4])

        if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
            raise rdtest.TestFailureException(f"EID:{action.eventId} TID:{x},{y},{z} debugged thread value {debuggedValue} does not match output {real}")

    except rdtest.TestFailureException as ex:
        rdtest.log.error(f"Test {test} failed {ex}")
        return False
    finally:
        # Only free a trace that was actually created.
        if trace is not None:
            self.controller.FreeTrace(trace)

    return True
|
||||
|
||||
def check_compute_tests(self, compute_dims, thread_checks):
    """
    Run the per-thread debug checks for every dispatch under each workgroup
    marker.

    :param compute_dims: Marker actions, one per workgroup size; their
      children flagged as dispatches are the individual tests.
    :param thread_checks: Thread indices to check along the larger of the X/Y
      dimensions. Every index along the smaller dimension is checked for each
      of them; indices outside the dispatch dimensions are skipped.
    :return: ``True`` if any test failed, ``False`` if all of them passed.
    """
    overallFailed = False
    for comp_dim in compute_dims:
        rdtest.log.begin_section(
            f"Compute tests with {comp_dim.customName} workgroup")

        compute_tests = [
            a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]

        for test, action in enumerate(compute_tests):
            failed = False
            self.controller.SetFrameEvent(action.eventId, False)

            pipe = self.controller.GetPipelineState()
            csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)

            dim = csrefl.dispatchThreadsDimension

            rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)

            if len(rw) != 1:
                rdtest.log.error("Unexpected number of RW resources")
                # Nothing can be verified for this dispatch, so it must count
                # as a failure instead of being skipped silently.
                overallFailed = True
                continue

            # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
            bufdata = self.controller.GetBufferData(
                rw[0].descriptor.resource, test*16*1024, 16*1024)

            for t in thread_checks:
                # horizontal orientation: t selects a column, all rows checked
                xrange = 1
                yrange = dim[1]
                xbase = t
                ybase = 0

                # vertical orientation: t selects a row, all columns checked
                if dim[1] > dim[0]:
                    xrange = dim[0]
                    yrange = 1
                    xbase = 0
                    ybase = t

                for x in range(xbase, xbase+xrange):
                    for y in range(ybase, ybase+yrange):
                        z = 0

                        # skip check indices that fall outside this dispatch
                        if x >= dim[0] or y >= dim[1]:
                            continue

                        if not self.check_compute_thread_result(test, action, x, y, z, dim, bufdata):
                            failed = True

            overallFailed |= failed
            if not failed:
                rdtest.log.success(f"Test {test} successful")
            else:
                rdtest.log.error(f"Test {test} failed")

        rdtest.log.end_section(
            f"Compute tests with {comp_dim.customName} workgroup")

    return overallFailed
|
||||
|
||||
def check_capture(self):
|
||||
graphics_tests = [a for a in self.find_action(
|
||||
"Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]
|
||||
compute_dims = [a for a in self.find_action(
|
||||
"Compute Tests").children if 'x' in a.customName]
|
||||
|
||||
rdtest.log.begin_section("Graphics tests")
|
||||
|
||||
@@ -34,19 +139,6 @@ class Subgroup_Zoo(rdtest.TestCase):
|
||||
# middle quad on other triangle
|
||||
(56, 64), (57, 64), (56, 65), (57, 65),
|
||||
]
|
||||
# threads to check. largest dimension only (all small dim checked)
|
||||
thread_checks = [
|
||||
# first few
|
||||
0, 1, 2,
|
||||
# near end of 32-subgroup and boundary
|
||||
30, 31, 32,
|
||||
# near end of 64-subgroup and boundary
|
||||
62, 63, 64,
|
||||
# near end of 64-subgroup and boundary
|
||||
62, 63, 64,
|
||||
# large values spaced out with one near the end of our unaligned size
|
||||
100, 110, 120, 140, 149, 150, 160, 200, 250,
|
||||
]
|
||||
clear_col = (123456.0, 789.0, 101112.0, 0.0)
|
||||
|
||||
overallFailed = False
|
||||
@@ -163,102 +255,21 @@ class Subgroup_Zoo(rdtest.TestCase):
|
||||
|
||||
rdtest.log.end_section("Graphics tests")
|
||||
|
||||
for comp_dim in compute_dims:
|
||||
rdtest.log.begin_section(
|
||||
f"Compute tests with {comp_dim.customName} workgroup")
|
||||
# threads to check. largest dimension only (all small dim checked)
|
||||
thread_checks = [
    # first few
    0, 1, 2,
    # near end of 32-subgroup and boundary
    30, 31, 32, 33, 34,
    # near end of 64-subgroup and boundary (each index listed once, a
    # repeated entry would only debug the same thread a second time)
    62, 63, 64, 65,
    # large values spaced out with one near the end of our unaligned size
    100, 110, 120, 140, 149, 150, 160, 200, 250,
]
|
||||
compute_dims = [a for a in self.find_action(
|
||||
"Compute Tests").children if 'x' in a.customName]
|
||||
|
||||
compute_tests = [
|
||||
a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
|
||||
|
||||
for test, action in enumerate(compute_tests):
|
||||
failed = False
|
||||
self.controller.SetFrameEvent(action.eventId, False)
|
||||
|
||||
pipe = self.controller.GetPipelineState()
|
||||
csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
|
||||
|
||||
dim = csrefl.dispatchThreadsDimension
|
||||
|
||||
rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
|
||||
|
||||
if len(rw) != 1:
|
||||
rdtest.log.error("Unexpected number of RW resources")
|
||||
continue
|
||||
|
||||
# each test writes up to 16k data, one vec4 per thread * up to 1024 threads
|
||||
bufdata = self.controller.GetBufferData(
|
||||
rw[0].descriptor.resource, test*16*1024, 16*1024)
|
||||
|
||||
for t in thread_checks:
|
||||
xrange = 1
|
||||
yrange = dim[1]
|
||||
xbase = t
|
||||
ybase = 0
|
||||
|
||||
# vertical orientation
|
||||
if dim[1] > dim[0]:
|
||||
xrange = dim[0]
|
||||
yrange = 1
|
||||
xbase = 0
|
||||
ybase = t
|
||||
|
||||
for x in range(xbase, xbase+xrange):
|
||||
for y in range(ybase, ybase+yrange):
|
||||
z = 0
|
||||
|
||||
if x >= dim[0] or y >= dim[1]:
|
||||
continue
|
||||
|
||||
try:
|
||||
real = struct.unpack_from(
|
||||
"4f", bufdata, 16*y*dim[0] + 16*x)
|
||||
|
||||
trace = self.controller.DebugThread(
|
||||
(0, 0, 0), (x, y, z))
|
||||
|
||||
_, variables = self.process_trace(trace)
|
||||
|
||||
if trace.debugger is None:
|
||||
raise rdtest.TestFailureException(f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
|
||||
|
||||
# Find the source variable 'data' at the highest instruction index
|
||||
debugged = None
|
||||
countInst = len(trace.instInfo)
|
||||
for inst in range(countInst):
|
||||
sourceVars = trace.instInfo[countInst-1-inst].sourceVars
|
||||
try:
|
||||
dataVars = [v for v in sourceVars if v.name == 'data']
|
||||
if len(dataVars) == 0:
|
||||
continue
|
||||
debugged = self.evaluate_source_var(dataVars[0], variables)
|
||||
except KeyError as ex:
|
||||
continue
|
||||
except rdtest.TestFailureException as ex:
|
||||
continue
|
||||
break
|
||||
if debugged is None:
|
||||
raise rdtest.TestFailureException(f"Couldn't find source variable {name}")
|
||||
|
||||
debuggedValue = list(debugged.value.f32v[0:4])
|
||||
|
||||
if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
|
||||
raise rdtest.TestFailureException(f"EID:{action.eventId} TID:{x},{y},{z} debugged thread value {debuggedValue} does not match output {real}")
|
||||
|
||||
except rdtest.TestFailureException as ex:
|
||||
rdtest.log.error(f"Test {test} failed {ex}")
|
||||
failed = True
|
||||
continue
|
||||
finally:
|
||||
self.controller.FreeTrace(trace)
|
||||
|
||||
overallFailed |= failed
|
||||
if not failed:
|
||||
rdtest.log.success(f"Test {test} successful")
|
||||
else:
|
||||
rdtest.log.error(f"Test {test} failed")
|
||||
|
||||
rdtest.log.end_section(
|
||||
f"Compute tests with {comp_dim.customName} workgroup")
|
||||
overallFailed |= self.check_compute_tests(compute_dims, thread_checks)
|
||||
|
||||
if overallFailed:
|
||||
raise rdtest.TestFailureException("Some tests were not as expected")
|
||||
@@ -0,0 +1,28 @@
|
||||
import rdtest
|
||||
|
||||
# Not a real test, re-used by API-specific tests
|
||||
class Workgroup_Zoo(rdtest.Subgroup_Zoo):
    # Shared base for the API-specific workgroup tests: it names no demo and
    # is flagged internal.
    demos_test_name = None
    internal = True

    def check_capture(self):
        # Thread indices to check along the largest dimension only (every
        # index of the small dimension is checked).
        # first few
        first_few = [0, 1, 2]
        # last thread, boundary and first thread past it for 16-, 32-, 64-
        # and 128-wide subgroups
        boundaries = [size + offset
                      for size in (16, 32, 64, 128)
                      for offset in (-1, 0, 1)]
        # large values
        large = [150]
        thread_checks = first_few + boundaries + large

        markers = self.find_action("Compute Tests").children
        compute_dims = [marker for marker in markers if 'x' in marker.customName]

        any_failed = self.check_compute_tests(compute_dims, thread_checks)
        if any_failed:
            raise rdtest.TestFailureException("Some tests were not as expected")
|
||||
@@ -0,0 +1,5 @@
|
||||
import rdtest
|
||||
|
||||
class D3D12_Workgroup_Zoo(rdtest.Workgroup_Zoo):
    # Concrete D3D12 variant: clears the internal flag and names the demo to use.
    internal = False
    demos_test_name = 'D3D12_Workgroup_Zoo'
|
||||
@@ -0,0 +1,5 @@
|
||||
import rdtest
|
||||
|
||||
class VK_Workgroup_Zoo(rdtest.Workgroup_Zoo):
    # Concrete Vulkan variant: clears the internal flag and names the demo to use.
    internal = False
    demos_test_name = 'VK_Workgroup_Zoo'
|
||||
Reference in New Issue
Block a user