diff --git a/util/test/demos/CMakeLists.txt b/util/test/demos/CMakeLists.txt
index 25427a8e6..840aeb722 100644
--- a/util/test/demos/CMakeLists.txt
+++ b/util/test/demos/CMakeLists.txt
@@ -165,7 +165,8 @@ set(VULKAN_SRC
         vk/vk_validation_use.cpp
         vk/vk_vertex_attr_zoo.cpp
         vk/vk_video_textures.cpp
-        vk/vk_vs_max_desc_set.cpp)
+        vk/vk_vs_max_desc_set.cpp
+        vk/vk_workgroup_zoo.cpp)
 
 set(OPENGL_SRC
         3rdparty/glad/glad.c
diff --git a/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp b/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
index b4acf17fe..0e15f12e1 100644
--- a/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
+++ b/util/test/demos/d3d12/d3d12_subgroup_zoo.cpp
@@ -57,7 +57,7 @@ RWStructuredBuffer<float4> outbuf : register(u0);
 
 static uint3 tid;
 
-void SetOuput(float4 data)
+void SetOutput(float4 data)
 {
   outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;
 }
@@ -221,7 +221,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
 
   uint id = WaveGetLaneIndex();
 
-  SetOuput(id);
+  SetOutput(id);
 
   if(IsTest(0))
   {
@@ -297,7 +297,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
     if (id < 10)
     {
       data.x = WaveActiveSum(id+10);
-      SetOuput(data);
+      SetOutput(data);
       return;
     }
     data.x = WaveActiveSum(id);
@@ -402,7 +402,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
       data.w = float(WaveActiveAllEqual(test4).w);
     }
   }
-  SetOuput(data);
+  SetOutput(data);
 }
 
 )EOSHADER";
@@ -417,7 +417,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
 
   uint id = WaveGetLaneIndex();
 
-  SetOuput(id);
+  SetOutput(id);
 
   if(IsTest(0))
   {
@@ -439,7 +439,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
 		data.z = WaveMultiPrefixBitOr(id, mask);
 		data.w = WaveMultiPrefixBitXor(id, mask);
   }
-  SetOuput(data);
+  SetOutput(data);
 }
 
 )EOSHADER";
@@ -548,12 +548,7 @@ void main(uint3 inTid : SV_DispatchThreadID)
     ID3D12PipelineStatePtr comppipe65[ARRAY_COUNT(compsize)];
 
     std::string defines60;
-    defines60 += fmt::format("#define COMP_TESTS {}\n", numCompTests60);
-    defines60 += "\n";
-
     std::string defines65;
-    defines65 += fmt::format("#define COMP_TESTS {}\n", numCompTests65);
-    defines65 += "\n";
 
     bool supportSM65 = (m_HighestShaderModel >= D3D_SHADER_MODEL_6_5) && m_DXILSupport;
     bool supportSM67 = (m_HighestShaderModel >= D3D_SHADER_MODEL_6_7) && m_DXILSupport;
diff --git a/util/test/demos/d3d12/d3d12_workgroup_zoo.cpp b/util/test/demos/d3d12/d3d12_workgroup_zoo.cpp
new file mode 100644
index 000000000..1a2d3291c
--- /dev/null
+++ b/util/test/demos/d3d12/d3d12_workgroup_zoo.cpp
@@ -0,0 +1,418 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2025 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "3rdparty/fmt/core.h"
+#include "d3d12_test.h"
+
+RD_TEST(D3D12_Workgroup_Zoo, D3D12GraphicsTest)
+{
+  static constexpr const char *Description =
+      "Test of behaviour around workgroup operations in shaders.";
+
+  const std::string common = R"EOSHADER(
+
+cbuffer rootconsts : register(b0)
+{
+  uint root_test;
+}
+
+#define IsTest(x) (root_test == x)
+
+)EOSHADER";
+
+  const std::string compCommon = common + R"EOSHADER(
+
+RWStructuredBuffer<float4> outbuf : register(u0);
+
+static uint3 tid;
+
+groupshared uint4 gsmUint4[1024];
+
+void SetOutput(float4 data)    // stores data in this thread's slot of the current test's slice
+{
+  outbuf[root_test * 1024 + tid.y * GROUP_SIZE_X + tid.x] = data;    // 1024 entries per test
+}
+
+)EOSHADER";
+
+  const std::string comp = compCommon + R"EOSHADER(
+
+float4 funcD(uint id)    // WaveActiveSum of id/2, replicated into all four components
+{
+  return WaveActiveSum(id/2).xxxx;
+}
+
+float4 nestedFunc(uint id)
+{
+  float4 ret = funcD(id/3);    // wave op reached through a nested call
+  ret.w = WaveActiveSum(id);   // second wave op issued directly in this function
+  return ret;
+}
+
+float4 funcA(uint id)    // nestedFunc on id*2; test 6 calls this for lanes with id < 10
+{
+   return nestedFunc(id*2);
+}
+
+float4 funcB(uint id)    // nestedFunc on id*4; test 6 calls this for lanes with id >= 10
+{
+   return nestedFunc(id*4);
+}
+
+float4 funcTest(uint id)    // returns from three different points depending on id
+{
+  if ((id % 2) == 0)
+  {
+    return 0.xxxx;    // even lanes return before any wave op
+  }
+  else
+  {
+    float value = WaveActiveSum(id);
+    if (id < 10)
+    {
+      return value.xxxx;    // odd lanes below 10 return after the first sum
+    }
+    value += WaveActiveSum(id/2);    // only odd lanes with id >= 10 reach this sum
+    return value.xxxx;
+  }
+}
+
+[numthreads(GROUP_SIZE_X, GROUP_SIZE_Y, 1)]
+void main(uint3 inTid : SV_DispatchThreadID)
+{
+  tid = inTid;
+  float4 data = 0.0f.xxxx;
+  uint id = WaveGetLaneIndex();
+  gsmUint4[id] = id;    // groupshared write; nothing in this shader reads it back
+  SetOutput(data);    // write zeros first; the selected test's result is stored at the end
+
+  if(IsTest(0))
+  {
+    data.x = id;
+  }
+  else if(IsTest(1))
+  {
+    data.x = WaveActiveSum(id);
+  }
+  else if(IsTest(2))
+  {
+    // Diverged threads which reconverge
+    if (id < 10)
+    {
+        // active threads 0-9
+        data.x = WaveActiveSum(id);
+
+        if ((id % 2) == 0)
+          data.y = WaveActiveSum(id);
+        else
+          data.y = WaveActiveSum(id);
+
+        data.x += WaveActiveSum(id);
+    }
+    else
+    {
+        // active threads 10...
+        data.x = WaveActiveSum(id);
+    }
+    data.y = WaveActiveSum(id);    // replaces the data.y written inside the id < 10 branch
+  }
+  else if(IsTest(3))
+  {
+    // Converged threads calling a function
+    data = funcTest(id);
+    data.y = WaveActiveSum(id);
+  }
+  else if(IsTest(4))
+  {
+    // Converged threads calling a function which has a nested function call in it
+    data = nestedFunc(id);
+    data.y = WaveActiveSum(id);
+  }
+  else if(IsTest(5))
+  {
+    // Diverged threads calling the same function
+    if (id < 10)
+    {
+      data = funcD(id);
+    }
+    else
+    {
+      data = funcD(id);
+    }
+    data.y = WaveActiveSum(id);
+  }
+  else if(IsTest(6))
+  {
+    // Diverged threads calling different functions (funcA / funcB) that share a nested call
+    if (id < 10)
+    {
+      data = funcA(id);
+    }
+    else
+    {
+      data = funcB(id);
+    }
+    data.y = WaveActiveSum(id);
+  }
+  else if(IsTest(7))
+  {
+    // Diverged threads which early exit
+    if (id < 10)
+    {
+      data.x = WaveActiveSum(id+10);
+      SetOutput(data);
+      return;
+    }
+    data.x = WaveActiveSum(id);
+  }
+  else if(IsTest(8))
+  {
+    // Loops with different number of iterations per thread
+    for (uint i = 0; i < id; i++)
+    {
+      data.x += WaveActiveSum(id);
+    }
+  }
+  else if(IsTest(9))
+  {
+    // Query functions : unit tests
+    data.x = float(WaveGetLaneCount());
+    data.y = float(WaveGetLaneIndex());
+    data.z = float(WaveIsFirstLane());
+  }
+  else if(IsTest(10))
+  {
+    // Vote functions : unit tests
+    data.x = float(WaveActiveAnyTrue(id*2 > id+10));
+    data.y = float(WaveActiveAllTrue(id < WaveGetLaneCount()));
+    if (id > 10)
+    {
+      data.z = float(WaveActiveAllTrue(id > 10));
+      uint4 ballot = WaveActiveBallot(id > 20);
+      data.w = countbits(ballot.x) + countbits(ballot.y) + countbits(ballot.z) + countbits(ballot.w);
+    }
+    else
+    {
+      data.z = float(WaveActiveAllTrue(id > 3));
+      uint4 ballot = WaveActiveBallot(id > 4);
+      data.w = countbits(ballot.x) + countbits(ballot.y) + countbits(ballot.z) + countbits(ballot.w);
+    }
+  }
+  else if(IsTest(11))
+  {
+    // Broadcast functions : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = WaveReadLaneFirst(id);
+      data.y = WaveReadLaneAt(id, 5);
+      data.z = WaveReadLaneAt(id, id);
+      data.w = WaveReadLaneAt(data.x, 2+id%3);
+    }
+  }
+  else if(IsTest(12))
+  {
+    // Scan and Prefix functions : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = WavePrefixCountBits(id > 4);
+      data.y = WavePrefixCountBits(id > 10);
+      data.z = WavePrefixSum(data.x);
+      data.w = WavePrefixProduct(1 + data.y);
+    }
+    else
+    {
+      data.x = WavePrefixCountBits(id > 23);
+      data.y = WavePrefixCountBits(id < 1);
+      data.z = WavePrefixSum(data.x);
+      data.w = WavePrefixSum(data.y);
+    }
+  }
+  else if(IsTest(13))
+  {
+    // Reduction functions (max / min / product / sum) : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = float(WaveActiveMax(id));
+      data.y = float(WaveActiveMin(id));
+      data.z = float(WaveActiveProduct(id));
+      data.w = float(WaveActiveSum(id));
+    }
+  }
+  else if(IsTest(14))
+  {
+    // Reduction functions (count bits / bitwise and, or, xor) : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = float(WaveActiveCountBits(id > 23));
+      data.y = float(WaveActiveBitAnd(id));
+      data.z = float(WaveActiveBitOr(id));
+      data.w = float(WaveActiveBitXor(id));
+    }
+  }
+  else if(IsTest(15))
+  {
+    // WaveActiveAllEqual on bool, bool2, bool3 and bool4 inputs : unit tests
+    if (id > 13)
+    {
+      bool test1 = (id > 15).x;
+      bool2 test2 = bool2(test1, (id < 23));
+      bool3 test3 = bool3(test1, (id < 23), (id >= 25));
+      bool4 test4 = bool4(test1, (id < 23), (id >= 25), (id >= 28));
+
+      data.x = float(WaveActiveAllEqual(test1).x);
+      data.y = float(WaveActiveAllEqual(test2).y);
+      data.z = float(WaveActiveAllEqual(test3).z);
+      data.w = float(WaveActiveAllEqual(test4).w);
+    }
+  }
+
+  SetOutput(data);
+}
+
+)EOSHADER";
+
+  // Marks the demo unavailable unless WaveLaneCountMax >= 16 and SM 6.0 with DXIL is supported.
+  void Prepare(int argc, char **argv)
+  {
+    D3D12GraphicsTest::Prepare(argc, argv);
+    if(!Avail.empty())
+      return;    // keep the base class's reason rather than overwriting it below
+    if(opts1.WaveLaneCountMax < 16)
+      Avail = "Subgroup size is less than 16";
+    if(!(m_HighestShaderModel >= D3D_SHADER_MODEL_6_0 && m_DXILSupport))
+      Avail = "SM 6.0 not supported";
+  }
+
+  int main()
+  {
+    // initialise, create window, create device, etc
+    if(!Init())
+      return 3;
+
+    ID3D12RootSignaturePtr sig = MakeSig({constParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0, 1),
+                                          uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0)});
+
+    const uint32_t imgDim = 128;
+    // NOTE(review): fltTex, fltRTV and fltSRV are created here but not referenced again below
+    ID3D12ResourcePtr fltTex = MakeTexture(DXGI_FORMAT_R32G32B32A32_FLOAT, imgDim, imgDim)
+                                   .RTV()
+                                   .InitialState(D3D12_RESOURCE_STATE_RENDER_TARGET);
+    fltTex->SetName(L"fltTex");
+    D3D12_CPU_DESCRIPTOR_HANDLE fltRTV = MakeRTV(fltTex).CreateCPU(0);
+    D3D12_GPU_DESCRIPTOR_HANDLE fltSRV = MakeSRV(fltTex).CreateGPU(8);
+
+    int32_t numCompTests = 0;
+    // test count = 1 + the largest number that follows an "IsTest(" occurrence in the shader text
+    size_t pos = 0;
+    while(pos != std::string::npos)
+    {
+      pos = comp.find("IsTest(", pos);
+      if(pos == std::string::npos)
+        break;
+      pos += sizeof("IsTest(") - 1;
+      numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
+    }
+
+    struct
+    {
+      int x, y;
+    } compsize[] = {
+        {70, 1},
+    };
+    std::string comppipe_name[ARRAY_COUNT(compsize)];
+    ID3D12PipelineStatePtr comppipe[ARRAY_COUNT(compsize)];
+
+    std::string defines;    // NOTE(review): never appended to, so it adds nothing to the source
+
+    for(int i = 0; i < ARRAY_COUNT(comppipe); i++)
+    {
+      std::string sizedefine;
+      sizedefine = fmt::format("#define GROUP_SIZE_X {}\n#define GROUP_SIZE_Y {}\n", compsize[i].x,
+                               compsize[i].y);
+      comppipe_name[i] = fmt::format("{}x{}", compsize[i].x, compsize[i].y);
+
+      comppipe[i] =
+          MakePSO().RootSig(sig).CS(Compile(defines + sizedefine + comp, "main", "cs_6_0"));
+      comppipe[i]->SetName(UTF82Wide(comppipe_name[i]).c_str());
+    }
+    // output buffer: 1024 Vec4f per test; the UAV views the same bytes as R32_UINT for clearing
+    ID3D12ResourcePtr bufOut = MakeBuffer().Size(sizeof(Vec4f) * 1024 * numCompTests).UAV();
+    D3D12ViewCreator uavView =
+        MakeUAV(bufOut).Format(DXGI_FORMAT_R32_UINT).NumElements(4 * 1024 * numCompTests);
+    D3D12_CPU_DESCRIPTOR_HANDLE uavcpu = uavView.CreateClearCPU(10);
+    D3D12_GPU_DESCRIPTOR_HANDLE uavgpu = uavView.CreateGPU(10);
+
+    bufOut->SetName(L"bufOut");
+
+    while(Running())
+    {
+      ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
+
+      Reset(cmd);
+
+      cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());
+
+      ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});
+
+      pushMarker(cmd, "Compute Tests");
+
+      for(size_t p = 0; p < ARRAY_COUNT(comppipe); p++)
+      {
+        ResourceBarrier(cmd);
+
+        UINT zero[4] = {};
+        cmd->ClearUnorderedAccessViewUint(uavgpu, uavcpu, bufOut, zero, 0, NULL);
+
+        ResourceBarrier(cmd);
+        pushMarker(cmd, comppipe_name[p]);
+
+        cmd->SetPipelineState(comppipe[p]);
+        cmd->SetComputeRootSignature(sig);
+        cmd->SetComputeRootUnorderedAccessView(1, bufOut->GetGPUVirtualAddress());
+
+        for(int i = 0; i < numCompTests; i++)
+        {
+          cmd->SetComputeRoot32BitConstant(0, i, 0);    // root constant selects the test
+          cmd->Dispatch(1, 1, 1);                       // one thread group per test
+        }
+
+        popMarker(cmd);
+      }
+
+      popMarker(cmd);
+
+      FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      cmd->Close();
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj
index d4f024d66..29e9cba16 100644
--- a/util/test/demos/demos.vcxproj
+++ b/util/test/demos/demos.vcxproj
@@ -232,6 +232,7 @@
     <ClCompile Include="d3d12\d3d12_vertex_uav.cpp" />
     <ClCompile Include="d3d12\d3d12_video_textures.cpp" />
     <ClCompile Include="d3d12\d3d12_vrs.cpp" />
+    <ClCompile Include="d3d12\d3d12_workgroup_zoo.cpp" />
     <ClCompile Include="d3d12\d3d12_write_subresource.cpp" />
     <ClCompile Include="dx\d3d_helpers.cpp" />
     <ClCompile Include="3rdparty\glad\glad.c" />
@@ -374,6 +375,7 @@
     <ClCompile Include="vk\vk_simple_triangle.cpp" />
     <ClCompile Include="vk\vk_test.cpp" />
     <ClCompile Include="3rdparty\volk\volk.c" />
+    <ClCompile Include="vk\vk_workgroup_zoo.cpp" />
     <ClCompile Include="win32\win32_platform.cpp" />
     <ClCompile Include="win32\win32_window.cpp" />
   </ItemGroup>
diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters
index 241b29aae..69a29736e 100644
--- a/util/test/demos/demos.vcxproj.filters
+++ b/util/test/demos/demos.vcxproj.filters
@@ -718,6 +718,12 @@
     <ClCompile Include="d3d12\d3d12_subgroup_zoo.cpp">
       <Filter>D3D12\demos</Filter>
     </ClCompile>
+    <ClCompile Include="vk\vk_workgroup_zoo.cpp">
+      <Filter>Vulkan\demos</Filter>
+    </ClCompile>
+    <ClCompile Include="d3d12\d3d12_workgroup_zoo.cpp">
+      <Filter>D3D12\demos</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <Filter Include="D3D11">
diff --git a/util/test/demos/vk/vk_subgroup_zoo.cpp b/util/test/demos/vk/vk_subgroup_zoo.cpp
index b48064ee4..abe1c2914 100644
--- a/util/test/demos/vk/vk_subgroup_zoo.cpp
+++ b/util/test/demos/vk/vk_subgroup_zoo.cpp
@@ -191,7 +191,7 @@ vec4 funcTest(uint id)
   }
 }
 
-void SetOuput(vec4 data)
+void SetOutput(vec4 data)
 {
   outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
 }
@@ -199,7 +199,7 @@ void main()
 {
   vec4 data = vec4(0);
   uint id = gl_SubgroupInvocationID;
-  SetOuput(data);
+  SetOutput(data);
 
   if(IsTest(0))
   {
@@ -275,7 +275,7 @@ void main()
     if (id < 10)
     {
       data.x = subgroupAdd(id+10);
-      SetOuput(data);
+      SetOutput(data);
       return;
     }
     data.x = subgroupAdd(id);
@@ -380,7 +380,7 @@ void main()
       data.w = float(subgroupAllEqual(id >= 28));
     }
   }
-  SetOuput(data);
+  SetOutput(data);
 }
 
 )EOSHADER";
diff --git a/util/test/demos/vk/vk_workgroup_zoo.cpp b/util/test/demos/vk/vk_workgroup_zoo.cpp
new file mode 100644
index 000000000..b3855e2fe
--- /dev/null
+++ b/util/test/demos/vk/vk_workgroup_zoo.cpp
@@ -0,0 +1,489 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2025 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "3rdparty/fmt/core.h"
+#include "vk_test.h"
+
+RD_TEST(VK_Workgroup_Zoo, VulkanGraphicsTest)
+{
+  static constexpr const char *Description =
+      "Test of behaviour around workgroup operations in shaders.";
+
+  const std::string common = R"EOSHADER(
+
+#version 460 core
+#extension GL_KHR_shader_subgroup_basic : enable
+#extension GL_KHR_shader_subgroup_ballot : enable
+#extension GL_KHR_shader_subgroup_vote : enable
+#extension GL_KHR_shader_subgroup_arithmetic : enable
+
+#if FEAT_SHUFFLE
+#extension GL_KHR_shader_subgroup_shuffle : enable
+#endif
+
+#if FEAT_SHUFFLE_RELATIVE
+#extension GL_KHR_shader_subgroup_shuffle_relative : enable
+#endif
+
+#if FEAT_CLUSTERED
+#extension GL_KHR_shader_subgroup_clustered : enable
+#endif
+
+#if FEAT_QUAD
+#extension GL_KHR_shader_subgroup_quad : enable
+#endif
+
+#if FEAT_ROTATE || FEAT_ROTATE_CLUSTERED
+#extension GL_KHR_shader_subgroup_rotate : enable
+#endif
+
+layout(push_constant) uniform PushData
+{
+  uint test;
+} push;
+
+#define IsTest(x) (push.test == x)
+
+)EOSHADER";
+
+  const std::string comp = common + R"EOSHADER(
+
+// gsmUint4 is indexed by gl_SubgroupInvocationID, so it must not be sized by COMP_TESTS
+shared uvec4 gsmUint4[1024];
+struct Output
+{
+  vec4 vals[1024];
+};
+
+layout(binding = 0, std430) buffer outbuftype {
+  Output data[COMP_TESTS];
+} outbuf;
+
+layout(local_size_x = GROUP_SIZE_X, local_size_y = GROUP_SIZE_Y, local_size_z = 1) in;
+
+vec4 funcD(uint id)    // subgroupAdd of id/2, replicated into all four components
+{
+  return vec4(subgroupAdd(id/2));
+}
+
+vec4 nestedFunc(uint id)
+{
+  vec4 ret = funcD(id/3);     // subgroup op reached through a nested call
+  ret.w = subgroupAdd(id);    // second subgroup op issued directly in this function
+  return ret;
+}
+
+vec4 funcA(uint id)    // nestedFunc on id*2; test 6 calls this for invocations with id < 10
+{
+   return nestedFunc(id*2);
+}
+
+vec4 funcB(uint id)    // nestedFunc on id*4; test 6 calls this for invocations with id >= 10
+{
+   return nestedFunc(id*4);
+}
+
+vec4 funcTest(uint id)    // returns from three different points depending on id
+{
+  if ((id % 2) == 0)
+  {
+    return vec4(0);    // even invocations return before any subgroup op
+  }
+  else
+  {
+    float value = subgroupAdd(id);
+    if (id < 10)
+    {
+      return vec4(value);    // odd invocations below 10 return after the first add
+    }
+    value += subgroupAdd(id/2);    // only odd invocations with id >= 10 reach this add
+    return vec4(value);
+  }
+}
+
+void SetOutput(vec4 data)    // stores data in this invocation's slot of the current test's Output
+{
+  outbuf.data[push.test].vals[gl_LocalInvocationID.y * GROUP_SIZE_X + gl_LocalInvocationID.x] = data;
+}
+void main()
+{
+  vec4 data = vec4(0);
+  uint id = gl_SubgroupInvocationID;
+  gsmUint4[id] = uvec4(id);    // explicit constructor: GLSL has no implicit uint -> uvec4 conversion
+  SetOutput(data);    // write zeros first; the selected test's result is stored at the end
+
+  if(IsTest(0))
+  {
+    data.x = id;
+  }
+  else if(IsTest(1))
+  {
+    data.x = subgroupAdd(id);
+  }
+  else if(IsTest(2))
+  {
+    // Diverged threads which reconverge
+    if (id < 10)
+    {
+        // active threads 0-9
+        data.x = subgroupAdd(id);
+
+        if ((id % 2) == 0)
+          data.y = subgroupAdd(id);
+        else
+          data.y = subgroupAdd(id);
+
+        data.x += subgroupAdd(id);
+    }
+    else
+    {
+        // active threads 10...
+        data.x = subgroupAdd(id);
+    }
+    data.y = subgroupAdd(id);    // replaces the data.y written inside the id < 10 branch
+  }
+  else if(IsTest(3))
+  {
+    // Converged threads calling a function
+    data = funcTest(id);
+    data.y = subgroupAdd(id);
+  }
+  else if(IsTest(4))
+  {
+    // Converged threads calling a function which has a nested function call in it
+    data = nestedFunc(id);
+    data.y = subgroupAdd(id);
+  }
+  else if(IsTest(5))
+  {
+    // Diverged threads calling the same function
+    if (id < 10)
+    {
+      data = funcD(id);
+    }
+    else
+    {
+      data = funcD(id);
+    }
+    data.y = subgroupAdd(id);
+  }
+  else if(IsTest(6))
+  {
+    // Diverged threads calling different functions (funcA / funcB) that share a nested call
+    if (id < 10)
+    {
+      data = funcA(id);
+    }
+    else
+    {
+      data = funcB(id);
+    }
+    data.y = subgroupAdd(id);
+  }
+  else if(IsTest(7))
+  {
+    // Diverged threads which early exit
+    if (id < 10)
+    {
+      data.x = subgroupAdd(id+10);
+      SetOutput(data);
+      return;
+    }
+    data.x = subgroupAdd(id);
+  }
+  else if(IsTest(8))
+  {
+    // Loops with different number of iterations per thread
+    for (uint i = 0; i < id; i++)
+    {
+      data.x += subgroupAdd(id);
+    }
+  }
+  else if(IsTest(9))
+  {
+    // Query functions : unit tests
+    data.x = float(gl_SubgroupSize);
+    data.y = float(gl_SubgroupInvocationID);
+    data.z = float(subgroupElect());
+  }
+  else if(IsTest(10))
+  {
+    // Vote functions : unit tests
+    data.x = float(subgroupAny(id*2 > id+10));
+    data.y = float(subgroupAll(id < gl_SubgroupSize));
+    if (id > 10)
+    {
+      data.z = float(subgroupAll(id > 10));
+      uvec4 ballot = subgroupBallot(id > 20);
+      data.w = bitCount(ballot.x) + bitCount(ballot.y) + bitCount(ballot.z) + bitCount(ballot.w);
+    }
+    else
+    {
+      data.z = float(subgroupAll(id > 3));
+      uvec4 ballot = subgroupBallot(id > 4);
+      data.w = bitCount(ballot.x) + bitCount(ballot.y) + bitCount(ballot.z) + bitCount(ballot.w);
+    }
+  }
+  else if(IsTest(11))
+  {
+    // Broadcast functions : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = subgroupBroadcastFirst(id);
+      data.y = subgroupBroadcast(id, 5);
+      data.z = subgroupShuffle(id, id);
+      data.w = subgroupShuffle(data.x, 2+id%3);
+    }
+  }
+  else if(IsTest(12))
+  {
+    // Scan and Prefix functions : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      uvec4 bits = subgroupBallot(id > 4);
+      data.x = subgroupBallotExclusiveBitCount(bits);
+      bits = subgroupBallot(id > 10);
+      data.y = subgroupBallotExclusiveBitCount(bits);
+      data.z = subgroupExclusiveAdd(data.x);
+      data.w = subgroupExclusiveMul(1 + data.y);
+    }
+    else
+    {
+      uvec4 bits = subgroupBallot(id > 23);
+      data.x = subgroupBallotExclusiveBitCount(bits);
+      bits = subgroupBallot(id < 1);
+      data.y = subgroupBallotExclusiveBitCount(bits);
+      data.z = subgroupExclusiveAdd(data.x);
+      data.w = subgroupExclusiveAdd(data.y);
+    }
+  }
+  else if(IsTest(13))
+  {
+    // Reduction functions (max / min / mul / add) : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      data.x = float(subgroupMax(id));
+      data.y = float(subgroupMin(id));
+      data.z = float(subgroupMul(id));
+      data.w = float(subgroupAdd(id));
+    }
+  }
+  else if(IsTest(14))
+  {
+    // Reduction functions (ballot bit count / bitwise and, or, xor) : unit tests
+    if (id >= 2 && id <= 20)
+    {
+      uvec4 bits = subgroupBallot(id > 23);
+      data.x = float(subgroupBallotBitCount(bits));
+      data.y = float(subgroupAnd(id));
+      data.z = float(subgroupOr(id));
+      data.w = float(subgroupXor(id));
+    }
+  }
+  else if(IsTest(15))
+  {
+    // subgroupAllEqual on four different boolean conditions : unit tests
+    if (id > 13)
+    {
+      data.x = float(subgroupAllEqual(id > 15));
+      data.y = float(subgroupAllEqual(id < 23));
+      data.z = float(subgroupAllEqual(id >= 25));
+      data.w = float(subgroupAllEqual(id >= 28));
+    }
+  }
+  SetOutput(data);
+}
+
+)EOSHADER";
+
+  VkSubgroupFeatureFlags ops = 0;
+
+  // Requires Vulkan 1.1 and compute-stage subgroup support; stores the supported ops in 'ops'.
+  void Prepare(int argc, char **argv)
+  {
+    VulkanGraphicsTest::Prepare(argc, argv);
+    if(!Avail.empty())
+      return;
+
+    // subgroup properties are a Vulkan 1.1 feature: don't query them on an older device, and
+    // don't let the checks below replace this reason
+    if(devVersion < VK_API_VERSION_1_1)
+    {
+      Avail = "Vulkan device version isn't 1.1";
+      return;
+    }
+
+    static VkPhysicalDeviceSubgroupProperties subProps = {
+        VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES,
+    };
+    getPhysProperties2(&subProps);
+    ops = subProps.supportedOperations;
+
+    // require at least a few ops so we only have a few conditional compilations
+    const VkSubgroupFeatureFlags requiredOps =
+        VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+        VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT;
+    if(subProps.subgroupSize < 16)
+      Avail = "Subgroup size is less than 16";
+    if((ops & requiredOps) != requiredOps)
+      Avail = "Missing ops support";
+    if((subProps.supportedStages & VK_SHADER_STAGE_COMPUTE_BIT) == 0)
+      Avail = "Missing compute subgroup support";
+  }
+
+  int main()
+  {
+    // initialise, create window, create context, etc
+    if(!Init())
+      return 3;
+
+    VkDescriptorSetLayout setlayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
+        {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+    }));
+
+    VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo(
+        {setlayout}, {vkh::PushConstantRange(VK_SHADER_STAGE_ALL, 0, 4)}));
+
+    std::map<std::string, std::string> macros;
+
+    int numCompTests = 0;
+    // test count = 1 + the largest number that follows an "IsTest(" occurrence in the shader text
+    size_t pos = 0;
+    while(pos != std::string::npos)
+    {
+      pos = comp.find("IsTest(", pos);
+      if(pos == std::string::npos)
+        break;
+      pos += sizeof("IsTest(") - 1;
+      numCompTests = std::max(numCompTests, atoi(comp.c_str() + pos) + 1);
+    }
+    // each FEAT_* macro is always defined, as "1" or "0", from the ops recorded by Prepare()
+    if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_BIT)
+      macros["FEAT_SHUFFLE"] = "1";
+    else
+      macros["FEAT_SHUFFLE"] = "0";
+    if(ops & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT)
+      macros["FEAT_SHUFFLE_RELATIVE"] = "1";
+    else
+      macros["FEAT_SHUFFLE_RELATIVE"] = "0";
+    if(ops & VK_SUBGROUP_FEATURE_CLUSTERED_BIT)
+      macros["FEAT_CLUSTERED"] = "1";
+    else
+      macros["FEAT_CLUSTERED"] = "0";
+    if(ops & VK_SUBGROUP_FEATURE_QUAD_BIT)
+      macros["FEAT_QUAD"] = "1";
+    else
+      macros["FEAT_QUAD"] = "0";
+    if(ops & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR)
+      macros["FEAT_ROTATE"] = "1";
+    else
+      macros["FEAT_ROTATE"] = "0";
+    if(ops & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR)
+      macros["FEAT_ROTATE_CLUSTERED"] = "1";
+    else
+      macros["FEAT_ROTATE_CLUSTERED"] = "0";
+    // only one pipeline is built here, with a 70x1 local size
+    std::string comppipe_name[1];
+    VkPipeline comppipe[1];
+    uint32_t countPipes = 0;
+
+    macros["COMP_TESTS"] = fmt::format("{}", numCompTests);
+
+    macros["GROUP_SIZE_X"] = "70";
+    macros["GROUP_SIZE_Y"] = "1";
+    comppipe_name[countPipes] = "70x1";
+    comppipe[countPipes] = createComputePipeline(vkh::ComputePipelineCreateInfo(
+        layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp, "main", macros,
+                                    SPIRVTarget::vulkan11)));
+    ++countPipes;
+    // output buffer: 1024 Vec4f per test, matching the shader's Output struct
+    AllocatedBuffer bufout(
+        this,
+        vkh::BufferCreateInfo(sizeof(Vec4f) * 1024 * numCompTests,
+                              VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT),
+        VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
+
+    setName(bufout.buffer, "bufout");
+
+    VkDescriptorSet set = allocateDescriptorSet(setlayout);
+
+    vkh::updateDescriptorSets(
+        device, {vkh::WriteDescriptorSet(set, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                         {vkh::DescriptorBufferInfo(bufout.buffer)})});
+
+    while(Running())
+    {
+      VkCommandBuffer cmd = GetCommandBuffer();
+
+      vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
+
+      VkImage swapimg =
+          StartUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+      vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
+
+      pushMarker(cmd, "Compute Tests");
+
+      for(size_t p = 0; p < countPipes; p++)
+      {
+        vkh::cmdPipelineBarrier(
+            cmd, {},
+            {vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+                                      bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
+        // zero the whole output buffer before this pipeline's dispatches
+        vkCmdFillBuffer(cmd, bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests, 0);
+
+        vkh::cmdPipelineBarrier(
+            cmd, {},
+            {vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT,
+                                      bufout.buffer, 0, sizeof(Vec4f) * 1024 * numCompTests)});
+
+        pushMarker(cmd, comppipe_name[p]);
+
+        vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comppipe[p]);
+        vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {set}, {});
+
+        for(int i = 0; i < numCompTests; i++)
+        {
+          vkh::cmdPushConstants(cmd, layout, i);    // push constant selects the test
+          vkCmdDispatch(cmd, 1, 1, 1);              // one workgroup per test
+        }
+
+        popMarker(cmd);
+      }
+
+      popMarker(cmd);
+
+      FinishUsingBackbuffer(cmd, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+      vkEndCommandBuffer(cmd);
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/rdtest/__init__.py b/util/test/rdtest/__init__.py
index b80a84068..54f816fb3 100644
--- a/util/test/rdtest/__init__.py
+++ b/util/test/rdtest/__init__.py
@@ -10,3 +10,4 @@ from .shared.Overlay_Test import *
 from .shared.Buffer_Truncation import *
 from .shared.Discard_Zoo import *
 from .shared.Subgroup_Zoo import *
+from .shared.Workgroup_Zoo import *
diff --git a/util/test/rdtest/shared/Subgroup_Zoo.py b/util/test/rdtest/shared/Subgroup_Zoo.py
index 2a6d20c7c..a912fa699 100644
--- a/util/test/rdtest/shared/Subgroup_Zoo.py
+++ b/util/test/rdtest/shared/Subgroup_Zoo.py
@@ -13,11 +13,116 @@ class Subgroup_Zoo(rdtest.TestCase):
             return True, ''
         return False, 'Disabled test'
 
+    def check_compute_thread_result(self, test, action, x, y, z, dim, bufdata):
+        """Call DebugThread for group (0, 0, 0), thread (x, y, z) and compare the last resolvable
+        value of the source variable 'data' with the four floats stored for that thread in
+        ``bufdata``. Logs an error and returns False on any test failure, True otherwise."""
+        name = 'data'
+        trace = None  # bound up-front so the finally clause never sees an unbound name
+        try:
+            # one vec4 (16 bytes) per thread, laid out in rows of dim[0] threads
+            real = struct.unpack_from("4f", bufdata, 16*y*dim[0] + 16*x)
+            trace = self.controller.DebugThread((0, 0, 0), (x, y, z))
+            # a missing debugger means there is nothing to step through, so check it first
+            if trace.debugger is None:
+                raise rdtest.TestFailureException(f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
+
+            _, variables = self.process_trace(trace)
+
+            # Find the source variable 'data' at the highest instruction index
+            debugged = None
+            countInst = len(trace.instInfo)
+            for inst in range(countInst):
+                sourceVars = trace.instInfo[countInst-1-inst].sourceVars
+                try:
+                    dataVars = [v for v in sourceVars if v.name == name]
+                    if len(dataVars) == 0:
+                        continue
+                    debugged = self.evaluate_source_var(dataVars[0], variables)
+                except (KeyError, rdtest.TestFailureException):
+                    continue
+                break
+            if debugged is None:
+                raise rdtest.TestFailureException(f"Couldn't find source variable {name} at {x},{y},{z}")
+
+            debuggedValue = list(debugged.value.f32v[0:4])
+            if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
+                raise rdtest.TestFailureException(f"EID:{action.eventId} TID:{x},{y},{z} debugged thread value {debuggedValue} does not match output {real}")
+
+        except rdtest.TestFailureException as ex:
+            rdtest.log.error(f"Test {test} failed {ex}")
+            return False
+        finally:
+            if trace is not None:
+                self.controller.FreeTrace(trace)
+
+        return True
+
+    def check_compute_tests(self, compute_dims, thread_checks):
+        """Debug selected threads of every dispatch under each action in ``compute_dims``.
+
+        :param compute_dims: actions whose child dispatches are checked; ``customName`` is
+          only used to label the log section.
+        :param thread_checks: thread indices along the larger of the X/Y workgroup dimensions;
+          the full extent of the other dimension is checked for each index.
+        :return: True if any check failed, False if every checked thread matched.
+        """
+        overallFailed = False
+        for comp_dim in compute_dims:
+            section = f"Compute tests with {comp_dim.customName} workgroup"
+            rdtest.log.begin_section(section)
+
+            compute_tests = [a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
+
+            for test, action in enumerate(compute_tests):
+                failed = False
+                self.controller.SetFrameEvent(action.eventId, False)
+
+                pipe = self.controller.GetPipelineState()
+                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
+                dim = csrefl.dispatchThreadsDimension
+                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
+
+                if len(rw) != 1:
+                    # record the failure so an unverifiable dispatch cannot pass silently
+                    rdtest.log.error("Unexpected number of RW resources")
+                    overallFailed = True
+                    continue
+
+                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
+                bufdata = self.controller.GetBufferData(
+                    rw[0].descriptor.resource, test*16*1024, 16*1024)
+
+                for t in thread_checks:
+                    if dim[1] > dim[0]:
+                        # vertical orientation: pin y to t and sweep every x
+                        xs, ys = range(dim[0]), [t]
+                    else:
+                        # horizontal orientation: pin x to t and sweep every y
+                        xs, ys = [t], range(dim[1])
+
+                    for x in xs:
+                        for y in ys:
+                            # thread_checks may hold indices beyond this workgroup's size
+                            if x >= dim[0] or y >= dim[1]:
+                                continue
+
+                            if not self.check_compute_thread_result(test, action, x, y, 0, dim, bufdata):
+                                failed = True
+
+                overallFailed |= failed
+                if not failed:
+                    rdtest.log.success(f"Test {test} successful")
+                else:
+                    rdtest.log.error(f"Test {test} failed")
+
+            rdtest.log.end_section(section)
+
+        return overallFailed
+
     def check_capture(self):
         graphics_tests = [a for a in self.find_action(
             "Graphics Tests").children if a.flags & rd.ActionFlags.Drawcall]
-        compute_dims = [a for a in self.find_action(
-            "Compute Tests").children if 'x' in a.customName]
 
         rdtest.log.begin_section("Graphics tests")
 
@@ -34,19 +139,6 @@ class Subgroup_Zoo(rdtest.TestCase):
             # middle quad on other triangle
             (56, 64), (57, 64), (56, 65), (57, 65),
         ]
-        # threads to check. largest dimension only (all small dim checked)
-        thread_checks = [
-            # first few
-            0, 1, 2,
-            # near end of 32-subgroup and boundary
-            30, 31, 32,
-            # near end of 64-subgroup and boundary
-            62, 63, 64,
-            # near end of 64-subgroup and boundary
-            62, 63, 64,
-            # large values spaced out with one near the end of our unaligned size
-            100, 110, 120, 140, 149, 150, 160, 200, 250,
-        ]
         clear_col = (123456.0, 789.0, 101112.0, 0.0)
 
         overallFailed = False
@@ -163,102 +255,21 @@ class Subgroup_Zoo(rdtest.TestCase):
 
         rdtest.log.end_section("Graphics tests")
 
-        for comp_dim in compute_dims:
-            rdtest.log.begin_section(
-                f"Compute tests with {comp_dim.customName} workgroup")
+        # threads to check. largest dimension only (all small dim checked)
+        thread_checks = [
+            # first few
+            0, 1, 2,
+            # near end of 32-subgroup and boundary
+            30, 31, 32, 33, 34,
+            # near end of 64-subgroup and boundary
+            62, 63, 64, 65, 66,
+            # large values spaced out with one near the end of our unaligned size
+            100, 110, 120, 140, 149, 150, 160, 200, 250,
+        ]
+        compute_dims = [a for a in self.find_action(
+            "Compute Tests").children if 'x' in a.customName]
 
-            compute_tests = [
-                a for a in comp_dim.children if a.flags & rd.ActionFlags.Dispatch]
-
-            for test, action in enumerate(compute_tests):
-                failed = False
-                self.controller.SetFrameEvent(action.eventId, False)
-
-                pipe = self.controller.GetPipelineState()
-                csrefl = pipe.GetShaderReflection(rd.ShaderStage.Compute)
-
-                dim = csrefl.dispatchThreadsDimension
-
-                rw = pipe.GetReadWriteResources(rd.ShaderStage.Compute)
-
-                if len(rw) != 1:
-                    rdtest.log.error("Unexpected number of RW resources")
-                    continue
-
-                # each test writes up to 16k data, one vec4 per thread * up to 1024 threads
-                bufdata = self.controller.GetBufferData(
-                    rw[0].descriptor.resource, test*16*1024, 16*1024)
-
-                for t in thread_checks:
-                    xrange = 1
-                    yrange = dim[1]
-                    xbase = t
-                    ybase = 0
-
-                    # vertical orientation
-                    if dim[1] > dim[0]:
-                        xrange = dim[0]
-                        yrange = 1
-                        xbase = 0
-                        ybase = t
-
-                    for x in range(xbase, xbase+xrange):
-                        for y in range(ybase, ybase+yrange):
-                            z = 0
-
-                            if x >= dim[0] or y >= dim[1]:
-                                continue
-
-                            try:
-                                real = struct.unpack_from(
-                                    "4f", bufdata, 16*y*dim[0] + 16*x)
-
-                                trace = self.controller.DebugThread(
-                                    (0, 0, 0), (x, y, z))
-
-                                _, variables = self.process_trace(trace)
-
-                                if trace.debugger is None:
-                                    raise rdtest.TestFailureException(f"Test {test} at {action.eventId} got no debug result at {x},{y},{z}")
-
-                                # Find the source variable 'data' at the highest instruction index
-                                debugged = None
-                                countInst = len(trace.instInfo)
-                                for inst in range(countInst):
-                                    sourceVars = trace.instInfo[countInst-1-inst].sourceVars
-                                    try:
-                                        dataVars = [v for v in sourceVars if v.name == 'data']
-                                        if len(dataVars) == 0:
-                                            continue
-                                        debugged = self.evaluate_source_var(dataVars[0], variables)
-                                    except KeyError as ex:
-                                        continue
-                                    except rdtest.TestFailureException as ex:
-                                        continue
-                                    break
-                                if debugged is None:
-                                    raise rdtest.TestFailureException(f"Couldn't find source variable {name}")
-
-                                debuggedValue = list(debugged.value.f32v[0:4])
-
-                                if not rdtest.value_compare(real, debuggedValue, eps=5.0E-06):
-                                    raise rdtest.TestFailureException(f"EID:{action.eventId} TID:{x},{y},{z} debugged thread value {debuggedValue} does not match output {real}")
-
-                            except rdtest.TestFailureException as ex:
-                                rdtest.log.error(f"Test {test} failed {ex}")
-                                failed = True
-                                continue
-                            finally:
-                                self.controller.FreeTrace(trace)
-
-                overallFailed |= failed
-                if not failed:
-                    rdtest.log.success(f"Test {test} successful")
-                else:
-                    rdtest.log.error(f"Test {test} failed")
-
-            rdtest.log.end_section(
-                f"Compute tests with {comp_dim.customName} workgroup")
+        overallFailed |= self.check_compute_tests(compute_dims, thread_checks)
 
         if overallFailed:
             raise rdtest.TestFailureException("Some tests were not as expected")
\ No newline at end of file
diff --git a/util/test/rdtest/shared/Workgroup_Zoo.py b/util/test/rdtest/shared/Workgroup_Zoo.py
new file mode 100644
index 000000000..c6d33331a
--- /dev/null
+++ b/util/test/rdtest/shared/Workgroup_Zoo.py
@@ -0,0 +1,28 @@
+import rdtest
+
+# Not a real test, re-used by API-specific tests
+class Workgroup_Zoo(rdtest.Subgroup_Zoo):
+    """Shared checks for the API-specific Workgroup_Zoo tests, built on the compute thread
+    checking inherited from Subgroup_Zoo."""
+    demos_test_name = None
+    internal = True
+
+    def check_capture(self):
+        """Check the dispatches under the 'Compute Tests' marker, raising on any failure."""
+        marker = self.find_action("Compute Tests")
+        compute_dims = [a for a in marker.children if 'x' in a.customName]
+
+        # threads to check. largest dimension only (all small dim checked)
+        # first few
+        first_few = [0, 1, 2]
+        # for each 16/32/64/128-subgroup: its last thread, then the first two of the next
+        boundaries = [edge + step
+                      for edge in (16, 32, 64, 128)
+                      for step in (-1, 0, 1)]
+        # large values
+        large = [150]
+        thread_checks = first_few + boundaries + large
+
+        any_failed = self.check_compute_tests(compute_dims, thread_checks)
+        if any_failed:
+            raise rdtest.TestFailureException("Some tests were not as expected")
\ No newline at end of file
diff --git a/util/test/tests/D3D12/D3D12_Workgroup_Zoo.py b/util/test/tests/D3D12/D3D12_Workgroup_Zoo.py
new file mode 100644
index 000000000..b049cbfb8
--- /dev/null
+++ b/util/test/tests/D3D12/D3D12_Workgroup_Zoo.py
@@ -0,0 +1,5 @@
+import rdtest
+
+class D3D12_Workgroup_Zoo(rdtest.Workgroup_Zoo):  # D3D12 variant of the shared Workgroup_Zoo checks
+    internal = False  # overrides the shared base class, which sets internal = True
+    demos_test_name = 'D3D12_Workgroup_Zoo'  # presumably the demo program to capture - confirm
diff --git a/util/test/tests/Vulkan/VK_Workgroup_Zoo.py b/util/test/tests/Vulkan/VK_Workgroup_Zoo.py
new file mode 100644
index 000000000..7b21da794
--- /dev/null
+++ b/util/test/tests/Vulkan/VK_Workgroup_Zoo.py
@@ -0,0 +1,5 @@
+import rdtest
+
+class VK_Workgroup_Zoo(rdtest.Workgroup_Zoo):  # Vulkan variant of the shared Workgroup_Zoo checks
+    internal = False  # overrides the shared base class, which sets internal = True
+    demos_test_name = 'VK_Workgroup_Zoo'  # presumably the demo program to capture - confirm