Add demos that utilise groupshared memory in a simple way

This commit is contained in:
baldurk
2025-02-17 13:24:13 +00:00
parent 26e575f4e0
commit f69ac73f1f
8 changed files with 458 additions and 0 deletions
+1
View File
@@ -119,6 +119,7 @@ set(VULKAN_SRC
vk/vk_ext_buffer_address.cpp
vk/vk_extended_dyn_state.cpp
vk/vk_graphics_pipeline.cpp
vk/vk_groupshared.cpp
vk/vk_image_layouts.cpp
vk/vk_imageless_framebuffer.cpp
vk/vk_indirect.cpp
+112
View File
@@ -0,0 +1,112 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "d3d11_test.h"
// D3D11 demo: one 64-thread compute dispatch per frame that reads and writes
// groupshared memory, writing a float4 of observations per thread to a UAV.
RD_TEST(D3D11_Groupshared, D3D11GraphicsTest)
{
  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";

  // HLSL compute shader. Thread 0 seeds the 64-entry groupshared array with
  // 1.234, then every thread records (x) the seeded value, (y) its own indata
  // value read back from groupshared, (z) the value of its "tid ^ 1" neighbour
  // and (w) +/-9.99 depending on whether its product matches the neighbour's.
  // NOTE(review): there is no barrier between reading the neighbour's slot and
  // overwriting our own in the multiply step - presumably intentional for the
  // demo, confirm.
  std::string comp = R"EOSHADER(
RWStructuredBuffer<float> indata : register(u0);
RWStructuredBuffer<float4> outdata : register(u1);
groupshared float tmp[64];
[numthreads(64,1,1)]
void main(uint3 tid : SV_GroupThreadID)
{
if(tid.x == 0)
{
for(int i=0; i < 64; i++) tmp[i] = 1.234f;
}
GroupMemoryBarrierWithGroupSync();
float4 outval;
// first write, should be the init value for all threads
outval.x = tmp[tid.x];
tmp[tid.x] = indata[tid.x];
// second write, should be the read value because we're reading our own value
outval.y = tmp[tid.x];
GroupMemoryBarrierWithGroupSync();
// third write, should be our pairwise neighbour's value
outval.z = tmp[tid.x ^ 1];
// do calculation with our neighbour
tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
GroupMemoryBarrierWithGroupSync();
// fourth write, our neighbour should be identical to our value
outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
outdata[tid.x] = outval;
}
)EOSHADER";

  // Test entry point. Returns 3 if initialisation fails, 0 once the frame loop exits.
  int main()
  {
    // window + device setup is handled by the base class
    if(!Init())
      return 3;

    // one random input per thread of the 64-wide group
    float inputValues[64];
    for(float &v : inputValues)
      v = RANDF(1.0f, 100.0f);

    // structured buffers: 4-byte elements for the float input, Vec4f-sized for the output
    ID3D11BufferPtr srcBuffer = MakeBuffer().Data(inputValues).UAV().Structured(4);
    ID3D11BufferPtr dstBuffer =
        MakeBuffer().Size(sizeof(Vec4f) * 64).UAV().Structured(sizeof(Vec4f));

    ID3D11UnorderedAccessViewPtr srcView = MakeUAV(srcBuffer);
    ID3D11UnorderedAccessViewPtr dstView = MakeUAV(dstBuffer);

    ID3D11ComputeShaderPtr computeShader = CreateCS(Compile(comp, "main", "cs_5_0", true));

    while(Running())
    {
      // reset both the backbuffer and the output buffer at the start of every frame
      ClearRenderTargetView(bbRTV, {0.2f, 0.2f, 0.2f, 1.0f});
      ClearUnorderedAccessView(dstView, Vec4u());

      ctx->CSSetShader(computeShader, nullptr, 0);

      // u0 = input, u1 = output, matching the register() bindings in the shader
      ctx->CSSetUnorderedAccessViews(0, 1, &srcView.GetInterfacePtr(), nullptr);
      ctx->CSSetUnorderedAccessViews(1, 1, &dstView.GetInterfacePtr(), nullptr);

      // a single group of 64 threads
      ctx->Dispatch(1, 1, 1);

      Present();
    }

    return 0;
  }
};

REGISTER_TEST();
+10
View File
@@ -520,6 +520,16 @@ void D3D11GraphicsTest::ClearRenderTargetView(ID3D11RenderTargetView *rt, Vec4f
ctx->ClearRenderTargetView(rt, &col.x);
}
// Float overload: clears the given UAV via ClearUnorderedAccessViewFloat on ctx,
// passing the address of col.x as the value array.
void D3D11GraphicsTest::ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4f col)
{
  const auto *clearValues = &col.x;
  ctx->ClearUnorderedAccessViewFloat(uav, clearValues);
}
// Unsigned-integer overload: clears the given UAV via ClearUnorderedAccessViewUint
// on ctx, passing the address of col.x as the value array.
void D3D11GraphicsTest::ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4u col)
{
  const auto *clearValues = &col.x;
  ctx->ClearUnorderedAccessViewUint(uav, clearValues);
}
void D3D11GraphicsTest::RSSetViewport(D3D11_VIEWPORT view)
{
ctx->RSSetViewports(1, &view);
+2
View File
@@ -147,6 +147,8 @@ struct D3D11GraphicsTest : public GraphicsTest
void IASetVertexBuffer(ID3D11Buffer *vb, UINT stride, UINT offset);
void ClearRenderTargetView(ID3D11RenderTargetView *rt, Vec4f col);
// Clear a UAV through ctx. The Vec4f overload forwards to
// ClearUnorderedAccessViewFloat, the Vec4u overload to ClearUnorderedAccessViewUint.
void ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4f col);
void ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4u col);
D3D11_RASTERIZER_DESC GetRasterState();
void SetRasterState(const D3D11_RASTERIZER_DESC &desc);
+156
View File
@@ -0,0 +1,156 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "d3d12_test.h"
// D3D12 demo: dispatches a 64-thread compute shader that reads and writes
// groupshared memory, once with an SM5.0 build and, when DXIL is supported,
// again with an SM6.0 build writing to the second half of the output buffer.
RD_TEST(D3D12_Groupshared, D3D12GraphicsTest)
{
  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";

  // HLSL compute shader. Thread 0 seeds the 64-entry groupshared array with
  // 1.234, then every thread records (x) the seeded value, (y) its own indata
  // value read back from groupshared, (z) the value of its "tid ^ 1" neighbour
  // and (w) +/-9.99 depending on whether its product matches the neighbour's.
  // NOTE(review): there is no barrier between reading the neighbour's slot and
  // overwriting our own in the multiply step - presumably intentional for the
  // demo, confirm.
  std::string comp = R"EOSHADER(
RWStructuredBuffer<float> indata : register(u0);
RWStructuredBuffer<float4> outdata : register(u1);
groupshared float tmp[64];
[numthreads(64,1,1)]
void main(uint3 tid : SV_GroupThreadID)
{
if(tid.x == 0)
{
for(int i=0; i < 64; i++) tmp[i] = 1.234f;
}
GroupMemoryBarrierWithGroupSync();
float4 outval;
// first write, should be the init value for all threads
outval.x = tmp[tid.x];
tmp[tid.x] = indata[tid.x];
// second write, should be the read value because we're reading our own value
outval.y = tmp[tid.x];
GroupMemoryBarrierWithGroupSync();
// third write, should be our pairwise neighbour's value
outval.z = tmp[tid.x ^ 1];
// do calculation with our neighbour
tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
GroupMemoryBarrierWithGroupSync();
// fourth write, our neighbour should be identical to our value
outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
outdata[tid.x] = outval;
}
)EOSHADER";

  // Test entry point. Returns 3 if initialisation fails, 0 once the frame loop exits.
  int main()
  {
    // window + device setup is handled by the base class
    if(!Init())
      return 3;

    // two root UAVs: register u0 (input) and u1 (output), both in space 0
    ID3D12RootSignaturePtr rootSig = MakeSig({
        uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0),
        uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 1),
    });

    // the SM5.0 pipeline is always built
    ID3DBlobPtr shaderBlob = Compile(comp, "main", "cs_5_0", CompileOptionFlags::SkipOptimise);
    ID3D12PipelineStatePtr pso50 = MakePSO().CS(shaderBlob).RootSig(rootSig);

    // the SM6.0 pipeline is only built when DXIL is available; otherwise it stays null
    ID3D12PipelineStatePtr pso60;
    if(m_DXILSupport)
    {
      shaderBlob = Compile(comp, "main", "cs_6_0", CompileOptionFlags::SkipOptimise);
      pso60 = MakePSO().CS(shaderBlob).RootSig(rootSig);
    }

    // one random input per thread of the 64-wide group
    float inputValues[64];
    for(float &v : inputValues)
      v = RANDF(1.0f, 100.0f);

    // the output holds two runs of 64 Vec4f results: SM5 first, SM6 second
    ID3D12ResourcePtr srcBuffer = MakeBuffer().Data(inputValues).UAV();
    ID3D12ResourcePtr dstBuffer = MakeBuffer().Size(sizeof(Vec4f) * 64 * 2).UAV();

    // GPU and CPU descriptors for the same view, both passed to the UAV clear below
    D3D12_GPU_DESCRIPTOR_HANDLE dstViewGPU =
        MakeUAV(dstBuffer).Format(DXGI_FORMAT_R32G32B32A32_FLOAT).CreateGPU(0);
    D3D12_CPU_DESCRIPTOR_HANDLE dstViewClearCPU =
        MakeUAV(dstBuffer).Format(DXGI_FORMAT_R32G32B32A32_FLOAT).CreateClearCPU(0);

    while(Running())
    {
      ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();

      Reset(cmd);

      ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);

      cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());

      // zero the output buffer before the dispatches write to it
      UINT clearValue[4] = {};
      D3D12_RECT clearRect = {0, 0, sizeof(Vec4f) * 64, 1};
      cmd->ClearUnorderedAccessViewUint(dstViewGPU, dstViewClearCPU, dstBuffer, clearValue, 1,
                                        &clearRect);

      ResourceBarrier(cmd);

      ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});

      cmd->SetComputeRootSignature(rootSig);
      cmd->SetComputeRootUnorderedAccessView(0, srcBuffer->GetGPUVirtualAddress());
      cmd->SetComputeRootUnorderedAccessView(1, dstBuffer->GetGPUVirtualAddress());

      // SM5.0 run: results go to the first 64 Vec4f entries
      setMarker(cmd, "SM5");
      cmd->SetPipelineState(pso50);
      cmd->Dispatch(1, 1, 1);

      if(pso60)
      {
        // SM6.0 run: rebind the output 64 Vec4f entries further into the buffer
        setMarker(cmd, "SM6");
        cmd->SetComputeRootUnorderedAccessView(
            1, dstBuffer->GetGPUVirtualAddress() + sizeof(Vec4f) * 64);
        cmd->SetPipelineState(pso60);
        cmd->Dispatch(1, 1, 1);
      }

      FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);

      cmd->Close();

      SubmitAndPresent({cmd});
    }

    return 0;
  }
};

REGISTER_TEST();
+3
View File
@@ -146,6 +146,7 @@
<ClCompile Include="d3d11\d3d11_empty_drawcall.cpp" />
<ClCompile Include="d3d11\d3d11_empty_viewports.cpp" />
<ClCompile Include="d3d11\d3d11_feature_level_9.cpp" />
<ClCompile Include="d3d11\d3d11_groupshared.cpp" />
<ClCompile Include="d3d11\d3d11_helpers.cpp" />
<ClCompile Include="d3d11\d3d11_large_buffer.cpp" />
<ClCompile Include="d3d11\d3d11_leak_check.cpp" />
@@ -193,6 +194,7 @@
<ClCompile Include="d3d12\d3d12_empty_capture.cpp" />
<ClCompile Include="d3d12\d3d12_execute_indirect.cpp" />
<ClCompile Include="d3d12\d3d12_existing_heap.cpp" />
<ClCompile Include="d3d12\d3d12_groupshared.cpp" />
<ClCompile Include="d3d12\d3d12_helpers.cpp" />
<ClCompile Include="d3d12\d3d12_mesh_shader.cpp" />
<ClCompile Include="d3d12\d3d12_multi_wait_before_signal.cpp" />
@@ -312,6 +314,7 @@
<ClCompile Include="vk\vk_empty_capture.cpp" />
<ClCompile Include="vk\vk_extended_dyn_state.cpp" />
<ClCompile Include="vk\vk_graphics_pipeline.cpp" />
<ClCompile Include="vk\vk_groupshared.cpp" />
<ClCompile Include="vk\vk_khr_buffer_address.cpp" />
<ClCompile Include="vk\vk_large_buffer.cpp" />
<ClCompile Include="vk\vk_large_descriptor_sets.cpp" />
+9
View File
@@ -700,6 +700,15 @@
<ClCompile Include="vk\vk_subgroup_zoo.cpp">
<Filter>Vulkan\demos</Filter>
</ClCompile>
<ClCompile Include="vk\vk_groupshared.cpp">
<Filter>Vulkan\demos</Filter>
</ClCompile>
<ClCompile Include="d3d12\d3d12_groupshared.cpp">
<Filter>D3D12\demos</Filter>
</ClCompile>
<ClCompile Include="d3d11\d3d11_groupshared.cpp">
<Filter>D3D11\demos</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="D3D11">
+165
View File
@@ -0,0 +1,165 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "vk_test.h"
// Vulkan demo: one 64-invocation compute dispatch per frame that reads and
// writes 'shared' memory, writing a vec4 of observations per invocation to a
// storage buffer.
RD_TEST(VK_Groupshared, VulkanGraphicsTest)
{
  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";

  // GLSL compute shader. Invocation 0 seeds the 64-entry shared array with
  // 1.234, then every invocation records (x) the seeded value, (y) its own
  // indata value read back from shared memory, (z) the value of its "tid ^ 1"
  // neighbour and (w) +/-9.99 depending on whether its product matches the
  // neighbour's.
  //
  // GroupMemoryBarrierWithGroupSync() is a preprocessor macro expanding to
  // memoryBarrierShared(), groupMemoryBarrier() and barrier(); it must be
  // introduced with '#define' or the shader will not compile.
  // NOTE(review): there is no barrier between reading the neighbour's slot and
  // overwriting our own in the multiply step - presumably intentional for the
  // demo, confirm.
  std::string comp = R"EOSHADER(
#version 460 core
layout(binding = 0, std430) buffer indataBuf
{
float indata[64];
};
layout(binding = 1, std430) buffer outdataBuf
{
vec4 outdata[64];
};
shared float tmp[64];
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
#define GroupMemoryBarrierWithGroupSync() memoryBarrierShared();groupMemoryBarrier();barrier();
void main()
{
uvec3 tid = gl_LocalInvocationID;
if(gl_LocalInvocationID.x == 0)
{
for(int i=0; i < 64; i++) tmp[i] = 1.234f;
}
GroupMemoryBarrierWithGroupSync();
vec4 outval;
// first write, should be the init value for all threads
outval.x = tmp[tid.x];
tmp[tid.x] = indata[tid.x];
// second write, should be the read value because we're reading our own value
outval.y = tmp[tid.x];
GroupMemoryBarrierWithGroupSync();
// third write, should be our pairwise neighbour's value
outval.z = tmp[tid.x ^ 1];
// do calculation with our neighbour
tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
GroupMemoryBarrierWithGroupSync();
// fourth write, our neighbour should be identical to our value
outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
outdata[tid.x] = outval;
}
)EOSHADER";

  // Test entry point. Returns 3 if initialisation fails, 0 once the frame loop exits.
  int main()
  {
    // initialise, create window, create context, etc
    if(!Init())
      return 3;

    // two storage buffers visible to the compute stage: binding 0 = input, binding 1 = output
    VkDescriptorSetLayout setLayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
        {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
        {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
    }));

    VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo({setLayout}));

    VkPipeline pipe = createComputePipeline(vkh::ComputePipelineCreateInfo(
        layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp)));

    VkDescriptorSet descSet = allocateDescriptorSet(setLayout);

    // one random input per invocation of the 64-wide workgroup
    float values[64];
    for(int i = 0; i < 64; i++)
      values[i] = RANDF(1.0f, 100.0f);

    // the input buffer is filled from the CPU via upload()
    AllocatedBuffer inBuf(this,
                          vkh::BufferCreateInfo(sizeof(values), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
                          VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));

    inBuf.upload(values);

    // the output buffer also needs TRANSFER_DST usage because it is reset with
    // vkCmdFillBuffer every frame
    AllocatedBuffer outBuf(
        this,
        vkh::BufferCreateInfo(sizeof(Vec4f) * 64, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
                                                      VK_BUFFER_USAGE_TRANSFER_DST_BIT),
        VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY}));

    vkh::updateDescriptorSets(
        device, {
                    vkh::WriteDescriptorSet(descSet, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                            {vkh::DescriptorBufferInfo(inBuf.buffer)}),
                    vkh::WriteDescriptorSet(descSet, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                            {vkh::DescriptorBufferInfo(outBuf.buffer)}),
                });

    while(Running())
    {
      VkCommandBuffer cmd = GetCommandBuffer();

      vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());

      VkImage swapimg = StartUsingBackbuffer(cmd);

      vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));

      // order the previous frame's shader writes before this frame's fill
      vkh::cmdPipelineBarrier(
          cmd, {},
          {vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
                                    outBuf.buffer)});

      // zero the output buffer
      vkCmdFillBuffer(cmd, outBuf.buffer, 0, sizeof(Vec4f) * 64, 0);

      // order the fill before the shader writes of the dispatch below
      vkh::cmdPipelineBarrier(cmd, {},
                              {vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT,
                                                        VK_ACCESS_SHADER_WRITE_BIT, outBuf.buffer)});

      vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {descSet}, {});
      vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);

      // a single workgroup of 64 invocations
      vkCmdDispatch(cmd, 1, 1, 1);

      FinishUsingBackbuffer(cmd);

      vkEndCommandBuffer(cmd);

      SubmitAndPresent({cmd});
    }

    return 0;
  }
};

REGISTER_TEST();