From f69ac73f1fe48eaef4404a6a69f0983a4ab5692a Mon Sep 17 00:00:00 2001
From: baldurk
Date: Mon, 17 Feb 2025 13:24:13 +0000
Subject: [PATCH] Add demos that utilise groupshared memory in a simple way

---
Reviewer notes (text in this region is not part of the commit):

Each demo dispatches one compute group of 64 threads. Thread 0 fills the
64-float shared array with 1.234; after a group barrier every thread writes
four values to its own element of the output buffer:
  x - the initial shared value it read back
  y - its own input value, read back right after storing it to shared memory
  z - the value stored by its pairwise neighbour (index ^ 1)
  w - 9.99 if its product equals its neighbour's product, otherwise -9.99
The D3D12 demo runs the shader as cs_5_0 and, when m_DXILSupport is set, again
as cs_6_0 with the output bound at an offset of sizeof(Vec4f) * 64 bytes.

NOTE(review): in all three shaders the statement
  tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
reads the neighbour's slot with no barrier separating that read from the
neighbour's own write to the same slot - confirm this is intentional.

NOTE(review): the MSBuild XML in the demos.vcxproj and demos.vcxproj.filters
hunks is missing from this copy of the patch (only the '+' markers and filter
names remain), so those two hunks need to be regenerated before applying.

 util/test/demos/CMakeLists.txt              |   1 +
 util/test/demos/d3d11/d3d11_groupshared.cpp | 112 +++++++++++++
 util/test/demos/d3d11/d3d11_test.cpp        |  10 ++
 util/test/demos/d3d11/d3d11_test.h          |   2 +
 util/test/demos/d3d12/d3d12_groupshared.cpp | 156 ++++++++++++++++++
 util/test/demos/demos.vcxproj               |   3 +
 util/test/demos/demos.vcxproj.filters       |   9 ++
 util/test/demos/vk/vk_groupshared.cpp       | 165 ++++++++++++++++++++
 8 files changed, 458 insertions(+)
 create mode 100644 util/test/demos/d3d11/d3d11_groupshared.cpp
 create mode 100644 util/test/demos/d3d12/d3d12_groupshared.cpp
 create mode 100644 util/test/demos/vk/vk_groupshared.cpp

diff --git a/util/test/demos/CMakeLists.txt b/util/test/demos/CMakeLists.txt
index b0679e5de..57c8dfd31 100644
--- a/util/test/demos/CMakeLists.txt
+++ b/util/test/demos/CMakeLists.txt
@@ -119,6 +119,7 @@ set(VULKAN_SRC
     vk/vk_ext_buffer_address.cpp
     vk/vk_extended_dyn_state.cpp
     vk/vk_graphics_pipeline.cpp
+    vk/vk_groupshared.cpp
     vk/vk_image_layouts.cpp
     vk/vk_imageless_framebuffer.cpp
     vk/vk_indirect.cpp
diff --git a/util/test/demos/d3d11/d3d11_groupshared.cpp b/util/test/demos/d3d11/d3d11_groupshared.cpp
new file mode 100644
index 000000000..36c997f69
--- /dev/null
+++ b/util/test/demos/d3d11/d3d11_groupshared.cpp
@@ -0,0 +1,112 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "d3d11_test.h"
+
+RD_TEST(D3D11_Groupshared, D3D11GraphicsTest)
+{
+  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";
+
+  std::string comp = R"EOSHADER(
+
+RWStructuredBuffer<float> indata : register(u0);
+RWStructuredBuffer<float4> outdata : register(u1);
+
+groupshared float tmp[64];
+
+[numthreads(64,1,1)]
+void main(uint3 tid : SV_GroupThreadID)
+{
+  if(tid.x == 0)
+  {
+    for(int i=0; i < 64; i++) tmp[i] = 1.234f;
+  }
+
+  GroupMemoryBarrierWithGroupSync();
+
+  float4 outval;
+
+  // first write, should be the init value for all threads
+  outval.x = tmp[tid.x];
+
+  tmp[tid.x] = indata[tid.x];
+
+  // second write, should be the read value because we're reading our own value
+  outval.y = tmp[tid.x];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // third write, should be our pairwise neighbour's value
+  outval.z = tmp[tid.x ^ 1];
+
+  // do calculation with our neighbour
+  tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // fourth write, our neighbour should be identical to our value
+  outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
+
+  outdata[tid.x] = outval;
+}
+
+)EOSHADER";
+
+  int main()
+  {
+    // initialise, create window, create device, etc
+    if(!Init())
+      return 3;
+
+    float values[64];
+    for(int i = 0; i < 64; i++)
+      values[i] = RANDF(1.0f, 100.0f);
+    ID3D11BufferPtr inBuf = MakeBuffer().Data(values).UAV().Structured(4);
+    ID3D11BufferPtr outBuf = MakeBuffer().Size(sizeof(Vec4f) * 64).UAV().Structured(sizeof(Vec4f));
+
+    ID3D11UnorderedAccessViewPtr inUAV = MakeUAV(inBuf);
+    ID3D11UnorderedAccessViewPtr outUAV = MakeUAV(outBuf);
+
+    ID3D11ComputeShaderPtr shad = CreateCS(Compile(comp, "main", "cs_5_0", true));
+
+    while(Running())
+    {
+      ClearRenderTargetView(bbRTV, {0.2f, 0.2f, 0.2f, 1.0f});
+
+      ClearUnorderedAccessView(outUAV, Vec4u());
+
+      ctx->CSSetShader(shad, NULL, 0);
+      ctx->CSSetUnorderedAccessViews(0, 1, &inUAV.GetInterfacePtr(), NULL);
+      ctx->CSSetUnorderedAccessViews(1, 1, &outUAV.GetInterfacePtr(), NULL);
+
+      ctx->Dispatch(1, 1, 1);
+
+      Present();
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/demos/d3d11/d3d11_test.cpp b/util/test/demos/d3d11/d3d11_test.cpp
index 013daab81..35f023153 100644
--- a/util/test/demos/d3d11/d3d11_test.cpp
+++ b/util/test/demos/d3d11/d3d11_test.cpp
@@ -520,6 +520,16 @@ void D3D11GraphicsTest::ClearRenderTargetView(ID3D11RenderTargetView *rt, Vec4f
   ctx->ClearRenderTargetView(rt, &col.x);
 }
 
+void D3D11GraphicsTest::ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4f col)
+{
+  ctx->ClearUnorderedAccessViewFloat(uav, &col.x);
+}
+
+void D3D11GraphicsTest::ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4u col)
+{
+  ctx->ClearUnorderedAccessViewUint(uav, &col.x);
+}
+
 void D3D11GraphicsTest::RSSetViewport(D3D11_VIEWPORT view)
 {
   ctx->RSSetViewports(1, &view);
diff --git a/util/test/demos/d3d11/d3d11_test.h b/util/test/demos/d3d11/d3d11_test.h
index decf324ed..79f5f0490 100644
--- a/util/test/demos/d3d11/d3d11_test.h
+++ b/util/test/demos/d3d11/d3d11_test.h
@@ -147,6 +147,8 @@ struct D3D11GraphicsTest : public GraphicsTest
   void IASetVertexBuffer(ID3D11Buffer *vb, UINT stride, UINT offset);
 
   void ClearRenderTargetView(ID3D11RenderTargetView *rt, Vec4f col);
+  void ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4f col);
+  void ClearUnorderedAccessView(ID3D11UnorderedAccessView *uav, Vec4u col);
 
   D3D11_RASTERIZER_DESC GetRasterState();
   void SetRasterState(const D3D11_RASTERIZER_DESC &desc);
diff --git a/util/test/demos/d3d12/d3d12_groupshared.cpp b/util/test/demos/d3d12/d3d12_groupshared.cpp
new file mode 100644
index 000000000..8561190f1
--- /dev/null
+++ b/util/test/demos/d3d12/d3d12_groupshared.cpp
@@ -0,0 +1,156 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "d3d12_test.h"
+
+RD_TEST(D3D12_Groupshared, D3D12GraphicsTest)
+{
+  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";
+
+  std::string comp = R"EOSHADER(
+
+RWStructuredBuffer<float> indata : register(u0);
+RWStructuredBuffer<float4> outdata : register(u1);
+
+groupshared float tmp[64];
+
+[numthreads(64,1,1)]
+void main(uint3 tid : SV_GroupThreadID)
+{
+  if(tid.x == 0)
+  {
+    for(int i=0; i < 64; i++) tmp[i] = 1.234f;
+  }
+
+  GroupMemoryBarrierWithGroupSync();
+
+  float4 outval;
+
+  // first write, should be the init value for all threads
+  outval.x = tmp[tid.x];
+
+  tmp[tid.x] = indata[tid.x];
+
+  // second write, should be the read value because we're reading our own value
+  outval.y = tmp[tid.x];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // third write, should be our pairwise neighbour's value
+  outval.z = tmp[tid.x ^ 1];
+
+  // do calculation with our neighbour
+  tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // fourth write, our neighbour should be identical to our value
+  outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
+
+  outdata[tid.x] = outval;
+}
+
+)EOSHADER";
+
+  int main()
+  {
+    // initialise, create window, create device, etc
+    if(!Init())
+      return 3;
+
+    ID3D12RootSignaturePtr rs = MakeSig({
+        uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0),
+        uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 1),
+    });
+
+    ID3DBlobPtr cs = Compile(comp, "main", "cs_5_0", CompileOptionFlags::SkipOptimise);
+
+    ID3D12PipelineStatePtr pso50 = MakePSO().CS(cs).RootSig(rs);
+    ID3D12PipelineStatePtr pso60;
+
+    if(m_DXILSupport)
+    {
+      cs = Compile(comp, "main", "cs_6_0", CompileOptionFlags::SkipOptimise);
+
+      pso60 = MakePSO().CS(cs).RootSig(rs);
+    }
+
+    float values[64];
+    for(int i = 0; i < 64; i++)
+      values[i] = RANDF(1.0f, 100.0f);
+    ID3D12ResourcePtr inBuf = MakeBuffer().Data(values).UAV();
+    ID3D12ResourcePtr outBuf = MakeBuffer().Size(sizeof(Vec4f) * 64 * 2).UAV();
+
+    D3D12_GPU_DESCRIPTOR_HANDLE outUAVGPU =
+        MakeUAV(outBuf).Format(DXGI_FORMAT_R32G32B32A32_FLOAT).CreateGPU(0);
+    D3D12_CPU_DESCRIPTOR_HANDLE outUAVClearCPU =
+        MakeUAV(outBuf).Format(DXGI_FORMAT_R32G32B32A32_FLOAT).CreateClearCPU(0);
+
+    while(Running())
+    {
+      ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
+
+      Reset(cmd);
+
+      ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      cmd->SetDescriptorHeaps(1, &m_CBVUAVSRV.GetInterfacePtr());
+
+      UINT zero[4] = {};
+      D3D12_RECT rect = {0, 0, sizeof(Vec4f) * 64, 1};
+      cmd->ClearUnorderedAccessViewUint(outUAVGPU, outUAVClearCPU, outBuf, zero, 1, &rect);
+
+      ResourceBarrier(cmd);
+
+      ClearRenderTargetView(cmd, BBRTV, {0.2f, 0.2f, 0.2f, 1.0f});
+
+      cmd->SetComputeRootSignature(rs);
+      cmd->SetComputeRootUnorderedAccessView(0, inBuf->GetGPUVirtualAddress());
+      cmd->SetComputeRootUnorderedAccessView(1, outBuf->GetGPUVirtualAddress());
+
+      setMarker(cmd, "SM5");
+      cmd->SetPipelineState(pso50);
+      cmd->Dispatch(1, 1, 1);
+
+      if(pso60)
+      {
+        setMarker(cmd, "SM6");
+        cmd->SetComputeRootUnorderedAccessView(1,
+                                               outBuf->GetGPUVirtualAddress() + sizeof(Vec4f) * 64);
+        cmd->SetPipelineState(pso60);
+        cmd->Dispatch(1, 1, 1);
+      }
+
+      FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
+
+      cmd->Close();
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();
diff --git a/util/test/demos/demos.vcxproj b/util/test/demos/demos.vcxproj
index 3aea004e8..818bfc6b5 100644
--- a/util/test/demos/demos.vcxproj
+++ b/util/test/demos/demos.vcxproj
@@ -146,6 +146,7 @@
+
@@ -193,6 +194,7 @@
+
@@ -312,6 +314,7 @@
+
diff --git a/util/test/demos/demos.vcxproj.filters b/util/test/demos/demos.vcxproj.filters
index 95bb97011..6b73b3cfd 100644
--- a/util/test/demos/demos.vcxproj.filters
+++ b/util/test/demos/demos.vcxproj.filters
@@ -700,6 +700,15 @@
       Vulkan\demos
+
+      Vulkan\demos
+
+
+      D3D12\demos
+
+
+      D3D11\demos
+
+
diff --git a/util/test/demos/vk/vk_groupshared.cpp b/util/test/demos/vk/vk_groupshared.cpp
new file mode 100644
index 000000000..a99d76419
--- /dev/null
+++ b/util/test/demos/vk/vk_groupshared.cpp
@@ -0,0 +1,165 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2024 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "vk_test.h"
+
+RD_TEST(VK_Groupshared, VulkanGraphicsTest)
+{
+  static constexpr const char *Description = "Test of compute shader that uses groupshared memory.";
+
+  std::string comp = R"EOSHADER(
+#version 460 core
+
+layout(binding = 0, std430) buffer indataBuf
+{
+  float indata[64];
+};
+
+layout(binding = 1, std430) buffer outdataBuf
+{
+  vec4 outdata[64];
+};
+
+shared float tmp[64];
+
+layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+#define GroupMemoryBarrierWithGroupSync() memoryBarrierShared();groupMemoryBarrier();barrier();
+
+void main()
+{
+  uvec3 tid = gl_LocalInvocationID;
+
+  if(gl_LocalInvocationID.x == 0)
+  {
+    for(int i=0; i < 64; i++) tmp[i] = 1.234f;
+  }
+
+  GroupMemoryBarrierWithGroupSync();
+
+  vec4 outval;
+
+  // first write, should be the init value for all threads
+  outval.x = tmp[tid.x];
+
+  tmp[tid.x] = indata[tid.x];
+
+  // second write, should be the read value because we're reading our own value
+  outval.y = tmp[tid.x];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // third write, should be our pairwise neighbour's value
+  outval.z = tmp[tid.x ^ 1];
+
+  // do calculation with our neighbour
+  tmp[tid.x] = tmp[tid.x] * tmp[tid.x ^ 1];
+
+  GroupMemoryBarrierWithGroupSync();
+
+  // fourth write, our neighbour should be identical to our value
+  outval.w = tmp[tid.x] == tmp[tid.x ^ 1] ? 9.99f : -9.99f;
+
+  outdata[tid.x] = outval;
+}
+
+)EOSHADER";
+
+  int main()
+  {
+    // initialise, create window, create context, etc
+    if(!Init())
+      return 3;
+
+    VkDescriptorSetLayout setLayout = createDescriptorSetLayout(vkh::DescriptorSetLayoutCreateInfo({
+        {0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+        {1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, VK_SHADER_STAGE_COMPUTE_BIT},
+    }));
+    VkPipelineLayout layout = createPipelineLayout(vkh::PipelineLayoutCreateInfo({setLayout}));
+
+    VkPipeline pipe = createComputePipeline(vkh::ComputePipelineCreateInfo(
+        layout, CompileShaderModule(comp, ShaderLang::glsl, ShaderStage::comp)));
+
+    VkDescriptorSet descSet = allocateDescriptorSet(setLayout);
+
+    float values[64];
+    for(int i = 0; i < 64; i++)
+      values[i] = RANDF(1.0f, 100.0f);
+    AllocatedBuffer inBuf(this,
+                          vkh::BufferCreateInfo(sizeof(values), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
+                          VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_CPU_TO_GPU}));
+    inBuf.upload(values);
+
+    AllocatedBuffer outBuf(
+        this,
+        vkh::BufferCreateInfo(sizeof(Vec4f) * 64, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+                                                      VK_BUFFER_USAGE_TRANSFER_DST_BIT),
+        VmaAllocationCreateInfo({0, VMA_MEMORY_USAGE_GPU_ONLY}));
+
+    vkh::updateDescriptorSets(
+        device, {
+                    vkh::WriteDescriptorSet(descSet, 0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                            {vkh::DescriptorBufferInfo(inBuf.buffer)}),
+                    vkh::WriteDescriptorSet(descSet, 1, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+                                            {vkh::DescriptorBufferInfo(outBuf.buffer)}),
+                });
+
+    while(Running())
+    {
+      VkCommandBuffer cmd = GetCommandBuffer();
+
+      vkBeginCommandBuffer(cmd, vkh::CommandBufferBeginInfo());
+
+      VkImage swapimg = StartUsingBackbuffer(cmd);
+
+      vkh::cmdClearImage(cmd, swapimg, vkh::ClearColorValue(0.2f, 0.2f, 0.2f, 1.0f));
+
+      vkh::cmdPipelineBarrier(
+          cmd, {},
+          {vkh::BufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
+                                    outBuf.buffer)});
+
+      vkCmdFillBuffer(cmd, outBuf.buffer, 0, sizeof(Vec4f) * 64, 0);
+
+      vkh::cmdPipelineBarrier(cmd, {},
+                              {vkh::BufferMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT,
+                                                        VK_ACCESS_SHADER_WRITE_BIT, outBuf.buffer)});
+
+      vkh::cmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, {descSet}, {});
+      vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipe);
+
+      vkCmdDispatch(cmd, 1, 1, 1);
+
+      FinishUsingBackbuffer(cmd);
+
+      vkEndCommandBuffer(cmd);
+
+      SubmitAndPresent({cmd});
+    }
+
+    return 0;
+  }
+};
+
+REGISTER_TEST();