From 70256217fea82e6a2b2a57531dc2def784d3fd55 Mon Sep 17 00:00:00 2001
From: baldurk
Date: Fri, 24 Apr 2020 18:34:11 +0100
Subject: [PATCH] Handle diverging/converging control flow in pixel shader

Each thread now records the merge block named by the most recent
OpSelectionMerge/OpLoopMerge it stepped past. When the threads of a
pixel shader workgroup stop sharing the same nextInstruction,
CalcActiveMask latches that merge block as the convergence point and
only keeps threads that are outside it active, until every running
thread has reached it.

Helper invocations are no longer treated as finished, and finished
threads are marked inactive in the active mask.

Adds two shader debug zoo cases whose loop count varies per pixel
before the result is fed to dFdxFine.
---
 .../driver/shaders/spirv/spirv_debug.cpp      | 32 +++++++--
 renderdoc/driver/shaders/spirv/spirv_debug.h  |  4 ++
 .../shaders/spirv/spirv_debug_setup.cpp       | 67 +++++++++++++++++--
 util/test/demos/vk/vk_shader_debug_zoo.cpp    | 28 ++++++++
 4 files changed, 123 insertions(+), 8 deletions(-)

diff --git a/renderdoc/driver/shaders/spirv/spirv_debug.cpp b/renderdoc/driver/shaders/spirv/spirv_debug.cpp
index 9154bd8c7..96048863e 100644
--- a/renderdoc/driver/shaders/spirv/spirv_debug.cpp
+++ b/renderdoc/driver/shaders/spirv/spirv_debug.cpp
@@ -79,7 +79,7 @@ ThreadState::~ThreadState()
 
 bool ThreadState::Finished() const
 {
-  return helperInvocation || killed || callstack.empty();
+  return killed || callstack.empty();
 }
 
 void ThreadState::FillCallstack(ShaderDebugState &state)
@@ -418,6 +418,10 @@ void ThreadState::JumpToLabel(Id target)
   Iter it = debugger.GetIterForInstruction(nextInstruction);
   if(it.opcode() == Op::LoopMerge)
   {
+    OpLoopMerge merge(it);
+
+    mergeBlock = merge.mergeBlock;
+
     it++;
     if(it.opcode() == Op::Branch)
     {
@@ -431,18 +435,38 @@ void ThreadState::JumpToLabel(Id target)
 void ThreadState::SkipIgnoredInstructions()
 {
   // skip OpLine/OpNoLine now, so that nextInstruction points to the next real instruction
-  // Also for now we don't care about structured control flow so skip past merge statements so we
-  // process the branch.
+  // Also for structured control flow we just save the merge block in case we need it for converging
+  // in pixel shaders, but otherwise skip them.
   while(true)
   {
     Iter it = debugger.GetIterForInstruction(nextInstruction);
     rdcspv::Op op = it.opcode();
-    if(op == Op::Line || op == Op::NoLine || op == Op::SelectionMerge || op == Op::LoopMerge)
+    if(op == Op::Line || op == Op::NoLine)
     {
       nextInstruction++;
       continue;
     }
 
+    if(op == Op::SelectionMerge)
+    {
+      OpSelectionMerge merge(it);
+
+      mergeBlock = merge.mergeBlock;
+
+      nextInstruction++;
+      continue;
+    }
+
+    if(op == Op::LoopMerge)
+    {
+      OpLoopMerge merge(it);
+
+      mergeBlock = merge.mergeBlock;
+
+      nextInstruction++;
+      continue;
+    }
+
     break;
   }
 }
diff --git a/renderdoc/driver/shaders/spirv/spirv_debug.h b/renderdoc/driver/shaders/spirv/spirv_debug.h
index 399e6658e..8df30c69d 100644
--- a/renderdoc/driver/shaders/spirv/spirv_debug.h
+++ b/renderdoc/driver/shaders/spirv/spirv_debug.h
@@ -206,6 +206,8 @@ struct ThreadState
 
   // the last block we were in and the current block, for OpPhis
   Id lastBlock, curBlock;
+  // the id of the merge block that the last branch targeted
+  Id mergeBlock;
 
   ShaderVariable returnValue;
   rdcarray<StackFrame *> callstack;
@@ -301,6 +303,8 @@ private:
   GlobalState global;
   rdcarray<ThreadState> workgroup;
 
+  Id convergeBlock;
+
   uint32_t activeLaneIndex = 0;
   ShaderStage stage;
 
diff --git a/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp b/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp
index 6d7d122e5..0cbb7bdd9 100644
--- a/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp
+++ b/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp
@@ -1561,15 +1561,74 @@ void Debugger::CalcActiveMask(rdcarray<bool> &activeMask)
   // one bool per workgroup thread
   activeMask.resize(workgroup.size());
 
-  // start as active, then if necessary turn off threads that are running diverged
-  for(bool &active : activeMask)
-    active = true;
+  // mark any threads that have finished as inactive, otherwise they're active
+  for(size_t i = 0; i < workgroup.size(); i++)
+    activeMask[i] = !workgroup[i].Finished();
 
   // only pixel shaders automatically converge workgroups, compute shaders need explicit sync
   if(stage != ShaderStage::Pixel)
     return;
 
-  // TODO handle diverging control flow
+  // otherwise we need to make sure that control flow which converges stays in lockstep so that
+  // derivatives etc are still valid. While diverged, we don't have to keep threads in lockstep
+  // since using derivatives is invalid.
+  //
+  // We take advantage of SPIR-V's structured control flow. We only ever diverge at a branch
+  // instruction, and the preceding OpLoopMerge/OpSelectionMerge.
+  //
+  // So the scheme is as follows:
+  // * If we haven't diverged and all threads have the same nextInstruction, we're still uniform so
+  //   continue in lockstep.
+  // * As soon as they differ, we've diverged. Check the last mergeBlock that was specified - we
+  //   won't be uniform again until all threads reach that block.
+  // * Once we've diverged, any threads which are NOT in the merge block are active, and any threads
+  //   which are in it are inactive. This causes them to pause and wait for others to catch up
+  //   until the point where all threads are in the merge block at which point we've converged and
+  //   can go back to uniformity.
+
+  // if we're waiting on a converge block to be reached, we've diverged previously.
+  bool wasDiverged = convergeBlock != Id();
+
+  // see if we've diverged by starting processing different next instructions
+  bool diverged = false;
+  for(size_t i = 1; !diverged && i < workgroup.size(); i++)
+    diverged |= (workgroup[0].nextInstruction != workgroup[i].nextInstruction);
+
+  if(!wasDiverged && diverged)
+  {
+    // if we've newly diverged, all running threads should have the same merge block - the point
+    // where we become uniform again. Finished threads don't advance so theirs may be stale.
+    convergeBlock = workgroup[0].mergeBlock;
+    for(size_t i = 1; i < workgroup.size(); i++)
+      RDCASSERT(!activeMask[i] || convergeBlock == workgroup[i].mergeBlock);
+  }
+
+  if(wasDiverged || diverged)
+  {
+    // for every thread, turn it off if it's in the converge block
+    rdcarray<bool> inConverge;
+    inConverge.resize(activeMask.size());
+    for(size_t i = 0; i < workgroup.size(); i++)
+      inConverge[i] = (workgroup[i].curBlock == convergeBlock);
+
+    // is any thread active, but not converged?
+    bool anyActiveNotConverged = false;
+    for(size_t i = 0; i < workgroup.size(); i++)
+      anyActiveNotConverged |= activeMask[i] && !inConverge[i];
+
+    if(anyActiveNotConverged)
+    {
+      // if so, then only non-converged threads are active right now
+      for(size_t i = 0; i < workgroup.size(); i++)
+        activeMask[i] &= !inConverge[i];
+    }
+    else
+    {
+      // otherwise we can leave the active mask as is, forget the convergence point, and allow
+      // everything to run as normal
+      convergeBlock = Id();
+    }
+  }
 }
 
 void Debugger::AllocateVariable(Id id, Id typeId, DebugVariableType sourceVarType,
diff --git a/util/test/demos/vk/vk_shader_debug_zoo.cpp b/util/test/demos/vk/vk_shader_debug_zoo.cpp
index 28dad0e96..9e95b2f02 100644
--- a/util/test/demos/vk/vk_shader_debug_zoo.cpp
+++ b/util/test/demos/vk/vk_shader_debug_zoo.cpp
@@ -1268,6 +1268,34 @@ void main()
       Color = arr[2].xyzx;
       break;
     }
+    case 13:
+    {
+      Color = vec4(0,0,0,0);
+      uint loopCount = uint(intval - test);
+      loopCount -= (uint(gl_FragCoord.x) % 2u);
+      loopCount -= (uint(gl_FragCoord.y) % 2u) * 2u;
+      vec2 val = uv.xy;
+      for(uint i=0; i < loopCount; i++)
+      {
+        val += vec2(0.01f, 0.01f);
+      }
+      Color = dFdxFine(val).xyxy;
+      break;
+    }
+    case 14:
+    {
+      Color = vec4(0,0,0,0);
+      uint loopCount = uint(intval - test);
+      loopCount += (uint(gl_FragCoord.x) % 2u);
+      loopCount += (uint(gl_FragCoord.y) % 2u) * 2u;
+      vec2 val = uv.xy;
+      for(uint i=0; i < loopCount; i++)
+      {
+        val += vec2(0.01f, 0.01f);
+      }
+      Color = dFdxFine(val).xyxy;
+      break;
+    }
     default: break;
   }
 }