Handle diverging/converging control flow in pixel shader

This commit is contained in:
baldurk
2020-04-24 18:34:11 +01:00
parent 320b1cddd4
commit 70256217fe
4 changed files with 123 additions and 8 deletions
+28 -4
View File
@@ -79,7 +79,7 @@ ThreadState::~ThreadState()
// Returns true when this thread has nothing left to execute: it was killed
// (e.g. via OpKill) or its callstack has fully unwound.
//
// NOTE(review): helper invocations are deliberately NOT treated as finished -
// in pixel shaders they must keep running in lockstep with real invocations so
// that derivative calculations remain valid. The flattened diff left the old
// 'helperInvocation || ...' return above the intended one; only the intended
// post-commit return is kept here.
bool ThreadState::Finished() const
{
  return killed || callstack.empty();
}
void ThreadState::FillCallstack(ShaderDebugState &state)
@@ -418,6 +418,10 @@ void ThreadState::JumpToLabel(Id target)
Iter it = debugger.GetIterForInstruction(nextInstruction);
if(it.opcode() == Op::LoopMerge)
{
OpLoopMerge merge(it);
mergeBlock = merge.mergeBlock;
it++;
if(it.opcode() == Op::Branch)
{
@@ -431,18 +435,38 @@ void ThreadState::JumpToLabel(Id target)
// Advance nextInstruction past instructions that need no direct processing so
// that it points at the next 'real' instruction:
//  - OpLine/OpNoLine debug info is skipped outright.
//  - OpSelectionMerge/OpLoopMerge are skipped after recording their merge block
//    in mergeBlock, since pixel-shader convergence handling may need it to know
//    where diverged threads re-join.
// The flattened diff had left the old combined Line/NoLine/merge condition as a
// redundant wrapper 'if' around the new Line/NoLine check; that dead leftover
// is removed here and only the post-commit logic is kept.
void ThreadState::SkipIgnoredInstructions()
{
  while(true)
  {
    Iter it = debugger.GetIterForInstruction(nextInstruction);

    rdcspv::Op op = it.opcode();

    if(op == Op::Line || op == Op::NoLine)
    {
      nextInstruction++;
      continue;
    }

    if(op == Op::SelectionMerge)
    {
      // remember the merge block for divergence handling, then skip
      OpSelectionMerge merge(it);
      mergeBlock = merge.mergeBlock;
      nextInstruction++;
      continue;
    }

    if(op == Op::LoopMerge)
    {
      // remember the merge block for divergence handling, then skip
      OpLoopMerge merge(it);
      mergeBlock = merge.mergeBlock;
      nextInstruction++;
      continue;
    }

    // anything else is a real instruction - stop here
    break;
  }
}
@@ -206,6 +206,8 @@ struct ThreadState
// the last block we were in and the current block, for OpPhis
Id lastBlock, curBlock;
// the id of the merge block that the last branch targeted
Id mergeBlock;
ShaderVariable returnValue;
rdcarray<StackFrame *> callstack;
@@ -301,6 +303,8 @@ private:
GlobalState global;
rdcarray<ThreadState> workgroup;
Id convergeBlock;
uint32_t activeLaneIndex = 0;
ShaderStage stage;
@@ -1561,15 +1561,74 @@ void Debugger::CalcActiveMask(rdcarray<bool> &activeMask)
// one bool per workgroup thread
activeMask.resize(workgroup.size());
// start as active, then if necessary turn off threads that are running diverged
for(bool &active : activeMask)
active = true;
// mark any threads that have finished as inactive, otherwise they're active
for(size_t i = 0; i < workgroup.size(); i++)
activeMask[i] = !workgroup[i].Finished();
// only pixel shaders automatically converge workgroups, compute shaders need explicit sync
if(stage != ShaderStage::Pixel)
return;
// TODO handle diverging control flow
// otherwise we need to make sure that control flow which converges stays in lockstep so that
// derivatives etc are still valid. While diverged, we don't have to keep threads in lockstep
// since using derivatives is invalid.
//
// We take advantage of SPIR-V's structured control flow. We only ever diverge at a branch
// instruction, and the preceding OpLoopMerge/OpSelectionMerge.
//
// So the scheme is as follows:
// * If we haven't diverged and all threads have the same nextInstruction, we're still uniform so
// continue in lockstep.
// * As soon as they differ, we've diverged. Check the last mergeBlock that was specified - we
// won't be uniform again until all threads reach that block.
// * Once we've diverged, any threads which are NOT in the merge block are active, and any threads
// which are in it are inactive. This causes them to pause and wait for others to catch up
// until the point where all threads are in the merge block at which point we've converged and
// can go back to uniformity.
// if we're waiting on a converge block to be reached, we've diverged previously.
bool wasDiverged = convergeBlock != Id();
// see if we've diverged by starting to process different next instructions
bool diverged = false;
for(size_t i = 1; !diverged && i < workgroup.size(); i++)
diverged |= (workgroup[0].nextInstruction != workgroup[i].nextInstruction);
if(!wasDiverged && diverged)
{
// if we've newly diverged, all threads in the workgroup should have the same merge block - the point where we
// become uniform again.
convergeBlock = workgroup[0].mergeBlock;
for(size_t i = 1; !diverged && i < workgroup.size(); i++)
RDCASSERT(convergeBlock == workgroup[i].mergeBlock);
}
if(wasDiverged || diverged)
{
// for every thread, turn it off if it's in the converge block
rdcarray<bool> inConverge;
inConverge.resize(activeMask.size());
for(size_t i = 0; i < workgroup.size(); i++)
inConverge[i] = (workgroup[i].curBlock == convergeBlock);
// is any thread active, but not converged?
bool anyActiveNotConverged = false;
for(size_t i = 0; i < workgroup.size(); i++)
anyActiveNotConverged |= activeMask[i] && !inConverge[i];
if(anyActiveNotConverged)
{
// if so, then only non-converged threads are active right now
for(size_t i = 0; i < workgroup.size(); i++)
activeMask[i] &= !inConverge[i];
}
else
{
// otherwise we can leave the active mask as is, forget the convergence point, and allow
// everything to run as normal
convergeBlock = Id();
}
}
}
void Debugger::AllocateVariable(Id id, Id typeId, DebugVariableType sourceVarType,
@@ -1268,6 +1268,34 @@ void main()
Color = arr[2].xyzx;
break;
}
case 13:
{
Color = vec4(0,0,0,0);
uint loopCount = uint(intval - test);
loopCount -= (uint(gl_FragCoord.x) % 2u);
loopCount -= (uint(gl_FragCoord.y) % 2u) * 2u;
vec2 val = uv.xy;
for(uint i=0; i < loopCount; i++)
{
val += vec2(0.01f, 0.01f);
}
Color = dFdxFine(val).xyxy;
break;
}
case 14:
{
Color = vec4(0,0,0,0);
uint loopCount = uint(intval - test);
loopCount += (uint(gl_FragCoord.x) % 2u);
loopCount += (uint(gl_FragCoord.y) % 2u) * 2u;
vec2 val = uv.xy;
for(uint i=0; i < loopCount; i++)
{
val += vec2(0.01f, 0.01f);
}
Color = dFdxFine(val).xyxy;
break;
}
default: break;
}
}