Handle diverging/converging control flow in pixel shader

This commit is contained in:
baldurk
2020-04-24 18:34:11 +01:00
parent 320b1cddd4
commit 70256217fe
4 changed files with 123 additions and 8 deletions
+28 -4
View File
@@ -79,7 +79,7 @@ ThreadState::~ThreadState()
// Returns true when this thread has nothing left to execute: it was killed
// (e.g. via OpKill) or its callstack has fully unwound.
//
// NOTE(review): helper invocations are deliberately NOT treated as finished -
// in pixel shaders they must keep running in lockstep with real invocations so
// that derivative calculations remain valid. The flattened diff left the old
// 'helperInvocation || ...' return above the intended one; only the intended
// post-commit return is kept here.
bool ThreadState::Finished() const
{
  return killed || callstack.empty();
}
void ThreadState::FillCallstack(ShaderDebugState &state)
@@ -418,6 +418,10 @@ void ThreadState::JumpToLabel(Id target)
Iter it = debugger.GetIterForInstruction(nextInstruction);
if(it.opcode() == Op::LoopMerge)
{
OpLoopMerge merge(it);
mergeBlock = merge.mergeBlock;
it++;
if(it.opcode() == Op::Branch)
{
@@ -431,18 +435,38 @@ void ThreadState::JumpToLabel(Id target)
// Advance nextInstruction past instructions that need no direct processing so
// that it points at the next 'real' instruction:
//  - OpLine/OpNoLine debug info is skipped outright.
//  - OpSelectionMerge/OpLoopMerge are skipped after recording their merge block
//    in mergeBlock, since pixel-shader convergence handling may need it to know
//    where diverged threads re-join.
// The flattened diff had left the old combined Line/NoLine/merge condition as a
// redundant wrapper 'if' around the new Line/NoLine check; that dead leftover
// is removed here and only the post-commit logic is kept.
void ThreadState::SkipIgnoredInstructions()
{
  while(true)
  {
    Iter it = debugger.GetIterForInstruction(nextInstruction);

    rdcspv::Op op = it.opcode();

    if(op == Op::Line || op == Op::NoLine)
    {
      nextInstruction++;
      continue;
    }

    if(op == Op::SelectionMerge)
    {
      // remember the merge block for divergence handling, then skip
      OpSelectionMerge merge(it);
      mergeBlock = merge.mergeBlock;
      nextInstruction++;
      continue;
    }

    if(op == Op::LoopMerge)
    {
      // remember the merge block for divergence handling, then skip
      OpLoopMerge merge(it);
      mergeBlock = merge.mergeBlock;
      nextInstruction++;
      continue;
    }

    // anything else is a real instruction - stop here
    break;
  }
}
@@ -206,6 +206,8 @@ struct ThreadState
// the last block we were in and the current block, for OpPhis
Id lastBlock, curBlock;
// the id of the merge block that the last branch targeted
Id mergeBlock;
ShaderVariable returnValue;
rdcarray<StackFrame *> callstack;
@@ -301,6 +303,8 @@ private:
GlobalState global;
rdcarray<ThreadState> workgroup;
Id convergeBlock;
uint32_t activeLaneIndex = 0;
ShaderStage stage;
@@ -1561,15 +1561,74 @@ void Debugger::CalcActiveMask(rdcarray<bool> &activeMask)
// one bool per workgroup thread
activeMask.resize(workgroup.size());
// start as active, then if necessary turn off threads that are running diverged
for(bool &active : activeMask)
active = true;
// mark any threads that have finished as inactive, otherwise they're active
for(size_t i = 0; i < workgroup.size(); i++)
activeMask[i] = !workgroup[i].Finished();
// only pixel shaders automatically converge workgroups, compute shaders need explicit sync
if(stage != ShaderStage::Pixel)
return;
// TODO handle diverging control flow
// otherwise we need to make sure that control flow which converges stays in lockstep so that
// derivatives etc are still valid. While diverged, we don't have to keep threads in lockstep
// since using derivatives is invalid.
//
// We take advantage of SPIR-V's structured control flow. We only ever diverge at a branch
// instruction, and the preceding OpLoopMerge/OpSelectionMerge.
//
// So the scheme is as follows:
// * If we haven't diverged and all threads have the same nextInstruction, we're still uniform so
// continue in lockstep.
// * As soon as they differ, we've diverged. Check the last mergeBlock that was specified - we
// won't be uniform again until all threads reach that block.
// * Once we've diverged, any threads which are NOT in the merge block are active, and any threads
// which are in it are inactive. This causes them to pause and wait for others to catch up
// until the point where all threads are in the merge block at which point we've converged and
// can go back to uniformity.
// if we're waiting on a converge block to be reached, we've diverged previously.
bool wasDiverged = convergeBlock != Id();
// see if we've diverged by starting to process different next instructions
bool diverged = false;
for(size_t i = 1; !diverged && i < workgroup.size(); i++)
diverged |= (workgroup[0].nextInstruction != workgroup[i].nextInstruction);
if(!wasDiverged && diverged)
{
// if we've newly diverged, all threads in the workgroup should have the same merge block - the point where we
// become uniform again.
convergeBlock = workgroup[0].mergeBlock;
for(size_t i = 1; !diverged && i < workgroup.size(); i++)
RDCASSERT(convergeBlock == workgroup[i].mergeBlock);
}
if(wasDiverged || diverged)
{
// for every thread, turn it off if it's in the converge block
rdcarray<bool> inConverge;
inConverge.resize(activeMask.size());
for(size_t i = 0; i < workgroup.size(); i++)
inConverge[i] = (workgroup[i].curBlock == convergeBlock);
// is any thread active, but not converged?
bool anyActiveNotConverged = false;
for(size_t i = 0; i < workgroup.size(); i++)
anyActiveNotConverged |= activeMask[i] && !inConverge[i];
if(anyActiveNotConverged)
{
// if so, then only non-converged threads are active right now
for(size_t i = 0; i < workgroup.size(); i++)
activeMask[i] &= !inConverge[i];
}
else
{
// otherwise we can leave the active mask as is, forget the convergence point, and allow
// everything to run as normal
convergeBlock = Id();
}
}
}
void Debugger::AllocateVariable(Id id, Id typeId, DebugVariableType sourceVarType,
@@ -1268,6 +1268,34 @@ void main()
Color = arr[2].xyzx;
break;
}
case 13:
{
Color = vec4(0,0,0,0);
uint loopCount = uint(intval - test);
loopCount -= (uint(gl_FragCoord.x) % 2u);
loopCount -= (uint(gl_FragCoord.y) % 2u) * 2u;
vec2 val = uv.xy;
for(uint i=0; i < loopCount; i++)
{
val += vec2(0.01f, 0.01f);
}
Color = dFdxFine(val).xyxy;
break;
}
case 14:
{
Color = vec4(0,0,0,0);
uint loopCount = uint(intval - test);
loopCount += (uint(gl_FragCoord.x) % 2u);
loopCount += (uint(gl_FragCoord.y) % 2u) * 2u;
vec2 val = uv.xy;
for(uint i=0; i < loopCount; i++)
{
val += vec2(0.01f, 0.01f);
}
Color = dFdxFine(val).xyxy;
break;
}
default: break;
}
}