mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
Change SPIRV debugger control flow to emulate maximal reconvergence
This commit is contained in:
@@ -173,6 +173,11 @@ ThreadState::~ThreadState()
|
||||
callstack.clear();
|
||||
}
|
||||
|
||||
void ThreadState::SetConvergencePoint(Id block)
{
  // Record the execution point (instruction index of the block's label) at
  // which this thread expects control flow to reconverge; consumed later when
  // stepping to register the merge point with the thread's tangle.
  convergenceInstruction = debugger.GetInstructionForLabel(block);
}
|
||||
|
||||
bool ThreadState::Finished() const
|
||||
{
|
||||
return dead || callstack.empty();
|
||||
@@ -618,7 +623,11 @@ void ThreadState::JumpToLabel(Id target)
|
||||
frame->lastBlock = frame->curBlock;
|
||||
frame->curBlock = target;
|
||||
|
||||
nextInstruction = debugger.GetInstructionForLabel(target) + 1;
|
||||
diverged = true;
|
||||
|
||||
uint32_t labelInstruction = debugger.GetInstructionForLabel(target);
|
||||
enteredPoints.push_back(labelInstruction);
|
||||
nextInstruction = labelInstruction + 1;
|
||||
|
||||
// if jumping to an empty unconditional loop header, continue to the loop block
|
||||
Iter it = debugger.GetIterForInstruction(nextInstruction);
|
||||
@@ -627,6 +636,7 @@ void ThreadState::JumpToLabel(Id target)
|
||||
OpLoopMerge merge(it);
|
||||
|
||||
mergeBlock = merge.mergeBlock;
|
||||
SetConvergencePoint(merge.mergeBlock);
|
||||
|
||||
it++;
|
||||
if(it.opcode() == Op::Branch)
|
||||
@@ -699,6 +709,7 @@ void ThreadState::SkipIgnoredInstructions()
|
||||
OpSelectionMerge merge(it);
|
||||
|
||||
mergeBlock = merge.mergeBlock;
|
||||
SetConvergencePoint(merge.mergeBlock);
|
||||
|
||||
nextInstruction++;
|
||||
continue;
|
||||
@@ -709,6 +720,7 @@ void ThreadState::SkipIgnoredInstructions()
|
||||
OpLoopMerge merge(it);
|
||||
|
||||
mergeBlock = merge.mergeBlock;
|
||||
SetConvergencePoint(merge.mergeBlock);
|
||||
|
||||
nextInstruction++;
|
||||
continue;
|
||||
@@ -734,6 +746,10 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
|
||||
|
||||
Iter it = debugger.GetIterForInstruction(nextInstruction);
|
||||
nextInstruction++;
|
||||
diverged = false;
|
||||
enteredPoints.clear();
|
||||
convergenceInstruction = INVALID_EXECUTION_POINT;
|
||||
functionReturnPoint = INVALID_EXECUTION_POINT;
|
||||
|
||||
OpDecoder opdata(it);
|
||||
|
||||
@@ -3879,6 +3895,8 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
|
||||
// function. The second time we do have a return value so we process it and continue
|
||||
if(returnValue.name.empty())
|
||||
{
|
||||
// The instruction after a function call is defined to be a convergence point
|
||||
functionReturnPoint = nextInstruction;
|
||||
uint32_t returnInstruction = nextInstruction - 1;
|
||||
nextInstruction = debugger.GetInstructionForFunction(call.function);
|
||||
|
||||
@@ -3891,6 +3909,8 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
|
||||
{
|
||||
SetDst(call.result, returnValue);
|
||||
returnValue.name.clear();
|
||||
// The instruction after a function call is defined to be a convergence point, mark that we entered it
|
||||
enteredPoints.push_back(nextInstruction);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "api/replay/rdcarray.h"
|
||||
#include "maths/vec.h"
|
||||
#include "spirv_common.h"
|
||||
#include "spirv_controlflow.h"
|
||||
#include "spirv_processor.h"
|
||||
|
||||
struct SPIRVInterfaceAccess;
|
||||
@@ -225,12 +226,19 @@ struct ThreadState
|
||||
|
||||
// the id of the merge block that the last branch targetted
|
||||
Id mergeBlock;
|
||||
uint32_t convergenceInstruction;
|
||||
uint32_t functionReturnPoint;
|
||||
ShaderVariable returnValue;
|
||||
rdcarray<StackFrame *> callstack;
|
||||
|
||||
// the list of IDs that are currently valid and live
|
||||
rdcarray<Id> live;
|
||||
|
||||
// true if executed an operation which could trigger divergence
|
||||
bool diverged;
|
||||
// list of potential convergence points that were entered in a single step (used for tracking thread convergence)
|
||||
rdcarray<uint32_t> enteredPoints;
|
||||
|
||||
std::map<Id, uint32_t> lastWrite;
|
||||
|
||||
// quad ID (arbitrary, just used to find neighbours for derivatives)
|
||||
@@ -259,6 +267,7 @@ private:
|
||||
bool ReferencePointer(Id id);
|
||||
|
||||
void SkipIgnoredInstructions();
|
||||
void SetConvergencePoint(Id block);
|
||||
|
||||
ShaderDebugState *m_State = NULL;
|
||||
};
|
||||
@@ -498,7 +507,6 @@ private:
|
||||
|
||||
std::set<rdcstr> usedNames;
|
||||
std::map<Id, rdcstr> dynamicNames;
|
||||
void CalcActiveMask(rdcarray<bool> &activeMask);
|
||||
|
||||
struct
|
||||
{
|
||||
@@ -528,6 +536,8 @@ private:
|
||||
rdcarray<LocalMapping> activeLocalMappings;
|
||||
} m_DebugInfo;
|
||||
|
||||
rdcspv::ControlFlow controlFlow;
|
||||
|
||||
const ScopeData *GetScope(size_t offset) const;
|
||||
};
|
||||
|
||||
|
||||
@@ -1561,6 +1561,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
|
||||
std::sort(liveGlobals.begin(), liveGlobals.end());
|
||||
|
||||
rdcarray<rdcspv::ThreadIndex> threadIds;
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
{
|
||||
ThreadState &lane = workgroup[i];
|
||||
@@ -1589,8 +1590,14 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
// now that the globals are allocated and their storage won't move, we can take pointers to them
|
||||
for(const PointerId &p : pointerIDs)
|
||||
p.Set(*this, global, lane);
|
||||
|
||||
// Only add active lanes to control flow
|
||||
if(!lane.dead)
|
||||
threadIds.push_back(i);
|
||||
}
|
||||
|
||||
controlFlow.Construct(threadIds);
|
||||
|
||||
// find quad neighbours
|
||||
{
|
||||
rdcarray<uint32_t> processedQuads;
|
||||
@@ -2462,6 +2469,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
if(steps == 0)
|
||||
{
|
||||
ShaderDebugState initial;
|
||||
uint32_t startBlock = INVALID_EXECUTION_POINT;
|
||||
|
||||
// we should be sitting at the entry point function prologue, step forward into the first block
|
||||
// and past any function-local variable declarations
|
||||
@@ -2474,6 +2482,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
thread.EnterEntryPoint(&initial);
|
||||
FillCallstack(thread, initial);
|
||||
initial.nextInstruction = thread.nextInstruction;
|
||||
startBlock = thread.callstack.back()->curBlock.value();
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2495,6 +2504,21 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
|
||||
ret.push_back(std::move(initial));
|
||||
|
||||
// Set the initial block for the threads in the root tangle
|
||||
ThreadExecutionStates threadExecutionStates;
|
||||
TangleGroup &tangles = controlFlow.GetTangles();
|
||||
RDCASSERTEQUAL(tangles.size(), 1);
|
||||
RDCASSERTNOTEQUAL(startBlock, INVALID_EXECUTION_POINT);
|
||||
for(Tangle &tangle : tangles)
|
||||
{
|
||||
RDCASSERT(tangle.IsAliveActive());
|
||||
for(uint32_t threadIdx = 0; threadIdx < workgroup.size(); ++threadIdx)
|
||||
{
|
||||
if(!workgroup[threadIdx].Finished())
|
||||
threadExecutionStates[threadIdx].push_back(startBlock);
|
||||
}
|
||||
}
|
||||
controlFlow.UpdateState(threadExecutionStates);
|
||||
steps++;
|
||||
}
|
||||
|
||||
@@ -2513,21 +2537,60 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
if(active.Finished())
|
||||
break;
|
||||
|
||||
// calculate the current mask of which threads are active
|
||||
CalcActiveMask(activeMask);
|
||||
// Execute the threads in each active tangle
|
||||
ThreadExecutionStates threadExecutionStates;
|
||||
TangleGroup &tangles = controlFlow.GetTangles();
|
||||
|
||||
// step all active members of the workgroup
|
||||
for(size_t lane = 0; lane < workgroup.size(); lane++)
|
||||
bool anyActiveThreads = false;
|
||||
for(Tangle &tangle : tangles)
|
||||
{
|
||||
ThreadState &thread = workgroup[lane];
|
||||
if(!tangle.IsAliveActive())
|
||||
continue;
|
||||
|
||||
if(activeMask[lane])
|
||||
rdcarray<rdcspv::ThreadReference> threadRefs = tangle.GetThreadRefs();
|
||||
// calculate the current active thread mask from the threads in the tangle
|
||||
{
|
||||
if(thread.nextInstruction >= instructionOffsets.size())
|
||||
// one bool per workgroup thread
|
||||
activeMask.resize(workgroup.size());
|
||||
|
||||
// start with all threads as inactive
|
||||
for(size_t i = 0; i < workgroup.size(); i++)
|
||||
activeMask[i] = false;
|
||||
|
||||
// activate the threads in the tangle
|
||||
for(const rdcspv::ThreadReference &ref : threadRefs)
|
||||
{
|
||||
uint32_t idx = ref.id;
|
||||
RDCASSERT(idx < workgroup.size(), idx, workgroup.size());
|
||||
RDCASSERT(!workgroup[idx].Finished());
|
||||
activeMask[idx] = true;
|
||||
anyActiveThreads = true;
|
||||
}
|
||||
}
|
||||
|
||||
ExecutionPoint newConvergeInstruction = INVALID_EXECUTION_POINT;
|
||||
ExecutionPoint newFunctionReturnPoint = INVALID_EXECUTION_POINT;
|
||||
uint32_t countActiveThreads = 0;
|
||||
uint32_t countDivergedThreads = 0;
|
||||
uint32_t countConvergePointThreads = 0;
|
||||
uint32_t countFunctionReturnThreads = 0;
|
||||
|
||||
// step all active members of the workgroup
|
||||
for(size_t lane = 0; lane < workgroup.size(); lane++)
|
||||
{
|
||||
if(!activeMask[lane])
|
||||
continue;
|
||||
++countActiveThreads;
|
||||
|
||||
ThreadState &thread = workgroup[lane];
|
||||
const uint32_t currentPC = thread.nextInstruction;
|
||||
const uint32_t threadId = lane;
|
||||
if(currentPC >= instructionOffsets.size())
|
||||
{
|
||||
if(lane == activeLaneIndex)
|
||||
ret.emplace_back();
|
||||
|
||||
tangle.SetThreadDead(threadId);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2535,7 +2598,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
{
|
||||
ShaderDebugState state;
|
||||
|
||||
size_t instOffs = instructionOffsets[thread.nextInstruction];
|
||||
size_t instOffs = instructionOffsets[currentPC];
|
||||
|
||||
// see if we're retiring any IDs at this state
|
||||
for(size_t l = 0; l < thread.live.size();)
|
||||
@@ -2574,7 +2637,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
|
||||
if(m_DebugInfo.valid)
|
||||
{
|
||||
size_t endOffs = instructionOffsets[thread.nextInstruction - 1];
|
||||
size_t endOffs = instructionOffsets[currentPC - 1];
|
||||
|
||||
// append any inlined functions to the top of the stack
|
||||
InlineData *inlined = m_DebugInfo.lineInline[endOffs];
|
||||
@@ -2622,8 +2685,73 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
{
|
||||
thread.StepNext(NULL, workgroup, activeMask);
|
||||
}
|
||||
threadExecutionStates[threadId] = thread.enteredPoints;
|
||||
|
||||
uint32_t threadConvergeInstruction = thread.convergenceInstruction;
|
||||
// the thread activated a new convergence point
|
||||
if(threadConvergeInstruction != INVALID_EXECUTION_POINT)
|
||||
{
|
||||
if(newConvergeInstruction == INVALID_EXECUTION_POINT)
|
||||
{
|
||||
newConvergeInstruction = threadConvergeInstruction;
|
||||
RDCASSERTNOTEQUAL(newConvergeInstruction, INVALID_EXECUTION_POINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
// All the threads in the tangle should set the same convergence point
|
||||
RDCASSERTEQUAL(threadConvergeInstruction, newConvergeInstruction);
|
||||
}
|
||||
++countConvergePointThreads;
|
||||
}
|
||||
uint32_t threadFunctionReturnPoint = thread.functionReturnPoint;
|
||||
// the thread activated a new function return point
|
||||
if(threadFunctionReturnPoint != INVALID_EXECUTION_POINT)
|
||||
{
|
||||
if(newFunctionReturnPoint == INVALID_EXECUTION_POINT)
|
||||
{
|
||||
newFunctionReturnPoint = threadFunctionReturnPoint;
|
||||
RDCASSERTNOTEQUAL(newFunctionReturnPoint, INVALID_EXECUTION_POINT);
|
||||
}
|
||||
else
|
||||
{
|
||||
// All the threads in the tangle should set the same function return point
|
||||
RDCASSERTEQUAL(threadFunctionReturnPoint, newFunctionReturnPoint);
|
||||
}
|
||||
++countFunctionReturnThreads;
|
||||
}
|
||||
|
||||
if(thread.Finished())
|
||||
tangle.SetThreadDead(threadId);
|
||||
|
||||
if(thread.diverged)
|
||||
++countDivergedThreads;
|
||||
}
|
||||
if(countConvergePointThreads)
|
||||
{
|
||||
// all the active threads should have a convergence point if any have one
|
||||
RDCASSERTEQUAL(countConvergePointThreads, countActiveThreads);
|
||||
tangle.AddMergePoint(newConvergeInstruction);
|
||||
}
|
||||
if(countFunctionReturnThreads)
|
||||
{
|
||||
// all the active threads should have a function return point if any have one
|
||||
RDCASSERTEQUAL(countFunctionReturnThreads, countActiveThreads);
|
||||
tangle.AddFunctionReturnPoint(newFunctionReturnPoint);
|
||||
}
|
||||
if(countDivergedThreads)
|
||||
{
|
||||
// all the active threads should have diverged if any diverges
|
||||
RDCASSERTEQUAL(countDivergedThreads, countActiveThreads);
|
||||
tangle.SetDiverged(true);
|
||||
}
|
||||
}
|
||||
if(!anyActiveThreads)
|
||||
{
|
||||
active.dead = true;
|
||||
controlFlow.UpdateState(threadExecutionStates);
|
||||
RDCERR("No active threads in any tangle, killing active thread to terminate the debugger");
|
||||
}
|
||||
controlFlow.UpdateState(threadExecutionStates);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -3426,78 +3554,6 @@ rdcstr Debugger::GetHumanName(Id id)
|
||||
return name;
|
||||
}
|
||||
|
||||
void Debugger::CalcActiveMask(rdcarray<bool> &activeMask)
{
  // One flag per thread in the workgroup: threads that have finished are
  // inactive, every other thread starts out active.
  const size_t threadCount = workgroup.size();
  activeMask.resize(threadCount);
  for(size_t t = 0; t < threadCount; t++)
    activeMask[t] = !workgroup[t].Finished();

  // Control flow that reconverges must stay in lockstep so that derivatives
  // etc remain valid. While threads are diverged, using derivatives is invalid
  // anyway, so we don't need to keep them synchronised.
  //
  // We lean on SPIR-V's structured control flow: divergence only ever happens
  // at a branch instruction, and the preceding OpLoopMerge/OpSelectionMerge
  // names the merge block.
  //
  // Scheme:
  // * While we haven't diverged and every thread has the same nextInstruction,
  //   we're still uniform and continue in lockstep.
  // * The first time nextInstruction differs, we've diverged; the last
  //   mergeBlock that was recorded is where we'll become uniform again.
  // * Once diverged, any thread NOT yet in the merge block stays active while
  //   threads already in it are paused. When all threads have reached the
  //   merge block we've converged and return to uniform execution.

  // a pending converge block means a previous step already diverged
  const bool previouslyDiverged = convergeBlock != Id();

  // detect fresh divergence: some thread is about to execute a different
  // instruction than thread 0
  bool newlyDiverged = false;
  for(size_t t = 1; t < threadCount && !newlyDiverged; t++)
    newlyDiverged = (workgroup[t].nextInstruction != workgroup[0].nextInstruction);

  if(newlyDiverged && !previouslyDiverged)
  {
    // on fresh divergence all (active) threads must agree on the merge block -
    // that block is the point where we become uniform again.
    convergeBlock = workgroup[0].mergeBlock;
    for(size_t t = 1; t < threadCount; t++)
      RDCASSERT(!activeMask[t] || convergeBlock == workgroup[t].mergeBlock);
  }

  if(previouslyDiverged || newlyDiverged)
  {
    // determine which threads are currently sitting in the converge block
    rdcarray<bool> atConvergePoint;
    atConvergePoint.resize(activeMask.size());
    for(size_t t = 0; t < threadCount; t++)
    {
      const ThreadState &thread = workgroup[t];
      atConvergePoint[t] =
          !thread.callstack.empty() && thread.callstack.back()->curBlock == convergeBlock;
    }

    // is some thread still active but not yet at the converge point?
    bool stillWaiting = false;
    for(size_t t = 0; t < threadCount; t++)
      stillWaiting |= activeMask[t] && !atConvergePoint[t];

    if(stillWaiting)
    {
      // pause the threads that have already arrived; only the stragglers
      // execute this step
      for(size_t t = 0; t < threadCount; t++)
        activeMask[t] &= !atConvergePoint[t];
    }
    else
    {
      // everyone has arrived: we've converged, so forget the convergence point
      // and leave the active mask as-is so all threads run as normal
      convergeBlock = Id();
    }
  }
}
|
||||
|
||||
void Debugger::AllocateVariable(Id id, Id typeId, ShaderVariable &outVar)
|
||||
{
|
||||
// allocs should always be pointers
|
||||
|
||||
Reference in New Issue
Block a user