mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-29 21:30:53 +00:00
Rework DXIL ThreadState in preparation for Multithreading
Make all members private, with Set/Get APIs as required. Mark containers which must be thread safe.
This commit is contained in:
@@ -1780,11 +1780,13 @@ void MemoryTracking::ConvertGlobalAllocToLocal(Id allocId)
|
||||
}
|
||||
}
|
||||
|
||||
ThreadState::ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId)
|
||||
ThreadState::ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId,
|
||||
uint32_t laneIndex)
|
||||
: m_Debugger(debugger),
|
||||
m_GlobalState(globalState),
|
||||
m_Program(debugger.GetProgram()),
|
||||
m_MaxSSAId(maxSSAId)
|
||||
m_MaxSSAId(maxSSAId),
|
||||
m_WorkgroupIndex(laneIndex)
|
||||
{
|
||||
m_ShaderType = m_Program.GetShaderType();
|
||||
m_Assigned.resize(maxSSAId);
|
||||
@@ -1900,6 +1902,13 @@ void ThreadState::EnterEntryPoint(const Function *function, ShaderDebugState *st
|
||||
m_Memory.ConvertGlobalAllocToLocal(allocId);
|
||||
}
|
||||
|
||||
// active lane : needs its own local backing memory for GSM
|
||||
if(m_State)
|
||||
{
|
||||
for(Id id : m_GlobalState.groupSharedMemoryIds)
|
||||
m_Memory.ConvertGlobalAllocToLocal(id);
|
||||
}
|
||||
|
||||
m_State = NULL;
|
||||
}
|
||||
|
||||
@@ -6844,7 +6853,7 @@ bool ThreadState::IsVariableAssigned(const Id id) const
|
||||
}
|
||||
}
|
||||
|
||||
ShaderVariable ThreadState::GetBuiltin(ShaderBuiltin builtin)
|
||||
ShaderVariable ThreadState::GetBuiltin(ShaderBuiltin builtin) const
|
||||
{
|
||||
auto local = m_Builtins.find(builtin);
|
||||
if(local != m_Builtins.end())
|
||||
@@ -8900,7 +8909,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
m_GlobalState.constantBlocksDatas = apiWrapper->GetConstantBlocksDatas();
|
||||
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
m_Workgroup.push_back(ThreadState(*this, m_GlobalState, maxSSAId));
|
||||
m_Workgroup.push_back(ThreadState(*this, m_GlobalState, maxSSAId, i));
|
||||
|
||||
// Get the thread state from the API wrapper
|
||||
const rdcarray<rdcflatmap<ShaderBuiltin, ShaderVariable>> &threadsBuiltins =
|
||||
@@ -8909,8 +8918,8 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
{
|
||||
m_Workgroup[i].m_Builtins = threadsBuiltins[i];
|
||||
m_Workgroup[i].m_Input = threadsInputs[i];
|
||||
m_Workgroup[i].SetBuiltins(threadsBuiltins[i]);
|
||||
m_Workgroup[i].SetInput(threadsInputs[i]);
|
||||
}
|
||||
|
||||
ret->sourceVars = apiWrapper->GetSourceVars();
|
||||
@@ -9461,13 +9470,12 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
uint32_t countOutputs = (uint32_t)outputs.size();
|
||||
|
||||
// Make fake ShaderVariable struct to hold all the outputs
|
||||
ShaderVariable &outStruct = activeState.m_Output.var;
|
||||
ShaderVariable outStruct;
|
||||
outStruct.name = DXIL_FAKE_OUTPUT_STRUCT_NAME;
|
||||
outStruct.rows = 0;
|
||||
outStruct.columns = 0;
|
||||
outStruct.type = VarType::Struct;
|
||||
outStruct.members.resize(countOutputs);
|
||||
activeState.m_Output.id = outputSSAId;
|
||||
|
||||
for(uint32_t i = 0; i < countOutputs; ++i)
|
||||
{
|
||||
@@ -9597,15 +9605,16 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
{
|
||||
// Make a single source variable mapping for the whole output struct
|
||||
SourceVariableMapping outputMapping;
|
||||
outputMapping.name = activeState.m_Output.var.name;
|
||||
outputMapping.name = outStruct.name;
|
||||
outputMapping.type = VarType::Struct;
|
||||
outputMapping.rows = 0;
|
||||
outputMapping.columns = 0;
|
||||
outputMapping.variables.resize(1);
|
||||
outputMapping.variables[0].name = activeState.m_Output.var.name;
|
||||
outputMapping.variables[0].name = outStruct.name;
|
||||
outputMapping.variables[0].type = DebugVariableType::Variable;
|
||||
ret->sourceVars.push_back(outputMapping);
|
||||
}
|
||||
activeState.SetOutput(outputSSAId, outStruct);
|
||||
|
||||
// Global source variable mappings valid for lifetime of the debug session
|
||||
for(const GlobalVariable &gv : m_GlobalState.globals)
|
||||
@@ -9621,8 +9630,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
ret->sourceVars.push_back(outputMapping);
|
||||
}
|
||||
|
||||
ret->inputs = {activeState.m_Input};
|
||||
ret->inputs.append(activeState.m_Input.members);
|
||||
ret->inputs = {activeState.GetInput()};
|
||||
ret->constantBlocks = m_GlobalState.constantBlocks;
|
||||
ret->readOnlyResources = m_GlobalState.readOnlyResources;
|
||||
ret->readWriteResources = m_GlobalState.readWriteResources;
|
||||
@@ -9632,20 +9640,13 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
{
|
||||
ThreadState &lane = m_Workgroup[i];
|
||||
lane.m_WorkgroupIndex = i;
|
||||
|
||||
if(i != m_ActiveLaneIndex)
|
||||
{
|
||||
lane.m_Variables = activeState.m_Variables;
|
||||
lane.m_Assigned = activeState.m_Assigned;
|
||||
lane.m_Live = activeState.m_Live;
|
||||
lane.m_IsGlobal = activeState.m_IsGlobal;
|
||||
}
|
||||
lane.InitialiseFromActive(activeState);
|
||||
}
|
||||
|
||||
// Add the output struct to the global state
|
||||
if(countOutputs)
|
||||
m_GlobalState.globals.push_back(activeState.m_Output);
|
||||
m_GlobalState.globals.push_back(activeState.GetOutput());
|
||||
|
||||
InitialiseWorkgroup();
|
||||
|
||||
@@ -9679,16 +9680,16 @@ void Debugger::InitialiseWorkgroup()
|
||||
|
||||
if(m_Stage == ShaderStage::Pixel)
|
||||
{
|
||||
lane.m_Helper = workgroupProperties[i][ThreadProperty::Helper] != 0;
|
||||
lane.m_QuadLaneIndex = workgroupProperties[i][ThreadProperty::QuadLane];
|
||||
lane.m_QuadId = workgroupProperties[i][ThreadProperty::QuadId];
|
||||
lane.SetHelper(workgroupProperties[i][ThreadProperty::Helper] != 0);
|
||||
lane.SetQuadLaneIndex(workgroupProperties[i][ThreadProperty::QuadLane]);
|
||||
lane.SetQuadId(workgroupProperties[i][ThreadProperty::QuadId]);
|
||||
}
|
||||
|
||||
lane.m_Dead = workgroupProperties[i][ThreadProperty::Active] == 0;
|
||||
lane.m_SubgroupIdx = workgroupProperties[i][ThreadProperty::SubgroupIdx];
|
||||
lane.SetDead(workgroupProperties[i][ThreadProperty::Active] == 0);
|
||||
lane.SetSubgroupIdx(workgroupProperties[i][ThreadProperty::SubgroupIdx]);
|
||||
|
||||
// Only add active lanes to control flow
|
||||
if(!lane.m_Dead)
|
||||
if(!lane.IsDead())
|
||||
threadIds.push_back(i);
|
||||
}
|
||||
|
||||
@@ -9699,7 +9700,7 @@ void Debugger::InitialiseWorkgroup()
|
||||
rdcarray<uint32_t> processedQuads;
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
{
|
||||
uint32_t desiredQuad = m_Workgroup[i].m_QuadId;
|
||||
uint32_t desiredQuad = m_Workgroup[i].GetQuadId();
|
||||
|
||||
// ignore threads not in any quad
|
||||
if(desiredQuad == 0)
|
||||
@@ -9721,14 +9722,14 @@ void Debugger::InitialiseWorkgroup()
|
||||
};
|
||||
for(uint32_t j = i + 1, t = 1; j < threadsInWorkgroup && t < 4; j++)
|
||||
{
|
||||
if(m_Workgroup[j].m_QuadId == desiredQuad)
|
||||
if(m_Workgroup[j].GetQuadId() == desiredQuad)
|
||||
threads[t++] = j;
|
||||
}
|
||||
|
||||
// now swizzle the threads to know each other
|
||||
for(uint32_t src = 0; src < 4; src++)
|
||||
{
|
||||
uint32_t lane = m_Workgroup[threads[src]].m_QuadLaneIndex;
|
||||
uint32_t lane = m_Workgroup[threads[src]].GetQuadLaneIndex();
|
||||
|
||||
if(lane >= 4)
|
||||
continue;
|
||||
@@ -9738,7 +9739,7 @@ void Debugger::InitialiseWorkgroup()
|
||||
if(threads[dst] == ~0U)
|
||||
continue;
|
||||
|
||||
m_Workgroup[threads[dst]].m_QuadNeighbours[lane] = threads[src];
|
||||
m_Workgroup[threads[dst]].SetQuadNeighbours(lane, threads[src]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9765,7 +9766,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
{
|
||||
thread.EnterEntryPoint(m_EntryPointFunction, &initial);
|
||||
thread.FillCallstack(initial);
|
||||
initial.nextInstruction = thread.m_ActiveGlobalInstructionIdx;
|
||||
initial.nextInstruction = thread.GetActiveGlobalInstructionIdx();
|
||||
startPoint = initial.nextInstruction;
|
||||
}
|
||||
else
|
||||
@@ -9774,10 +9775,6 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
}
|
||||
}
|
||||
|
||||
// active lane : needs it own local backing memory, copied from global at the start
|
||||
for(Id id : m_GlobalState.groupSharedMemoryIds)
|
||||
active.m_Memory.ConvertGlobalAllocToLocal(id);
|
||||
|
||||
// globals won't be filled out by entering the entry point, ensure their change is registered.
|
||||
for(const GlobalVariable &gv : m_GlobalState.globals)
|
||||
initial.changes.push_back({ShaderVariable(), gv.var});
|
||||
@@ -9887,9 +9884,9 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
thread.StepNext(NULL, m_ApiWrapper, m_Workgroup, activeMask);
|
||||
}
|
||||
|
||||
threadExecutionStates[threadId] = thread.m_EnteredPoints;
|
||||
threadExecutionStates[threadId] = thread.GetEnteredPoints();
|
||||
|
||||
uint32_t threadConvergencePoint = thread.m_ConvergencePoint;
|
||||
const uint32_t threadConvergencePoint = thread.GetConvergencePoint();
|
||||
// the thread activated a new convergence point
|
||||
if(threadConvergencePoint != INVALID_EXECUTION_POINT)
|
||||
{
|
||||
@@ -9905,11 +9902,12 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
}
|
||||
++countConvergePointThreads;
|
||||
}
|
||||
if(!thread.m_PartialConvergencePoints.empty())
|
||||
const DXIL::BlockArray *partialConvergentPoints = thread.GetPartialConvergencePoints();
|
||||
if(!partialConvergentPoints->empty())
|
||||
{
|
||||
if(newPartialConvergentPoints == NULL)
|
||||
{
|
||||
newPartialConvergentPoints = &thread.m_PartialConvergencePoints;
|
||||
newPartialConvergentPoints = partialConvergentPoints;
|
||||
RDCASSERT(newPartialConvergentPoints);
|
||||
if(newPartialConvergentPoints)
|
||||
RDCASSERT(!newPartialConvergentPoints->empty());
|
||||
@@ -9917,7 +9915,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
else
|
||||
{
|
||||
// All the threads in the tangle should set the same partial convergence points
|
||||
RDCASSERT(*newPartialConvergentPoints == thread.m_PartialConvergencePoints);
|
||||
RDCASSERT(*newPartialConvergentPoints == *partialConvergentPoints);
|
||||
}
|
||||
++countPartialConvergePointThreads;
|
||||
}
|
||||
@@ -9925,7 +9923,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
if(thread.Finished())
|
||||
tangle.SetThreadDead(threadId);
|
||||
|
||||
if(thread.m_Diverged)
|
||||
if(thread.GetDiverged())
|
||||
++countDivergedThreads;
|
||||
}
|
||||
for(size_t lane = 0; lane < m_Workgroup.size(); lane++)
|
||||
@@ -9937,7 +9935,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
if(hasDebugState)
|
||||
{
|
||||
ThreadState &thread = m_Workgroup[m_ActiveLaneIndex];
|
||||
state.nextInstruction = thread.m_ActiveGlobalInstructionIdx;
|
||||
state.nextInstruction = thread.GetActiveGlobalInstructionIdx();
|
||||
thread.FillCallstack(state);
|
||||
ret.push_back(std::move(state));
|
||||
}
|
||||
@@ -9972,7 +9970,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
|
||||
}
|
||||
if(!anyActiveThreads)
|
||||
{
|
||||
active.m_Dead = true;
|
||||
active.SetDead(true);
|
||||
m_ControlFlow.UpdateState(threadExecutionStates);
|
||||
RDCERR("No active threads in any tangle, killing active thread to terminate the debugger");
|
||||
}
|
||||
|
||||
@@ -327,16 +327,59 @@ struct MemoryTracking
|
||||
|
||||
struct ThreadState
|
||||
{
|
||||
ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId);
|
||||
ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId,
|
||||
uint32_t laneIndex);
|
||||
~ThreadState();
|
||||
|
||||
void EnterFunction(const DXIL::Function *function, const rdcarray<DXIL::Value *> &args);
|
||||
void EnterEntryPoint(const DXIL::Function *function, ShaderDebugState *state);
|
||||
void StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
|
||||
const rdcarray<ThreadState> &workgroup, const rdcarray<bool> &activeMask);
|
||||
void StepOverNopInstructions();
|
||||
void FillCallstack(ShaderDebugState &state);
|
||||
void RetireLiveIDs();
|
||||
|
||||
bool Finished() const;
|
||||
const ShaderVariable &GetInput() const { return m_Input; }
|
||||
const GlobalVariable &GetOutput() const { return m_Output; }
|
||||
bool IsDead() const { return m_Dead; }
|
||||
uint32_t GetQuadId() const { return m_QuadId; }
|
||||
uint32_t GetQuadLaneIndex() const { return m_QuadLaneIndex; }
|
||||
uint32_t GetActiveGlobalInstructionIdx() const { return m_ActiveGlobalInstructionIdx; }
|
||||
DXIL::BlockArray GetEnteredPoints() const { return m_EnteredPoints; }
|
||||
uint32_t GetConvergencePoint() const { return m_ConvergencePoint; }
|
||||
bool GetDiverged() const { return m_Diverged; }
|
||||
const DXIL::BlockArray *GetPartialConvergencePoints() const
|
||||
{
|
||||
return &m_PartialConvergencePoints;
|
||||
}
|
||||
|
||||
void SetBuiltins(const BuiltinInputs &builtins) { m_Builtins = builtins; }
|
||||
void SetInput(const ShaderVariable &input) { m_Input = input; }
|
||||
void SetOutput(const Id id, const ShaderVariable &var)
|
||||
{
|
||||
m_Output.id = id;
|
||||
m_Output.var = var;
|
||||
}
|
||||
void SetDead(bool dead) { m_Dead = dead; }
|
||||
void SetHelper(bool helper) { m_Helper = helper; }
|
||||
void SetQuadLaneIndex(uint32_t quadLaneIndex) { m_QuadLaneIndex = quadLaneIndex; }
|
||||
void SetQuadId(uint32_t quadId) { m_QuadId = quadId; }
|
||||
void SetSubgroupIdx(uint32_t subgroupIdx) { m_SubgroupIdx = subgroupIdx; }
|
||||
void SetQuadNeighbours(uint32_t lane, uint32_t index) { m_QuadNeighbours[lane] = index; }
|
||||
|
||||
void InitialiseFromActive(const ThreadState &active)
|
||||
{
|
||||
m_Variables = active.m_Variables;
|
||||
m_Assigned = active.m_Assigned;
|
||||
m_Live = active.m_Live;
|
||||
m_IsGlobal = active.m_IsGlobal;
|
||||
}
|
||||
|
||||
void UpdateBackingMemoryFromVariable(void *ptr, uint64_t &allocSize, const ShaderVariable &var);
|
||||
|
||||
private:
|
||||
void EnterFunction(const DXIL::Function *function, const rdcarray<DXIL::Value *> &args);
|
||||
|
||||
bool InUniformBlock() const;
|
||||
|
||||
bool JumpToBlock(const DXIL::Block *target, bool divergencePoint);
|
||||
@@ -349,8 +392,6 @@ struct ThreadState
|
||||
rdcstr GetArgumentName(uint32_t i) const;
|
||||
Id GetArgumentId(uint32_t i) const;
|
||||
ResourceReferenceInfo GetResource(Id handleId, bool &annotatedHandle);
|
||||
void FillCallstack(ShaderDebugState &state);
|
||||
void RetireLiveIDs();
|
||||
|
||||
bool GetShaderVariable(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode,
|
||||
ShaderVariable &var, bool flushDenormInput = true) const
|
||||
@@ -369,7 +410,6 @@ struct ThreadState
|
||||
bool GetPhiVariable(const Id &id, DXIL::Operation opCode, DXIL::DXOp dxOpCode,
|
||||
ShaderVariable &var) const;
|
||||
bool GetVariableHelper(DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var) const;
|
||||
void UpdateBackingMemoryFromVariable(void *ptr, uint64_t &allocSize, const ShaderVariable &var);
|
||||
void UpdateMemoryVariableFromBackingMemory(Id memoryId, const void *ptr);
|
||||
void UpdateGlobalBackingMemory(Id ptrId, const MemoryTracking::Pointer &ptr,
|
||||
const MemoryTracking::Allocation &allocation,
|
||||
@@ -397,7 +437,7 @@ struct ThreadState
|
||||
ShaderVariable &var, bool flushDenormInput, bool isLive) const;
|
||||
bool IsVariableAssigned(const Id id) const;
|
||||
|
||||
ShaderVariable GetBuiltin(ShaderBuiltin builtin);
|
||||
ShaderVariable GetBuiltin(ShaderBuiltin builtin) const;
|
||||
uint32_t GetSubgroupActiveLanes(const rdcarray<bool> &activeMask,
|
||||
const rdcarray<ThreadState> &workgroup,
|
||||
rdcarray<uint32_t> &activeLanes) const;
|
||||
@@ -421,7 +461,7 @@ struct ThreadState
|
||||
ShaderVariable m_Input;
|
||||
GlobalVariable m_Output;
|
||||
|
||||
// Known SSA ShaderVariables
|
||||
// Known SSA ShaderVariables : this must be a thread safe container
|
||||
rdcarray<ShaderVariable> m_Variables;
|
||||
// SSA Variables captured when a branch happens for use in phi nodes
|
||||
std::map<Id, ShaderVariable> m_PhiVariables;
|
||||
@@ -429,7 +469,7 @@ struct ThreadState
|
||||
rdcarray<bool> m_Live;
|
||||
// Global variables at the current scope
|
||||
rdcarray<bool> m_IsGlobal;
|
||||
// If the variable has been assigned a value
|
||||
// If the variable has been assigned a value : this must be a thread safe container
|
||||
rdcarray<bool> m_Assigned;
|
||||
// Annotated handle properties
|
||||
std::map<Id, AnnotationProperties> m_AnnotatedProperties;
|
||||
|
||||
Reference in New Issue
Block a user