From d7169cf8b0830214a454e8020c4f0eb31d4dded6 Mon Sep 17 00:00:00 2001 From: Jake Turner Date: Fri, 3 Oct 2025 17:49:49 +0100 Subject: [PATCH] Rework DXIL ThreadState in preparation for Multithreading Make all members private with Set/Get APIs are required. Mark containers which must be thread safe --- renderdoc/driver/shaders/dxil/dxil_debug.cpp | 86 ++++++++++---------- renderdoc/driver/shaders/dxil/dxil_debug.h | 56 +++++++++++-- 2 files changed, 90 insertions(+), 52 deletions(-) diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.cpp b/renderdoc/driver/shaders/dxil/dxil_debug.cpp index 2756a9ee8..efaa3fa24 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_debug.cpp @@ -1780,11 +1780,13 @@ void MemoryTracking::ConvertGlobalAllocToLocal(Id allocId) } } -ThreadState::ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId) +ThreadState::ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId, + uint32_t laneIndex) : m_Debugger(debugger), m_GlobalState(globalState), m_Program(debugger.GetProgram()), - m_MaxSSAId(maxSSAId) + m_MaxSSAId(maxSSAId), + m_WorkgroupIndex(laneIndex) { m_ShaderType = m_Program.GetShaderType(); m_Assigned.resize(maxSSAId); @@ -1900,6 +1902,13 @@ void ThreadState::EnterEntryPoint(const Function *function, ShaderDebugState *st m_Memory.ConvertGlobalAllocToLocal(allocId); } + // active lane : needs it own local backing memory for GSM + if(m_State) + { + for(Id id : m_GlobalState.groupSharedMemoryIds) + m_Memory.ConvertGlobalAllocToLocal(id); + } + m_State = NULL; } @@ -6844,7 +6853,7 @@ bool ThreadState::IsVariableAssigned(const Id id) const } } -ShaderVariable ThreadState::GetBuiltin(ShaderBuiltin builtin) +ShaderVariable ThreadState::GetBuiltin(ShaderBuiltin builtin) const { auto local = m_Builtins.find(builtin); if(local != m_Builtins.end()) @@ -8900,7 +8909,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve m_GlobalState.constantBlocksDatas = apiWrapper->GetConstantBlocksDatas(); for(uint32_t i = 0; i < threadsInWorkgroup; i++) - m_Workgroup.push_back(ThreadState(*this, m_GlobalState, maxSSAId)); + m_Workgroup.push_back(ThreadState(*this, m_GlobalState, maxSSAId, i)); // Get the thread state from the API wrapper const rdcarray> &threadsBuiltins = @@ -8909,8 +8918,8 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve for(uint32_t i = 0; i < threadsInWorkgroup; i++) { - m_Workgroup[i].m_Builtins = threadsBuiltins[i]; - m_Workgroup[i].m_Input = threadsInputs[i]; + m_Workgroup[i].SetBuiltins(threadsBuiltins[i]); + m_Workgroup[i].SetInput(threadsInputs[i]); } ret->sourceVars = apiWrapper->GetSourceVars(); @@ -9461,13 +9470,12 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve uint32_t countOutputs = (uint32_t)outputs.size(); // Make fake ShaderVariable struct to hold all the outputs - ShaderVariable &outStruct = activeState.m_Output.var; + ShaderVariable outStruct; outStruct.name = DXIL_FAKE_OUTPUT_STRUCT_NAME; outStruct.rows = 0; outStruct.columns = 0; outStruct.type = VarType::Struct; outStruct.members.resize(countOutputs); - activeState.m_Output.id = outputSSAId; for(uint32_t i = 0; i < countOutputs; ++i) { @@ -9597,15 +9605,16 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve { // Make a single source variable mapping for the whole output struct SourceVariableMapping outputMapping; - outputMapping.name = activeState.m_Output.var.name; + outputMapping.name = outStruct.name; outputMapping.type = VarType::Struct; outputMapping.rows = 0; outputMapping.columns = 0; outputMapping.variables.resize(1); - outputMapping.variables[0].name = activeState.m_Output.var.name; + outputMapping.variables[0].name = outStruct.name; outputMapping.variables[0].type = DebugVariableType::Variable; ret->sourceVars.push_back(outputMapping); } + activeState.SetOutput(outputSSAId, outStruct); // Global source variable mappings valid for lifetime of the debug session for(const GlobalVariable &gv : m_GlobalState.globals) @@ -9621,8 +9630,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve ret->sourceVars.push_back(outputMapping); } - ret->inputs = {activeState.m_Input}; - ret->inputs.append(activeState.m_Input.members); + ret->inputs = {activeState.GetInput()}; ret->constantBlocks = m_GlobalState.constantBlocks; ret->readOnlyResources = m_GlobalState.readOnlyResources; ret->readWriteResources = m_GlobalState.readWriteResources; @@ -9632,20 +9640,13 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve for(uint32_t i = 0; i < threadsInWorkgroup; i++) { ThreadState &lane = m_Workgroup[i]; - lane.m_WorkgroupIndex = i; - if(i != m_ActiveLaneIndex) - { - lane.m_Variables = activeState.m_Variables; - lane.m_Assigned = activeState.m_Assigned; - lane.m_Live = activeState.m_Live; - lane.m_IsGlobal = activeState.m_IsGlobal; - } + lane.InitialiseFromActive(activeState); } // Add the output struct to the global state if(countOutputs) - m_GlobalState.globals.push_back(activeState.m_Output); + m_GlobalState.globals.push_back(activeState.GetOutput()); InitialiseWorkgroup(); @@ -9679,16 +9680,16 @@ void Debugger::InitialiseWorkgroup() if(m_Stage == ShaderStage::Pixel) { - lane.m_Helper = workgroupProperties[i][ThreadProperty::Helper] != 0; - lane.m_QuadLaneIndex = workgroupProperties[i][ThreadProperty::QuadLane]; - lane.m_QuadId = workgroupProperties[i][ThreadProperty::QuadId]; + lane.SetHelper(workgroupProperties[i][ThreadProperty::Helper] != 0); + lane.SetQuadLaneIndex(workgroupProperties[i][ThreadProperty::QuadLane]); + lane.SetQuadId(workgroupProperties[i][ThreadProperty::QuadId]); } - lane.m_Dead = workgroupProperties[i][ThreadProperty::Active] == 0; - lane.m_SubgroupIdx = workgroupProperties[i][ThreadProperty::SubgroupIdx]; + lane.SetDead(workgroupProperties[i][ThreadProperty::Active] == 0); + lane.SetSubgroupIdx(workgroupProperties[i][ThreadProperty::SubgroupIdx]); // Only add active lanes to control flow - if(!lane.m_Dead) + if(!lane.IsDead()) threadIds.push_back(i); } @@ -9699,7 +9700,7 @@ void Debugger::InitialiseWorkgroup() rdcarray processedQuads; for(uint32_t i = 0; i < threadsInWorkgroup; i++) { - uint32_t desiredQuad = m_Workgroup[i].m_QuadId; + uint32_t desiredQuad = m_Workgroup[i].GetQuadId(); // ignore threads not in any quad if(desiredQuad == 0) @@ -9721,14 +9722,14 @@ void Debugger::InitialiseWorkgroup() }; for(uint32_t j = i + 1, t = 1; j < threadsInWorkgroup && t < 4; j++) { - if(m_Workgroup[j].m_QuadId == desiredQuad) + if(m_Workgroup[j].GetQuadId() == desiredQuad) threads[t++] = j; } // now swizzle the threads to know each other for(uint32_t src = 0; src < 4; src++) { - uint32_t lane = m_Workgroup[threads[src]].m_QuadLaneIndex; + uint32_t lane = m_Workgroup[threads[src]].GetQuadLaneIndex(); if(lane >= 4) continue; @@ -9738,7 +9739,7 @@ void Debugger::InitialiseWorkgroup() if(threads[dst] == ~0U) continue; - m_Workgroup[threads[dst]].m_QuadNeighbours[lane] = threads[src]; + m_Workgroup[threads[dst]].SetQuadNeighbours(lane, threads[src]); } } } @@ -9765,7 +9766,7 @@ rdcarray Debugger::ContinueDebug() { thread.EnterEntryPoint(m_EntryPointFunction, &initial); thread.FillCallstack(initial); - initial.nextInstruction = thread.m_ActiveGlobalInstructionIdx; + initial.nextInstruction = thread.GetActiveGlobalInstructionIdx(); startPoint = initial.nextInstruction; } else @@ -9774,10 +9775,6 @@ rdcarray Debugger::ContinueDebug() } } - // active lane : needs it own local backing memory, copied from global at the start - for(Id id : m_GlobalState.groupSharedMemoryIds) - active.m_Memory.ConvertGlobalAllocToLocal(id); - // globals won't be filled out by entering the entry point, ensure their change is registered. for(const GlobalVariable &gv : m_GlobalState.globals) initial.changes.push_back({ShaderVariable(), gv.var}); @@ -9887,9 +9884,9 @@ rdcarray Debugger::ContinueDebug() thread.StepNext(NULL, m_ApiWrapper, m_Workgroup, activeMask); } - threadExecutionStates[threadId] = thread.m_EnteredPoints; + threadExecutionStates[threadId] = thread.GetEnteredPoints(); - uint32_t threadConvergencePoint = thread.m_ConvergencePoint; + const uint32_t threadConvergencePoint = thread.GetConvergencePoint(); // the thread activated a new convergence point if(threadConvergencePoint != INVALID_EXECUTION_POINT) { @@ -9905,11 +9902,12 @@ rdcarray Debugger::ContinueDebug() } ++countConvergePointThreads; } - if(!thread.m_PartialConvergencePoints.empty()) + const DXIL::BlockArray *partialConvergentPoints = thread.GetPartialConvergencePoints(); + if(!partialConvergentPoints->empty()) { if(newPartialConvergentPoints == NULL) { - newPartialConvergentPoints = &thread.m_PartialConvergencePoints; + newPartialConvergentPoints = partialConvergentPoints; RDCASSERT(newPartialConvergentPoints); if(newPartialConvergentPoints) RDCASSERT(!newPartialConvergentPoints->empty()); @@ -9917,7 +9915,7 @@ rdcarray Debugger::ContinueDebug() else { // All the threads in the tangle should set the same partial convergence points - RDCASSERT(*newPartialConvergentPoints == thread.m_PartialConvergencePoints); + RDCASSERT(*newPartialConvergentPoints == *partialConvergentPoints); } ++countPartialConvergePointThreads; } @@ -9925,7 +9923,7 @@ rdcarray Debugger::ContinueDebug() if(thread.Finished()) tangle.SetThreadDead(threadId); - if(thread.m_Diverged) + if(thread.GetDiverged()) ++countDivergedThreads; } for(size_t lane = 0; lane < m_Workgroup.size(); lane++) @@ -9937,7 +9935,7 @@ rdcarray Debugger::ContinueDebug() if(hasDebugState) { ThreadState &thread = m_Workgroup[m_ActiveLaneIndex]; - state.nextInstruction = thread.m_ActiveGlobalInstructionIdx; + state.nextInstruction = thread.GetActiveGlobalInstructionIdx(); thread.FillCallstack(state); ret.push_back(std::move(state)); } @@ -9972,7 +9970,7 @@ rdcarray Debugger::ContinueDebug() } if(!anyActiveThreads) { - active.m_Dead = true; + active.SetDead(true); m_ControlFlow.UpdateState(threadExecutionStates); RDCERR("No active threads in any tangle, killing active thread to terminate the debugger"); } diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.h b/renderdoc/driver/shaders/dxil/dxil_debug.h index 7a5a2ce69..079f39504 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.h +++ b/renderdoc/driver/shaders/dxil/dxil_debug.h @@ -327,16 +327,59 @@ struct MemoryTracking struct ThreadState { - ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId); + ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId, + uint32_t laneIndex); ~ThreadState(); - void EnterFunction(const DXIL::Function *function, const rdcarray &args); void EnterEntryPoint(const DXIL::Function *function, ShaderDebugState *state); void StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper, const rdcarray &workgroup, const rdcarray &activeMask); void StepOverNopInstructions(); + void FillCallstack(ShaderDebugState &state); + void RetireLiveIDs(); bool Finished() const; + const ShaderVariable &GetInput() const { return m_Input; } + const GlobalVariable &GetOutput() const { return m_Output; } + bool IsDead() const { return m_Dead; } + uint32_t GetQuadId() const { return m_QuadId; } + uint32_t GetQuadLaneIndex() const { return m_QuadLaneIndex; } + uint32_t GetActiveGlobalInstructionIdx() const { return m_ActiveGlobalInstructionIdx; } + DXIL::BlockArray GetEnteredPoints() const { return m_EnteredPoints; } + uint32_t GetConvergencePoint() const { return m_ConvergencePoint; } + bool GetDiverged() const { return m_Diverged; } + const DXIL::BlockArray *GetPartialConvergencePoints() const + { + return &m_PartialConvergencePoints; + } + + void SetBuiltins(const BuiltinInputs &builtins) { m_Builtins = builtins; } + void SetInput(const ShaderVariable &input) { m_Input = input; } + void SetOutput(const Id id, const ShaderVariable &var) + { + m_Output.id = id; + m_Output.var = var; + } + void SetDead(bool dead) { m_Dead = dead; } + void SetHelper(bool helper) { m_Helper = helper; } + void SetQuadLaneIndex(uint32_t quadLaneIndex) { m_QuadLaneIndex = quadLaneIndex; } + void SetQuadId(uint32_t quadId) { m_QuadId = quadId; } + void SetSubgroupIdx(uint32_t subgroupIdx) { m_SubgroupIdx = subgroupIdx; } + void SetQuadNeighbours(uint32_t lane, uint32_t index) { m_QuadNeighbours[lane] = index; } + + void InitialiseFromActive(const ThreadState &active) + { + m_Variables = active.m_Variables; + m_Assigned = active.m_Assigned; + m_Live = active.m_Live; + m_IsGlobal = active.m_IsGlobal; + } + + void UpdateBackingMemoryFromVariable(void *ptr, uint64_t &allocSize, const ShaderVariable &var); + +private: + void EnterFunction(const DXIL::Function *function, const rdcarray &args); + bool InUniformBlock() const; bool JumpToBlock(const DXIL::Block *target, bool divergencePoint); @@ -349,8 +392,6 @@ struct ThreadState rdcstr GetArgumentName(uint32_t i) const; Id GetArgumentId(uint32_t i) const; ResourceReferenceInfo GetResource(Id handleId, bool &annotatedHandle); - void FillCallstack(ShaderDebugState &state); - void RetireLiveIDs(); bool GetShaderVariable(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var, bool flushDenormInput = true) const @@ -369,7 +410,6 @@ struct ThreadState bool GetPhiVariable(const Id &id, DXIL::Operation opCode, DXIL::DXOp dxOpCode, ShaderVariable &var) const; bool GetVariableHelper(DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var) const; - void UpdateBackingMemoryFromVariable(void *ptr, uint64_t &allocSize, const ShaderVariable &var); void UpdateMemoryVariableFromBackingMemory(Id memoryId, const void *ptr); void UpdateGlobalBackingMemory(Id ptrId, const MemoryTracking::Pointer &ptr, const MemoryTracking::Allocation &allocation, @@ -397,7 +437,7 @@ struct ThreadState ShaderVariable &var, bool flushDenormInput, bool isLive) const; bool IsVariableAssigned(const Id id) const; - ShaderVariable GetBuiltin(ShaderBuiltin builtin); + ShaderVariable GetBuiltin(ShaderBuiltin builtin) const; uint32_t GetSubgroupActiveLanes(const rdcarray &activeMask, const rdcarray &workgroup, rdcarray &activeLanes) const; @@ -421,7 +461,7 @@ struct ThreadState ShaderVariable m_Input; GlobalVariable m_Output; - // Known SSA ShaderVariables + // Known SSA ShaderVariables : this must be a thread safe container rdcarray m_Variables; // SSA Variables captured when a branch happens for use in phi nodes std::map m_PhiVariables; @@ -429,7 +469,7 @@ struct ThreadState rdcarray m_Live; // Globals variables at the current scope rdcarray m_IsGlobal; - // If the variable has been assigned a value + // If the variable has been assigned a value : this must be a thread safe container rdcarray m_Assigned; // Annotated handle properties std::map m_AnnotatedProperties;