From 0243927d4330712874de9239b53bef9c636fda7f Mon Sep 17 00:00:00 2001 From: Jake Turner Date: Fri, 3 Oct 2025 18:19:30 +0100 Subject: [PATCH] DXIL Debugger can simulate multiple instructions per step --- renderdoc/driver/shaders/dxil/dxil_debug.cpp | 196 ++++++++++++++++--- renderdoc/driver/shaders/dxil/dxil_debug.h | 29 ++- 2 files changed, 190 insertions(+), 35 deletions(-) diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.cpp b/renderdoc/driver/shaders/dxil/dxil_debug.cpp index 3a3e1b354..9a8bd9f2b 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_debug.cpp @@ -1939,6 +1939,7 @@ void ThreadState::EnterEntryPoint(const Function *function, bool hasDebugState) } m_HasDebugState = false; + UpdateCurrentInstruction(); } // Must be called from the replay manager thread (the debugger thread) @@ -4126,7 +4127,8 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) if(lane < workgroup.size()) { ShaderVariable var; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, var)); + RDCASSERT( + GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, var)); result.value = var.value; } else @@ -4146,7 +4148,8 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) if(lane < workgroup.size()) { ShaderVariable var; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, var)); + RDCASSERT( + GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, var)); result.value = var.value; } else @@ -4190,7 +4193,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); for(uint8_t c = 0; c < x.columns; c++) { @@ -4267,7 +4270,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); count += x.value.u32v[0]; } @@ -4333,7 +4336,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) { RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); if(dxOpCode == DXOp::WaveAnyTrue) { @@ -4415,7 +4418,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) { RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); for(uint8_t c = 0; c < x.columns; c++) { @@ -4546,7 +4549,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) { RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); for(uint8_t c = 0; c < x.columns; c++) { @@ -4611,7 +4614,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) { RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); bool matches = true; for(uint8_t c = 0; c < x.columns; c++) @@ -4691,7 +4694,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); for(uint8_t c = 0; c < x.columns; c++) { switch(waveMultiPrefixOpCode) @@ -4804,7 +4807,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); count += x.value.u32v[0]; } @@ -4893,7 +4896,8 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) if(lane < workgroup.size()) { ShaderVariable var; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, var)); + RDCASSERT( + GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, var)); result.value = var.value; } else @@ -4931,7 +4935,7 @@ bool ThreadState::ExecuteInstruction(const rdcarray &workgroup) RDCASSERT(lane < workgroup.size(), lane, workgroup.size()); ShaderVariable x; - RDCASSERT(workgroup[lane].GetShaderVariable(inst.args[1], opCode, dxOpCode, x)); + RDCASSERT(GetShaderVariableFromLane(workgroup[lane], inst.args[1], opCode, dxOpCode, x)); switch(quadVoteOp) { @@ -6756,9 +6760,11 @@ void ThreadState::StepNext(bool hasDebugState, const rdcarray &work m_HasDebugState = false; } +// When getting live variables : this must be a thread safe operation using only thread safe containers bool ThreadState::GetShaderVariableHelper(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var, - bool flushDenormInput, bool isLive) const + bool flushDenormInput, bool isLive, + bool ignoreLiveCheck) const { var.name.clear(); var.members.clear(); @@ -6863,7 +6869,7 @@ bool ThreadState::GetShaderVariableHelper(const DXIL::Value *dxilValue, DXIL::Op if(const Instruction *inst = cast(dxilValue)) { if(isLive) - return GetLiveVariable(inst->slot, op, dxOpCode, var); + return GetLiveVariable(inst->slot, op, dxOpCode, ignoreLiveCheck, var); else return GetPhiVariable(inst->slot, op, dxOpCode, var); } @@ -6899,11 +6905,13 @@ ShaderVariable ThreadState::GetBuiltin(ShaderBuiltin builtin) const return {}; } -bool ThreadState::GetLiveVariable(const Id &id, Operation op, DXOp dxOpCode, ShaderVariable &var) const +// This must be a thread safe operation using only thread safe containers +bool ThreadState::GetLiveVariable(const Id &id, Operation op, DXOp dxOpCode, bool ignoreLiveCheck, + ShaderVariable &var) const { if(id < m_Live.size()) { - RDCASSERT(m_Live[id]); + RDCASSERT(ignoreLiveCheck || m_Live[id]); } else { @@ -7476,8 +7484,10 @@ ShaderValue ThreadState::DDX(bool fine, Operation opCode, DXOp dxOpCode, ShaderVariable a; ShaderVariable b; - RDCASSERT(workgroup[m_QuadNeighbours[index + 1]].GetShaderVariable(dxilValue, opCode, dxOpCode, a)); - RDCASSERT(workgroup[m_QuadNeighbours[index]].GetShaderVariable(dxilValue, opCode, dxOpCode, b)); + RDCASSERT(GetShaderVariableFromLane(workgroup[m_QuadNeighbours[index + 1]], dxilValue, opCode, + dxOpCode, a)); + RDCASSERT(GetShaderVariableFromLane(workgroup[m_QuadNeighbours[index + 0]], dxilValue, opCode, + dxOpCode, b)); Sub(a, b, ret); return ret; } @@ -7520,8 +7530,10 @@ ShaderValue ThreadState::DDY(bool fine, Operation opCode, DXOp dxOpCode, ShaderVariable a; ShaderVariable b; - RDCASSERT(workgroup[m_QuadNeighbours[index + 2]].GetShaderVariable(dxilValue, opCode, dxOpCode, a)); - RDCASSERT(workgroup[m_QuadNeighbours[index]].GetShaderVariable(dxilValue, opCode, dxOpCode, b)); + RDCASSERT(GetShaderVariableFromLane(workgroup[m_QuadNeighbours[index + 2]], dxilValue, opCode, + dxOpCode, a)); + RDCASSERT(GetShaderVariableFromLane(workgroup[m_QuadNeighbours[index + 0]], dxilValue, opCode, + dxOpCode, b)); Sub(a, b, ret); return ret; } @@ -7594,12 +7606,12 @@ bool ThreadState::WorkgroupIsDiverged(const rdcarray &workgroup) continue; if(block0 == ~0U) { - block0 = workgroup[i].m_Block; + block0 = workgroup[i].m_CurrentBlock; instr0 = workgroup[i].m_CurrentGlobalInstructionIdx; continue; } // not in the same basic block - if(workgroup[i].m_Block != block0) + if(workgroup[i].m_CurrentBlock != block0) return true; // not executing the same instruction if(workgroup[i].m_CurrentGlobalInstructionIdx != instr0) @@ -7619,12 +7631,12 @@ bool ThreadState::SubgroupIsDiverged(const rdcarray &workgroup, continue; if(block0 == ~0U) { - block0 = workgroup[lane].m_Block; + block0 = workgroup[lane].m_CurrentBlock; instr0 = workgroup[lane].m_CurrentGlobalInstructionIdx; continue; } // not in the same basic block - if(workgroup[lane].m_Block != block0) + if(workgroup[lane].m_CurrentBlock != block0) return true; // not executing the same instruction if(workgroup[lane].m_CurrentGlobalInstructionIdx != instr0) @@ -7651,12 +7663,12 @@ bool ThreadState::QuadIsDiverged(const rdcarray &workgroup, continue; if(block0 == ~0U) { - block0 = workgroup[i].m_Block; + block0 = workgroup[i].m_CurrentBlock; instr0 = workgroup[i].m_CurrentGlobalInstructionIdx; continue; } // not in the same basic block - if(workgroup[i].m_Block != block0) + if(workgroup[i].m_CurrentBlock != block0) return true; // not executing the same instruction if(workgroup[i].m_CurrentGlobalInstructionIdx != instr0) @@ -7665,6 +7677,117 @@ bool ThreadState::QuadIsDiverged(const rdcarray &workgroup, return false; } +// The conditions where it is not safe to run another step are based on: +// the current simulation state and the next instruction to simulate +bool ThreadState::CanRunAnotherStep() const +{ + // Thread has finished + if(Finished()) + return false; + + // Current Simulated State that prevents running another step: + // Any control flow state changes i.e. branch, convergence point, partial convergence + if(m_Diverged) + return false; + if(!m_EnteredPoints.empty()) + return false; + if(m_ConvergencePoint != INVALID_EXECUTION_POINT) + return false; + if(!m_PartialConvergencePoints.empty()) + return false; + + // current instructions that require full lockstep + const Instruction *inst = m_CurrentInstruction; + Operation opCode = inst->op; + DXOp dxOpCode = DXOp::NumOpCodes; + switch(opCode) + { + case Operation::Call: + { + const Function *callFunc = inst->getFuncCall(); + if(callFunc->family == FunctionFamily::DXOp) + { + RDCASSERT(getival(inst->args[0], dxOpCode)); + RDCASSERT(dxOpCode < DXOp::NumOpCodes, dxOpCode, DXOp::NumOpCodes); + switch(dxOpCode) + { + // no thread can continue until all threads execute the barrier + case DXOp::Barrier: return false; + default: break; + } + } + } + default: break; + } + + // Next instructions that prevent running another step: + // any instruction that requires threads in the tangle to be in lockstep + inst = m_FunctionInfo->function->instructions[m_FunctionInstructionIdx]; + opCode = inst->op; + dxOpCode = DXOp::NumOpCodes; + switch(opCode) + { + case Operation::Call: + { + const Function *callFunc = inst->getFuncCall(); + if(callFunc->family == FunctionFamily::DXOp) + { + RDCASSERT(getival(inst->args[0], dxOpCode)); + RDCASSERT(dxOpCode < DXOp::NumOpCodes, dxOpCode, DXOp::NumOpCodes); + switch(dxOpCode) + { + // thread barriers require threads in the tangle to be in lockstep + case DXOp::Barrier: + return false; + // Image operations require threads in the tangle to be in lockstep + case DXOp::Sample: + case DXOp::SampleBias: + case DXOp::SampleLevel: + case DXOp::SampleGrad: + case DXOp::SampleCmp: + case DXOp::SampleCmpBias: + case DXOp::SampleCmpLevel: + case DXOp::SampleCmpGrad: + case DXOp::SampleCmpLevelZero: + case DXOp::TextureGather: + case DXOp::TextureGatherCmp: + case DXOp::CalculateLOD: return false; + case DXOp::TextureLoad: + // TextureLoad does not require derivatives, does not have to be in lockstep + return true; + // derivatives require threads in the tangle to be in lockstep + case DXOp::DerivCoarseX: + case DXOp::DerivCoarseY: + case DXOp::DerivFineX: + case DXOp::DerivFineY: return false; + // wave/subgroup ops require threads in the tangle to be in lockstep + case DXOp::WaveIsFirstLane: + case DXOp::WaveReadLaneAt: + case DXOp::WaveReadLaneFirst: + case DXOp::WavePrefixOp: + case DXOp::WavePrefixBitCount: + case DXOp::WaveAllBitCount: + case DXOp::WaveAnyTrue: + case DXOp::WaveAllTrue: + case DXOp::WaveActiveBallot: + case DXOp::WaveActiveAllEqual: + case DXOp::WaveActiveOp: + case DXOp::WaveActiveBit: + case DXOp::WaveMatch: + case DXOp::WaveMultiPrefixOp: + case DXOp::WaveMultiPrefixBitCount: + case DXOp::QuadReadLaneAt: + case DXOp::QuadOp: + case DXOp::QuadVote: return false; + default: break; + } + } + } + default: break; + } + return true; +} + Debugger::DebugInfo::~DebugInfo() { for(const ScopedDebugData *scope : scopedDebugDatas) @@ -10196,6 +10319,23 @@ void Debugger::StepThread(uint32_t lane, StepThreadMode stepMode) if(isActiveThread) curActiveSteps++; + + if(stepMode == StepThreadMode::RUN_SINGLE_STEP) + break; + + if(stepMode == StepThreadMode::QUEUE_SINGLE_STEP) + break; + + simulateStep = thread.CanRunAnotherStep(); + if(simulateStep) + { + DXIL_DEBUG_RDCASSERT(thread.IsSimulationStepActive()); + } + if(simulateStep) + thread.SetStepQueued(); + + if(stepMode == StepThreadMode::QUEUE_MULTIPLE_STEPS) + break; }; // Update the number of simulation steps if(isActiveThread) @@ -10227,10 +10367,10 @@ void Debugger::InternalStepThread(uint32_t lane) thread.ClearPendingDebugState(); } thread.StepNext(true, m_Workgroup); + m_ActiveDebugState.nextInstruction = thread.GetActiveGlobalInstructionIdx(); thread.FillCallstack(m_ActiveDebugState); const ShaderDebugState &pendingDebugState = thread.GetPendingDebugState(); - m_ActiveDebugState.nextInstruction = pendingDebugState.nextInstruction; m_ActiveDebugState.flags = pendingDebugState.flags; m_ActiveDebugState.changes.append(pendingDebugState.changes); thread.ClearPendingDebugState(); @@ -10257,6 +10397,6 @@ void Debugger::QueueJob(uint32_t lane) CHECK_DEBUGGER_THREAD(); ThreadState &thread = m_Workgroup[lane]; thread.SetStepQueued(); - StepThread(lane, StepThreadMode::RUN_SINGLE_STEP); + StepThread(lane, StepThreadMode::RUN_MULTIPLE_STEPS); } }; // namespace DXILDebug diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.h b/renderdoc/driver/shaders/dxil/dxil_debug.h index 2b93fc34e..1e4e64244 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.h +++ b/renderdoc/driver/shaders/dxil/dxil_debug.h @@ -370,6 +370,7 @@ struct ThreadState bool Finished() const; bool IsSimulationStepActive() const { return (AtomicLoad(&atomic_isSimulationStepActive) == 1); } + bool CanRunAnotherStep() const; const ShaderVariable &GetInput() const { return m_Input; } const GlobalVariable &GetOutput() const { return m_Output; } bool IsDead() const { return m_Dead; } @@ -403,7 +404,11 @@ struct ThreadState RDCASSERTEQUAL(m_ActiveMask.size(), activeMask.size()); memcpy(m_ActiveMask.data(), activeMask.data(), activeMask.size() * sizeof(bool)); } - void UpdateCurrentInstruction() { m_CurrentGlobalInstructionIdx = m_ActiveGlobalInstructionIdx; } + void UpdateCurrentInstruction() + { + m_CurrentGlobalInstructionIdx = m_ActiveGlobalInstructionIdx; + m_CurrentBlock = m_Block; + } void SetSimulationStepCompleted() { AtomicStore(&atomic_isSimulationStepActive, 0); } void SetStepQueued() { AtomicStore(&atomic_isSimulationStepActive, 1); } @@ -438,20 +443,27 @@ private: Id GetArgumentId(uint32_t i) const; ResourceReferenceInfo GetResource(Id handleId, bool &annotatedHandle); + // This must be a thread safe operation using only thread safe containers + bool GetShaderVariableFromLane(const ThreadState &lane, const DXIL::Value *dxilValue, + DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var) const + { + return lane.GetShaderVariableHelper(dxilValue, op, dxOpCode, var, true, true, true); + } bool GetShaderVariable(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var, bool flushDenormInput = true) const { - return GetShaderVariableHelper(dxilValue, op, dxOpCode, var, flushDenormInput, true); + return GetShaderVariableHelper(dxilValue, op, dxOpCode, var, flushDenormInput, true, false); } bool GetPhiShaderVariable(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var, bool flushDenormInput = true) const { - return GetShaderVariableHelper(dxilValue, op, dxOpCode, var, flushDenormInput, false); + return GetShaderVariableHelper(dxilValue, op, dxOpCode, var, flushDenormInput, false, false); } + // This must be a thread safe operation using only thread safe containers bool GetLiveVariable(const Id &id, DXIL::Operation opCode, DXIL::DXOp dxOpCode, - ShaderVariable &var) const; + bool ignoreLiveCheck, ShaderVariable &var) const; bool GetPhiVariable(const Id &id, DXIL::Operation opCode, DXIL::DXOp dxOpCode, ShaderVariable &var) const; bool GetVariableHelper(DXIL::Operation op, DXIL::DXOp dxOpCode, ShaderVariable &var) const; @@ -479,8 +491,10 @@ private: static bool SubgroupIsDiverged(const rdcarray &workgroup, const rdcarray &activeLanes); - bool GetShaderVariableHelper(const DXIL::Value *dxilValue, DXIL::Operation op, DXIL::DXOp dxOpCode, - ShaderVariable &var, bool flushDenormInput, bool isLive) const; + // When getting live variables : this must be a thread safe operation using only thread safe containers + bool GetShaderVariableHelper(const DXIL::Value *dxilValue, DXIL::Operation op, + DXIL::DXOp dxOpCode, ShaderVariable &var, bool flushDenormInput, + bool isLive, bool ignoreLiveCheck) const; bool IsVariableAssigned(const Id id) const; ShaderVariable GetBuiltin(ShaderBuiltin builtin) const; @@ -542,8 +556,9 @@ private: uint32_t m_PreviousBlock = ~0U; // The global PC of the active instruction that will be executed on the next simulation step uint32_t m_ActiveGlobalInstructionIdx = 0; - // The global PC of the active instruction that was last executed + // The global PC and block of the instruction that was last executed uint32_t m_CurrentGlobalInstructionIdx = 0; + uint32_t m_CurrentBlock = ~0U; // true if executed an operation which could trigger divergence bool m_Diverged;