mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-04 17:10:47 +00:00
Prepare SPIR-V debugger for larger workgroup sizes
* The workgroup size is passed in at creation time, and we handle the potential for multiple quads by identifying quads via quad ID
This commit is contained in:
@@ -77,13 +77,9 @@ inline uint64_t CountOnes(uint64_t value)
|
||||
|
||||
namespace rdcspv
|
||||
{
|
||||
ThreadState::ThreadState(uint32_t workgroupIdx, Debugger &debug, const GlobalState &globalState)
|
||||
ThreadState::ThreadState(Debugger &debug, const GlobalState &globalState)
|
||||
: debugger(debug), global(globalState)
|
||||
{
|
||||
workgroupIndex = workgroupIdx;
|
||||
nextInstruction = 0;
|
||||
helperInvocation = false;
|
||||
killed = false;
|
||||
}
|
||||
|
||||
ThreadState::~ThreadState()
|
||||
@@ -95,7 +91,7 @@ ThreadState::~ThreadState()
|
||||
|
||||
bool ThreadState::Finished() const
|
||||
{
|
||||
return killed || callstack.empty();
|
||||
return dead || callstack.empty();
|
||||
}
|
||||
|
||||
void ThreadState::FillCallstack(rdcarray<Id> &funcs)
|
||||
@@ -415,18 +411,33 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
|
||||
{
|
||||
const ThreadState *a = NULL, *b = NULL;
|
||||
|
||||
if(quadNeighbours[0] == ~0U || quadNeighbours[1] == ~0U || quadNeighbours[2] == ~0U ||
|
||||
quadNeighbours[3] == ~0U)
|
||||
{
|
||||
debugger.GetAPIWrapper()->AddDebugMessage(
|
||||
MessageCategory::Execution, MessageSeverity::High, MessageSource::RuntimeWarning,
|
||||
StringFormat::Fmt("Derivative calculation within non-quad on input %s",
|
||||
debugger.GetHumanName(val).c_str()));
|
||||
return ShaderVariable("", 0.0f, 0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
RDCASSERT(quadNeighbours[0] < workgroup.size(), quadNeighbours[0], workgroup.size());
|
||||
RDCASSERT(quadNeighbours[1] < workgroup.size(), quadNeighbours[1], workgroup.size());
|
||||
RDCASSERT(quadNeighbours[2] < workgroup.size(), quadNeighbours[2], workgroup.size());
|
||||
RDCASSERT(quadNeighbours[3] < workgroup.size(), quadNeighbours[3], workgroup.size());
|
||||
|
||||
const bool xdirection = (dir == DDX);
|
||||
if(type == Coarse)
|
||||
{
|
||||
// coarse derivatives are identical across the quad, based on the top-left.
|
||||
a = &workgroup[0];
|
||||
b = &workgroup[xdirection ? 1 : 2];
|
||||
a = &workgroup[quadNeighbours[0]];
|
||||
b = &workgroup[quadNeighbours[xdirection ? 1 : 2]];
|
||||
}
|
||||
else
|
||||
{
|
||||
// we need to figure out the exact pair to use
|
||||
int x = workgroupIndex & 1;
|
||||
int y = workgroupIndex / 2;
|
||||
int x = quadLaneIndex & 1;
|
||||
int y = quadLaneIndex / 2;
|
||||
|
||||
if(x == 0)
|
||||
{
|
||||
@@ -435,13 +446,13 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
|
||||
// top-left
|
||||
if(xdirection)
|
||||
{
|
||||
a = &workgroup[0];
|
||||
b = &workgroup[1];
|
||||
a = &workgroup[quadNeighbours[0]];
|
||||
b = &workgroup[quadNeighbours[1]];
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &workgroup[0];
|
||||
b = &workgroup[2];
|
||||
a = &workgroup[quadNeighbours[0]];
|
||||
b = &workgroup[quadNeighbours[2]];
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -449,13 +460,13 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
|
||||
// bottom-left
|
||||
if(xdirection)
|
||||
{
|
||||
a = &workgroup[2];
|
||||
b = &workgroup[3];
|
||||
a = &workgroup[quadNeighbours[2]];
|
||||
b = &workgroup[quadNeighbours[3]];
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &workgroup[0];
|
||||
b = &workgroup[2];
|
||||
a = &workgroup[quadNeighbours[0]];
|
||||
b = &workgroup[quadNeighbours[2]];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -466,13 +477,13 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
|
||||
// top-right
|
||||
if(xdirection)
|
||||
{
|
||||
a = &workgroup[0];
|
||||
b = &workgroup[1];
|
||||
a = &workgroup[quadNeighbours[0]];
|
||||
b = &workgroup[quadNeighbours[1]];
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &workgroup[1];
|
||||
b = &workgroup[3];
|
||||
a = &workgroup[quadNeighbours[1]];
|
||||
b = &workgroup[quadNeighbours[3]];
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -480,13 +491,13 @@ ShaderVariable ThreadState::CalcDeriv(ThreadState::DerivDir dir, ThreadState::De
|
||||
// bottom-right
|
||||
if(xdirection)
|
||||
{
|
||||
a = &workgroup[2];
|
||||
b = &workgroup[3];
|
||||
a = &workgroup[quadNeighbours[2]];
|
||||
b = &workgroup[quadNeighbours[3]];
|
||||
}
|
||||
else
|
||||
{
|
||||
a = &workgroup[1];
|
||||
b = &workgroup[3];
|
||||
a = &workgroup[quadNeighbours[1]];
|
||||
b = &workgroup[quadNeighbours[3]];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3068,7 +3079,7 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
|
||||
case Op::TerminateInvocation:
|
||||
case Op::Kill:
|
||||
{
|
||||
killed = true;
|
||||
dead = true;
|
||||
|
||||
// destroy all stack frames
|
||||
for(StackFrame *exitingFrame : callstack)
|
||||
|
||||
@@ -175,7 +175,7 @@ class Debugger;
|
||||
|
||||
struct ThreadState
|
||||
{
|
||||
ThreadState(uint32_t workgroupIdx, Debugger &debug, const GlobalState &globalState);
|
||||
ThreadState(Debugger &debug, const GlobalState &globalState);
|
||||
~ThreadState();
|
||||
|
||||
void EnterEntryPoint(ShaderDebugState *state);
|
||||
@@ -231,10 +231,16 @@ struct ThreadState
|
||||
|
||||
std::map<Id, uint32_t> lastWrite;
|
||||
|
||||
// index in the pixel quad
|
||||
uint32_t workgroupIndex;
|
||||
bool helperInvocation;
|
||||
bool killed;
|
||||
// quad ID (arbitrary, just used to find neighbours for derivatives)
|
||||
uint32_t quadId = 0;
|
||||
// index in the pixel quad (relative to the active lane)
|
||||
uint32_t quadLaneIndex = ~0U;
|
||||
// the lane indices of our quad neighbours
|
||||
uint32_t quadNeighbours[4] = {~0U, ~0U, ~0U, ~0U};
|
||||
// index in the workgroup
|
||||
uint32_t workgroupIndex = 0;
|
||||
bool helperInvocation = false;
|
||||
bool dead = true;
|
||||
|
||||
const ShaderVariable &GetSrc(Id id) const;
|
||||
void WritePointerValue(Id pointer, const ShaderVariable &val);
|
||||
@@ -368,7 +374,8 @@ public:
|
||||
ShaderDebugTrace *BeginDebug(DebugAPIWrapper *apiWrapper, const ShaderStage stage,
|
||||
const rdcstr &entryPoint, const rdcarray<SpecConstant> &specInfo,
|
||||
const std::map<size_t, uint32_t> &instructionLines,
|
||||
const SPIRVPatchData &patchData, uint32_t activeIndex);
|
||||
const SPIRVPatchData &patchData, uint32_t activeIndex,
|
||||
uint32_t workgroupSize);
|
||||
|
||||
rdcarray<ShaderDebugState> ContinueDebug();
|
||||
|
||||
|
||||
@@ -838,7 +838,8 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
const rdcstr &entryPoint,
|
||||
const rdcarray<SpecConstant> &specInfo,
|
||||
const std::map<size_t, uint32_t> &instructionLines,
|
||||
const SPIRVPatchData &patchData, uint32_t activeIndex)
|
||||
const SPIRVPatchData &patchData, uint32_t activeIndex,
|
||||
uint32_t workgroupSize)
|
||||
{
|
||||
Id entryId = entryLookup[ShaderEntryPoint(entryPoint, shaderStage)];
|
||||
|
||||
@@ -895,9 +896,8 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
stage = shaderStage;
|
||||
apiWrapper = api;
|
||||
|
||||
uint32_t workgroupSize = shaderStage == ShaderStage::Pixel ? 4 : 1;
|
||||
for(uint32_t i = 0; i < workgroupSize; i++)
|
||||
workgroup.push_back(ThreadState(i, *this, global));
|
||||
workgroup.push_back(ThreadState(*this, global));
|
||||
|
||||
ThreadState &active = GetActiveLane();
|
||||
|
||||
@@ -1489,6 +1489,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
for(uint32_t i = 0; i < workgroupSize; i++)
|
||||
{
|
||||
ThreadState &lane = workgroup[i];
|
||||
lane.workgroupIndex = i;
|
||||
if(i != activeLaneIndex)
|
||||
{
|
||||
lane.nextInstruction = active.nextInstruction;
|
||||
@@ -1499,16 +1500,68 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *api, const ShaderStage s
|
||||
|
||||
if(stage == ShaderStage::Pixel)
|
||||
{
|
||||
ShaderVariable var(rdcstr(), 0U, 0U, 0U, 0U);
|
||||
apiWrapper->FillInputValue(var, ShaderBuiltin::IsHelper, i, 0, 0);
|
||||
lane.helperInvocation = var.value.u32v[0] != 0;
|
||||
lane.helperInvocation = apiWrapper->GetThreadProperty(i, ThreadProperty::Helper) != 0;
|
||||
lane.quadLaneIndex = apiWrapper->GetThreadProperty(i, ThreadProperty::QuadLane);
|
||||
lane.quadId = apiWrapper->GetThreadProperty(i, ThreadProperty::QuadId);
|
||||
}
|
||||
|
||||
lane.dead = apiWrapper->GetThreadProperty(i, ThreadProperty::Active) == 0;
|
||||
|
||||
// now that the globals are allocated and their storage won't move, we can take pointers to them
|
||||
for(const PointerId &p : pointerIDs)
|
||||
p.Set(*this, global, lane);
|
||||
}
|
||||
|
||||
// find quad neighbours
|
||||
{
|
||||
rdcarray<uint32_t> processedQuads;
|
||||
for(uint32_t i = 0; i < workgroupSize; i++)
|
||||
{
|
||||
uint32_t desiredQuad = workgroup[i].quadId;
|
||||
|
||||
// ignore threads not in any quad
|
||||
if(desiredQuad == 0)
|
||||
continue;
|
||||
|
||||
// quads are almost certainly sorted together, so shortcut by checking the last one
|
||||
if((!processedQuads.empty() && processedQuads.back() == desiredQuad) ||
|
||||
processedQuads.contains(desiredQuad))
|
||||
continue;
|
||||
|
||||
processedQuads.push_back(desiredQuad);
|
||||
|
||||
// find the threads
|
||||
uint32_t threads[4] = {
|
||||
i,
|
||||
~0U,
|
||||
~0U,
|
||||
~0U,
|
||||
};
|
||||
for(uint32_t j = i + 1, t = 1; j < workgroupSize && t < 4; j++)
|
||||
{
|
||||
if(workgroup[j].quadId == desiredQuad)
|
||||
threads[t++] = j;
|
||||
}
|
||||
|
||||
// now swizzle the threads to know each other
|
||||
for(uint32_t src = 0; src < 4; src++)
|
||||
{
|
||||
uint32_t lane = workgroup[threads[src]].quadLaneIndex;
|
||||
|
||||
if(lane >= 4)
|
||||
continue;
|
||||
|
||||
for(uint32_t dst = 0; dst < 4; dst++)
|
||||
{
|
||||
if(threads[dst] == ~0U)
|
||||
continue;
|
||||
|
||||
workgroup[threads[dst]].quadNeighbours[lane] = threads[src];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// this contains all the accumulated line number information. Add in our disassembly mapping
|
||||
ret->instInfo = m_InstInfo;
|
||||
for(size_t i = 0; i < m_InstInfo.size(); i++)
|
||||
|
||||
@@ -2968,8 +2968,8 @@ struct VertexLaneData
|
||||
|
||||
struct PixelLaneData
|
||||
{
|
||||
Vec4f fragCoord; // per-lane coord
|
||||
uint32_t helper; // per-lane helper bit
|
||||
Vec4f fragCoord; // per-lane coord
|
||||
uint32_t isHelper; // per-lane helper bit
|
||||
uint32_t padding[3];
|
||||
};
|
||||
|
||||
@@ -3302,7 +3302,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
helper.base = helper.loadOps.add(rdcspv::OpSelect(uint32Type, editor.MakeId(), helper.base,
|
||||
getUIntConst(1), getUIntConst(0)));
|
||||
fixedValues.push_back(helper);
|
||||
structMembers.push_back({uint32Type, "__rd_helper", offsetof(PixelLaneData, helper)});
|
||||
structMembers.push_back({uint32Type, "__rd_helper", offsetof(PixelLaneData, isHelper)});
|
||||
|
||||
offset += sizeof(PixelLaneData);
|
||||
}
|
||||
@@ -3881,6 +3881,9 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
|
||||
apiWrapper->location_inputs.resize(numThreads);
|
||||
apiWrapper->thread_builtins.resize(numThreads);
|
||||
apiWrapper->thread_props.resize(numThreads);
|
||||
|
||||
apiWrapper->thread_props[0][(size_t)rdcspv::ThreadProperty::Active] = 1;
|
||||
|
||||
std::unordered_map<ShaderBuiltin, ShaderVariable> &global_builtins = apiWrapper->global_builtins;
|
||||
global_builtins[ShaderBuiltin::BaseInstance] =
|
||||
@@ -4056,7 +4059,7 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
ShaderDebugTrace *ret = debugger->BeginDebug(apiWrapper, ShaderStage::Vertex, entryPoint, spec,
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0);
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0, 1);
|
||||
apiWrapper->ResetReplay();
|
||||
|
||||
return ret;
|
||||
@@ -4589,27 +4592,40 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
{
|
||||
byte *value = LaneData + t * structStride;
|
||||
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = 1;
|
||||
|
||||
// read PixelLaneData
|
||||
PixelLaneData *pixelData = (PixelLaneData *)value;
|
||||
{
|
||||
PixelLaneData *pixelData = (PixelLaneData *)value;
|
||||
|
||||
{
|
||||
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::Position];
|
||||
|
||||
var.rows = 1;
|
||||
var.columns = 4;
|
||||
var.type = VarType::Float;
|
||||
|
||||
memcpy(var.value.u8v.data(), &pixelData->fragCoord, sizeof(Vec4f));
|
||||
}
|
||||
|
||||
{
|
||||
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::IsHelper];
|
||||
|
||||
var.rows = 1;
|
||||
var.columns = 1;
|
||||
var.type = VarType::Bool;
|
||||
|
||||
memcpy(var.value.u8v.data(), &pixelData->isHelper, sizeof(uint32_t));
|
||||
}
|
||||
|
||||
if(numThreads == 4)
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = 1;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Helper] =
|
||||
t != winner->laneIndex ? 1 : 0;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadId] = 1000;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadLane] = t;
|
||||
}
|
||||
value += sizeof(PixelLaneData);
|
||||
{
|
||||
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::Position];
|
||||
|
||||
var.rows = 1;
|
||||
var.columns = 4;
|
||||
var.type = VarType::Float;
|
||||
|
||||
memcpy(var.value.u8v.data(), &pixelData->fragCoord, sizeof(Vec4f));
|
||||
}
|
||||
{
|
||||
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::IsHelper];
|
||||
|
||||
var.rows = 1;
|
||||
var.columns = 1;
|
||||
var.type = VarType::Bool;
|
||||
|
||||
memcpy(var.value.u8v.data(), &pixelData->helper, sizeof(uint32_t));
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < shadRefl.refl->inputSignature.size(); i++)
|
||||
{
|
||||
@@ -4640,7 +4656,8 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
}
|
||||
|
||||
ret = debugger->BeginDebug(apiWrapper, ShaderStage::Pixel, entryPoint, spec,
|
||||
shadRefl.instructionLines, shadRefl.patchData, winner->laneIndex);
|
||||
shadRefl.instructionLines, shadRefl.patchData, winner->laneIndex,
|
||||
numThreads);
|
||||
apiWrapper->ResetReplay();
|
||||
}
|
||||
else
|
||||
@@ -4712,6 +4729,9 @@ ShaderDebugTrace *VulkanReplay::DebugThread(uint32_t eventId,
|
||||
static const uint32_t numThreads = 1;
|
||||
|
||||
apiWrapper->thread_builtins.resize(numThreads);
|
||||
apiWrapper->thread_props.resize(numThreads);
|
||||
|
||||
apiWrapper->thread_props[0][(size_t)rdcspv::ThreadProperty::Active] = 1;
|
||||
|
||||
std::unordered_map<ShaderBuiltin, ShaderVariable> &global_builtins = apiWrapper->global_builtins;
|
||||
global_builtins[ShaderBuiltin::DispatchSize] =
|
||||
@@ -4736,7 +4756,7 @@ ShaderDebugTrace *VulkanReplay::DebugThread(uint32_t eventId,
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
ShaderDebugTrace *ret = debugger->BeginDebug(apiWrapper, ShaderStage::Compute, entryPoint, spec,
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0);
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0, 1);
|
||||
apiWrapper->ResetReplay();
|
||||
|
||||
return ret;
|
||||
@@ -4800,6 +4820,9 @@ ShaderDebugTrace *VulkanReplay::DebugMeshThread(uint32_t eventId,
|
||||
static const uint32_t numThreads = 1;
|
||||
|
||||
apiWrapper->thread_builtins.resize(numThreads);
|
||||
apiWrapper->thread_props.resize(numThreads);
|
||||
|
||||
apiWrapper->thread_props[0][(size_t)rdcspv::ThreadProperty::Active] = 1;
|
||||
|
||||
std::unordered_map<ShaderBuiltin, ShaderVariable> &global_builtins = apiWrapper->global_builtins;
|
||||
global_builtins[ShaderBuiltin::DispatchSize] =
|
||||
@@ -4824,7 +4847,7 @@ ShaderDebugTrace *VulkanReplay::DebugMeshThread(uint32_t eventId,
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
ShaderDebugTrace *ret = debugger->BeginDebug(apiWrapper, ShaderStage::Mesh, entryPoint, spec,
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0);
|
||||
shadRefl.instructionLines, shadRefl.patchData, 0, 1);
|
||||
apiWrapper->ResetReplay();
|
||||
|
||||
return ret;
|
||||
|
||||
Reference in New Issue
Block a user