Move SPIR-V debugging input fetch lane data to SPIR-V headers

This commit is contained in:
baldurk
2025-11-17 14:38:49 +00:00
parent 04866327cc
commit 2b687a56c8
2 changed files with 153 additions and 152 deletions
@@ -159,6 +159,64 @@ public:
virtual bool QueuedOpsHasSpace() = 0;
};
// things we need to readback once per hit thread
struct ResultDataBase
{
Vec4f pos;
uint32_t prim;
uint32_t sample;
uint32_t view;
uint32_t valid;
float ddxDerivCheck;
uint32_t quadLaneIndex;
uint32_t laneIndex;
uint32_t subgroupSize;
uint32_t globalBallot[4];
uint32_t electBallot[4];
uint32_t helperBallot[4];
uint32_t numSubgroups; // may be packed oddly so we don't assume we can calculate
uint32_t padding[3];
// LaneData lanes[N]
// each LaneData is prefixed by the subgroup struct below if needed, and then the stage struct unconditionally
};
// things we need per-lane with subgroups active, before any per-stage data
struct SubgroupLaneData
{
uint32_t elect; // for OpGroupNonUniformElect, if we don't have ballot
uint32_t isActive; // per lane active mask
uint32_t padding[2];
};
struct VertexLaneData
{
uint32_t inst; // allow/expect instance to vary across subgroup just in case
uint32_t vert; // vertex id (either auto-generated or index)
uint32_t view; // multiview view (if used)
uint32_t padding;
};
struct PixelLaneData
{
Vec4f fragCoord; // per-lane coord
uint32_t isHelper; // per-lane helper bit
uint32_t quadId; // the per-quad ID shared among all 4 threads, to differentiate between quads.
// is the laneIndex of the top-left thread (with an offset, so we can see 0 as invalid)
uint32_t quadLaneIndex; // the quadLaneIndex for quad-neighbours, in case we are fetching a subgroup
uint32_t padding;
};
struct ComputeLaneData
{
uint32_t threadid[3]; // per-lane thread id (in case it's not trivial)
uint32_t subIdxInGroup;
};
typedef ShaderVariable (*ExtInstImpl)(ThreadState &, uint32_t, const rdcarray<Id> &);
struct ExtInstDispatcher
+95 -152
View File
@@ -3568,64 +3568,6 @@ enum class SubgroupCapability : uint32_t
static const uint32_t validMagicNumber = 12345;
static const uint32_t NumReservedBindings = 1;
// things we need to readback once per hit thread
struct ResultDataBase
{
Vec4f pos;
uint32_t prim;
uint32_t sample;
uint32_t view;
uint32_t valid;
float ddxDerivCheck;
uint32_t quadLaneIndex;
uint32_t laneIndex;
uint32_t subgroupSize;
uint32_t globalBallot[4];
uint32_t electBallot[4];
uint32_t helperBallot[4];
uint32_t numSubgroups; // may be packed oddly so we don't assume we can calculate
uint32_t padding[3];
// LaneData lanes[N]
// each LaneData is prefixed by the subgroup struct below if needed, and then the stage struct unconditionally
};
// things we need per-lane with subgroups active, before any per-stage data
struct SubgroupLaneData
{
uint32_t elect; // for OpGroupNonUniformElect, if we don't have ballot
uint32_t isActive; // per lane active mask
uint32_t padding[2];
};
struct VertexLaneData
{
uint32_t inst; // allow/expect instance to vary across subgroup just in case
uint32_t vert; // vertex id (either auto-generated or index)
uint32_t view; // multiview view (if used)
uint32_t padding;
};
struct PixelLaneData
{
Vec4f fragCoord; // per-lane coord
uint32_t isHelper; // per-lane helper bit
uint32_t quadId; // the per-quad ID shared among all 4 threads, to differentiate between quads.
// is the laneIndex of the top-left thread (with an offset, so we can see 0 as invalid)
uint32_t quadLaneIndex; // the quadLaneIndex for quad-neighbours, in case we are fetching a subgroup
uint32_t padding;
};
struct ComputeLaneData
{
uint32_t threadid[3]; // per-lane thread id (in case it's not trivial)
uint32_t subIdxInGroup;
};
// we use the message passing method from the quadoverdraw to swap data between quad neighbours
// using fine derivatives. This is based on "Shader Amortization using Pixel Quad Message Passing",
// Eric Penner, GPU Pro 2.
@@ -3848,17 +3790,17 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
switch(stage)
{
case ShaderStage::Vertex: structStride += sizeof(VertexLaneData); break;
case ShaderStage::Pixel: structStride += sizeof(PixelLaneData); break;
case ShaderStage::Vertex: structStride += sizeof(rdcspv::VertexLaneData); break;
case ShaderStage::Pixel: structStride += sizeof(rdcspv::PixelLaneData); break;
case ShaderStage::Task:
case ShaderStage::Mesh:
case ShaderStage::Compute: structStride += sizeof(ComputeLaneData); break;
case ShaderStage::Compute: structStride += sizeof(rdcspv::ComputeLaneData); break;
default: break;
}
if(threadScope & rdcspv::ThreadScope::Subgroup)
{
structStride += sizeof(SubgroupLaneData);
structStride += sizeof(rdcspv::SubgroupLaneData);
}
// simulating full subgroups with ballot ability to read other lanes, we read all lanes data
@@ -3990,7 +3932,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
}
laneValues.push_back(elect);
structMembers.push_back(
{uint32Type, elect.name, offset + (uint32_t)offsetof(SubgroupLaneData, elect)});
{uint32Type, elect.name, offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, elect)});
// we implicitly only write data for active lanes so we just set isActive to 1 always
laneValue isActive;
@@ -4000,19 +3942,19 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
isActive.base = editor.AddConstantImmediate<uint32_t>(1);
isActive.flat = true;
laneValues.push_back(isActive);
structMembers.push_back(
{uint32Type, isActive.name, offset + (uint32_t)offsetof(SubgroupLaneData, isActive)});
structMembers.push_back({uint32Type, isActive.name,
offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, isActive)});
structMembers.push_back(
{uint32Type, "__pad", offset + (uint32_t)offsetof(SubgroupLaneData, padding)});
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, padding)});
structMembers.push_back(
{uint32Type, "__pad",
uint32_t(offset + offsetof(SubgroupLaneData, padding) + sizeof(uint32_t))});
uint32_t(offset + offsetof(rdcspv::SubgroupLaneData, padding) + sizeof(uint32_t))});
offset += sizeof(SubgroupLaneData);
RDCCOMPILE_ASSERT(
(sizeof(SubgroupLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(SubgroupLaneData),
"SubgroupLaneData is misaligned, ensure 16-byte aligned");
offset += sizeof(rdcspv::SubgroupLaneData);
RDCCOMPILE_ASSERT((sizeof(rdcspv::SubgroupLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
sizeof(rdcspv::SubgroupLaneData),
"SubgroupLaneData is misaligned, ensure 16-byte aligned");
}
if(stage == ShaderStage::Vertex)
@@ -4026,7 +3968,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(inst.base, inst.name);
laneValues.push_back(inst);
structMembers.push_back(
{uint32Type, inst.name, offset + (uint32_t)offsetof(VertexLaneData, inst)});
{uint32Type, inst.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, inst)});
laneValue vert;
vert.name = "__rd_vert";
@@ -4037,7 +3979,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(vert.base, vert.name);
laneValues.push_back(vert);
structMembers.push_back(
{uint32Type, vert.name, offset + (uint32_t)offsetof(VertexLaneData, vert)});
{uint32Type, vert.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, vert)});
if(useViewIndex)
{
@@ -4050,23 +3992,23 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(view.base, view.name);
laneValues.push_back(view);
structMembers.push_back(
{uint32Type, view.name, offset + (uint32_t)offsetof(VertexLaneData, view)});
{uint32Type, view.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, view)});
structMembers.push_back(
{uint32Type, "__pad", offset + (uint32_t)offsetof(VertexLaneData, padding)});
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, padding)});
}
else
{
structMembers.push_back(
{uint32Type, "__rd_view", offset + (uint32_t)offsetof(VertexLaneData, view)});
{uint32Type, "__rd_view", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, view)});
structMembers.push_back(
{uint32Type, "__pad", offset + (uint32_t)offsetof(VertexLaneData, padding)});
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, padding)});
}
offset += sizeof(VertexLaneData);
RDCCOMPILE_ASSERT(
(sizeof(VertexLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(VertexLaneData),
"VertexLaneData is misaligned, ensure 16-byte aligned");
offset += sizeof(rdcspv::VertexLaneData);
RDCCOMPILE_ASSERT((sizeof(rdcspv::VertexLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
sizeof(rdcspv::VertexLaneData),
"VertexLaneData is misaligned, ensure 16-byte aligned");
}
else if(stage == ShaderStage::Pixel)
{
@@ -4078,8 +4020,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
rdcspv::BuiltIn::FragCoord, float4Type);
editor.SetName(fragCoord.base, fragCoord.name);
laneValues.push_back(fragCoord);
structMembers.push_back(
{float4Type, fragCoord.name, offset + (uint32_t)offsetof(PixelLaneData, fragCoord)});
structMembers.push_back({float4Type, fragCoord.name,
offset + (uint32_t)offsetof(rdcspv::PixelLaneData, fragCoord)});
laneValue helper;
helper.name = "__rd_isHelper";
@@ -4093,7 +4035,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(helper.base, helper.name);
laneValues.push_back(helper);
structMembers.push_back(
{uint32Type, helper.name, offset + (uint32_t)offsetof(PixelLaneData, isHelper)});
{uint32Type, helper.name, offset + (uint32_t)offsetof(rdcspv::PixelLaneData, isHelper)});
laneValue quad;
quad.name = "__rd_quadId";
@@ -4105,7 +4047,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(quad.base, quad.name);
laneValues.push_back(quad);
structMembers.push_back(
{uint32Type, quad.name, offset + (uint32_t)offsetof(PixelLaneData, quadId)});
{uint32Type, quad.name, offset + (uint32_t)offsetof(rdcspv::PixelLaneData, quadId)});
laneValue quadLane;
quadLane.name = "__rd_quadLane";
@@ -4114,8 +4056,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
quadLane.base = editor.MakeId();
editor.SetName(quadLane.base, quadLane.name);
laneValues.push_back(quadLane);
structMembers.push_back(
{uint32Type, quadLane.name, offset + (uint32_t)offsetof(PixelLaneData, quadLaneIndex)});
structMembers.push_back({uint32Type, quadLane.name,
offset + (uint32_t)offsetof(rdcspv::PixelLaneData, quadLaneIndex)});
// quad properties will be handled specially
isHelper = helper.base;
@@ -4123,12 +4065,12 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
quadLaneIndex = quadLane.base;
structMembers.push_back(
{uint32Type, "__pad", offset + (uint32_t)offsetof(PixelLaneData, padding)});
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::PixelLaneData, padding)});
offset += sizeof(PixelLaneData);
RDCCOMPILE_ASSERT(
(sizeof(PixelLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(PixelLaneData),
"PixelLaneData is misaligned, ensure 16-byte aligned");
offset += sizeof(rdcspv::PixelLaneData);
RDCCOMPILE_ASSERT((sizeof(rdcspv::PixelLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
sizeof(rdcspv::PixelLaneData),
"PixelLaneData is misaligned, ensure 16-byte aligned");
}
else if(stage == ShaderStage::Compute || stage == ShaderStage::Task || stage == ShaderStage::Mesh)
{
@@ -4140,8 +4082,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
rdcspv::BuiltIn::LocalInvocationId, uint3Type);
editor.SetName(threadid.base, threadid.name);
laneValues.push_back(threadid);
structMembers.push_back(
{uint3Type, threadid.name, offset + (uint32_t)offsetof(ComputeLaneData, threadid)});
structMembers.push_back({uint3Type, threadid.name,
offset + (uint32_t)offsetof(rdcspv::ComputeLaneData, threadid)});
laneValue subid;
subid.name = "__rd_subgroupid";
@@ -4151,13 +4093,13 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
rdcspv::BuiltIn::SubgroupId, uint32Type);
editor.SetName(subid.base, subid.name);
laneValues.push_back(subid);
structMembers.push_back(
{uint32Type, subid.name, offset + (uint32_t)offsetof(ComputeLaneData, subIdxInGroup)});
structMembers.push_back({uint32Type, subid.name,
offset + (uint32_t)offsetof(rdcspv::ComputeLaneData, subIdxInGroup)});
offset += sizeof(ComputeLaneData);
RDCCOMPILE_ASSERT(
(sizeof(ComputeLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(ComputeLaneData),
"ComputeLaneData is misaligned, ensure 16-byte aligned");
offset += sizeof(rdcspv::ComputeLaneData);
RDCCOMPILE_ASSERT((sizeof(rdcspv::ComputeLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
sizeof(rdcspv::ComputeLaneData),
"ComputeLaneData is misaligned, ensure 16-byte aligned");
}
// now add input signature values
@@ -4314,6 +4256,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
editor.SetName(destInstance, "destInstance");
editor.SetName(destVertex, "destVertex");
editor.SetName(destView, "destView");
rdcspv::Id ResultDataBaseType;
@@ -4336,23 +4279,23 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
{
rdcarray<rdcspv::StructMember> members;
members.push_back({float4Type, "pos", offsetof(ResultDataBase, pos)});
members.push_back({uint32Type, "prim", offsetof(ResultDataBase, prim)});
members.push_back({uint32Type, "sample", offsetof(ResultDataBase, sample)});
members.push_back({uint32Type, "view", offsetof(ResultDataBase, view)});
members.push_back({uint32Type, "valid", offsetof(ResultDataBase, valid)});
members.push_back({floatType, "ddxDerivCheck", offsetof(ResultDataBase, ddxDerivCheck)});
members.push_back({uint32Type, "quadLaneIndex", offsetof(ResultDataBase, quadLaneIndex)});
members.push_back({uint32Type, "laneIndex", offsetof(ResultDataBase, laneIndex)});
members.push_back({uint32Type, "subgroupSize", offsetof(ResultDataBase, subgroupSize)});
members.push_back({uint4Type, "globalBallot", offsetof(ResultDataBase, globalBallot)});
members.push_back({uint4Type, "electBallot", offsetof(ResultDataBase, electBallot)});
members.push_back({uint4Type, "helperBallot", offsetof(ResultDataBase, helperBallot)});
members.push_back({uint32Type, "numSubgroups", offsetof(ResultDataBase, numSubgroups)});
members.push_back({float4Type, "pos", offsetof(rdcspv::ResultDataBase, pos)});
members.push_back({uint32Type, "prim", offsetof(rdcspv::ResultDataBase, prim)});
members.push_back({uint32Type, "sample", offsetof(rdcspv::ResultDataBase, sample)});
members.push_back({uint32Type, "view", offsetof(rdcspv::ResultDataBase, view)});
members.push_back({uint32Type, "valid", offsetof(rdcspv::ResultDataBase, valid)});
members.push_back({floatType, "ddxDerivCheck", offsetof(rdcspv::ResultDataBase, ddxDerivCheck)});
members.push_back({uint32Type, "quadLaneIndex", offsetof(rdcspv::ResultDataBase, quadLaneIndex)});
members.push_back({uint32Type, "laneIndex", offsetof(rdcspv::ResultDataBase, laneIndex)});
members.push_back({uint32Type, "subgroupSize", offsetof(rdcspv::ResultDataBase, subgroupSize)});
members.push_back({uint4Type, "globalBallot", offsetof(rdcspv::ResultDataBase, globalBallot)});
members.push_back({uint4Type, "electBallot", offsetof(rdcspv::ResultDataBase, electBallot)});
members.push_back({uint4Type, "helperBallot", offsetof(rdcspv::ResultDataBase, helperBallot)});
members.push_back({uint32Type, "numSubgroups", offsetof(rdcspv::ResultDataBase, numSubgroups)});
// uint3 padding
const uint32_t dataStart = (uint32_t)AlignUp(sizeof(ResultDataBase), sizeof(Vec4f));
const uint32_t dataStart = (uint32_t)AlignUp(sizeof(rdcspv::ResultDataBase), sizeof(Vec4f));
RDCASSERT((structStride % sizeof(Vec4f)) == 0);
@@ -4369,9 +4312,9 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
rdcspv::Id ResultDataRTArray =
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), ResultDataBaseType));
editor.AddDecoration(rdcspv::OpDecorate(ResultDataRTArray,
rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(
structStride * numLanes + sizeof(ResultDataBase))));
editor.AddDecoration(rdcspv::OpDecorate(
ResultDataRTArray, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(
structStride * numLanes + sizeof(rdcspv::ResultDataBase))));
rdcspv::Id bufBase =
editor.DeclareStructType("__rd_HitStorage", {
@@ -5227,16 +5170,16 @@ rdcpair<uint32_t, uint32_t> GetAlignAndOutputSize(VulkanCreationInfo::ShaderModu
uint32_t structStride = (uint32_t)shadRefl.refl->inputSignature.size() * paramAlign;
if(shadRefl.refl->stage == ShaderStage::Vertex)
structStride += sizeof(VertexLaneData);
structStride += sizeof(rdcspv::VertexLaneData);
else if(shadRefl.refl->stage == ShaderStage::Pixel)
structStride += sizeof(PixelLaneData);
structStride += sizeof(rdcspv::PixelLaneData);
else if(shadRefl.refl->stage == ShaderStage::Compute ||
shadRefl.refl->stage == ShaderStage::Task || shadRefl.refl->stage == ShaderStage::Mesh)
structStride += sizeof(ComputeLaneData);
structStride += sizeof(rdcspv::ComputeLaneData);
if(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup)
{
structStride += sizeof(SubgroupLaneData);
structStride += sizeof(rdcspv::SubgroupLaneData);
}
return {paramAlign, structStride};
@@ -5477,8 +5420,8 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
uint32_t maxHits = 4; // we should only ever get one hit
// struct size is ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(ResultDataBase) + structStride * numThreads;
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * numThreads;
VkDeviceSize feedbackStorageSize = maxHits * structSize + 1024;
@@ -5577,7 +5520,7 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
base += sizeof(Vec4f);
ResultDataBase *winner = (ResultDataBase *)base;
rdcspv::ResultDataBase *winner = (rdcspv::ResultDataBase *)base;
if(winner->valid != validMagicNumber)
{
@@ -5594,9 +5537,9 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
rdcspv::Debugger *debugger = new rdcspv::Debugger;
debugger->Parse(shader.spirv.GetSPIRV());
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
// output is in input signature order.
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
// The output is in input signature order.
byte *LaneData = (byte *)(winner + 1);
numThreads = 4;
@@ -5616,17 +5559,17 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
byte *value = LaneData + t * structStride;
{
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = subgroupData->isActive;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Elected] = subgroupData->elect;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::SubgroupId] = t;
value += sizeof(SubgroupLaneData);
value += sizeof(rdcspv::SubgroupLaneData);
}
// read VertexLaneData
{
VertexLaneData *vertData = (VertexLaneData *)value;
rdcspv::VertexLaneData *vertData = (rdcspv::VertexLaneData *)value;
apiWrapper->thread_builtins[t][ShaderBuiltin::InstanceIndex] =
ShaderVariable("InstanceIndex"_lit, vertData->inst, 0U, 0U, 0U);
@@ -5638,7 +5581,7 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
if(view != ~0U)
RDCASSERTEQUAL(vertData->view, view);
}
value += sizeof(VertexLaneData);
value += sizeof(rdcspv::VertexLaneData);
for(size_t i = 0; i < shadRefl.refl->inputSignature.size(); i++)
{
@@ -6029,8 +5972,8 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
uint32_t overdrawLevels = 100; // maximum number of overdraw levels
// struct size is ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(ResultDataBase) + structStride * numThreads;
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * numThreads;
VkDeviceSize feedbackStorageSize = overdrawLevels * structSize + sizeof(Vec4f) + 1024;
@@ -6135,7 +6078,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
base += sizeof(Vec4f);
ResultDataBase *winner = NULL;
rdcspv::ResultDataBase *winner = NULL;
RDCLOG("Got %u hit candidates out of %u total instances", hit_count, total_count);
@@ -6153,7 +6096,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
for(uint32_t i = 0; i < hit_count; i++)
{
ResultDataBase *hit = (ResultDataBase *)(base + structSize * i);
rdcspv::ResultDataBase *hit = (rdcspv::ResultDataBase *)(base + structSize * i);
if(hit->valid != validMagicNumber)
{
@@ -6253,9 +6196,9 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
rdcspv::Debugger *debugger = new rdcspv::Debugger;
debugger->Parse(shader.spirv.GetSPIRV());
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
// output is in input signature order.
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
// The output is in input signature order.
byte *LaneData = (byte *)(winner + 1);
numThreads = 4;
@@ -6276,17 +6219,17 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
if(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup)
{
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = subgroupData->isActive;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Elected] = subgroupData->elect;
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::SubgroupId] = t;
value += sizeof(SubgroupLaneData);
value += sizeof(rdcspv::SubgroupLaneData);
}
// read PixelLaneData
{
PixelLaneData *pixelData = (PixelLaneData *)value;
rdcspv::PixelLaneData *pixelData = (rdcspv::PixelLaneData *)value;
{
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::Position];
@@ -6315,7 +6258,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadLane] =
pixelData->quadLaneIndex;
}
value += sizeof(PixelLaneData);
value += sizeof(rdcspv::PixelLaneData);
for(size_t i = 0; i < shadRefl.refl->inputSignature.size(); i++)
{
@@ -6568,8 +6511,8 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
uint32_t maxHits = 4; // we should only ever get one hit
// struct size is ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(ResultDataBase) + structStride * maxSubgroupSize;
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * maxSubgroupSize;
VkDeviceSize feedbackStorageSize = maxHits * structSize + 1024;
@@ -6681,7 +6624,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
base += sizeof(Vec4f);
ResultDataBase *winner = (ResultDataBase *)base;
rdcspv::ResultDataBase *winner = (rdcspv::ResultDataBase *)base;
if(winner->valid != validMagicNumber)
{
@@ -6698,9 +6641,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
rdcspv::Debugger *debugger = new rdcspv::Debugger;
debugger->Parse(shader.spirv.GetSPIRV());
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
// output is in input signature order.
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
// The output is in input signature order.
byte *LaneData = (byte *)(winner + 1);
const uint32_t subgroupSize = winner->subgroupSize;
@@ -6727,11 +6670,11 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
{
byte *value = LaneData + t * structStride;
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
value += sizeof(SubgroupLaneData);
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
value += sizeof(rdcspv::SubgroupLaneData);
ComputeLaneData *compData = (ComputeLaneData *)value;
value += sizeof(ComputeLaneData);
rdcspv::ComputeLaneData *compData = (rdcspv::ComputeLaneData *)value;
value += sizeof(rdcspv::ComputeLaneData);
uint32_t lane = t;