mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
Move SPIR-V debugging input fetch lane data to SPIR-V headers
This commit is contained in:
@@ -159,6 +159,64 @@ public:
|
||||
virtual bool QueuedOpsHasSpace() = 0;
|
||||
};
|
||||
|
||||
// things we need to readback once per hit thread
//
// Fixed-size header of one hit record in the input fetch feedback buffer. The matching SPIR-V
// struct is declared member-by-member using offsetof() on this type, so the order and size of the
// members here define the layout on both sides - change both together.
|
||||
struct ResultDataBase
|
||||
{
|
||||
// declared as a float4 member on the SPIR-V side
Vec4f pos;
|
||||
|
||||
uint32_t prim;
|
||||
uint32_t sample;
|
||||
uint32_t view;
|
||||
// readback code compares this against validMagicNumber before using a hit record
uint32_t valid;
|
||||
|
||||
float ddxDerivCheck;
|
||||
uint32_t quadLaneIndex;
|
||||
uint32_t laneIndex;
|
||||
// read back by the compute debugging path to learn the subgroup size of the hit
uint32_t subgroupSize;
|
||||
|
||||
// the three ballots below are each declared as a single uint4 member on the SPIR-V side
uint32_t globalBallot[4];
|
||||
uint32_t electBallot[4];
|
||||
uint32_t helperBallot[4];
|
||||
|
||||
uint32_t numSubgroups; // may be packed oddly so we don't assume we can calculate
|
||||
// the SPIR-V declaration only notes this as "uint3 padding".
// NOTE(review): presumably rounds the header up to a multiple of sizeof(Vec4f) - confirm
uint32_t padding[3];
|
||||
|
||||
// the readback code treats the bytes directly after this header as the per-lane array and steps
// through it by the per-lane struct stride
// LaneData lanes[N]
|
||||
// each LaneData is prefixed by the subgroup struct below if needed, and then the stage struct unconditionally
|
||||
};
|
||||
|
||||
// things we need per-lane with subgroups active, before any per-stage data
//
// The readback code reads this first from each lane's record and copies isActive / elect into the
// Active / Elected thread properties, then advances past it to the per-stage struct. A compile-time
// assert in the fetch shader generator requires sizeof() to be a whole multiple of sizeof(Vec4f).
|
||||
struct SubgroupLaneData
|
||||
{
|
||||
uint32_t elect; // for OpGroupNonUniformElect, if we don't have ballot
|
||||
// the fetch shader writes a constant 1 here, as data is only written for active lanes
uint32_t isActive; // per lane active mask
|
||||
// declared as two separate "__pad" uint members on the SPIR-V side
uint32_t padding[2];
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the vertex stage. Member offsets are mirrored with offsetof() when the
// SPIR-V struct is declared, and a compile-time assert in the fetch shader generator requires
// sizeof() to be a whole multiple of sizeof(Vec4f).
struct VertexLaneData
|
||||
{
|
||||
// read back into the InstanceIndex builtin for the lane
uint32_t inst; // allow/expect instance to vary across subgroup just in case
|
||||
uint32_t vert; // vertex id (either auto-generated or index)
|
||||
// always declared in the SPIR-V struct; a lane value is only attached when the view index is in
// use. On readback it is asserted equal to the requested view unless that view is ~0U
uint32_t view; // multiview view (if used)
|
||||
uint32_t padding;
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the pixel stage. Member offsets are mirrored with offsetof() when the
// SPIR-V struct is declared, and a compile-time assert in the fetch shader generator requires
// sizeof() to be a whole multiple of sizeof(Vec4f).
struct PixelLaneData
|
||||
{
|
||||
// declared as a float4 on the SPIR-V side, sourced from the FragCoord builtin
Vec4f fragCoord; // per-lane coord
|
||||
uint32_t isHelper; // per-lane helper bit
|
||||
// (the comment on quadId continues on the following comment line)
uint32_t quadId; // the per-quad ID shared among all 4 threads, to differentiate between quads.
|
||||
// is the laneIndex of the top-left thread (with an offset, so we can see 0 as invalid)
|
||||
// read back into the QuadLane thread property for the lane
uint32_t quadLaneIndex; // the quadLaneIndex for quad-neighbours, in case we are fetching a subgroup
|
||||
uint32_t padding;
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the compute, task and mesh stages (all three use this struct's size
// for the per-lane stride). Member offsets are mirrored with offsetof() when the SPIR-V struct is
// declared, and a compile-time assert in the fetch shader generator requires sizeof() to be a
// whole multiple of sizeof(Vec4f).
struct ComputeLaneData
|
||||
{
|
||||
// declared as a single uint3 on the SPIR-V side, sourced from the LocalInvocationId builtin
uint32_t threadid[3]; // per-lane thread id (in case it's not trivial)
|
||||
// sourced from the SubgroupId builtin
uint32_t subIdxInGroup;
|
||||
};
|
||||
|
||||
typedef ShaderVariable (*ExtInstImpl)(ThreadState &, uint32_t, const rdcarray<Id> &);
|
||||
|
||||
struct ExtInstDispatcher
|
||||
|
||||
@@ -3568,64 +3568,6 @@ enum class SubgroupCapability : uint32_t
|
||||
static const uint32_t validMagicNumber = 12345;
|
||||
static const uint32_t NumReservedBindings = 1;
|
||||
|
||||
// things we need to readback once per hit thread
//
// Fixed-size header of one hit record in the input fetch feedback buffer. The matching SPIR-V
// struct is declared member-by-member using offsetof() on this type, so the order and size of the
// members here define the layout on both sides - change both together.
|
||||
struct ResultDataBase
|
||||
{
|
||||
// declared as a float4 member on the SPIR-V side
Vec4f pos;
|
||||
|
||||
uint32_t prim;
|
||||
uint32_t sample;
|
||||
uint32_t view;
|
||||
// readback code compares this against validMagicNumber before using a hit record
uint32_t valid;
|
||||
|
||||
float ddxDerivCheck;
|
||||
uint32_t quadLaneIndex;
|
||||
uint32_t laneIndex;
|
||||
// read back by the compute debugging path to learn the subgroup size of the hit
uint32_t subgroupSize;
|
||||
|
||||
// the three ballots below are each declared as a single uint4 member on the SPIR-V side
uint32_t globalBallot[4];
|
||||
uint32_t electBallot[4];
|
||||
uint32_t helperBallot[4];
|
||||
|
||||
uint32_t numSubgroups; // may be packed oddly so we don't assume we can calculate
|
||||
// the SPIR-V declaration only notes this as "uint3 padding".
// NOTE(review): presumably rounds the header up to a multiple of sizeof(Vec4f) - confirm
uint32_t padding[3];
|
||||
|
||||
// the readback code treats the bytes directly after this header as the per-lane array and steps
// through it by the per-lane struct stride
// LaneData lanes[N]
|
||||
// each LaneData is prefixed by the subgroup struct below if needed, and then the stage struct unconditionally
|
||||
};
|
||||
|
||||
// things we need per-lane with subgroups active, before any per-stage data
//
// The readback code reads this first from each lane's record and copies isActive / elect into the
// Active / Elected thread properties, then advances past it to the per-stage struct. A compile-time
// assert in the fetch shader generator requires sizeof() to be a whole multiple of sizeof(Vec4f).
|
||||
struct SubgroupLaneData
|
||||
{
|
||||
uint32_t elect; // for OpGroupNonUniformElect, if we don't have ballot
|
||||
// the fetch shader writes a constant 1 here, as data is only written for active lanes
uint32_t isActive; // per lane active mask
|
||||
// declared as two separate "__pad" uint members on the SPIR-V side
uint32_t padding[2];
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the vertex stage. Member offsets are mirrored with offsetof() when the
// SPIR-V struct is declared, and a compile-time assert in the fetch shader generator requires
// sizeof() to be a whole multiple of sizeof(Vec4f).
struct VertexLaneData
|
||||
{
|
||||
// read back into the InstanceIndex builtin for the lane
uint32_t inst; // allow/expect instance to vary across subgroup just in case
|
||||
uint32_t vert; // vertex id (either auto-generated or index)
|
||||
// always declared in the SPIR-V struct; a lane value is only attached when the view index is in
// use. On readback it is asserted equal to the requested view unless that view is ~0U
uint32_t view; // multiview view (if used)
|
||||
uint32_t padding;
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the pixel stage. Member offsets are mirrored with offsetof() when the
// SPIR-V struct is declared, and a compile-time assert in the fetch shader generator requires
// sizeof() to be a whole multiple of sizeof(Vec4f).
struct PixelLaneData
|
||||
{
|
||||
// declared as a float4 on the SPIR-V side, sourced from the FragCoord builtin
Vec4f fragCoord; // per-lane coord
|
||||
uint32_t isHelper; // per-lane helper bit
|
||||
// (the comment on quadId continues on the following comment line)
uint32_t quadId; // the per-quad ID shared among all 4 threads, to differentiate between quads.
|
||||
// is the laneIndex of the top-left thread (with an offset, so we can see 0 as invalid)
|
||||
// read back into the QuadLane thread property for the lane
uint32_t quadLaneIndex; // the quadLaneIndex for quad-neighbours, in case we are fetching a subgroup
|
||||
uint32_t padding;
|
||||
};
|
||||
|
||||
// Per-lane data fetched for the compute, task and mesh stages (all three use this struct's size
// for the per-lane stride). Member offsets are mirrored with offsetof() when the SPIR-V struct is
// declared, and a compile-time assert in the fetch shader generator requires sizeof() to be a
// whole multiple of sizeof(Vec4f).
struct ComputeLaneData
|
||||
{
|
||||
// declared as a single uint3 on the SPIR-V side, sourced from the LocalInvocationId builtin
uint32_t threadid[3]; // per-lane thread id (in case it's not trivial)
|
||||
// sourced from the SubgroupId builtin
uint32_t subIdxInGroup;
|
||||
};
|
||||
|
||||
// we use the message passing method from the quadoverdraw to swap data between quad neighbours
|
||||
// using fine derivatives. This is based on "Shader Amortization using Pixel Quad Message Passing",
|
||||
// Eric Penner, GPU Pro 2.
|
||||
@@ -3848,17 +3790,17 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
|
||||
switch(stage)
|
||||
{
|
||||
case ShaderStage::Vertex: structStride += sizeof(VertexLaneData); break;
|
||||
case ShaderStage::Pixel: structStride += sizeof(PixelLaneData); break;
|
||||
case ShaderStage::Vertex: structStride += sizeof(rdcspv::VertexLaneData); break;
|
||||
case ShaderStage::Pixel: structStride += sizeof(rdcspv::PixelLaneData); break;
|
||||
case ShaderStage::Task:
|
||||
case ShaderStage::Mesh:
|
||||
case ShaderStage::Compute: structStride += sizeof(ComputeLaneData); break;
|
||||
case ShaderStage::Compute: structStride += sizeof(rdcspv::ComputeLaneData); break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if(threadScope & rdcspv::ThreadScope::Subgroup)
|
||||
{
|
||||
structStride += sizeof(SubgroupLaneData);
|
||||
structStride += sizeof(rdcspv::SubgroupLaneData);
|
||||
}
|
||||
|
||||
// simulating full subgroups with ballot ability to read other lanes, we read all lanes data
|
||||
@@ -3990,7 +3932,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
}
|
||||
laneValues.push_back(elect);
|
||||
structMembers.push_back(
|
||||
{uint32Type, elect.name, offset + (uint32_t)offsetof(SubgroupLaneData, elect)});
|
||||
{uint32Type, elect.name, offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, elect)});
|
||||
|
||||
// we implicitly only write data for active lanes so we just set isActive to 1 always
|
||||
laneValue isActive;
|
||||
@@ -4000,19 +3942,19 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
isActive.base = editor.AddConstantImmediate<uint32_t>(1);
|
||||
isActive.flat = true;
|
||||
laneValues.push_back(isActive);
|
||||
structMembers.push_back(
|
||||
{uint32Type, isActive.name, offset + (uint32_t)offsetof(SubgroupLaneData, isActive)});
|
||||
structMembers.push_back({uint32Type, isActive.name,
|
||||
offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, isActive)});
|
||||
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(SubgroupLaneData, padding)});
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::SubgroupLaneData, padding)});
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__pad",
|
||||
uint32_t(offset + offsetof(SubgroupLaneData, padding) + sizeof(uint32_t))});
|
||||
uint32_t(offset + offsetof(rdcspv::SubgroupLaneData, padding) + sizeof(uint32_t))});
|
||||
|
||||
offset += sizeof(SubgroupLaneData);
|
||||
RDCCOMPILE_ASSERT(
|
||||
(sizeof(SubgroupLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(SubgroupLaneData),
|
||||
"SubgroupLaneData is misaligned, ensure 16-byte aligned");
|
||||
offset += sizeof(rdcspv::SubgroupLaneData);
|
||||
RDCCOMPILE_ASSERT((sizeof(rdcspv::SubgroupLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
|
||||
sizeof(rdcspv::SubgroupLaneData),
|
||||
"SubgroupLaneData is misaligned, ensure 16-byte aligned");
|
||||
}
|
||||
|
||||
if(stage == ShaderStage::Vertex)
|
||||
@@ -4026,7 +3968,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
editor.SetName(inst.base, inst.name);
|
||||
laneValues.push_back(inst);
|
||||
structMembers.push_back(
|
||||
{uint32Type, inst.name, offset + (uint32_t)offsetof(VertexLaneData, inst)});
|
||||
{uint32Type, inst.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, inst)});
|
||||
|
||||
laneValue vert;
|
||||
vert.name = "__rd_vert";
|
||||
@@ -4037,7 +3979,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
editor.SetName(vert.base, vert.name);
|
||||
laneValues.push_back(vert);
|
||||
structMembers.push_back(
|
||||
{uint32Type, vert.name, offset + (uint32_t)offsetof(VertexLaneData, vert)});
|
||||
{uint32Type, vert.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, vert)});
|
||||
|
||||
if(useViewIndex)
|
||||
{
|
||||
@@ -4050,23 +3992,23 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
editor.SetName(view.base, view.name);
|
||||
laneValues.push_back(view);
|
||||
structMembers.push_back(
|
||||
{uint32Type, view.name, offset + (uint32_t)offsetof(VertexLaneData, view)});
|
||||
{uint32Type, view.name, offset + (uint32_t)offsetof(rdcspv::VertexLaneData, view)});
|
||||
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(VertexLaneData, padding)});
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, padding)});
|
||||
}
|
||||
else
|
||||
{
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__rd_view", offset + (uint32_t)offsetof(VertexLaneData, view)});
|
||||
{uint32Type, "__rd_view", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, view)});
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(VertexLaneData, padding)});
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::VertexLaneData, padding)});
|
||||
}
|
||||
|
||||
offset += sizeof(VertexLaneData);
|
||||
RDCCOMPILE_ASSERT(
|
||||
(sizeof(VertexLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(VertexLaneData),
|
||||
"VertexLaneData is misaligned, ensure 16-byte aligned");
|
||||
offset += sizeof(rdcspv::VertexLaneData);
|
||||
RDCCOMPILE_ASSERT((sizeof(rdcspv::VertexLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
|
||||
sizeof(rdcspv::VertexLaneData),
|
||||
"VertexLaneData is misaligned, ensure 16-byte aligned");
|
||||
}
|
||||
else if(stage == ShaderStage::Pixel)
|
||||
{
|
||||
@@ -4078,8 +4020,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
rdcspv::BuiltIn::FragCoord, float4Type);
|
||||
editor.SetName(fragCoord.base, fragCoord.name);
|
||||
laneValues.push_back(fragCoord);
|
||||
structMembers.push_back(
|
||||
{float4Type, fragCoord.name, offset + (uint32_t)offsetof(PixelLaneData, fragCoord)});
|
||||
structMembers.push_back({float4Type, fragCoord.name,
|
||||
offset + (uint32_t)offsetof(rdcspv::PixelLaneData, fragCoord)});
|
||||
|
||||
laneValue helper;
|
||||
helper.name = "__rd_isHelper";
|
||||
@@ -4093,7 +4035,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
editor.SetName(helper.base, helper.name);
|
||||
laneValues.push_back(helper);
|
||||
structMembers.push_back(
|
||||
{uint32Type, helper.name, offset + (uint32_t)offsetof(PixelLaneData, isHelper)});
|
||||
{uint32Type, helper.name, offset + (uint32_t)offsetof(rdcspv::PixelLaneData, isHelper)});
|
||||
|
||||
laneValue quad;
|
||||
quad.name = "__rd_quadId";
|
||||
@@ -4105,7 +4047,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
editor.SetName(quad.base, quad.name);
|
||||
laneValues.push_back(quad);
|
||||
structMembers.push_back(
|
||||
{uint32Type, quad.name, offset + (uint32_t)offsetof(PixelLaneData, quadId)});
|
||||
{uint32Type, quad.name, offset + (uint32_t)offsetof(rdcspv::PixelLaneData, quadId)});
|
||||
|
||||
laneValue quadLane;
|
||||
quadLane.name = "__rd_quadLane";
|
||||
@@ -4114,8 +4056,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
quadLane.base = editor.MakeId();
|
||||
editor.SetName(quadLane.base, quadLane.name);
|
||||
laneValues.push_back(quadLane);
|
||||
structMembers.push_back(
|
||||
{uint32Type, quadLane.name, offset + (uint32_t)offsetof(PixelLaneData, quadLaneIndex)});
|
||||
structMembers.push_back({uint32Type, quadLane.name,
|
||||
offset + (uint32_t)offsetof(rdcspv::PixelLaneData, quadLaneIndex)});
|
||||
|
||||
// quad properties will be handled specially
|
||||
isHelper = helper.base;
|
||||
@@ -4123,12 +4065,12 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
quadLaneIndex = quadLane.base;
|
||||
|
||||
structMembers.push_back(
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(PixelLaneData, padding)});
|
||||
{uint32Type, "__pad", offset + (uint32_t)offsetof(rdcspv::PixelLaneData, padding)});
|
||||
|
||||
offset += sizeof(PixelLaneData);
|
||||
RDCCOMPILE_ASSERT(
|
||||
(sizeof(PixelLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(PixelLaneData),
|
||||
"PixelLaneData is misaligned, ensure 16-byte aligned");
|
||||
offset += sizeof(rdcspv::PixelLaneData);
|
||||
RDCCOMPILE_ASSERT((sizeof(rdcspv::PixelLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
|
||||
sizeof(rdcspv::PixelLaneData),
|
||||
"PixelLaneData is misaligned, ensure 16-byte aligned");
|
||||
}
|
||||
else if(stage == ShaderStage::Compute || stage == ShaderStage::Task || stage == ShaderStage::Mesh)
|
||||
{
|
||||
@@ -4140,8 +4082,8 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
rdcspv::BuiltIn::LocalInvocationId, uint3Type);
|
||||
editor.SetName(threadid.base, threadid.name);
|
||||
laneValues.push_back(threadid);
|
||||
structMembers.push_back(
|
||||
{uint3Type, threadid.name, offset + (uint32_t)offsetof(ComputeLaneData, threadid)});
|
||||
structMembers.push_back({uint3Type, threadid.name,
|
||||
offset + (uint32_t)offsetof(rdcspv::ComputeLaneData, threadid)});
|
||||
|
||||
laneValue subid;
|
||||
subid.name = "__rd_subgroupid";
|
||||
@@ -4151,13 +4093,13 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
rdcspv::BuiltIn::SubgroupId, uint32Type);
|
||||
editor.SetName(subid.base, subid.name);
|
||||
laneValues.push_back(subid);
|
||||
structMembers.push_back(
|
||||
{uint32Type, subid.name, offset + (uint32_t)offsetof(ComputeLaneData, subIdxInGroup)});
|
||||
structMembers.push_back({uint32Type, subid.name,
|
||||
offset + (uint32_t)offsetof(rdcspv::ComputeLaneData, subIdxInGroup)});
|
||||
|
||||
offset += sizeof(ComputeLaneData);
|
||||
RDCCOMPILE_ASSERT(
|
||||
(sizeof(ComputeLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) == sizeof(ComputeLaneData),
|
||||
"ComputeLaneData is misaligned, ensure 16-byte aligned");
|
||||
offset += sizeof(rdcspv::ComputeLaneData);
|
||||
RDCCOMPILE_ASSERT((sizeof(rdcspv::ComputeLaneData) / sizeof(Vec4f)) * sizeof(Vec4f) ==
|
||||
sizeof(rdcspv::ComputeLaneData),
|
||||
"ComputeLaneData is misaligned, ensure 16-byte aligned");
|
||||
}
|
||||
|
||||
// now add input signature values
|
||||
@@ -4314,6 +4256,7 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
|
||||
editor.SetName(destInstance, "destInstance");
|
||||
editor.SetName(destVertex, "destVertex");
|
||||
editor.SetName(destView, "destView");
|
||||
|
||||
rdcspv::Id ResultDataBaseType;
|
||||
|
||||
@@ -4336,23 +4279,23 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
{
|
||||
rdcarray<rdcspv::StructMember> members;
|
||||
|
||||
members.push_back({float4Type, "pos", offsetof(ResultDataBase, pos)});
|
||||
members.push_back({uint32Type, "prim", offsetof(ResultDataBase, prim)});
|
||||
members.push_back({uint32Type, "sample", offsetof(ResultDataBase, sample)});
|
||||
members.push_back({uint32Type, "view", offsetof(ResultDataBase, view)});
|
||||
members.push_back({uint32Type, "valid", offsetof(ResultDataBase, valid)});
|
||||
members.push_back({floatType, "ddxDerivCheck", offsetof(ResultDataBase, ddxDerivCheck)});
|
||||
members.push_back({uint32Type, "quadLaneIndex", offsetof(ResultDataBase, quadLaneIndex)});
|
||||
members.push_back({uint32Type, "laneIndex", offsetof(ResultDataBase, laneIndex)});
|
||||
members.push_back({uint32Type, "subgroupSize", offsetof(ResultDataBase, subgroupSize)});
|
||||
members.push_back({uint4Type, "globalBallot", offsetof(ResultDataBase, globalBallot)});
|
||||
members.push_back({uint4Type, "electBallot", offsetof(ResultDataBase, electBallot)});
|
||||
members.push_back({uint4Type, "helperBallot", offsetof(ResultDataBase, helperBallot)});
|
||||
members.push_back({uint32Type, "numSubgroups", offsetof(ResultDataBase, numSubgroups)});
|
||||
members.push_back({float4Type, "pos", offsetof(rdcspv::ResultDataBase, pos)});
|
||||
members.push_back({uint32Type, "prim", offsetof(rdcspv::ResultDataBase, prim)});
|
||||
members.push_back({uint32Type, "sample", offsetof(rdcspv::ResultDataBase, sample)});
|
||||
members.push_back({uint32Type, "view", offsetof(rdcspv::ResultDataBase, view)});
|
||||
members.push_back({uint32Type, "valid", offsetof(rdcspv::ResultDataBase, valid)});
|
||||
members.push_back({floatType, "ddxDerivCheck", offsetof(rdcspv::ResultDataBase, ddxDerivCheck)});
|
||||
members.push_back({uint32Type, "quadLaneIndex", offsetof(rdcspv::ResultDataBase, quadLaneIndex)});
|
||||
members.push_back({uint32Type, "laneIndex", offsetof(rdcspv::ResultDataBase, laneIndex)});
|
||||
members.push_back({uint32Type, "subgroupSize", offsetof(rdcspv::ResultDataBase, subgroupSize)});
|
||||
members.push_back({uint4Type, "globalBallot", offsetof(rdcspv::ResultDataBase, globalBallot)});
|
||||
members.push_back({uint4Type, "electBallot", offsetof(rdcspv::ResultDataBase, electBallot)});
|
||||
members.push_back({uint4Type, "helperBallot", offsetof(rdcspv::ResultDataBase, helperBallot)});
|
||||
members.push_back({uint32Type, "numSubgroups", offsetof(rdcspv::ResultDataBase, numSubgroups)});
|
||||
|
||||
// uint3 padding
|
||||
|
||||
const uint32_t dataStart = (uint32_t)AlignUp(sizeof(ResultDataBase), sizeof(Vec4f));
|
||||
const uint32_t dataStart = (uint32_t)AlignUp(sizeof(rdcspv::ResultDataBase), sizeof(Vec4f));
|
||||
|
||||
RDCASSERT((structStride % sizeof(Vec4f)) == 0);
|
||||
|
||||
@@ -4369,9 +4312,9 @@ static void CreateInputFetcher(rdcarray<uint32_t> &spv,
|
||||
rdcspv::Id ResultDataRTArray =
|
||||
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), ResultDataBaseType));
|
||||
|
||||
editor.AddDecoration(rdcspv::OpDecorate(ResultDataRTArray,
|
||||
rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(
|
||||
structStride * numLanes + sizeof(ResultDataBase))));
|
||||
editor.AddDecoration(rdcspv::OpDecorate(
|
||||
ResultDataRTArray, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(
|
||||
structStride * numLanes + sizeof(rdcspv::ResultDataBase))));
|
||||
|
||||
rdcspv::Id bufBase =
|
||||
editor.DeclareStructType("__rd_HitStorage", {
|
||||
@@ -5227,16 +5170,16 @@ rdcpair<uint32_t, uint32_t> GetAlignAndOutputSize(VulkanCreationInfo::ShaderModu
|
||||
uint32_t structStride = (uint32_t)shadRefl.refl->inputSignature.size() * paramAlign;
|
||||
|
||||
if(shadRefl.refl->stage == ShaderStage::Vertex)
|
||||
structStride += sizeof(VertexLaneData);
|
||||
structStride += sizeof(rdcspv::VertexLaneData);
|
||||
else if(shadRefl.refl->stage == ShaderStage::Pixel)
|
||||
structStride += sizeof(PixelLaneData);
|
||||
structStride += sizeof(rdcspv::PixelLaneData);
|
||||
else if(shadRefl.refl->stage == ShaderStage::Compute ||
|
||||
shadRefl.refl->stage == ShaderStage::Task || shadRefl.refl->stage == ShaderStage::Mesh)
|
||||
structStride += sizeof(ComputeLaneData);
|
||||
structStride += sizeof(rdcspv::ComputeLaneData);
|
||||
|
||||
if(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup)
|
||||
{
|
||||
structStride += sizeof(SubgroupLaneData);
|
||||
structStride += sizeof(rdcspv::SubgroupLaneData);
|
||||
}
|
||||
|
||||
return {paramAlign, structStride};
|
||||
@@ -5477,8 +5420,8 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
|
||||
uint32_t maxHits = 4; // we should only ever get one hit
|
||||
|
||||
// struct size is ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(ResultDataBase) + structStride * numThreads;
|
||||
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * numThreads;
|
||||
|
||||
VkDeviceSize feedbackStorageSize = maxHits * structSize + 1024;
|
||||
|
||||
@@ -5577,7 +5520,7 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
|
||||
base += sizeof(Vec4f);
|
||||
|
||||
ResultDataBase *winner = (ResultDataBase *)base;
|
||||
rdcspv::ResultDataBase *winner = (rdcspv::ResultDataBase *)base;
|
||||
|
||||
if(winner->valid != validMagicNumber)
|
||||
{
|
||||
@@ -5594,9 +5537,9 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
|
||||
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
|
||||
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
|
||||
// output is in input signature order.
|
||||
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
|
||||
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
|
||||
// The output is in input signature order.
|
||||
byte *LaneData = (byte *)(winner + 1);
|
||||
|
||||
numThreads = 4;
|
||||
@@ -5616,17 +5559,17 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
byte *value = LaneData + t * structStride;
|
||||
|
||||
{
|
||||
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
|
||||
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = subgroupData->isActive;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Elected] = subgroupData->elect;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::SubgroupId] = t;
|
||||
|
||||
value += sizeof(SubgroupLaneData);
|
||||
value += sizeof(rdcspv::SubgroupLaneData);
|
||||
}
|
||||
|
||||
// read VertexLaneData
|
||||
{
|
||||
VertexLaneData *vertData = (VertexLaneData *)value;
|
||||
rdcspv::VertexLaneData *vertData = (rdcspv::VertexLaneData *)value;
|
||||
|
||||
apiWrapper->thread_builtins[t][ShaderBuiltin::InstanceIndex] =
|
||||
ShaderVariable("InstanceIndex"_lit, vertData->inst, 0U, 0U, 0U);
|
||||
@@ -5638,7 +5581,7 @@ ShaderDebugTrace *VulkanReplay::DebugVertex(uint32_t eventId, uint32_t vertid, u
|
||||
if(view != ~0U)
|
||||
RDCASSERTEQUAL(vertData->view, view);
|
||||
}
|
||||
value += sizeof(VertexLaneData);
|
||||
value += sizeof(rdcspv::VertexLaneData);
|
||||
|
||||
for(size_t i = 0; i < shadRefl.refl->inputSignature.size(); i++)
|
||||
{
|
||||
@@ -6029,8 +5972,8 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
|
||||
uint32_t overdrawLevels = 100; // maximum number of overdraw levels
|
||||
|
||||
// struct size is ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(ResultDataBase) + structStride * numThreads;
|
||||
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * numThreads;
|
||||
|
||||
VkDeviceSize feedbackStorageSize = overdrawLevels * structSize + sizeof(Vec4f) + 1024;
|
||||
|
||||
@@ -6135,7 +6078,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
|
||||
base += sizeof(Vec4f);
|
||||
|
||||
ResultDataBase *winner = NULL;
|
||||
rdcspv::ResultDataBase *winner = NULL;
|
||||
|
||||
RDCLOG("Got %u hit candidates out of %u total instances", hit_count, total_count);
|
||||
|
||||
@@ -6153,7 +6096,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
|
||||
for(uint32_t i = 0; i < hit_count; i++)
|
||||
{
|
||||
ResultDataBase *hit = (ResultDataBase *)(base + structSize * i);
|
||||
rdcspv::ResultDataBase *hit = (rdcspv::ResultDataBase *)(base + structSize * i);
|
||||
|
||||
if(hit->valid != validMagicNumber)
|
||||
{
|
||||
@@ -6253,9 +6196,9 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
|
||||
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
|
||||
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
|
||||
// output is in input signature order.
|
||||
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
|
||||
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
|
||||
// The output is in input signature order.
|
||||
byte *LaneData = (byte *)(winner + 1);
|
||||
|
||||
numThreads = 4;
|
||||
@@ -6276,17 +6219,17 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
|
||||
if(shadRefl.patchData.threadScope & rdcspv::ThreadScope::Subgroup)
|
||||
{
|
||||
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
|
||||
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = subgroupData->isActive;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Elected] = subgroupData->elect;
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::SubgroupId] = t;
|
||||
|
||||
value += sizeof(SubgroupLaneData);
|
||||
value += sizeof(rdcspv::SubgroupLaneData);
|
||||
}
|
||||
|
||||
// read PixelLaneData
|
||||
{
|
||||
PixelLaneData *pixelData = (PixelLaneData *)value;
|
||||
rdcspv::PixelLaneData *pixelData = (rdcspv::PixelLaneData *)value;
|
||||
|
||||
{
|
||||
ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::Position];
|
||||
@@ -6315,7 +6258,7 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_
|
||||
apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadLane] =
|
||||
pixelData->quadLaneIndex;
|
||||
}
|
||||
value += sizeof(PixelLaneData);
|
||||
value += sizeof(rdcspv::PixelLaneData);
|
||||
|
||||
for(size_t i = 0; i < shadRefl.refl->inputSignature.size(); i++)
|
||||
{
|
||||
@@ -6568,8 +6511,8 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
|
||||
|
||||
uint32_t maxHits = 4; // we should only ever get one hit
|
||||
|
||||
// struct size is ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(ResultDataBase) + structStride * maxSubgroupSize;
|
||||
// struct size is rdcspv::ResultDataBase header plus Nx structStride for the number of threads
|
||||
uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * maxSubgroupSize;
|
||||
|
||||
VkDeviceSize feedbackStorageSize = maxHits * structSize + 1024;
|
||||
|
||||
@@ -6681,7 +6624,7 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
|
||||
|
||||
base += sizeof(Vec4f);
|
||||
|
||||
ResultDataBase *winner = (ResultDataBase *)base;
|
||||
rdcspv::ResultDataBase *winner = (rdcspv::ResultDataBase *)base;
|
||||
|
||||
if(winner->valid != validMagicNumber)
|
||||
{
|
||||
@@ -6698,9 +6641,9 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
|
||||
rdcspv::Debugger *debugger = new rdcspv::Debugger;
|
||||
debugger->Parse(shader.spirv.GetSPIRV());
|
||||
|
||||
// the per-thread data immediately follows the ResultDataBase header. Every piece of data is
|
||||
// uniformly aligned, either 16-byte by default or 32-byte if larger components exist. The
|
||||
// output is in input signature order.
|
||||
// the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of
|
||||
// data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist.
|
||||
// The output is in input signature order.
|
||||
byte *LaneData = (byte *)(winner + 1);
|
||||
|
||||
const uint32_t subgroupSize = winner->subgroupSize;
|
||||
@@ -6727,11 +6670,11 @@ ShaderDebugTrace *VulkanReplay::DebugComputeCommon(ShaderStage stage, uint32_t e
|
||||
{
|
||||
byte *value = LaneData + t * structStride;
|
||||
|
||||
SubgroupLaneData *subgroupData = (SubgroupLaneData *)value;
|
||||
value += sizeof(SubgroupLaneData);
|
||||
rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value;
|
||||
value += sizeof(rdcspv::SubgroupLaneData);
|
||||
|
||||
ComputeLaneData *compData = (ComputeLaneData *)value;
|
||||
value += sizeof(ComputeLaneData);
|
||||
rdcspv::ComputeLaneData *compData = (rdcspv::ComputeLaneData *)value;
|
||||
value += sizeof(rdcspv::ComputeLaneData);
|
||||
|
||||
uint32_t lane = t;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user