Handle D3D12 mesh shaders that don't read from an existing payload

* In this case we can't find a getMeshPayload call even though we're expecting
  it, so we can't find the payload type either.
This commit is contained in:
baldurk
2024-01-11 14:17:24 +00:00
parent d0166c0984
commit df686bfce0
2 changed files with 60 additions and 18 deletions
+51 -15
View File
@@ -1098,8 +1098,9 @@ static void ConvertToFixedDXILAmpFeeder(const DXBC::DXBCContainer *dxbc, uint32_
editor.AddInstruction(f, editor.CreateInstruction(Operation::Ret, voidType, {}));
}
static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint32_t space,
bool readAmpOffset, rdcfixedarray<uint32_t, 3> dispatchDim,
static void AddDXILMeshShaderOutputStores(uint32_t ampPayloadSize, const DXBC::DXBCContainer *dxbc,
uint32_t space, bool readAmpOffset,
rdcfixedarray<uint32_t, 3> dispatchDim,
OutDXILMeshletLayout &layout, bytebuf &editedBlob)
{
using namespace DXIL;
@@ -1306,6 +1307,15 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3
uint32_t payloadSize = cast<Constant>(meshData->children[4]->value)->getU32();
// DXIL payload can't be empty, so if the previous size was non-zero we had one previously
hadPayload = payloadSize != 0;
// if the amplification shader declares a payload, but mesh shader doesn't, we need to be sure
// we match them in size for validation
if(!hadPayload && ampPayloadSize != 0)
payloadSize = ampPayloadSize;
// if the mesh shader did have a payload, these sizes should match!
RDCASSERTEQUAL(payloadSize, ampPayloadSize);
payloadSize += 16;
meshData->children[4] = editor.CreateConstantMetadata(payloadSize);
editor.SetMSPayloadSize(payloadSize);
@@ -1332,23 +1342,49 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3
Type *payloadType = NULL;
if(hadPayload)
{
// if we had a payload, seek the dx.op.getMeshPayload to find its type
for(size_t i = 0; i < f->instructions.size(); i++)
if(getMeshPayload)
{
const Instruction &inst = *f->instructions[i];
if(inst.op == Operation::Call && inst.getFuncCall()->name == getMeshPayload->name)
// if we had a payload and it was loaded, seek the dx.op.getMeshPayload to find its type
for(size_t i = 0; i < f->instructions.size(); i++)
{
payloadType = (Type *)inst.type;
const Instruction &inst = *f->instructions[i];
RDCASSERT(payloadType->type == Type::Pointer);
payloadType = (Type *)payloadType->inner;
if(inst.op == Operation::Call && inst.getFuncCall()->name == getMeshPayload->name)
{
payloadType = (Type *)inst.type;
payloadType->members.append({i32, i32, i32, i32});
RDCASSERT(payloadType->type == Type::Pointer);
payloadType = (Type *)payloadType->inner;
break;
payloadType->members.append({i32, i32, i32, i32});
break;
}
}
}
else
{
// if we had a payload declared but it wasn't ever fetched, there will be no function or type.
// We create a synthetic type of the right size then patch it
rdcarray<const Type *> members;
for(uint32_t i = 0; i < ampPayloadSize / sizeof(uint32_t); i++)
members.push_back(i32);
// unclear if HLSL allows non-4byte aligned types
RDCASSERT((ampPayloadSize % sizeof(uint32_t)) == 0);
members.append({i32, i32, i32, i32});
// the payload was declared but never fetched, so there is no existing type to reuse. We make up our own!
payloadType = editor.CreateNamedStructType("struct.payload_t", members);
const Type *payloadPtrType =
editor.CreatePointerType(payloadType, Type::PointerAddrSpace::Default);
getMeshPayload = editor.DeclareFunction("dx.op.getMeshPayload.struct.payload_t", payloadPtrType,
{i32}, Attribute::NoUnwind | Attribute::ReadOnly);
}
}
else if(readAmpOffset)
{
@@ -1359,7 +1395,7 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3
editor.CreatePointerType(payloadType, Type::PointerAddrSpace::Default);
getMeshPayload = editor.DeclareFunction("dx.op.getMeshPayload.struct.payload_t", payloadPtrType,
{}, Attribute::NoUnwind | Attribute::ReadOnly);
{i32}, Attribute::NoUnwind | Attribute::ReadOnly);
}
if(readAmpOffset)
@@ -2370,8 +2406,8 @@ void D3D12Replay::InitPostMSBuffers(uint32_t eventId)
bytebuf meshOutputDXIL;
AddDXILMeshShaderOutputStores(pipe->MS()->GetDXBC(), space, ampBuffer != NULL, dispatchSize,
layout, meshOutputDXIL);
AddDXILMeshShaderOutputStores(payloadSize, pipe->MS()->GetDXBC(), space, ampBuffer != NULL,
dispatchSize, layout, meshOutputDXIL);
{
// strip the root signature, we shouldn't need it and it may no longer match and fail validation
@@ -1764,10 +1764,16 @@ void ProgramEditor::RegisterUAV(DXILResourceType type, uint32_t space, uint32_t
}
else
{
// from definitions in dxc
const uint32_t headerSizeVer0 = 6 * sizeof(uint32_t);
const uint32_t headerSizeVer1 = headerSizeVer0 + sizeof(uint16_t) + 10 * sizeof(uint8_t);
const uint32_t headerSizeVer2 = headerSizeVer1 + 3 * sizeof(uint32_t);
// If there is no resource in the chunk we also need to insert the size of a resource bind
*numResources = 1;
size_t insertOffset = cur - begin;
uint32_t resourceBindSize = sizeof(ResourceBind1);
uint32_t resourceBindSize =
*headerSize == headerSizeVer2 ? sizeof(ResourceBind1) : sizeof(ResourceBind0);
psv0blob.insert(insertOffset, (byte *)&resourceBindSize, sizeof(resourceBindSize));
psv0blob.insert(insertOffset + sizeof(resourceBindSize), (byte *)&bind, resourceBindSize);
}
@@ -1807,8 +1813,8 @@ void ProgramEditor::SetNumThreads(uint32_t dim[3])
// from definitions in dxc
const uint32_t headerSizeVer0 = 6 * sizeof(uint32_t);
const uint32_t headerSizeVer1 = sizeof(uint16_t) + 10 * sizeof(uint8_t);
const uint32_t headerSizeVer2 = 3 * sizeof(uint32_t);
const uint32_t headerSizeVer1 = headerSizeVer0 + sizeof(uint16_t) + 10 * sizeof(uint8_t);
const uint32_t headerSizeVer2 = headerSizeVer1 + 3 * sizeof(uint32_t);
if(*headerSize >= headerSizeVer2)
{