From df686bfce0ca6af43fb481d34a062a4ec39af69c Mon Sep 17 00:00:00 2001 From: baldurk Date: Thu, 11 Jan 2024 14:17:24 +0000 Subject: [PATCH] Handle D3D12 mesh shaders that don't read from an existing payload * In this case we can't find a getMeshPayload call even though we're expecting it, so we can't find the payload type either. --- renderdoc/driver/d3d12/d3d12_postvs.cpp | 66 ++++++++++++++----- .../shaders/dxil/dxil_bytecode_editor.cpp | 12 +++- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_postvs.cpp b/renderdoc/driver/d3d12/d3d12_postvs.cpp index 3534329fe..ae07f24c8 100644 --- a/renderdoc/driver/d3d12/d3d12_postvs.cpp +++ b/renderdoc/driver/d3d12/d3d12_postvs.cpp @@ -1098,8 +1098,9 @@ static void ConvertToFixedDXILAmpFeeder(const DXBC::DXBCContainer *dxbc, uint32_ editor.AddInstruction(f, editor.CreateInstruction(Operation::Ret, voidType, {})); } -static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint32_t space, - bool readAmpOffset, rdcfixedarray dispatchDim, +static void AddDXILMeshShaderOutputStores(uint32_t ampPayloadSize, const DXBC::DXBCContainer *dxbc, + uint32_t space, bool readAmpOffset, + rdcfixedarray dispatchDim, OutDXILMeshletLayout &layout, bytebuf &editedBlob) { using namespace DXIL; @@ -1306,6 +1307,15 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3 uint32_t payloadSize = cast(meshData->children[4]->value)->getU32(); // DXIL payload can't be empty, so if the previous size was non-zero we had one previously hadPayload = payloadSize != 0; + + // if the amplification shader declares a payload, but mesh shader doesn't, we need to be sure + // we match them in size for validation + if(!hadPayload && ampPayloadSize != 0) + payloadSize = ampPayloadSize; + + // if the mesh shader did have a payload, these sizes should match! 
+ RDCASSERTEQUAL(payloadSize, ampPayloadSize); + payloadSize += 16; meshData->children[4] = editor.CreateConstantMetadata(payloadSize); editor.SetMSPayloadSize(payloadSize); @@ -1332,23 +1342,49 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3 Type *payloadType = NULL; if(hadPayload) { - // if we had a payload, seek the dx.op.getMeshPayload to find its type - for(size_t i = 0; i < f->instructions.size(); i++) + if(getMeshPayload) { - const Instruction &inst = *f->instructions[i]; - - if(inst.op == Operation::Call && inst.getFuncCall()->name == getMeshPayload->name) + // if we had a payload and it was loaded, seek the dx.op.getMeshPayload to find its type + for(size_t i = 0; i < f->instructions.size(); i++) { - payloadType = (Type *)inst.type; + const Instruction &inst = *f->instructions[i]; - RDCASSERT(payloadType->type == Type::Pointer); - payloadType = (Type *)payloadType->inner; + if(inst.op == Operation::Call && inst.getFuncCall()->name == getMeshPayload->name) + { + payloadType = (Type *)inst.type; - payloadType->members.append({i32, i32, i32, i32}); + RDCASSERT(payloadType->type == Type::Pointer); + payloadType = (Type *)payloadType->inner; - break; + payloadType->members.append({i32, i32, i32, i32}); + + break; + } } } + else + { + // if we had a payload declared but it wasn't ever fetched, there will be no function or type. + // We create a synthetic type of the right size then patch it + + rdcarray members; + for(uint32_t i = 0; i < ampPayloadSize / sizeof(uint32_t); i++) + members.push_back(i32); + + // unclear if HLSL allows non-4byte aligned types + RDCASSERT((ampPayloadSize % sizeof(uint32_t)) == 0); + + members.append({i32, i32, i32, i32}); + + // the payload was declared but never fetched, so no type exists yet. We get to make up our own! 
+ payloadType = editor.CreateNamedStructType("struct.payload_t", members); + + const Type *payloadPtrType = + editor.CreatePointerType(payloadType, Type::PointerAddrSpace::Default); + + getMeshPayload = editor.DeclareFunction("dx.op.getMeshPayload.struct.payload_t", payloadPtrType, + {i32}, Attribute::NoUnwind | Attribute::ReadOnly); + } } else if(readAmpOffset) { @@ -1359,7 +1395,7 @@ static void AddDXILMeshShaderOutputStores(const DXBC::DXBCContainer *dxbc, uint3 editor.CreatePointerType(payloadType, Type::PointerAddrSpace::Default); getMeshPayload = editor.DeclareFunction("dx.op.getMeshPayload.struct.payload_t", payloadPtrType, - {}, Attribute::NoUnwind | Attribute::ReadOnly); + {i32}, Attribute::NoUnwind | Attribute::ReadOnly); } if(readAmpOffset) @@ -2370,8 +2406,8 @@ void D3D12Replay::InitPostMSBuffers(uint32_t eventId) bytebuf meshOutputDXIL; - AddDXILMeshShaderOutputStores(pipe->MS()->GetDXBC(), space, ampBuffer != NULL, dispatchSize, - layout, meshOutputDXIL); + AddDXILMeshShaderOutputStores(payloadSize, pipe->MS()->GetDXBC(), space, ampBuffer != NULL, + dispatchSize, layout, meshOutputDXIL); { // strip the root signature, we shouldn't need it and it may no longer match and fail validation diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp b/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp index 3f75f7e59..87bcd7edb 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp @@ -1764,10 +1764,16 @@ void ProgramEditor::RegisterUAV(DXILResourceType type, uint32_t space, uint32_t } else { + // from definitions in dxc + const uint32_t headerSizeVer0 = 6 * sizeof(uint32_t); + const uint32_t headerSizeVer1 = headerSizeVer0 + sizeof(uint16_t) + 10 * sizeof(uint8_t); + const uint32_t headerSizeVer2 = headerSizeVer1 + 3 * sizeof(uint32_t); + // If there is no resource in the chunk we also need to insert the size of a resource bind *numResources = 1; size_t insertOffset = cur 
- begin; - uint32_t resourceBindSize = sizeof(ResourceBind1); + uint32_t resourceBindSize = + *headerSize == headerSizeVer2 ? sizeof(ResourceBind1) : sizeof(ResourceBind0); psv0blob.insert(insertOffset, (byte *)&resourceBindSize, sizeof(resourceBindSize)); psv0blob.insert(insertOffset + sizeof(resourceBindSize), (byte *)&bind, resourceBindSize); } @@ -1807,8 +1813,8 @@ void ProgramEditor::SetNumThreads(uint32_t dim[3]) // from definitions in dxc const uint32_t headerSizeVer0 = 6 * sizeof(uint32_t); - const uint32_t headerSizeVer1 = sizeof(uint16_t) + 10 * sizeof(uint8_t); - const uint32_t headerSizeVer2 = 3 * sizeof(uint32_t); + const uint32_t headerSizeVer1 = headerSizeVer0 + sizeof(uint16_t) + 10 * sizeof(uint8_t); + const uint32_t headerSizeVer2 = headerSizeVer1 + 3 * sizeof(uint32_t); if(*headerSize >= headerSizeVer2) {