From 9efe0f5a831be1b60c2c70204a23aad00155576a Mon Sep 17 00:00:00 2001 From: baldurk Date: Mon, 20 Sep 2021 18:08:31 +0100 Subject: [PATCH] Begin implementing DXIL shader patching for bindless feedback * We don't annotate any bindless uses yet, this only declares the UAV and attempts to write into the first slot. --- .../driver/d3d12/d3d12_shader_feedback.cpp | 388 +++++++++++++++++- 1 file changed, 378 insertions(+), 10 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_shader_feedback.cpp b/renderdoc/driver/d3d12/d3d12_shader_feedback.cpp index 42ed2dba1..57298af70 100644 --- a/renderdoc/driver/d3d12/d3d12_shader_feedback.cpp +++ b/renderdoc/driver/d3d12/d3d12_shader_feedback.cpp @@ -24,6 +24,8 @@ #include "core/settings.h" #include "driver/shaders/dxbc/dxbc_bytecode_editor.h" +#include "driver/shaders/dxil/dxil_bytecode_editor.h" +#include "driver/shaders/dxil/dxil_common.h" #include "d3d12_command_queue.h" #include "d3d12_debug.h" #include "d3d12_device.h" @@ -67,9 +69,9 @@ private: uint32_t used : 1; }; -static bool AnnotateShader(const DXBC::DXBCContainer *dxbc, uint32_t space, - const std::map &slots, - bytebuf &editedBlob) +static bool AnnotateDXBCShader(const DXBC::DXBCContainer *dxbc, uint32_t space, + const std::map &slots, + bytebuf &editedBlob) { using namespace DXBCBytecode; using namespace DXBCBytecode::Edit; @@ -180,6 +182,350 @@ static bool AnnotateShader(const DXBC::DXBCContainer *dxbc, uint32_t space, return false; } +static bool AnnotateDXILShader(const DXBC::DXBCContainer *dxbc, uint32_t space, + const std::map &slots, + bytebuf &editedBlob) +{ + DXIL::ProgramEditor editor(dxbc, slots.size(), editedBlob); + + const DXIL::Type *i32 = editor.GetInt32Type(); + const DXIL::Type *i8 = editor.GetInt8Type(); + const DXIL::Type *i1 = editor.GetBoolType(); + + const DXIL::Type *handleType = editor.GetTypeByName("dx.types.Handle"); + const DXIL::Function *createHandle = editor.GetFunctionByName("dx.op.createHandle"); + + // if we don't have the handle type then this shader can't use any dynamically! we have no + // feedback to get + if(!handleType || !createHandle) + return false; + + // get the atomic function we'll need + const DXIL::Function *atomicBinOp = editor.GetFunctionByName("dx.op.atomicBinOp.i32"); + if(!atomicBinOp) + { + DXIL::Type atomicType; + atomicType.type = DXIL::Type::Function; + // return type + atomicType.inner = i32; + atomicType.members = {i32, handleType, i32, i32, i32, i32, i32}; + + const DXIL::Type *funcType = editor.AddType(atomicType); + + DXIL::Type funcPtrType; + funcPtrType.type = DXIL::Type::Pointer; + funcPtrType.inner = funcType; + + DXIL::Function atomicFunc; + atomicFunc.name = "dx.op.atomicBinOp.i32"; + atomicFunc.funcType = editor.AddType(funcPtrType); + atomicFunc.external = true; + + for(const DXIL::AttributeSet &attrs : editor.GetAttributeSets()) + { + if(attrs.functionSlot && attrs.functionSlot->params == DXIL::Attribute::NoUnwind) + { + atomicFunc.attrs = &attrs; + break; + } + } + + if(!atomicFunc.attrs) + RDCWARN("Couldn't find existing nounwind attr set"); + // should we add a set here? or assume the attrs don't matter because this is a builtin function + // anyway? + + atomicBinOp = editor.DeclareFunction(atomicFunc); + } + + // when we add metadata we do it in reverse order since it's unclear if we're supposed to have + // forward references (LLVM seems to handle it, but not emit it, so it's very much a grey area) + // we do this by recreating any nodes that we modify (or their parents recursively up to the named + // metadata) + + // declare the resource, this happens purely in metadata but we need to store the slot + uint32_t regSlot = 0; + DXIL::Metadata *reslist = NULL; + { + const DXIL::Type *rw = editor.GetTypeByName("struct.RWByteAddressBuffer"); + + // declare the type if not present + if(!rw) + { + DXIL::Type rwType; + rwType.name = "struct.RWByteAddressBuffer"; + rwType.type = DXIL::Type::Struct; + rwType.members = {i32}; + + rw = editor.AddType(rwType); + } + + const DXIL::Type *rwptr = editor.GetPointerType(rw, DXIL::Type::PointerAddrSpace::Default); + + if(!rwptr) + { + DXIL::Type rwPtrType; + rwPtrType.type = DXIL::Type::Pointer; + rwPtrType.addrSpace = DXIL::Type::PointerAddrSpace::Default; + rwPtrType.inner = rw; + + rwptr = editor.AddType(rwPtrType); + } + + DXIL::Metadata *resources = editor.GetMetadataByName("dx.resources"); + + // if there are no resources declared we can't have any dynamic indexing + if(!resources) + return false; + + reslist = resources->children[0]; + + DXIL::Metadata *uavs = reslist->children[1]; + + for(size_t i = 0; uavs && i < uavs->children.size(); i++) + { + // each UAV child should have a fixed format, [0] is the reg ID and I think this should always + // be == the index + const DXIL::Metadata *uav = uavs->children[i]; + const DXIL::Metadata *slot = uav->children[0]; + + if(slot->value.type != DXIL::ValueType::Constant) + { + RDCWARN("Unexpected non-constant slot ID in UAV"); + continue; + } + + RDCASSERT(slot->value.constant->val.u32v[0] == i); + + regSlot = RDCMAX(slot->value.constant->val.u32v[0] + 1, regSlot); + } + + DXIL::Metadata *uavRecord[11] = { + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + editor.AddMetadata(DXIL::Metadata()), + NULL, + }; + +#define SET_CONST_META(m, t, v) \ + m->isConstant = true; \ + m->type = t; \ + m->value = DXIL::Value(editor.GetOrAddConstant(DXIL::Constant(t, v))); + + // linear reg slot/ID + SET_CONST_META(uavRecord[0], i32, regSlot); + // variable + { + DXIL::Constant c; + c.type = rwptr; + c.undef = true; + uavRecord[1]->isConstant = true; + uavRecord[1]->type = rwptr; + uavRecord[1]->value = DXIL::Value(editor.GetOrAddConstant(c)); + } + // name, empty + uavRecord[2]->isString = true; + // reg space + SET_CONST_META(uavRecord[3], i32, space); + // reg base + SET_CONST_META(uavRecord[4], i32, 0U); + // reg count + SET_CONST_META(uavRecord[5], i32, 1U); + // shape + SET_CONST_META(uavRecord[6], i32, uint32_t(DXIL::ResourceKind::RawBuffer)); + // globally coherent + SET_CONST_META(uavRecord[7], i1, 0U); + // hidden counter + SET_CONST_META(uavRecord[8], i1, 0U); + // raster order + SET_CONST_META(uavRecord[9], i1, 0U); + // UAV tags + uavRecord[10] = NULL; + + // create the new UAV record + DXIL::Metadata *feedbackuav = editor.AddMetadata(DXIL::Metadata()); + feedbackuav->children.assign(uavRecord, ARRAY_COUNT(uavRecord)); + + // now push our record onto a new UAV list (either copied from the existing, or created fresh if + // there isn't an existing one). This ensures it has all backwards references + if(uavs) + uavs = editor.AddMetadata(*uavs); + else + uavs = editor.AddMetadata(DXIL::Metadata()); + + uavs->children.push_back(feedbackuav); + + // now recreate the reslist, and repoint the uavs + reslist = editor.AddMetadata(*reslist); + reslist->children[1] = uavs; + + // finally repoint the named metadata + resources->children[0] = reslist; + } + + rdcstr entryName; + // add the entry point tags + { + DXIL::Metadata *entryPoints = editor.GetMetadataByName("dx.entryPoints"); + + if(!entryPoints) + { + RDCERR("Couldn't find entry point list"); + return false; + } + + // TODO select the entry point for multiple entry points? RT only for now + DXIL::Metadata *entry = entryPoints->children[0]; + + entryName = entry->children[1]->str; + + DXIL::Metadata *taglist = entry->children[4]; + + // find existing shader flags tag, if there is one + DXIL::Metadata *shaderFlagsTag = NULL; + DXIL::Metadata *shaderFlagsData = NULL; + size_t existingTag = 0; + for(size_t t = 0; taglist && t < taglist->children.size(); t += 2) + { + RDCASSERT(taglist->children[t]->isConstant); + if(taglist->children[t]->value.constant->val.u32v[0] == + (uint32_t)DXIL::ShaderEntryTag::ShaderFlags) + { + shaderFlagsTag = taglist->children[t]; + shaderFlagsData = taglist->children[t + 1]; + existingTag = t + 1; + break; + } + } + + uint32_t shaderFlagsValue = shaderFlagsData ? shaderFlagsData->value.constant->val.u32v[0] : 0U; + + // raw and structured buffers + shaderFlagsValue |= 0x10; + + if(editor.GetShaderType() != DXBC::ShaderType::Compute && + editor.GetShaderType() != DXBC::ShaderType::Pixel) + { + // UAVs on non-PS/CS stages + shaderFlagsValue |= 0x10000; + } + + // (re-)create shader flags tag + shaderFlagsData = editor.AddMetadata(DXIL::Metadata()); + SET_CONST_META(shaderFlagsData, i32, shaderFlagsValue); + + // if we didn't have a shader tags entry at all, create the metadata node for the shader flags + // tag + if(!shaderFlagsTag) + { + shaderFlagsTag = editor.AddMetadata(DXIL::Metadata()); + SET_CONST_META(shaderFlagsTag, i32, (uint32_t)DXIL::ShaderEntryTag::ShaderFlags); + } + + // (re-)create tag list + if(taglist) + taglist = editor.AddMetadata(*taglist); + else + taglist = editor.AddMetadata(DXIL::Metadata()); + + // if we had a tag already, we can just re-use that tag node and replace the data node. + // Otherwise we need to add both, and we insert them first + if(existingTag) + { + taglist->children[existingTag] = shaderFlagsData; + } + else + { + taglist->children.insert(0, shaderFlagsTag); + taglist->children.insert(1, shaderFlagsData); + } + + // recreate the entry + entry = editor.AddMetadata(*entry); + + // repoint taglist and reslist + entry->children[3] = reslist; + entry->children[4] = taglist; + + // finally repoint the named metadata + entryPoints->children[0] = entry; + } + + DXIL::Function *f = editor.GetFunctionByName(entryName); + + if(!f) + { + RDCERR("Couldn't find entry point function '%s'", entryName.c_str()); + return false; + } + + // create our handle first thing + DXIL::Instruction *handle; + { + DXIL::Instruction inst; + inst.op = DXIL::Operation::Call; + inst.type = handleType; + inst.funcCall = createHandle; + inst.args = { + // dx.op.createHandle opcode + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, 57U))), + // kind = UAV + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i8, 1U))), + // ID/slot + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, regSlot))), + // array index + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, 0U))), + // non-uniform + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i1, 0U))), + }; + + handle = editor.AddInstruction(f, 0, inst); + } + + const DXIL::Constant *undefi32; + { + DXIL::Constant c; + c.type = i32; + c.undef = true; + undefi32 = editor.GetOrAddConstant(f, c); + } + + // insert an or to offset 0, just to indicate validity + { + DXIL::Instruction inst; + inst.op = DXIL::Operation::Call; + inst.type = i32; + inst.funcCall = atomicBinOp; + inst.args = { + // dx.op.atomicBinOp.i32 opcode + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, 78U))), + // feedback UAV handle + DXIL::Value(handle), + // operation OR + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, 2U))), + // offset + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, 0U))), + // offset 2 + DXIL::Value(undefi32), + // offset 3 + DXIL::Value(undefi32), + // value + DXIL::Value(editor.GetOrAddConstant(f, DXIL::Constant(i32, ~0U))), + }; + + editor.AddInstruction(f, 1, inst); + } + + return true; +} + static void AddArraySlots(WrappedID3D12PipelineState::ShaderEntry *shad, uint32_t space, uint32_t maxDescriptors, std::map &slots, uint32_t &numSlots, @@ -188,12 +534,6 @@ static void AddArraySlots(WrappedID3D12PipelineState::ShaderEntry *shad, uint32_ if(!shad) return; - if(shad->GetDXBC()->m_Version.Major >= 6) - { - RDCWARN("DXIL shaders are not supported for bindless feedback currently"); - return; - } - ShaderReflection &refl = shad->GetDetails(); const ShaderBindpointMapping &mapping = shad->GetMapping(); @@ -254,7 +594,7 @@ static void AddArraySlots(WrappedID3D12PipelineState::ShaderEntry *shad, uint32_ // only SM5.1 can have dynamic array indexing if(shad->GetDXBC()->m_Version.Major == 5 && shad->GetDXBC()->m_Version.Minor == 1) { - if(AnnotateShader(shad->GetDXBC(), space, slots, editedBlob)) + if(AnnotateDXBCShader(shad->GetDXBC(), space, slots, editedBlob)) { if(!D3D12_Debug_FeedbackDumpDirPath().empty()) FileIO::WriteAll(D3D12_Debug_FeedbackDumpDirPath() + "/before_dxbc_" + @@ -266,6 +606,34 @@ static void AddArraySlots(WrappedID3D12PipelineState::ShaderEntry *shad, uint32_ ToStr(shad->GetDetails().stage).c_str() + ".dxbc", editedBlob); + desc.pShaderBytecode = editedBlob.data(); + desc.BytecodeLength = editedBlob.size(); + } + } + else if(shad->GetDXBC()->m_Version.Major >= 6) + { + if(AnnotateDXILShader(shad->GetDXBC(), space, slots, editedBlob)) + { + DXBC::DXBCContainer::StripDXILDebugInfo(editedBlob); + + if(!D3D12_Debug_FeedbackDumpDirPath().empty()) + { + bytebuf orig = shad->GetDXBC()->GetShaderBlob(); + + DXBC::DXBCContainer::StripDXILDebugInfo(orig); + + FileIO::WriteAll(D3D12_Debug_FeedbackDumpDirPath() + "/before_dxil_" + + ToStr(shad->GetDetails().stage).c_str() + ".dxbc", + orig); + } + + if(!D3D12_Debug_FeedbackDumpDirPath().empty()) + { + FileIO::WriteAll(D3D12_Debug_FeedbackDumpDirPath() + "/after_dxil_" + + ToStr(shad->GetDetails().stage).c_str() + ".dxbc", + editedBlob); + } + desc.pShaderBytecode = editedBlob.data(); desc.BytecodeLength = editedBlob.size(); }