From 05b59b8ef3bc88d45f1609036b761c586403b60c Mon Sep 17 00:00:00 2001 From: baldurk Date: Fri, 9 Oct 2020 15:44:28 +0100 Subject: [PATCH] Post-process DXBC to remove vendor extension UAV and format instructions --- renderdoc/driver/d3d11/d3d11_resources.h | 3 +- renderdoc/driver/d3d12/d3d12_resources.h | 3 +- renderdoc/driver/ihv/nv/nvapi_wrapper.cpp | 2 +- renderdoc/driver/ihv/nv/nvapi_wrapper.h | 32 +- .../driver/shaders/dxbc/dxbc_bytecode.cpp | 27 + renderdoc/driver/shaders/dxbc/dxbc_bytecode.h | 156 +- .../driver/shaders/dxbc/dxbc_container.cpp | 34 +- .../driver/shaders/dxbc/dxbc_container.h | 6 +- .../driver/shaders/dxbc/dxbc_disassemble.cpp | 1725 ++++++++++++++++- .../driver/shaders/dxbc/dxbc_reflect.cpp | 5 +- 10 files changed, 1972 insertions(+), 21 deletions(-) diff --git a/renderdoc/driver/d3d11/d3d11_resources.h b/renderdoc/driver/d3d11/d3d11_resources.h index 51937c94e..a232b088a 100644 --- a/renderdoc/driver/d3d11/d3d11_resources.h +++ b/renderdoc/driver/d3d11/d3d11_resources.h @@ -994,7 +994,8 @@ public: { if(m_DXBCFile == NULL && !m_Bytecode.empty()) { - m_DXBCFile = new DXBC::DXBCContainer(m_Bytecode, m_DebugInfoPath, m_ShaderExtSlot, ~0U); + m_DXBCFile = new DXBC::DXBCContainer(m_Bytecode, m_DebugInfoPath, GraphicsAPI::D3D11, + m_ShaderExtSlot, ~0U); m_Bytecode.clear(); } return m_DXBCFile; diff --git a/renderdoc/driver/d3d12/d3d12_resources.h b/renderdoc/driver/d3d12/d3d12_resources.h index a6f400e27..8f8376d86 100644 --- a/renderdoc/driver/d3d12/d3d12_resources.h +++ b/renderdoc/driver/d3d12/d3d12_resources.h @@ -762,7 +762,8 @@ public: { if(m_DXBCFile == NULL && !m_Bytecode.empty()) { - m_DXBCFile = new DXBC::DXBCContainer(m_Bytecode, rdcstr(), m_ShaderExtSlot, m_ShaderExtSpace); + m_DXBCFile = new DXBC::DXBCContainer(m_Bytecode, rdcstr(), GraphicsAPI::D3D12, + m_ShaderExtSlot, m_ShaderExtSpace); } return m_DXBCFile; } diff --git a/renderdoc/driver/ihv/nv/nvapi_wrapper.cpp b/renderdoc/driver/ihv/nv/nvapi_wrapper.cpp index ce2e70e21..da40e1de7 
100644 --- a/renderdoc/driver/ihv/nv/nvapi_wrapper.cpp +++ b/renderdoc/driver/ihv/nv/nvapi_wrapper.cpp @@ -153,7 +153,7 @@ bool SupportedOpcode(NvShaderOpcode opcode) { if(NV_nvapi_AllowUnknownShaderOpcodes()) return true; - return opcode == NvShaderOpcode::UINT64Atomic; + return opcode == NvShaderOpcode::U64Atomic; } // try to initialise nvapi for replay diff --git a/renderdoc/driver/ihv/nv/nvapi_wrapper.h b/renderdoc/driver/ihv/nv/nvapi_wrapper.h index 619f8a696..29b2ab499 100644 --- a/renderdoc/driver/ihv/nv/nvapi_wrapper.h +++ b/renderdoc/driver/ihv/nv/nvapi_wrapper.h @@ -29,11 +29,39 @@ enum class NvShaderOpcode : uint32_t { - UINT64Atomic = 20, + Unknown = 0, + Shuffle = 1, + ShuffleUp = 2, + ShuffleDown = 3, + ShuffleXor = 4, + VoteAll = 5, + VoteAny = 6, + VoteBallot = 7, + GetLaneId = 8, + FP16Atomic = 12, + FP32Atomic = 13, + GetSpecial = 19, + U64Atomic = 20, + MatchAny = 21, + Footprint = 28, + FootprintBias = 29, + GetShadingRate = 30, + FootprintLevel = 31, + FootprintGrad = 32, + ShuffleGeneric = 33, + VPRSEvalAttribAtSample = 51, + VPRSEvalAttribSnapped = 52, }; -enum class NvShaderAtomic : uint32_t +enum class NvShaderSpecial { + ThreadLtMask = 4, + FootprintSingleLOD = 5, +}; + +enum class NvShaderAtomic +{ + Unknown = -1, And = 0, Or = 1, Xor = 2, diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp index 3a166f227..5feef721e 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp @@ -400,6 +400,33 @@ DXBC::Reflection *Program::GuessReflection() return ret; } +rdcstr Program::GetDebugStatus() +{ + // if there are no vendor extensions this is always debuggable + if(m_ShaderExt.second == ~0U) + return rdcstr(); + + // otherwise we need to check that no unsupported vendor extensions are used + DisassembleHexDump(); + + for(const Operation &op : m_Instructions) + { + if(op.operation >= OPCODE_VENDOR_FIRST) + { + bool supported = false; 
+ + // whitelist supported instructions here + + if(!supported) + return StringFormat::Fmt("Unsupported shader extension '%s' used", + ToStr(op.operation).c_str()); + } + } + + // no unsupported instructions used + return rdcstr(); +} + D3D_PRIMITIVE_TOPOLOGY Program::GetOutputTopology() { DisassembleHexDump(); diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h index f68589304..4959fd24b 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h +++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h @@ -316,7 +316,59 @@ enum OpcodeType OPCODE_CHECK_ACCESS_FULLY_MAPPED, - NUM_OPCODES, + NUM_REAL_OPCODES, + + OPCODE_VENDOR_REMOVED, + + OPCODE_VENDOR_FIRST, + + OPCODE_AMD_READFIRSTLANE, + OPCODE_AMD_READLANE, + OPCODE_AMD_LANEID, + OPCODE_AMD_SWIZZLE, + OPCODE_AMD_BALLOT, + OPCODE_AMD_MBCNT, + OPCODE_AMD_MIN3U, + OPCODE_AMD_MIN3F, + OPCODE_AMD_MED3U, + OPCODE_AMD_MED3F, + OPCODE_AMD_MAX3U, + OPCODE_AMD_MAX3F, + OPCODE_AMD_BARYCOORD, + OPCODE_AMD_VTXPARAM, + OPCODE_AMD_GET_VIEWPORTINDEX, + OPCODE_AMD_GET_RTARRAYSLICE, + OPCODE_AMD_WAVE_REDUCE, + OPCODE_AMD_WAVE_SCAN, + OPCODE_AMD_LOADDWATADDR, + OPCODE_AMD_GET_DRAWINDEX, + OPCODE_AMD_U64_ATOMIC, + OPCODE_AMD_GET_WAVESIZE, + OPCODE_AMD_GET_BASEINSTANCE, + OPCODE_AMD_GET_BASEVERTEX, + + OPCODE_NV_SHUFFLE, + OPCODE_NV_SHUFFLE_UP, + OPCODE_NV_SHUFFLE_DOWN, + OPCODE_NV_SHUFFLE_XOR, + OPCODE_NV_VOTE_ALL, + OPCODE_NV_VOTE_ANY, + OPCODE_NV_VOTE_BALLOT, + OPCODE_NV_GET_LANEID, + OPCODE_NV_FP16_ATOMIC, + OPCODE_NV_FP32_ATOMIC, + OPCODE_NV_GET_THREADLTMASK, + OPCODE_NV_GET_FOOTPRINT_SINGLELOD, + OPCODE_NV_U64_ATOMIC, + OPCODE_NV_MATCH_ANY, + OPCODE_NV_FOOTPRINT, + OPCODE_NV_FOOTPRINT_BIAS, + OPCODE_NV_GET_SHADING_RATE, + OPCODE_NV_FOOTPRINT_LEVEL, + OPCODE_NV_FOOTPRINT_GRAD, + OPCODE_NV_SHUFFLE_GENERIC, + OPCODE_NV_VPRS_EVAL_ATTRIB_SAMPLE, + OPCODE_NV_VPRS_EVAL_ATTRIB_SNAPPED, }; size_t NumOperands(OpcodeType op); @@ -622,6 +674,39 @@ enum ResourceDimension NUM_DIMENSIONS, }; 
+enum VendorAtomicOp +{ + ATOMIC_OP_NONE = 0, + ATOMIC_OP_AND, + ATOMIC_OP_OR, + ATOMIC_OP_XOR, + ATOMIC_OP_ADD, + ATOMIC_OP_MAX, + ATOMIC_OP_MIN, + ATOMIC_OP_SWAP, + ATOMIC_OP_CAS, +}; + +enum VendorWaveOp +{ + WAVE_OP_NONE = 0, + WAVE_OP_ADD_FLOAT, + WAVE_OP_ADD_SINT, + WAVE_OP_ADD_UINT, + WAVE_OP_MUL_FLOAT, + WAVE_OP_MUL_SINT, + WAVE_OP_MUL_UINT, + WAVE_OP_MIN_FLOAT, + WAVE_OP_MIN_SINT, + WAVE_OP_MIN_UINT, + WAVE_OP_MAX_FLOAT, + WAVE_OP_MAX_SINT, + WAVE_OP_MAX_UINT, + WAVE_OP_AND, + WAVE_OP_OR, + WAVE_OP_XOR, +}; + ///////////////////////////////////////////////////////////////////////// // Main structures ///////////////////////////////////////////////////////////////////////// @@ -671,9 +756,13 @@ struct Operand /////////////////////////////////////// - OperandType - type; // temp register, constant buffer, input, output, other more specialised types - NumOperandComponents numComponents; // scalar, 4-vector or N-vector (currently unused) + // operands can be given names to make the assembly easier to read. + // mostly used on vendor extensions where the syntax is non-standard/undocumented + rdcstr name; + // temp register, constant buffer, input, output, other more specialised types + OperandType type; + // scalar, 4-vector or N-vector (currently unused) + NumOperandComponents numComponents; uint8_t comps[4]; // the components. each is 0,1,2,3 for x,y,z,w or 0xff if unused. // e.g. .x = { 0, -1, -1, -1 } @@ -683,12 +772,50 @@ struct Operand // .xyzw = { 0, 1, 2, 3 } // .wzyx = { 3, 2, 1, 0 } - rdcarray indices; // indices for this register. - // 0 means this is a special register, specified by type alone. 
+ Operand swizzle(uint8_t c) + { + Operand ret = *this; + ret.numComponents = NUMCOMPS_1; + ret.comps[0] = comps[c]; + ret.comps[1] = 0xff; + ret.comps[2] = 0xff; + ret.comps[3] = 0xff; + ret.values[0] = values[c]; + ret.values[1] = 0; + ret.values[2] = 0; + ret.values[3] = 0; + return ret; + } + + Operand swizzle(uint8_t x, uint8_t y, uint8_t z, uint8_t w) + { + Operand ret = *this; + ret.comps[0] = comps[x]; + ret.comps[1] = comps[y]; + ret.comps[2] = z < 4 ? comps[z] : 0xff; + ret.comps[3] = w < 4 ? comps[w] : 0xff; + ret.values[0] = values[x]; + ret.values[1] = values[y]; + ret.values[2] = z < 4 ? values[z] : 0; + ret.values[3] = w < 4 ? values[w] : 0; + return ret; + } + + void setComps(uint8_t x, uint8_t y, uint8_t z, uint8_t w) + { + comps[0] = x; + comps[1] = y; + comps[2] = z; + comps[3] = w; + } + + // indices for this register. + // 0 means this is a special register, specified by type alone. // 1 is probably most common. Indicates RegIndex specifies the register // 2 is for constant buffers, array inputs etc. 
[0] indicates the cbuffer, [1] indicates the // cbuffer member // 3 is rare but follows the above pattern + rdcarray indices; // the declaration of the resource in this operand (not always present) Declaration *declaration; @@ -748,7 +875,7 @@ struct Declaration offset = 0; length = 0; instruction = 0; - declaration = NUM_OPCODES; + declaration = NUM_REAL_OPCODES; refactoringAllowed = doublePrecisionFloats = forceEarlyDepthStencil = enableRawAndStructuredBuffers = skipOptimisation = enableMinPrecision = enableD3D11_1DoubleExtensions = enableD3D11_1ShaderExtensions = @@ -907,7 +1034,7 @@ struct Operation line = 0; length = 0; stride = 0; - operation = NUM_OPCODES; + operation = NUM_REAL_OPCODES; nonzero = false; saturate = false; preciseValues = 0; @@ -946,9 +1073,16 @@ class Program public: Program(const byte *bytes, size_t length); + void SetShaderEXTUAV(GraphicsAPI api, uint32_t space, uint32_t reg) + { + m_API = api; + m_ShaderExt = {space, reg}; + } void FetchComputeProperties(DXBC::Reflection *reflection); DXBC::Reflection *GuessReflection(); + rdcstr GetDebugStatus(); + void SetReflection(const DXBC::Reflection *refl) { m_Reflection = refl; } void SetDebugInfo(const DXBC::IDebugInfo *debug) { m_DebugInfo = debug; } DXBC::ShaderType GetShaderType() const { return m_Type; } @@ -978,6 +1112,9 @@ public: private: void FetchTypeVersion(); void DisassembleHexDump(); + + void PostprocessVendorExtensions(); + void MakeDisassemblyString(); const DXBC::Reflection *m_Reflection = NULL; @@ -1003,6 +1140,9 @@ private: bool m_Disassembled = false; + GraphicsAPI m_API = GraphicsAPI::D3D11; + rdcpair m_ShaderExt = {~0U, ~0U}; + rdcstr m_Disassembly; // declarations of inputs, outputs, constant buffers, temp registers etc. 
diff --git a/renderdoc/driver/shaders/dxbc/dxbc_container.cpp b/renderdoc/driver/shaders/dxbc/dxbc_container.cpp index 5009651bb..511a42429 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_container.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_container.cpp @@ -556,6 +556,9 @@ const rdcstr &DXBCContainer::GetDisassembly() if(!m_DebugFileName.empty()) m_Disassembly += StringFormat::Fmt("// Debug name: %s\n", m_DebugFileName.c_str()); + if(m_ShaderExt.second != ~0U) + m_Disassembly += "// Vendor shader extensions in use\n"; + m_Disassembly += m_DXBCByteCode->GetDisassembly(); } else if(m_DXILByteCode) @@ -568,6 +571,9 @@ const rdcstr &DXBCContainer::GetDisassembly() if(!m_DebugFileName.empty()) m_Disassembly += StringFormat::Fmt("; shader debug name: %s\n", m_DebugFileName.c_str()); + if(m_ShaderExt.second != ~0U) + m_Disassembly += "; Vendor shader extensions in use\n"; + m_Disassembly += "; shader hash: "; byte *hashBytes = (byte *)m_Hash; for(size_t i = 0; i < sizeof(m_Hash); i++) @@ -600,6 +606,8 @@ void DXBCContainer::FillTraceLineInfo(ShaderDebugTrace &trace) const uint32_t extraLines = 2; if(!m_DebugFileName.empty()) extraLines++; + if(m_ShaderExt.second != ~0U) + extraLines++; if(m_GlobalFlags != GlobalShaderFlags::None) extraLines += (uint32_t)Bits::CountOnes((uint32_t)m_GlobalFlags) + 2; @@ -951,8 +959,8 @@ void DXBCContainer::TryFetchSeparateDebugInfo(bytebuf &byteCode, const rdcstr &d } } -DXBCContainer::DXBCContainer(bytebuf &ByteCode, const rdcstr &debugInfoPath, uint32_t shaderExtReg, - uint32_t shaderExtSpace) +DXBCContainer::DXBCContainer(bytebuf &ByteCode, const rdcstr &debugInfoPath, GraphicsAPI api, + uint32_t shaderExtReg, uint32_t shaderExtSpace) { RDCEraseEl(m_ShaderStats); @@ -1838,6 +1846,26 @@ DXBCContainer::DXBCContainer(bytebuf &ByteCode, const rdcstr &debugInfoPath, uin if(m_DXBCByteCode || m_DXILByteCode) { RDCASSERT(m_Reflection); + + if(shaderExtReg != ~0U) + { + bool found = false; + const bool sm51 = (m_Version.Major == 5 && 
m_Version.Minor == 1); + + // see if we can find the magic UAV. If so remove it from the reflection + for(size_t i = 0; i < m_Reflection->UAVs.size(); i++) + { + const ShaderInputBind &uav = m_Reflection->UAVs[i]; + if(uav.reg == shaderExtReg && (!sm51 || shaderExtSpace == ~0U || shaderExtSpace == uav.space)) + { + found = true; + m_Reflection->UAVs.erase(i); + if(m_DXBCByteCode) m_DXBCByteCode->SetShaderEXTUAV(api, shaderExtSpace, shaderExtReg); // may be NULL when only DXIL bytecode is present + m_ShaderExt = {shaderExtSpace, shaderExtReg}; + break; + } + } + } } } @@ -2014,7 +2042,7 @@ TEST_CASE("DO NOT COMMIT - convenience test", "[dxbc]") bytebuf buf; FileIO::ReadAll("/path/to/container_file.dxbc", buf); - DXBC::DXBCContainer container(buf, rdcstr()); + DXBC::DXBCContainer container(buf, rdcstr(), GraphicsAPI::D3D11, ~0U, ~0U); // the only thing fetched lazily is the disassembly, so grab that here diff --git a/renderdoc/driver/shaders/dxbc/dxbc_container.h b/renderdoc/driver/shaders/dxbc/dxbc_container.h index ba6556ebd..aeba0c8f7 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_container.h +++ b/renderdoc/driver/shaders/dxbc/dxbc_container.h @@ -142,8 +142,8 @@ ShaderCompileFlags EncodeFlags(const uint32_t flags, const rdcstr &profile); class DXBCContainer { public: - DXBCContainer(bytebuf &ByteCode, const rdcstr &debugInfoPath, uint32_t shaderExtReg, - uint32_t shaderExtSpace); + DXBCContainer(bytebuf &ByteCode, const rdcstr &debugInfoPath, GraphicsAPI api, + uint32_t shaderExtReg, uint32_t shaderExtSpace); ~DXBCContainer(); DXBC::ShaderType m_Type = DXBC::ShaderType::Max; struct @@ -190,6 +190,8 @@ private: uint32_t m_Hash[4]; + rdcpair<uint32_t, uint32_t> m_ShaderExt = {~0U, ~0U}; + rdcstr m_DebugFileName; GlobalShaderFlags m_GlobalFlags = GlobalShaderFlags::None; diff --git a/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp b/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp index 893cabd31..27c9106eb 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp @@ -33,9 +33,15 @@
#include "dxbc_container.h" +#include "driver/ihv/nv/nvapi_wrapper.h" + RDOC_CONFIG(bool, DXBC_Disassembly_FriendlyNaming, true, "Where possible (i.e. it is completely unambiguous) replace register names with " "high-level variable names."); +RDOC_CONFIG(bool, DXBC_Disassembly_ProcessVendorShaderExts, true, + "Process vendor shader extensions from magic UAV encoded instructions into the real " + "operations.\n" + "If this is disabled, shader debugging won't produce correct results."); namespace DXBCBytecode { @@ -254,6 +260,158 @@ static MaskedElement MinPrecision; static MaskedElement NonUniform; }; +namespace AMDInstruction +{ +// ha ha these are different :( +enum class DX11Op +{ + Readfirstlane = 0x01, + Readlane = 0x02, + LaneId = 0x03, + Swizzle = 0x04, + Ballot = 0x05, + MBCnt = 0x06, + Min3U = 0x08, + Min3F = 0x09, + Med3U = 0x0a, + Med3F = 0x0b, + Max3U = 0x0c, + Max3F = 0x0d, + BaryCoord = 0x0e, + VtxParam = 0x0f, + ViewportIndex = 0x10, + RtArraySlice = 0x11, + WaveReduce = 0x12, + WaveScan = 0x13, + DrawIndex = 0x17, + AtomicU64 = 0x18, + GetWaveSize = 0x19, + BaseInstance = 0x1a, + BaseVertex = 0x1b, +}; + +enum class DX12Op +{ + Readfirstlane = 0x01, + Readlane = 0x02, + LaneId = 0x03, + Swizzle = 0x04, + Ballot = 0x05, + MBCnt = 0x06, + Min3U = 0x07, + Min3F = 0x08, + Med3U = 0x09, + Med3F = 0x0a, + Max3U = 0x0b, + Max3F = 0x0c, + BaryCoord = 0x0d, + VtxParam = 0x0e, + ViewportIndex = 0x10, // DX11 only + RtArraySlice = 0x11, // DX11 only + WaveReduce = 0x12, + WaveScan = 0x13, + LoadDwAtAddr = 0x14, + DrawIndex = 0x17, + AtomicU64 = 0x18, + GetWaveSize = 0x19, + BaseInstance = 0x1a, + BaseVertex = 0x1b, +}; + +DX12Op convert(DX11Op op) +{ + switch(op) + { + // convert opcodes that don't match up + case DX11Op::Min3U: return DX12Op::Min3U; + case DX11Op::Min3F: return DX12Op::Min3F; + case DX11Op::Med3U: return DX12Op::Med3U; + case DX11Op::Med3F: return DX12Op::Med3F; + case DX11Op::Max3U: return DX12Op::Max3U; + case DX11Op::Max3F: return 
DX12Op::Max3F; + case DX11Op::BaryCoord: return DX12Op::BaryCoord; + case DX11Op::VtxParam: + return DX12Op::VtxParam; + // others match up exactly + default: return DX12Op(op); + } +} + +enum BaryInterpMode +{ + LinearCenter = 1, + LinearCentroid = 2, + LinearSample = 3, + PerspCenter = 4, + PerspCentroid = 5, + PerspSample = 6, + PerspPullModel = 7, +}; + +enum SwizzleMask +{ + SwapX1 = 0x041f, + SwapX2 = 0x081f, + SwapX4 = 0x101f, + SwapX8 = 0x201f, + SwapX16 = 0x401f, + ReverseX4 = 0x0c1f, + ReverseX8 = 0x1c1f, + ReverseX16 = 0x3c1f, + ReverseX32 = 0x7c1f, + BCastX2 = 0x003e, + BCastX4 = 0x003c, + BCastX8 = 0x0038, + BCastX16 = 0x0030, + BCastX32 = 0x0020, +}; + +enum AMDAtomic +{ + Min = 0x01, + Max = 0x02, + And = 0x03, + Or = 0x04, + Xor = 0x05, + Add = 0x06, + Xchg = 0x07, + CmpXchg = 0x08, +}; + +VendorAtomicOp convert(AMDAtomic op) +{ + switch(op) + { + case Min: return ATOMIC_OP_MIN; + case Max: return ATOMIC_OP_MAX; + case And: return ATOMIC_OP_AND; + case Or: return ATOMIC_OP_OR; + case Xor: return ATOMIC_OP_XOR; + case Add: return ATOMIC_OP_ADD; + case Xchg: return ATOMIC_OP_SWAP; + case CmpXchg: return ATOMIC_OP_CAS; + default: return ATOMIC_OP_NONE; + } +} + +static MaskedElement Magic; +static MaskedElement Phase; +static MaskedElement Data; +static MaskedElement BaryInterp; +static MaskedElement SwizzleOp; +static MaskedElement Opcode11; +static MaskedElement Opcode12; + +static MaskedElement VtxParamComponent; +static MaskedElement VtxParamParameter; +static MaskedElement VtxParamVertex; + +static MaskedElement WaveOp; +static MaskedElement WaveOpFlags; + +static MaskedElement AtomicOp; +}; + rdcstr toString(const uint32_t values[], uint32_t numComps); bool Operand::operator==(const Operand &o) const @@ -583,6 +741,1518 @@ void Program::DisassembleHexDump() implicitRet.str = "ret"; m_Instructions.push_back(implicitRet); + + if(DXBC_Disassembly_ProcessVendorShaderExts() && m_ShaderExt.second != ~0U) + PostprocessVendorExtensions(); +} + +void 
Program::PostprocessVendorExtensions() +{ + const bool friendly = DXBC_Disassembly_FriendlyNaming(); + + uint32_t magicID = ~0U; + + for(size_t i = 0; i < m_Declarations.size(); i++) + { + const Declaration &decl = m_Declarations[i]; + if((decl.operand.indices.size() == 1 && decl.operand.indices[0].index == m_ShaderExt.second) || + (decl.operand.indices.size() == 3 && decl.operand.indices[1].index == m_ShaderExt.second && + decl.space == m_ShaderExt.first)) + { + magicID = (uint32_t)decl.operand.indices[0].index; + m_Declarations.erase(i); + break; + } + } + + // now we know the UAV, iterate the instructions looking for patterns to replace. + // + // AMD is nice and easy. Every instruction works on a scalar (vector versions repeat for each + // component) and is encoded into a single InterlockedCompareExchange on the UAV. + // So we can simply replace them in-place by decoding. + // + // NV's are not as nice. They are demarcated by IncrementCounter on the UAV so we know we'll see + // a linear stream without re-ordering, but they *can* be intermixed with other non-intrinsic + // instructions. Parameters and data are set by writing to specific offsets within the structure + // + // There are two types: + // + // Simpler, instructions that work purely on vars and not on resources. Shuffle/ballot/etc + // + // These come in the form: + // index = magicUAV.IncrementCounter() + // set params and opcode by writing to magicUAV[index].member... + // retval = magicUAV.IncrementCounter() + // [optional (see below): retval2 = magicUAV.IncrementCounter()] + // + // This type of operand returns the result with the closing IncrementCounter(). There could be + // multiple results, so numOutputs is set before any, and then that many IncrementCounter() are + // emitted with each result. + // + // More complex, instructions that use UAVs. 
Mostly atomics + // + // index1 = magicUAV.IncrementCounter() + // magicUAV[index1].markUAV = 1; + // userUAV[index1] = 0; // or some variation of such + // index2 = magicUAV.IncrementCounter() + // set params and opcode as above in magicUAV[index2].member... + // retval = magicUAV[index2].dst + // + // Also note that if the shader doesn't use the return result of an atomic, the dst may never be + // read! + // + // The difficulty then is distinguishing between the two and knowing where the boundaries are. + // We do this with a simple state machine tracking where we are in an opcode: + // + // +----------> Nothing + // | v + // | | + // | IncrementCounter() + // Emit instruction | + // | v + // | Instruction >--write markUAV---> UAV instruction header + // | v (wait for other UAV write) + // | | v + // | | | + // | write opcode ] | + // | | ] | + // | v ] simple | + // | Instruction Body ] case | + // | v ] | + // | | ] | + // | IncrementCounter() ] | + // | | | + // +----<---------+ | + // | IncrementCounter() + // | | + // | UAV instruction body <------------------+ + // | v + // | | + // | write opcode + // | | + // +--------------+ + // + // so most state transitions are marked by an IncrementCounter(). The exceptions being + // Instruction where we wait for a write to either markUAV or opcode to move to either simple + // instruction body or to the UAV instruction header, and UAV instruction body which leaves + // when we see an opcode write. + // + // We assume that markUAV will be written BEFORE the fake UAV write. It's not entirely clear if + // this is guaranteed to not be re-ordered but it seems to be true and it's implied that NV's + // driver relies on this. This simplifies tracking since we can use it as a state transition. + // + // We also assume that multiple accesses to the UAV don't overlap. This should be guaranteed by + // the use of the index from the counter being used for access. However we don't actually check + // the index itself. 
+ // + // all src/dst are uint4, others are all uint + + enum class InstructionState + { + // if something goes wrong we enter this state and stop patching + Broken, + + Nothing, + + // this is a state only used for AMD's UAV atomic op, which takes more parameters and uses the + // operation phases. + AMDUAVAtomic, + + // this is the state when we're not sure what type we are. Either markUAV is written, in which + // case we move to UAVInstructionHeader1, or opcode is written, in which case we move to + // InstructionBody. We should see one or the other. + // + // FP16 UAV instructions (NV_EXTN_OP_FP16_ATOMIC) that operate on float4 resources have two + // return values. Unfortunately we can't reliably detect this from the bytecode, so what + // happens is that when we see opcode get written if it's NV_EXTN_OP_FP16_ATOMIC then we jump + // straight to UAVInstructionBody and re-use the UAV instruction header from last time. We + // know this MUST be a continuation because otherwise NV_EXTN_OP_FP16_ATOMIC is always + // preceded by a UAV instruction header (via markUAV). + InstructionHeader, + InstructionBody, + // we move from InstructionHeader to this state when markUAV is written. The next UAV write is + // used to determine the 'target' UAV. + // We then move to header2 so we don't consume any other UAV writes. + UAVInstructionHeader1, + // here we do nothing but sit and wait for the IncrementCounter() so we can move to the UAV + // body state + UAVInstructionHeader2, + // in this state we aren't sure exactly when to leave it.
We wait *at least* until opcode is + // written, but there may be more instructions after that to read from dst :( + UAVInstructionBody, + }; + + enum class NvUAVParam + { + opcode = 0, + src0 = 76, + src1 = 92, + src2 = 108, + src3 = 28, + src4 = 44, + src5 = 60, + dst = 124, + markUAV = 140, + numOutputs = 144, + }; + + InstructionState state = InstructionState::Nothing; + + NvShaderOpcode nvopcode = NvShaderOpcode::Unknown; + Operand srcParam[8]; + Operand dstParam[4]; + Operand uavParam; + int numOutputs = 0, outputsNeeded = 0; + + ToString flags = friendly ? ToString::FriendlyNameRegisters : ToString::None; + + for(size_t i = 0; i < m_Instructions.size(); i++) + { + // reserve space for an added instruction so that curOp can stay valid even if we insert a new + // op. This only actually does work the first time (or after we've inserted a new + // instruction). + m_Instructions.reserve(m_Instructions.size() + 1); + + Operation &curOp = m_Instructions[i]; + + if(state == InstructionState::Broken) + break; + + if(curOp.operation == OPCODE_IMM_ATOMIC_CMP_EXCH && curOp.operands[1].indices[0].index == magicID) + { + // AMD operation + if(curOp.operands[2].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value for AMD extension instruction. 
Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + uint32_t instruction = curOp.operands[2].values[0]; + + if(AMDInstruction::Magic.Get(instruction) == 5) + { + AMDInstruction::DX12Op amdop; + + if(m_API == GraphicsAPI::D3D11) + amdop = AMDInstruction::convert(AMDInstruction::Opcode11.Get(instruction)); + else + amdop = AMDInstruction::Opcode12.Get(instruction); + + uint32_t phase = AMDInstruction::Phase.Get(instruction); + if(phase == 0) + { + srcParam[0] = curOp.operands[3]; + srcParam[1] = curOp.operands[4]; + } + else if(phase == 1) + { + srcParam[2] = curOp.operands[3]; + srcParam[3] = curOp.operands[4]; + } + else if(phase == 2) + { + srcParam[4] = curOp.operands[3]; + srcParam[5] = curOp.operands[4]; + } + else if(phase == 3) + { + srcParam[6] = curOp.operands[3]; + srcParam[7] = curOp.operands[4]; + } + + Operation op; + op.operands.resize(1); + + switch(amdop) + { + case AMDInstruction::DX12Op::Readfirstlane: + { + op.operation = OPCODE_AMD_READFIRSTLANE; + op.operands.resize(2); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[1].name = "src"; // named after the copy so the name isn't overwritten + break; + } + case AMDInstruction::DX12Op::Readlane: + { + op.operation = OPCODE_AMD_READLANE; + op.operands.resize(3); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[1].name = "src"; + // lane is encoded in instruction data + op.operands[2].name = "lane"; + op.operands[2].type = TYPE_IMMEDIATE32; + op.operands[2].numComponents = NUMCOMPS_1; + op.operands[2].values[0] = AMDInstruction::Data.Get(instruction); + break; + } + case AMDInstruction::DX12Op::LaneId: + { + op.operation = OPCODE_AMD_LANEID; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::Swizzle: + { + op.operation = OPCODE_AMD_SWIZZLE; + op.operands.resize(2); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[1].name = "src"; + break; + } + case
AMDInstruction::DX12Op::Ballot: + { + if(phase == 0) + { + // srcParams already stored, store the dst for phase 0 + dstParam[0] = curOp.operands[0]; + } + else if(phase == 1) + { + op.operation = OPCODE_AMD_BALLOT; + op.operands.resize(3); + op.operands[0] = dstParam[0]; + op.operands[1] = curOp.operands[0]; + op.operands[2] = srcParam[0]; + op.operands[2].name = "predicate"; + } + break; + } + case AMDInstruction::DX12Op::MBCnt: + { + op.operation = OPCODE_AMD_MBCNT; + op.operands.resize(3); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[2] = srcParam[1]; + break; + } + case AMDInstruction::DX12Op::Min3U: + case AMDInstruction::DX12Op::Min3F: + case AMDInstruction::DX12Op::Med3U: + case AMDInstruction::DX12Op::Med3F: + case AMDInstruction::DX12Op::Max3U: + case AMDInstruction::DX12Op::Max3F: + { + if(phase == 0) + { + // don't need the output at all, it's just used to chain the instructions + } + else if(phase == 1) + { + switch(amdop) + { + case AMDInstruction::DX12Op::Min3U: op.operation = OPCODE_AMD_MIN3U; break; + case AMDInstruction::DX12Op::Min3F: op.operation = OPCODE_AMD_MIN3F; break; + case AMDInstruction::DX12Op::Med3U: op.operation = OPCODE_AMD_MED3U; break; + case AMDInstruction::DX12Op::Med3F: op.operation = OPCODE_AMD_MED3F; break; + case AMDInstruction::DX12Op::Max3U: op.operation = OPCODE_AMD_MAX3U; break; + case AMDInstruction::DX12Op::Max3F: op.operation = OPCODE_AMD_MAX3F; break; + default: break; + } + op.operands.resize(4); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[2] = srcParam[1]; + op.operands[3] = srcParam[2]; + } + break; + } + case AMDInstruction::DX12Op::BaryCoord: + { + if(phase == 0) + { + // srcParams already stored, store the dst for phase 0 + dstParam[0] = curOp.operands[0]; + } + else if(phase == 1) + { + if(AMDInstruction::BaryInterp.Get(instruction) != AMDInstruction::PerspPullModel) + { + // all modes except pull model have two outputs + op.operation 
= OPCODE_AMD_BARYCOORD; + op.operands.resize(2); + op.operands[0] = dstParam[0]; + op.operands[0].name = "i"; // named after the copy so the name isn't overwritten + op.operands[1] = curOp.operands[0]; + op.operands[1].name = "j"; + } + else + { + dstParam[1] = curOp.operands[0]; + } + } + else if(phase == 2) + { + // pull model has three outputs, emitted once the final phase is seen + op.operation = OPCODE_AMD_BARYCOORD; + op.operands.resize(3); + op.operands[0] = dstParam[0]; + op.operands[0].name = "invW"; + op.operands[1] = dstParam[1]; + op.operands[1].name = "invI"; + op.operands[2] = curOp.operands[0]; + op.operands[2].name = "invJ"; + } + break; + } + case AMDInstruction::DX12Op::VtxParam: + { + op.operation = OPCODE_AMD_VTXPARAM; + op.operands.resize(3); + op.operands[0] = curOp.operands[0]; + // vertexIndex is encoded in instruction data + op.operands[1].name = "vertexIndex"; + op.operands[1].type = TYPE_IMMEDIATE32; + op.operands[1].numComponents = NUMCOMPS_1; + op.operands[1].values[0] = AMDInstruction::VtxParamVertex.Get(instruction); + + // decode and pretty-ify the parameter index and component + op.operands[2].name = "parameter"; + op.operands[2].type = TYPE_INPUT; + op.operands[2].numComponents = NUMCOMPS_1; + op.operands[2].indices.resize(1); + op.operands[2].indices[0].absolute = true; + op.operands[2].indices[0].index = AMDInstruction::VtxParamParameter.Get(instruction); + op.operands[2].setComps(AMDInstruction::VtxParamComponent.Get(instruction), 0xff, 0xff, + 0xff); + + break; + } + case AMDInstruction::DX12Op::ViewportIndex: + { + op.operation = OPCODE_AMD_GET_VIEWPORTINDEX; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::RtArraySlice: + { + op.operation = OPCODE_AMD_GET_RTARRAYSLICE; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::WaveReduce: + case AMDInstruction::DX12Op::WaveScan: + { + if(amdop == AMDInstruction::DX12Op::WaveReduce) + op.operation = OPCODE_AMD_WAVE_REDUCE; + else + op.operation = OPCODE_AMD_WAVE_SCAN; + + op.preciseValues =
AMDInstruction::WaveOp.Get(instruction); + break; + } + case AMDInstruction::DX12Op::LoadDwAtAddr: + { + if(phase == 0) + { + // don't need the output at all, it's just used to chain the instructions + } + else if(phase == 1) + { + op.operation = OPCODE_AMD_LOADDWATADDR; + op.operands.resize(4); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + op.operands[1].name = "gpuVaLoBits"; + op.operands[2] = srcParam[1]; + op.operands[2].name = "gpuVaHiBits"; + op.operands[3] = srcParam[2]; + op.operands[3].name = "offset"; + } + break; + } + case AMDInstruction::DX12Op::DrawIndex: + { + op.operation = OPCODE_AMD_GET_DRAWINDEX; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::GetWaveSize: + { + op.operation = OPCODE_AMD_GET_WAVESIZE; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::BaseInstance: + { + op.operation = OPCODE_AMD_GET_BASEINSTANCE; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::BaseVertex: + { + op.operation = OPCODE_AMD_GET_BASEVERTEX; + op.operands = {curOp.operands[0]}; + break; + } + case AMDInstruction::DX12Op::AtomicU64: + { + // if we're in the nothing state, move to the AMD UAV state so we watch for a UAV access + // and nop it out + if(state == InstructionState::Nothing) + state = InstructionState::AMDUAVAtomic; + + VendorAtomicOp atomicop = convert(AMDInstruction::AtomicOp.Get(instruction)); + op.preciseValues = atomicop; + + bool isCAS = (atomicop == ATOMIC_OP_CAS); + + // for CAS we have four phases, only exit the state when we're in phase 3. For all other + // instructions we have three phases so exit in phase 2. + if(phase == 3 || (phase == 2 && !isCAS)) + { + op.operation = OPCODE_AMD_U64_ATOMIC; + state = InstructionState::Nothing; + + // output values first: the saved phase 0 destination, then this instruction's destination + op.operands[0] = dstParam[0]; + op.operands.push_back(curOp.operands[0]); + + // then the saved UAV + op.operands.push_back(uavParam); + + // then the address.
This is in params [0], [1], [2]. If they all come from the same + // register we can compact this + if(srcParam[0].indices == srcParam[1].indices && + srcParam[1].indices == srcParam[2].indices) + { + op.operands.push_back(srcParam[0]); + op.operands.back().setComps(srcParam[0].comps[0], srcParam[1].comps[0], + srcParam[2].comps[0], 0xff); + op.operands.back().name = "address"; + } + else + { + op.operands.push_back(srcParam[0]); + op.operands.back().name = "address.x"; + op.operands.push_back(srcParam[1]); + op.operands.back().name = "address.y"; + op.operands.push_back(srcParam[2]); + op.operands.back().name = "address.z"; + } + + // for CAS, the compare value next + if(isCAS) + { + if(srcParam[5].indices == srcParam[6].indices) + { + op.operands.push_back(srcParam[5]); + op.operands.back().setComps(srcParam[5].comps[0], srcParam[6].comps[0], 0xff, 0xff); + op.operands.back().values[1] = srcParam[6].values[0]; + op.operands.back().name = "compare_value"; + } + else + { + op.operands.push_back(srcParam[5].swizzle(0)); + op.operands.back().name = "compare_value.x"; + op.operands.push_back(srcParam[6].swizzle(0)); + op.operands.back().name = "compare_value.y"; + } + } + + // then the value + if(srcParam[3].indices == srcParam[4].indices) + { + op.operands.push_back(srcParam[3]); + op.operands.back().setComps(srcParam[3].comps[0], srcParam[4].comps[0], 0xff, 0xff); + op.operands.back().values[1] = srcParam[4].values[0]; + op.operands.back().name = "value"; + } + else + { + op.operands.push_back(srcParam[3].swizzle(0)); + op.operands.back().name = "value.x"; + op.operands.push_back(srcParam[4].swizzle(0)); + op.operands.back().name = "value.y"; + } + } + + // phase 0's destination is the first destination + if(phase == 0) + dstParam[0] = op.operands[0]; + + break; + } + } + + // if the operation wasn't set we're on an intermediate phase. 
operands were saved, + // wait until we have the full operation + if(op.operation != NUM_REAL_OPCODES) + { + op.offset = curOp.offset; + op.str = ToStr(op.operation); + + if(op.operation == OPCODE_AMD_BARYCOORD) + { + switch(AMDInstruction::BaryInterp.Get(instruction)) + { + case AMDInstruction::LinearCenter: op.str += "_linear_center"; break; + case AMDInstruction::LinearCentroid: op.str += "_linear_centroid"; break; + case AMDInstruction::LinearSample: op.str += "_linear_sample"; break; + case AMDInstruction::PerspCenter: op.str += "_persp_center"; break; + case AMDInstruction::PerspCentroid: op.str += "_persp_centroid"; break; + case AMDInstruction::PerspSample: op.str += "_persp_sample"; break; + case AMDInstruction::PerspPullModel: op.str += "_persp_pullmodel"; break; + default: op.str += "_unknown"; break; + } + } + else if(op.operation == OPCODE_AMD_SWIZZLE) + { + switch(AMDInstruction::SwizzleOp.Get(instruction)) + { + case AMDInstruction::SwapX1: op.str += "_swap1"; break; + case AMDInstruction::SwapX2: op.str += "_swap2"; break; + case AMDInstruction::SwapX4: op.str += "_swap4"; break; + case AMDInstruction::SwapX8: op.str += "_swap8"; break; + case AMDInstruction::SwapX16: op.str += "_swap16"; break; + case AMDInstruction::ReverseX4: op.str += "_reverse4"; break; + case AMDInstruction::ReverseX8: op.str += "_reverse8"; break; + case AMDInstruction::ReverseX16: op.str += "_reverse16:"; break; + case AMDInstruction::ReverseX32: op.str += "_reverse32:"; break; + case AMDInstruction::BCastX2: op.str += "_bcast2"; break; + case AMDInstruction::BCastX4: op.str += "_bcast4"; break; + case AMDInstruction::BCastX8: op.str += "_bcast8"; break; + case AMDInstruction::BCastX16: op.str += "_bcast16"; break; + case AMDInstruction::BCastX32: op.str += "_bcast32"; break; + } + } + else if(op.operation == OPCODE_AMD_WAVE_REDUCE || op.operation == OPCODE_AMD_WAVE_SCAN) + { + switch((VendorWaveOp)op.preciseValues) + { + default: break; + case WAVE_OP_ADD_FLOAT: op.str += 
"_addf"; break; + case WAVE_OP_ADD_SINT: op.str += "_addi"; break; + case WAVE_OP_ADD_UINT: op.str += "_addu"; break; + case WAVE_OP_MUL_FLOAT: op.str += "_mulf"; break; + case WAVE_OP_MUL_SINT: op.str += "_muli"; break; + case WAVE_OP_MUL_UINT: op.str += "_mulu"; break; + case WAVE_OP_MIN_FLOAT: op.str += "_minf"; break; + case WAVE_OP_MIN_SINT: op.str += "_mini"; break; + case WAVE_OP_MIN_UINT: op.str += "_minu"; break; + case WAVE_OP_MAX_FLOAT: op.str += "_maxf"; break; + case WAVE_OP_MAX_SINT: op.str += "_maxi"; break; + case WAVE_OP_MAX_UINT: op.str += "_maxu"; break; + case WAVE_OP_AND: op.str += "_and"; break; + case WAVE_OP_OR: op.str += "_or"; break; + case WAVE_OP_XOR: op.str += "_xor"; break; + } + + if(op.operation == OPCODE_AMD_WAVE_SCAN) + { + if(AMDInstruction::WaveOpFlags.Get(instruction) & 0x1) + op.str += "_incl"; + if(AMDInstruction::WaveOpFlags.Get(instruction) & 0x2) + op.str += "_excl"; + } + } + + for(size_t a = 0; a < op.operands.size(); a++) + { + if(a == 0) + op.str += " "; + else + op.str += ", "; + op.str += op.operands[a].toString(m_Reflection, flags | ToString::ShowSwizzle); + } + + m_Instructions.insert(i + 1, op); + } + } + else + { + RDCERR("Expected magic value of 5 in encoded AMD instruction %x", instruction); + state = InstructionState::Broken; + break; + } + + if(state == InstructionState::Broken) + continue; + + // remove this operation, but keep the old operation so we can undo this if things go + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + } + else if(curOp.operation == OPCODE_IMM_ATOMIC_ALLOC && + curOp.operands[1].indices[0].index == magicID) + { + // NV IncrementCounter() + switch(state) + { + case InstructionState::Broken: + case InstructionState::AMDUAVAtomic: + break; + // in Nothing an increment marks the beginning of an instruction of some type + case InstructionState::Nothing: + { + state = InstructionState::InstructionHeader; + break; + } + case 
InstructionState::InstructionHeader: + { + // the transition from instruction to any other state should happen via a markUAV or + // opcode write, not with a counter increment + RDCERR( + "Expected either markUAV or opcode write before counter increment in unknown " + "instruction header!"); + state = InstructionState::Broken; + break; + } + case InstructionState::InstructionBody: + { + outputsNeeded--; + if(outputsNeeded <= 0) + { + // once we've emitted all outputs, move to Nothing state + state = InstructionState::Nothing; + + // and emit vendor instruction + Operation op; + + switch(nvopcode) + { + case NvShaderOpcode::Shuffle: + case NvShaderOpcode::ShuffleUp: + case NvShaderOpcode::ShuffleDown: + case NvShaderOpcode::ShuffleXor: + { + if(nvopcode == NvShaderOpcode::Shuffle) + op.operation = OPCODE_NV_SHUFFLE; + else if(nvopcode == NvShaderOpcode::ShuffleUp) + op.operation = OPCODE_NV_SHUFFLE_UP; + else if(nvopcode == NvShaderOpcode::ShuffleDown) + op.operation = OPCODE_NV_SHUFFLE_DOWN; + else if(nvopcode == NvShaderOpcode::ShuffleXor) + op.operation = OPCODE_NV_SHUFFLE_XOR; + + op.operands.resize(4); + op.operands[0] = curOp.operands[0]; + + op.operands[1].name = "value"; + op.operands[1] = srcParam[0].swizzle(0); + if(nvopcode == NvShaderOpcode::Shuffle) + op.operands[2].name = "srcLane"; + else if(nvopcode == NvShaderOpcode::ShuffleXor) + op.operands[2].name = "laneMask"; + else + op.operands[2].name = "delta"; + op.operands[2] = srcParam[0].swizzle(1); + op.operands[3].name = "width"; + op.operands[3] = srcParam[0].swizzle(3); + break; + } + case NvShaderOpcode::VoteAll: + case NvShaderOpcode::VoteAny: + case NvShaderOpcode::VoteBallot: + { + if(nvopcode == NvShaderOpcode::VoteAll) + op.operation = OPCODE_NV_VOTE_ALL; + else if(nvopcode == NvShaderOpcode::VoteAny) + op.operation = OPCODE_NV_VOTE_ANY; + else if(nvopcode == NvShaderOpcode::VoteBallot) + op.operation = OPCODE_NV_VOTE_BALLOT; + + op.operands.resize(2); + op.operands[0] = curOp.operands[0]; + 
op.operands[1] = srcParam[0]; + op.operands[1].name = "predicate"; + break; + } + case NvShaderOpcode::GetLaneId: + { + op.operation = OPCODE_NV_GET_LANEID; + op.operands = {curOp.operands[0]}; + break; + } + case NvShaderOpcode::GetSpecial: + { + if(srcParam[0].type != TYPE_IMMEDIATE32) + { + RDCERR("Expected literal value for special subopcode"); + state = InstructionState::Broken; + break; + } + + NvShaderSpecial special = (NvShaderSpecial)srcParam[0].values[0]; + + if(special == NvShaderSpecial::ThreadLtMask) + { + op.operation = OPCODE_NV_GET_THREADLTMASK; + } + else if(special == NvShaderSpecial::FootprintSingleLOD) + { + op.operation = OPCODE_NV_GET_FOOTPRINT_SINGLELOD; + } + else + { + RDCERR("Unexpected special subopcode"); + state = InstructionState::Broken; + break; + } + op.operands = {curOp.operands[0]}; + break; + } + case NvShaderOpcode::MatchAny: + { + op.operation = OPCODE_NV_MATCH_ANY; + op.operands.resize(2); + op.operands[0] = curOp.operands[0]; + op.operands[1] = srcParam[0]; + // we don't need src1, it only indicates the number of components in the value, + // which we already have + break; + } + case NvShaderOpcode::GetShadingRate: + { + op.operation = OPCODE_NV_GET_SHADING_RATE; + + if(dstParam[0].indices == curOp.operands[0].indices && + dstParam[1].indices == curOp.operands[0].indices) + { + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result"; + + // fixup the comps according to the shuffle + op.operands.back().setComps( + // x + dstParam[1].comps[0], + // y + dstParam[0].comps[0], + // z + curOp.operands[0].comps[0], 0xff); + } + else + { + // these are in reverse order because we read them as numOutputs was decrementing + op.operands.push_back(dstParam[1]); + op.operands.back().name = "result.x"; + op.operands.push_back(dstParam[0]); + op.operands.back().name = "result.y"; + // z is last + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result.z"; + } + + break; + } + // all footprint ops 
are very similar + case NvShaderOpcode::Footprint: + case NvShaderOpcode::FootprintBias: + case NvShaderOpcode::FootprintLevel: + case NvShaderOpcode::FootprintGrad: + { + if(nvopcode == NvShaderOpcode::Footprint) + op.operation = OPCODE_NV_FOOTPRINT; + else if(nvopcode == NvShaderOpcode::FootprintBias) + op.operation = OPCODE_NV_FOOTPRINT_BIAS; + else if(nvopcode == NvShaderOpcode::FootprintLevel) + op.operation = OPCODE_NV_FOOTPRINT_LEVEL; + else if(nvopcode == NvShaderOpcode::FootprintGrad) + op.operation = OPCODE_NV_FOOTPRINT_GRAD; + + // four output values, could be assigned to different registers depending on packing + // because they come back as scalars from increment counter. In general we have to + // have them separately, but see if they all neatly line up into one output first. + + if(dstParam[0].indices == curOp.operands[0].indices && + dstParam[1].indices == curOp.operands[0].indices && + dstParam[2].indices == curOp.operands[0].indices) + { + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result"; + + // fixup the comps according to the shuffle + op.operands.back().setComps( + // x + dstParam[2].comps[0], + // y + dstParam[1].comps[0], + // z + dstParam[0].comps[0], + // w + curOp.operands[0].comps[0]); + } + else + { + // these are in reverse order because we read them as numOutputs was decrementing + op.operands.push_back(dstParam[2]); + op.operands.back().name = "result.x"; + op.operands.push_back(dstParam[1]); + op.operands.back().name = "result.y"; + op.operands.push_back(dstParam[0]); + op.operands.back().name = "result.z"; + // w is last + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result.w"; + } + + // peel out the source parameters + op.operands.push_back(srcParam[3].swizzle(0)); + op.operands.back().name = "texSpace"; + op.operands.push_back(srcParam[0].swizzle(0)); + op.operands.back().name = "texIndex"; + op.operands.push_back(srcParam[3].swizzle(1)); + op.operands.back().name = 
"smpSpace"; + op.operands.push_back(srcParam[0].swizzle(1)); + op.operands.back().name = "smpIndex"; + op.operands.push_back(srcParam[3].swizzle(2)); + op.operands.back().name = "texType"; + op.operands.push_back(srcParam[1]); + op.operands.back().comps[3] = 0xff; // location is a float3 + op.operands.back().values[3] = 0; + op.operands.back().name = "location"; + op.operands.push_back(srcParam[3].swizzle(3)); + op.operands.back().name = "coarse"; + op.operands.push_back(srcParam[1].swizzle(3)); + op.operands.back().name = "gran"; + + if(nvopcode == NvShaderOpcode::FootprintBias) + { + op.operands.push_back(srcParam[2].swizzle(0)); + op.operands.back().name = "bias"; + } + else if(nvopcode == NvShaderOpcode::FootprintLevel) + { + op.operands.push_back(srcParam[2].swizzle(0)); + op.operands.back().name = "lodLevel"; + } + else if(nvopcode == NvShaderOpcode::FootprintGrad) + { + op.operands.push_back(srcParam[2]); + op.operands.back().name = "ddx"; + op.operands.push_back(srcParam[5]); + op.operands.back().name = "ddy"; + } + + op.operands.push_back(srcParam[4]); + op.operands.back().name = "offset"; + + break; + } + case NvShaderOpcode::ShuffleGeneric: + { + op.operation = OPCODE_NV_SHUFFLE_GENERIC; + op.operands.resize(5); + // first output is the actual result + op.operands[0] = curOp.operands[0]; + // second output is the laneValid we stored previously + op.operands[1] = dstParam[0]; + op.operands[1].name = "out laneValid"; + + // we expect the params are packed into srcParam[0] + + op.operands[2] = srcParam[0].swizzle(0); + op.operands[2].name = "value"; + op.operands[3] = srcParam[0].swizzle(1); + op.operands[3].name = "srcLane"; + op.operands[4] = srcParam[0].swizzle(2); + op.operands[4].name = "width"; + break; + } + case NvShaderOpcode::VPRSEvalAttribAtSample: + case NvShaderOpcode::VPRSEvalAttribSnapped: + { + if(nvopcode == NvShaderOpcode::VPRSEvalAttribAtSample) + op.operation = OPCODE_NV_VPRS_EVAL_ATTRIB_SAMPLE; + else if(nvopcode == 
NvShaderOpcode::VPRSEvalAttribSnapped) + op.operation = OPCODE_NV_VPRS_EVAL_ATTRIB_SNAPPED; + + // up to four output values, could be assigned to different registers depending on + // packing because they come back as scalars from increment counter. In general we + // have to have them separately, but see if they all neatly line up into one output + // first. + + bool allSameReg = true; + + for(int o = 0; o < numOutputs - 1; o++) + { + if(!(dstParam[o].indices == curOp.operands[0].indices)) + { + allSameReg = false; + break; + } + } + + if(allSameReg) + { + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result"; + + for(int o = 0; o < 4; o++) + { + if(o >= numOutputs) + op.operands.back().comps[o] = 0xff; + else if(o + 1 == numOutputs) + op.operands.back().comps[o] = curOp.operands[0].comps[0]; + else + op.operands.back().comps[o] = dstParam[numOutputs - 2 - o].comps[0]; + } + } + else + { + const char swz[] = "xyzw"; + for(int o = 0; o < numOutputs - 1; o++) + { + // these are in reverse order because we read them as numOutputs was + // decrementing + op.operands.push_back(dstParam[numOutputs - 2 - o]); + op.operands.back().name = "result."; + op.operands.back().name += swz[o]; + } + op.operands.push_back(curOp.operands[0]); + op.operands.back().name = "result."; + op.operands.back().name += swz[numOutputs - 1]; + } + + op.operands.push_back(srcParam[0]); + op.operands.back().name = "attrib"; + + if(nvopcode == NvShaderOpcode::VPRSEvalAttribAtSample) + { + op.operands.push_back(srcParam[1]); + op.operands.back().name = "sampleIndex"; + op.operands.push_back(srcParam[2]); + op.operands.back().name = "pixelOffset"; + } + else if(nvopcode == NvShaderOpcode::VPRSEvalAttribSnapped) + { + op.operands.push_back(srcParam[1]); + op.operands.back().name = "offset"; + } + + break; + } + default: + RDCERR("Unexpected non-UAV opcode %d.", nvopcode); + state = InstructionState::Broken; + break; + } + + if(state == InstructionState::Broken) + break; + + 
op.offset = curOp.offset; + op.str = ToStr(op.operation); + + for(size_t a = 0; a < op.operands.size(); a++) + { + if(a == 0) + op.str += " "; + else + op.str += ", "; + op.str += op.operands[a].toString(m_Reflection, flags | ToString::ShowSwizzle); + } + + m_Instructions.insert(i + 1, op); + } + else + { + dstParam[outputsNeeded - 1] = curOp.operands[0]; + } + break; + } + case InstructionState::UAVInstructionHeader1: + { + RDCERR("Expected other UAV write before counter increment in UAV instruction header!"); + state = InstructionState::Broken; + break; + } + case InstructionState::UAVInstructionHeader2: + { + // now that we've gotten the UAV, we can go to the body + state = InstructionState::UAVInstructionBody; + break; + } + case InstructionState::UAVInstructionBody: + { + RDCERR( + "Unexpected counter increment while processing UAV instruction body. Expected " + "opcode!"); + state = InstructionState::Broken; + break; + } + } + + if(state == InstructionState::Broken) + continue; + + // remove this operation, but keep the old operation so we can undo this if things go + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + } + else if(curOp.operation == OPCODE_STORE_STRUCTURED && + curOp.operands[0].indices[0].index == magicID) + { + if(curOp.operands[2].type != TYPE_IMMEDIATE32) + { + RDCERR("Expected literal value for UAV write offset"); + state = InstructionState::Broken; + break; + } + + // NV magic UAV write + NvUAVParam param = (NvUAVParam)curOp.operands[2].values[0]; + + switch(param) + { + case NvUAVParam::opcode: + { + if(curOp.operands[3].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value being written as opcode. Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + nvopcode = (NvShaderOpcode)curOp.operands[3].values[0]; + + // if this is NV_EXTN_OP_FP16_ATOMIC we should have come here in UAVInstructionBody. 
+ // That we're here now means this is the continuation of an earlier instruction. + if(state == InstructionState::InstructionHeader && nvopcode == NvShaderOpcode::FP16Atomic) + state = InstructionState::UAVInstructionBody; + + // if we're in instruction, this is the simple case so move to the output + if(state == InstructionState::InstructionHeader) + { + // if we haven't gotten a number of outputs at all, set it to 1 + if(outputsNeeded <= 0) + numOutputs = outputsNeeded = 1; + state = InstructionState::InstructionBody; + } + else if(state == InstructionState::UAVInstructionBody) + { + // emit the instruction now, writing to the index register (which we know is + // 'unused'). There might be nothing to read the result value. We'll look out for + // loads and post-patch it. + // once we've emitted all outputs, move to Nothing state + state = InstructionState::Nothing; + + // and emit vendor instruction + Operation op; + // write to the index register at first. If there's a subsequent read of dst we'll patch + // this instruction with the destination for that. + op.operands.push_back(curOp.operands[1]); + // also include the UAV we noted elsewhere + op.operands.push_back(uavParam); + + NvShaderAtomic atomicop = NvShaderAtomic::Unknown; + + switch(nvopcode) + { + case NvShaderOpcode::FP16Atomic: + { + op.operation = OPCODE_NV_FP16_ATOMIC; + + if(srcParam[2].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value as atomic opcode. Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + atomicop = (NvShaderAtomic)srcParam[2].values[0]; + + op.operands.push_back(srcParam[0]); + op.operands.back().name = "address"; + op.operands.push_back(srcParam[1]); + op.operands.back().name = "value"; + + break; + } + case NvShaderOpcode::FP32Atomic: + { + op.operation = OPCODE_NV_FP32_ATOMIC; + + if(srcParam[2].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value as atomic opcode. 
Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + atomicop = (NvShaderAtomic)srcParam[2].values[0]; + + op.operands.push_back(srcParam[0].swizzle(0)); + op.operands.back().name = "byteAddress"; + op.operands.push_back(srcParam[1].swizzle(0)); + op.operands.back().name = "value"; + + break; + } + case NvShaderOpcode::U64Atomic: + { + op.operation = OPCODE_NV_U64_ATOMIC; + + if(srcParam[2].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value as atomic opcode. Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + atomicop = (NvShaderAtomic)srcParam[2].values[0]; + + op.operands.push_back(srcParam[0]); + op.operands.back().numComponents = NUMCOMPS_1; + op.operands.back().setComps(srcParam[0].comps[0], 0xff, 0xff, 0xff); + op.operands.back().name = "byteAddress"; + + if(atomicop == NvShaderAtomic::CompareAndSwap) + { + op.operands.push_back(srcParam[1]); + op.operands.back().numComponents = NUMCOMPS_4; + op.operands.back().setComps(srcParam[1].comps[0], srcParam[1].comps[1], 0xff, 0xff); + op.operands.back().values[1] = srcParam[1].values[1]; + op.operands.back().name = "compareValue"; + op.operands.push_back(srcParam[1]); + op.operands.back().numComponents = NUMCOMPS_4; + op.operands.back().setComps(srcParam[1].comps[2], srcParam[1].comps[3], 0xff, 0xff); + op.operands.back().values[1] = srcParam[1].values[3]; + op.operands.back().name = "value"; + } + else + { + op.operands.push_back(srcParam[1]); + op.operands.back().numComponents = NUMCOMPS_4; + op.operands.back().setComps(srcParam[1].comps[0], srcParam[1].comps[1], 0xff, 0xff); + op.operands.back().values[1] = srcParam[1].values[1]; + op.operands.back().name = "value"; + } + + break; + } + default: + RDCERR("Unexpected UAV opcode %d.", nvopcode); + state = InstructionState::Broken; + break; + } + + if(state == InstructionState::Broken) + break; + + if(atomicop == 
NvShaderAtomic::Unknown) + { + RDCERR("Couldn't determine atomic op"); + state = InstructionState::Broken; + break; + } + + op.offset = curOp.offset; + op.preciseValues = (uint32_t)atomicop; + op.str = ToStr(op.operation); + + switch(atomicop) + { + case NvShaderAtomic::Unknown: break; + case NvShaderAtomic::And: + op.str += "_and"; + op.preciseValues = ATOMIC_OP_AND; + break; + case NvShaderAtomic::Or: + op.str += "_or"; + op.preciseValues = ATOMIC_OP_OR; + break; + case NvShaderAtomic::Xor: + op.str += "_xor"; + op.preciseValues = ATOMIC_OP_XOR; + break; + case NvShaderAtomic::Add: + op.str += "_add"; + op.preciseValues = ATOMIC_OP_ADD; + break; + case NvShaderAtomic::Max: + op.str += "_max"; + op.preciseValues = ATOMIC_OP_MAX; + break; + case NvShaderAtomic::Min: + op.str += "_min"; + op.preciseValues = ATOMIC_OP_MIN; + break; + case NvShaderAtomic::Swap: + op.str += "_swap"; + op.preciseValues = ATOMIC_OP_SWAP; + break; + case NvShaderAtomic::CompareAndSwap: + op.str += "_comp_swap"; + op.preciseValues = ATOMIC_OP_CAS; + break; + } + + for(size_t a = 0; a < op.operands.size(); a++) + { + if(a == 0) + op.str += " "; + else + op.str += ", "; + op.str += op.operands[a].toString(m_Reflection, flags | ToString::ShowSwizzle); + } + + m_Instructions.insert(i + 1, op); + + // move into nothing state + state = InstructionState::Nothing; + } + else + { + // no other state should be writing an opcode. + RDCERR("Writing opcode in unexpected state %d.", state); + state = InstructionState::Broken; + } + break; + } + case NvUAVParam::markUAV: + { + if(curOp.operands[3].type != TYPE_IMMEDIATE32 || curOp.operands[3].values[0] != 1) + { + RDCERR( + "Expected literal 1 being written to markUAV. 
Was the shader compiled with " + "optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + if(state == InstructionState::InstructionHeader) + { + // start waiting for the user's UAV write + state = InstructionState::UAVInstructionHeader1; + } + else + { + // no other state should be writing an opcode. + RDCERR("Writing markUAV in unexpected state %d.", state); + state = InstructionState::Broken; + } + break; + } + // store the src params unconditionally, don't care about the state. + case NvUAVParam::src0: + { + srcParam[0] = curOp.operands[3]; + break; + } + case NvUAVParam::src1: + { + srcParam[1] = curOp.operands[3]; + break; + } + case NvUAVParam::src2: + { + srcParam[2] = curOp.operands[3]; + break; + } + case NvUAVParam::src3: + { + srcParam[3] = curOp.operands[3]; + break; + } + case NvUAVParam::src4: + { + srcParam[4] = curOp.operands[3]; + break; + } + case NvUAVParam::src5: + { + srcParam[5] = curOp.operands[3]; + break; + } + case NvUAVParam::dst: + { + RDCERR("Unexpected store to dst"); + state = InstructionState::Broken; + break; + } + case NvUAVParam::numOutputs: + { + if(curOp.operands[3].type != TYPE_IMMEDIATE32) + { + RDCERR( + "Expected literal value being written as numOutputs. Was the shader compiled " + "with optimisations disabled?"); + state = InstructionState::Broken; + break; + } + + if(state == InstructionState::InstructionHeader || + state == InstructionState::InstructionBody) + { + // allow writing number of outputs in either header or body (before or after + // simple + // opcode) + numOutputs = outputsNeeded = (int)curOp.operands[3].values[0]; + } + else + { + // no other state should be writing an opcode. 
+ RDCERR("Writing numOutputs in unexpected state %d.", state); + state = InstructionState::Broken; + } + break; + } + default: + { + RDCERR("Unexpected offset %u in nvidia magic UAV write.", param); + state = InstructionState::Broken; + break; + } + } + + if(state == InstructionState::Broken) + continue; + + // remove this operation, but keep the old operation so we can undo this if things go + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + } + else if(curOp.operation == OPCODE_LD_STRUCTURED && curOp.operands[3].indices[0].index == magicID) + { + // NV magic UAV load. This should only be of dst and only in the Nothing state after + // we've + // emitted a UAV instruction. + if(state == InstructionState::Nothing) + { + if(curOp.operands[2].type == TYPE_IMMEDIATE32) + { + // NV magic UAV read + NvUAVParam param = (NvUAVParam)curOp.operands[2].values[0]; + + if(param == NvUAVParam::dst) + { + // search backwards for the last vendor operation. That's the one we're reading + // from + for(size_t j = i; j > 0; j--) + { + if(m_Instructions[j].operation >= OPCODE_VENDOR_FIRST) + { + // re-emit the instruction writing to the actual output now + Operation op = m_Instructions[j]; + op.offset = curOp.offset; + op.operands[0] = curOp.operands[0]; + op.str = ToStr(op.operation); + + switch((VendorAtomicOp)op.preciseValues) + { + case ATOMIC_OP_NONE: break; + case ATOMIC_OP_AND: op.str += "_and"; break; + case ATOMIC_OP_OR: op.str += "_or"; break; + case ATOMIC_OP_XOR: op.str += "_xor"; break; + case ATOMIC_OP_ADD: op.str += "_add"; break; + case ATOMIC_OP_MAX: op.str += "_max"; break; + case ATOMIC_OP_MIN: op.str += "_min"; break; + case ATOMIC_OP_SWAP: op.str += "_swap"; break; + case ATOMIC_OP_CAS: op.str += "_comp_swap"; break; + } + + for(size_t a = 0; a < op.operands.size(); a++) + { + if(a == 0) + op.str += " "; + else + op.str += ", "; + op.str += op.operands[a].toString(m_Reflection, flags | ToString::ShowSwizzle); + } + + 
m_Instructions.insert(i + 1, op); + + // remove the old one, we've replaced it + m_Instructions[j].operation = OPCODE_VENDOR_REMOVED; + // if we break and try to revert this one, keep it removed + m_Instructions[j].syncFlags = OPCODE_VENDOR_REMOVED; + // also remove the current one! but back up the original in case something + // goes + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + break; + } + } + } + else + { + RDCERR("Unexpected read of UAV at offset %d instead of dst (%d)", param, NvUAVParam::dst); + state = InstructionState::Broken; + } + } + else + { + RDCERR("Expected literal value for UAV read offset"); + state = InstructionState::Broken; + } + } + else + { + RDCERR("Unexpected UAV read in state %d.", state); + state = InstructionState::Broken; + } + } + else if(state == InstructionState::UAVInstructionHeader1) + { + // while we're here the next UAV write is snooped + if(curOp.operation == OPCODE_STORE_RAW || curOp.operation == OPCODE_STORE_UAV_TYPED) + { + uavParam = curOp.operands[0]; + state = InstructionState::UAVInstructionHeader2; + + // remove this operation, but keep the old operation so we can undo this if things go + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + } + } + else if(state == InstructionState::AMDUAVAtomic) + { + // similarly for AMD we store the UAV referenced, but we don't change state - that happens + // when we see the appropriate phase instruction. 
+ if(curOp.operation == OPCODE_STORE_RAW || curOp.operation == OPCODE_STORE_UAV_TYPED) + { + uavParam = curOp.operands[0]; + state = InstructionState::UAVInstructionHeader2; + + // remove this operation, but keep the old operation so we can undo this if things go + // wrong + curOp.syncFlags = curOp.operation; + curOp.operation = OPCODE_VENDOR_REMOVED; + } + } + + // any other operation we completely ignore + } + + if(state == InstructionState::Broken) + { + // if we broke, restore the operations and remove any added vendor operations + for(size_t i = 0; i < m_Instructions.size(); i++) + { + if(m_Instructions[i].operation == OPCODE_VENDOR_REMOVED) + m_Instructions[i].operation = (OpcodeType)m_Instructions[i].syncFlags; + else if(m_Instructions[i].operation >= OPCODE_VENDOR_FIRST) + m_Instructions[i].operation = OPCODE_VENDOR_REMOVED; + } + } + + // erase any OPCODE_VENDOR_REMOVED instructions now + for(int32_t i = m_Instructions.count() - 1; i >= 0; i--) + { + if(m_Instructions[i].operation == OPCODE_VENDOR_REMOVED) + m_Instructions.erase(i); + } } void Program::MakeDisassemblyString() @@ -1395,6 +3065,9 @@ rdcstr Operand::toString(const DXBC::Reflection *reflection, ToString flags) con if(decl && !regstr.empty()) str += StringFormat::Fmt(" (%s)", regstr.c_str()); + if(!name.empty()) + str = name + "=" + str; + return str; } @@ -1410,7 +3083,7 @@ bool Program::ExtractDecl(uint32_t *&tokenStream, Declaration &retDecl, bool fri OpcodeType op = Opcode::Type.Get(OpcodeToken0); - RDCASSERT(op < NUM_OPCODES); + RDCASSERT(op < NUM_REAL_OPCODES); if(!IsDeclaration(op)) return false; @@ -2246,7 +3919,7 @@ bool Program::ExtractOperation(uint32_t *&tokenStream, Operation &retOp, bool fr OpcodeType op = Opcode::Type.Get(OpcodeToken0); - RDCASSERT(op < NUM_OPCODES); + RDCASSERT(op < NUM_REAL_OPCODES); if(IsDeclaration(op) && op != OPCODE_CUSTOMDATA) return false; @@ -2817,6 +4490,54 @@ rdcstr DoStringise(const DXBCBytecode::OpcodeType &el) 
STRINGISE_ENUM_CLASS_NAMED(OPCODE_SAMPLE_C_CLAMP_FEEDBACK, "sample_c_status") STRINGISE_ENUM_CLASS_NAMED(OPCODE_CHECK_ACCESS_FULLY_MAPPED, "check_access_fully_mapped") + + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_READFIRSTLANE, "amd_readfirstlane") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_READLANE, "amd_readlane") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_LANEID, "amd_laneid") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_SWIZZLE, "amd_swizzle") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_BALLOT, "amd_ballot") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MBCNT, "amd_mbcnt") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MIN3U, "amd_min3u") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MIN3F, "amd_min3f") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MED3U, "amd_med3u") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MED3F, "amd_med3f") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MAX3U, "amd_max3u") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_MAX3F, "amd_max3f") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_BARYCOORD, "amd_barycoord") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_VTXPARAM, "amd_vtxparam") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_VIEWPORTINDEX, "amd_get_viewportindex") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_RTARRAYSLICE, "amd_get_rtarrayslice") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_WAVE_REDUCE, "amd_wave_reduce") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_WAVE_SCAN, "amd_wave_scan") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_LOADDWATADDR, "amd_load_dw_at_addr") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_DRAWINDEX, "amd_get_drawindex") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_U64_ATOMIC, "amd_u64_atomic") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_WAVESIZE, "amd_get_wavesize") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_BASEINSTANCE, "amd_get_baseinstance") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_AMD_GET_BASEVERTEX, "amd_get_basevertex") + + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_SHUFFLE, "nv_shuffle") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_SHUFFLE_UP, "nv_shuffle_up") + 
STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_SHUFFLE_DOWN, "nv_shuffle_down") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_SHUFFLE_XOR, "nv_shuffle_xor") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_VOTE_ALL, "nv_vote_all") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_VOTE_ANY, "nv_vote_any") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_VOTE_BALLOT, "nv_vote_ballot") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_GET_LANEID, "nv_get_laneid") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FP16_ATOMIC, "nv_fp16_atomic") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FP32_ATOMIC, "nv_fp32_atomic") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_GET_THREADLTMASK, "nv_get_threadltmask") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_GET_FOOTPRINT_SINGLELOD, "nv_get_footprint_singlelod") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_U64_ATOMIC, "nv_u64_atomic") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_MATCH_ANY, "nv_match_any") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FOOTPRINT, "nv_footprint") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FOOTPRINT_BIAS, "nv_footprint_bias") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_GET_SHADING_RATE, "nv_get_shading_rate") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FOOTPRINT_LEVEL, "nv_footprint_level") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_FOOTPRINT_GRAD, "nv_footprint_grad") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_SHUFFLE_GENERIC, "nv_shuffle_generic") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_VPRS_EVAL_ATTRIB_SAMPLE, "nv_vprs_eval_attrib_sample") + STRINGISE_ENUM_CLASS_NAMED(OPCODE_NV_VPRS_EVAL_ATTRIB_SNAPPED, "nv_vprs_eval_attrib_snapped") } END_ENUM_STRINGISE(); } diff --git a/renderdoc/driver/shaders/dxbc/dxbc_reflect.cpp b/renderdoc/driver/shaders/dxbc/dxbc_reflect.cpp index 6c3cddb6e..7a5192bf1 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_reflect.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_reflect.cpp @@ -25,6 +25,7 @@ #include "dxbc_reflect.h" #include "common/formatting.h" #include "core/core.h" +#include "dxbc_bytecode.h" #include "dxbc_container.h" static ShaderConstant MakeConstantBufferVariable(const 
DXBC::CBufferVariable &var); @@ -276,7 +277,9 @@ void MakeShaderReflection(DXBC::DXBCContainer *dxbc, ShaderReflection *refl, if(dxbc->GetDXBCByteCode()) { - refl->debugInfo.debuggable = true; + refl->debugInfo.debugStatus = dxbc->GetDXBCByteCode()->GetDebugStatus(); + + refl->debugInfo.debuggable = refl->debugInfo.debugStatus.empty(); } else {