From 29c16d17e8a96b45059bef6b45ff84a74da5e199 Mon Sep 17 00:00:00 2001 From: baldurk Date: Fri, 3 Sep 2021 13:20:41 +0100 Subject: [PATCH] Encode function instructions --- renderdoc/common/common.cpp | 22 +- renderdoc/common/common.h | 20 + .../driver/shaders/dxil/dxil_bytecode.cpp | 36 +- renderdoc/driver/shaders/dxil/dxil_bytecode.h | 2 + .../shaders/dxil/dxil_bytecode_editor.cpp | 529 +++++++++++++++++- .../driver/shaders/dxil/llvm_encoder.cpp | 76 ++- renderdoc/driver/shaders/dxil/llvm_encoder.h | 3 +- 7 files changed, 668 insertions(+), 20 deletions(-) diff --git a/renderdoc/common/common.cpp b/renderdoc/common/common.cpp index 3536c15d5..aa50cda3b 100644 --- a/renderdoc/common/common.cpp +++ b/renderdoc/common/common.cpp @@ -236,18 +236,36 @@ void FreeAlignedBuffer(byte *buf) uint32_t Log2Floor(uint32_t value) { - RDCASSERT(value > 0); + if(!value) + return ~0U; return 31 - Bits::CountLeadingZeroes(value); } #if ENABLED(RDOC_X64) uint64_t Log2Floor(uint64_t value) { - RDCASSERT(value > 0); + if(!value) + return ~0ULL; return 63 - Bits::CountLeadingZeroes(value); } #endif +uint32_t Log2Ceil(uint32_t value) +{ + if(!value) + return ~0U; + return 32 - Bits::CountLeadingZeroes(value - 1); +} + +#if ENABLED(RDOC_X64) +uint64_t Log2Ceil(uint64_t value) +{ + if(!value) + return ~0ULL; + return 64 - Bits::CountLeadingZeroes(value - 1); +} +#endif + // deliberately leak so it doesn't get destroyed before our static RenderDoc destructor needs it static rdcstr *logfile = new rdcstr; static FileIO::LogFileHandle *logfileHandle = NULL; diff --git a/renderdoc/common/common.h b/renderdoc/common/common.h index 5176a6913..70b54866d 100644 --- a/renderdoc/common/common.h +++ b/renderdoc/common/common.h @@ -293,8 +293,10 @@ byte *AllocAlignedBuffer(uint64_t size, uint64_t alignment = 64); void FreeAlignedBuffer(byte *buf); uint32_t Log2Floor(uint32_t value); +uint32_t Log2Ceil(uint32_t value); #if ENABLED(RDOC_X64) uint64_t Log2Floor(uint64_t value); +uint64_t Log2Ceil(uint64_t 
value); #endif // super ugly - on apple size_t is a separate type, so we need a new overload @@ -307,6 +309,24 @@ inline size_t Log2Floor(size_t value) return (size_t)Log2Floor((uint32_t)value); #endif } + +inline size_t Log2Ceil(size_t value) +{ +#if ENABLED(RDOC_X64) + return (size_t)Log2Ceil((uint64_t)value); +#else + return (size_t)Log2Ceil((uint32_t)value); +#endif +} + +inline size_t Log2(size_t value) +{ +#if ENABLED(RDOC_X64) + return (size_t)Log2((uint64_t)value); +#else + return (size_t)Log2((uint32_t)value); +#endif +} #endif ///////////////////////////////////////////////// diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp b/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp index 31c6be4bf..dcce8c0cb 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp @@ -1200,10 +1200,24 @@ Program::Program(const byte *bytes, size_t length) uint64_t callingFlags = op.get(); if(callingFlags & (1ULL << 17)) + { inst.opFlags = op.get(); + RDCASSERT(inst.opFlags != InstructionFlags::NoFlags); + + callingFlags &= ~(1ULL << 17); + } + + const Type *funcCallType = NULL; if(callingFlags & (1ULL << 15)) - op.get(); // funcCallType + { + funcCallType = op.getType(); // funcCallType + + callingFlags &= ~(1ULL << 15); + } + + RDCASSERTMSG("Calling flags should only have at most two known bits set", + callingFlags == 0, callingFlags); Value v = op.getSymbol(); @@ -1216,6 +1230,11 @@ Program::Program(const byte *bytes, size_t length) inst.funcCall = v.function; inst.type = inst.funcCall->funcType->inner->inner; + if(funcCallType) + { + RDCASSERT(funcCallType == inst.funcCall->funcType->inner); + } + for(size_t i = 0; op.remaining() > 0; i++) { if(inst.funcCall->funcType->inner->members[i]->type == Type::Metadata) @@ -1355,6 +1374,8 @@ Program::Program(const byte *bytes, size_t length) // fast math flags overlap inst.opFlags = InstructionFlags(flags); } + + RDCASSERT(inst.opFlags != InstructionFlags::NoFlags); } 
f.instructions.push_back(inst); @@ -1366,7 +1387,11 @@ Program::Program(const byte *bytes, size_t length) inst.op = Operation::Unreachable; + inst.type = GetVoidType(); + curBlock++; + + f.instructions.push_back(inst); } else if(op.type == FunctionRecord::INST_ALLOCA) { @@ -1383,15 +1408,18 @@ Program::Program(const byte *bytes, size_t length) RDCASSERT(inst.type->type == Type::Pointer); // type of the size - ignored - (void)op.getType(); + const Type *sizeType = op.getType(); // size inst.args.push_back(op.getSymbolAbsolute()); + RDCASSERT(sizeType == inst.args.back().GetType()); + uint64_t align = op.get(); if(align & 0x20) { // argument alloca + inst.opFlags |= InstructionFlags::ArgumentAlloca; } if((align & 0x40) == 0) { @@ -1557,8 +1585,12 @@ Program::Program(const byte *bytes, size_t length) // fast math flags if(op.remaining() > 0) + { inst.opFlags = op.get(); + RDCASSERTNOTEQUAL((uint64_t)inst.opFlags, 0); + } + inst.type = GetBoolType(); // if we're comparing vectors, the return type is an equal sized bool vector diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode.h b/renderdoc/driver/shaders/dxil/dxil_bytecode.h index 5d605213e..bd6afe944 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode.h +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode.h @@ -544,6 +544,8 @@ enum class InstructionFlags : uint32_t FailureRelease = (0x4 << 15), FailureAcquireRelease = (0x5 << 15), FailureSequentiallyConsistent = (0x6 << 15), + + ArgumentAlloca = 1 << 18, }; BITMASK_OPERATORS(InstructionFlags); diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp b/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp index 5693a928d..829436c3e 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode_editor.cpp @@ -41,13 +41,19 @@ DXIL::ProgramEditor::~ProgramEditor() DXBC::DXBCContainer::ReplaceDXILBytecode(m_OutBlob, EncodeProgram()); } +#define getAttribID(a) uint64_t(a - m_Attributes.begin()) 
#define getTypeID(t) uint64_t(t - m_Types.begin()) #define getMetaID(m) uint64_t(m - m_Metadata.begin()) -#define getMetaIDOrNull(m) (m ? (uint64_t(m - m_Metadata.begin()) + 1) : 0) +#define getMetaIDOrNull(m) (m ? (getMetaID(m) + 1) : 0ULL) +#define getFunctionMetaID(m) \ + uint64_t(m >= m_Metadata.begin() && m < m_Metadata.end() ? m - m_Metadata.begin() \ + : m - f.metadata.begin()) +#define getFunctionMetaIDOrNull(m) (m ? (getFunctionMetaID(m) + 1) : 0ULL) + +#define getValueID(v) uint64_t(values.indexOf(v)) bytebuf DXIL::ProgramEditor::EncodeProgram() const { -#define getValueID(v) uint64_t(values.indexOf(v)) rdcarray values = m_Values; bytebuf ret; @@ -92,12 +98,11 @@ bytebuf DXIL::ProgramEditor::EncodeProgram() const // stop once we pass constants if(m_Values[i].type != ValueType::Constant) break; - - cfg.numGlobalConsts++; } cfg.numTypes = m_Types.size(); cfg.numSections = m_Sections.size(); + cfg.numGlobalValues = m_Values.size(); writer.ConfigureSizes(cfg); @@ -353,7 +358,7 @@ bytebuf DXIL::ProgramEditor::EncodeProgram() const typeIndex, uint64_t(((g.flags & GlobalFlags::IsConst) ? 1 : 0) | 0x2 | ((uint32_t)g.type->addrSpace << 2)), g.initialiser ? getValueID(Value(g.initialiser)) + 1 : 0, linkageValue, - 32 - Bits::CountLeadingZeroes(g.align), uint64_t(g.section + 1), + Log2Floor((uint32_t)g.align) + 1, uint64_t(g.section + 1), // visibility 0U, // TLS mode @@ -510,6 +515,38 @@ bytebuf DXIL::ProgramEditor::EncodeProgram() const writer.EndBlock(); } +#define encodeRelativeValueID(v) \ + { \ + uint64_t valID = getValueID(v); \ + if(valID <= instValueID) \ + { \ + vals.push_back(instValueID - valID); \ + } \ + else \ + { \ + forwardRefs = true; \ + /* signed integer two's complement for negative */ \ + /* values referencing forward from the instruction */ \ + vals.push_back(0x100000000ULL - (valID - instValueID)); \ + vals.push_back(getTypeID(v.GetType())); \ + } \ + } + +// some cases don't encode the type even for forward refs, if it's implicit (e.g. 
second parameter +// in a binop). This also doesn't count as a forward ref for the case of breaking the abbrev use +#define encodeRelativeValueIDTypeless(v) \ + { \ + uint64_t valID = getValueID(v); \ + if(valID <= instValueID) \ + { \ + vals.push_back(instValueID - valID); \ + } \ + else \ + { \ + vals.push_back(0x100000000ULL - (valID - instValueID)); \ + } \ + } + for(const Function &f : m_Functions) { if(f.external) @@ -539,6 +576,488 @@ bytebuf DXIL::ProgramEditor::EncodeProgram() const writer.EndBlock(); } + // value IDs for instructions start after all the constants + uint32_t instValueID = uint32_t(m_Values.size() + f.constants.size() + f.args.size()); + + uint32_t debugLoc = ~0U; + + bool forwardRefs = false; + rdcarray vals; + + for(const Instruction &inst : f.instructions) + { + forwardRefs = false; + vals.clear(); + + switch(inst.op) + { + case Operation::NoOp: RDCERR("Unexpected no-op encoding"); continue; + case Operation::Call: + { + vals.push_back(inst.paramAttrs ? getAttribID(inst.paramAttrs) + 1 : 0); + // always emit func type + uint64_t flags = 1 << 15; + if(inst.opFlags != InstructionFlags::NoFlags) + flags |= 1 << 17; + vals.push_back(flags); + if(inst.opFlags != InstructionFlags::NoFlags) + vals.push_back((uint64_t)inst.opFlags); + vals.push_back(getTypeID(inst.funcCall->funcType->inner)); + encodeRelativeValueID(Value(inst.funcCall)); + for(size_t a = 0; a < inst.args.size(); a++) + { + if(inst.args[a].type == ValueType::Metadata) + { + vals.push_back(getFunctionMetaID(inst.args[a].meta)); + } + else + { + encodeRelativeValueIDTypeless(inst.args[a]); + } + } + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_CALL, vals, forwardRefs); + break; + } + case Operation::Trunc: + case Operation::ZExt: + case Operation::SExt: + case Operation::FToU: + case Operation::FToS: + case Operation::UToF: + case Operation::SToF: + case Operation::FPTrunc: + case Operation::FPExt: + case Operation::PtrToI: + case Operation::IToPtr: + case 
Operation::Bitcast: + case Operation::AddrSpaceCast: + { + encodeRelativeValueID(inst.args[0]); + vals.push_back(getTypeID(inst.type)); + vals.push_back(EncodeCast(inst.op)); + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_CAST, vals, forwardRefs); + break; + } + case Operation::ExtractVal: + { + encodeRelativeValueID(inst.args[0]); + for(size_t i = 1; i < inst.args.size(); i++) + vals.push_back(inst.args[i].literal); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_EXTRACTVAL, vals, forwardRefs); + break; + } + case Operation::Ret: + { + if(!inst.args.empty()) + { + encodeRelativeValueID(inst.args[0]); + } + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_RET, vals, forwardRefs); + break; + } + case Operation::FAdd: + case Operation::FSub: + case Operation::FMul: + case Operation::FDiv: + case Operation::FRem: + case Operation::Add: + case Operation::Sub: + case Operation::Mul: + case Operation::UDiv: + case Operation::SDiv: + case Operation::URem: + case Operation::SRem: + case Operation::ShiftLeft: + case Operation::LogicalShiftRight: + case Operation::ArithShiftRight: + case Operation::And: + case Operation::Or: + case Operation::Xor: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + + const Type *t = inst.args[0].GetType(); + + const bool isFloatOp = (t->scalarType == Type::Float); + + uint64_t opcode = 0; + switch(inst.op) + { + case Operation::FAdd: + case Operation::Add: opcode = 0; break; + case Operation::FSub: + case Operation::Sub: opcode = 1; break; + case Operation::FMul: + case Operation::Mul: opcode = 2; break; + case Operation::UDiv: opcode = 3; break; + case Operation::FDiv: + case Operation::SDiv: opcode = 4; break; + case Operation::URem: opcode = 5; break; + case Operation::FRem: + case Operation::SRem: opcode = 6; break; + case Operation::ShiftLeft: opcode = 7; break; + case Operation::LogicalShiftRight: opcode = 8; break; + case Operation::ArithShiftRight: opcode = 9; break; + 
case Operation::And: opcode = 10; break; + case Operation::Or: opcode = 11; break; + case Operation::Xor: opcode = 12; break; + default: break; + } + vals.push_back(opcode); + + if(inst.opFlags != InstructionFlags::NoFlags) + { + uint64_t flags = 0; + if(inst.op == Operation::Add || inst.op == Operation::Sub || + inst.op == Operation::Mul || inst.op == Operation::ShiftLeft) + { + if(inst.opFlags & InstructionFlags::NoSignedWrap) + flags |= 0x2; + if(inst.opFlags & InstructionFlags::NoUnsignedWrap) + flags |= 0x1; + vals.push_back(flags); + } + else if(inst.op == Operation::SDiv || inst.op == Operation::UDiv || + inst.op == Operation::LogicalShiftRight || inst.op == Operation::ArithShiftRight) + { + if(inst.opFlags & InstructionFlags::Exact) + flags |= 0x1; + vals.push_back(flags); + } + else if(isFloatOp) + { + // fast math flags overlap + vals.push_back(uint64_t(inst.opFlags)); + } + } + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_BINOP, vals, forwardRefs); + break; + } + case Operation::Unreachable: + { + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_UNREACHABLE, {}, false); + break; + } + case Operation::Alloca: + { + vals.push_back(getTypeID(inst.type->inner)); + vals.push_back(getTypeID(inst.args[0].GetType())); + vals.push_back(getValueID(inst.args[0])); + uint64_t alignAndFlags = Log2Floor(inst.align) + 1; + // DXC always sets this bit, as the type is a pointer + alignAndFlags |= 1U << 6; + if(inst.opFlags & InstructionFlags::ArgumentAlloca) + alignAndFlags |= 1U << 5; + vals.push_back(alignAndFlags); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_ALLOCA, vals, forwardRefs); + break; + } + case Operation::GetElementPtr: + { + vals.push_back((inst.opFlags & InstructionFlags::InBounds) ?
1U : 0U); + vals.push_back(getTypeID(inst.args[0].GetType()->inner)); + + for(size_t i = 0; i < inst.args.size(); i++) + { + encodeRelativeValueID(inst.args[i]); + } + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_GEP, vals, forwardRefs); + break; + } + case Operation::Load: + { + encodeRelativeValueID(inst.args[0]); + vals.push_back(getTypeID(inst.type)); + vals.push_back(Log2Floor(inst.align) + 1); + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_LOAD, vals, forwardRefs); + break; + } + case Operation::Store: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueID(inst.args[1]); + vals.push_back(Log2Floor(inst.align) + 1); + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_STORE, vals, forwardRefs); + break; + } + case Operation::FOrdFalse: + case Operation::FOrdEqual: + case Operation::FOrdGreater: + case Operation::FOrdGreaterEqual: + case Operation::FOrdLess: + case Operation::FOrdLessEqual: + case Operation::FOrdNotEqual: + case Operation::FOrd: + case Operation::FUnord: + case Operation::FUnordEqual: + case Operation::FUnordGreater: + case Operation::FUnordGreaterEqual: + case Operation::FUnordLess: + case Operation::FUnordLessEqual: + case Operation::FUnordNotEqual: + case Operation::FOrdTrue: + case Operation::IEqual: + case Operation::INotEqual: + case Operation::UGreater: + case Operation::UGreaterEqual: + case Operation::ULess: + case Operation::ULessEqual: + case Operation::SGreater: + case Operation::SGreaterEqual: + case Operation::SLess: + case Operation::SLessEqual: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + + uint64_t opcode = 0; + switch(inst.op) + { + case Operation::FOrdFalse: opcode = 0; break; + case Operation::FOrdEqual: opcode = 1; break; + case Operation::FOrdGreater: opcode = 2; break; + case 
Operation::FOrdGreaterEqual: opcode = 3; break; + case Operation::FOrdLess: opcode = 4; break; + case Operation::FOrdLessEqual: opcode = 5; break; + case Operation::FOrdNotEqual: opcode = 6; break; + case Operation::FOrd: opcode = 7; break; + case Operation::FUnord: opcode = 8; break; + case Operation::FUnordEqual: opcode = 9; break; + case Operation::FUnordGreater: opcode = 10; break; + case Operation::FUnordGreaterEqual: opcode = 11; break; + case Operation::FUnordLess: opcode = 12; break; + case Operation::FUnordLessEqual: opcode = 13; break; + case Operation::FUnordNotEqual: opcode = 14; break; + case Operation::FOrdTrue: opcode = 15; break; + + case Operation::IEqual: opcode = 32; break; + case Operation::INotEqual: opcode = 33; break; + case Operation::UGreater: opcode = 34; break; + case Operation::UGreaterEqual: opcode = 35; break; + case Operation::ULess: opcode = 36; break; + case Operation::ULessEqual: opcode = 37; break; + case Operation::SGreater: opcode = 38; break; + case Operation::SGreaterEqual: opcode = 39; break; + case Operation::SLess: opcode = 40; break; + case Operation::SLessEqual: opcode = 41; break; + + default: break; + } + + vals.push_back(opcode); + + if(inst.opFlags != InstructionFlags::NoFlags) + vals.push_back((uint64_t)inst.opFlags); + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_CMP2, vals, forwardRefs); + break; + } + case Operation::Select: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + encodeRelativeValueID(inst.args[2]); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_VSELECT, vals, forwardRefs); + break; + } + case Operation::ExtractElement: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueID(inst.args[1]); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_EXTRACTELT, vals, forwardRefs); + break; + } + case Operation::InsertElement: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + 
encodeRelativeValueID(inst.args[2]); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_INSERTELT, vals, forwardRefs); + break; + } + case Operation::ShuffleVector: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + encodeRelativeValueID(inst.args[2]); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_SHUFFLEVEC, vals, forwardRefs); + break; + } + case Operation::InsertValue: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueID(inst.args[1]); + for(size_t i = 2; i < inst.args.size(); i++) + vals.push_back(inst.args[i].literal); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_INSERTVAL, vals, forwardRefs); + break; + } + case Operation::Branch: + { + vals.push_back(uint64_t(inst.args[0].block - f.blocks.begin())); + + if(inst.args.size() > 1) + { + vals.push_back(uint64_t(inst.args[1].block - f.blocks.begin())); + encodeRelativeValueIDTypeless(inst.args[2]); + } + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_BR, vals, forwardRefs); + break; + } + case Operation::Phi: + { + vals.push_back(getTypeID(inst.type)); + + for(size_t i = 0; i < inst.args.size(); i += 2) + { + uint64_t valID = getValueID(inst.args[i]); + int64_t valRef = int64_t(instValueID) - int64_t(valID); + + vals.push_back(LLVMBC::BitWriter::svbr(valRef)); + vals.push_back(uint64_t(inst.args[i + 1].block - f.blocks.begin())); + } + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_PHI, vals, forwardRefs); + break; + } + case Operation::Switch: + { + vals.push_back(getTypeID(inst.args[0].GetType())); + encodeRelativeValueIDTypeless(inst.args[0]); + + vals.push_back(uint64_t(inst.args[1].block - f.blocks.begin())); + + for(size_t i = 2; i < inst.args.size(); i += 2) + { + vals.push_back(getValueID(inst.args[i])); + vals.push_back(uint64_t(inst.args[i + 1].block - f.blocks.begin())); + } + + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_SWITCH, vals, forwardRefs); + break; + } + case Operation::Fence: + { + 
vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::SuccessOrderMask) >> + 12U); + vals.push_back((inst.opFlags & InstructionFlags::SingleThread) ? 0U : 1U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_FENCE, vals, forwardRefs); + break; + } + case Operation::CompareExchange: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueID(inst.args[1]); + encodeRelativeValueIDTypeless(inst.args[2]); + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::SuccessOrderMask) >> + 12U); + vals.push_back((inst.opFlags & InstructionFlags::SingleThread) ? 0U : 1U); + vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::FailureOrderMask) >> + 15U); + vals.push_back((inst.opFlags & InstructionFlags::Weak) ? 1U : 0U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_CMPXCHG, vals, forwardRefs); + break; + } + case Operation::LoadAtomic: + { + encodeRelativeValueID(inst.args[0]); + vals.push_back(getTypeID(inst.type)); + vals.push_back(Log2Floor(inst.align) + 1); + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::SuccessOrderMask) >> + 12U); + vals.push_back((inst.opFlags & InstructionFlags::SingleThread) ? 0U : 1U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_LOADATOMIC, vals, forwardRefs); + break; + } + case Operation::StoreAtomic: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueID(inst.args[1]); + vals.push_back(Log2Floor(inst.align) + 1); + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::SuccessOrderMask) >> + 12U); + vals.push_back((inst.opFlags & InstructionFlags::SingleThread) ? 
0U : 1U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_STOREATOMIC, vals, forwardRefs); + break; + } + case Operation::AtomicExchange: + case Operation::AtomicAdd: + case Operation::AtomicSub: + case Operation::AtomicAnd: + case Operation::AtomicNand: + case Operation::AtomicOr: + case Operation::AtomicXor: + case Operation::AtomicMax: + case Operation::AtomicMin: + case Operation::AtomicUMax: + case Operation::AtomicUMin: + { + encodeRelativeValueID(inst.args[0]); + encodeRelativeValueIDTypeless(inst.args[1]); + + uint64_t opcode = 0; + switch(inst.op) + { + case Operation::AtomicExchange: opcode = 0; break; + case Operation::AtomicAdd: opcode = 1; break; + case Operation::AtomicSub: opcode = 2; break; + case Operation::AtomicAnd: opcode = 3; break; + case Operation::AtomicNand: opcode = 4; break; + case Operation::AtomicOr: opcode = 5; break; + case Operation::AtomicXor: opcode = 6; break; + case Operation::AtomicMax: opcode = 7; break; + case Operation::AtomicMin: opcode = 8; break; + case Operation::AtomicUMax: opcode = 9; break; + case Operation::AtomicUMin: opcode = 10; break; + + default: break; + } + + vals.push_back(opcode); + + vals.push_back((inst.opFlags & InstructionFlags::Volatile) ? 1U : 0U); + vals.push_back(((uint64_t)inst.opFlags & (uint64_t)InstructionFlags::SuccessOrderMask) >> + 12U); + vals.push_back((inst.opFlags & InstructionFlags::SingleThread) ? 0U : 1U); + writer.RecordInstruction(LLVMBC::FunctionRecord::INST_ATOMICRMW, vals, forwardRefs); + break; + } + } + + // instruction IDs are the values (i.e. all instructions that return non-void are a value) + if(inst.type != m_VoidType) + instValueID++; + + // no debug location? omit + if(inst.debugLoc == ~0U) + continue; + + // same as last time? 
emit 'again' record + if(inst.debugLoc == debugLoc) + writer.Record(LLVMBC::FunctionRecord::DEBUG_LOC_AGAIN); + + // new debug location + const DebugLocation &loc = m_DebugLocations[inst.debugLoc]; + + writer.Record(LLVMBC::FunctionRecord::DEBUG_LOC, + { + loc.line, loc.col, getFunctionMetaIDOrNull(loc.scope), + getFunctionMetaIDOrNull(loc.inlinedAt), + }); + + debugLoc = inst.debugLoc; + } + writer.EndBlock(); values.resize(values.size() - f.values.size()); diff --git a/renderdoc/driver/shaders/dxil/llvm_encoder.cpp b/renderdoc/driver/shaders/dxil/llvm_encoder.cpp index 03ba2220e..705a1251a 100644 --- a/renderdoc/driver/shaders/dxil/llvm_encoder.cpp +++ b/renderdoc/driver/shaders/dxil/llvm_encoder.cpp @@ -342,7 +342,8 @@ void BitcodeWriter::BeginBlock(KnownBlock block) abbrevSize = newAbbrevSize; blockStack.push_back({block, offs}); - curAbbrevs.swap(blockStack.back().abbrevs); + if(!blockStack.empty()) + curAbbrevs.swap(blockStack.back().abbrevs); // emit known abbrevs here that aren't in blockinfo switch(block) @@ -396,6 +397,8 @@ void BitcodeWriter::EndBlock() b.PatchLengthWord(offs, uint32_t(lengthInBytes / 4)); + curAbbrevs = blockStack.back().abbrevs; + blockStack.pop_back(); if(blockStack.empty()) { @@ -405,7 +408,6 @@ void BitcodeWriter::EndBlock() else { curBlock = blockStack.back().block; - curAbbrevs = blockStack.back().abbrevs; abbrevSize = GetBlockAbbrevSize(curBlock); } } @@ -429,7 +431,7 @@ void BitcodeWriter::WriteAbbrevDefinition(AbbrevParam *abbrev) if(param.value == MagicFixedSizeNumTypes) param.value = m_Cfg.numTypes; if(param.value == MagicFixedSizeNumConstants) - param.value = m_Cfg.numGlobalConsts; + param.value = m_Cfg.numGlobalValues; const bool lit = param.encoding == AbbrevEncoding::Literal; b.fixed(1, lit); @@ -450,18 +452,18 @@ void BitcodeWriter::ConfigureSizes(Config cfg) { m_Cfg = cfg; - m_Cfg.numTypes = 32 - Bits::CountLeadingZeroes((uint32_t)m_Cfg.numTypes); - m_Cfg.numGlobalConsts = 32 - 
Bits::CountLeadingZeroes((uint32_t)m_Cfg.numGlobalConsts); + m_Cfg.numTypes = Log2Ceil((uint32_t)m_Cfg.numTypes + 1); + m_Cfg.numGlobalValues = Log2Ceil((uint32_t)m_Cfg.numGlobalValues + 1); - m_Cfg.maxGlobalType = 32 - Bits::CountLeadingZeroes(m_Cfg.maxGlobalType); + m_Cfg.maxGlobalType = Log2Ceil(m_Cfg.maxGlobalType + 1); if(m_Cfg.numSections > 0) - m_Cfg.numSections = 32 - Bits::CountLeadingZeroes((uint32_t)m_Cfg.numSections); + m_Cfg.numSections = Log2Ceil((uint32_t)m_Cfg.numSections + 1); if(m_Cfg.maxAlign > 0) { - uint32_t encodedAlign = 32 - Bits::CountLeadingZeroes((uint32_t)cfg.maxAlign); - m_Cfg.maxAlign = 32 - Bits::CountLeadingZeroes(encodedAlign); + uint32_t encodedAlign = Log2Floor((uint32_t)cfg.maxAlign) + 1; + m_Cfg.maxAlign = Log2Ceil(encodedAlign + 1); } } @@ -772,6 +774,60 @@ void BitcodeWriter::RecordSymTabEntry(size_t id, const rdcstr &str, bool basicBl Abbrev(ValueSymtabAbbrevDefs[(uint32_t)abbrev], (uint32_t)record, vals); } +void BitcodeWriter::RecordInstruction(FunctionRecord record, const rdcarray &vals, + bool forwardRefs) +{ + uint32_t idx = ~0U; + + switch(record) + { + case FunctionRecord::INST_RET: + if(vals.empty()) + idx = (uint32_t)FunctionAbbrev::RetVoid; + else + idx = (uint32_t)FunctionAbbrev::RetValue; + break; + case FunctionRecord::INST_GEP: idx = (uint32_t)FunctionAbbrev::GEP; break; + case FunctionRecord::INST_UNREACHABLE: idx = (uint32_t)FunctionAbbrev::Unreachable; break; + case FunctionRecord::INST_LOAD: + if(!forwardRefs) + idx = (uint32_t)FunctionAbbrev::Load; + break; + case FunctionRecord::INST_CAST: + if(!forwardRefs) + idx = (uint32_t)FunctionAbbrev::Cast; + break; + case FunctionRecord::INST_BINOP: + if(!forwardRefs) + { + idx = (uint32_t)FunctionAbbrev::BinOp; + + // binop with no forward refs is: + // [0]: first param (no type) + // [1]: second param + // [2]: binop itself + // then if there is a 4th val, that is flags + if(vals.size() == 4) + idx = (uint32_t)FunctionAbbrev::BinOpFlags; + } + break; + default: 
break; + } + + // if we got a valid abbrev, use it, otherwise emit unabbrev + if(idx != ~0U) + { + // write the abbrev ID + b.fixed(abbrevSize, GetAbbrevID(idx)); + + Abbrev(curAbbrevs[idx], (uint32_t)record, vals); + } + else + { + Unabbrev((uint32_t)record, false, vals); + } +} + void BitcodeWriter::Abbrev(AbbrevParam *abbr, uint32_t record, uint64_t val) { WriteAbbrevParam(abbr[0], record); @@ -833,7 +889,7 @@ void BitcodeWriter::WriteAbbrevParam(const AbbrevParam &abbrev, uint64_t val) if(abbrev.value == MagicFixedSizeNumTypes) width = m_Cfg.numTypes; else if(abbrev.value == MagicFixedSizeNumConstants) - width = m_Cfg.numGlobalConsts; + width = m_Cfg.numGlobalValues; b.fixed((size_t)width, val); } else if(abbrev.encoding == AbbrevEncoding::VBR) diff --git a/renderdoc/driver/shaders/dxil/llvm_encoder.h b/renderdoc/driver/shaders/dxil/llvm_encoder.h index f1a932cff..476f38392 100644 --- a/renderdoc/driver/shaders/dxil/llvm_encoder.h +++ b/renderdoc/driver/shaders/dxil/llvm_encoder.h @@ -40,7 +40,7 @@ public: struct Config { size_t numTypes; - size_t numGlobalConsts; + size_t numGlobalValues; size_t numSections; uint64_t maxAlign; uint32_t maxGlobalType; @@ -88,6 +88,7 @@ public: } void RecordSymTabEntry(size_t id, const rdcstr &str, bool basicBlock = false); + void RecordInstruction(FunctionRecord record, const rdcarray &vals, bool forwardRefs); private: void WriteAbbrevDefinition(AbbrevParam *abbrev);