From a1dc494fb54d2b216ec8a7cc0bc7c0f90b7c9809 Mon Sep 17 00:00:00 2001 From: baldurk Date: Thu, 15 Aug 2024 14:42:59 +0100 Subject: [PATCH] Add decoding and encoding for RDAT * Only tested so far on round-trip behaviour, not with modifications. By default we don't make any changes yet but the DXIL program editor may in future automatically decode and encode the RDAT so it can stay in sync if resources are added. --- .../driver/shaders/dxil/dxil_metadata.cpp | 473 ++++++++++++++++++ 1 file changed, 473 insertions(+) diff --git a/renderdoc/driver/shaders/dxil/dxil_metadata.cpp b/renderdoc/driver/shaders/dxil/dxil_metadata.cpp index 21b862495..c30856e57 100644 --- a/renderdoc/driver/shaders/dxil/dxil_metadata.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_metadata.cpp @@ -286,11 +286,484 @@ bool DXBCContainer::GetRuntimeData(DXIL::RDATData &rdat) const if(m_RDATOffset == 0) return false; + const byte *in = m_ShaderBlob.data() + m_RDATOffset; + + // RDAT Header + uint32_t *ver = (uint32_t *)in; + if(ver[0] != RDATData::Version1_0) + return false; + + uint32_t numParts = ver[1]; + rdcarray partOffsets; + partOffsets.resize(numParts); + for(uint32_t i = 0; i < numParts; i++) + partOffsets[i] = ver[2 + i]; + + rdcstr stringbuffer; + rdcarray indexArrays; + bytebuf rawbytes; + + // we need to do this in two passes to first find the index arrays etc which can be referenced + // before they have appeared :( + for(uint32_t partOffset : partOffsets) + { + RuntimePartHeader *part = (RuntimePartHeader *)(in + partOffset); + byte *data = (byte *)(part + 1); + + switch(part->part) + { + case RDATData::Part::StringBuffer: stringbuffer.assign((char *)data, part->size); break; + case RDATData::Part::IndexArrays: + indexArrays.assign((uint32_t *)data, part->size / sizeof(uint32_t)); + break; + case RDATData::Part::RawBytes: rawbytes.assign(data, part->size); break; + default: break; // ignore others for now + } + } + + for(uint32_t partOffset : partOffsets) + { + RuntimePartHeader *part = (RuntimePartHeader *)(in + partOffset); + byte *data = (byte *)(part + 1); + RuntimePartTableHeader *tableHeader = (RuntimePartTableHeader *)data; + + switch(part->part) + { + case RDATData::Part::StringBuffer: break; + case RDATData::Part::IndexArrays: break; + case RDATData::Part::RawBytes: break; + case RDATData::Part::ResourceTable: + { + EncodedResourceInfo *infos = (EncodedResourceInfo *)(tableHeader + 1); + + RDCASSERT(tableHeader->stride == sizeof(EncodedResourceInfo)); + + rdat.resourceInfo.reserve(tableHeader->count); + for(uint32_t i = 0; i < tableHeader->count; i++) + { + EncodedResourceInfo &info = infos[i]; + rdat.resourceInfo.push_back({ + info.nspace, + info.kind, + info.LinearID, + info.space, + info.regStart, + info.regEnd, + stringbuffer.c_str() + info.name.offset, + info.flags, + }); + } + + break; + } + case RDATData::Part::FunctionTable: + { + data = (byte *)(tableHeader + 1); + + RDCASSERT(tableHeader->stride == sizeof(EncodedFunctionInfo2) || + tableHeader->stride == sizeof(EncodedFunctionInfo)); + + rdat.functionVersion = RDATData::FunctionInfoVersion::Version1; + if(tableHeader->stride == sizeof(EncodedFunctionInfo2)) + rdat.functionVersion = RDATData::FunctionInfoVersion::Version2; + + rdat.functionInfo.reserve(tableHeader->count); + for(uint32_t i = 0; i < tableHeader->count; i++) + { + EncodedFunctionInfo &info = (EncodedFunctionInfo &)*data; + EncodedFunctionInfo2 &info2 = (EncodedFunctionInfo2 &)*data; + + RDATData::FunctionInfo out = { + stringbuffer.c_str() + info.name.offset, + stringbuffer.c_str() + info.unmangledName.offset, + {}, + {}, + info.type, + info.payloadBytes, + info.attribBytes, + DXBC::GlobalShaderFlags(uint64_t(info.featureFlags[0]) | + uint64_t(info.featureFlags[1]) << 32), + info.shaderCompatMask, + info.minShaderModel, + info.minType, + }; + + rdat.functionInfo.push_back(out); + + RDATData::FunctionInfo2 &func = rdat.functionInfo.back(); + + if(info.globalResourcesIndexArrayRef.offset != ~0U) + { + uint32_t *idxArray = indexArrays.data() + info.globalResourcesIndexArrayRef.offset; + uint32_t len = *idxArray; + idxArray++; + func.globalResources.reserve(len); + for(uint32_t j = 0; j < len; j++) + func.globalResources.push_back({rdat.resourceInfo[idxArray[j]].nspace, + rdat.resourceInfo[idxArray[j]].resourceIndex}); + } + + if(info.functionDependenciesArrayRef.offset != ~0U) + { + uint32_t *idxArray = indexArrays.data() + info.functionDependenciesArrayRef.offset; + uint32_t len = *idxArray; + idxArray++; + func.functionDependencies.reserve(len); + for(uint32_t j = 0; j < len; j++) + func.functionDependencies.push_back(stringbuffer.c_str() + idxArray[j]); + } + + if(rdat.functionVersion == RDATData::FunctionInfoVersion::Version2) + { + func.minWaveCount = info2.minWaveCount; + func.maxWaveCount = info2.maxWaveCount; + func.shaderBehaviourFlags = info2.shaderBehaviourFlags; + + // below here is a stage-specific set of data containing e.g. signature elements. + // Currently DXC does not emit RDAT except for in library targets, so this will be + // unused. It would be an index into a table elsewhere of VSInfo, PSInfo, etc. + RDCASSERT(info2.extraInfoRef.offset == ~0U); + func.extraInfoRef = ~0U; + } + + data += tableHeader->stride; + } + + break; + } + case RDATData::Part::SubobjectTable: + { + data = (byte *)(tableHeader + 1); + + EncodedSubobjectInfo *subobjects = (EncodedSubobjectInfo *)data; + + RDCASSERT(tableHeader->stride == sizeof(EncodedSubobjectInfo)); + + rdat.subobjectsInfo.reserve(tableHeader->count); + for(uint32_t i = 0; i < tableHeader->count; i++) + { + EncodedSubobjectInfo &info = subobjects[i]; + + rdat.subobjectsInfo.push_back({ + info.type, + stringbuffer.c_str() + info.name.offset, + }); + + RDATData::SubobjectInfo &sub = rdat.subobjectsInfo.back(); + + switch(info.type) + { + case RDATData::SubobjectInfo::SubobjectType::StateConfig: + { + sub.config = info.config; + break; + } + // these are only differentiated by the enum, the data is the same + case RDATData::SubobjectInfo::SubobjectType::GlobalRS: + case RDATData::SubobjectInfo::SubobjectType::LocalRS: + sub.rs.data = bytebuf(rawbytes.data() + info.rs.data.offset, info.rs.data.size); + break; + case RDATData::SubobjectInfo::SubobjectType::SubobjectToExportsAssoc: + { + sub.assoc.subobject = stringbuffer.c_str() + info.assoc.subobject.offset; + + if(info.assoc.exports.offset != ~0U) + { + uint32_t *idxArray = indexArrays.data() + info.assoc.exports.offset; + uint32_t len = *idxArray; + idxArray++; + sub.assoc.exports.reserve(len); + for(uint32_t j = 0; j < len; j++) + sub.assoc.exports.push_back(stringbuffer.c_str() + idxArray[j]); + } + + break; + } + case RDATData::SubobjectInfo::SubobjectType::RTShaderConfig: + { + sub.rtshaderconfig = info.rtshaderconfig; + break; + } + // we can treat these unions identically - in the old config case the flags will be + // ignored and should be 0 but the struct is effective padded to the largest union + // size because of the fixed stride anyway + case RDATData::SubobjectInfo::SubobjectType::RTPipeConfig: + { + RDCASSERT(info.rtpipeconfig.flags == RDATData::RTPipeFlags::None); + DELIBERATE_FALLTHROUGH(); + } + case RDATData::SubobjectInfo::SubobjectType::RTPipeConfig1: + { + sub.rtpipeconfig = info.rtpipeconfig; + break; + } + case RDATData::SubobjectInfo::SubobjectType::Hitgroup: + { + sub.hitgroup.type = info.hitgroup.type; + sub.hitgroup.anyHit = stringbuffer.c_str() + info.hitgroup.anyHit.offset; + sub.hitgroup.closestHit = stringbuffer.c_str() + info.hitgroup.closestHit.offset; + sub.hitgroup.intersection = stringbuffer.c_str() + info.hitgroup.intersection.offset; + break; + } + default: + { + RDCWARN("Unhandled subobject type %d", info.type); + break; + } + } + } + + break; + } + default: RDCWARN("Unhandled RDAT part %d, will not round-trip", part->part); + } + } + return true; } void DXBCContainer::SetRuntimeData(bytebuf &ByteCode, const DXIL::RDATData &rdat) { + using namespace DXIL; + + rdcstr stringblob; + // initialise string blob with empty string at 0 offset + stringblob.push_back('\0'); + + rdcarray indexarrays; + // due to how these are stored and deduplicated (and we have to deduplicate because DXC does so we + // don't know if it's necessary) we have to store byte buffers individually or have some kind of + // lookup which amounts to the same thing. This will get baked into rawbytes at the end + rdcarray rawbyteLookups; + bytebuf rawbytes; + + rdcarray resourceInfo; + rdcarray functionInfo; + rdcarray functionInfo2; + rdcarray subobjectsInfo; + + resourceInfo.reserve(rdat.resourceInfo.size()); + for(const RDATData::ResourceInfo &info : rdat.resourceInfo) + { + resourceInfo.push_back({ + info.nspace, + info.kind, + info.resourceIndex, + info.space, + info.regStart, + info.regEnd, + MakeStringRef(stringblob, info.name), + info.flags, + }); + } + + // LLVM processes function dependencies first here which puts them into the string buffer in a different + // order than if we just process all functions as we encode them. + // That means we need to iterate function dependencies first too, to solidify string buffer + // offsets in order to exactly match RDAT contents to what dxc produces + for(const RDATData::FunctionInfo2 &info : rdat.functionInfo) + for(const rdcstr &f : info.functionDependencies) + MakeStringRef(stringblob, f); + + if(rdat.functionVersion == RDATData::FunctionInfoVersion::Version1) + { + rdcarray functionDependenciesArray; + rdcarray globalResourcesIndexArray; + + functionInfo.reserve(rdat.functionInfo.size()); + for(const RDATData::FunctionInfo2 &info : rdat.functionInfo) + { + globalResourcesIndexArray.clear(); + functionDependenciesArray.clear(); + + globalResourcesIndexArray.reserve(info.globalResources.size()); + for(const rdcpair &res : info.globalResources) + { + int32_t idx = rdat.resourceInfo.indexOf(res); + RDCASSERT(idx >= 0); + globalResourcesIndexArray.push_back(idx); + } + + functionDependenciesArray.reserve(info.functionDependencies.size()); + for(const rdcstr &f : info.functionDependencies) + functionDependenciesArray.push_back(MakeStringRef(stringblob, f).offset); + + functionInfo.push_back({ + MakeStringRef(stringblob, info.name), + MakeStringRef(stringblob, info.unmangledName), + MakeIndexArrayRef(indexarrays, globalResourcesIndexArray, true), + MakeIndexArrayRef(indexarrays, functionDependenciesArray, true), + info.type, + info.payloadBytes, + info.attribBytes, + {uint32_t(info.featureFlags) & 0xffffffff, uint64_t(info.featureFlags) >> 32}, + info.shaderCompatMask, + info.minShaderModel, + info.minType, + }); + } + } + else if(rdat.functionVersion == RDATData::FunctionInfoVersion::Version2) + { + rdcarray functionDependenciesArray; + rdcarray globalResourcesIndexArray; + + functionInfo2.reserve(rdat.functionInfo.size()); + for(const RDATData::FunctionInfo2 &info : rdat.functionInfo) + { + globalResourcesIndexArray.clear(); + functionDependenciesArray.clear(); + + globalResourcesIndexArray.reserve(info.globalResources.size()); + for(const rdcpair &res : info.globalResources) + { + int32_t idx = rdat.resourceInfo.indexOf(res); + RDCASSERT(idx >= 0); + globalResourcesIndexArray.push_back(idx); + } + + functionDependenciesArray.reserve(info.functionDependencies.size()); + for(const rdcstr &f : info.functionDependencies) + functionDependenciesArray.push_back(MakeStringRef(stringblob, f).offset); + + // don't expect any extra info currently + RDCASSERT(info.extraInfoRef == ~0U); + functionInfo2.push_back({ + { + MakeStringRef(stringblob, info.name), + MakeStringRef(stringblob, info.unmangledName), + MakeIndexArrayRef(indexarrays, globalResourcesIndexArray, true), + MakeIndexArrayRef(indexarrays, functionDependenciesArray, true), + info.type, + info.payloadBytes, + info.attribBytes, + {uint32_t(info.featureFlags) & 0xffffffff, uint64_t(info.featureFlags) >> 32}, + info.shaderCompatMask, + info.minShaderModel, + info.minType, + }, + info.minWaveCount, + info.maxWaveCount, + info.shaderBehaviourFlags, + + // below here is a stage-specific set of data containing e.g. signature elements. + // Currently DXC does not emit RDAT except for in library targets, so this will be + // unused. It would be an index into a table elsewhere of VSInfo, PSInfo, etc. + {~0U}, + }); + } + } + + rdcarray tmpIdxArray; + subobjectsInfo.reserve(rdat.subobjectsInfo.size()); + for(const RDATData::SubobjectInfo &info : rdat.subobjectsInfo) + { + EncodedSubobjectInfo sub = { + info.type, + MakeStringRef(stringblob, info.name), + }; + + switch(info.type) + { + case RDATData::SubobjectInfo::SubobjectType::StateConfig: + { + sub.config = info.config; + break; + } + // these are only differentiated by the enum, the data is the same + case RDATData::SubobjectInfo::SubobjectType::GlobalRS: + case RDATData::SubobjectInfo::SubobjectType::LocalRS: + sub.rs.data = MakeBytesRef(rawbyteLookups, info.rs.data); + break; + case RDATData::SubobjectInfo::SubobjectType::SubobjectToExportsAssoc: + { + sub.assoc.subobject = MakeStringRef(stringblob, info.assoc.subobject); + + tmpIdxArray.clear(); + tmpIdxArray.reserve(info.assoc.exports.size()); + for(const rdcstr &f : info.assoc.exports) + tmpIdxArray.push_back(MakeStringRef(stringblob, f).offset); + + sub.assoc.exports = MakeIndexArrayRef(indexarrays, tmpIdxArray, false); + break; + } + case RDATData::SubobjectInfo::SubobjectType::RTShaderConfig: + { + sub.rtshaderconfig = info.rtshaderconfig; + break; + } + // we can treat these unions identically - in the old config case the flags will be ignored + // and should be 0 but the struct is effective padded to the largest union size because of + // the fixed stride anyway + case RDATData::SubobjectInfo::SubobjectType::RTPipeConfig: + { + RDCASSERT(info.rtpipeconfig.flags == RDATData::RTPipeFlags::None); + DELIBERATE_FALLTHROUGH(); + } + case RDATData::SubobjectInfo::SubobjectType::RTPipeConfig1: + { + sub.rtpipeconfig = info.rtpipeconfig; + break; + } + case RDATData::SubobjectInfo::SubobjectType::Hitgroup: + { + sub.hitgroup.type = info.hitgroup.type; + sub.hitgroup.anyHit = MakeStringRef(stringblob, info.hitgroup.anyHit); + sub.hitgroup.closestHit = MakeStringRef(stringblob, info.hitgroup.closestHit); + sub.hitgroup.intersection = MakeStringRef(stringblob, info.hitgroup.intersection); + break; + } + default: + { + RDCWARN("Unhandled subobject type %d", info.type); + break; + } + } + + subobjectsInfo.push_back(sub); + } + + // concatenate bytes together now + for(bytebuf &b : rawbyteLookups) + rawbytes.append(b); + + // the order of these parts is important and matches dxc + + rdcarray parts; + + BakeRuntimePart(parts, RDATData::Part::StringBuffer, stringblob.data(), stringblob.count()); + BakeRuntimeTablePart(parts, RDATData::Part::ResourceTable, resourceInfo); + if(!functionInfo.empty()) + BakeRuntimeTablePart(parts, RDATData::Part::FunctionTable, functionInfo); + else + BakeRuntimeTablePart(parts, RDATData::Part::FunctionTable, functionInfo2); + BakeRuntimePart(parts, RDATData::Part::IndexArrays, indexarrays.data(), + (uint32_t)indexarrays.byteSize()); + BakeRuntimePart(parts, RDATData::Part::RawBytes, rawbytes.data(), (uint32_t)rawbytes.byteSize()); + BakeRuntimeTablePart(parts, RDATData::Part::SubobjectTable, subobjectsInfo); + + // write the header last now that the parts are complete + + // part offsets start immediately after the header which includes the part offsets themselves + uint32_t offset = sizeof(RDATData::Version1_0) + sizeof(uint32_t) * (1 + parts.count()); + + StreamWriter total(256); + total.Write(RDATData::Version1_0); + total.Write(parts.count()); + for(size_t i = 0; i < parts.size(); i++) + { + total.Write(offset); + // parts should already be uint32 aligned + offset += (uint32_t)parts[i].byteSize(); + } + // now write the parts themselves + for(size_t i = 0; i < parts.size(); i++) + { + total.Write(parts[i].data(), parts[i].byteSize()); + } + + DXBC::DXBCContainer::ReplaceChunk(ByteCode, DXBC::FOURCC_RDAT, total.GetData(), + (size_t)total.GetOffset()); } }; // namespace DXBC