From 3cec544508c01468d232879c53febeae959d86d8 Mon Sep 17 00:00:00 2001 From: baldurk Date: Wed, 20 Oct 2021 15:44:00 +0100 Subject: [PATCH] Patch quad overdraw shaders on D3D12 to follow rules. Closes #2356 * This is a stupid requirement as the quad overdraw shader doesn't use any interpolators, but the D3D12 runtime complains and refuses to create a PSO unless the PS has a matching signature. This works as long as the position was the first output from the previous stage, but if it isn't the PSO fails to create. * To fix this, we take the existing shader and patch it by grafting the output signature from the last stage over onto the input signature, and patching up where the position is. --- renderdoc/driver/d3d12/d3d12_overlay.cpp | 749 +++++++++++++++++- renderdoc/driver/d3d12/d3d12_replay.h | 4 + .../shaders/dxbc/dxbc_bytecode_editor.h | 1 + .../driver/shaders/dxil/dxil_bytecode.cpp | 9 + renderdoc/driver/shaders/dxil/dxil_bytecode.h | 2 + util/test/demos/d3d12/d3d12_overlay_test.cpp | 413 ++++++---- util/test/rdtest/shared/Overlay_Test.py | 12 +- util/test/tests/D3D12/D3D12_Overlay_Test.py | 60 +- 8 files changed, 1024 insertions(+), 226 deletions(-) diff --git a/renderdoc/driver/d3d12/d3d12_overlay.cpp b/renderdoc/driver/d3d12/d3d12_overlay.cpp index abe2ef693..bb18c779c 100644 --- a/renderdoc/driver/d3d12/d3d12_overlay.cpp +++ b/renderdoc/driver/d3d12/d3d12_overlay.cpp @@ -23,12 +23,17 @@ ******************************************************************************/ #include "common/shader_cache.h" +#include "core/settings.h" #include "data/resource.h" #include "driver/dx/official/d3dcompiler.h" #include "driver/dxgi/dxgi_common.h" +#include "driver/shaders/dxbc/dxbc_bytecode_editor.h" +#include "driver/shaders/dxil/dxil_bytecode_editor.h" +#include "driver/shaders/dxil/dxil_common.h" #include "maths/formatpacking.h" #include "maths/matrix.h" #include "maths/vec.h" +#include "serialise/streamio.h" #include "stb/stb_truetype.h" #include "strings/string_utils.h" #include "d3d12_command_list.h" @@ -40,20 +45,15 @@ #include "data/hlsl/hlsl_cbuffers.h" +RDOC_CONFIG(rdcstr, D3D12_Debug_OverlayDumpDirPath, "", + "Path to dump quad overdraw patched DXIL files."); + struct D3D12QuadOverdrawCallback : public D3D12ActionCallback { - D3D12QuadOverdrawCallback(WrappedID3D12Device *dev, D3D12_SHADER_BYTECODE quadWrite, - D3D12_SHADER_BYTECODE quadWriteDXIL, const rdcarray &events, + D3D12QuadOverdrawCallback(WrappedID3D12Device *dev, const rdcarray &events, ID3D12Resource *depth, ID3D12Resource *msdepth, PortableHandle dsv, PortableHandle uav) - : m_pDevice(dev), - m_QuadWritePS(quadWrite), - m_QuadWriteDXILPS(quadWriteDXIL), - m_Events(events), - m_Depth(depth), - m_MSDepth(msdepth), - m_DSV(dsv), - m_UAV(uav) + : m_pDevice(dev), m_Events(events), m_Depth(depth), m_MSDepth(msdepth), m_DSV(dsv), m_UAV(uav) { m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = this; } @@ -144,19 +144,17 @@ struct D3D12QuadOverdrawCallback : public D3D12ActionCallback bool dxil = DXBC::DXBCContainer::CheckForDXIL(pipeDesc.VS.pShaderBytecode, pipeDesc.VS.BytecodeLength); - if(dxil) + // dxil is stricter about pipeline signatures matching. On D3D11 there's an error but all + // drivers handle a PS that reads no VS outputs and only screenspace SV_Position and + // SV_Coverage. On D3D12 we need to patch to generate a new PS + m_pDevice->GetReplay()->PatchQuadWritePS(pipeDesc, dxil); + if(pipeDesc.PS.BytecodeLength == 0) { - pipeDesc.PS = m_QuadWriteDXILPS; - if(pipeDesc.PS.BytecodeLength == 0) - { - m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High, - MessageSource::UnsupportedConfiguration, - "No DXIL shader available for overlay"); - } - } - else - { - pipeDesc.PS = m_QuadWritePS; + m_pDevice->AddDebugMessage( + MessageCategory::Shaders, MessageSeverity::High, MessageSource::UnsupportedConfiguration, + StringFormat::Fmt("No quad write %s shader available for overlay", + dxil ? "DXIL" : "DXBC")); + return; } pipeDesc.pRootSignature = cache.sig; @@ -252,8 +250,6 @@ struct D3D12QuadOverdrawCallback : public D3D12ActionCallback } WrappedID3D12Device *m_pDevice; - D3D12_SHADER_BYTECODE m_QuadWritePS; - D3D12_SHADER_BYTECODE m_QuadWriteDXILPS; const rdcarray &m_Events; PortableHandle m_UAV; PortableHandle m_DSV; @@ -300,6 +296,697 @@ static void SetRTVDesc(D3D12_RENDER_TARGET_VIEW_DESC &rtDesc, const D3D12_RESOUR } } +void D3D12Replay::PatchQuadWritePS(D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC &pipeDesc, bool dxil) +{ + pipeDesc.PS.pShaderBytecode = NULL; + pipeDesc.PS.BytecodeLength = 0; + + ID3DBlob *quadWriteBlob = dxil ? m_Overlay.QuadOverdrawWriteDXILPS : m_Overlay.QuadOverdrawWritePS; + + if(!quadWriteBlob) + { + RDCERR("Compiled quad overdraw write %s blob isn't available", dxil ? "DXIL" : "DXBC"); + return; + } + + D3D12_SHADER_BYTECODE *rastFeeding = &pipeDesc.VS; + + if(pipeDesc.DS.BytecodeLength > 0) + rastFeeding = &pipeDesc.DS; + + uint32_t hash[4]; + DXBC::DXBCContainer::GetHash(hash, rastFeeding->pShaderBytecode, rastFeeding->BytecodeLength); + + rdcfixedarray key = hash; + + bytebuf &patchedPs = m_PatchedPSCache[key]; + + // check if we have this shader's matching PS cached already + if(!patchedPs.empty()) + { + pipeDesc.PS.pShaderBytecode = patchedPs.data(); + pipeDesc.PS.BytecodeLength = patchedPs.size(); + return; + } + + bytebuf rastFeedingBytes((const byte *)rastFeeding->pShaderBytecode, rastFeeding->BytecodeLength); + + // get the DXBC for the previous stage + DXBC::DXBCContainer rastFeedingDXBC(rastFeedingBytes, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U); + + bytebuf patchedDXBC((const byte *)quadWriteBlob->GetBufferPointer(), + quadWriteBlob->GetBufferSize()); + + // if the previous stage already outputs position as the first register, we're done as the + // precompiled quadwrite will be compatible! no patching necessary + if(rastFeedingDXBC.GetReflection()->OutputSig.size() >= 1 && + rastFeedingDXBC.GetReflection()->OutputSig[0].regIndex == 0 && + rastFeedingDXBC.GetReflection()->OutputSig[0].systemValue == ShaderBuiltin::Position) + { + patchedDXBC.swap(patchedPs); + + pipeDesc.PS.pShaderBytecode = patchedPs.data(); + pipeDesc.PS.BytecodeLength = patchedPs.size(); + return; + } + + if(!D3D12_Debug_OverlayDumpDirPath().empty()) + FileIO::WriteAll(D3D12_Debug_OverlayDumpDirPath() + "/before_quadps.dxbc", patchedDXBC); + + DXBC::DXBCContainer quadOverdrawDXBC(patchedDXBC, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U); + + if(dxil) + { + rdcstr stringTable; + stringTable.push_back('\0'); + + rdcarray semanticIndexTable; + + rdcarray stringTableOffsets; + rdcarray semanticIndexTableOffsets; + + { + // use a local bytebuf so that if we error out, patchedPs above won't be modified + DXIL::ProgramEditor editor( + &quadOverdrawDXBC, rastFeedingDXBC.GetDXILByteCode()->GetMetadataCount() * 2, patchedDXBC); + + const DXIL::Type *i32 = editor.GetInt32Type(); + const DXIL::Type *i8 = editor.GetInt8Type(); + + // We need to make two changes: copy the raster-feeding shader's output signature wholesale + // into + // the pixel shader. It only needs position, which *must* have been written by definition, the + // coverage input comes from an intrinsic. None of the properties should need to change, so + // it's + // a pure deep copy of metadata and properties to ensure a compatible signature. + // + // After that, we need to find the input load ops in the original shader, and patch the row it + // refers to (it would have been 0 previously). Since position is a full float4 we shouldn't + // have to change anything else + + const DXIL::Metadata *rastEntryPoints = + rastFeedingDXBC.GetDXILByteCode()->GetMetadataByName("dx.entryPoints"); + + if(!rastEntryPoints) + { + RDCERR("Couldn't find entry point list"); + return; + } + + // TODO select the entry point for multiple entry points? RT only for now + RDCASSERT(rastEntryPoints->children.size() > 0 && rastEntryPoints->children[0]); + const DXIL::Metadata *rastEntry = rastEntryPoints->children[0]; + + RDCASSERT(rastEntry->children.size() > 2 && rastEntry->children[2]); + const DXIL::Metadata *rastSigs = rastEntry->children[2]; + + RDCASSERT(rastSigs->children.size() > 1 && rastSigs->children[1]); + const DXIL::Metadata *rastOutSig = rastSigs->children[1]; + + DXIL::Metadata *entryPoints = editor.GetMetadataByName("dx.entryPoints"); + + if(!entryPoints) + { + RDCERR("Couldn't find entry point list"); + return; + } + + // TODO select the entry point for multiple entry points? RT only for now + RDCASSERT(entryPoints->children.size() > 0 && entryPoints->children[0]); + DXIL::Metadata *entry = entryPoints->children[0]; + + rdcstr entryName = entry->children[1]->str; + + RDCASSERT(entry->children.size() > 2 && entry->children[2]); + DXIL::Metadata *sigs = entry->children[2]; + + RDCASSERT(sigs->children.size() > 0); + + DXIL::Metadata inputSig; + + uint32_t posID = ~0U; + +#define DUPLICATE_META_CONSTANT(newConst, type_, oldConst) \ + { \ + DXIL::Metadata m; \ + m.isConstant = true; \ + m.type = type_; \ + m.value = DXIL::Value( \ + editor.GetOrAddConstant(DXIL::Constant(type_, oldConst->value.constant->val.u32v[0]))); \ + newConst = editor.AddMetadata(m); \ + } + + for(size_t i = 0; i < rastOutSig->children.size(); i++) + { + const DXIL::Metadata *oldSigEl = rastOutSig->children[i]; + DXIL::Metadata newSigEl; + + newSigEl.children.resize(oldSigEl->children.size()); + + // element ID + DUPLICATE_META_CONSTANT(newSigEl.children[0], i32, oldSigEl->children[0]); + + // semantic name + { + DXIL::Metadata m; + m.isString = true; + m.str = oldSigEl->children[1]->str; + newSigEl.children[1] = editor.AddMetadata(m); + + // only append non-system values to the string table + if(oldSigEl->children[3]->value.constant->val.u32v[0] == 0) + { + stringTableOffsets.push_back((uint32_t)stringTable.size()); + stringTable.append(m.str); + stringTable.push_back('\0'); + } + else + { + stringTableOffsets.push_back(0); + } + } + + // component type + DUPLICATE_META_CONSTANT(newSigEl.children[2], i8, oldSigEl->children[2]); + + // semantic kind + DUPLICATE_META_CONSTANT(newSigEl.children[3], i8, oldSigEl->children[3]); + + // SV_Position is 3 + if(oldSigEl->children[3]->value.constant->val.u32v[0] == 3) + posID = oldSigEl->children[0]->value.constant->val.u32v[0]; + + rdcarray semIndexValues; + + // semantic indices + const DXIL::Metadata *oldSemIdxs = oldSigEl->children[4]; + if(oldSemIdxs) + { + DXIL::Metadata semanticIndices; + + // the semantic index node is a list of constants + semanticIndices.children.resize(oldSemIdxs->children.size()); + for(size_t sidx = 0; sidx < oldSemIdxs->children.size(); sidx++) + { + DUPLICATE_META_CONSTANT(semanticIndices.children[sidx], i32, oldSemIdxs->children[sidx]); + semIndexValues.push_back(oldSemIdxs->children[sidx]->value.constant->val.u32v[0]); + } + + // copy the list + newSigEl.children[4] = editor.AddMetadata(semanticIndices); + } + + size_t tableOffset = ~0U; + + // try to find semIndexValues in semanticIndexTable + for(size_t offs = 0; offs + semIndexValues.size() <= semanticIndexTable.size(); offs++) + { + bool match = true; + for(size_t sidx = 0; sidx < semIndexValues.size(); sidx++) + { + if(semanticIndexTable[offs + sidx] != semIndexValues[sidx]) + { + match = false; + break; + } + } + + if(match) + { + tableOffset = offs; + break; + } + } + + // if we didn't find it, append + if(tableOffset == ~0U) + { + tableOffset = semanticIndexTable.size(); + semanticIndexTable.append(semIndexValues); + } + + semanticIndexTableOffsets.push_back((uint32_t)tableOffset); + + // interpolation mode + DUPLICATE_META_CONSTANT(newSigEl.children[5], i8, oldSigEl->children[5]); + + // number of rows + DUPLICATE_META_CONSTANT(newSigEl.children[6], i32, oldSigEl->children[6]); + + // number of columns + DUPLICATE_META_CONSTANT(newSigEl.children[7], i8, oldSigEl->children[7]); + + // start row + DUPLICATE_META_CONSTANT(newSigEl.children[8], i32, oldSigEl->children[8]); + + // start column + DUPLICATE_META_CONSTANT(newSigEl.children[9], i8, oldSigEl->children[9]); + + // the extra tag/thing list is also a series of ints + const DXIL::Metadata *oldTagList = oldSigEl->children[10]; + if(oldTagList) + { + DXIL::Metadata tagList; + + // the semantic index node is a list of constants + tagList.children.resize(oldTagList->children.size()); + for(size_t sidx = 0; sidx < oldTagList->children.size(); sidx++) + { + DUPLICATE_META_CONSTANT(tagList.children[sidx], i32, oldTagList->children[sidx]); + } + + // copy the list + newSigEl.children[10] = editor.AddMetadata(tagList); + } + + inputSig.children.push_back(editor.AddMetadata(newSigEl)); + } + + if(posID == ~0U) + { + RDCERR("Couldn't find position output in previous shader"); + return; + } + + // recreate input signature list, for backwards references + sigs->children[0] = editor.AddMetadata(inputSig); + + // recreate backwards upwards + sigs = editor.AddMetadata(*sigs); + entry->children[2] = sigs; + entry = editor.AddMetadata(*entry); + entryPoints->children[0] = entry; + + DXIL::Function *f = editor.GetFunctionByName(entryName); + + if(!f) + { + RDCERR("Couldn't find entry point function '%s'", entryName.c_str()); + return; + } + + DXIL::Value inputIDValue(editor.GetOrAddConstant(f, DXIL::Constant(i32, posID))); + + // now locate the loadInputs and patch the row they refer to. We can unconditionally patch + // them all as there was only one input previously + for(size_t i = 0; i < f->instructions.size(); i++) + { + DXIL::Instruction &inst = f->instructions[i]; + + if(inst.op == DXIL::Operation::Call && inst.funcCall->name == "dx.op.loadInput.f32") + { + if(inst.args.size() != 5) + { + RDCERR("Unexpected number of arguments to createHandle"); + continue; + } + + // arg[0] is the loadInput magic number + // arg[1] is the ID we want to patch + + inst.args[1] = inputIDValue; + } + } + } + + { + // do a horrible franken-patch to merge the PSV0 chunks. We use the header from the existing + // PS, + // change the number of declared input elements, then copy the signature elements from the + // last + // shader's chunk. We can't copy the whole string table because that will likely include other + // strings and then the damned thing won't match according to the runtime's validation. + size_t rastPsv0Size = 0; + const byte *rastPsv0Bytes = + DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_PSV0, rastPsv0Size); + StreamReader rastPsv0(rastPsv0Bytes, rastPsv0Size); + + size_t psPsv0Size = 0; + const byte *psPsv0Bytes = + DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_PSV0, psPsv0Size); + StreamReader psPsv0(psPsv0Bytes, psPsv0Size); + + StreamWriter mergedPsv0(1024); + + uint32_t rastHeaderSize = 0; + if(!rastPsv0.Read(rastHeaderSize)) + return; + + uint32_t psHeaderSize = 0; + if(!psPsv0.Read(psHeaderSize)) + return; + + struct PSVHeader0 + { + uint32_t unused[6]; + }; + + struct PSVHeader1 : public PSVHeader0 + { + // other data + uint32_t unused1; + + // signature element counts + uint8_t inputEls; + uint8_t outputEls; + uint8_t patchConstEls; + + // signature vector counts + uint8_t inputVecs; + uint8_t outputVecs[4]; + }; + + struct PSVHeader2 : public PSVHeader1 + { + uint32_t NumThreadsX; + uint32_t NumThreadsY; + uint32_t NumThreadsZ; + }; + + bytebuf copyBuf; + PSVHeader2 rastHeader = {}, psHeader = {}; + + if(rastHeaderSize < sizeof(PSVHeader1)) + { + // only copy the header0 part out of the ps one since we won't have signature data to copy, + // hope this is OK + + // read the whole ps header + psPsv0.Read(&psHeader, psHeaderSize); + + // write only the old sized header + mergedPsv0.Write(rastHeaderSize); + mergedPsv0.Write(&psHeader, rastHeaderSize); + } + else + { + rastPsv0.Read(&rastHeader, rastHeaderSize); + psPsv0.Read(&psHeader, psHeaderSize); + + // copy the previous output signature into the ps input + psHeader.inputEls = rastHeader.outputEls; + psHeader.inputVecs = rastHeader.outputVecs[0]; + + // the ps header should have no other elements for us to worry about + RDCASSERT(psHeader.outputEls == 0); + RDCASSERT(psHeader.patchConstEls == 0); + RDCASSERT(psHeader.outputVecs[0] == 0); + RDCASSERT(psHeader.outputVecs[1] == 0); + RDCASSERT(psHeader.outputVecs[2] == 0); + RDCASSERT(psHeader.outputVecs[3] == 0); + + // we should have a table offset for each output entry + RDCASSERT(rastHeader.outputEls == stringTableOffsets.size()); + RDCASSERT(rastHeader.outputEls == semanticIndexTableOffsets.size()); + + mergedPsv0.Write(psHeaderSize); + mergedPsv0.Write(&psHeader, psHeaderSize); + } + + // skip resource counts in raster side shader + uint32_t rastResCount = 0; + if(!rastPsv0.Read(rastResCount)) + return; + + if(rastResCount > 0) + { + uint32_t resSize = 0; + if(!rastPsv0.Read(resSize)) + return; + rastPsv0.SkipBytes(rastResCount * resSize); + } + + uint32_t psResCount = 0; + if(!psPsv0.Read(psResCount)) + return; + mergedPsv0.Write(psResCount); + + // copy any resources in the pixel psv0 + if(psResCount > 0) + { + uint32_t resSize = 0; + if(!psPsv0.Read(resSize)) + return; + mergedPsv0.Write(resSize); + copyBuf.resize(psResCount * resSize); + psPsv0.Read(copyBuf.data(), copyBuf.size()); + mergedPsv0.Write(copyBuf.data(), copyBuf.size()); + } + + // if we have a new header with signature elements (what we expect) + if(rastHeaderSize >= sizeof(PSVHeader1)) + { + // we're effectively done with the rest of the ps chunk here, we're just going to copy the + // old + // chunk except skipping the input signature. There might be data we don't need in the + // string/indices tables but that's fine. + + // align string table to multiple of 4 size + stringTable.resize(AlignUp4(stringTable.size())); + + // skip the old string table and semantic index table + uint32_t stringTableSize = 0; + if(!rastPsv0.Read(stringTableSize)) + return; + rastPsv0.SkipBytes(stringTableSize); + + uint32_t indexTableSize = 0; + if(!rastPsv0.Read(indexTableSize)) + return; + rastPsv0.SkipBytes(indexTableSize * sizeof(uint32_t)); + + mergedPsv0.Write((uint32_t)stringTable.size()); + mergedPsv0.Write(stringTable.data(), stringTable.size()); + + mergedPsv0.Write((uint32_t)semanticIndexTable.size()); + mergedPsv0.Write(semanticIndexTable.data(), semanticIndexTable.byteSize()); + + uint32_t sigElSize = 0; + if(!rastPsv0.Read(sigElSize)) + return; + mergedPsv0.Write(sigElSize); + + // skip any inputs from the previous stage, we don't want to copy that + rastPsv0.SkipBytes(rastHeader.inputEls * sigElSize); + + struct PSVSigElement + { + uint32_t stringTableOffset; + uint32_t semanticTableOffset; + }; + + // copy the output elements, this will become the input elements. We need to modify the + // table + // offsets to match the one we generated + for(uint8_t el = 0; el < rastHeader.outputEls; el++) + { + copyBuf.resize(sigElSize); + rastPsv0.Read(copyBuf.data(), copyBuf.size()); + PSVSigElement *sigEl = (PSVSigElement *)copyBuf.data(); + sigEl->stringTableOffset = stringTableOffsets[el]; + sigEl->semanticTableOffset = semanticIndexTableOffsets[el]; + + mergedPsv0.Write(copyBuf.data(), copyBuf.size()); + } + } + + DXBC::DXBCContainer::ReplaceChunk(patchedDXBC, DXBC::FOURCC_PSV0, mergedPsv0.GetData(), + (size_t)mergedPsv0.GetOffset()); + } + } + else // dxbc bytecode not dxil + { + using namespace DXBCBytecode; + using namespace DXBCBytecode::Edit; + + ProgramEditor editor(&quadOverdrawDXBC, patchedDXBC); + + // find out which register the previous shader used to write position, we don't need to declare + // any of the others just match the register + uint32_t posReg = 0; + for(const SigParameter &sig : rastFeedingDXBC.GetReflection()->OutputSig) + { + if(sig.systemValue == ShaderBuiltin::Position) + { + posReg = sig.regIndex; + break; + } + } + + for(size_t i = 0; i < editor.GetNumDeclarations(); i++) + { + Declaration &decl = editor.GetDeclaration(i); + + // there's only one SIV input + if(decl.declaration == OpcodeType::OPCODE_DCL_INPUT_PS_SIV) + { + RDCASSERT(decl.operand.type == OperandType::TYPE_INPUT); + if(decl.operand.indices.size() >= 1) + { + decl.operand.indices[0].index = posReg; + } + else + { + RDCERR("Unexpected number of indices for declared PS input"); + } + + break; + } + } + + // now patch any instructions that reference the input + for(size_t i = 0; i < editor.GetNumInstructions(); i++) + { + Operation &op = editor.GetInstruction(i); + + for(Operand &operand : op.operands) + { + if(operand.type == OperandType::TYPE_INPUT && operand.indices.size() == 1 && + operand.indices[0].index == 0) + operand.indices[0].index = posReg; + } + } + } + + // copy the raster shader's OSGX to the pixel's ISGX + { + struct SigElement + { + uint32_t nameOffset; + uint32_t semanticIdx; + uint32_t systemType; + uint32_t componentType; + uint32_t registerNum; + uint8_t mask; + uint8_t rwMask; + uint16_t unused; + }; + + struct SigElement7 + { + uint32_t stream; + SigElement el; + }; + + struct SigElement1 + { + SigElement7 el7; + uint32_t precision; + }; + + bytebuf osg; + + StreamWriter isg(1024); + + size_t inSigElSize = 0; + size_t outSigElSize = 0; + + size_t rastOSGSize = 0; + const byte *rastOSGBytes = + DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSG1, rastOSGSize); + + if(rastOSGBytes) + { + osg.assign(rastOSGBytes, rastOSGSize); + inSigElSize = sizeof(SigElement1); + } + else + { + rastOSGBytes = DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSG5, rastOSGSize); + + if(rastOSGBytes) + { + osg.assign(rastOSGBytes, rastOSGSize); + inSigElSize = sizeof(SigElement7); + } + else + { + rastOSGBytes = + DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSGN, rastOSGSize); + + if(!rastOSGBytes) + { + RDCERR("Couldn't find any output signature in rasterizing-feeding shader"); + return; + } + + osg.assign(rastOSGBytes, rastOSGSize); + inSigElSize = sizeof(SigElement); + } + } + + size_t sz; + + if(DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_ISG1, sz)) + { + outSigElSize = sizeof(SigElement1); + } + else if(DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_ISGN, sz)) + { + outSigElSize = sizeof(SigElement); + } + else + { + RDCERR("Couldn't find any input signature in pixel shader"); + return; + } + + uint32_t *u = (uint32_t *)osg.data(); + + uint32_t numSigEls = *u; + + isg.Write(u[0]); + isg.Write(u[1]); + + for(uint32_t el = 0; el < numSigEls; el++) + { + SigElement1 s = {}; + + size_t offset = sizeof(uint32_t) * 2 + inSigElSize * el; + + // read the input element into wherever it sits. We can leave any other elements + // (stream/precision) as 0 and that's fine + if(inSigElSize == sizeof(SigElement1)) + memcpy(&s, osg.data() + offset, inSigElSize); + else if(inSigElSize == sizeof(SigElement7)) + memcpy(&s.el7, osg.data() + offset, inSigElSize); + else if(inSigElSize == sizeof(SigElement)) + memcpy(&s.el7.el, osg.data() + offset, inSigElSize); + + // set the rw mask + s.el7.el.rwMask = 0; + + // dxbc seems to set the rwMask to .xy for position being read + if(!dxil && s.el7.el.systemType == 1) + s.el7.el.rwMask = 0x3; + + // write the output element + if(inSigElSize == sizeof(SigElement1)) + isg.Write(s); + else if(inSigElSize == sizeof(SigElement)) + isg.Write(s.el7.el); + } + + size_t stringsOffset = sizeof(uint32_t) * 2 + inSigElSize * numSigEls; + isg.Write(osg.data() + stringsOffset, osg.size() - stringsOffset); + + DXBC::DXBCContainer::ReplaceChunk( + patchedDXBC, outSigElSize == sizeof(SigElement1) ? DXBC::FOURCC_ISG1 : DXBC::FOURCC_ISGN, + isg.GetData(), (size_t)isg.GetOffset()); + } + + // store the patched DXBC into the cache result + patchedDXBC.swap(patchedPs); + + if(!D3D12_Debug_OverlayDumpDirPath().empty()) + FileIO::WriteAll(D3D12_Debug_OverlayDumpDirPath() + "/after_quadps.dxbc", patchedPs); + + DXBC::DXBCContainer(patchedPs, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U).GetDisassembly(); + + pipeDesc.PS.pShaderBytecode = patchedPs.data(); + pipeDesc.PS.BytecodeLength = patchedPs.size(); +} + RenderOutputSubresource D3D12Replay::GetRenderOutputSubresource(ResourceId id) { const D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState; @@ -1365,17 +2052,6 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw); - D3D12_SHADER_BYTECODE quadWrite; - quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize(); - quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer(); - - D3D12_SHADER_BYTECODE quadWriteDXIL = {}; - if(m_Overlay.QuadOverdrawWriteDXILPS) - { - quadWriteDXIL.BytecodeLength = m_Overlay.QuadOverdrawWriteDXILPS->GetBufferSize(); - quadWriteDXIL.pShaderBytecode = m_Overlay.QuadOverdrawWriteDXILPS->GetBufferPointer(); - } - ID3D12Resource *overrideDepth = NULL; ResourceId res = rs.GetDSVID(); @@ -1410,8 +2086,7 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De } // declare callback struct here - D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, quadWriteDXIL, events, overrideDepth, - overrideDepth ? curDepth : NULL, + D3D12QuadOverdrawCallback cb(m_pDevice, events, overrideDepth, overrideDepth ? curDepth : NULL, overrideDepth ? ToPortableHandle(dsv) : PortableHandle(), ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV))); diff --git a/renderdoc/driver/d3d12/d3d12_replay.h b/renderdoc/driver/d3d12/d3d12_replay.h index 3f2ceb5b1..901aed6b7 100644 --- a/renderdoc/driver/d3d12/d3d12_replay.h +++ b/renderdoc/driver/d3d12/d3d12_replay.h @@ -238,6 +238,8 @@ public: bool IsRenderOutput(ResourceId id) { return GetRenderOutputSubresource(id).mip != ~0U; } void FileChanged() {} AMDCounters *GetAMDCounters() { return m_pAMDCounters; } + void PatchQuadWritePS(D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC &pipeDesc, bool dxil); + private: void FillRootElements(uint32_t eventId, const D3D12RenderState::RootSignature &rootSig, const ShaderBindpointMapping *mappings[(uint32_t)ShaderStage::Count], @@ -481,6 +483,8 @@ private: rdcarray m_CustomShaderIncludes; + std::map, bytebuf> m_PatchedPSCache; + void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray *eventIDs); rdcarray FetchCountersAMD(const rdcarray &counters); diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode_editor.h b/renderdoc/driver/shaders/dxbc/dxbc_bytecode_editor.h index 509f82174..1524aa5d2 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode_editor.h +++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode_editor.h @@ -159,6 +159,7 @@ public: void RemoveOperation(size_t idx, size_t count = 1) { m_Instructions.erase(idx, count); } Operation &GetInstruction(size_t idx) { return m_Instructions[idx]; }; + Declaration &GetDeclaration(size_t idx) { return m_Declarations[idx]; }; private: bytebuf &m_OutBlob; bool m_SM51 = false; diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp b/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp index ee29b2b96..6789c7b1f 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode.cpp @@ -304,6 +304,15 @@ bool Program::Valid(const byte *bytes, size_t length) ptr + offsetof(ProgramHeader, DxilMagic) + header->BitcodeOffset, header->BitcodeSize); } +const Metadata *Program::GetMetadataByName(const rdcstr &name) const +{ + for(size_t i = 0; i < m_NamedMeta.size(); i++) + if(m_NamedMeta[i].name == name) + return &m_NamedMeta[i]; + + return NULL; +} + void ResolveForwardReference(Value &v) { if(!v.empty() && v.type == ValueType::Unknown) diff --git a/renderdoc/driver/shaders/dxil/dxil_bytecode.h b/renderdoc/driver/shaders/dxil/dxil_bytecode.h index a90a4f8d1..07bf2472a 100644 --- a/renderdoc/driver/shaders/dxil/dxil_bytecode.h +++ b/renderdoc/driver/shaders/dxil/dxil_bytecode.h @@ -671,6 +671,8 @@ public: void GetLocals(const DXBC::DXBCContainer *dxbc, size_t instruction, uintptr_t offset, rdcarray &locals) const override; + const Metadata *GetMetadataByName(const rdcstr &name) const; + size_t GetMetadataCount() const { return m_Metadata.size() + m_NamedMeta.size(); } protected: void MakeDisassemblyString(); diff --git a/util/test/demos/d3d12/d3d12_overlay_test.cpp b/util/test/demos/d3d12/d3d12_overlay_test.cpp index 71f8a2eec..9eb688a6f 100644 --- a/util/test/demos/d3d12/d3d12_overlay_test.cpp +++ b/util/test/demos/d3d12/d3d12_overlay_test.cpp @@ -29,6 +29,51 @@ RD_TEST(D3D12_Overlay_Test, D3D12GraphicsTest) static constexpr const char *Description = "Makes a couple of draws that show off all the overlays in some way"; + std::string vertexEndPosVert = R"EOSHADER( + +struct vertin +{ + float3 pos : POSITION; + float4 col : COLOR0; + float2 uv : TEXCOORD0; +}; + +struct v2f +{ + float4 col : COLOR0; + float2 uv : TEXCOORD0; + float4 pos : SV_POSITION; +}; + +v2f main(vertin IN) +{ + v2f OUT = (v2f)0; + + OUT.pos = float4(IN.pos.xyz, 1); + OUT.col = IN.col; + OUT.uv = IN.uv; + + return OUT; +} + +)EOSHADER"; + + std::string vertexEndPosPixel = R"EOSHADER( + +struct v2f +{ + float4 col : COLOR0; + float2 uv : TEXCOORD0; + float4 pos : SV_POSITION; +}; + +float4 main(v2f IN) : SV_Target0 +{ + return IN.col; +} + +)EOSHADER"; + std::string whitePixel = R"EOSHADER( float4 main() : SV_Target0 @@ -44,9 +89,23 @@ float4 main() : SV_Target0 if(!Init()) return 3; - ID3DBlobPtr vsblob = Compile(D3DDefaultVertex, "main", "vs_4_0"); - ID3DBlobPtr psblob = Compile(D3DDefaultPixel, "main", "ps_4_0"); - ID3DBlobPtr whitepsblob = Compile(whitePixel, "main", "ps_4_0"); + ID3DBlobPtr vsblob[3] = {}; + ID3DBlobPtr psblob[3] = {}; + ID3DBlobPtr whitepsblob[3] = {}; + + { + int i = 0; + for(std::string profile : {"_5_0", "_5_1", "_6_0"}) + { + if(i == 2 && !m_DXILSupport) + continue; + + vsblob[i] = Compile(vertexEndPosVert, "main", "vs" + profile); + psblob[i] = Compile(vertexEndPosPixel, "main", "ps" + profile); + whitepsblob[i] = Compile(whitePixel, "main", "ps" + profile); + i++; + } + } const DefaultA2V VBData[] = { // this triangle occludes in depth @@ -137,62 +196,70 @@ float4 main() : SV_Target0 DXGI_SAMPLE_DESC noMSAA = {1, 0}; DXGI_SAMPLE_DESC yesMSAA = {4, qual}; - D3D12PSOCreator creator = MakePSO().RootSig(sig).InputLayout().VS(vsblob).PS(psblob).DSV( - DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + ID3D12PipelineStatePtr depthWritePipe[3][2]; + ID3D12PipelineStatePtr stencilWritePipe[3][2]; + ID3D12PipelineStatePtr backgroundPipe[3][2]; + ID3D12PipelineStatePtr pipe[3][2]; + ID3D12PipelineStatePtr whitepipe[3]; - creator.GraphicsDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; - creator.GraphicsDesc.RasterizerState.DepthClipEnable = TRUE; + for(int i = 0; i < 3; i++) + { + if(vsblob[i] == NULL) + continue; - creator.GraphicsDesc.DepthStencilState.DepthEnable = TRUE; - creator.GraphicsDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; - creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; - creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; - creator.GraphicsDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; - creator.GraphicsDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; - creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; - creator.GraphicsDesc.DepthStencilState.BackFace = - creator.GraphicsDesc.DepthStencilState.FrontFace; - creator.GraphicsDesc.DepthStencilState.StencilReadMask = 0xff; - creator.GraphicsDesc.DepthStencilState.StencilWriteMask = 0xff; + D3D12PSOCreator creator = MakePSO().RootSig(sig).InputLayout().VS(vsblob[i]).PS(psblob[i]).DSV( + DXGI_FORMAT_D32_FLOAT_S8X24_UINT); - creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - ID3D12PipelineStatePtr depthWritePipe[2]; - creator.GraphicsDesc.SampleDesc = noMSAA; - depthWritePipe[0] = creator; - creator.GraphicsDesc.SampleDesc = yesMSAA; - depthWritePipe[1] = creator; + creator.GraphicsDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + creator.GraphicsDesc.RasterizerState.DepthClipEnable = TRUE; - creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; - creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE; + creator.GraphicsDesc.DepthStencilState.DepthEnable = TRUE; + creator.GraphicsDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; + creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + creator.GraphicsDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE; + creator.GraphicsDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + creator.GraphicsDesc.DepthStencilState.BackFace = + creator.GraphicsDesc.DepthStencilState.FrontFace; + creator.GraphicsDesc.DepthStencilState.StencilReadMask = 0xff; + creator.GraphicsDesc.DepthStencilState.StencilWriteMask = 0xff; - ID3D12PipelineStatePtr stencilWritePipe[2]; - creator.GraphicsDesc.SampleDesc = noMSAA; - stencilWritePipe[0] = creator; - creator.GraphicsDesc.SampleDesc = yesMSAA; - stencilWritePipe[1] = creator; + creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + creator.GraphicsDesc.SampleDesc = noMSAA; + depthWritePipe[i][0] = creator; + creator.GraphicsDesc.SampleDesc = yesMSAA; + depthWritePipe[i][1] = creator; - creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; - ID3D12PipelineStatePtr backgroundPipe[2]; - creator.GraphicsDesc.SampleDesc = noMSAA; - backgroundPipe[0] = creator; - creator.GraphicsDesc.SampleDesc = yesMSAA; - backgroundPipe[1] = creator; + creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE; - creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE; - creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_GREATER; - ID3D12PipelineStatePtr pipe[2]; - creator.GraphicsDesc.SampleDesc = noMSAA; - pipe[0] = creator; - creator.GraphicsDesc.SampleDesc = yesMSAA; - pipe[1] = creator; + creator.GraphicsDesc.SampleDesc = noMSAA; + stencilWritePipe[i][0] = creator; + creator.GraphicsDesc.SampleDesc = yesMSAA; + stencilWritePipe[i][1] = creator; - creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; - creator.GraphicsDesc.DepthStencilState.DepthEnable = FALSE; - creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; - creator.PS(whitepsblob); - creator.DSV(DXGI_FORMAT_UNKNOWN); - creator.GraphicsDesc.SampleDesc = noMSAA; - ID3D12PipelineStatePtr whitepipe = creator; + creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; + creator.GraphicsDesc.SampleDesc = noMSAA; + backgroundPipe[i][0] = creator; + creator.GraphicsDesc.SampleDesc = yesMSAA; + backgroundPipe[i][1] = creator; + + creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE; + creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_GREATER; + creator.GraphicsDesc.SampleDesc = noMSAA; + pipe[i][0] = creator; + creator.GraphicsDesc.SampleDesc = yesMSAA; + pipe[i][1] = creator; + + creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE; + creator.GraphicsDesc.DepthStencilState.DepthEnable = FALSE; + creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + creator.PS(whitepsblob[i]); + creator.DSV(DXGI_FORMAT_UNKNOWN); + creator.GraphicsDesc.SampleDesc = noMSAA; + whitepipe[i] = creator; + } ResourceBarrier(vb, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); @@ -220,132 +287,158 @@ float4 main() : SV_Target0 while(Running()) { - ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer(); + ID3D12ResourcePtr bb; - Reset(cmd); - - ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET); - - for(bool is_msaa : {false, true}) + int pass = 0; + for(std::string profile : {"sm5.0", "sm5.1", "sm6.0"}) { - D3D12_CPU_DESCRIPTOR_HANDLE rtv = - MakeRTV(is_msaa ? msaatex : bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0); + if(whitepipe[pass] == NULL) + break; - cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - - IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0); - cmd->SetGraphicsRootSignature(sig); - - RSSetViewport(cmd, {10.0f, 10.0f, (float)screenWidth - 20.0f, (float)screenHeight - 20.0f, - 0.0f, 1.0f}); - RSSetScissorRect(cmd, {0, 0, screenWidth, screenHeight}); - - OMSetRenderTargets(cmd, {rtv}, MakeDSV(is_msaa ? msaadsv : dsv).CreateCPU(0)); - - ClearRenderTargetView(cmd, rtv, {0.2f, 0.2f, 0.2f, 1.0f}); - ClearDepthStencilView(cmd, is_msaa ? msaadsv : dsv, - D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0f, 0); - - cmd->OMSetStencilRef(0x55); - - // draw the setup triangles - cmd->SetPipelineState(depthWritePipe[is_msaa ? 1 : 0]); - cmd->DrawInstanced(3, 1, 0, 0); - - cmd->SetPipelineState(stencilWritePipe[is_msaa ? 1 : 0]); - cmd->DrawInstanced(3, 1, 3, 0); - - cmd->SetPipelineState(backgroundPipe[is_msaa ? 1 : 0]); - cmd->DrawInstanced(3, 1, 6, 0); - - // add a marker so we can easily locate this draw - setMarker(cmd, is_msaa ? "MSAA Test" : "Normal Test"); - - cmd->SetPipelineState(pipe[is_msaa ? 1 : 0]); - cmd->DrawInstanced(24, 1, 9, 0); - - if(!is_msaa) - { - setMarker(cmd, "Viewport Test"); - - RSSetViewport(cmd, {10.0f, 10.0f, 80.0f, 80.0f, 0.0f, 1.0f}); - RSSetScissorRect(cmd, {24, 24, 76, 76}); - cmd->SetPipelineState(backgroundPipe[0]); - cmd->DrawInstanced(3, 1, 33, 0); - } - } - - D3D12_CPU_DESCRIPTOR_HANDLE subrtv = MakeRTV(subtex) - .Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) - .FirstSlice(2) - .NumSlices(1) - .FirstMip(2) - .NumMips(1) - .CreateCPU(1); - - RSSetViewport(cmd, {5.0f, 5.0f, float(screenWidth) / 4.0f - 10.0f, - float(screenHeight) / 4.0f - 10.0f, 0.0f, 1.0f}); - RSSetScissorRect(cmd, {0, 0, screenWidth / 4, screenHeight / 4}); - - OMSetRenderTargets(cmd, {subrtv}, {}); - - ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f}); - - cmd->SetPipelineState(whitepipe); - - subrtv = MakeRTV(subtex) - .Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) - .FirstSlice(2) - .NumSlices(1) - .FirstMip(3) - .NumMips(1) - .CreateCPU(1); - - setMarker(cmd, "Subresources mip 2"); - cmd->DrawInstanced(24, 1, 9, 0); - - RSSetViewport(cmd, {2.0f, 2.0f, float(screenWidth / 8) - 4.0f, float(screenHeight / 8) - 4.0f, - 0.0f, 1.0f}); - RSSetScissorRect(cmd, {0, 0, screenWidth / 8, screenHeight / 8}); - - OMSetRenderTargets(cmd, {subrtv}, {}); - - ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f}); - - setMarker(cmd, "Subresources mip 3"); - cmd->DrawInstanced(24, 1, 9, 0); - - cmd->Close(); - - Submit({cmd}); - - { - cmd = GetCommandBuffer(); + ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer(); Reset(cmd); - D3D12_CPU_DESCRIPTOR_HANDLE rtv = - MakeRTV(bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0); + if(pass == 0) + bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET); - cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + pushMarker(cmd, profile + " tests"); - IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0); - cmd->SetGraphicsRootSignature(sig); + for(bool is_msaa : {false, true}) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtv = + MakeRTV(is_msaa ? msaatex : bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0); - OMSetRenderTargets(cmd, {rtv}, {}); + cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - cmd->SetPipelineState(whitepipe); + IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0); + cmd->SetGraphicsRootSignature(sig); - setMarker(cmd, "NoView draw"); + RSSetViewport(cmd, {10.0f, 10.0f, (float)screenWidth - 20.0f, (float)screenHeight - 20.0f, + 0.0f, 1.0f}); + RSSetScissorRect(cmd, {0, 0, screenWidth, screenHeight}); - cmd->DrawInstanced(3, 1, 33, 0); + OMSetRenderTargets(cmd, {rtv}, MakeDSV(is_msaa ? msaadsv : dsv).CreateCPU(0)); + + ClearRenderTargetView(cmd, rtv, {0.2f, 0.2f, 0.2f, 1.0f}); + ClearDepthStencilView(cmd, is_msaa ? msaadsv : dsv, + D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0f, 0); + + cmd->OMSetStencilRef(0x55); + + // draw the setup triangles + cmd->SetPipelineState(depthWritePipe[pass][is_msaa ? 1 : 0]); + cmd->DrawInstanced(3, 1, 0, 0); + + cmd->SetPipelineState(stencilWritePipe[pass][is_msaa ? 1 : 0]); + cmd->DrawInstanced(3, 1, 3, 0); + + cmd->SetPipelineState(backgroundPipe[pass][is_msaa ? 1 : 0]); + cmd->DrawInstanced(3, 1, 6, 0); + + // add a marker so we can easily locate this draw + setMarker(cmd, is_msaa ? "MSAA Test" : "Normal Test"); + + cmd->SetPipelineState(pipe[pass][is_msaa ? 1 : 0]); + cmd->DrawInstanced(24, 1, 9, 0); + + if(!is_msaa) + { + setMarker(cmd, "Viewport Test"); + + RSSetViewport(cmd, {10.0f, 10.0f, 80.0f, 80.0f, 0.0f, 1.0f}); + RSSetScissorRect(cmd, {24, 24, 76, 76}); + cmd->SetPipelineState(backgroundPipe[pass][0]); + cmd->DrawInstanced(3, 1, 33, 0); + } + } + + D3D12_CPU_DESCRIPTOR_HANDLE subrtv = MakeRTV(subtex) + .Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) + .FirstSlice(2) + .NumSlices(1) + .FirstMip(2) + .NumMips(1) + .CreateCPU(1); + + RSSetViewport(cmd, {5.0f, 5.0f, float(screenWidth) / 4.0f - 10.0f, + float(screenHeight) / 4.0f - 10.0f, 0.0f, 1.0f}); + RSSetScissorRect(cmd, {0, 0, screenWidth / 4, screenHeight / 4}); + + OMSetRenderTargets(cmd, {subrtv}, {}); + + ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f}); + + cmd->SetPipelineState(whitepipe[pass]); + + subrtv = MakeRTV(subtex) + .Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) + .FirstSlice(2) + .NumSlices(1) + .FirstMip(3) + .NumMips(1) + .CreateCPU(1); + + setMarker(cmd, "Subresources mip 2"); + cmd->DrawInstanced(24, 1, 9, 0); + + RSSetViewport(cmd, {2.0f, 2.0f, float(screenWidth / 8) - 4.0f, + float(screenHeight / 8) - 4.0f, 0.0f, 1.0f}); + RSSetScissorRect(cmd, {0, 0, screenWidth / 8, screenHeight / 8}); + + OMSetRenderTargets(cmd, {subrtv}, {}); + + ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f}); + + setMarker(cmd, "Subresources mip 3"); + cmd->DrawInstanced(24, 1, 9, 0); + + cmd->Close(); + + Submit({cmd}); + + { + cmd = GetCommandBuffer(); + + Reset(cmd); + + D3D12_CPU_DESCRIPTOR_HANDLE rtv = + MakeRTV(bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0); + + cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0); + cmd->SetGraphicsRootSignature(sig); + + OMSetRenderTargets(cmd, {rtv}, {}); + + cmd->SetPipelineState(whitepipe[0]); + + setMarker(cmd, "NoView draw"); + + cmd->DrawInstanced(3, 1, 33, 0); + + popMarker(cmd); + + cmd->Close(); + } + + Submit({cmd}); + + pass++; + } + + { + ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer(); + + Reset(cmd); FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET); cmd->Close(); - } - Submit({cmd}); + Submit({cmd}); + } Present(); } diff --git a/util/test/rdtest/shared/Overlay_Test.py b/util/test/rdtest/shared/Overlay_Test.py index 1dafc5ea4..cd03b3f34 100644 --- a/util/test/rdtest/shared/Overlay_Test.py +++ b/util/test/rdtest/shared/Overlay_Test.py @@ -6,7 +6,7 @@ import rdtest class Overlay_Test(rdtest.TestCase): internal = True - def check_capture(self): + def check_capture(self, base_event=0): out: rd.ReplayOutput = self.controller.CreateOutput(rd.CreateHeadlessWindowingData(100, 100), rd.ReplayOutputType.Texture) self.check(out is not None) @@ -17,9 +17,9 @@ class Overlay_Test(rdtest.TestCase): for is_msaa in [False, True]: if is_msaa: - test_marker: rd.ActionDescription = self.find_action("MSAA Test") + test_marker: rd.ActionDescription = self.find_action("MSAA Test", base_event) else: - test_marker: rd.ActionDescription = self.find_action("Normal Test") + test_marker: rd.ActionDescription = self.find_action("Normal Test", base_event) self.controller.SetFrameEvent(test_marker.next.eventId, True) @@ -388,7 +388,7 @@ class Overlay_Test(rdtest.TestCase): rdtest.log.success("All normal overlays are as expected") # Check the viewport overlay especially - view_marker: rd.ActionDescription = self.find_action("Viewport Test") + view_marker: rd.ActionDescription = self.find_action("Viewport Test", base_event) self.controller.SetFrameEvent(view_marker.next.eventId, True) @@ -506,7 +506,7 @@ class Overlay_Test(rdtest.TestCase): rdtest.log.success("Overlays are as expected around viewport/scissor behaviour") - test_marker: rd.ActionDescription = self.find_action("Normal Test") + test_marker: rd.ActionDescription = self.find_action("Normal Test", base_event) # Now check clear-before-X by hand, for colour and for depth self.controller.SetFrameEvent(test_marker.next.eventId, True) @@ -610,7 +610,7 @@ class Overlay_Test(rdtest.TestCase): # Now test overlays on a render-to-slice/mip case for mip in [2, 3]: - sub_marker: rd.ActionDescription = self.find_action("Subresources mip {}".format(mip)) + sub_marker: rd.ActionDescription = self.find_action("Subresources mip {}".format(mip), base_event) self.controller.SetFrameEvent(sub_marker.next.eventId, True) diff --git a/util/test/tests/D3D12/D3D12_Overlay_Test.py b/util/test/tests/D3D12/D3D12_Overlay_Test.py index 2dc75f02e..130f4dc97 100644 --- a/util/test/tests/D3D12/D3D12_Overlay_Test.py +++ b/util/test/tests/D3D12/D3D12_Overlay_Test.py @@ -7,39 +7,53 @@ class D3D12_Overlay_Test(rdtest.Overlay_Test): internal = False def check_capture(self): - super(D3D12_Overlay_Test, self).check_capture() - out: rd.ReplayOutput = self.controller.CreateOutput(rd.CreateHeadlessWindowingData(100, 100), rd.ReplayOutputType.Texture) - # Don't check any pixel values, but ensure all overlays at least work with no viewport/scissor bound - sub_marker: rd.ActionDescription = self.find_action("NoView draw") - self.controller.SetFrameEvent(sub_marker.next.eventId, True) + for base_event_name in ["sm5.0", "sm5.1", "sm6.0"]: + base = self.find_action(base_event_name) - pipe: rd.PipeState = self.controller.GetPipelineState() - - tex = rd.TextureDisplay() - tex.resourceId = pipe.GetOutputTargets()[0].resourceId - - for overlay in rd.DebugOverlay: - if overlay == rd.DebugOverlay.NoOverlay: + if base is None: continue - # These overlays are just displaymodes really, not actually separate overlays - if overlay == rd.DebugOverlay.NaN or overlay == rd.DebugOverlay.Clipping: - continue + base_event = base.eventId - if overlay == rd.DebugOverlay.ClearBeforeDraw or overlay == rd.DebugOverlay.ClearBeforePass: - continue + rdtest.log.print("Checking tests on {}".format(base_event_name)) - rdtest.log.success("Checking overlay {} with no viewport/scissor".format(str(overlay))) + super(D3D12_Overlay_Test, self).check_capture(base_event) - tex.overlay = overlay - out.SetTextureDisplay(tex) + rdtest.log.success("Base tests worked on {}".format(base_event_name)) - out.Display() + # Don't check any pixel values, but ensure all overlays at least work with no viewport/scissor bound + sub_marker: rd.ActionDescription = self.find_action("NoView draw", base_event) + self.controller.SetFrameEvent(sub_marker.next.eventId, True) - overlay_id: rd.ResourceId = out.GetDebugOverlayTexID() + pipe: rd.PipeState = self.controller.GetPipelineState() - rdtest.log.success("Overlay {} rendered with no viewport/scissor".format(str(overlay))) + tex = rd.TextureDisplay() + tex.resourceId = pipe.GetOutputTargets()[0].resourceId + + for overlay in rd.DebugOverlay: + if overlay == rd.DebugOverlay.NoOverlay: + continue + + # These overlays are just displaymodes really, not actually separate overlays + if overlay == rd.DebugOverlay.NaN or overlay == rd.DebugOverlay.Clipping: + continue + + if overlay == rd.DebugOverlay.ClearBeforeDraw or overlay == rd.DebugOverlay.ClearBeforePass: + continue + + rdtest.log.success("Checking overlay {} with no viewport/scissor".format(str(overlay))) + + tex.overlay = overlay + out.SetTextureDisplay(tex) + + out.Display() + + overlay_id: rd.ResourceId = out.GetDebugOverlayTexID() + + rdtest.log.success("Overlay {} rendered with no viewport/scissor".format(str(overlay))) + + rdtest.log.success("extended tests worked on {}".format(base_event_name)) out.Shutdown()