Patch quad overdraw shaders on D3D12 to follow rules. Closes #2356

* This is a stupid requirement as the quad overdraw shader doesn't use any
  interpolators, but the D3D12 runtime complains and refuses to create a PSO
  unless the PS has a matching signature. This works as long as position is the
  first output from the previous stage, but if it isn't, PSO creation fails.
* To fix this, we take the existing shader and patch it by grafting the output
  signature from the last stage over onto the input signature, and patching up
  where the position is.
This commit is contained in:
baldurk
2021-10-20 15:44:00 +01:00
parent ec785ba167
commit 3cec544508
8 changed files with 1024 additions and 226 deletions
+712 -37
View File
@@ -23,12 +23,17 @@
******************************************************************************/
#include "common/shader_cache.h"
#include "core/settings.h"
#include "data/resource.h"
#include "driver/dx/official/d3dcompiler.h"
#include "driver/dxgi/dxgi_common.h"
#include "driver/shaders/dxbc/dxbc_bytecode_editor.h"
#include "driver/shaders/dxil/dxil_bytecode_editor.h"
#include "driver/shaders/dxil/dxil_common.h"
#include "maths/formatpacking.h"
#include "maths/matrix.h"
#include "maths/vec.h"
#include "serialise/streamio.h"
#include "stb/stb_truetype.h"
#include "strings/string_utils.h"
#include "d3d12_command_list.h"
@@ -40,20 +45,15 @@
#include "data/hlsl/hlsl_cbuffers.h"
RDOC_CONFIG(rdcstr, D3D12_Debug_OverlayDumpDirPath, "",
"Path to dump quad overdraw patched DXIL files.");
struct D3D12QuadOverdrawCallback : public D3D12ActionCallback
{
D3D12QuadOverdrawCallback(WrappedID3D12Device *dev, D3D12_SHADER_BYTECODE quadWrite,
D3D12_SHADER_BYTECODE quadWriteDXIL, const rdcarray<uint32_t> &events,
D3D12QuadOverdrawCallback(WrappedID3D12Device *dev, const rdcarray<uint32_t> &events,
ID3D12Resource *depth, ID3D12Resource *msdepth, PortableHandle dsv,
PortableHandle uav)
: m_pDevice(dev),
m_QuadWritePS(quadWrite),
m_QuadWriteDXILPS(quadWriteDXIL),
m_Events(events),
m_Depth(depth),
m_MSDepth(msdepth),
m_DSV(dsv),
m_UAV(uav)
: m_pDevice(dev), m_Events(events), m_Depth(depth), m_MSDepth(msdepth), m_DSV(dsv), m_UAV(uav)
{
m_pDevice->GetQueue()->GetCommandData()->m_ActionCallback = this;
}
@@ -144,19 +144,17 @@ struct D3D12QuadOverdrawCallback : public D3D12ActionCallback
bool dxil =
DXBC::DXBCContainer::CheckForDXIL(pipeDesc.VS.pShaderBytecode, pipeDesc.VS.BytecodeLength);
if(dxil)
// dxil is stricter about pipeline signatures matching. On D3D11 there's an error but all
// drivers handle a PS that reads no VS outputs and only screenspace SV_Position and
// SV_Coverage. On D3D12 we need to patch to generate a new PS
m_pDevice->GetReplay()->PatchQuadWritePS(pipeDesc, dxil);
if(pipeDesc.PS.BytecodeLength == 0)
{
pipeDesc.PS = m_QuadWriteDXILPS;
if(pipeDesc.PS.BytecodeLength == 0)
{
m_pDevice->AddDebugMessage(MessageCategory::Shaders, MessageSeverity::High,
MessageSource::UnsupportedConfiguration,
"No DXIL shader available for overlay");
}
}
else
{
pipeDesc.PS = m_QuadWritePS;
m_pDevice->AddDebugMessage(
MessageCategory::Shaders, MessageSeverity::High, MessageSource::UnsupportedConfiguration,
StringFormat::Fmt("No quad write %s shader available for overlay",
dxil ? "DXIL" : "DXBC"));
return;
}
pipeDesc.pRootSignature = cache.sig;
@@ -252,8 +250,6 @@ struct D3D12QuadOverdrawCallback : public D3D12ActionCallback
}
WrappedID3D12Device *m_pDevice;
D3D12_SHADER_BYTECODE m_QuadWritePS;
D3D12_SHADER_BYTECODE m_QuadWriteDXILPS;
const rdcarray<uint32_t> &m_Events;
PortableHandle m_UAV;
PortableHandle m_DSV;
@@ -300,6 +296,697 @@ static void SetRTVDesc(D3D12_RENDER_TARGET_VIEW_DESC &rtDesc, const D3D12_RESOUR
}
}
// Fills pipeDesc.PS with a quad overdraw write pixel shader whose input signature is compatible
// with the shader stage feeding the rasterizer in pipeDesc (the DS if present, otherwise the VS).
//
// pipeDesc - pipeline description to patch. PS is always reset to empty first, so on any failure
//            the caller sees BytecodeLength == 0.
// dxil     - true to use/patch the DXIL quad write shader, false for the DXBC one.
//
// Results are cached in m_PatchedPSCache, keyed on the hash of the raster-feeding shader. The
// returned bytecode pointer refers to the cache entry's storage.
void D3D12Replay::PatchQuadWritePS(D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC &pipeDesc, bool dxil)
{
  // start with no PS, so that every early-out below leaves pipeDesc.PS empty
  pipeDesc.PS.pShaderBytecode = NULL;
  pipeDesc.PS.BytecodeLength = 0;

  ID3DBlob *quadWriteBlob = dxil ? m_Overlay.QuadOverdrawWriteDXILPS : m_Overlay.QuadOverdrawWritePS;

  if(!quadWriteBlob)
  {
    RDCERR("Compiled quad overdraw write %s blob isn't available", dxil ? "DXIL" : "DXBC");
    return;
  }

  // NOTE(review): only VS and DS are considered as the stage feeding the rasterizer here. If the
  // pipeline can still have a GS bound at this point that stage would need handling too - confirm
  // against the caller.
  D3D12_SHADER_BYTECODE *rastFeeding = &pipeDesc.VS;
  if(pipeDesc.DS.BytecodeLength > 0)
    rastFeeding = &pipeDesc.DS;

  uint32_t hash[4];
  DXBC::DXBCContainer::GetHash(hash, rastFeeding->pShaderBytecode, rastFeeding->BytecodeLength);

  rdcfixedarray<uint32_t, 4> key = hash;

  // this creates an empty entry if there isn't one yet. An entry that is still empty (e.g. because
  // a previous attempt errored out) is treated the same as no entry and patching is retried.
  bytebuf &patchedPs = m_PatchedPSCache[key];

  // check if we have this shader's matching PS cached already
  if(!patchedPs.empty())
  {
    pipeDesc.PS.pShaderBytecode = patchedPs.data();
    pipeDesc.PS.BytecodeLength = patchedPs.size();
    return;
  }

  bytebuf rastFeedingBytes((const byte *)rastFeeding->pShaderBytecode, rastFeeding->BytecodeLength);

  // get the DXBC for the previous stage
  DXBC::DXBCContainer rastFeedingDXBC(rastFeedingBytes, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U);

  // working copy of the precompiled quad write shader. All patching happens on this local copy
  // and it is only swapped into the cache entry once everything succeeded.
  bytebuf patchedDXBC((const byte *)quadWriteBlob->GetBufferPointer(),
                      quadWriteBlob->GetBufferSize());

  // if the previous stage already outputs position as the first register, we're done as the
  // precompiled quadwrite will be compatible! no patching necessary
  if(rastFeedingDXBC.GetReflection()->OutputSig.size() >= 1 &&
     rastFeedingDXBC.GetReflection()->OutputSig[0].regIndex == 0 &&
     rastFeedingDXBC.GetReflection()->OutputSig[0].systemValue == ShaderBuiltin::Position)
  {
    patchedDXBC.swap(patchedPs);
    pipeDesc.PS.pShaderBytecode = patchedPs.data();
    pipeDesc.PS.BytecodeLength = patchedPs.size();
    return;
  }

  if(!D3D12_Debug_OverlayDumpDirPath().empty())
    FileIO::WriteAll(D3D12_Debug_OverlayDumpDirPath() + "/before_quadps.dxbc", patchedDXBC);

  DXBC::DXBCContainer quadOverdrawDXBC(patchedDXBC, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U);

  if(dxil)
  {
    // these tables are built while copying the signature metadata and then used to rebuild the
    // PSV0 chunk below. The string table starts with an empty string at offset 0.
    rdcstr stringTable;
    stringTable.push_back('\0');
    rdcarray<uint32_t> semanticIndexTable;

    // per-element offsets into the two tables above, indexed by output signature element
    rdcarray<uint32_t> stringTableOffsets;
    rdcarray<uint32_t> semanticIndexTableOffsets;

    {
      // use a local bytebuf so that if we error out, patchedPs above won't be modified
      // NOTE(review): presumably the editor commits its changes to patchedDXBC when it is destroyed
      // at the end of this scope - confirm against DXIL::ProgramEditor.
      DXIL::ProgramEditor editor(
          &quadOverdrawDXBC, rastFeedingDXBC.GetDXILByteCode()->GetMetadataCount() * 2, patchedDXBC);

      const DXIL::Type *i32 = editor.GetInt32Type();
      const DXIL::Type *i8 = editor.GetInt8Type();

      // We need to make two changes: copy the raster-feeding shader's output signature wholesale
      // into the pixel shader. It only needs position, which *must* have been written by
      // definition, the coverage input comes from an intrinsic. None of the properties should need
      // to change, so it's a pure deep copy of metadata and properties to ensure a compatible
      // signature.
      //
      // After that, we need to find the input load ops in the original shader, and patch the row it
      // refers to (it would have been 0 previously). Since position is a full float4 we shouldn't
      // have to change anything else

      const DXIL::Metadata *rastEntryPoints =
          rastFeedingDXBC.GetDXILByteCode()->GetMetadataByName("dx.entryPoints");

      if(!rastEntryPoints)
      {
        RDCERR("Couldn't find entry point list");
        return;
      }

      // TODO select the entry point for multiple entry points? RT only for now
      RDCASSERT(rastEntryPoints->children.size() > 0 && rastEntryPoints->children[0]);
      const DXIL::Metadata *rastEntry = rastEntryPoints->children[0];

      // child 2 of the entry point is the signatures node, child 1 of that is the output signature
      RDCASSERT(rastEntry->children.size() > 2 && rastEntry->children[2]);
      const DXIL::Metadata *rastSigs = rastEntry->children[2];

      RDCASSERT(rastSigs->children.size() > 1 && rastSigs->children[1]);
      const DXIL::Metadata *rastOutSig = rastSigs->children[1];

      DXIL::Metadata *entryPoints = editor.GetMetadataByName("dx.entryPoints");

      if(!entryPoints)
      {
        RDCERR("Couldn't find entry point list");
        return;
      }

      // TODO select the entry point for multiple entry points? RT only for now
      RDCASSERT(entryPoints->children.size() > 0 && entryPoints->children[0]);
      DXIL::Metadata *entry = entryPoints->children[0];

      rdcstr entryName = entry->children[1]->str;

      RDCASSERT(entry->children.size() > 2 && entry->children[2]);
      DXIL::Metadata *sigs = entry->children[2];

      RDCASSERT(sigs->children.size() > 0);

      // the new input signature for the pixel shader, filled with copies of the raster-feeding
      // shader's output elements
      DXIL::Metadata inputSig;

      // element ID of SV_Position in the copied signature, ~0U until found
      uint32_t posID = ~0U;

// creates a new constant metadata node in the editor with the given type, holding the same
// integer value as oldConst
#define DUPLICATE_META_CONSTANT(newConst, type_, oldConst)                                      \
  {                                                                                             \
    DXIL::Metadata m;                                                                           \
    m.isConstant = true;                                                                        \
    m.type = type_;                                                                             \
    m.value = DXIL::Value(                                                                      \
        editor.GetOrAddConstant(DXIL::Constant(type_, oldConst->value.constant->val.u32v[0]))); \
    newConst = editor.AddMetadata(m);                                                           \
  }

      for(size_t i = 0; i < rastOutSig->children.size(); i++)
      {
        const DXIL::Metadata *oldSigEl = rastOutSig->children[i];

        DXIL::Metadata newSigEl;
        newSigEl.children.resize(oldSigEl->children.size());

        // element ID
        DUPLICATE_META_CONSTANT(newSigEl.children[0], i32, oldSigEl->children[0]);

        // semantic name
        {
          DXIL::Metadata m;
          m.isString = true;
          m.str = oldSigEl->children[1]->str;
          newSigEl.children[1] = editor.AddMetadata(m);

          // only append non-system values to the string table
          if(oldSigEl->children[3]->value.constant->val.u32v[0] == 0)
          {
            stringTableOffsets.push_back((uint32_t)stringTable.size());
            stringTable.append(m.str);
            stringTable.push_back('\0');
          }
          else
          {
            stringTableOffsets.push_back(0);
          }
        }

        // component type
        DUPLICATE_META_CONSTANT(newSigEl.children[2], i8, oldSigEl->children[2]);
        // semantic kind
        DUPLICATE_META_CONSTANT(newSigEl.children[3], i8, oldSigEl->children[3]);

        // SV_Position is 3
        if(oldSigEl->children[3]->value.constant->val.u32v[0] == 3)
          posID = oldSigEl->children[0]->value.constant->val.u32v[0];

        rdcarray<uint32_t> semIndexValues;

        // semantic indices
        const DXIL::Metadata *oldSemIdxs = oldSigEl->children[4];
        if(oldSemIdxs)
        {
          DXIL::Metadata semanticIndices;
          // the semantic index node is a list of constants
          semanticIndices.children.resize(oldSemIdxs->children.size());
          for(size_t sidx = 0; sidx < oldSemIdxs->children.size(); sidx++)
          {
            DUPLICATE_META_CONSTANT(semanticIndices.children[sidx], i32, oldSemIdxs->children[sidx]);
            semIndexValues.push_back(oldSemIdxs->children[sidx]->value.constant->val.u32v[0]);
          }

          // copy the list
          newSigEl.children[4] = editor.AddMetadata(semanticIndices);
        }

        size_t tableOffset = ~0U;

        // try to find semIndexValues in semanticIndexTable, so that identical runs of indices are
        // shared between elements instead of being appended again
        for(size_t offs = 0; offs + semIndexValues.size() <= semanticIndexTable.size(); offs++)
        {
          bool match = true;
          for(size_t sidx = 0; sidx < semIndexValues.size(); sidx++)
          {
            if(semanticIndexTable[offs + sidx] != semIndexValues[sidx])
            {
              match = false;
              break;
            }
          }

          if(match)
          {
            tableOffset = offs;
            break;
          }
        }

        // if we didn't find it, append
        if(tableOffset == ~0U)
        {
          tableOffset = semanticIndexTable.size();
          semanticIndexTable.append(semIndexValues);
        }

        semanticIndexTableOffsets.push_back((uint32_t)tableOffset);

        // interpolation mode
        DUPLICATE_META_CONSTANT(newSigEl.children[5], i8, oldSigEl->children[5]);
        // number of rows
        DUPLICATE_META_CONSTANT(newSigEl.children[6], i32, oldSigEl->children[6]);
        // number of columns
        DUPLICATE_META_CONSTANT(newSigEl.children[7], i8, oldSigEl->children[7]);
        // start row
        DUPLICATE_META_CONSTANT(newSigEl.children[8], i32, oldSigEl->children[8]);
        // start column
        DUPLICATE_META_CONSTANT(newSigEl.children[9], i8, oldSigEl->children[9]);

        // the extra tag/thing list is also a series of ints
        const DXIL::Metadata *oldTagList = oldSigEl->children[10];
        if(oldTagList)
        {
          DXIL::Metadata tagList;
          // the tag list node is a list of constants
          tagList.children.resize(oldTagList->children.size());
          for(size_t sidx = 0; sidx < oldTagList->children.size(); sidx++)
          {
            DUPLICATE_META_CONSTANT(tagList.children[sidx], i32, oldTagList->children[sidx]);
          }

          // copy the list
          newSigEl.children[10] = editor.AddMetadata(tagList);
        }

        inputSig.children.push_back(editor.AddMetadata(newSigEl));
      }

// the macro is only meaningful inside this function, don't leak it into the rest of the file
#undef DUPLICATE_META_CONSTANT

      if(posID == ~0U)
      {
        RDCERR("Couldn't find position output in previous shader");
        return;
      }

      // recreate input signature list, for backwards references
      sigs->children[0] = editor.AddMetadata(inputSig);

      // recreate backwards upwards
      sigs = editor.AddMetadata(*sigs);

      entry->children[2] = sigs;

      entry = editor.AddMetadata(*entry);

      entryPoints->children[0] = entry;

      DXIL::Function *f = editor.GetFunctionByName(entryName);

      if(!f)
      {
        RDCERR("Couldn't find entry point function '%s'", entryName.c_str());
        return;
      }

      DXIL::Value inputIDValue(editor.GetOrAddConstant(f, DXIL::Constant(i32, posID)));

      // now locate the loadInputs and patch the row they refer to. We can unconditionally patch
      // them all as there was only one input previously
      for(size_t i = 0; i < f->instructions.size(); i++)
      {
        DXIL::Instruction &inst = f->instructions[i];

        if(inst.op == DXIL::Operation::Call && inst.funcCall->name == "dx.op.loadInput.f32")
        {
          if(inst.args.size() != 5)
          {
            RDCERR("Unexpected number of arguments to loadInput");
            continue;
          }

          // arg[0] is the loadInput magic number
          // arg[1] is the ID we want to patch
          inst.args[1] = inputIDValue;
        }
      }
    }

    {
      // do a horrible franken-patch to merge the PSV0 chunks. We use the header from the existing
      // PS, change the number of declared input elements, then copy the signature elements from the
      // last shader's chunk. We can't copy the whole string table because that will likely include
      // other strings and then the damned thing won't match according to the runtime's validation.
      size_t rastPsv0Size = 0;
      const byte *rastPsv0Bytes =
          DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_PSV0, rastPsv0Size);
      StreamReader rastPsv0(rastPsv0Bytes, rastPsv0Size);

      size_t psPsv0Size = 0;
      const byte *psPsv0Bytes =
          DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_PSV0, psPsv0Size);
      StreamReader psPsv0(psPsv0Bytes, psPsv0Size);

      StreamWriter mergedPsv0(1024);

      uint32_t rastHeaderSize = 0;
      if(!rastPsv0.Read<uint32_t>(rastHeaderSize))
        return;

      uint32_t psHeaderSize = 0;
      if(!psPsv0.Read<uint32_t>(psHeaderSize))
        return;

      struct PSVHeader0
      {
        uint32_t unused[6];
      };

      struct PSVHeader1 : public PSVHeader0
      {
        // other data
        uint32_t unused1;

        // signature element counts
        uint8_t inputEls;
        uint8_t outputEls;
        uint8_t patchConstEls;

        // signature vector counts
        uint8_t inputVecs;
        uint8_t outputVecs[4];
      };

      struct PSVHeader2 : public PSVHeader1
      {
        uint32_t NumThreadsX;
        uint32_t NumThreadsY;
        uint32_t NumThreadsZ;
      };

      bytebuf copyBuf;

      PSVHeader2 rastHeader = {}, psHeader = {};

      // the header sizes come from the chunks, and could be larger than the newest header layout
      // we know about. Never read more than fits into the structs above, anything beyond that is
      // skipped (rast side) or carried across verbatim (ps side).
      const uint32_t rastKnownSize =
          rastHeaderSize < sizeof(rastHeader) ? rastHeaderSize : (uint32_t)sizeof(rastHeader);
      const uint32_t psKnownSize =
          psHeaderSize < sizeof(psHeader) ? psHeaderSize : (uint32_t)sizeof(psHeader);

      // read the whole ps header, keeping any bytes past the known layout untouched
      bytebuf psHeaderTail;
      psPsv0.Read(&psHeader, psKnownSize);
      psHeaderTail.resize(psHeaderSize - psKnownSize);
      if(!psHeaderTail.empty())
        psPsv0.Read(psHeaderTail.data(), psHeaderTail.size());

      if(rastHeaderSize < sizeof(PSVHeader1))
      {
        // only copy the header0 part out of the ps one since we won't have signature data to copy,
        // hope this is OK

        // consume the raster header so the reads below start at the resource count
        rastPsv0.SkipBytes(rastHeaderSize);

        // write only the old sized header
        mergedPsv0.Write(rastHeaderSize);
        mergedPsv0.Write(&psHeader, rastHeaderSize);
      }
      else
      {
        rastPsv0.Read(&rastHeader, rastKnownSize);
        if(rastHeaderSize > rastKnownSize)
          rastPsv0.SkipBytes(rastHeaderSize - rastKnownSize);

        // copy the previous output signature into the ps input
        psHeader.inputEls = rastHeader.outputEls;
        psHeader.inputVecs = rastHeader.outputVecs[0];

        // the ps header should have no other elements for us to worry about
        RDCASSERT(psHeader.outputEls == 0);
        RDCASSERT(psHeader.patchConstEls == 0);
        RDCASSERT(psHeader.outputVecs[0] == 0);
        RDCASSERT(psHeader.outputVecs[1] == 0);
        RDCASSERT(psHeader.outputVecs[2] == 0);
        RDCASSERT(psHeader.outputVecs[3] == 0);

        // we should have a table offset for each output entry
        RDCASSERT(rastHeader.outputEls == stringTableOffsets.size());
        RDCASSERT(rastHeader.outputEls == semanticIndexTableOffsets.size());

        mergedPsv0.Write(psHeaderSize);
        mergedPsv0.Write(&psHeader, psKnownSize);
        if(!psHeaderTail.empty())
          mergedPsv0.Write(psHeaderTail.data(), psHeaderTail.size());
      }

      // skip resource counts in raster side shader
      uint32_t rastResCount = 0;
      if(!rastPsv0.Read<uint32_t>(rastResCount))
        return;

      if(rastResCount > 0)
      {
        uint32_t resSize = 0;
        if(!rastPsv0.Read<uint32_t>(resSize))
          return;

        rastPsv0.SkipBytes(rastResCount * resSize);
      }

      uint32_t psResCount = 0;
      if(!psPsv0.Read<uint32_t>(psResCount))
        return;

      mergedPsv0.Write(psResCount);

      // copy any resources in the pixel psv0
      if(psResCount > 0)
      {
        uint32_t resSize = 0;
        if(!psPsv0.Read<uint32_t>(resSize))
          return;

        mergedPsv0.Write(resSize);

        copyBuf.resize(psResCount * resSize);
        psPsv0.Read(copyBuf.data(), copyBuf.size());
        mergedPsv0.Write(copyBuf.data(), copyBuf.size());
      }

      // if we have a new header with signature elements (what we expect)
      if(rastHeaderSize >= sizeof(PSVHeader1))
      {
        // we're effectively done with the rest of the ps chunk here, we're just going to copy the
        // old chunk except skipping the input signature. There might be data we don't need in the
        // string/indices tables but that's fine.

        // align string table to multiple of 4 size
        stringTable.resize(AlignUp4(stringTable.size()));

        // skip the old string table and semantic index table
        uint32_t stringTableSize = 0;
        if(!rastPsv0.Read<uint32_t>(stringTableSize))
          return;
        rastPsv0.SkipBytes(stringTableSize);

        uint32_t indexTableSize = 0;
        if(!rastPsv0.Read<uint32_t>(indexTableSize))
          return;
        rastPsv0.SkipBytes(indexTableSize * sizeof(uint32_t));

        // write the tables we generated while copying the signature metadata
        mergedPsv0.Write((uint32_t)stringTable.size());
        mergedPsv0.Write(stringTable.data(), stringTable.size());

        mergedPsv0.Write((uint32_t)semanticIndexTable.size());
        mergedPsv0.Write(semanticIndexTable.data(), semanticIndexTable.byteSize());

        uint32_t sigElSize = 0;
        if(!rastPsv0.Read<uint32_t>(sigElSize))
          return;

        mergedPsv0.Write(sigElSize);

        // skip any inputs from the previous stage, we don't want to copy that
        rastPsv0.SkipBytes(rastHeader.inputEls * sigElSize);

        // the leading two fields of each PSV signature element, the only ones we need to rewrite
        struct PSVSigElement
        {
          uint32_t stringTableOffset;
          uint32_t semanticTableOffset;
        };

        // copy the output elements, this will become the input elements. We need to modify the
        // table offsets to match the one we generated
        for(uint8_t el = 0; el < rastHeader.outputEls; el++)
        {
          copyBuf.resize(sigElSize);
          rastPsv0.Read(copyBuf.data(), copyBuf.size());

          PSVSigElement *sigEl = (PSVSigElement *)copyBuf.data();
          sigEl->stringTableOffset = stringTableOffsets[el];
          sigEl->semanticTableOffset = semanticIndexTableOffsets[el];

          mergedPsv0.Write(copyBuf.data(), copyBuf.size());
        }
      }

      DXBC::DXBCContainer::ReplaceChunk(patchedDXBC, DXBC::FOURCC_PSV0, mergedPsv0.GetData(),
                                        (size_t)mergedPsv0.GetOffset());
    }
  }
  else    // dxbc bytecode not dxil
  {
    using namespace DXBCBytecode;
    using namespace DXBCBytecode::Edit;

    ProgramEditor editor(&quadOverdrawDXBC, patchedDXBC);

    // find out which register the previous shader used to write position, we don't need to declare
    // any of the others just match the register
    uint32_t posReg = 0;

    for(const SigParameter &sig : rastFeedingDXBC.GetReflection()->OutputSig)
    {
      if(sig.systemValue == ShaderBuiltin::Position)
      {
        posReg = sig.regIndex;
        break;
      }
    }

    for(size_t i = 0; i < editor.GetNumDeclarations(); i++)
    {
      Declaration &decl = editor.GetDeclaration(i);

      // there's only one SIV input
      if(decl.declaration == OpcodeType::OPCODE_DCL_INPUT_PS_SIV)
      {
        RDCASSERT(decl.operand.type == OperandType::TYPE_INPUT);
        if(decl.operand.indices.size() >= 1)
        {
          decl.operand.indices[0].index = posReg;
        }
        else
        {
          RDCERR("Unexpected number of indices for declared PS input");
        }
        break;
      }
    }

    // now patch any instructions that reference the input
    for(size_t i = 0; i < editor.GetNumInstructions(); i++)
    {
      Operation &op = editor.GetInstruction(i);

      for(Operand &operand : op.operands)
      {
        if(operand.type == OperandType::TYPE_INPUT && operand.indices.size() == 1 &&
           operand.indices[0].index == 0)
          operand.indices[0].index = posReg;
      }
    }
  }

  // copy the raster shader's OSGX to the pixel's ISGX
  {
    struct SigElement
    {
      uint32_t nameOffset;
      uint32_t semanticIdx;
      uint32_t systemType;
      uint32_t componentType;
      uint32_t registerNum;
      uint8_t mask;
      uint8_t rwMask;
      uint16_t unused;
    };

    struct SigElement7
    {
      uint32_t stream;
      SigElement el;
    };

    struct SigElement1
    {
      SigElement7 el7;
      uint32_t precision;
    };

    bytebuf osg;
    StreamWriter isg(1024);

    // element stride of the output signature we read from, and of the input signature we replace
    size_t inSigElSize = 0;
    size_t outSigElSize = 0;

    // look for the raster-feeding shader's output signature, trying each chunk flavour in turn
    size_t rastOSGSize = 0;
    const byte *rastOSGBytes =
        DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSG1, rastOSGSize);

    if(rastOSGBytes)
    {
      osg.assign(rastOSGBytes, rastOSGSize);
      inSigElSize = sizeof(SigElement1);
    }
    else
    {
      rastOSGBytes = DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSG5, rastOSGSize);

      if(rastOSGBytes)
      {
        osg.assign(rastOSGBytes, rastOSGSize);
        inSigElSize = sizeof(SigElement7);
      }
      else
      {
        rastOSGBytes =
            DXBC::DXBCContainer::FindChunk(rastFeedingBytes, DXBC::FOURCC_OSGN, rastOSGSize);

        if(!rastOSGBytes)
        {
          RDCERR("Couldn't find any output signature in rasterizing-feeding shader");
          return;
        }

        osg.assign(rastOSGBytes, rastOSGSize);
        inSigElSize = sizeof(SigElement);
      }
    }

    // determine which flavour of input signature the pixel shader has, since that is the chunk
    // that gets replaced and so dictates the element layout we must write
    size_t sz;
    if(DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_ISG1, sz))
    {
      outSigElSize = sizeof(SigElement1);
    }
    else if(DXBC::DXBCContainer::FindChunk(patchedDXBC, DXBC::FOURCC_ISGN, sz))
    {
      outSigElSize = sizeof(SigElement);
    }
    else
    {
      RDCERR("Couldn't find any input signature in pixel shader");
      return;
    }

    // the chunk starts with two uint32s, the first being the element count
    if(osg.size() < sizeof(uint32_t) * 2)
    {
      RDCERR("Output signature in rasterizing-feeding shader is too small");
      return;
    }

    uint32_t *u = (uint32_t *)osg.data();
    uint32_t numSigEls = *u;

    // where the data following the elements begins, in the source and destination layouts
    const size_t inStringsOffset = sizeof(uint32_t) * 2 + inSigElSize * numSigEls;
    const size_t outStringsOffset = sizeof(uint32_t) * 2 + outSigElSize * numSigEls;

    if(inStringsOffset > osg.size())
    {
      RDCERR("Output signature in rasterizing-feeding shader is truncated");
      return;
    }

    isg.Write(u[0]);
    isg.Write(u[1]);

    for(uint32_t el = 0; el < numSigEls; el++)
    {
      SigElement1 s = {};

      size_t offset = sizeof(uint32_t) * 2 + inSigElSize * el;

      // read the input element into wherever it sits. We can leave any other elements
      // (stream/precision) as 0 and that's fine
      if(inSigElSize == sizeof(SigElement1))
        memcpy(&s, osg.data() + offset, inSigElSize);
      else if(inSigElSize == sizeof(SigElement7))
        memcpy(&s.el7, osg.data() + offset, inSigElSize);
      else if(inSigElSize == sizeof(SigElement))
        memcpy(&s.el7.el, osg.data() + offset, inSigElSize);

      // if the element stride changes between the source and destination chunk, the strings
      // after the elements move, so rebase the name offset. This is a no-op when the strides match.
      // NOTE(review): assumes nameOffset is relative to the start of the chunk data - confirm
      // against the DXBC container parsing code.
      s.el7.el.nameOffset = uint32_t(s.el7.el.nameOffset - inStringsOffset + outStringsOffset);

      // set the rw mask
      s.el7.el.rwMask = 0;

      // dxbc seems to set the rwMask to .xy for position being read
      if(!dxil && s.el7.el.systemType == 1)
        s.el7.el.rwMask = 0x3;

      // write the output element in the layout of the chunk we're replacing. outSigElSize can only
      // be one of these two sizes here, so every element is always written
      if(outSigElSize == sizeof(SigElement1))
        isg.Write(s);
      else
        isg.Write(s.el7.el);
    }

    // everything after the elements (the semantic name strings) is copied across unchanged
    isg.Write(osg.data() + inStringsOffset, osg.size() - inStringsOffset);

    DXBC::DXBCContainer::ReplaceChunk(
        patchedDXBC, outSigElSize == sizeof(SigElement1) ? DXBC::FOURCC_ISG1 : DXBC::FOURCC_ISGN,
        isg.GetData(), (size_t)isg.GetOffset());
  }

  // store the patched DXBC into the cache result
  patchedDXBC.swap(patchedPs);

  if(!D3D12_Debug_OverlayDumpDirPath().empty())
    FileIO::WriteAll(D3D12_Debug_OverlayDumpDirPath() + "/after_quadps.dxbc", patchedPs);

  // NOTE(review): the disassembly is generated and discarded. This looks like a debugging aid to
  // exercise parsing of the patched shader - confirm whether it is still wanted.
  DXBC::DXBCContainer(patchedPs, rdcstr(), GraphicsAPI::D3D12, ~0U, ~0U).GetDisassembly();

  pipeDesc.PS.pShaderBytecode = patchedPs.data();
  pipeDesc.PS.BytecodeLength = patchedPs.size();
}
RenderOutputSubresource D3D12Replay::GetRenderOutputSubresource(ResourceId id)
{
const D3D12RenderState &rs = m_pDevice->GetQueue()->GetCommandData()->m_RenderState;
@@ -1365,17 +2052,6 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
m_pDevice->ReplayLog(0, events[0], eReplay_WithoutDraw);
D3D12_SHADER_BYTECODE quadWrite;
quadWrite.BytecodeLength = m_Overlay.QuadOverdrawWritePS->GetBufferSize();
quadWrite.pShaderBytecode = m_Overlay.QuadOverdrawWritePS->GetBufferPointer();
D3D12_SHADER_BYTECODE quadWriteDXIL = {};
if(m_Overlay.QuadOverdrawWriteDXILPS)
{
quadWriteDXIL.BytecodeLength = m_Overlay.QuadOverdrawWriteDXILPS->GetBufferSize();
quadWriteDXIL.pShaderBytecode = m_Overlay.QuadOverdrawWriteDXILPS->GetBufferPointer();
}
ID3D12Resource *overrideDepth = NULL;
ResourceId res = rs.GetDSVID();
@@ -1410,8 +2086,7 @@ ResourceId D3D12Replay::RenderOverlay(ResourceId texid, FloatVector clearCol, De
}
// declare callback struct here
D3D12QuadOverdrawCallback cb(m_pDevice, quadWrite, quadWriteDXIL, events, overrideDepth,
overrideDepth ? curDepth : NULL,
D3D12QuadOverdrawCallback cb(m_pDevice, events, overrideDepth, overrideDepth ? curDepth : NULL,
overrideDepth ? ToPortableHandle(dsv) : PortableHandle(),
ToPortableHandle(GetDebugManager()->GetCPUHandle(OVERDRAW_UAV)));
+4
View File
@@ -238,6 +238,8 @@ public:
// Returns true when GetRenderOutputSubresource(id) reports a mip other than ~0U.
// NOTE(review): ~0U is presumably the "not a render output" sentinel - confirm against
// GetRenderOutputSubresource.
bool IsRenderOutput(ResourceId id) { return GetRenderOutputSubresource(id).mip != ~0U; }
void FileChanged() {}
AMDCounters *GetAMDCounters() { return m_pAMDCounters; }
void PatchQuadWritePS(D3D12_EXPANDED_PIPELINE_STATE_STREAM_DESC &pipeDesc, bool dxil);
private:
void FillRootElements(uint32_t eventId, const D3D12RenderState::RootSignature &rootSig,
const ShaderBindpointMapping *mappings[(uint32_t)ShaderStage::Count],
@@ -481,6 +483,8 @@ private:
rdcarray<rdcstr> m_CustomShaderIncludes;
std::map<rdcfixedarray<uint32_t, 4>, bytebuf> m_PatchedPSCache;
void FillTimersAMD(uint32_t *eventStartID, uint32_t *sampleIndex, rdcarray<uint32_t> *eventIDs);
rdcarray<CounterResult> FetchCountersAMD(const rdcarray<GPUCounter> &counters);
@@ -159,6 +159,7 @@ public:
// Erases count entries from m_Instructions starting at index idx. count defaults to 1.
void RemoveOperation(size_t idx, size_t count = 1) { m_Instructions.erase(idx, count); }
// Returns a mutable reference to the entry at index idx in m_Instructions, so callers can edit it
// in place. No range check is done in this accessor itself.
Operation &GetInstruction(size_t idx) { return m_Instructions[idx]; };
// Returns a mutable reference to the entry at index idx in m_Declarations, so callers can edit it
// in place. No range check is done in this accessor itself.
Declaration &GetDeclaration(size_t idx) { return m_Declarations[idx]; };
private:
bytebuf &m_OutBlob;
bool m_SM51 = false;
@@ -304,6 +304,15 @@ bool Program::Valid(const byte *bytes, size_t length)
ptr + offsetof(ProgramHeader, DxilMagic) + header->BitcodeOffset, header->BitcodeSize);
}
// Searches m_NamedMeta in order for the first entry whose name is exactly equal to 'name'.
// Returns a pointer to that entry, or NULL when nothing matches.
const Metadata *Program::GetMetadataByName(const rdcstr &name) const
{
  for(const auto &named : m_NamedMeta)
  {
    if(named.name == name)
      return &named;
  }

  return NULL;
}
void ResolveForwardReference(Value &v)
{
if(!v.empty() && v.type == ValueType::Unknown)
@@ -671,6 +671,8 @@ public:
void GetLocals(const DXBC::DXBCContainer *dxbc, size_t instruction, uintptr_t offset,
rdcarray<SourceVariableMapping> &locals) const override;
const Metadata *GetMetadataByName(const rdcstr &name) const;
// Returns the combined number of entries in m_Metadata and m_NamedMeta.
size_t GetMetadataCount() const { return m_Metadata.size() + m_NamedMeta.size(); }
protected:
void MakeDisassemblyString();
+253 -160
View File
@@ -29,6 +29,51 @@ RD_TEST(D3D12_Overlay_Test, D3D12GraphicsTest)
static constexpr const char *Description =
"Makes a couple of draws that show off all the overlays in some way";
std::string vertexEndPosVert = R"EOSHADER(
struct vertin
{
float3 pos : POSITION;
float4 col : COLOR0;
float2 uv : TEXCOORD0;
};
struct v2f
{
float4 col : COLOR0;
float2 uv : TEXCOORD0;
float4 pos : SV_POSITION;
};
v2f main(vertin IN)
{
v2f OUT = (v2f)0;
OUT.pos = float4(IN.pos.xyz, 1);
OUT.col = IN.col;
OUT.uv = IN.uv;
return OUT;
}
)EOSHADER";
std::string vertexEndPosPixel = R"EOSHADER(
struct v2f
{
float4 col : COLOR0;
float2 uv : TEXCOORD0;
float4 pos : SV_POSITION;
};
float4 main(v2f IN) : SV_Target0
{
return IN.col;
}
)EOSHADER";
std::string whitePixel = R"EOSHADER(
float4 main() : SV_Target0
@@ -44,9 +89,23 @@ float4 main() : SV_Target0
if(!Init())
return 3;
ID3DBlobPtr vsblob = Compile(D3DDefaultVertex, "main", "vs_4_0");
ID3DBlobPtr psblob = Compile(D3DDefaultPixel, "main", "ps_4_0");
ID3DBlobPtr whitepsblob = Compile(whitePixel, "main", "ps_4_0");
ID3DBlobPtr vsblob[3] = {};
ID3DBlobPtr psblob[3] = {};
ID3DBlobPtr whitepsblob[3] = {};
{
int i = 0;
for(std::string profile : {"_5_0", "_5_1", "_6_0"})
{
if(i == 2 && !m_DXILSupport)
continue;
vsblob[i] = Compile(vertexEndPosVert, "main", "vs" + profile);
psblob[i] = Compile(vertexEndPosPixel, "main", "ps" + profile);
whitepsblob[i] = Compile(whitePixel, "main", "ps" + profile);
i++;
}
}
const DefaultA2V VBData[] = {
// this triangle occludes in depth
@@ -137,62 +196,70 @@ float4 main() : SV_Target0
DXGI_SAMPLE_DESC noMSAA = {1, 0};
DXGI_SAMPLE_DESC yesMSAA = {4, qual};
D3D12PSOCreator creator = MakePSO().RootSig(sig).InputLayout().VS(vsblob).PS(psblob).DSV(
DXGI_FORMAT_D32_FLOAT_S8X24_UINT);
ID3D12PipelineStatePtr depthWritePipe[3][2];
ID3D12PipelineStatePtr stencilWritePipe[3][2];
ID3D12PipelineStatePtr backgroundPipe[3][2];
ID3D12PipelineStatePtr pipe[3][2];
ID3D12PipelineStatePtr whitepipe[3];
creator.GraphicsDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
creator.GraphicsDesc.RasterizerState.DepthClipEnable = TRUE;
for(int i = 0; i < 3; i++)
{
if(vsblob[i] == NULL)
continue;
creator.GraphicsDesc.DepthStencilState.DepthEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
creator.GraphicsDesc.DepthStencilState.BackFace =
creator.GraphicsDesc.DepthStencilState.FrontFace;
creator.GraphicsDesc.DepthStencilState.StencilReadMask = 0xff;
creator.GraphicsDesc.DepthStencilState.StencilWriteMask = 0xff;
D3D12PSOCreator creator = MakePSO().RootSig(sig).InputLayout().VS(vsblob[i]).PS(psblob[i]).DSV(
DXGI_FORMAT_D32_FLOAT_S8X24_UINT);
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
ID3D12PipelineStatePtr depthWritePipe[2];
creator.GraphicsDesc.SampleDesc = noMSAA;
depthWritePipe[0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
depthWritePipe[1] = creator;
creator.GraphicsDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
creator.GraphicsDesc.RasterizerState.DepthClipEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.DepthEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
creator.GraphicsDesc.DepthStencilState.BackFace =
creator.GraphicsDesc.DepthStencilState.FrontFace;
creator.GraphicsDesc.DepthStencilState.StencilReadMask = 0xff;
creator.GraphicsDesc.DepthStencilState.StencilWriteMask = 0xff;
ID3D12PipelineStatePtr stencilWritePipe[2];
creator.GraphicsDesc.SampleDesc = noMSAA;
stencilWritePipe[0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
stencilWritePipe[1] = creator;
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
creator.GraphicsDesc.SampleDesc = noMSAA;
depthWritePipe[i][0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
depthWritePipe[i][1] = creator;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
ID3D12PipelineStatePtr backgroundPipe[2];
creator.GraphicsDesc.SampleDesc = noMSAA;
backgroundPipe[0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
backgroundPipe[1] = creator;
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_GREATER;
ID3D12PipelineStatePtr pipe[2];
creator.GraphicsDesc.SampleDesc = noMSAA;
pipe[0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
pipe[1] = creator;
creator.GraphicsDesc.SampleDesc = noMSAA;
stencilWritePipe[i][0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
stencilWritePipe[i][1] = creator;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.DepthEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
creator.PS(whitepsblob);
creator.DSV(DXGI_FORMAT_UNKNOWN);
creator.GraphicsDesc.SampleDesc = noMSAA;
ID3D12PipelineStatePtr whitepipe = creator;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
creator.GraphicsDesc.SampleDesc = noMSAA;
backgroundPipe[i][0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
backgroundPipe[i][1] = creator;
creator.GraphicsDesc.DepthStencilState.StencilEnable = TRUE;
creator.GraphicsDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_GREATER;
creator.GraphicsDesc.SampleDesc = noMSAA;
pipe[i][0] = creator;
creator.GraphicsDesc.SampleDesc = yesMSAA;
pipe[i][1] = creator;
creator.GraphicsDesc.DepthStencilState.StencilEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.DepthEnable = FALSE;
creator.GraphicsDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
creator.PS(whitepsblob[i]);
creator.DSV(DXGI_FORMAT_UNKNOWN);
creator.GraphicsDesc.SampleDesc = noMSAA;
whitepipe[i] = creator;
}
ResourceBarrier(vb, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER);
@@ -220,132 +287,158 @@ float4 main() : SV_Target0
while(Running())
{
ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
ID3D12ResourcePtr bb;
Reset(cmd);
ID3D12ResourcePtr bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
for(bool is_msaa : {false, true})
int pass = 0;
for(std::string profile : {"sm5.0", "sm5.1", "sm6.0"})
{
D3D12_CPU_DESCRIPTOR_HANDLE rtv =
MakeRTV(is_msaa ? msaatex : bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0);
if(whitepipe[pass] == NULL)
break;
cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0);
cmd->SetGraphicsRootSignature(sig);
RSSetViewport(cmd, {10.0f, 10.0f, (float)screenWidth - 20.0f, (float)screenHeight - 20.0f,
0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth, screenHeight});
OMSetRenderTargets(cmd, {rtv}, MakeDSV(is_msaa ? msaadsv : dsv).CreateCPU(0));
ClearRenderTargetView(cmd, rtv, {0.2f, 0.2f, 0.2f, 1.0f});
ClearDepthStencilView(cmd, is_msaa ? msaadsv : dsv,
D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0f, 0);
cmd->OMSetStencilRef(0x55);
// draw the setup triangles
cmd->SetPipelineState(depthWritePipe[is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 0, 0);
cmd->SetPipelineState(stencilWritePipe[is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 3, 0);
cmd->SetPipelineState(backgroundPipe[is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 6, 0);
// add a marker so we can easily locate this draw
setMarker(cmd, is_msaa ? "MSAA Test" : "Normal Test");
cmd->SetPipelineState(pipe[is_msaa ? 1 : 0]);
cmd->DrawInstanced(24, 1, 9, 0);
if(!is_msaa)
{
setMarker(cmd, "Viewport Test");
RSSetViewport(cmd, {10.0f, 10.0f, 80.0f, 80.0f, 0.0f, 1.0f});
RSSetScissorRect(cmd, {24, 24, 76, 76});
cmd->SetPipelineState(backgroundPipe[0]);
cmd->DrawInstanced(3, 1, 33, 0);
}
}
D3D12_CPU_DESCRIPTOR_HANDLE subrtv = MakeRTV(subtex)
.Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)
.FirstSlice(2)
.NumSlices(1)
.FirstMip(2)
.NumMips(1)
.CreateCPU(1);
RSSetViewport(cmd, {5.0f, 5.0f, float(screenWidth) / 4.0f - 10.0f,
float(screenHeight) / 4.0f - 10.0f, 0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth / 4, screenHeight / 4});
OMSetRenderTargets(cmd, {subrtv}, {});
ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f});
cmd->SetPipelineState(whitepipe);
subrtv = MakeRTV(subtex)
.Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)
.FirstSlice(2)
.NumSlices(1)
.FirstMip(3)
.NumMips(1)
.CreateCPU(1);
setMarker(cmd, "Subresources mip 2");
cmd->DrawInstanced(24, 1, 9, 0);
RSSetViewport(cmd, {2.0f, 2.0f, float(screenWidth / 8) - 4.0f, float(screenHeight / 8) - 4.0f,
0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth / 8, screenHeight / 8});
OMSetRenderTargets(cmd, {subrtv}, {});
ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f});
setMarker(cmd, "Subresources mip 3");
cmd->DrawInstanced(24, 1, 9, 0);
cmd->Close();
Submit({cmd});
{
cmd = GetCommandBuffer();
ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
Reset(cmd);
D3D12_CPU_DESCRIPTOR_HANDLE rtv =
MakeRTV(bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0);
if(pass == 0)
bb = StartUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
pushMarker(cmd, profile + " tests");
IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0);
cmd->SetGraphicsRootSignature(sig);
for(bool is_msaa : {false, true})
{
D3D12_CPU_DESCRIPTOR_HANDLE rtv =
MakeRTV(is_msaa ? msaatex : bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0);
OMSetRenderTargets(cmd, {rtv}, {});
cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
cmd->SetPipelineState(whitepipe);
IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0);
cmd->SetGraphicsRootSignature(sig);
setMarker(cmd, "NoView draw");
RSSetViewport(cmd, {10.0f, 10.0f, (float)screenWidth - 20.0f, (float)screenHeight - 20.0f,
0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth, screenHeight});
cmd->DrawInstanced(3, 1, 33, 0);
OMSetRenderTargets(cmd, {rtv}, MakeDSV(is_msaa ? msaadsv : dsv).CreateCPU(0));
ClearRenderTargetView(cmd, rtv, {0.2f, 0.2f, 0.2f, 1.0f});
ClearDepthStencilView(cmd, is_msaa ? msaadsv : dsv,
D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 1.0f, 0);
cmd->OMSetStencilRef(0x55);
// draw the setup triangles
cmd->SetPipelineState(depthWritePipe[pass][is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 0, 0);
cmd->SetPipelineState(stencilWritePipe[pass][is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 3, 0);
cmd->SetPipelineState(backgroundPipe[pass][is_msaa ? 1 : 0]);
cmd->DrawInstanced(3, 1, 6, 0);
// add a marker so we can easily locate this draw
setMarker(cmd, is_msaa ? "MSAA Test" : "Normal Test");
cmd->SetPipelineState(pipe[pass][is_msaa ? 1 : 0]);
cmd->DrawInstanced(24, 1, 9, 0);
if(!is_msaa)
{
setMarker(cmd, "Viewport Test");
RSSetViewport(cmd, {10.0f, 10.0f, 80.0f, 80.0f, 0.0f, 1.0f});
RSSetScissorRect(cmd, {24, 24, 76, 76});
cmd->SetPipelineState(backgroundPipe[pass][0]);
cmd->DrawInstanced(3, 1, 33, 0);
}
}
D3D12_CPU_DESCRIPTOR_HANDLE subrtv = MakeRTV(subtex)
.Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)
.FirstSlice(2)
.NumSlices(1)
.FirstMip(2)
.NumMips(1)
.CreateCPU(1);
RSSetViewport(cmd, {5.0f, 5.0f, float(screenWidth) / 4.0f - 10.0f,
float(screenHeight) / 4.0f - 10.0f, 0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth / 4, screenHeight / 4});
OMSetRenderTargets(cmd, {subrtv}, {});
ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f});
cmd->SetPipelineState(whitepipe[pass]);
subrtv = MakeRTV(subtex)
.Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)
.FirstSlice(2)
.NumSlices(1)
.FirstMip(3)
.NumMips(1)
.CreateCPU(1);
setMarker(cmd, "Subresources mip 2");
cmd->DrawInstanced(24, 1, 9, 0);
RSSetViewport(cmd, {2.0f, 2.0f, float(screenWidth / 8) - 4.0f,
float(screenHeight / 8) - 4.0f, 0.0f, 1.0f});
RSSetScissorRect(cmd, {0, 0, screenWidth / 8, screenHeight / 8});
OMSetRenderTargets(cmd, {subrtv}, {});
ClearRenderTargetView(cmd, subrtv, {0.0f, 0.0f, 0.0f, 1.0f});
setMarker(cmd, "Subresources mip 3");
cmd->DrawInstanced(24, 1, 9, 0);
cmd->Close();
Submit({cmd});
{
cmd = GetCommandBuffer();
Reset(cmd);
D3D12_CPU_DESCRIPTOR_HANDLE rtv =
MakeRTV(bb).Format(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB).CreateCPU(0);
cmd->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
IASetVertexBuffer(cmd, vb, sizeof(DefaultA2V), 0);
cmd->SetGraphicsRootSignature(sig);
OMSetRenderTargets(cmd, {rtv}, {});
cmd->SetPipelineState(whitepipe[0]);
setMarker(cmd, "NoView draw");
cmd->DrawInstanced(3, 1, 33, 0);
popMarker(cmd);
cmd->Close();
}
Submit({cmd});
pass++;
}
{
ID3D12GraphicsCommandListPtr cmd = GetCommandBuffer();
Reset(cmd);
FinishUsingBackbuffer(cmd, D3D12_RESOURCE_STATE_RENDER_TARGET);
cmd->Close();
}
Submit({cmd});
Submit({cmd});
}
Present();
}
+6 -6
View File
@@ -6,7 +6,7 @@ import rdtest
class Overlay_Test(rdtest.TestCase):
internal = True
def check_capture(self):
def check_capture(self, base_event=0):
out: rd.ReplayOutput = self.controller.CreateOutput(rd.CreateHeadlessWindowingData(100, 100), rd.ReplayOutputType.Texture)
self.check(out is not None)
@@ -17,9 +17,9 @@ class Overlay_Test(rdtest.TestCase):
for is_msaa in [False, True]:
if is_msaa:
test_marker: rd.ActionDescription = self.find_action("MSAA Test")
test_marker: rd.ActionDescription = self.find_action("MSAA Test", base_event)
else:
test_marker: rd.ActionDescription = self.find_action("Normal Test")
test_marker: rd.ActionDescription = self.find_action("Normal Test", base_event)
self.controller.SetFrameEvent(test_marker.next.eventId, True)
@@ -388,7 +388,7 @@ class Overlay_Test(rdtest.TestCase):
rdtest.log.success("All normal overlays are as expected")
# Check the viewport overlay especially
view_marker: rd.ActionDescription = self.find_action("Viewport Test")
view_marker: rd.ActionDescription = self.find_action("Viewport Test", base_event)
self.controller.SetFrameEvent(view_marker.next.eventId, True)
@@ -506,7 +506,7 @@ class Overlay_Test(rdtest.TestCase):
rdtest.log.success("Overlays are as expected around viewport/scissor behaviour")
test_marker: rd.ActionDescription = self.find_action("Normal Test")
test_marker: rd.ActionDescription = self.find_action("Normal Test", base_event)
# Now check clear-before-X by hand, for colour and for depth
self.controller.SetFrameEvent(test_marker.next.eventId, True)
@@ -610,7 +610,7 @@ class Overlay_Test(rdtest.TestCase):
# Now test overlays on a render-to-slice/mip case
for mip in [2, 3]:
sub_marker: rd.ActionDescription = self.find_action("Subresources mip {}".format(mip))
sub_marker: rd.ActionDescription = self.find_action("Subresources mip {}".format(mip), base_event)
self.controller.SetFrameEvent(sub_marker.next.eventId, True)
+37 -23
View File
@@ -7,39 +7,53 @@ class D3D12_Overlay_Test(rdtest.Overlay_Test):
internal = False
def check_capture(self):
# D3D12-specific capture check: runs the shared Overlay_Test.check_capture checks and then
# displays every remaining debug overlay on the action following the "NoView draw" marker,
# without comparing any pixel values.
# NOTE(review): this span looks like a flattened diff that interleaves the pre-change and
# post-change statements of the method (both check_capture() and check_capture(base_event)
# calls are present, and indentation is lost) - confirm against the real file before relying
# on the statement order shown here.
super(D3D12_Overlay_Test, self).check_capture()
# Headless 100x100 texture output used to display the overlays below
out: rd.ReplayOutput = self.controller.CreateOutput(rd.CreateHeadlessWindowingData(100, 100), rd.ReplayOutputType.Texture)
# Don't check any pixel values, but ensure all overlays at least work with no viewport/scissor bound
sub_marker: rd.ActionDescription = self.find_action("NoView draw")
self.controller.SetFrameEvent(sub_marker.next.eventId, True)
# Repeat the checks once for each of these marker names
for base_event_name in ["sm5.0", "sm5.1", "sm6.0"]:
base = self.find_action(base_event_name)
pipe: rd.PipeState = self.controller.GetPipelineState()
tex = rd.TextureDisplay()
tex.resourceId = pipe.GetOutputTargets()[0].resourceId
for overlay in rd.DebugOverlay:
if overlay == rd.DebugOverlay.NoOverlay:
# find_action returned None for this marker name, so skip it
if base is None:
continue
# These overlays are just display modes really, not actually separate overlays
if overlay == rd.DebugOverlay.NaN or overlay == rd.DebugOverlay.Clipping:
continue
# Event ID of the marker; passed to the base-class check and to find_action below
# (presumably as the event to start searching from - verify against find_action)
base_event = base.eventId
if overlay == rd.DebugOverlay.ClearBeforeDraw or overlay == rd.DebugOverlay.ClearBeforePass:
continue
rdtest.log.print("Checking tests on {}".format(base_event_name))
rdtest.log.success("Checking overlay {} with no viewport/scissor".format(str(overlay)))
super(D3D12_Overlay_Test, self).check_capture(base_event)
tex.overlay = overlay
out.SetTextureDisplay(tex)
rdtest.log.success("Base tests worked on {}".format(base_event_name))
out.Display()
# Don't check any pixel values, but ensure all overlays at least work with no viewport/scissor bound
sub_marker: rd.ActionDescription = self.find_action("NoView draw", base_event)
self.controller.SetFrameEvent(sub_marker.next.eventId, True)
# The overlay texture ID is fetched after displaying; it is not read again in this method
overlay_id: rd.ResourceId = out.GetDebugOverlayTexID()
pipe: rd.PipeState = self.controller.GetPipelineState()
rdtest.log.success("Overlay {} rendered with no viewport/scissor".format(str(overlay)))
# Display the first output target of the current pipeline state
tex = rd.TextureDisplay()
tex.resourceId = pipe.GetOutputTargets()[0].resourceId
for overlay in rd.DebugOverlay:
if overlay == rd.DebugOverlay.NoOverlay:
continue
# These overlays are just display modes really, not actually separate overlays
if overlay == rd.DebugOverlay.NaN or overlay == rd.DebugOverlay.Clipping:
continue
# The clear-before overlays are also skipped here
if overlay == rd.DebugOverlay.ClearBeforeDraw or overlay == rd.DebugOverlay.ClearBeforePass:
continue
rdtest.log.success("Checking overlay {} with no viewport/scissor".format(str(overlay)))
tex.overlay = overlay
out.SetTextureDisplay(tex)
out.Display()
# The overlay texture ID is fetched after displaying; it is not read again in this method
overlay_id: rd.ResourceId = out.GetDebugOverlayTexID()
rdtest.log.success("Overlay {} rendered with no viewport/scissor".format(str(overlay)))
rdtest.log.success("extended tests worked on {}".format(base_event_name))
out.Shutdown()