mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-05 09:30:44 +00:00
2042 lines
68 KiB
C++
2042 lines
68 KiB
C++
/******************************************************************************
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2015-2018 Baldur Karlsson
|
|
* Copyright (c) 2014 Crytek
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
******************************************************************************/
|
|
|
|
#include "data/resource.h"
|
|
#include "driver/d3d11/d3d11_renderstate.h"
|
|
#include "driver/d3d11/d3d11_resources.h"
|
|
#include "driver/shaders/dxbc/dxbc_debug.h"
|
|
#include "maths/formatpacking.h"
|
|
#include "maths/vec.h"
|
|
#include "strings/string_utils.h"
|
|
#include "d3d11_context.h"
|
|
#include "d3d11_debug.h"
|
|
#include "d3d11_manager.h"
|
|
#include "d3d11_shader_cache.h"
|
|
|
|
// over this number of cycles and things get problematic
|
|
#define SHADER_DEBUG_WARN_THRESHOLD 100000
|
|
|
|
bool PromptDebugTimeout(DXBC::ProgramType prog, uint32_t cycleCounter)
|
|
{
|
|
string msg = StringFormat::Fmt(
|
|
"RenderDoc's shader debugging has been running for over %u cycles, which indicates either a "
|
|
"very long-running loop, or possibly an infinite loop. Continuing could lead to extreme "
|
|
"memory allocations, slow UI or even crashes. Would you like to abort debugging to see what "
|
|
"has run so far?\n\n"
|
|
"Hit yes to abort debugging. Note that loading the resulting trace could take several "
|
|
"minutes.",
|
|
cycleCounter);
|
|
|
|
int ret = MessageBoxA(NULL, msg.c_str(), "Shader debugging timeout", MB_YESNO | MB_ICONWARNING);
|
|
|
|
if(ret == IDYES)
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
ShaderDebug::State D3D11DebugManager::CreateShaderDebugState(ShaderDebugTrace &trace, int quadIdx,
|
|
DXBC::DXBCFile *dxbc, bytebuf *cbufData)
|
|
{
|
|
using namespace DXBC;
|
|
using namespace ShaderDebug;
|
|
|
|
State initialState = State(quadIdx, &trace, dxbc, m_pDevice);
|
|
|
|
// use pixel shader here to get inputs
|
|
|
|
int32_t maxReg = -1;
|
|
for(size_t i = 0; i < dxbc->m_InputSig.size(); i++)
|
|
maxReg = RDCMAX(maxReg, (int32_t)dxbc->m_InputSig[i].regIndex);
|
|
|
|
bool inputCoverage = false;
|
|
|
|
for(size_t i = 0; i < dxbc->GetNumDeclarations(); i++)
|
|
{
|
|
const DXBC::ASMDecl &decl = dxbc->GetDeclaration(i);
|
|
|
|
if(decl.declaration == OPCODE_DCL_INPUT && decl.operand.type == TYPE_INPUT_COVERAGE_MASK)
|
|
{
|
|
inputCoverage = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(maxReg >= 0 || inputCoverage)
|
|
{
|
|
trace.inputs.resize(maxReg + 1 + (inputCoverage ? 1 : 0));
|
|
for(size_t i = 0; i < dxbc->m_InputSig.size(); i++)
|
|
{
|
|
SigParameter &sig = dxbc->m_InputSig[i];
|
|
|
|
ShaderVariable v;
|
|
|
|
v.name = StringFormat::Fmt("v%d (%s)", sig.regIndex, sig.semanticIdxName.c_str());
|
|
v.rows = 1;
|
|
v.columns = sig.regChannelMask & 0x8 ? 4 : sig.regChannelMask & 0x4
|
|
? 3
|
|
: sig.regChannelMask & 0x2
|
|
? 2
|
|
: sig.regChannelMask & 0x1 ? 1 : 0;
|
|
|
|
if(sig.compType == CompType::UInt)
|
|
v.type = VarType::UInt;
|
|
else if(sig.compType == CompType::SInt)
|
|
v.type = VarType::Int;
|
|
|
|
if(trace.inputs[sig.regIndex].columns == 0)
|
|
trace.inputs[sig.regIndex] = v;
|
|
else
|
|
trace.inputs[sig.regIndex].columns = RDCMAX(trace.inputs[sig.regIndex].columns, v.columns);
|
|
}
|
|
|
|
if(inputCoverage)
|
|
{
|
|
trace.inputs[maxReg + 1] = ShaderVariable("vCoverage", 0U, 0U, 0U, 0U);
|
|
trace.inputs[maxReg + 1].columns = 1;
|
|
}
|
|
}
|
|
|
|
uint32_t specialOutputs = 0;
|
|
maxReg = -1;
|
|
for(size_t i = 0; i < dxbc->m_OutputSig.size(); i++)
|
|
{
|
|
if(dxbc->m_OutputSig[i].regIndex == ~0U)
|
|
specialOutputs++;
|
|
else
|
|
maxReg = RDCMAX(maxReg, (int32_t)dxbc->m_OutputSig[i].regIndex);
|
|
}
|
|
|
|
if(maxReg >= 0 || specialOutputs > 0)
|
|
{
|
|
initialState.outputs.resize(maxReg + 1 + specialOutputs);
|
|
for(size_t i = 0; i < dxbc->m_OutputSig.size(); i++)
|
|
{
|
|
SigParameter &sig = dxbc->m_OutputSig[i];
|
|
|
|
if(sig.regIndex == ~0U)
|
|
continue;
|
|
|
|
ShaderVariable v;
|
|
|
|
v.name = StringFormat::Fmt("o%d (%s)", sig.regIndex, sig.semanticIdxName.c_str());
|
|
v.rows = 1;
|
|
v.columns = sig.regChannelMask & 0x8 ? 4 : sig.regChannelMask & 0x4
|
|
? 3
|
|
: sig.regChannelMask & 0x2
|
|
? 2
|
|
: sig.regChannelMask & 0x1 ? 1 : 0;
|
|
|
|
if(initialState.outputs[sig.regIndex].columns == 0)
|
|
initialState.outputs[sig.regIndex] = v;
|
|
else
|
|
initialState.outputs[sig.regIndex].columns =
|
|
RDCMAX(initialState.outputs[sig.regIndex].columns, v.columns);
|
|
}
|
|
|
|
int32_t outIdx = maxReg + 1;
|
|
|
|
for(size_t i = 0; i < dxbc->m_OutputSig.size(); i++)
|
|
{
|
|
SigParameter &sig = dxbc->m_OutputSig[i];
|
|
|
|
if(sig.regIndex != ~0U)
|
|
continue;
|
|
|
|
ShaderVariable v;
|
|
|
|
if(sig.systemValue == ShaderBuiltin::OutputControlPointIndex)
|
|
v.name = "vOutputControlPointID";
|
|
else if(sig.systemValue == ShaderBuiltin::DepthOutput)
|
|
v.name = "oDepth";
|
|
else if(sig.systemValue == ShaderBuiltin::DepthOutputLessEqual)
|
|
v.name = "oDepthLessEqual";
|
|
else if(sig.systemValue == ShaderBuiltin::DepthOutputGreaterEqual)
|
|
v.name = "oDepthGreaterEqual";
|
|
else if(sig.systemValue == ShaderBuiltin::MSAACoverage)
|
|
v.name = "oMask";
|
|
// if(sig.systemValue == TYPE_OUTPUT_CONTROL_POINT) str = "oOutputControlPoint";
|
|
else
|
|
{
|
|
RDCERR("Unhandled output: %s (%d)", sig.semanticName.c_str(), sig.systemValue);
|
|
continue;
|
|
}
|
|
|
|
v.rows = 1;
|
|
v.columns = sig.regChannelMask & 0x8 ? 4 : sig.regChannelMask & 0x4
|
|
? 3
|
|
: sig.regChannelMask & 0x2
|
|
? 2
|
|
: sig.regChannelMask & 0x1 ? 1 : 0;
|
|
|
|
initialState.outputs[outIdx++] = v;
|
|
}
|
|
}
|
|
|
|
trace.constantBlocks.resize(dxbc->m_CBuffers.size());
|
|
for(size_t i = 0; i < dxbc->m_CBuffers.size(); i++)
|
|
{
|
|
if(dxbc->m_CBuffers[i].descriptor.type != CBuffer::Descriptor::TYPE_CBUFFER)
|
|
continue;
|
|
|
|
std::vector<ShaderVariable> vars;
|
|
|
|
size_t dummy = 0;
|
|
FillCBufferVariables("", dummy, dxbc->m_CBuffers[i].variables, vars, true,
|
|
cbufData[dxbc->m_CBuffers[i].reg]);
|
|
|
|
trace.constantBlocks[i].members = vars;
|
|
|
|
for(size_t c = 0; c < trace.constantBlocks[i].members.size(); c++)
|
|
trace.constantBlocks[i].members[c].name =
|
|
StringFormat::Fmt("cb%u[%u] (%s)", dxbc->m_CBuffers[i].reg, (uint32_t)c,
|
|
trace.constantBlocks[i].members[c].name.c_str());
|
|
}
|
|
|
|
initialState.Init();
|
|
|
|
return initialState;
|
|
}
|
|
|
|
void D3D11DebugManager::CreateShaderGlobalState(ShaderDebug::GlobalState &global,
|
|
DXBC::DXBCFile *dxbc, uint32_t UAVStartSlot,
|
|
ID3D11UnorderedAccessView **UAVs,
|
|
ID3D11ShaderResourceView **SRVs)
|
|
{
|
|
for(int i = 0; UAVs != NULL && i + UAVStartSlot < D3D11_1_UAV_SLOT_COUNT; i++)
|
|
{
|
|
int dsti = i + UAVStartSlot;
|
|
if(UAVs[i])
|
|
{
|
|
ID3D11Resource *res = NULL;
|
|
UAVs[i]->GetResource(&res);
|
|
|
|
global.uavs[dsti].hiddenCounter = GetStructCount(UAVs[i]);
|
|
|
|
D3D11_UNORDERED_ACCESS_VIEW_DESC udesc;
|
|
UAVs[i]->GetDesc(&udesc);
|
|
|
|
DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN;
|
|
|
|
if(udesc.Format != DXGI_FORMAT_UNKNOWN)
|
|
format = udesc.Format;
|
|
|
|
if(format == DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
if(WrappedID3D11Texture1D::IsAlloc(res))
|
|
{
|
|
D3D11_TEXTURE1D_DESC desc;
|
|
((WrappedID3D11Texture1D *)res)->GetDesc(&desc);
|
|
format = desc.Format;
|
|
}
|
|
else if(WrappedID3D11Texture2D1::IsAlloc(res))
|
|
{
|
|
D3D11_TEXTURE2D_DESC desc;
|
|
((WrappedID3D11Texture2D1 *)res)->GetDesc(&desc);
|
|
format = desc.Format;
|
|
}
|
|
else if(WrappedID3D11Texture3D1::IsAlloc(res))
|
|
{
|
|
D3D11_TEXTURE3D_DESC desc;
|
|
((WrappedID3D11Texture3D1 *)res)->GetDesc(&desc);
|
|
format = desc.Format;
|
|
}
|
|
}
|
|
|
|
if(format != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
ResourceFormat fmt = MakeResourceFormat(GetTypedFormat(udesc.Format));
|
|
|
|
global.uavs[dsti].format.byteWidth = fmt.compByteWidth;
|
|
global.uavs[dsti].format.numComps = fmt.compCount;
|
|
global.uavs[dsti].format.fmt = fmt.compType;
|
|
|
|
if(udesc.Format == DXGI_FORMAT_R11G11B10_FLOAT)
|
|
global.uavs[dsti].format.byteWidth = 11;
|
|
if(udesc.Format == DXGI_FORMAT_R10G10B10A2_UINT ||
|
|
udesc.Format == DXGI_FORMAT_R10G10B10A2_UNORM)
|
|
global.uavs[dsti].format.byteWidth = 10;
|
|
}
|
|
|
|
if(udesc.ViewDimension == D3D11_UAV_DIMENSION_BUFFER)
|
|
{
|
|
global.uavs[dsti].firstElement = udesc.Buffer.FirstElement;
|
|
global.uavs[dsti].numElements = udesc.Buffer.NumElements;
|
|
}
|
|
|
|
if(res)
|
|
{
|
|
if(WrappedID3D11Buffer::IsAlloc(res))
|
|
{
|
|
GetBufferData((ID3D11Buffer *)res, 0, 0, global.uavs[dsti].data);
|
|
}
|
|
else
|
|
{
|
|
global.uavs[dsti].tex = true;
|
|
|
|
uint32_t &rowPitch = global.uavs[dsti].rowPitch;
|
|
uint32_t &depthPitch = global.uavs[dsti].depthPitch;
|
|
|
|
bytebuf &data = global.uavs[dsti].data;
|
|
|
|
if(udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE1D ||
|
|
udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE1DARRAY)
|
|
{
|
|
D3D11_TEXTURE1D_DESC desc;
|
|
((WrappedID3D11Texture1D *)res)->GetDesc(&desc);
|
|
|
|
desc.MiscFlags = 0;
|
|
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
|
|
desc.BindFlags = 0;
|
|
desc.Usage = D3D11_USAGE_STAGING;
|
|
|
|
ID3D11Texture1D *stagingTex = NULL;
|
|
m_pDevice->CreateTexture1D(&desc, NULL, &stagingTex);
|
|
|
|
m_pImmediateContext->CopyResource(stagingTex, res);
|
|
|
|
D3D11_MAPPED_SUBRESOURCE mapped;
|
|
m_pImmediateContext->Map(stagingTex, udesc.Texture1D.MipSlice, D3D11_MAP_READ, 0,
|
|
&mapped);
|
|
|
|
rowPitch = 0;
|
|
depthPitch = 0;
|
|
size_t datasize = GetByteSize(desc.Width, 1, 1, desc.Format, udesc.Texture1D.MipSlice);
|
|
|
|
uint32_t numSlices = 1;
|
|
|
|
byte *srcdata = (byte *)mapped.pData;
|
|
if(udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE1DARRAY)
|
|
{
|
|
rowPitch = mapped.RowPitch;
|
|
srcdata += udesc.Texture1DArray.FirstArraySlice * rowPitch;
|
|
numSlices = udesc.Texture1DArray.ArraySize;
|
|
datasize = numSlices * rowPitch;
|
|
}
|
|
|
|
data.resize(datasize);
|
|
|
|
// copy with all padding etc intact
|
|
memcpy(&data[0], srcdata, datasize);
|
|
|
|
m_pImmediateContext->Unmap(stagingTex, udesc.Texture1D.MipSlice);
|
|
|
|
SAFE_RELEASE(stagingTex);
|
|
}
|
|
else if(udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE2D ||
|
|
udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE2DARRAY)
|
|
{
|
|
D3D11_TEXTURE2D_DESC desc;
|
|
((WrappedID3D11Texture2D1 *)res)->GetDesc(&desc);
|
|
|
|
desc.MiscFlags = 0;
|
|
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
|
|
desc.BindFlags = 0;
|
|
desc.Usage = D3D11_USAGE_STAGING;
|
|
|
|
ID3D11Texture2D *stagingTex = NULL;
|
|
m_pDevice->CreateTexture2D(&desc, NULL, &stagingTex);
|
|
|
|
m_pImmediateContext->CopyResource(stagingTex, res);
|
|
|
|
// MipSlice in union is shared between Texture2D and Texture2DArray unions, so safe to
|
|
// use either
|
|
D3D11_MAPPED_SUBRESOURCE mapped;
|
|
m_pImmediateContext->Map(stagingTex, udesc.Texture2D.MipSlice, D3D11_MAP_READ, 0,
|
|
&mapped);
|
|
|
|
rowPitch = mapped.RowPitch;
|
|
depthPitch = 0;
|
|
size_t datasize = rowPitch * RDCMAX(1U, desc.Height >> udesc.Texture2D.MipSlice);
|
|
|
|
uint32_t numSlices = 1;
|
|
|
|
byte *srcdata = (byte *)mapped.pData;
|
|
if(udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE2DARRAY)
|
|
{
|
|
depthPitch = mapped.DepthPitch;
|
|
srcdata += udesc.Texture2DArray.FirstArraySlice * depthPitch;
|
|
numSlices = udesc.Texture2DArray.ArraySize;
|
|
datasize = numSlices * depthPitch;
|
|
}
|
|
|
|
// copy with all padding etc intact
|
|
data.resize(datasize);
|
|
|
|
memcpy(&data[0], srcdata, datasize);
|
|
|
|
m_pImmediateContext->Unmap(stagingTex, udesc.Texture2D.MipSlice);
|
|
|
|
SAFE_RELEASE(stagingTex);
|
|
}
|
|
else if(udesc.ViewDimension == D3D11_UAV_DIMENSION_TEXTURE3D)
|
|
{
|
|
D3D11_TEXTURE3D_DESC desc;
|
|
((WrappedID3D11Texture3D1 *)res)->GetDesc(&desc);
|
|
|
|
desc.MiscFlags = 0;
|
|
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;
|
|
desc.BindFlags = 0;
|
|
desc.Usage = D3D11_USAGE_STAGING;
|
|
|
|
ID3D11Texture3D *stagingTex = NULL;
|
|
m_pDevice->CreateTexture3D(&desc, NULL, &stagingTex);
|
|
|
|
m_pImmediateContext->CopyResource(stagingTex, res);
|
|
|
|
// MipSlice in union is shared between Texture2D and Texture2DArray unions, so safe to
|
|
// use either
|
|
D3D11_MAPPED_SUBRESOURCE mapped;
|
|
m_pImmediateContext->Map(stagingTex, udesc.Texture3D.MipSlice, D3D11_MAP_READ, 0,
|
|
&mapped);
|
|
|
|
rowPitch = mapped.RowPitch;
|
|
depthPitch = mapped.DepthPitch;
|
|
|
|
byte *srcdata = (byte *)mapped.pData;
|
|
srcdata += udesc.Texture3D.FirstWSlice * mapped.DepthPitch;
|
|
uint32_t numSlices = udesc.Texture3D.WSize;
|
|
size_t datasize = depthPitch * numSlices;
|
|
|
|
data.resize(datasize);
|
|
|
|
// copy with all padding etc intact
|
|
memcpy(&data[0], srcdata, datasize);
|
|
|
|
m_pImmediateContext->Unmap(stagingTex, udesc.Texture3D.MipSlice);
|
|
|
|
SAFE_RELEASE(stagingTex);
|
|
}
|
|
}
|
|
}
|
|
|
|
SAFE_RELEASE(res);
|
|
}
|
|
}
|
|
|
|
for(int i = 0; SRVs != NULL && i < D3D11_COMMONSHADER_INPUT_RESOURCE_SLOT_COUNT; i++)
|
|
{
|
|
if(SRVs[i])
|
|
{
|
|
ID3D11Resource *res = NULL;
|
|
SRVs[i]->GetResource(&res);
|
|
|
|
D3D11_SHADER_RESOURCE_VIEW_DESC sdesc;
|
|
SRVs[i]->GetDesc(&sdesc);
|
|
|
|
if(sdesc.Format != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
ResourceFormat fmt = MakeResourceFormat(sdesc.Format);
|
|
|
|
global.srvs[i].format.byteWidth = fmt.compByteWidth;
|
|
global.srvs[i].format.numComps = fmt.compCount;
|
|
global.srvs[i].format.fmt = fmt.compType;
|
|
|
|
if(sdesc.Format == DXGI_FORMAT_R11G11B10_FLOAT)
|
|
global.srvs[i].format.byteWidth = 11;
|
|
if(sdesc.Format == DXGI_FORMAT_R10G10B10A2_UINT ||
|
|
sdesc.Format == DXGI_FORMAT_R10G10B10A2_UNORM)
|
|
global.srvs[i].format.byteWidth = 10;
|
|
}
|
|
else
|
|
{
|
|
D3D11_RESOURCE_DIMENSION dim;
|
|
res->GetType(&dim);
|
|
|
|
if(dim == D3D11_RESOURCE_DIMENSION_BUFFER)
|
|
{
|
|
ID3D11Buffer *buf = (ID3D11Buffer *)res;
|
|
D3D11_BUFFER_DESC bufdesc;
|
|
buf->GetDesc(&bufdesc);
|
|
|
|
global.srvs[i].format.stride = bufdesc.StructureByteStride;
|
|
|
|
// if we didn't get a type from the SRV description, try to pull it from the declaration
|
|
for(const DXBC::ShaderInputBind &bind : dxbc->m_SRVs)
|
|
{
|
|
if(bind.reg == (uint32_t)i && bind.dimension == DXBC::ShaderInputBind::DIM_BUFFER &&
|
|
bind.retType < DXBC::RETURN_TYPE_MIXED && bind.retType != DXBC::RETURN_TYPE_UNKNOWN)
|
|
{
|
|
global.srvs[i].format.byteWidth = 4;
|
|
global.srvs[i].format.numComps = bind.numSamples;
|
|
|
|
if(bind.retType == DXBC::RETURN_TYPE_UNORM)
|
|
global.srvs[i].format.fmt = CompType::UNorm;
|
|
else if(bind.retType == DXBC::RETURN_TYPE_SNORM)
|
|
global.srvs[i].format.fmt = CompType::SNorm;
|
|
else if(bind.retType == DXBC::RETURN_TYPE_UINT)
|
|
global.srvs[i].format.fmt = CompType::UInt;
|
|
else if(bind.retType == DXBC::RETURN_TYPE_SINT)
|
|
global.srvs[i].format.fmt = CompType::SInt;
|
|
else
|
|
global.srvs[i].format.fmt = CompType::Float;
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if(sdesc.ViewDimension == D3D11_SRV_DIMENSION_BUFFER)
|
|
{
|
|
// I know this isn't what the docs say, but as best as I can tell
|
|
// this is how it's used.
|
|
global.srvs[i].firstElement = sdesc.Buffer.FirstElement;
|
|
global.srvs[i].numElements = sdesc.Buffer.NumElements;
|
|
}
|
|
else if(sdesc.ViewDimension == D3D11_SRV_DIMENSION_BUFFEREX)
|
|
{
|
|
global.srvs[i].firstElement = sdesc.BufferEx.FirstElement;
|
|
global.srvs[i].numElements = sdesc.BufferEx.NumElements;
|
|
}
|
|
|
|
if(res)
|
|
{
|
|
if(WrappedID3D11Buffer::IsAlloc(res))
|
|
{
|
|
GetBufferData((ID3D11Buffer *)res, 0, 0, global.srvs[i].data);
|
|
}
|
|
}
|
|
|
|
SAFE_RELEASE(res);
|
|
}
|
|
}
|
|
|
|
for(size_t i = 0; i < dxbc->GetNumDeclarations(); i++)
|
|
{
|
|
const DXBC::ASMDecl &decl = dxbc->GetDeclaration(i);
|
|
|
|
if(decl.declaration == DXBC::OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW ||
|
|
decl.declaration == DXBC::OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED)
|
|
{
|
|
uint32_t slot = (uint32_t)decl.operand.indices[0].index;
|
|
|
|
if(global.groupshared.size() <= slot)
|
|
{
|
|
global.groupshared.resize(slot + 1);
|
|
|
|
ShaderDebug::GlobalState::groupsharedMem &mem = global.groupshared[slot];
|
|
|
|
mem.structured = (decl.declaration == DXBC::OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED);
|
|
|
|
mem.count = decl.count;
|
|
if(mem.structured)
|
|
mem.bytestride = decl.stride;
|
|
else
|
|
mem.bytestride = 4; // raw groupshared is implicitly uint32s
|
|
|
|
mem.data.resize(mem.bytestride * mem.count);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// struct that saves pointers as we iterate through to where we ultimately
|
|
// want to copy the data to
|
|
struct DataOutput
|
|
{
|
|
DataOutput(int regster, int element, int numWords, ShaderBuiltin attr, bool inc)
|
|
{
|
|
reg = regster;
|
|
elem = element;
|
|
numwords = numWords;
|
|
sysattribute = attr;
|
|
included = inc;
|
|
}
|
|
|
|
int reg;
|
|
int elem;
|
|
ShaderBuiltin sysattribute;
|
|
|
|
int numwords;
|
|
|
|
bool included;
|
|
};
|
|
|
|
struct DebugHit
|
|
{
|
|
uint32_t numHits;
|
|
float posx;
|
|
float posy;
|
|
float depth;
|
|
uint32_t primitive;
|
|
uint32_t isFrontFace;
|
|
uint32_t sample;
|
|
uint32_t coverage;
|
|
uint32_t rawdata; // arbitrary, depending on shader
|
|
};
|
|
|
|
ShaderDebugTrace D3D11Replay::DebugVertex(uint32_t eventId, uint32_t vertid, uint32_t instid,
|
|
uint32_t idx, uint32_t instOffset, uint32_t vertOffset)
|
|
{
|
|
using namespace DXBC;
|
|
using namespace ShaderDebug;
|
|
|
|
D3D11MarkerRegion debugpixRegion(
|
|
StringFormat::Fmt("DebugVertex @ %u of (%u,%u,%u)", eventId, vertid, instid, idx));
|
|
|
|
ShaderDebugTrace empty;
|
|
|
|
const DrawcallDescription *draw = m_pDevice->GetDrawcall(eventId);
|
|
|
|
D3D11RenderStateTracker tracker(m_pImmediateContext);
|
|
|
|
ID3D11VertexShader *stateVS = NULL;
|
|
m_pImmediateContext->VSGetShader(&stateVS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11VertexShader> *vs = (WrappedID3D11Shader<ID3D11VertexShader> *)stateVS;
|
|
|
|
SAFE_RELEASE(stateVS);
|
|
|
|
if(!vs)
|
|
return empty;
|
|
|
|
DXBCFile *dxbc = vs->GetDXBC();
|
|
|
|
if(!dxbc)
|
|
return empty;
|
|
|
|
dxbc->GetDisassembly();
|
|
|
|
D3D11RenderState *rs = m_pImmediateContext->GetCurrentPipelineState();
|
|
|
|
vector<D3D11_INPUT_ELEMENT_DESC> inputlayout = m_pDevice->GetLayoutDesc(rs->IA.Layout);
|
|
|
|
set<UINT> vertexbuffers;
|
|
uint32_t trackingOffs[32] = {0};
|
|
|
|
UINT MaxStepRate = 1U;
|
|
|
|
// need special handling for other step rates
|
|
for(size_t i = 0; i < inputlayout.size(); i++)
|
|
{
|
|
if(inputlayout[i].InputSlotClass == D3D11_INPUT_PER_INSTANCE_DATA &&
|
|
inputlayout[i].InstanceDataStepRate < draw->numInstances)
|
|
MaxStepRate = RDCMAX(inputlayout[i].InstanceDataStepRate, MaxStepRate);
|
|
|
|
UINT slot =
|
|
RDCCLAMP(inputlayout[i].InputSlot, 0U, UINT(D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT - 1));
|
|
|
|
vertexbuffers.insert(slot);
|
|
|
|
if(inputlayout[i].AlignedByteOffset == ~0U)
|
|
{
|
|
inputlayout[i].AlignedByteOffset = trackingOffs[slot];
|
|
}
|
|
else
|
|
{
|
|
trackingOffs[slot] = inputlayout[i].AlignedByteOffset;
|
|
}
|
|
|
|
ResourceFormat fmt = MakeResourceFormat(inputlayout[i].Format);
|
|
|
|
trackingOffs[slot] += fmt.compByteWidth * fmt.compCount;
|
|
}
|
|
|
|
bytebuf vertData[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
|
|
bytebuf *instData = new bytebuf[MaxStepRate * D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
|
|
bytebuf staticData[D3D11_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT];
|
|
|
|
for(auto it = vertexbuffers.begin(); it != vertexbuffers.end(); ++it)
|
|
{
|
|
UINT i = *it;
|
|
if(rs->IA.VBs[i])
|
|
{
|
|
GetDebugManager()->GetBufferData(rs->IA.VBs[i],
|
|
rs->IA.Offsets[i] + rs->IA.Strides[i] * (vertOffset + idx),
|
|
rs->IA.Strides[i], vertData[i]);
|
|
|
|
for(UINT isr = 1; isr <= MaxStepRate; isr++)
|
|
{
|
|
GetDebugManager()->GetBufferData(
|
|
rs->IA.VBs[i], rs->IA.Offsets[i] + rs->IA.Strides[i] * (instOffset + (instid / isr)),
|
|
rs->IA.Strides[i], instData[i * MaxStepRate + isr - 1]);
|
|
}
|
|
|
|
GetDebugManager()->GetBufferData(rs->IA.VBs[i],
|
|
rs->IA.Offsets[i] + rs->IA.Strides[i] * instOffset,
|
|
rs->IA.Strides[i], staticData[i]);
|
|
}
|
|
}
|
|
|
|
bytebuf cbufData[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
|
|
|
|
for(int i = 0; i < D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT; i++)
|
|
if(rs->VS.ConstantBuffers[i])
|
|
GetDebugManager()->GetBufferData(rs->VS.ConstantBuffers[i],
|
|
rs->VS.CBOffsets[i] * sizeof(Vec4f), 0, cbufData[i]);
|
|
|
|
ShaderDebugTrace ret;
|
|
|
|
GlobalState global;
|
|
GetDebugManager()->CreateShaderGlobalState(global, dxbc, 0, NULL, rs->VS.SRVs);
|
|
State initialState = GetDebugManager()->CreateShaderDebugState(ret, -1, dxbc, cbufData);
|
|
|
|
for(size_t i = 0; i < ret.inputs.size(); i++)
|
|
{
|
|
if(dxbc->m_InputSig[i].systemValue == ShaderBuiltin::Undefined ||
|
|
dxbc->m_InputSig[i].systemValue ==
|
|
ShaderBuiltin::Position) // SV_Position seems to get promoted
|
|
// automatically, but it's invalid for
|
|
// vertex input
|
|
{
|
|
const D3D11_INPUT_ELEMENT_DESC *el = NULL;
|
|
|
|
std::string signame = strlower(dxbc->m_InputSig[i].semanticName);
|
|
|
|
for(size_t l = 0; l < inputlayout.size(); l++)
|
|
{
|
|
std::string layoutname = strlower(inputlayout[l].SemanticName);
|
|
|
|
if(signame == layoutname && dxbc->m_InputSig[i].semanticIndex == inputlayout[l].SemanticIndex)
|
|
{
|
|
el = &inputlayout[l];
|
|
break;
|
|
}
|
|
if(signame == layoutname + ToStr(inputlayout[l].SemanticIndex))
|
|
{
|
|
el = &inputlayout[l];
|
|
break;
|
|
}
|
|
}
|
|
|
|
RDCASSERT(el);
|
|
|
|
if(!el)
|
|
continue;
|
|
|
|
byte *srcData = NULL;
|
|
size_t dataSize = 0;
|
|
|
|
if(el->InputSlotClass == D3D11_INPUT_PER_VERTEX_DATA)
|
|
{
|
|
if(vertData[el->InputSlot].size() >= el->AlignedByteOffset)
|
|
{
|
|
srcData = &vertData[el->InputSlot][el->AlignedByteOffset];
|
|
dataSize = vertData[el->InputSlot].size() - el->AlignedByteOffset;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if(el->InstanceDataStepRate == 0 || el->InstanceDataStepRate >= draw->numInstances)
|
|
{
|
|
srcData = &staticData[el->InputSlot][el->AlignedByteOffset];
|
|
dataSize = staticData[el->InputSlot].size() - el->AlignedByteOffset;
|
|
}
|
|
else
|
|
{
|
|
UINT isrIdx = el->InputSlot * MaxStepRate + (el->InstanceDataStepRate - 1);
|
|
if(instData[isrIdx].size() >= el->AlignedByteOffset)
|
|
{
|
|
srcData = &instData[isrIdx][el->AlignedByteOffset];
|
|
dataSize = instData[isrIdx].size() - el->AlignedByteOffset;
|
|
}
|
|
}
|
|
}
|
|
|
|
ResourceFormat fmt = MakeResourceFormat(el->Format);
|
|
|
|
// more data needed than is provided
|
|
if(dxbc->m_InputSig[i].compCount > fmt.compCount)
|
|
{
|
|
ret.inputs[i].value.u.w = 1;
|
|
|
|
if(fmt.compType == CompType::Float)
|
|
ret.inputs[i].value.f.w = 1.0f;
|
|
}
|
|
|
|
// interpret resource format types
|
|
if(fmt.Special())
|
|
{
|
|
Vec3f *v3 = (Vec3f *)ret.inputs[i].value.fv;
|
|
Vec4f *v4 = (Vec4f *)ret.inputs[i].value.fv;
|
|
|
|
// only pull in all or nothing from these,
|
|
// if there's only e.g. 3 bytes remaining don't read and unpack some of
|
|
// a 4-byte resource format type
|
|
size_t packedsize = 4;
|
|
if(fmt.type == ResourceFormatType::R5G5B5A1 || fmt.type == ResourceFormatType::R5G6B5 ||
|
|
fmt.type == ResourceFormatType::R4G4B4A4)
|
|
packedsize = 2;
|
|
|
|
if(srcData == NULL || packedsize > dataSize)
|
|
{
|
|
ret.inputs[i].value.u.x = ret.inputs[i].value.u.y = ret.inputs[i].value.u.z =
|
|
ret.inputs[i].value.u.w = 0;
|
|
}
|
|
else if(fmt.type == ResourceFormatType::R5G5B5A1)
|
|
{
|
|
RDCASSERT(fmt.bgraOrder);
|
|
uint16_t packed = ((uint16_t *)srcData)[0];
|
|
*v4 = ConvertFromB5G5R5A1(packed);
|
|
}
|
|
else if(fmt.type == ResourceFormatType::R5G6B5)
|
|
{
|
|
RDCASSERT(fmt.bgraOrder);
|
|
uint16_t packed = ((uint16_t *)srcData)[0];
|
|
*v3 = ConvertFromB5G6R5(packed);
|
|
}
|
|
else if(fmt.type == ResourceFormatType::R4G4B4A4)
|
|
{
|
|
RDCASSERT(fmt.bgraOrder);
|
|
uint16_t packed = ((uint16_t *)srcData)[0];
|
|
*v4 = ConvertFromB4G4R4A4(packed);
|
|
}
|
|
else if(fmt.type == ResourceFormatType::R10G10B10A2)
|
|
{
|
|
uint32_t packed = ((uint32_t *)srcData)[0];
|
|
|
|
if(fmt.compType == CompType::UInt)
|
|
{
|
|
ret.inputs[i].value.u.z = (packed >> 0) & 0x3ff;
|
|
ret.inputs[i].value.u.y = (packed >> 10) & 0x3ff;
|
|
ret.inputs[i].value.u.x = (packed >> 20) & 0x3ff;
|
|
ret.inputs[i].value.u.w = (packed >> 30) & 0x003;
|
|
}
|
|
else
|
|
{
|
|
*v4 = ConvertFromR10G10B10A2(packed);
|
|
}
|
|
}
|
|
else if(fmt.type == ResourceFormatType::R11G11B10)
|
|
{
|
|
uint32_t packed = ((uint32_t *)srcData)[0];
|
|
*v3 = ConvertFromR11G11B10(packed);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(uint32_t c = 0; c < fmt.compCount; c++)
|
|
{
|
|
if(srcData == NULL || fmt.compByteWidth > dataSize)
|
|
{
|
|
ret.inputs[i].value.uv[c] = 0;
|
|
continue;
|
|
}
|
|
|
|
dataSize -= fmt.compByteWidth;
|
|
|
|
if(fmt.compByteWidth == 1)
|
|
{
|
|
byte *src = srcData + c * fmt.compByteWidth;
|
|
|
|
if(fmt.compType == CompType::UInt)
|
|
ret.inputs[i].value.uv[c] = *src;
|
|
else if(fmt.compType == CompType::SInt)
|
|
ret.inputs[i].value.iv[c] = *((int8_t *)src);
|
|
else if(fmt.compType == CompType::UNorm)
|
|
ret.inputs[i].value.fv[c] = float(*src) / 255.0f;
|
|
else if(fmt.compType == CompType::SNorm)
|
|
{
|
|
signed char *schar = (signed char *)src;
|
|
|
|
// -128 is mapped to -1, then -127 to -127 are mapped to -1 to 1
|
|
if(*schar == -128)
|
|
ret.inputs[i].value.fv[c] = -1.0f;
|
|
else
|
|
ret.inputs[i].value.fv[c] = float(*schar) / 127.0f;
|
|
}
|
|
else
|
|
RDCERR("Unexpected component type");
|
|
}
|
|
else if(fmt.compByteWidth == 2)
|
|
{
|
|
uint16_t *src = (uint16_t *)(srcData + c * fmt.compByteWidth);
|
|
|
|
if(fmt.compType == CompType::Float)
|
|
ret.inputs[i].value.fv[c] = ConvertFromHalf(*src);
|
|
else if(fmt.compType == CompType::UInt)
|
|
ret.inputs[i].value.uv[c] = *src;
|
|
else if(fmt.compType == CompType::SInt)
|
|
ret.inputs[i].value.iv[c] = *((int16_t *)src);
|
|
else if(fmt.compType == CompType::UNorm)
|
|
ret.inputs[i].value.fv[c] = float(*src) / float(UINT16_MAX);
|
|
else if(fmt.compType == CompType::SNorm)
|
|
{
|
|
int16_t *sint = (int16_t *)src;
|
|
|
|
// -32768 is mapped to -1, then -32767 to -32767 are mapped to -1 to 1
|
|
if(*sint == -32768)
|
|
ret.inputs[i].value.fv[c] = -1.0f;
|
|
else
|
|
ret.inputs[i].value.fv[c] = float(*sint) / 32767.0f;
|
|
}
|
|
else
|
|
RDCERR("Unexpected component type");
|
|
}
|
|
else if(fmt.compByteWidth == 4)
|
|
{
|
|
uint32_t *src = (uint32_t *)(srcData + c * fmt.compByteWidth);
|
|
|
|
if(fmt.compType == CompType::Float || fmt.compType == CompType::UInt ||
|
|
fmt.compType == CompType::SInt)
|
|
memcpy(&ret.inputs[i].value.uv[c], src, 4);
|
|
else
|
|
RDCERR("Unexpected component type");
|
|
}
|
|
}
|
|
|
|
if(fmt.bgraOrder)
|
|
{
|
|
RDCASSERT(fmt.compCount == 4);
|
|
std::swap(ret.inputs[i].value.fv[2], ret.inputs[i].value.fv[0]);
|
|
}
|
|
}
|
|
}
|
|
else if(dxbc->m_InputSig[i].systemValue == ShaderBuiltin::VertexIndex)
|
|
{
|
|
uint32_t sv_vertid = vertid;
|
|
|
|
if(draw->flags & DrawFlags::Indexed)
|
|
sv_vertid = idx;
|
|
|
|
if(dxbc->m_InputSig[i].compType == CompType::Float)
|
|
ret.inputs[i].value.f.x = ret.inputs[i].value.f.y = ret.inputs[i].value.f.z =
|
|
ret.inputs[i].value.f.w = (float)sv_vertid;
|
|
else
|
|
ret.inputs[i].value.u.x = ret.inputs[i].value.u.y = ret.inputs[i].value.u.z =
|
|
ret.inputs[i].value.u.w = sv_vertid;
|
|
}
|
|
else if(dxbc->m_InputSig[i].systemValue == ShaderBuiltin::InstanceIndex)
|
|
{
|
|
if(dxbc->m_InputSig[i].compType == CompType::Float)
|
|
ret.inputs[i].value.f.x = ret.inputs[i].value.f.y = ret.inputs[i].value.f.z =
|
|
ret.inputs[i].value.f.w = (float)instid;
|
|
else
|
|
ret.inputs[i].value.u.x = ret.inputs[i].value.u.y = ret.inputs[i].value.u.z =
|
|
ret.inputs[i].value.u.w = instid;
|
|
}
|
|
else
|
|
{
|
|
RDCERR("Unhandled system value semantic on VS input");
|
|
}
|
|
}
|
|
|
|
delete[] instData;
|
|
|
|
State last;
|
|
|
|
vector<ShaderDebugState> states;
|
|
|
|
dxbc->m_DebugInfo->GetStack(0, dxbc->GetInstruction(0).offset, initialState.callstack);
|
|
|
|
states.push_back((State)initialState);
|
|
|
|
D3D11MarkerRegion simloop("Simulation Loop");
|
|
|
|
for(int cycleCounter = 0;; cycleCounter++)
|
|
{
|
|
if(initialState.Finished())
|
|
break;
|
|
|
|
initialState = initialState.GetNext(global, NULL);
|
|
|
|
{
|
|
const ASMOperation &op = dxbc->GetInstruction((size_t)initialState.nextInstruction);
|
|
dxbc->m_DebugInfo->GetStack(initialState.nextInstruction, op.offset, initialState.callstack);
|
|
}
|
|
|
|
states.push_back((State)initialState);
|
|
|
|
if(cycleCounter == SHADER_DEBUG_WARN_THRESHOLD)
|
|
{
|
|
if(PromptDebugTimeout(DXBC::TYPE_VERTEX, cycleCounter))
|
|
break;
|
|
}
|
|
}
|
|
|
|
ret.states = states;
|
|
|
|
return ret;
|
|
}
|
|
|
|
ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t y, uint32_t sample,
|
|
uint32_t primitive)
|
|
{
|
|
using namespace DXBC;
|
|
using namespace ShaderDebug;
|
|
|
|
D3D11MarkerRegion debugpixRegion(
|
|
StringFormat::Fmt("DebugPixel @ %u of (%u,%u) %u / %u", eventId, x, y, sample, primitive));
|
|
|
|
ShaderDebugTrace empty;
|
|
|
|
D3D11RenderStateTracker tracker(m_pImmediateContext);
|
|
|
|
ID3D11PixelShader *statePS = NULL;
|
|
m_pImmediateContext->PSGetShader(&statePS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11PixelShader> *ps = (WrappedID3D11Shader<ID3D11PixelShader> *)statePS;
|
|
|
|
SAFE_RELEASE(statePS);
|
|
|
|
ID3D11GeometryShader *stateGS = NULL;
|
|
m_pImmediateContext->GSGetShader(&stateGS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11GeometryShader> *gs =
|
|
(WrappedID3D11Shader<ID3D11GeometryShader> *)stateGS;
|
|
|
|
SAFE_RELEASE(stateGS);
|
|
|
|
ID3D11DomainShader *stateDS = NULL;
|
|
m_pImmediateContext->DSGetShader(&stateDS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11DomainShader> *ds = (WrappedID3D11Shader<ID3D11DomainShader> *)stateDS;
|
|
|
|
SAFE_RELEASE(stateDS);
|
|
|
|
ID3D11VertexShader *stateVS = NULL;
|
|
m_pImmediateContext->VSGetShader(&stateVS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11VertexShader> *vs = (WrappedID3D11Shader<ID3D11VertexShader> *)stateVS;
|
|
|
|
SAFE_RELEASE(stateVS);
|
|
|
|
if(!ps)
|
|
return empty;
|
|
|
|
D3D11RenderState *rs = m_pImmediateContext->GetCurrentPipelineState();
|
|
|
|
DXBCFile *dxbc = ps->GetDXBC();
|
|
|
|
if(!dxbc)
|
|
return empty;
|
|
|
|
dxbc->GetDisassembly();
|
|
|
|
DXBCFile *prevdxbc = NULL;
|
|
|
|
if(prevdxbc == NULL && gs != NULL)
|
|
prevdxbc = gs->GetDXBC();
|
|
if(prevdxbc == NULL && ds != NULL)
|
|
prevdxbc = ds->GetDXBC();
|
|
if(prevdxbc == NULL && vs != NULL)
|
|
prevdxbc = vs->GetDXBC();
|
|
|
|
vector<DataOutput> initialValues;
|
|
|
|
string extractHlsl = "struct PSInput\n{\n";
|
|
|
|
int structureStride = 0;
|
|
|
|
if(dxbc->m_InputSig.empty())
|
|
{
|
|
extractHlsl += "float4 input_dummy : SV_Position;\n";
|
|
|
|
initialValues.push_back(DataOutput(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
|
|
|
structureStride += 4;
|
|
}
|
|
|
|
vector<string> floatInputs;
|
|
vector<pair<string, pair<uint32_t, uint32_t> > >
|
|
arrays; // name, pair<start semantic index, end semantic index>
|
|
|
|
uint32_t nextreg = 0;
|
|
|
|
for(size_t i = 0; i < dxbc->m_InputSig.size(); i++)
|
|
{
|
|
extractHlsl += " ";
|
|
|
|
bool included = true;
|
|
|
|
// handled specially to account for SV_ ordering
|
|
if(dxbc->m_InputSig[i].systemValue == ShaderBuiltin::PrimitiveIndex ||
|
|
dxbc->m_InputSig[i].systemValue == ShaderBuiltin::MSAACoverage ||
|
|
dxbc->m_InputSig[i].systemValue == ShaderBuiltin::IsFrontFace ||
|
|
dxbc->m_InputSig[i].systemValue == ShaderBuiltin::MSAASampleIndex)
|
|
{
|
|
extractHlsl += "//";
|
|
included = false;
|
|
}
|
|
|
|
bool arrayElem = false;
|
|
|
|
for(size_t a = 0; a < arrays.size(); a++)
|
|
{
|
|
if(dxbc->m_InputSig[i].semanticName == arrays[a].first &&
|
|
arrays[a].second.first <= dxbc->m_InputSig[i].semanticIndex &&
|
|
arrays[a].second.second >= dxbc->m_InputSig[i].semanticIndex)
|
|
{
|
|
extractHlsl += "//";
|
|
included = false;
|
|
arrayElem = true;
|
|
}
|
|
}
|
|
|
|
int missingreg = int(dxbc->m_InputSig[i].regIndex) - int(nextreg);
|
|
|
|
// fill in holes from output sig of previous shader if possible, to try and
|
|
// ensure the same register order
|
|
for(int dummy = 0; dummy < missingreg; dummy++)
|
|
{
|
|
bool filled = false;
|
|
|
|
if(prevdxbc)
|
|
{
|
|
for(size_t os = 0; os < prevdxbc->m_OutputSig.size(); os++)
|
|
{
|
|
if(prevdxbc->m_OutputSig[os].regIndex == nextreg + dummy)
|
|
{
|
|
filled = true;
|
|
|
|
if(prevdxbc->m_OutputSig[os].compType == CompType::Float)
|
|
extractHlsl += "float";
|
|
else if(prevdxbc->m_OutputSig[os].compType == CompType::SInt)
|
|
extractHlsl += "int";
|
|
else if(prevdxbc->m_OutputSig[os].compType == CompType::UInt)
|
|
extractHlsl += "uint";
|
|
else
|
|
RDCERR("Unexpected input signature type: %d", prevdxbc->m_OutputSig[os].compType);
|
|
|
|
int numCols = (prevdxbc->m_OutputSig[os].regChannelMask & 0x1 ? 1 : 0) +
|
|
(prevdxbc->m_OutputSig[os].regChannelMask & 0x2 ? 1 : 0) +
|
|
(prevdxbc->m_OutputSig[os].regChannelMask & 0x4 ? 1 : 0) +
|
|
(prevdxbc->m_OutputSig[os].regChannelMask & 0x8 ? 1 : 0);
|
|
|
|
structureStride += 4 * numCols;
|
|
|
|
initialValues.push_back(DataOutput(-1, 0, numCols, ShaderBuiltin::Undefined, true));
|
|
|
|
std::string name = prevdxbc->m_OutputSig[os].semanticIdxName;
|
|
|
|
extractHlsl += ToStr((uint32_t)numCols) + " input_" + name + " : " + name + ";\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
if(!filled)
|
|
{
|
|
string dummy_reg = "dummy_register";
|
|
dummy_reg += ToStr((uint32_t)nextreg + dummy);
|
|
extractHlsl += "float4 var_" + dummy_reg + " : semantic_" + dummy_reg + ";\n";
|
|
|
|
initialValues.push_back(DataOutput(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
|
|
|
structureStride += 4 * sizeof(float);
|
|
}
|
|
}
|
|
|
|
nextreg = dxbc->m_InputSig[i].regIndex + 1;
|
|
|
|
if(dxbc->m_InputSig[i].compType == CompType::Float)
|
|
{
|
|
// if we're packed with ints on either side, we must be nointerpolation
|
|
bool nointerp = false;
|
|
for(size_t j = 0; j < dxbc->m_InputSig.size(); j++)
|
|
{
|
|
if(dxbc->m_InputSig[i].regIndex == dxbc->m_InputSig[j].regIndex &&
|
|
dxbc->m_InputSig[j].compType != CompType::Float)
|
|
{
|
|
nointerp = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if(nointerp)
|
|
extractHlsl += "nointerpolation ";
|
|
|
|
extractHlsl += "float";
|
|
}
|
|
else if(dxbc->m_InputSig[i].compType == CompType::SInt)
|
|
extractHlsl += "nointerpolation int";
|
|
else if(dxbc->m_InputSig[i].compType == CompType::UInt)
|
|
extractHlsl += "nointerpolation uint";
|
|
else
|
|
RDCERR("Unexpected input signature type: %d", dxbc->m_InputSig[i].compType);
|
|
|
|
int numCols = (dxbc->m_InputSig[i].regChannelMask & 0x1 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x2 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x4 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x8 ? 1 : 0);
|
|
|
|
if(included)
|
|
structureStride += 4 * numCols;
|
|
|
|
std::string name = dxbc->m_InputSig[i].semanticIdxName;
|
|
|
|
// arrays of interpolators are handled really weirdly. They use cbuffer
|
|
// packing rules where each new value is in a new register (rather than
|
|
// e.g. 2 x float2 in a single register), but that's pointless because
|
|
// you can't dynamically index into input registers.
|
|
// If we declare those elements as a non-array, the float2s or floats
|
|
// will be packed into registers and won't match up to the previous
|
|
// shader.
|
|
// HOWEVER to add an extra bit of fun, fxc will happily pack other
|
|
// parameters not in the array into spare parts of the registers.
|
|
//
|
|
// So I think the upshot is that we can detect arrays reliably by
|
|
// whenever we encounter a float or float2 at the start of a register,
|
|
// search forward to see if the next register has an element that is the
|
|
// same semantic name and one higher semantic index. If so, there's an
|
|
// array, so keep searching to enumerate its length.
|
|
// I think this should be safe if the packing just happens to place those
|
|
// registers together.
|
|
|
|
int arrayLength = 0;
|
|
|
|
if(included && numCols <= 2 && dxbc->m_InputSig[i].regChannelMask <= 0x3)
|
|
{
|
|
uint32_t nextIdx = dxbc->m_InputSig[i].semanticIndex + 1;
|
|
|
|
for(size_t j = i + 1; j < dxbc->m_InputSig.size(); j++)
|
|
{
|
|
// if we've found the 'next' semantic
|
|
if(dxbc->m_InputSig[i].semanticName == dxbc->m_InputSig[j].semanticName &&
|
|
nextIdx == dxbc->m_InputSig[j].semanticIndex)
|
|
{
|
|
int jNumCols = (dxbc->m_InputSig[i].regChannelMask & 0x1 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x2 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x4 ? 1 : 0) +
|
|
(dxbc->m_InputSig[i].regChannelMask & 0x8 ? 1 : 0);
|
|
|
|
// if it's the same size, and it's at the start of the next register
|
|
if(jNumCols == numCols && dxbc->m_InputSig[j].regChannelMask <= 0x3)
|
|
{
|
|
if(arrayLength == 0)
|
|
arrayLength = 2;
|
|
else
|
|
arrayLength++;
|
|
|
|
// continue searching now
|
|
nextIdx++;
|
|
j = i + 1;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(arrayLength > 0)
|
|
arrays.push_back(
|
|
std::make_pair(dxbc->m_InputSig[i].semanticName,
|
|
std::make_pair(dxbc->m_InputSig[i].semanticIndex, nextIdx - 1)));
|
|
}
|
|
|
|
// as another side effect of the above, an element declared as a 1-length array won't be
|
|
// detected but it WILL be put in its own register (not packed together), so detect this
|
|
// case too.
|
|
// Note we have to search *backwards* because we need to know if this register should have
|
|
// been packed into the previous register, but wasn't. float/float2 can be packed after an
|
|
// array just fine.
|
|
if(included && i > 0 && arrayLength == 0 && numCols <= 2 &&
|
|
dxbc->m_InputSig[i].regChannelMask <= 0x3)
|
|
{
|
|
const SigParameter &prev = dxbc->m_InputSig[i - 1];
|
|
|
|
if(prev.regIndex != dxbc->m_InputSig[i].regIndex && prev.compCount <= 2 &&
|
|
prev.regChannelMask <= 0x3)
|
|
arrayLength = 1;
|
|
}
|
|
|
|
// The compiler is also really annoying and will go to great lengths to rearrange elements
|
|
// and screw up our declaration, to pack things together. E.g.:
|
|
// float2 a : TEXCOORD1;
|
|
// float4 b : TEXCOORD2;
|
|
// float4 c : TEXCOORD3;
|
|
// float2 d : TEXCOORD4;
|
|
// the compiler will move d up and pack it into the last two components of a.
|
|
// To prevent this, we look forward and backward to check that we aren't expecting to pack
|
|
// with anything, and if not then we just make it a 1-length array to ensure no packing.
|
|
// Note the regChannelMask & 0x1 means it is using .x, so it's not the tail-end of a pack
|
|
if(included && arrayLength == 0 && numCols <= 2 && (dxbc->m_InputSig[i].regChannelMask & 0x1))
|
|
{
|
|
if(i == dxbc->m_InputSig.size() - 1)
|
|
{
|
|
// the last element is never packed
|
|
arrayLength = 1;
|
|
}
|
|
else
|
|
{
|
|
// if the next reg is using .x, it wasn't packed with us
|
|
if(dxbc->m_InputSig[i + 1].regChannelMask & 0x1)
|
|
arrayLength = 1;
|
|
}
|
|
}
|
|
|
|
extractHlsl += ToStr((uint32_t)numCols) + " input_" + name;
|
|
if(arrayLength > 0)
|
|
extractHlsl += "[" + ToStr(arrayLength) + "]";
|
|
extractHlsl += " : " + name;
|
|
|
|
if(included && dxbc->m_InputSig[i].compType == CompType::Float)
|
|
{
|
|
if(arrayLength == 0)
|
|
{
|
|
floatInputs.push_back("input_" + name);
|
|
}
|
|
else
|
|
{
|
|
for(int a = 0; a < arrayLength; a++)
|
|
floatInputs.push_back("input_" + name + "[" + ToStr(a) + "]");
|
|
}
|
|
}
|
|
|
|
extractHlsl += ";\n";
|
|
|
|
int firstElem = dxbc->m_InputSig[i].regChannelMask & 0x1
|
|
? 0
|
|
: dxbc->m_InputSig[i].regChannelMask & 0x2
|
|
? 1
|
|
: dxbc->m_InputSig[i].regChannelMask & 0x4
|
|
? 2
|
|
: dxbc->m_InputSig[i].regChannelMask & 0x8 ? 3 : -1;
|
|
|
|
// arrays get added all at once (because in the struct data, they are contiguous even if
|
|
// in the input signature they're not).
|
|
if(!arrayElem)
|
|
{
|
|
if(arrayLength == 0)
|
|
{
|
|
initialValues.push_back(DataOutput(dxbc->m_InputSig[i].regIndex, firstElem, numCols,
|
|
dxbc->m_InputSig[i].systemValue, included));
|
|
}
|
|
else
|
|
{
|
|
for(int a = 0; a < arrayLength; a++)
|
|
{
|
|
initialValues.push_back(DataOutput(dxbc->m_InputSig[i].regIndex + a, firstElem, numCols,
|
|
dxbc->m_InputSig[i].systemValue, included));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
extractHlsl += "};\n\n";
|
|
|
|
uint32_t overdrawLevels = 100; // maximum number of overdraw levels
|
|
|
|
uint32_t uavslot = 0;
|
|
|
|
ID3D11DepthStencilView *depthView = NULL;
|
|
ID3D11RenderTargetView *rtView = NULL;
|
|
// preserve at least one render target and/or the depth view, so that
|
|
// we have the right multisample level on output either way
|
|
m_pImmediateContext->OMGetRenderTargets(1, &rtView, &depthView);
|
|
if(rtView != NULL)
|
|
uavslot = 1;
|
|
|
|
extractHlsl +=
|
|
"struct PSInitialData { uint hit; float3 pos; uint prim; uint fface; uint sample; uint "
|
|
"covge; float derivValid; PSInput IN; PSInput INddx; PSInput INddy; PSInput INddxfine; "
|
|
"PSInput INddyfine; };\n\n";
|
|
extractHlsl +=
|
|
"RWStructuredBuffer<PSInitialData> PSInitialBuffer : register(u" + ToStr(uavslot) + ");\n\n";
|
|
extractHlsl +=
|
|
"void ExtractInputsPS(PSInput IN, float4 debug_pixelPos : SV_Position, uint prim : "
|
|
"SV_PrimitiveID, uint sample : SV_SampleIndex, uint covge : SV_Coverage, bool fface : "
|
|
"SV_IsFrontFace)\n{\n";
|
|
extractHlsl += " uint idx = " + ToStr(overdrawLevels) + ";\n";
|
|
extractHlsl += " if(abs(debug_pixelPos.x - " + ToStr(x) +
|
|
".5) < 0.5f && abs(debug_pixelPos.y - " + ToStr(y) + ".5) < 0.5f)\n";
|
|
extractHlsl += " InterlockedAdd(PSInitialBuffer[0].hit, 1, idx);\n\n";
|
|
extractHlsl += " idx = min(idx, " + ToStr(overdrawLevels) + ");\n\n";
|
|
extractHlsl += " PSInitialBuffer[idx].pos = debug_pixelPos.xyz;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].prim = prim;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].fface = fface;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].covge = covge;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].sample = sample;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].IN = IN;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].derivValid = ddx(debug_pixelPos.x);\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddx = (PSInput)0;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddy = (PSInput)0;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddxfine = (PSInput)0;\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddyfine = (PSInput)0;\n";
|
|
|
|
for(size_t i = 0; i < floatInputs.size(); i++)
|
|
{
|
|
const string &name = floatInputs[i];
|
|
extractHlsl += " PSInitialBuffer[idx].INddx." + name + " = ddx(IN." + name + ");\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddy." + name + " = ddy(IN." + name + ");\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddxfine." + name + " = ddx_fine(IN." + name + ");\n";
|
|
extractHlsl += " PSInitialBuffer[idx].INddyfine." + name + " = ddy_fine(IN." + name + ");\n";
|
|
}
|
|
extractHlsl += "\n}";
|
|
|
|
ID3D11PixelShader *extract =
|
|
m_pDevice->GetShaderCache()->MakePShader(extractHlsl.c_str(), "ExtractInputsPS", "ps_5_0");
|
|
|
|
uint32_t structStride = sizeof(uint32_t) // uint hit;
|
|
+ sizeof(float) * 3 // float3 pos;
|
|
+ sizeof(uint32_t) // uint prim;
|
|
+ sizeof(uint32_t) // uint fface;
|
|
+ sizeof(uint32_t) // uint sample;
|
|
+ sizeof(uint32_t) // uint covge;
|
|
+ sizeof(float) // float derivValid;
|
|
+
|
|
structureStride * 5; // PSInput IN, INddx, INddy, INddxfine, INddyfine;
|
|
|
|
HRESULT hr = S_OK;
|
|
|
|
D3D11_BUFFER_DESC bdesc;
|
|
bdesc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
|
|
bdesc.CPUAccessFlags = 0;
|
|
bdesc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
|
bdesc.Usage = D3D11_USAGE_DEFAULT;
|
|
bdesc.StructureByteStride = structStride;
|
|
bdesc.ByteWidth = bdesc.StructureByteStride * (overdrawLevels + 1);
|
|
|
|
ID3D11Buffer *initialBuf = NULL;
|
|
hr = m_pDevice->CreateBuffer(&bdesc, NULL, &initialBuf);
|
|
|
|
if(FAILED(hr))
|
|
{
|
|
RDCERR("Failed to create buffer HRESULT: %s", ToStr(hr).c_str());
|
|
return empty;
|
|
}
|
|
|
|
bdesc.BindFlags = 0;
|
|
bdesc.MiscFlags = 0;
|
|
bdesc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
|
bdesc.Usage = D3D11_USAGE_STAGING;
|
|
bdesc.StructureByteStride = 0;
|
|
|
|
ID3D11Buffer *stageBuf = NULL;
|
|
hr = m_pDevice->CreateBuffer(&bdesc, NULL, &stageBuf);
|
|
|
|
if(FAILED(hr))
|
|
{
|
|
RDCERR("Failed to create buffer HRESULT: %s", ToStr(hr).c_str());
|
|
return empty;
|
|
}
|
|
|
|
D3D11_UNORDERED_ACCESS_VIEW_DESC uavdesc;
|
|
uavdesc.Format = DXGI_FORMAT_UNKNOWN;
|
|
uavdesc.Buffer.FirstElement = 0;
|
|
uavdesc.Buffer.Flags = 0;
|
|
uavdesc.Buffer.NumElements = overdrawLevels + 1;
|
|
uavdesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
|
|
|
ID3D11UnorderedAccessView *initialUAV = NULL;
|
|
hr = m_pDevice->CreateUnorderedAccessView(initialBuf, &uavdesc, &initialUAV);
|
|
|
|
if(FAILED(hr))
|
|
{
|
|
RDCERR("Failed to create buffer HRESULT: %s", ToStr(hr).c_str());
|
|
return empty;
|
|
}
|
|
|
|
UINT zero = 0;
|
|
m_pImmediateContext->ClearUnorderedAccessViewUint(initialUAV, &zero);
|
|
|
|
UINT count = (UINT)-1;
|
|
m_pImmediateContext->OMSetRenderTargetsAndUnorderedAccessViews(uavslot, &rtView, depthView,
|
|
uavslot, 1, &initialUAV, &count);
|
|
m_pImmediateContext->PSSetShader(extract, NULL, 0);
|
|
|
|
SAFE_RELEASE(rtView);
|
|
SAFE_RELEASE(depthView);
|
|
|
|
{
|
|
D3D11MarkerRegion initState("Replaying event for initial states");
|
|
|
|
m_pDevice->ReplayLog(0, eventId, eReplay_OnlyDraw);
|
|
|
|
m_pImmediateContext->CopyResource(stageBuf, initialBuf);
|
|
}
|
|
|
|
D3D11_MAPPED_SUBRESOURCE mapped;
|
|
hr = m_pImmediateContext->Map(stageBuf, 0, D3D11_MAP_READ, 0, &mapped);
|
|
|
|
if(FAILED(hr))
|
|
{
|
|
RDCERR("Failed to map stage buff HRESULT: %s", ToStr(hr).c_str());
|
|
return empty;
|
|
}
|
|
|
|
byte *initialData = new byte[bdesc.ByteWidth];
|
|
memcpy(initialData, mapped.pData, bdesc.ByteWidth);
|
|
|
|
m_pImmediateContext->Unmap(stageBuf, 0);
|
|
|
|
SAFE_RELEASE(initialUAV);
|
|
SAFE_RELEASE(initialBuf);
|
|
SAFE_RELEASE(stageBuf);
|
|
|
|
SAFE_RELEASE(extract);
|
|
|
|
DebugHit *buf = (DebugHit *)initialData;
|
|
|
|
D3D11MarkerRegion::Set(StringFormat::Fmt("Got %u hits", buf[0].numHits));
|
|
|
|
if(buf[0].numHits == 0)
|
|
{
|
|
RDCLOG("No hit for this event");
|
|
SAFE_DELETE_ARRAY(initialData);
|
|
return empty;
|
|
}
|
|
|
|
// if we encounter multiple hits at our destination pixel co-ord (or any other) we
|
|
// check to see if a specific primitive was requested (via primitive parameter not
|
|
// being set to ~0U). If it was, debug that pixel, otherwise do a best-estimate
|
|
// of which fragment was the last to successfully depth test and debug that, just by
|
|
// checking if the depth test is ordered and picking the final fragment in the series
|
|
|
|
// our debugging quad. Order is TL, TR, BL, BR
|
|
State quad[4];
|
|
|
|
// figure out the TL pixel's coords. Assume even top left (towards 0,0)
|
|
// this isn't spec'd but is a reasonable assumption.
|
|
int xTL = x & (~1);
|
|
int yTL = y & (~1);
|
|
|
|
// get the index of our desired pixel
|
|
int destIdx = (x - xTL) + 2 * (y - yTL);
|
|
|
|
bytebuf cbufData[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
|
|
|
|
for(int i = 0; i < D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT; i++)
|
|
if(rs->PS.ConstantBuffers[i])
|
|
GetDebugManager()->GetBufferData(rs->PS.ConstantBuffers[i],
|
|
rs->PS.CBOffsets[i] * sizeof(Vec4f), 0, cbufData[i]);
|
|
|
|
D3D11_COMPARISON_FUNC depthFunc = D3D11_COMPARISON_LESS;
|
|
|
|
if(rs->OM.DepthStencilState)
|
|
{
|
|
D3D11_DEPTH_STENCIL_DESC desc;
|
|
rs->OM.DepthStencilState->GetDesc(&desc);
|
|
depthFunc = desc.DepthFunc;
|
|
}
|
|
|
|
DebugHit *winner = NULL;
|
|
|
|
if(sample == ~0U)
|
|
sample = 0;
|
|
|
|
if(primitive != ~0U)
|
|
{
|
|
for(size_t i = 0; i < buf[0].numHits && i < overdrawLevels; i++)
|
|
{
|
|
DebugHit *hit = (DebugHit *)(initialData + i * structStride);
|
|
|
|
if(hit->primitive == primitive && hit->sample == sample)
|
|
winner = hit;
|
|
}
|
|
}
|
|
|
|
if(winner == NULL)
|
|
{
|
|
for(size_t i = 0; i < buf[0].numHits && i < overdrawLevels; i++)
|
|
{
|
|
DebugHit *hit = (DebugHit *)(initialData + i * structStride);
|
|
|
|
if(winner == NULL || (winner->sample != sample && hit->sample == sample) ||
|
|
depthFunc == D3D11_COMPARISON_ALWAYS || depthFunc == D3D11_COMPARISON_NEVER ||
|
|
depthFunc == D3D11_COMPARISON_NOT_EQUAL || depthFunc == D3D11_COMPARISON_EQUAL)
|
|
{
|
|
winner = hit;
|
|
continue;
|
|
}
|
|
|
|
if((depthFunc == D3D11_COMPARISON_LESS && hit->depth < winner->depth) ||
|
|
(depthFunc == D3D11_COMPARISON_LESS_EQUAL && hit->depth <= winner->depth) ||
|
|
(depthFunc == D3D11_COMPARISON_GREATER && hit->depth > winner->depth) ||
|
|
(depthFunc == D3D11_COMPARISON_GREATER_EQUAL && hit->depth >= winner->depth))
|
|
{
|
|
if(hit->sample == sample)
|
|
winner = hit;
|
|
}
|
|
}
|
|
}
|
|
|
|
if(winner == NULL)
|
|
{
|
|
RDCLOG("Couldn't find any pixels that passed depth test at target co-ordinates");
|
|
SAFE_DELETE_ARRAY(initialData);
|
|
return empty;
|
|
}
|
|
|
|
ShaderDebugTrace traces[4];
|
|
|
|
GlobalState global;
|
|
GetDebugManager()->CreateShaderGlobalState(global, dxbc, rs->OM.UAVStartSlot, rs->OM.UAVs,
|
|
rs->PS.SRVs);
|
|
|
|
{
|
|
DebugHit *hit = winner;
|
|
|
|
State initialState =
|
|
GetDebugManager()->CreateShaderDebugState(traces[destIdx], destIdx, dxbc, cbufData);
|
|
|
|
rdcarray<ShaderVariable> &ins = traces[destIdx].inputs;
|
|
if(!ins.empty() && ins.back().name == "vCoverage")
|
|
ins.back().value.u.x = hit->coverage;
|
|
|
|
initialState.semantics.coverage = hit->coverage;
|
|
initialState.semantics.primID = hit->primitive;
|
|
initialState.semantics.isFrontFace = hit->isFrontFace;
|
|
|
|
uint32_t *data = &hit->rawdata;
|
|
|
|
float *ddx = (float *)data;
|
|
|
|
// ddx(SV_Position.x) MUST be 1.0
|
|
if(*ddx != 1.0f)
|
|
{
|
|
RDCERR("Derivatives invalid");
|
|
SAFE_DELETE_ARRAY(initialData);
|
|
return empty;
|
|
}
|
|
|
|
data++;
|
|
|
|
for(size_t i = 0; i < initialValues.size(); i++)
|
|
{
|
|
int32_t *rawout = NULL;
|
|
|
|
if(initialValues[i].reg >= 0)
|
|
{
|
|
ShaderVariable &invar = traces[destIdx].inputs[initialValues[i].reg];
|
|
|
|
if(initialValues[i].sysattribute == ShaderBuiltin::PrimitiveIndex)
|
|
{
|
|
invar.value.u.x = hit->primitive;
|
|
}
|
|
else if(initialValues[i].sysattribute == ShaderBuiltin::MSAASampleIndex)
|
|
{
|
|
invar.value.u.x = hit->sample;
|
|
}
|
|
else if(initialValues[i].sysattribute == ShaderBuiltin::MSAACoverage)
|
|
{
|
|
invar.value.u.x = hit->coverage;
|
|
}
|
|
else if(initialValues[i].sysattribute == ShaderBuiltin::IsFrontFace)
|
|
{
|
|
invar.value.u.x = hit->isFrontFace ? ~0U : 0;
|
|
}
|
|
else
|
|
{
|
|
rawout = &invar.value.iv[initialValues[i].elem];
|
|
|
|
memcpy(rawout, data, initialValues[i].numwords * 4);
|
|
}
|
|
}
|
|
|
|
if(initialValues[i].included)
|
|
data += initialValues[i].numwords;
|
|
}
|
|
|
|
for(int i = 0; i < 4; i++)
|
|
{
|
|
if(i != destIdx)
|
|
traces[i] = traces[destIdx];
|
|
quad[i] = initialState;
|
|
quad[i].SetTrace(i, &traces[i]);
|
|
if(i != destIdx)
|
|
quad[i].SetHelper();
|
|
}
|
|
|
|
// We make the assumption that the coarse derivatives are
|
|
// generated from (0,0) in the quad, and fine derivatives
|
|
// are generated from the destination index and its
|
|
// neighbours in X and Y.
|
|
// This isn't spec'd but we must assume something and this
|
|
// will hopefully get us closest to reproducing actual
|
|
// results.
|
|
//
|
|
// For debugging, we need members of the quad to be able
|
|
// to generate coarse and fine derivatives.
|
|
//
|
|
// For (0,0) we only need the coarse derivatives to get
|
|
// our neighbours (1,0) and (0,1) which will give us
|
|
// coarse and fine derivatives being identical.
|
|
//
|
|
// For the others we will need to use a combination of
|
|
// coarse and fine derivatives to get the diagonal element
|
|
// in the quad. E.g. for (1,1):
|
|
//
|
|
// (1,0) = (1,1) - ddx_fine
|
|
// (0,1) = (1,1) - ddy_fine
|
|
// (0,0) = (1,1) - ddy_fine - ddx_coarse
|
|
//
|
|
// This only works if coarse and fine are calculated as we
|
|
// are assuming.
|
|
|
|
ddx = (float *)data;
|
|
|
|
for(size_t i = 0; i < initialValues.size(); i++)
|
|
{
|
|
if(!initialValues[i].included)
|
|
continue;
|
|
|
|
if(initialValues[i].reg >= 0)
|
|
{
|
|
if(destIdx == 0)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[1].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddx[w];
|
|
traces[3].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddx[w];
|
|
}
|
|
}
|
|
else if(destIdx == 1)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[0].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddx[w];
|
|
traces[2].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddx[w];
|
|
}
|
|
}
|
|
else if(destIdx == 2)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[1].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddx[w];
|
|
}
|
|
}
|
|
else if(destIdx == 3)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[0].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddx[w];
|
|
}
|
|
}
|
|
}
|
|
|
|
ddx += initialValues[i].numwords;
|
|
}
|
|
|
|
float *ddy = ddx;
|
|
|
|
for(size_t i = 0; i < initialValues.size(); i++)
|
|
{
|
|
if(!initialValues[i].included)
|
|
continue;
|
|
|
|
if(initialValues[i].reg >= 0)
|
|
{
|
|
if(destIdx == 0)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[2].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddy[w];
|
|
traces[3].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddy[w];
|
|
}
|
|
}
|
|
else if(destIdx == 1)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[2].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddy[w];
|
|
}
|
|
}
|
|
else if(destIdx == 2)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[0].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddy[w];
|
|
traces[1].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddy[w];
|
|
}
|
|
}
|
|
}
|
|
|
|
ddy += initialValues[i].numwords;
|
|
}
|
|
|
|
float *ddxfine = ddy;
|
|
|
|
for(size_t i = 0; i < initialValues.size(); i++)
|
|
{
|
|
if(!initialValues[i].included)
|
|
continue;
|
|
|
|
if(initialValues[i].reg >= 0)
|
|
{
|
|
if(destIdx == 2)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[3].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddxfine[w];
|
|
}
|
|
}
|
|
else if(destIdx == 3)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[2].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddxfine[w];
|
|
}
|
|
}
|
|
}
|
|
|
|
ddxfine += initialValues[i].numwords;
|
|
}
|
|
|
|
float *ddyfine = ddxfine;
|
|
|
|
for(size_t i = 0; i < initialValues.size(); i++)
|
|
{
|
|
if(!initialValues[i].included)
|
|
continue;
|
|
|
|
if(initialValues[i].reg >= 0)
|
|
{
|
|
if(destIdx == 1)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[3].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] += ddyfine[w];
|
|
}
|
|
}
|
|
else if(destIdx == 3)
|
|
{
|
|
for(int w = 0; w < initialValues[i].numwords; w++)
|
|
{
|
|
traces[1].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddyfine[w];
|
|
traces[0].inputs[initialValues[i].reg].value.fv[initialValues[i].elem + w] -= ddyfine[w];
|
|
}
|
|
}
|
|
}
|
|
|
|
ddyfine += initialValues[i].numwords;
|
|
}
|
|
}
|
|
|
|
SAFE_DELETE_ARRAY(initialData);
|
|
|
|
vector<ShaderDebugState> states;
|
|
|
|
dxbc->m_DebugInfo->GetStack(0, dxbc->GetInstruction(0).offset, quad[destIdx].callstack);
|
|
|
|
states.push_back((State)quad[destIdx]);
|
|
|
|
// ping pong between so that we can have 'current' quad to update into new one
|
|
State quad2[4];
|
|
|
|
State *curquad = quad;
|
|
State *newquad = quad2;
|
|
|
|
// marks any threads stalled waiting for others to catch up
|
|
bool activeMask[4] = {true, true, true, true};
|
|
|
|
int cycleCounter = 0;
|
|
|
|
D3D11MarkerRegion simloop("Simulation Loop");
|
|
|
|
// simulate lockstep until all threads are finished
|
|
bool finished = true;
|
|
do
|
|
{
|
|
for(size_t i = 0; i < 4; i++)
|
|
{
|
|
if(activeMask[i])
|
|
newquad[i] = curquad[i].GetNext(global, curquad);
|
|
else
|
|
newquad[i] = curquad[i];
|
|
}
|
|
|
|
State *a = curquad;
|
|
curquad = newquad;
|
|
newquad = a;
|
|
|
|
// if our destination quad is paused don't record multiple identical states.
|
|
if(activeMask[destIdx])
|
|
{
|
|
State &s = curquad[destIdx];
|
|
|
|
{
|
|
const ASMOperation &op = dxbc->GetInstruction((size_t)s.nextInstruction);
|
|
dxbc->m_DebugInfo->GetStack(s.nextInstruction, op.offset, s.callstack);
|
|
}
|
|
|
|
states.push_back(s);
|
|
}
|
|
|
|
// we need to make sure that control flow which converges stays in lockstep so that
|
|
// derivatives are still valid. While diverged, we don't have to keep threads in lockstep
|
|
// since using derivatives is invalid.
|
|
|
|
// Threads diverge either in ifs, loops, or switches. Due to the nature of the bytecode,
|
|
// all threads *must* pass through the same exit instruction for each, there's no jumping
|
|
// around with gotos. Note also for the same reason, the only time threads are on earlier
|
|
// instructions is if they are still catching up to a thread that has exited the control
|
|
// flow.
|
|
|
|
// So the scheme is as follows:
|
|
// * If all threads have the same nextInstruction, just continue we are still in lockstep.
|
|
// * If threads are out of lockstep, find any thread which has nextInstruction pointing
|
|
// immediately *after* an ENDIF, ENDLOOP or ENDSWITCH. Pointing directly at one is not
|
|
// an indication the thread is done, as the next step for an ENDLOOP will jump back to
|
|
// the matching LOOP and continue iterating.
|
|
// * Pause any thread matching the above until all threads are pointing to the same
|
|
// instruction. By the assumption above, all threads will eventually pass through this
|
|
// terminating instruction so we just pause any other threads and don't do anything
|
|
// until the control flow has converged and we can continue stepping in lockstep.
|
|
|
|
// mark all threads as active again.
|
|
// if we've converged, or we were never diverged, this keeps everything ticking
|
|
activeMask[0] = activeMask[1] = activeMask[2] = activeMask[3] = true;
|
|
|
|
if(curquad[0].nextInstruction != curquad[1].nextInstruction ||
|
|
curquad[0].nextInstruction != curquad[2].nextInstruction ||
|
|
curquad[0].nextInstruction != curquad[3].nextInstruction)
|
|
{
|
|
// this isn't *perfect* but it will still eventually continue. We look for the most
|
|
// advanced thread, and check to see if it's just finished a control flow. If it has
|
|
// then we assume it's at the convergence point and wait for every other thread to
|
|
// catch up, pausing any threads that reach the convergence point before others.
|
|
|
|
// Note this might mean we don't have any threads paused even within divergent flow.
|
|
// This is fine and all we care about is pausing to make sure threads don't run ahead
|
|
// into code that should be lockstep. We don't care at all about what they do within
|
|
// the code that is divergent.
|
|
|
|
// The reason this isn't perfect is that the most advanced thread could be on an
|
|
// inner loop or inner if, not the convergence point, and we could be pausing it
|
|
// fruitlessly. Worse still - it could be on a branch none of the other threads will
|
|
// take so they will never reach that exact instruction.
|
|
// But we know that all threads will eventually go through the convergence point, so
|
|
// even in that worst case if we didn't pick the right waiting point, another thread
|
|
// will overtake and become the new most advanced thread and the previous waiting
|
|
// thread will resume. So in this case we caused a thread to wait more than it should
|
|
// have but that's not a big deal as it's within divergent flow so they don't have to
|
|
// stay in lockstep. Also if all threads will eventually pass that point we picked,
|
|
// we just waited to converge even in technically divergent code which is also
|
|
// harmless.
|
|
|
|
// Phew!
|
|
|
|
uint32_t convergencePoint = 0;
|
|
|
|
// find which thread is most advanced
|
|
for(size_t i = 0; i < 4; i++)
|
|
if(curquad[i].nextInstruction > convergencePoint)
|
|
convergencePoint = curquad[i].nextInstruction;
|
|
|
|
if(convergencePoint > 0)
|
|
{
|
|
OpcodeType op = dxbc->GetInstruction(convergencePoint - 1).operation;
|
|
|
|
// if the most advnaced thread hasn't just finished control flow, then all
|
|
// threads are still running, so don't converge
|
|
if(op != OPCODE_ENDIF && op != OPCODE_ENDLOOP && op != OPCODE_ENDSWITCH)
|
|
convergencePoint = 0;
|
|
}
|
|
|
|
// pause any threads at that instruction (could be none)
|
|
for(size_t i = 0; i < 4; i++)
|
|
if(curquad[i].nextInstruction == convergencePoint)
|
|
activeMask[i] = false;
|
|
}
|
|
|
|
finished = curquad[destIdx].Finished();
|
|
|
|
cycleCounter++;
|
|
|
|
if(cycleCounter == SHADER_DEBUG_WARN_THRESHOLD)
|
|
{
|
|
if(PromptDebugTimeout(DXBC::TYPE_VERTEX, cycleCounter))
|
|
break;
|
|
}
|
|
} while(!finished);
|
|
|
|
traces[destIdx].states = states;
|
|
|
|
return traces[destIdx];
|
|
}
|
|
|
|
ShaderDebugTrace D3D11Replay::DebugThread(uint32_t eventId, const uint32_t groupid[3],
|
|
const uint32_t threadid[3])
|
|
{
|
|
using namespace DXBC;
|
|
using namespace ShaderDebug;
|
|
|
|
D3D11MarkerRegion simloop(StringFormat::Fmt("DebugThread @ %u: [%u, %u, %u] (%u, %u, %u)",
|
|
eventId, groupid[0], groupid[1], groupid[2],
|
|
threadid[0], threadid[1], threadid[2]));
|
|
|
|
ShaderDebugTrace empty;
|
|
|
|
D3D11RenderStateTracker tracker(m_pImmediateContext);
|
|
|
|
ID3D11ComputeShader *stateCS = NULL;
|
|
m_pImmediateContext->CSGetShader(&stateCS, NULL, NULL);
|
|
|
|
WrappedID3D11Shader<ID3D11ComputeShader> *cs = (WrappedID3D11Shader<ID3D11ComputeShader> *)stateCS;
|
|
|
|
SAFE_RELEASE(stateCS);
|
|
|
|
if(!cs)
|
|
return empty;
|
|
|
|
DXBCFile *dxbc = cs->GetDXBC();
|
|
|
|
if(!dxbc)
|
|
return empty;
|
|
|
|
dxbc->GetDisassembly();
|
|
|
|
D3D11RenderState *rs = m_pImmediateContext->GetCurrentPipelineState();
|
|
|
|
bytebuf cbufData[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT];
|
|
|
|
for(int i = 0; i < D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT; i++)
|
|
if(rs->CS.ConstantBuffers[i])
|
|
GetDebugManager()->GetBufferData(rs->CS.ConstantBuffers[i],
|
|
rs->CS.CBOffsets[i] * sizeof(Vec4f), 0, cbufData[i]);
|
|
|
|
ShaderDebugTrace ret;
|
|
|
|
GlobalState global;
|
|
GetDebugManager()->CreateShaderGlobalState(global, dxbc, 0, rs->CSUAVs, rs->CS.SRVs);
|
|
State initialState = GetDebugManager()->CreateShaderDebugState(ret, -1, dxbc, cbufData);
|
|
|
|
for(int i = 0; i < 3; i++)
|
|
{
|
|
initialState.semantics.GroupID[i] = groupid[i];
|
|
initialState.semantics.ThreadID[i] = threadid[i];
|
|
}
|
|
|
|
vector<ShaderDebugState> states;
|
|
|
|
dxbc->m_DebugInfo->GetStack(0, dxbc->GetInstruction(0).offset, initialState.callstack);
|
|
|
|
states.push_back((State)initialState);
|
|
|
|
for(int cycleCounter = 0;; cycleCounter++)
|
|
{
|
|
if(initialState.Finished())
|
|
break;
|
|
|
|
initialState = initialState.GetNext(global, NULL);
|
|
|
|
{
|
|
const ASMOperation &op = dxbc->GetInstruction((size_t)initialState.nextInstruction);
|
|
dxbc->m_DebugInfo->GetStack(initialState.nextInstruction, op.offset, initialState.callstack);
|
|
}
|
|
|
|
states.push_back((State)initialState);
|
|
|
|
if(cycleCounter == SHADER_DEBUG_WARN_THRESHOLD)
|
|
{
|
|
if(PromptDebugTimeout(DXBC::TYPE_VERTEX, cycleCounter))
|
|
break;
|
|
}
|
|
}
|
|
|
|
ret.states = states;
|
|
|
|
return ret;
|
|
}
|