mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-12 13:00:32 +00:00
Move some shader debug functions to a shared location.
In early testing with D3D12 shader debugging, these functions did not require any differences from the D3D11 versions. Moving them to a common location to avoid some code duplication.
This commit is contained in:
committed by
Baldur Karlsson
parent
67d73af8c8
commit
fa232b7f14
@@ -35,27 +35,6 @@
|
||||
#include "d3d11_debug.h"
|
||||
#include "d3d11_manager.h"
|
||||
#include "d3d11_shader_cache.h"
|
||||
// struct that saves pointers as we iterate through to where we ultimately
|
||||
// want to copy the data to
|
||||
struct DataOutput
|
||||
{
|
||||
DataOutput(int regster, int element, int numWords, ShaderBuiltin attr, bool inc)
|
||||
{
|
||||
reg = regster;
|
||||
elem = element;
|
||||
numwords = numWords;
|
||||
sysattribute = attr;
|
||||
included = inc;
|
||||
}
|
||||
|
||||
int reg;
|
||||
int elem;
|
||||
ShaderBuiltin sysattribute;
|
||||
|
||||
int numwords;
|
||||
|
||||
bool included;
|
||||
};
|
||||
|
||||
struct DebugHit
|
||||
{
|
||||
@@ -70,176 +49,6 @@ struct DebugHit
|
||||
uint32_t rawdata; // arbitrary, depending on shader
|
||||
};
|
||||
|
||||
// over this number of cycles and things get problematic
|
||||
#define SHADER_DEBUG_WARN_THRESHOLD 100000
|
||||
|
||||
bool PromptDebugTimeout(uint32_t cycleCounter)
|
||||
{
|
||||
std::string msg = StringFormat::Fmt(
|
||||
"RenderDoc's shader debugging has been running for over %u cycles, which indicates either a "
|
||||
"very long-running loop, or possibly an infinite loop. Continuing could lead to extreme "
|
||||
"memory allocations, slow UI or even crashes. Would you like to abort debugging to see what "
|
||||
"has run so far?\n\n"
|
||||
"Hit yes to abort debugging. Note that loading the resulting trace could take several "
|
||||
"minutes.",
|
||||
cycleCounter);
|
||||
|
||||
int ret = MessageBoxA(NULL, msg.c_str(), "Shader debugging timeout", MB_YESNO | MB_ICONWARNING);
|
||||
|
||||
if(ret == IDYES)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// apply coarse/fine derivatives to select threads within a quad to ensure all values are correct
|
||||
static void ApplyDerivatives(ShaderDebug::GlobalState &global, ShaderDebugTrace traces[4],
|
||||
const DataOutput &initialValue, float *data, float signmul,
|
||||
int32_t quadIdxA, int32_t quadIdxB = -1)
|
||||
{
|
||||
for(int w = 0; w < initialValue.numwords; w++)
|
||||
{
|
||||
traces[quadIdxA].inputs[initialValue.reg].value.fv[initialValue.elem + w] += signmul * data[w];
|
||||
if(quadIdxB >= 0)
|
||||
traces[quadIdxB].inputs[initialValue.reg].value.fv[initialValue.elem + w] += signmul * data[w];
|
||||
}
|
||||
|
||||
// quick check to see if this register was evaluated
|
||||
if(global.sampleEvalRegisterMask & (1ULL << initialValue.reg))
|
||||
{
|
||||
// apply derivative to any cached sample evaluations on these quad indices
|
||||
for(auto it = global.sampleEvalCache.begin(); it != global.sampleEvalCache.end(); ++it)
|
||||
{
|
||||
if((it->first.quadIndex == quadIdxA || it->first.quadIndex == quadIdxB) &&
|
||||
initialValue.reg == it->first.inputRegisterIndex)
|
||||
{
|
||||
for(int w = 0; w < initialValue.numwords; w++)
|
||||
it->second.value.fv[initialValue.elem + w] += data[w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void FlattenSingleVariable(uint32_t byteOffset, const std::string &basename,
|
||||
const ShaderVariable &v, rdcarray<ShaderVariable> &outvars)
|
||||
{
|
||||
size_t outIdx = byteOffset / 16;
|
||||
size_t outComp = (byteOffset % 16) / 4;
|
||||
|
||||
if(v.rowMajor)
|
||||
outvars.resize(RDCMAX(outIdx + v.rows, outvars.size()));
|
||||
else
|
||||
outvars.resize(RDCMAX(outIdx + v.columns, outvars.size()));
|
||||
|
||||
if(!outvars[outIdx].name.empty())
|
||||
{
|
||||
// if we already have a variable in this slot, just append this variable to it. We should not
|
||||
// overlap into the next register as that's not allowed.
|
||||
outvars[outIdx].name = std::string(outvars[outIdx].name) + ", " + basename;
|
||||
outvars[outIdx].rows = 1;
|
||||
outvars[outIdx].isStruct = false;
|
||||
outvars[outIdx].columns += v.columns;
|
||||
|
||||
RDCASSERT(outvars[outIdx].columns <= 4, outvars[outIdx].columns);
|
||||
|
||||
memcpy(&outvars[outIdx].value.uv[outComp], &v.value.uv[0], sizeof(uint32_t) * v.columns);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t numRegisters = v.rowMajor ? v.rows : v.columns;
|
||||
const char *regName = v.rowMajor ? "row" : "col";
|
||||
for(uint32_t reg = 0; reg < numRegisters; reg++)
|
||||
{
|
||||
if(numRegisters > 1)
|
||||
outvars[outIdx + reg].name = StringFormat::Fmt("%s.%s%u", basename.c_str(), regName, reg);
|
||||
else
|
||||
outvars[outIdx + reg].name = basename;
|
||||
outvars[outIdx + reg].rows = 1;
|
||||
outvars[outIdx + reg].type = v.type;
|
||||
outvars[outIdx + reg].isStruct = false;
|
||||
outvars[outIdx + reg].columns = v.columns;
|
||||
outvars[outIdx + reg].rowMajor = v.rowMajor;
|
||||
}
|
||||
|
||||
if(v.rowMajor)
|
||||
{
|
||||
for(size_t ri = 0; ri < v.rows; ri++)
|
||||
memcpy(&outvars[outIdx + ri].value.uv[0], &v.value.uv[ri * v.columns],
|
||||
sizeof(uint32_t) * v.columns);
|
||||
}
|
||||
else
|
||||
{
|
||||
// if we have a matrix stored in column major order, we need to transpose it back so we can
|
||||
// unroll it into vectors.
|
||||
for(size_t ci = 0; ci < v.columns; ci++)
|
||||
for(size_t ri = 0; ri < v.rows; ri++)
|
||||
outvars[outIdx + ci].value.uv[ri] = v.value.uv[ri * v.columns + ci];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars,
|
||||
rdcarray<ShaderVariable> &outvars, const std::string &prefix,
|
||||
uint32_t baseOffset)
|
||||
{
|
||||
RDCASSERTEQUAL(constants.size(), invars.size());
|
||||
|
||||
for(size_t i = 0; i < constants.size(); i++)
|
||||
{
|
||||
const ShaderConstant &c = constants[i];
|
||||
const ShaderVariable &v = invars[i];
|
||||
|
||||
uint32_t byteOffset = baseOffset + c.byteOffset;
|
||||
|
||||
std::string basename = prefix + std::string(v.name);
|
||||
|
||||
if(!v.members.empty())
|
||||
{
|
||||
if(v.isStruct)
|
||||
{
|
||||
FlattenVariables(c.type.members, v.members, outvars, basename + ".", byteOffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(c.type.members.empty())
|
||||
{
|
||||
// if there are no members in this type, it means it's a basic array - unroll directly
|
||||
|
||||
for(int m = 0; m < v.members.count(); m++)
|
||||
{
|
||||
FlattenSingleVariable(byteOffset + m * c.type.descriptor.arrayByteStride,
|
||||
StringFormat::Fmt("%s[%zu]", basename.c_str(), m), v.members[m],
|
||||
outvars);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// otherwise we recurse into each member and flatten
|
||||
|
||||
for(int m = 0; m < v.members.count(); m++)
|
||||
{
|
||||
FlattenVariables(c.type.members, v.members[m].members, outvars,
|
||||
StringFormat::Fmt("%s[%zu].", basename.c_str(), m),
|
||||
byteOffset + m * c.type.descriptor.arrayByteStride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
FlattenSingleVariable(byteOffset, basename, v, outvars);
|
||||
}
|
||||
}
|
||||
|
||||
static void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars,
|
||||
rdcarray<ShaderVariable> &outvars)
|
||||
{
|
||||
FlattenVariables(constants, invars, outvars, "", 0);
|
||||
}
|
||||
|
||||
class D3D11DebugAPIWrapper : public ShaderDebug::DebugAPIWrapper
|
||||
{
|
||||
public:
|
||||
@@ -939,15 +748,18 @@ bool D3D11DebugAPIWrapper::CalculateSampleGather(
|
||||
// we just use the fattest one necessary. There's no harm in retrieving at
|
||||
// higher precision
|
||||
DXGI_FORMAT fmts[DXBC::NUM_RETURN_TYPES] = {
|
||||
DXGI_FORMAT_UNKNOWN, // enum starts at ==1
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,
|
||||
DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_FLOAT,
|
||||
DXGI_FORMAT_UNKNOWN, // RETURN_TYPE_MIXED
|
||||
DXGI_FORMAT_UNKNOWN, // enum starts at ==1
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, // unorm float
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, // snorm float
|
||||
DXGI_FORMAT_R32G32B32A32_SINT, // int
|
||||
DXGI_FORMAT_R32G32B32A32_UINT, // uint
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, // float
|
||||
DXGI_FORMAT_UNKNOWN, // RETURN_TYPE_MIXED
|
||||
|
||||
// should maybe be double, but there is no double texture format anyway!
|
||||
// spec is unclear but I presume reads are done at most at float
|
||||
// precision anyway since that's the source, and converted to doubles.
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT,
|
||||
DXGI_FORMAT_R32G32B32A32_FLOAT, // double
|
||||
|
||||
DXGI_FORMAT_UNKNOWN, // RETURN_TYPE_CONTINUED
|
||||
DXGI_FORMAT_UNKNOWN, // RETURN_TYPE_UNUSED
|
||||
@@ -1101,9 +913,9 @@ bool D3D11DebugAPIWrapper::CalculateSampleGather(
|
||||
{
|
||||
if(offsetDim == 1)
|
||||
StringFormat::snprintf(buf, 255, ", int(%d)", texelOffsets[0]);
|
||||
if(offsetDim == 2)
|
||||
else if(offsetDim == 2)
|
||||
StringFormat::snprintf(buf, 255, ", int2(%d, %d)", texelOffsets[0], texelOffsets[1]);
|
||||
if(offsetDim == 3)
|
||||
else if(offsetDim == 3)
|
||||
StringFormat::snprintf(buf, 255, ", int3(%d, %d, %d)", texelOffsets[0], texelOffsets[1],
|
||||
texelOffsets[2]);
|
||||
// texdim == 4 is cube arrays, no offset supported
|
||||
@@ -2571,7 +2383,7 @@ ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
if(prevdxbc == NULL && vs != NULL)
|
||||
prevdxbc = vs->GetDXBC();
|
||||
|
||||
std::vector<DataOutput> initialValues;
|
||||
std::vector<PSInputElement> initialValues;
|
||||
|
||||
std::string extractHlsl = "struct PSInput\n{\n";
|
||||
|
||||
@@ -2581,7 +2393,7 @@ ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
{
|
||||
extractHlsl += "float4 input_dummy : SV_Position;\n";
|
||||
|
||||
initialValues.push_back(DataOutput(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
||||
initialValues.push_back(PSInputElement(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
||||
|
||||
structureStride += 4;
|
||||
}
|
||||
@@ -2658,7 +2470,7 @@ ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
|
||||
structureStride += 4 * numCols;
|
||||
|
||||
initialValues.push_back(DataOutput(-1, 0, numCols, ShaderBuiltin::Undefined, true));
|
||||
initialValues.push_back(PSInputElement(-1, 0, numCols, ShaderBuiltin::Undefined, true));
|
||||
|
||||
std::string name = prevdxbc->GetReflection()->OutputSig[os].semanticIdxName;
|
||||
|
||||
@@ -2673,7 +2485,7 @@ ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
dummy_reg += ToStr((uint32_t)nextreg + dummy);
|
||||
extractHlsl += "float4 var_" + dummy_reg + " : semantic_" + dummy_reg + ";\n";
|
||||
|
||||
initialValues.push_back(DataOutput(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
||||
initialValues.push_back(PSInputElement(-1, 0, 4, ShaderBuiltin::Undefined, true));
|
||||
|
||||
structureStride += 4 * sizeof(float);
|
||||
}
|
||||
@@ -2859,17 +2671,17 @@ ShaderDebugTrace D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
{
|
||||
if(arrayLength == 0)
|
||||
{
|
||||
initialValues.push_back(DataOutput(dxbc->GetReflection()->InputSig[i].regIndex, firstElem,
|
||||
numCols, dxbc->GetReflection()->InputSig[i].systemValue,
|
||||
included));
|
||||
initialValues.push_back(
|
||||
PSInputElement(dxbc->GetReflection()->InputSig[i].regIndex, firstElem, numCols,
|
||||
dxbc->GetReflection()->InputSig[i].systemValue, included));
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int a = 0; a < arrayLength; a++)
|
||||
{
|
||||
initialValues.push_back(
|
||||
DataOutput(dxbc->GetReflection()->InputSig[i].regIndex + a, firstElem, numCols,
|
||||
dxbc->GetReflection()->InputSig[i].systemValue, included));
|
||||
PSInputElement(dxbc->GetReflection()->InputSig[i].regIndex + a, firstElem, numCols,
|
||||
dxbc->GetReflection()->InputSig[i].systemValue, included));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3516,132 +3328,7 @@ void ExtractInputsPS(PSInput IN, float4 debug_pixelPos : SV_Position, uint prim
|
||||
evalSampleCache += 4;
|
||||
}
|
||||
|
||||
// We make the assumption that the coarse derivatives are generated from (0,0) in the quad, and
|
||||
// fine derivatives are generated from the destination index and its neighbours in X and Y.
|
||||
// This isn't spec'd but we must assume something and this will hopefully get us closest to
|
||||
// reproducing actual results.
|
||||
//
|
||||
// For debugging, we need members of the quad to be able to generate coarse and fine
|
||||
// derivatives.
|
||||
//
|
||||
// For (0,0) we only need the coarse derivatives to get our neighbours (1,0) and (0,1) which
|
||||
// will give us coarse and fine derivatives being identical.
|
||||
//
|
||||
// For the others we will need to use a combination of coarse and fine derivatives to get the
|
||||
// diagonal element in the quad. In the examples below, remember that the quad indices are:
|
||||
//
|
||||
// +---+---+
|
||||
// | 0 | 1 |
|
||||
// +---+---+
|
||||
// | 2 | 3 |
|
||||
// +---+---+
|
||||
//
|
||||
// And that we have definitions of the derivatives:
|
||||
//
|
||||
// ddx_coarse = (1,0) - (0,0)
|
||||
// ddy_coarse = (0,1) - (0,0)
|
||||
//
|
||||
// i.e. the same for all members of the quad
|
||||
//
|
||||
// ddx_fine = (x,y) - (1-x,y)
|
||||
// ddy_fine = (x,y) - (x,1-y)
|
||||
//
|
||||
// i.e. the difference to the neighbour of our desired invocation (the one we have the actual
|
||||
// inputs for, from gathering above).
|
||||
//
|
||||
// So e.g. if our thread is at (1,1) destIdx = 3
|
||||
//
|
||||
// (1,0) = (1,1) - ddx_fine
|
||||
// (0,1) = (1,1) - ddy_fine
|
||||
// (0,0) = (1,1) - ddy_fine - ddx_coarse
|
||||
//
|
||||
// and ddy_coarse is unused. For (1,0) destIdx = 1:
|
||||
//
|
||||
// (1,1) = (1,0) + ddy_fine
|
||||
// (0,1) = (1,0) - ddx_coarse + ddy_coarse
|
||||
// (0,0) = (1,0) - ddx_coarse
|
||||
//
|
||||
// and ddx_fine is unused (it's identical to ddx_coarse anyway)
|
||||
|
||||
// this is the value of input[1] - input[0]
|
||||
float *ddx_coarse = (float *)data;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 0)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddx_coarse, 1.0f, 1, 3);
|
||||
else if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddx_coarse, -1.0f, 0, 2);
|
||||
else if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddx_coarse, 1.0f, 1);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddx_coarse, -1.0f, 0);
|
||||
}
|
||||
|
||||
ddx_coarse += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
// this is the value of input[2] - input[0]
|
||||
float *ddy_coarse = ddx_coarse;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 0)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddy_coarse, 1.0f, 2, 3);
|
||||
else if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddy_coarse, 1.0f, 2);
|
||||
else if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddy_coarse, -1.0f, 0, 1);
|
||||
}
|
||||
|
||||
ddy_coarse += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
float *ddxfine = ddy_coarse;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddxfine, 1.0f, 3);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddxfine, -1.0f, 2);
|
||||
}
|
||||
|
||||
ddxfine += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
float *ddyfine = ddxfine;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddyfine, 1.0f, 3);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i], ddyfine, -1.0f, 0, 1);
|
||||
}
|
||||
|
||||
ddyfine += initialValues[i].numwords;
|
||||
}
|
||||
ApplyAllDerivatives(global, traces, destIdx, initialValues, (float *)data);
|
||||
}
|
||||
|
||||
SAFE_DELETE_ARRAY(initialData);
|
||||
|
||||
@@ -3982,6 +3982,312 @@ State State::GetNext(GlobalState &global, DebugAPIWrapper *apiWrapper, State qua
|
||||
return s;
|
||||
}
|
||||
|
||||
bool PromptDebugTimeout(uint32_t cycleCounter)
|
||||
{
|
||||
std::string msg = StringFormat::Fmt(
|
||||
"RenderDoc's shader debugging has been running for over %u cycles, which indicates either a "
|
||||
"very long-running loop, or possibly an infinite loop. Continuing could lead to extreme "
|
||||
"memory allocations, slow UI or even crashes. Would you like to abort debugging to see what "
|
||||
"has run so far?\n\n"
|
||||
"Hit yes to abort debugging. Note that loading the resulting trace could take several "
|
||||
"minutes.",
|
||||
cycleCounter);
|
||||
|
||||
int ret = MessageBoxA(NULL, msg.c_str(), "Shader debugging timeout", MB_YESNO | MB_ICONWARNING);
|
||||
|
||||
if(ret == IDYES)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void ApplyDerivatives(ShaderDebug::GlobalState &global, ShaderDebugTrace traces[4], int reg,
|
||||
int element, int numWords, float *data, float signmul, int32_t quadIdxA,
|
||||
int32_t quadIdxB)
|
||||
{
|
||||
for(int w = 0; w < numWords; w++)
|
||||
{
|
||||
traces[quadIdxA].inputs[reg].value.fv[element + w] += signmul * data[w];
|
||||
if(quadIdxB >= 0)
|
||||
traces[quadIdxB].inputs[reg].value.fv[element + w] += signmul * data[w];
|
||||
}
|
||||
|
||||
// quick check to see if this register was evaluated
|
||||
if(global.sampleEvalRegisterMask & (1ULL << reg))
|
||||
{
|
||||
// apply derivative to any cached sample evaluations on these quad indices
|
||||
for(auto it = global.sampleEvalCache.begin(); it != global.sampleEvalCache.end(); ++it)
|
||||
{
|
||||
if((it->first.quadIndex == quadIdxA || it->first.quadIndex == quadIdxB) &&
|
||||
reg == it->first.inputRegisterIndex)
|
||||
{
|
||||
for(int w = 0; w < numWords; w++)
|
||||
it->second.value.fv[element + w] += data[w];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ApplyAllDerivatives(ShaderDebug::GlobalState &global, ShaderDebugTrace traces[4], int destIdx,
|
||||
const std::vector<PSInputElement> &initialValues, float *data)
|
||||
{
|
||||
// We make the assumption that the coarse derivatives are generated from (0,0) in the quad, and
|
||||
// fine derivatives are generated from the destination index and its neighbours in X and Y.
|
||||
// This isn't spec'd but we must assume something and this will hopefully get us closest to
|
||||
// reproducing actual results.
|
||||
//
|
||||
// For debugging, we need members of the quad to be able to generate coarse and fine
|
||||
// derivatives.
|
||||
//
|
||||
// For (0,0) we only need the coarse derivatives to get our neighbours (1,0) and (0,1) which
|
||||
// will give us coarse and fine derivatives being identical.
|
||||
//
|
||||
// For the others we will need to use a combination of coarse and fine derivatives to get the
|
||||
// diagonal element in the quad. In the examples below, remember that the quad indices are:
|
||||
//
|
||||
// +---+---+
|
||||
// | 0 | 1 |
|
||||
// +---+---+
|
||||
// | 2 | 3 |
|
||||
// +---+---+
|
||||
//
|
||||
// And that we have definitions of the derivatives:
|
||||
//
|
||||
// ddx_coarse = (1,0) - (0,0)
|
||||
// ddy_coarse = (0,1) - (0,0)
|
||||
//
|
||||
// i.e. the same for all members of the quad
|
||||
//
|
||||
// ddx_fine = (x,y) - (1-x,y)
|
||||
// ddy_fine = (x,y) - (x,1-y)
|
||||
//
|
||||
// i.e. the difference to the neighbour of our desired invocation (the one we have the actual
|
||||
// inputs for, from gathering above).
|
||||
//
|
||||
// So e.g. if our thread is at (1,1) destIdx = 3
|
||||
//
|
||||
// (1,0) = (1,1) - ddx_fine
|
||||
// (0,1) = (1,1) - ddy_fine
|
||||
// (0,0) = (1,1) - ddy_fine - ddx_coarse
|
||||
//
|
||||
// and ddy_coarse is unused. For (1,0) destIdx = 1:
|
||||
//
|
||||
// (1,1) = (1,0) + ddy_fine
|
||||
// (0,1) = (1,0) - ddx_coarse + ddy_coarse
|
||||
// (0,0) = (1,0) - ddx_coarse
|
||||
//
|
||||
// and ddx_fine is unused (it's identical to ddx_coarse anyway)
|
||||
|
||||
// this is the value of input[1] - input[0]
|
||||
float *ddx_coarse = (float *)data;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 0)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddx_coarse, 1.0f, 1, 3);
|
||||
else if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddx_coarse, -1.0f, 0, 2);
|
||||
else if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddx_coarse, 1.0f, 1, -1);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddx_coarse, -1.0f, 0, -1);
|
||||
}
|
||||
|
||||
ddx_coarse += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
// this is the value of input[2] - input[0]
|
||||
float *ddy_coarse = ddx_coarse;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 0)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddy_coarse, 1.0f, 2, 3);
|
||||
else if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddy_coarse, 1.0f, 2, -1);
|
||||
else if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddy_coarse, -1.0f, 0, 1);
|
||||
}
|
||||
|
||||
ddy_coarse += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
float *ddxfine = ddy_coarse;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 2)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddxfine, 1.0f, 3, -1);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddxfine, -1.0f, 2, -1);
|
||||
}
|
||||
|
||||
ddxfine += initialValues[i].numwords;
|
||||
}
|
||||
|
||||
float *ddyfine = ddxfine;
|
||||
|
||||
for(size_t i = 0; i < initialValues.size(); i++)
|
||||
{
|
||||
if(!initialValues[i].included)
|
||||
continue;
|
||||
|
||||
if(initialValues[i].reg >= 0)
|
||||
{
|
||||
if(destIdx == 1)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddyfine, 1.0f, 3, -1);
|
||||
else if(destIdx == 3)
|
||||
ApplyDerivatives(global, traces, initialValues[i].reg, initialValues[i].elem,
|
||||
initialValues[i].numwords, ddyfine, -1.0f, 0, 1);
|
||||
}
|
||||
|
||||
ddyfine += initialValues[i].numwords;
|
||||
}
|
||||
}
|
||||
|
||||
void FlattenSingleVariable(uint32_t byteOffset, const std::string &basename,
|
||||
const ShaderVariable &v, rdcarray<ShaderVariable> &outvars)
|
||||
{
|
||||
size_t outIdx = byteOffset / 16;
|
||||
size_t outComp = (byteOffset % 16) / 4;
|
||||
|
||||
if(v.rowMajor)
|
||||
outvars.resize(RDCMAX(outIdx + v.rows, outvars.size()));
|
||||
else
|
||||
outvars.resize(RDCMAX(outIdx + v.columns, outvars.size()));
|
||||
|
||||
if(!outvars[outIdx].name.empty())
|
||||
{
|
||||
// if we already have a variable in this slot, just append this variable to it. We should not
|
||||
// overlap into the next register as that's not allowed.
|
||||
outvars[outIdx].name = std::string(outvars[outIdx].name) + ", " + basename;
|
||||
outvars[outIdx].rows = 1;
|
||||
outvars[outIdx].isStruct = false;
|
||||
outvars[outIdx].columns += v.columns;
|
||||
|
||||
RDCASSERT(outvars[outIdx].columns <= 4, outvars[outIdx].columns);
|
||||
|
||||
memcpy(&outvars[outIdx].value.uv[outComp], &v.value.uv[0], sizeof(uint32_t) * v.columns);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t numRegisters = v.rowMajor ? v.rows : v.columns;
|
||||
const char *regName = v.rowMajor ? "row" : "col";
|
||||
for(uint32_t reg = 0; reg < numRegisters; reg++)
|
||||
{
|
||||
if(numRegisters > 1)
|
||||
outvars[outIdx + reg].name = StringFormat::Fmt("%s.%s%u", basename.c_str(), regName, reg);
|
||||
else
|
||||
outvars[outIdx + reg].name = basename;
|
||||
outvars[outIdx + reg].rows = 1;
|
||||
outvars[outIdx + reg].type = v.type;
|
||||
outvars[outIdx + reg].isStruct = false;
|
||||
outvars[outIdx + reg].columns = v.columns;
|
||||
outvars[outIdx + reg].rowMajor = v.rowMajor;
|
||||
}
|
||||
|
||||
if(v.rowMajor)
|
||||
{
|
||||
for(size_t ri = 0; ri < v.rows; ri++)
|
||||
memcpy(&outvars[outIdx + ri].value.uv[0], &v.value.uv[ri * v.columns],
|
||||
sizeof(uint32_t) * v.columns);
|
||||
}
|
||||
else
|
||||
{
|
||||
// if we have a matrix stored in column major order, we need to transpose it back so we can
|
||||
// unroll it into vectors.
|
||||
for(size_t ci = 0; ci < v.columns; ci++)
|
||||
for(size_t ri = 0; ri < v.rows; ri++)
|
||||
outvars[outIdx + ci].value.uv[ri] = v.value.uv[ri * v.columns + ci];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars, rdcarray<ShaderVariable> &outvars,
|
||||
const std::string &prefix, uint32_t baseOffset)
|
||||
{
|
||||
RDCASSERTEQUAL(constants.size(), invars.size());
|
||||
|
||||
for(size_t i = 0; i < constants.size(); i++)
|
||||
{
|
||||
const ShaderConstant &c = constants[i];
|
||||
const ShaderVariable &v = invars[i];
|
||||
|
||||
uint32_t byteOffset = baseOffset + c.byteOffset;
|
||||
|
||||
std::string basename = prefix + std::string(v.name);
|
||||
|
||||
if(!v.members.empty())
|
||||
{
|
||||
if(v.isStruct)
|
||||
{
|
||||
FlattenVariables(c.type.members, v.members, outvars, basename + ".", byteOffset);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(c.type.members.empty())
|
||||
{
|
||||
// if there are no members in this type, it means it's a basic array - unroll directly
|
||||
|
||||
for(int m = 0; m < v.members.count(); m++)
|
||||
{
|
||||
FlattenSingleVariable(byteOffset + m * c.type.descriptor.arrayByteStride,
|
||||
StringFormat::Fmt("%s[%zu]", basename.c_str(), m), v.members[m],
|
||||
outvars);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// otherwise we recurse into each member and flatten
|
||||
|
||||
for(int m = 0; m < v.members.count(); m++)
|
||||
{
|
||||
FlattenVariables(c.type.members, v.members[m].members, outvars,
|
||||
StringFormat::Fmt("%s[%zu].", basename.c_str(), m),
|
||||
byteOffset + m * c.type.descriptor.arrayByteStride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
FlattenSingleVariable(byteOffset, basename, v, outvars);
|
||||
}
|
||||
}
|
||||
|
||||
void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars, rdcarray<ShaderVariable> &outvars)
|
||||
{
|
||||
FlattenVariables(constants, invars, outvars, "", 0);
|
||||
}
|
||||
|
||||
}; // namespace ShaderDebug
|
||||
|
||||
#if ENABLED(ENABLE_UNIT_TESTS)
|
||||
|
||||
@@ -147,6 +147,44 @@ public:
|
||||
std::map<SampleEvalCacheKey, ShaderVariable> sampleEvalCache;
|
||||
};
|
||||
|
||||
#define SHADER_DEBUG_WARN_THRESHOLD 100000
|
||||
bool PromptDebugTimeout(uint32_t cycleCounter);
|
||||
|
||||
struct PSInputElement
|
||||
{
|
||||
PSInputElement(int regster, int element, int numWords, ShaderBuiltin attr, bool inc)
|
||||
{
|
||||
reg = regster;
|
||||
elem = element;
|
||||
numwords = numWords;
|
||||
sysattribute = attr;
|
||||
included = inc;
|
||||
}
|
||||
|
||||
int reg;
|
||||
int elem;
|
||||
ShaderBuiltin sysattribute;
|
||||
|
||||
int numwords;
|
||||
|
||||
bool included;
|
||||
};
|
||||
|
||||
void ApplyDerivatives(ShaderDebug::GlobalState &global, ShaderDebugTrace traces[4], int reg,
|
||||
int element, int numWords, float *data, float signmul, int32_t quadIdxA,
|
||||
int32_t quadIdxB);
|
||||
|
||||
void ApplyAllDerivatives(ShaderDebug::GlobalState &global, ShaderDebugTrace traces[4], int destIdx,
|
||||
const std::vector<PSInputElement> &initialValues, float *data);
|
||||
|
||||
void FlattenSingleVariable(uint32_t byteOffset, const std::string &basename,
|
||||
const ShaderVariable &v, rdcarray<ShaderVariable> &outvars);
|
||||
void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars, rdcarray<ShaderVariable> &outvars,
|
||||
const std::string &prefix, uint32_t baseOffset);
|
||||
void FlattenVariables(const rdcarray<ShaderConstant> &constants,
|
||||
const rdcarray<ShaderVariable> &invars, rdcarray<ShaderVariable> &outvars);
|
||||
|
||||
struct SampleGatherResourceData
|
||||
{
|
||||
DXBCBytecode::ResourceDimension dim;
|
||||
|
||||
Reference in New Issue
Block a user