Implement derivatives

This commit is contained in:
baldurk
2020-04-07 18:37:13 +01:00
parent 1690d30621
commit d8fa0b624d
4 changed files with 227 additions and 33 deletions
+130 -1
View File
@@ -253,7 +253,7 @@ void ThreadState::JumpToLabel(Id target)
}
void ThreadState::StepNext(ShaderDebugState *state,
const rdcarray<rdcarray<ShaderVariable>> &prevWorkgroup)
const rdcarray<DenseIdMap<ShaderVariable>> &prevWorkgroup)
{
Iter it = debugger.GetIterForInstruction(nextInstruction);
nextInstruction++;
@@ -387,6 +387,135 @@ void ThreadState::StepNext(ShaderDebugState *state,
break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Derivative opcodes
//
//////////////////////////////////////////////////////////////////////////////
// spec allows the implementation to choose what DPdx means (coarse or fine), so we choose
// coarse which seems a reasonable default. In future we could driver-detect the selection in
// use (assuming it's not dynamic based on circumstances)
case Op::DPdx:
case Op::DPdxCoarse:
{
  // DPdx and DPdxCoarse are decoded through the same operand structure
  OpDPdx deriv(it);

  // a coarse derivative is the same for every lane in the quad: it is always measured from
  // the top-left lane (index 0), here against the lane at index 1.
  ShaderVariable delta = prevWorkgroup[0][deriv.p];
  const ShaderVariable &neighbour = prevWorkgroup[1][deriv.p];

  // the result is a copy of the top-left lane's variable with each component replaced by
  // the per-component difference (neighbour - top-left)
  for(uint8_t comp = 0; comp < delta.columns; comp++)
    delta.value.fv[comp] = neighbour.value.fv[comp] - delta.value.fv[comp];

  SetDst(state, deriv.result, delta);
  break;
}
case Op::DPdy:
case Op::DPdyCoarse:
{
  // DPdy and DPdyCoarse are decoded through the same operand structure as DPdx
  OpDPdx deriv(it);

  // a coarse derivative is the same for every lane in the quad: it is always measured from
  // the top-left lane (index 0), here against the lane at index 2.
  ShaderVariable delta = prevWorkgroup[0][deriv.p];
  const ShaderVariable &neighbour = prevWorkgroup[2][deriv.p];

  // the result is a copy of the top-left lane's variable with each component replaced by
  // the per-component difference (neighbour - top-left)
  for(uint8_t comp = 0; comp < delta.columns; comp++)
    delta.value.fv[comp] = neighbour.value.fv[comp] - delta.value.fv[comp];

  SetDst(state, deriv.result, delta);
  break;
}
case Op::DPdxFine:
case Op::DPdyFine:
{
  // both fine derivative opcodes are decoded through the same operand structure
  OpDPdxFine deriv(it);

  // fine derivatives depend on which lane of the quad we are. Lanes are indexed
  // 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right.
  const int column = workgroupIndex & 1;
  const int row = workgroupIndex / 2;

  // pick the pair of lanes to difference: 'from' is subtracted from 'to'
  int from, to;
  if(opdata.op == Op::DPdxFine)
  {
    // horizontal difference along our own row: (0,1) for the top row, (2,3) for the bottom
    from = (row == 0) ? 0 : 2;
    to = from + 1;
  }
  else
  {
    // vertical difference along our own column: (0,2) for the left column, (1,3) for the
    // right
    from = (column == 0) ? 0 : 1;
    to = from + 2;
  }

  ShaderVariable delta = prevWorkgroup[from][deriv.p];
  const ShaderVariable &neighbour = prevWorkgroup[to][deriv.p];

  // the result is a copy of the 'from' lane's variable with each component replaced by the
  // per-component difference (to - from)
  for(uint8_t comp = 0; comp < delta.columns; comp++)
    delta.value.fv[comp] = neighbour.value.fv[comp] - delta.value.fv[comp];

  SetDst(state, deriv.result, delta);
  break;
}
//////////////////////////////////////////////////////////////////////////////
//
// Composite/vector opcodes
+1 -1
View File
@@ -93,7 +93,7 @@ struct ThreadState
~ThreadState();
void EnterFunction(ShaderDebugState *state, const rdcarray<Id> &arguments);
void StepNext(ShaderDebugState *state, const rdcarray<rdcarray<ShaderVariable>> &prevWorkgroup);
void StepNext(ShaderDebugState *state, const rdcarray<DenseIdMap<ShaderVariable>> &prevWorkgroup);
void FillCallstack(ShaderDebugState &state);
@@ -337,16 +337,30 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader
}
}
// now that the globals are allocated and their storage won't move, we can take pointers to them
for(size_t i = 0; i < active.inputs.size(); i++)
active.ids[inputIDs[i]] = MakePointerVariable(inputIDs[i], &active.inputs[i]);
for(size_t i = 0; i < active.outputs.size(); i++)
active.ids[outputIDs[i]] = MakePointerVariable(outputIDs[i], &active.outputs[i]);
for(size_t i = 0; i < global.constantBlocks.size(); i++)
active.ids[cbufferIDs[i]] = MakePointerVariable(cbufferIDs[i], &global.constantBlocks[i]);
std::sort(outputIDs.begin(), outputIDs.end());
for(uint32_t i = 0; i < workgroupSize; i++)
{
ThreadState &lane = workgroup[i];
if(i != activeLaneIndex)
{
lane.nextInstruction = active.nextInstruction;
lane.inputs = active.inputs;
lane.outputs = active.outputs;
lane.ids = active.ids;
// mark as inactive/helper lane
lane.done = true;
}
// now that the globals are allocated and their storage won't move, we can take pointers to them
for(size_t i = 0; i < lane.inputs.size(); i++)
lane.ids[inputIDs[i]] = MakePointerVariable(inputIDs[i], &lane.inputs[i]);
for(size_t i = 0; i < lane.outputs.size(); i++)
lane.ids[outputIDs[i]] = MakePointerVariable(outputIDs[i], &lane.outputs[i]);
for(size_t i = 0; i < global.constantBlocks.size(); i++)
lane.ids[cbufferIDs[i]] = MakePointerVariable(cbufferIDs[i], &global.constantBlocks[i]);
}
// only outputs are considered mutable
liveGlobals.append(outputIDs);
@@ -390,19 +404,6 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader
ret->constantBlocks = global.constantBlocks;
ret->inputs = active.inputs;
for(uint32_t i = 0; i < workgroupSize; i++)
{
if(i == activeLaneIndex)
continue;
workgroup[i].nextInstruction = active.nextInstruction;
workgroup[i].inputs = active.inputs;
workgroup[i].outputs = active.outputs;
workgroup[i].ids = active.ids;
// mark as inactive/helper lane
workgroup[i].done = true;
}
if(stage == ShaderStage::Pixel)
{
// apply derivatives to generate the correct inputs for the quad neighbours
@@ -467,7 +468,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
if(active.Finished())
return ret;
rdcarray<rdcarray<ShaderVariable>> oldworkgroup;
rdcarray<DenseIdMap<ShaderVariable>> oldworkgroup;
oldworkgroup.resize(workgroup.size());
+73 -9
View File
@@ -115,18 +115,82 @@ void main()
float zero = linearData.zeroVal.x;
float tiny = linearData.tinyVal;
int intval = flatData.intval;
int intval = int(flatData.intval);
uint test = flatData.test;
vec2 inpos = linearData.inpos;
vec2 inposIncreased = linearData.inposIncreased;
Color = vec4(0,0,0,0);
if(test == 0)
switch(test)
{
Color = vec4(1.0f, 2.0f, 3.0f, 4.0f);
}
else if(test == 1)
{
Color = gl_FragCoord;
case 0:
{
Color = gl_FragCoord;
break;
}
case 1:
{
Color = dFdx(gl_FragCoord);
break;
}
case 2:
{
Color = dFdy(gl_FragCoord);
break;
}
case 3:
{
Color = dFdxCoarse(gl_FragCoord);
break;
}
case 4:
{
Color = dFdyCoarse(gl_FragCoord);
break;
}
case 5:
{
Color = dFdxFine(gl_FragCoord);
break;
}
case 6:
{
Color = dFdyFine(gl_FragCoord);
break;
}
case 7:
{
Color = dFdx(vec4(inpos, inposIncreased));
break;
}
case 8:
{
Color = dFdy(vec4(inpos, inposIncreased));
break;
}
case 9:
{
Color = dFdxCoarse(vec4(inpos, inposIncreased));
break;
}
case 10:
{
Color = dFdyCoarse(vec4(inpos, inposIncreased));
break;
}
case 11:
{
Color = dFdxFine(vec4(inpos, inposIncreased));
break;
}
case 12:
{
Color = dFdyFine(vec4(inpos, inposIncreased));
break;
}
default: break;
}
}
@@ -226,8 +290,8 @@ void main()
if(!Init())
return 3;
size_t lastTest = pixel_glsl.rfind("test == ");
lastTest += sizeof("test == ") - 1;
size_t lastTest = pixel_glsl.rfind("case ");
lastTest += sizeof("case ") - 1;
const uint32_t numGLSLTests = atoi(pixel_glsl.c_str() + lastTest) + 1;