Implement derivatives

2026-05-12 13:00:32 +00:00 · 2020-04-07 18:37:13 +01:00
parent 1690d30621
commit d8fa0b624d
4 changed files with 227 additions and 33 deletions
@@ -253,7 +253,7 @@ void ThreadState::JumpToLabel(Id target)
 }

 void ThreadState::StepNext(ShaderDebugState *state,
-                           const rdcarray<rdcarray<ShaderVariable>> &prevWorkgroup)
+                           const rdcarray<DenseIdMap<ShaderVariable>> &prevWorkgroup)
 {
  Iter it = debugger.GetIterForInstruction(nextInstruction);
  nextInstruction++;
@@ -387,6 +387,135 @@ void ThreadState::StepNext(ShaderDebugState *state,
      break;
    }

+    //////////////////////////////////////////////////////////////////////////////
+    //
+    // Derivative opcodes
+    //
+    //////////////////////////////////////////////////////////////////////////////
+
+    // spec allows the implementation to choose what DPdx means (coarse or fine), so we choose
+    // coarse which seems a reasonable default. In future we could driver-detect the selection in
+    // use (assuming it's not dynamic based on circumstances)
+    case Op::DPdx:
+    case Op::DPdxCoarse:
+    {
+      // these all share a format
+      OpDPdx deriv(it);
+
+      // coarse derivatives are identical across the quad, based on the top-left.
+      ShaderVariable var = prevWorkgroup[0][deriv.p];
+      ShaderVariable other = prevWorkgroup[1][deriv.p];
+
+      for(uint8_t c = 0; c < var.columns; c++)
+        var.value.fv[c] = other.value.fv[c] - var.value.fv[c];
+
+      SetDst(state, deriv.result, var);
+
+      break;
+    }
+    case Op::DPdy:
+    case Op::DPdyCoarse:
+    {
+      // these all share a format
+      OpDPdx deriv(it);
+
+      // coarse derivatives are identical across the quad, based on the top-left.
+      ShaderVariable var = prevWorkgroup[0][deriv.p];
+      ShaderVariable other = prevWorkgroup[2][deriv.p];
+
+      for(uint8_t c = 0; c < var.columns; c++)
+        var.value.fv[c] = other.value.fv[c] - var.value.fv[c];
+
+      SetDst(state, deriv.result, var);
+
+      break;
+    }
+    case Op::DPdxFine:
+    case Op::DPdyFine:
+    {
+      // these all share a format
+      OpDPdxFine deriv(it);
+
+      const bool xdirection = (opdata.op == Op::DPdxFine);
+
+      ShaderVariable a, b;
+
+      // we need to figure out the exact pair to use
+      int x = workgroupIndex & 1;
+      int y = workgroupIndex / 2;
+
+      if(x == 0)
+      {
+        if(y == 0)
+        {
+          // top-left
+          if(xdirection)
+          {
+            a = prevWorkgroup[0][deriv.p];
+            b = prevWorkgroup[1][deriv.p];
+          }
+          else
+          {
+            a = prevWorkgroup[0][deriv.p];
+            b = prevWorkgroup[2][deriv.p];
+          }
+        }
+        else
+        {
+          // bottom-left
+          if(xdirection)
+          {
+            a = prevWorkgroup[2][deriv.p];
+            b = prevWorkgroup[3][deriv.p];
+          }
+          else
+          {
+            a = prevWorkgroup[0][deriv.p];
+            b = prevWorkgroup[2][deriv.p];
+          }
+        }
+      }
+      else
+      {
+        if(y == 0)
+        {
+          // top-right
+          if(xdirection)
+          {
+            a = prevWorkgroup[0][deriv.p];
+            b = prevWorkgroup[1][deriv.p];
+          }
+          else
+          {
+            a = prevWorkgroup[1][deriv.p];
+            b = prevWorkgroup[3][deriv.p];
+          }
+        }
+        else
+        {
+          // bottom-right
+          if(xdirection)
+          {
+            a = prevWorkgroup[2][deriv.p];
+            b = prevWorkgroup[3][deriv.p];
+          }
+          else
+          {
+            a = prevWorkgroup[1][deriv.p];
+            b = prevWorkgroup[3][deriv.p];
+          }
+        }
+      }
+
+      // do the subtract
+      for(uint8_t c = 0; c < a.columns; c++)
+        a.value.fv[c] = b.value.fv[c] - a.value.fv[c];
+
+      SetDst(state, deriv.result, a);
+
+      break;
+    }
+
    //////////////////////////////////////////////////////////////////////////////
    //
    // Composite/vector opcodes
@@ -93,7 +93,7 @@ struct ThreadState
  ~ThreadState();

  void EnterFunction(ShaderDebugState *state, const rdcarray<Id> &arguments);
-  void StepNext(ShaderDebugState *state, const rdcarray<rdcarray<ShaderVariable>> &prevWorkgroup);
+  void StepNext(ShaderDebugState *state, const rdcarray<DenseIdMap<ShaderVariable>> &prevWorkgroup);

  void FillCallstack(ShaderDebugState &state);

@@ -337,16 +337,30 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader
    }
  }

-  // now that the globals are allocated and their storage won't move, we can take pointers to them
-  for(size_t i = 0; i < active.inputs.size(); i++)
-    active.ids[inputIDs[i]] = MakePointerVariable(inputIDs[i], &active.inputs[i]);
-  for(size_t i = 0; i < active.outputs.size(); i++)
-    active.ids[outputIDs[i]] = MakePointerVariable(outputIDs[i], &active.outputs[i]);
-  for(size_t i = 0; i < global.constantBlocks.size(); i++)
-    active.ids[cbufferIDs[i]] = MakePointerVariable(cbufferIDs[i], &global.constantBlocks[i]);
-
  std::sort(outputIDs.begin(), outputIDs.end());

+  for(uint32_t i = 0; i < workgroupSize; i++)
+  {
+    ThreadState &lane = workgroup[i];
+    if(i != activeLaneIndex)
+    {
+      lane.nextInstruction = active.nextInstruction;
+      lane.inputs = active.inputs;
+      lane.outputs = active.outputs;
+      lane.ids = active.ids;
+      // mark as inactive/helper lane
+      lane.done = true;
+    }
+
+    // now that the globals are allocated and their storage won't move, we can take pointers to them
+    for(size_t i = 0; i < lane.inputs.size(); i++)
+      lane.ids[inputIDs[i]] = MakePointerVariable(inputIDs[i], &lane.inputs[i]);
+    for(size_t i = 0; i < lane.outputs.size(); i++)
+      lane.ids[outputIDs[i]] = MakePointerVariable(outputIDs[i], &lane.outputs[i]);
+    for(size_t i = 0; i < global.constantBlocks.size(); i++)
+      lane.ids[cbufferIDs[i]] = MakePointerVariable(cbufferIDs[i], &global.constantBlocks[i]);
+  }
+
  // only outputs are considered mutable
  liveGlobals.append(outputIDs);

@@ -390,19 +404,6 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader
  ret->constantBlocks = global.constantBlocks;
  ret->inputs = active.inputs;

-  for(uint32_t i = 0; i < workgroupSize; i++)
-  {
-    if(i == activeLaneIndex)
-      continue;
-
-    workgroup[i].nextInstruction = active.nextInstruction;
-    workgroup[i].inputs = active.inputs;
-    workgroup[i].outputs = active.outputs;
-    workgroup[i].ids = active.ids;
-    // mark as inactive/helper lane
-    workgroup[i].done = true;
-  }
-
  if(stage == ShaderStage::Pixel)
  {
    // apply derivatives to generate the correct inputs for the quad neighbours
@@ -467,7 +468,7 @@ rdcarray<ShaderDebugState> Debugger::ContinueDebug()
  if(active.Finished())
    return ret;

-  rdcarray<rdcarray<ShaderVariable>> oldworkgroup;
+  rdcarray<DenseIdMap<ShaderVariable>> oldworkgroup;

  oldworkgroup.resize(workgroup.size());

@@ -115,18 +115,82 @@ void main()
  float zero = linearData.zeroVal.x;
  float tiny = linearData.tinyVal;

-  int intval = flatData.intval;
+  int intval = int(flatData.intval);

  uint test = flatData.test;

+  vec2 inpos = linearData.inpos;
+  vec2 inposIncreased = linearData.inposIncreased;
+
  Color = vec4(0,0,0,0);
-  if(test == 0)
+  switch(test)
  {
-    Color = vec4(1.0f, 2.0f, 3.0f, 4.0f);
-  }
-  else if(test == 1)
-  {
-    Color = gl_FragCoord;
+    case 0:
+    {
+      Color = gl_FragCoord;
+      break;
+    }
+    case 1:
+    {
+      Color = dFdx(gl_FragCoord);
+      break;
+    }
+    case 2:
+    {
+      Color = dFdy(gl_FragCoord);
+      break;
+    }
+    case 3:
+    {
+      Color = dFdxCoarse(gl_FragCoord);
+      break;
+    }
+    case 4:
+    {
+      Color = dFdyCoarse(gl_FragCoord);
+      break;
+    }
+    case 5:
+    {
+      Color = dFdxFine(gl_FragCoord);
+      break;
+    }
+    case 6:
+    {
+      Color = dFdyFine(gl_FragCoord);
+      break;
+    }
+    case 7:
+    {
+      Color = dFdx(vec4(inpos, inposIncreased));
+      break;
+    }
+    case 8:
+    {
+      Color = dFdy(vec4(inpos, inposIncreased));
+      break;
+    }
+    case 9:
+    {
+      Color = dFdxCoarse(vec4(inpos, inposIncreased));
+      break;
+    }
+    case 10:
+    {
+      Color = dFdyCoarse(vec4(inpos, inposIncreased));
+      break;
+    }
+    case 11:
+    {
+      Color = dFdxFine(vec4(inpos, inposIncreased));
+      break;
+    }
+    case 12:
+    {
+      Color = dFdyFine(vec4(inpos, inposIncreased));
+      break;
+    }
+    default: break;
  }
 }

@@ -226,8 +290,8 @@ void main()
    if(!Init())
      return 3;

-    size_t lastTest = pixel_glsl.rfind("test == ");
-    lastTest += sizeof("test == ") - 1;
+    size_t lastTest = pixel_glsl.rfind("case ");
+    lastTest += sizeof("case ") - 1;

    const uint32_t numGLSLTests = atoi(pixel_glsl.c_str() + lastTest) + 1;