Add support for extended int math functions (with borrow/carry/overflow)

2026-05-06 10:00:40 +00:00 · 2020-04-16 12:54:27 +01:00
parent 60258bd57b
commit a725a4e278
2 changed files with 178 additions and 0 deletions
@@ -1414,6 +1414,92 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
      SetDst(math.result, var);
      break;
    }
+    // extended math ops - each produces a two-member struct: {low-order result, high-order result}
+    case Op::UMulExtended:
+    case Op::SMulExtended:
+    case Op::IAddCarry:
+    case Op::ISubBorrow:
+    {
+      OpUMulExtended math(it);    // NOTE(review): decodes all four ops - confirm layouts match
+
+      ShaderVariable a = GetSrc(math.operand1);
+      ShaderVariable b = GetSrc(math.operand2);
+
+      ShaderVariable lsb = a;    // outputs start as copies of operand 1; the per-component
+      ShaderVariable msb = a;    // values are overwritten by whichever branch below runs
+
+      if(opdata.op == Op::UMulExtended)
+      {
+        // if this is less than 64-bit precision inputs, we can just upcast, do the mul, and then
+        // mask off the bits we care about. NOTE(review): 8-byte inputs skip this, so lsb/msb == a
+        if(VarTypeByteSize(a.type) < 8)
+        {
+          for(uint8_t c = 0; c < a.columns; c++)
+          {
+            const uint64_t x = a.value.uv[c];
+            const uint64_t y = b.value.uv[c];
+            const uint64_t res = x * y;
+
+            lsb.value.uv[c] = uint32_t(res & 0xFFFFFFFFu);    // low 32 bits of the product
+            msb.value.uv[c] = uint32_t(res >> 32);            // high 32 bits of the product
+          }
+        }
+      }
+      else if(opdata.op == Op::SMulExtended)
+      {
+        if(VarTypeByteSize(a.type) < 8)    // same widen-then-split approach, on signed values
+        {
+          for(uint8_t c = 0; c < a.columns; c++)
+          {
+            const int64_t x = a.value.iv[c];
+            const int64_t y = b.value.iv[c];
+            const int64_t res = x * y;
+
+            lsb.value.iv[c] = int32_t(res & 0xFFFFFFFFu);
+            msb.value.iv[c] = int32_t(res >> 32);
+          }
+        }
+      }
+      else if(opdata.op == Op::IAddCarry)
+      {
+        for(uint8_t c = 0; c < a.columns; c++)
+        {
+          // unsigned overflow is well-defined to wrap around, giving us the lsb we want.
+          lsb.value.uv[c] = a.value.uv[c] + b.value.uv[c];
+          // a wrapped sum is smaller than either operand, so comparing against b detects the carry
+          msb.value.uv[c] = (lsb.value.uv[c] < b.value.uv[c]) ? 1 : 0;
+        }
+      }
+      else if(opdata.op == Op::ISubBorrow)
+      {
+        for(uint8_t c = 0; c < a.columns; c++)
+        {
+          // if b <= a the difference is non-negative, so no borrow is needed
+          if(b.value.uv[c] <= a.value.uv[c])
+          {
+            msb.value.uv[c] = 0;
+            lsb.value.uv[c] = a.value.uv[c] - b.value.uv[c];
+          }
+          else
+          {
+            // otherwise set borrow bit; lsb is 2^32 - (b - a) (assumes 32-bit components - confirm)
+            msb.value.uv[c] = 1;
+            lsb.value.uv[c] = 0xFFFFFFFFu - (b.value.uv[c] - a.value.uv[c] - 1);
+          }
+        }
+      }
+
+      ShaderVariable result;
+      result.rows = 1;
+      result.columns = 1;
+      result.isStruct = true;
+      result.members = {lsb, msb};    // member 0 = low-order part, member 1 = high part/carry/borrow
+      result.members[0].name = "_child0";
+      result.members[1].name = "_child1";
+
+      SetDst(math.result, result);
+      break;
+    }
    case Op::FNegate:
    case Op::SNegate:
    {
@@ -755,6 +755,98 @@ void main()
                   ldexp(posone*4.4f, zeroi+7));
      break;
    }
+    case 96:
+    {
+      uint a = zerou + 0xb0b0b0b0;
+      uint b = zerou + 0x12345678;
+
+      // add and sub with no carry/borrow
+      uint y;
+      uint x = uaddCarry(a, b, y);
+      uint w;
+      uint z = usubBorrow(a, b, w);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+    case 97:
+    {
+      uint a = zerou + 0xb0b0b0b0;
+      uint b = zerou + 0xdeadbeef;
+
+      // add and sub with carry/borrow
+      uint y;
+      uint x = uaddCarry(a, b, y);
+      uint w;
+      uint z = usubBorrow(a, b, w);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+    case 98:
+    {
+      uint a = zerou + 0xb0b0b0b0;
+      uint b = zerou + 0xdeadbeef;
+
+      // add and sub with carry/borrow
+      uint y;
+      uint x = uaddCarry(a, b, y);
+      uint w;
+      uint z = usubBorrow(a, b, w);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+    case 99:
+    {
+      uint a = zerou + 0x1234;
+      uint b = zerou + 0x5678;
+      int c = zeroi + 0x1234;
+      int d = zeroi + 0x5678;
+
+      // positive mul with no overflow
+      uint x, y;
+      umulExtended(a, b, y, x);
+      int z, w;
+      imulExtended(c, d, w, z);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+    case 100:
+    {
+      uint a = zerou + 0x123456;
+      uint b = zerou + 0x78abcd;
+      int c = zeroi + 0x123456;
+      int d = zeroi + 0x78abcd;
+
+      // positive mul with overflow
+      uint x, y;
+      umulExtended(a, b, y, x);
+      int z, w;
+      imulExtended(c, d, w, z);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+    case 101:
+    {
+      int a = zeroi - 0x1234;
+      int b = zeroi - 0x5678;
+      int c = zeroi - 0x123456;
+      int d = zeroi - 0x78abcd;
+
+      // negative mul with and without overflow
+      int x, y;
+      imulExtended(a, b, y, x);
+      int z, w;
+      imulExtended(c, d, w, z);
+
+      Color = vec4(float(x), float(y), float(z), float(w));
+      break;
+    }
+)EOSHADER"
+                   R"EOSHADER(
    default: break;
  }
 }