Add support for extended int math functions (with borrow/carry/overflow)

This commit is contained in:
baldurk
2020-04-16 12:54:27 +01:00
parent 60258bd57b
commit a725a4e278
2 changed files with 178 additions and 0 deletions
@@ -1414,6 +1414,92 @@ void ThreadState::StepNext(ShaderDebugState *state, const rdcarray<ThreadState>
SetDst(math.result, var);
break;
}
// Extended integer math ops. Each one produces a two-member struct result: member 0 holds the
// low-order bits of the operation, member 1 holds the high-order bits (multiplies) or the
// carry/borrow flag (add/sub).
case Op::UMulExtended:
case Op::SMulExtended:
case Op::IAddCarry:
case Op::ISubBorrow:
{
  // all four opcodes are decoded through OpUMulExtended here; only result/operand1/operand2
  // are read from it.
  OpUMulExtended math(it);

  ShaderVariable a = GetSrc(math.operand1);
  ShaderVariable b = GetSrc(math.operand2);

  // both outputs start life as copies of the first operand, so everything other than the
  // per-component values written below is taken from 'a'.
  ShaderVariable lo = a;
  ShaderVariable hi = a;

  if(opdata.op == Op::UMulExtended)
  {
    // inputs narrower than 8 bytes: widen to 64-bit, multiply, then split the product in two.
    // NOTE(review): inputs of 8 bytes or more are not handled - lo/hi are left as copies of 'a'.
    if(VarTypeByteSize(a.type) < 8)
    {
      for(uint8_t comp = 0; comp < a.columns; ++comp)
      {
        const uint64_t product = uint64_t(a.value.uv[comp]) * uint64_t(b.value.uv[comp]);

        hi.value.uv[comp] = uint32_t(product >> 32);
        lo.value.uv[comp] = uint32_t(product & 0xFFFFFFFFu);
      }
    }
  }
  else if(opdata.op == Op::SMulExtended)
  {
    // same approach as the unsigned multiply, but the operands are sign-extended to 64-bit.
    // NOTE(review): inputs of 8 bytes or more are not handled here either.
    if(VarTypeByteSize(a.type) < 8)
    {
      for(uint8_t comp = 0; comp < a.columns; ++comp)
      {
        const int64_t product = int64_t(a.value.iv[comp]) * int64_t(b.value.iv[comp]);

        hi.value.iv[comp] = int32_t(product >> 32);
        lo.value.iv[comp] = int32_t(product & 0xFFFFFFFFu);
      }
    }
  }
  else if(opdata.op == Op::IAddCarry)
  {
    for(uint8_t comp = 0; comp < a.columns; ++comp)
    {
      // unsigned addition wraps on overflow, which is exactly the low part we want to return
      lo.value.uv[comp] = a.value.uv[comp] + b.value.uv[comp];

      // a wrapped sum is smaller than either operand, so comparing against one detects carry
      if(lo.value.uv[comp] < b.value.uv[comp])
        hi.value.uv[comp] = 1;
      else
        hi.value.uv[comp] = 0;
    }
  }
  else if(opdata.op == Op::ISubBorrow)
  {
    for(uint8_t comp = 0; comp < a.columns; ++comp)
    {
      if(a.value.uv[comp] < b.value.uv[comp])
      {
        // a < b: flag the borrow and return the difference wrapped around from the top
        hi.value.uv[comp] = 1;
        lo.value.uv[comp] = 0xFFFFFFFFu - (b.value.uv[comp] - a.value.uv[comp] - 1);
      }
      else
      {
        // b <= a: plain subtraction, nothing to borrow
        hi.value.uv[comp] = 0;
        lo.value.uv[comp] = a.value.uv[comp] - b.value.uv[comp];
      }
    }
  }

  // name the two halves, then wrap them up as the members of a struct and store that as the
  // instruction's result.
  lo.name = "_child0";
  hi.name = "_child1";

  ShaderVariable result;
  result.isStruct = true;
  result.rows = 1;
  result.columns = 1;
  result.members = {lo, hi};

  SetDst(math.result, result);
  break;
}
case Op::FNegate:
case Op::SNegate:
{
@@ -755,6 +755,98 @@ void main()
ldexp(posone*4.4f, zeroi+7));
break;
}
case 96:
{
// Test 96: uaddCarry/usubBorrow where neither operation wraps - a + b fits in 32 bits and
// a > b.
// zerou is declared outside this case - presumably a runtime zero so that the operands are
// not compile-time constants (TODO confirm).
uint a = zerou + 0xb0b0b0b0;
uint b = zerou + 0x12345678;
// add and sub with no carry/borrow
// x receives the 32-bit sum, y the carry-out
uint y;
uint x = uaddCarry(a, b, y);
// z receives the 32-bit difference, w the borrow-out
uint w;
uint z = usubBorrow(a, b, w);
// write out all four values so that every output of both ops contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
case 97:
{
// Test 97: uaddCarry/usubBorrow where both operations wrap - a + b exceeds 32 bits and
// a < b.
// zerou is declared outside this case - presumably a runtime zero so that the operands are
// not compile-time constants (TODO confirm).
uint a = zerou + 0xb0b0b0b0;
uint b = zerou + 0xdeadbeef;
// add and sub with carry/borrow
// x receives the wrapped 32-bit sum, y the carry-out
uint y;
uint x = uaddCarry(a, b, y);
// z receives the wrapped 32-bit difference, w the borrow-out
uint w;
uint z = usubBorrow(a, b, w);
// write out all four values so that every output of both ops contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
case 98:
{
// Test 98: uaddCarry/usubBorrow where only the add wraps. The operands are those of case 97
// swapped, so a + b still exceeds 32 bits but a > b means the subtract does not underflow.
// This covers the carry and borrow flags disagreeing, instead of repeating case 97 verbatim.
// zerou is declared outside this case - presumably a runtime zero so that the operands are
// not compile-time constants (TODO confirm).
uint a = zerou + 0xdeadbeef;
uint b = zerou + 0xb0b0b0b0;
// add with carry, sub with no borrow
// x receives the wrapped 32-bit sum, y the carry-out
uint y;
uint x = uaddCarry(a, b, y);
// z receives the 32-bit difference, w the borrow-out
uint w;
uint z = usubBorrow(a, b, w);
// write out all four values so that every output of both ops contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
case 99:
{
// Test 99: umulExtended/imulExtended with small positive operands whose product fits in
// 32 bits.
// zerou/zeroi are declared outside this case - presumably runtime zeros so that the operands
// are not compile-time constants (TODO confirm).
uint a = zerou + 0x1234;
uint b = zerou + 0x5678;
int c = zeroi + 0x1234;
int d = zeroi + 0x5678;
// positive mul with no overflow
// the third argument receives the most significant half of the product and the fourth the
// least significant half, so x/z are the low halves and y/w the high halves
uint x, y;
umulExtended(a, b, y, x);
int z, w;
imulExtended(c, d, w, z);
// write out both halves of both products so that every output contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
case 100:
{
// Test 100: umulExtended/imulExtended with positive operands whose product needs more than
// 32 bits, so the high half is non-zero.
// zerou/zeroi are declared outside this case - presumably runtime zeros so that the operands
// are not compile-time constants (TODO confirm).
uint a = zerou + 0x123456;
uint b = zerou + 0x78abcd;
int c = zeroi + 0x123456;
int d = zeroi + 0x78abcd;
// positive mul with overflow
// the third argument receives the most significant half of the product and the fourth the
// least significant half, so x/z are the low halves and y/w the high halves
uint x, y;
umulExtended(a, b, y, x);
int z, w;
imulExtended(c, d, w, z);
// write out both halves of both products so that every output contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
case 101:
{
// Test 101: imulExtended with negative operands. The a*b product fits in 32 bits, the c*d
// product does not.
// NOTE(review): both multiplies have two negative operands, so both products are positive;
// a negative product (operands of mixed sign) is not exercised by this case.
// zeroi is declared outside this case - presumably a runtime zero so that the operands are
// not compile-time constants (TODO confirm).
int a = zeroi - 0x1234;
int b = zeroi - 0x5678;
int c = zeroi - 0x123456;
int d = zeroi - 0x78abcd;
// negative mul with and without overflow
// the third argument receives the most significant half of the product and the fourth the
// least significant half, so x/z are the low halves and y/w the high halves
int x, y;
imulExtended(a, b, y, x);
int z, w;
imulExtended(c, d, w, z);
// write out both halves of both products so that every output contributes to the result
Color = vec4(float(x), float(y), float(z), float(w));
break;
}
)EOSHADER"
R"EOSHADER(
default: break;
}
}