Optimized BFREV opcode in dxbc_debug

Main goal was to fix undefined behaviour, but it's also 50x faster...

cppcheck:
[renderdoc/driver/shaders/dxbc/dxbc_debug.cpp:1163]: (error) Shifting signed 32-bit value by 31 bits is undefined behaviour
This commit is contained in:
Cory Bloor
2017-11-03 02:36:32 -06:00
committed by Baldur Karlsson
parent 5e60d90dd4
commit f7b9f0be0d
+21 -10
View File
@@ -1093,6 +1093,25 @@ ShaderVariable State::GetSrc(const ASMOperand &oper, const ASMOperation &op) con
return v;
}
static uint32_t BitwiseReverseLSB16(uint32_t x)
{
// Reverse the bits in x, then discard the lower half
// https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
x = ((x >> 1) & 0x55555555) | ((x & 0x55555555) << 1);
x = ((x >> 2) & 0x33333333) | ((x & 0x33333333) << 2);
x = ((x >> 4) & 0x0F0F0F0F) | ((x & 0x0F0F0F0F) << 4);
x = ((x >> 8) & 0x00FF00FF) | ((x & 0x00FF00FF) << 8);
return x << 16;
}
static uint32_t PopCount(uint32_t x)
{
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
x = x - ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
State State::GetNext(GlobalState &global, State quad[4]) const
{
State s = *this;
@@ -1156,13 +1175,9 @@ State State::GetNext(GlobalState &global, State quad[4]) const
{
ShaderVariable ret("", 0U, 0U, 0U, 0U);
// do a bitwise reverse
for(size_t i = 0; i < 4; i++)
{
for(size_t b = 0; b < 32; b++)
{
ret.value.uv[i] |= (srcOpers[0].value.uv[i] & (1 << b)) << (31 - 2 * b);
}
ret.value.uv[i] = BitwiseReverseLSB16(srcOpers[0].value.uv[i]);
}
s.SetDst(op.operands[0], op, ret);
@@ -1173,13 +1188,9 @@ State State::GetNext(GlobalState &global, State quad[4]) const
{
ShaderVariable ret("", 0U, 0U, 0U, 0U);
// do a bitwise reverse
for(size_t i = 0; i < 4; i++)
{
uint32_t bf = srcOpers[0].value.uv[i];
bf = bf - ((bf >> 1) & 0x55555555);
bf = (bf & 0x33333333) + ((bf >> 2) & 0x33333333);
ret.value.uv[i] = (((bf + (bf >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
ret.value.uv[i] = PopCount(srcOpers[0].value.uv[i]);
}
s.SetDst(op.operands[0], op, ret);