Files
renderdoc/util/test/demos/texture_zoo.cpp
T
baldurk f622ac36d6 Standardise layout of packed texture formats on disk/network
* We preserve each API's interpretation of bit order for packed formats like
  RGBA4 or R5G6B5 when displaying the raw data in the UI, but when we need to
  proxy it or save to disk, we always transform to D3D's order as standard.
* This allows us to proxy them reliably because we always have a standard bit
  order and APIs that need a different order transform when fetching data to the
  standard format, or setting proxy data from the standard format.
2020-05-18 13:21:55 +01:00

797 lines
26 KiB
C++

/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2020 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include <algorithm>
#include "test_common.h"
namespace TextureZoo
{
// Writes one pixel of the procedural test pattern into `data`, using the component count, component
// byte width and data type described by `cfg`.
//
//   data   - destination; cfg.componentCount * cfg.componentBytes bytes are written
//   x,y,z  - texel coordinate within the mip. z only offsets x (wrapping at the mip width derived
//            from texWidth), so each 3D slice is the 2D pattern shifted sideways.
//   mip    - mip level; brightens the values slightly per level
//   slice  - array slice; slices where (slice % 3) > 0 overlay a coarse checkerboard inversion
//
// Float/UNorm/SNorm configs are generated from a float ramp, UInt/SInt from an integer ramp. Any
// other data type leaves `data` untouched.
void MakePixel(byte *data, const TexConfig &cfg, uint32_t x, uint32_t y, uint32_t z, uint32_t mip,
               uint32_t slice)
{
  // each 3D slice cycles the x
  x += z;
  x %= std::max(1U, texWidth >> mip);

  // pixels off the diagonal invert the colors, and subsequent slices add a coarse checkerboard
  // pattern of inverted colors on top. Inverting twice restores the original component order, so
  // only the parity of the two conditions matters - and it is the same for every component.
  const bool offDiagonal = (x != y);
  const bool checkerboard = (slice % 3 > 0) && (((x / 2) % 2) != ((y / 2) % 2));
  const bool invert = (offDiagonal != checkerboard);

  if(cfg.data == DataType::Float || cfg.data == DataType::UNorm || cfg.data == DataType::SNorm)
  {
    // start points for each component
    const float vals[] = {
        0.1f, 0.35f, 0.6f, 0.85f,
    };

    for(uint32_t c = 0; c < cfg.componentCount; c++)
    {
      const uint32_t idx = invert ? 3 - c : c;

      float f = vals[idx];

      // subsequent mips are shifted up a bit
      f += 0.075f * mip;

      // Signed normals are negative
      if(cfg.data == DataType::SNorm)
        f = -f;

      // if it's a full float, just copy
      if(cfg.componentBytes == 4)
      {
        memcpy(data, &f, cfg.componentBytes);
      }
      else if(cfg.componentBytes == 2)
      {
        // zero-initialised so no indeterminate value can ever be copied out
        uint16_t h = 0;

        if(cfg.data == DataType::Float)
          h = MakeHalf(f);
        else if(cfg.data == DataType::UNorm)
          h = uint16_t(f * 0xffff);
        else if(cfg.data == DataType::SNorm)
          h = int16_t(f * 0x7fff);

        memcpy(data, &h, cfg.componentBytes);
      }
      else if(cfg.componentBytes == 1)
      {
        // there is no 8-bit float encoding here: a Float config with 1-byte components matches
        // neither branch below, so it writes this zero instead of an uninitialised byte
        uint8_t b = 0;

        if(cfg.data == DataType::UNorm)
          b = uint8_t(f * 0xff);
        else if(cfg.data == DataType::SNorm)
          b = int8_t(f * 0x7f);

        memcpy(data, &b, cfg.componentBytes);
      }
      else
      {
        TEST_ERROR("Unexpected component bytes %d in float", cfg.componentBytes);
      }

      data += cfg.componentBytes;
    }
  }
  else if(cfg.data == DataType::UInt || cfg.data == DataType::SInt)
  {
    // same pattern as above but with integer values
    const int32_t vals[] = {
        10, 40, 70, 100,
    };

    for(uint32_t c = 0; c < cfg.componentCount; c++)
    {
      const uint32_t idx = invert ? 3 - c : c;

      int32_t val = vals[idx];

      // subsequent mips are shifted up a bit
      val += 10 * mip;

      // Signed ints are negative
      if(cfg.data == DataType::SInt)
        val = -val;

      // because the values are below one byte and we're little-endian we can just copy the
      // right number of bytes from val
      memcpy(data, &val, cfg.componentBytes);

      data += cfg.componentBytes;
    }
  }
}
// Generates the test-pattern contents for one mip level of one array slice of a texture.
//
//   data       - output: byteData receives the encoded texels (all depth slices of the mip, tightly
//                packed), rowPitch/slicePitch describe that layout in bytes. Reset to a default
//                TexData() when cfg.type is Unknown or not handled here.
//   cfg        - format description. TextureType::Regular is written per-component via MakePixel;
//                every other type is encoded from a 4-component 32-bit source image generated by a
//                recursive call with a Regular config.
//   dimensions - size of mip 0 (x, y, z); each is shifted down by `mip` and clamped to at least 1
//   mip, slice - forwarded to MakePixel to vary the pattern
//
// Block-compressed types are encoded with only the two endpoint codes, which relies on the pattern
// containing at most two distinct colours in any 4x4 block.
void MakeData(TexData &data, const TexConfig &cfg, Vec4i dimensions, uint32_t mip, uint32_t slice)
{
  uint32_t mipWidth = std::max(1, dimensions.x >> mip);
  uint32_t mipHeight = std::max(1, dimensions.y >> mip);
  uint32_t mipDepth = std::max(1, dimensions.z >> mip);

  if(cfg.type == TextureType::Unknown)
  {
    data = TexData();
    return;
  }
  else if(cfg.type == TextureType::Regular)
  {
    uint32_t pixelPitch = cfg.componentBytes * cfg.componentCount;

    data.rowPitch = pixelPitch * mipWidth;
    data.slicePitch = data.rowPitch * mipHeight;
    data.byteData.resize(data.slicePitch * mipDepth);

    byte *out = data.byteData.data();

    for(uint32_t z = 0; z < mipDepth; z++)
    {
      for(uint32_t y = 0; y < mipHeight; y++)
      {
        for(uint32_t x = 0; x < mipWidth; x++)
        {
          MakePixel(out, cfg, x, y, z, mip, slice);
          out += pixelPitch;
        }
      }
    }
  }
  else
  {
    // flags selecting which encoder below runs, decoded from cfg.type
    bool bc1 = false, bc2alpha = false, bc3alpha = false, bc6 = false, bc7 = false, sharedExp = false;
    int bc4channels = 0;

    // for 4-bit-per-component formats: each hex digit, lowest first, is the 1-based source
    // component (1 = x ... 4 = w) stored in successive 4-bit fields starting at the lowest bits
    uint32_t nybblePattern = 0;

    bool rgb5 = false;
    // 0 = no alpha bit (5:6:5), 1 = alpha in the top bit, 2 = alpha in the bottom bit
    int alphabitPlace = 0;

    bool rgb10a2 = false;

    switch(cfg.type)
    {
      case TextureType::BC1: bc1 = true; break;
      case TextureType::BC2:
        bc1 = true;
        bc2alpha = true;
        break;
      case TextureType::BC3:
        bc1 = true;
        bc3alpha = true;
        break;
      case TextureType::BC4: bc4channels = 1; break;
      case TextureType::BC5: bc4channels = 2; break;
      case TextureType::BC6: bc6 = true; break;
      case TextureType::BC7: bc7 = true; break;
      case TextureType::R9G9B9E5: sharedExp = true; break;
      case TextureType::G4R4: nybblePattern = 0x12; break;
      case TextureType::A4R4G4B4: nybblePattern = 0x3214; break;
      case TextureType::R4G4B4A4: nybblePattern = 0x4321; break;
      case TextureType::R5G6B5:
        rgb5 = true;
        alphabitPlace = 0;
        break;
      case TextureType::R5G5B5A1:
        rgb5 = true;
        alphabitPlace = 1;
        break;
      case TextureType::A1R5G5B5:
        rgb5 = true;
        alphabitPlace = 2;
        break;
      case TextureType::RGB10A2: rgb10a2 = true; break;
      default: data = TexData(); return;
    }

    // get float data so we can do the best possible job of truncating to the desired bit width
    TexConfig floatcfg = {TextureType::Regular, 4, 4, DataType::Float};
    TexData floatdata;

    // integer RGB10A2 is packed from the integer pattern instead of the float one
    if(rgb10a2 && cfg.data == DataType::UInt)
      floatcfg.data = cfg.data;

    MakeData(floatdata, floatcfg, dimensions, mip, slice);

    // two views of the same source buffer; which one is meaningful depends on floatcfg.data
    Vec4f *srcPixels = (Vec4f *)floatdata.byteData.data();
    Vec4i *srcPixelsI = (Vec4i *)floatdata.byteData.data();

    if(rgb10a2)
    {
      uint32_t pixelPitch = 4;

      data.rowPitch = pixelPitch * mipWidth;
      data.slicePitch = data.rowPitch * mipHeight;
      data.byteData.resize(data.slicePitch * mipDepth);

      uint32_t *out = (uint32_t *)data.byteData.data();

      for(uint32_t z = 0; z < mipDepth; z++)
      {
        for(uint32_t y = 0; y < mipHeight; y++)
        {
          for(uint32_t x = 0; x < mipWidth; x++)
          {
            uint32_t encodedPixel = 0;

            if(cfg.data == DataType::UInt)
            {
              int32_t rgba[4];
              rgba[0] = srcPixelsI[y * mipWidth + x].x;
              rgba[1] = srcPixelsI[y * mipWidth + x].y;
              rgba[2] = srcPixelsI[y * mipWidth + x].z;
              rgba[3] = srcPixelsI[y * mipWidth + x].w;

              encodedPixel |= (rgba[0] & 0x3ff) << 0;
              encodedPixel |= (rgba[1] & 0x3ff) << 10;
              encodedPixel |= (rgba[2] & 0x3ff) << 20;
              // alpha only has 2 bits, so saturate rather than wrap
              encodedPixel |= (std::min(rgba[3], 3) & 0x3) << 30;
            }
            else
            {
              float rgba[4];
              rgba[0] = srcPixels[y * mipWidth + x].x;
              rgba[1] = srcPixels[y * mipWidth + x].y;
              rgba[2] = srcPixels[y * mipWidth + x].z;
              rgba[3] = srcPixels[y * mipWidth + x].w;

              encodedPixel |= uint32_t(round(rgba[0] * 0x3ff)) << 0;
              encodedPixel |= uint32_t(round(rgba[1] * 0x3ff)) << 10;
              encodedPixel |= uint32_t(round(rgba[2] * 0x3ff)) << 20;
              encodedPixel |= uint32_t(round(rgba[3] * 0x3)) << 30;
            }

            *out = encodedPixel;
            out++;
          }
        }

        // advance the source to the next depth slice
        srcPixels += mipWidth * mipHeight;
        srcPixelsI += mipWidth * mipHeight;
      }
    }
    else if(nybblePattern || rgb5)
    {
      // a nybble pattern with only two entries (G4R4) packs into a single byte per pixel, every
      // other format in this branch is 16 bits. The pitch has to match what the loop below writes,
      // otherwise rowPitch/slicePitch describe a layout twice as wide as the actual data.
      const bool singleByte = (nybblePattern != 0) && ((nybblePattern & 0xff00) == 0);
      uint32_t pixelPitch = singleByte ? 1 : 2;

      data.rowPitch = pixelPitch * mipWidth;
      data.slicePitch = data.rowPitch * mipHeight;
      data.byteData.resize(data.slicePitch * mipDepth);

      uint8_t *out = data.byteData.data();

      for(uint32_t z = 0; z < mipDepth; z++)
      {
        for(uint32_t y = 0; y < mipHeight; y++)
        {
          for(uint32_t x = 0; x < mipWidth; x++)
          {
            float rgb[4];
            rgb[0] = srcPixels[y * mipWidth + x].x;
            rgb[1] = srcPixels[y * mipWidth + x].y;
            rgb[2] = srcPixels[y * mipWidth + x].z;
            rgb[3] = srcPixels[y * mipWidth + x].w;

            if(rgb5)
            {
              // the single alpha bit is a simple threshold
              bool alpha = rgb[3] >= 0.5f;

              uint16_t encodedPixel = 0;

              if(alphabitPlace == 0)
              {
                // 5:6:5, no alpha
                encodedPixel |= uint16_t(rgb[0] * 31) << 0;
                encodedPixel |= uint16_t(rgb[1] * 63) << 5;
                encodedPixel |= uint16_t(rgb[2] * 31) << 11;
              }
              else
              {
                encodedPixel |= uint16_t(rgb[0] * 31) << 0;
                encodedPixel |= uint16_t(rgb[1] * 31) << 5;
                encodedPixel |= uint16_t(rgb[2] * 31) << 10;

                if(alphabitPlace == 1)
                {
                  // alpha above the colour bits
                  if(alpha)
                    encodedPixel |= 0x8000;
                }
                else
                {
                  // alpha below the colour bits, so move the colour up to make room
                  encodedPixel <<= 1;
                  if(alpha)
                    encodedPixel |= 0x1;
                }
              }

              memcpy(out, &encodedPixel, sizeof(encodedPixel));
              out += 2;
            }
            else
            {
              // first byte: pattern digits 0 and 1
              uint8_t encodedPixel = 0;
              encodedPixel |= uint8_t(rgb[((nybblePattern & 0x000f) >> 0) - 1] * 15) << 0;
              encodedPixel |= uint8_t(rgb[((nybblePattern & 0x00f0) >> 4) - 1] * 15) << 4;

              *out = encodedPixel;
              out++;

              // second byte only exists for four-component patterns
              if(nybblePattern & 0xff00)
              {
                encodedPixel = 0;
                encodedPixel |= uint8_t(rgb[((nybblePattern & 0x0f00) >> 8) - 1] * 15) << 0;
                encodedPixel |= uint8_t(rgb[((nybblePattern & 0xf000) >> 12) - 1] * 15) << 4;

                *out = encodedPixel;
                out++;
              }
            }
          }
        }

        // advance the source to the next depth slice
        srcPixels += mipWidth * mipHeight;
      }
    }
    else if(sharedExp)
    {
      uint32_t pixelPitch = 4;

      data.rowPitch = pixelPitch * mipWidth;
      data.slicePitch = data.rowPitch * mipHeight;
      data.byteData.resize(data.slicePitch * mipDepth);

      uint32_t *out = (uint32_t *)data.byteData.data();

      for(uint32_t z = 0; z < mipDepth; z++)
      {
        for(uint32_t y = 0; y < mipHeight; y++)
        {
          for(uint32_t x = 0; x < mipWidth; x++)
          {
            float rgb[3];
            rgb[0] = srcPixels[y * mipWidth + x].x;
            rgb[1] = srcPixels[y * mipWidth + x].y;
            rgb[2] = srcPixels[y * mipWidth + x].z;

            uint32_t encodedPixel = 0;

            int exp = -10;

            // we pick the highest exponent, losing bits off the bottom of any value that
            // needs a lower one, rather than picking a lower one and having to saturate
            // values that need a higher one
            for(int channel = 0; channel < 3; channel++)
            {
              int e = 0;
              frexpf(rgb[channel], &e);
              exp = std::max(exp, e);
            }

            // NOTE(review): (1 << exp) needs exp >= 0, i.e. at least one channel >= 0.5. The
            // generated pattern appears to always satisfy that - confirm before reusing this with
            // other source data.
            for(int channel = 0; channel < 3; channel++)
              encodedPixel |= uint32_t(rgb[channel] * 511.0 / (1 << exp)) << (9 * channel);

            // 5-bit exponent stored with a bias of 15 in the top bits
            encodedPixel |= (exp + 15) << 27;

            *out = encodedPixel;
            out++;
          }
        }

        // advance the source to the next depth slice
        srcPixels += mipWidth * mipHeight;
      }
    }
    else
    {
      // these don't change, but make the code easier to read
      const uint32_t blockWidth = 4;
      const uint32_t blockHeight = 4;

      uint32_t blockSize;

      // 0.5 byte per pixel
      if(cfg.type == TextureType::BC1 || cfg.type == TextureType::BC4)
        blockSize = 8;
      else
        blockSize = 16;

      // round the block counts up so they always match the number of blocks the loops below emit
      // (they step by a whole block while x < mipWidth / y < mipHeight). Rounding down would
      // under-allocate for any dimension above 4 that isn't a multiple of the block size.
      const uint32_t blocksWide = (mipWidth + blockWidth - 1) / blockWidth;
      const uint32_t blocksHigh = (mipHeight + blockHeight - 1) / blockHeight;

      data.rowPitch = blockSize * blocksWide;
      data.slicePitch = data.rowPitch * blocksHigh;
      data.byteData.resize(data.slicePitch * mipDepth);

      byte *out = (byte *)data.byteData.data();

      // sentinel for "no pixel here" / "endpoint not chosen yet"
      const Vec4f invalid(999001.0f, 999002.0f, -999003.0f, -999004.0f);

      // compress each slice separately
      for(uint32_t z = 0; z < mipDepth; z++)
      {
        // block compressed - iterate over the pixels in block size
        for(uint32_t y = 0; y < mipHeight; y += blockHeight)
        {
          for(uint32_t x = 0; x < mipWidth; x += blockWidth)
          {
            Vec4f blockPixels[blockWidth * blockHeight] = {};

            // copy all the in-range pixels into the block data
            for(uint32_t by = 0; by < blockHeight; by++)
            {
              for(uint32_t bx = 0; bx < blockWidth; bx++)
              {
                if(x + bx >= mipWidth || y + by >= mipHeight)
                {
                  blockPixels[by * blockWidth + bx] = invalid;
                }
                else
                {
                  blockPixels[by * blockWidth + bx] = srcPixels[(y + by) * mipWidth + (x + bx)];
                }
              }
            }

            // we should have at most two unique pixels. The pattern is structured to allow
            // that, since any other colour can't be uniquely represented in all compressed
            // formats (even interpolated values)
            Vec4f a = invalid, b = invalid;

            // per-pixel endpoint selectors: 2 bits per pixel for BC1, 3 bits per pixel for BC4
            uint32_t bc1bitmask = 0;
            uint64_t bc4bitmask = 0;

            // BC1 and BC4 both share A = 0 and B = 1 codes
            enum class BCCode : uint64_t
            {
              A = 0,
              B = 1,
            };

            // iterate the pixels in the block in ascending bitmask order
            for(uint32_t p = 0; p < blockWidth * blockHeight; p++)
            {
              if(blockPixels[p] == invalid)
              {
                // out of bounds pixel (think of a 2x2 mip), store as A - whatever A is.
                bc1bitmask |= uint32_t(BCCode::A) << (p * 2);
                bc4bitmask |= uint64_t(BCCode::A) << (p * 3);
              }
              else if(a == invalid)
              {
                // A hasn't been found yet, let's use this pixel for that
                a = blockPixels[p];
                bc1bitmask |= uint32_t(BCCode::A) << (p * 2);
                bc4bitmask |= uint64_t(BCCode::A) << (p * 3);
              }
              else if(blockPixels[p] == a)
              {
                // if A has been found then re-use it before assigning to B
                bc1bitmask |= uint32_t(BCCode::A) << (p * 2);
                bc4bitmask |= uint64_t(BCCode::A) << (p * 3);
              }
              else if(b == invalid)
              {
                // B hasn't been found yet, let's use this pixel for that
                b = blockPixels[p];
                bc1bitmask |= uint32_t(BCCode::B) << (p * 2);
                bc4bitmask |= uint64_t(BCCode::B) << (p * 3);
              }
              else if(blockPixels[p] == b)
              {
                bc1bitmask |= uint32_t(BCCode::B) << (p * 2);
                bc4bitmask |= uint64_t(BCCode::B) << (p * 3);
              }
              else
              {
                TEST_ERROR("Found pixel that isn't A, or B!");
              }
            }

            // the two endpoints quantised to every representation the encoders below need. If the
            // block only contained one colour, b is still the sentinel here; whatever it quantises
            // to is never selected because no pixel was given code B.
            byte a8[4], b8[4];
            uint16_t aHalf[4], bHalf[4];
            int16_t *aHalfS = (int16_t *)aHalf;
            int16_t *bHalfS = (int16_t *)bHalf;
            uint16_t a565 = 0;
            uint16_t b565 = 0;

            if(cfg.data == DataType::SNorm)
            {
              // the source pattern is positive, signed formats store the negated values
              int8_t *ia8 = (int8_t *)a8;
              int8_t *ib8 = (int8_t *)b8;

              ia8[0] = int8_t(round(a.x * -127.0f));
              ia8[1] = int8_t(round(a.y * -127.0f));
              ia8[2] = int8_t(round(a.z * -127.0f));
              ia8[3] = int8_t(round(a.w * -127.0f));

              ib8[0] = int8_t(round(b.x * -127.0f));
              ib8[1] = int8_t(round(b.y * -127.0f));
              ib8[2] = int8_t(round(b.z * -127.0f));
              ib8[3] = int8_t(round(b.w * -127.0f));

              aHalf[0] = MakeHalf(-a.x);
              aHalf[1] = MakeHalf(-a.y);
              aHalf[2] = MakeHalf(-a.z);
              aHalf[3] = MakeHalf(-a.w);

              bHalf[0] = MakeHalf(-b.x);
              bHalf[1] = MakeHalf(-b.y);
              bHalf[2] = MakeHalf(-b.z);
              bHalf[3] = MakeHalf(-b.w);
            }
            else
            {
              a8[0] = byte(round(a.x * 255.0f));
              a8[1] = byte(round(a.y * 255.0f));
              a8[2] = byte(round(a.z * 255.0f));
              a8[3] = byte(round(a.w * 255.0f));

              // red
              a565 |= byte(round(a.x * 31.0f)) << 11;
              // green
              a565 |= byte(round(a.y * 63.0f)) << 5;
              // blue
              a565 |= byte(round(a.z * 31.0f)) << 0;

              b8[0] = byte(round(b.x * 255.0f));
              b8[1] = byte(round(b.y * 255.0f));
              b8[2] = byte(round(b.z * 255.0f));
              b8[3] = byte(round(b.w * 255.0f));

              // red
              b565 |= byte(round(b.x * 31.0f)) << 11;
              // green
              b565 |= byte(round(b.y * 63.0f)) << 5;
              // blue
              b565 |= byte(round(b.z * 31.0f)) << 0;

              aHalf[0] = MakeHalf(a.x);
              aHalf[1] = MakeHalf(a.y);
              aHalf[2] = MakeHalf(a.z);
              aHalf[3] = MakeHalf(a.w);

              bHalf[0] = MakeHalf(b.x);
              bHalf[1] = MakeHalf(b.y);
              bHalf[2] = MakeHalf(b.z);
              bHalf[3] = MakeHalf(b.w);
            }

            // 8-byte colour block: two 5:6:5 endpoints then 2 bits of selector per pixel
            struct BC1
            {
              uint16_t a565;
              uint16_t b565;
              uint32_t bitmask;
            };

            static_assert(sizeof(BC1) == 8, "BC1 struct is mis-sized");

            // 8-byte single channel block: two 8-bit endpoints then 3 bits of selector per pixel
            struct BC4
            {
              uint64_t a : 8;
              uint64_t b : 8;
              uint64_t bitmask : 48;
            };

            static_assert(sizeof(BC4) == 8, "BC4 struct is mis-sized");

            if(bc2alpha)
            {
              // explicit 4-bit alpha per pixel, taken from whichever endpoint the pixel selected
              uint64_t alphaBits = 0;

              for(uint32_t p = 0; p < blockWidth * blockHeight; p++)
              {
                BCCode code = BCCode((bc1bitmask & (0x3 << (p * 2))) >> (p * 2));

                if(code == BCCode::A)
                  alphaBits |= uint64_t(a8[3] >> 4) << (p * 4);
                else if(code == BCCode::B)
                  alphaBits |= uint64_t(b8[3] >> 4) << (p * 4);
              }

              memcpy(out, &alphaBits, sizeof(alphaBits));
              out += sizeof(alphaBits);
            }
            else if(bc3alpha)
            {
              // basically the same layout just a different meaning for codes above 1, which we
              // don't use
              BC4 *alpha = (BC4 *)out;

              alpha->a = a8[3];
              alpha->b = b8[3];
              alpha->bitmask = bc4bitmask;

              out += sizeof(BC4);
            }

            if(bc1)
            {
              BC1 *rgb = (BC1 *)out;

              // we don't care about color0 <= color1 order
              rgb->a565 = a565;
              rgb->b565 = b565;
              rgb->bitmask = bc1bitmask;

              out += sizeof(BC1);
            }

            // BC4 stores one channel block, BC5 two
            for(int ch = 0; ch < bc4channels; ch++)
            {
              BC4 *alpha = (BC4 *)out;

              alpha->a = a8[ch];
              alpha->b = b8[ch];
              alpha->bitmask = bc4bitmask;

              out += sizeof(BC4);
            }

            // 4-bit indices shared by the BC6 and BC7 encodings below: 0 selects endpoint A, 15
            // selects endpoint B. The first pixel's index is stored with one bit fewer, which is
            // why every later index sits at (p * 4) - 1.
            uint64_t bc67indexbits = 0;

            if(bc6 || bc7)
            {
              for(uint32_t p = 0; p < blockWidth * blockHeight; p++)
              {
                BCCode code = BCCode((bc1bitmask & (0x3 << (p * 2))) >> (p * 2));

                if(p == 0)
                {
                  // the first colour we came across should have been assigned code A. We
                  // require this, because we're missing a bit from the first index
                  TEST_ASSERT(code == BCCode::A, "First code must be code A when encoding BC6");
                }
                else
                {
                  if(code == BCCode::A)
                  {
                    bc67indexbits |= uint64_t(0) << ((p * 4) - 1);
                  }
                  else if(code == BCCode::B)
                  {
                    bc67indexbits |= uint64_t(15) << ((p * 4) - 1);
                  }
                }
              }
            }

            if(bc6)
            {
              byte mode = 0x03;
              // mode 3: no transformed endpoints, 0 partition bits, 10 endpoint bits per
              // channel, no delta bits.

              uint16_t bias = 0;

              if(cfg.data == DataType::SNorm)
              {
                // final quantize step, the absolute value gets scaled a little
                for(int ch = 0; ch < 3; ch++)
                {
                  bool negA = (aHalf[ch] & 0x8000) != 0;
                  bool negB = (bHalf[ch] & 0x8000) != 0;

                  int16_t valA = int16_t(((aHalf[ch] & 0x7fff) * 32) / 31);
                  int16_t valB = int16_t(((bHalf[ch] & 0x7fff) * 32) / 31);

                  aHalfS[ch] = (negA ? -valA : valA);
                  bHalfS[ch] = (negB ? -valB : valB);
                }

                bias = 63;
              }
              else
              {
                // final quantize step, such that max representable half float is 65504.0
                // (which gets mapped to 0xffff)
                for(int ch = 0; ch < 3; ch++)
                {
                  aHalf[ch] = uint32_t(aHalf[ch] * 64) / 31;
                  bHalf[ch] = uint32_t(bHalf[ch] * 64) / 31;
                }

                bias = 15;
              }

              uint64_t colorbits = 0;
              byte colorbit65 = 0;

              // 10 bits for each value, RGB for A then RGB for B
              colorbits |= uint64_t((aHalf[0] + bias) >> 6) << 0;
              colorbits |= uint64_t((aHalf[1] + bias) >> 6) << 10;
              colorbits |= uint64_t((aHalf[2] + bias) >> 6) << 20;
              colorbits |= uint64_t((bHalf[0] + bias) >> 6) << 30;
              colorbits |= uint64_t((bHalf[1] + bias) >> 6) << 40;
              colorbits |= uint64_t((bHalf[2] + bias) >> 6) << 50;    // overflows by 1 bit
              colorbit65 = (bHalf[2] >> 15) & 0x1;

              uint64_t block[2];

              // first 64 bits are mode, and 59 of the color bits.
              block[0] = mode << 0;
              block[0] |= colorbits << 5;

              // second 64-bit is the top bit of the colors bits, then the index bits
              block[1] = (bc67indexbits << 1) | colorbit65;

              memcpy(out, block, sizeof(block));
              out += sizeof(block);
            }

// helpers for the BC7 encoding: drop an 8-bit value to 7 bits, and fetch the bit that was dropped
#define ROUND_7BIT(x) ((x) >> 1)
#define LO_BIT(x) ((x)&0x1)

            if(bc7)
            {
              byte mode = 0x40;
              // x1000000 = mode 6: no partition bits, no rotation bits, no index selection bit.
              // 7 color bits, 7 alpha bits, 1 endpoint p-bit, 0 shared p-bits, 4 index bits,
              // 0 secondary index bits

              // color is stored R0, R1, G0, G1, B0, B1 because we only have one subset
              uint64_t colorbits = 0;
              colorbits |= uint64_t(ROUND_7BIT(a8[0])) << 0;
              colorbits |= uint64_t(ROUND_7BIT(b8[0])) << 7;
              colorbits |= uint64_t(ROUND_7BIT(a8[1])) << 14;
              colorbits |= uint64_t(ROUND_7BIT(b8[1])) << 21;
              colorbits |= uint64_t(ROUND_7BIT(a8[2])) << 28;
              colorbits |= uint64_t(ROUND_7BIT(b8[2])) << 35;

              uint64_t alphabits = 0;
              alphabits |= uint64_t(ROUND_7BIT(a8[3])) << 0;
              alphabits |= uint64_t(ROUND_7BIT(b8[3])) << 7;

              byte endpointA = 0;
              byte endpointB = 0;

              // take a vote, if more than two of the original values have the low bit set, set
              // the endpoint. The tie-break is towards zero because we're wanting *more* than
              // two (so exactly two means 0)
              if(LO_BIT(a8[0]) + LO_BIT(a8[1]) + LO_BIT(a8[2]) + LO_BIT(a8[3]) > 2)
                endpointA = 1;
              if(LO_BIT(b8[0]) + LO_BIT(b8[1]) + LO_BIT(b8[2]) + LO_BIT(b8[3]) > 2)
                endpointB = 1;

              uint64_t block[2];

              // first 64 bits are mode, color, alpha, and endpoint A
              block[0] = mode << 0;
              block[0] |= colorbits << 7;
              block[0] |= alphabits << (7 + 42);
              block[0] |= uint64_t(endpointA & 0x1) << (7 + 42 + 14);

              // second 64-bit is endpoint B, then the index bits
              block[1] = (bc67indexbits << 1) | endpointB;

              memcpy(out, block, sizeof(block));
              out += sizeof(block);
            }
          }
        }

        // advance the source to the next depth slice
        srcPixels += floatdata.slicePitch / sizeof(Vec4f);
      }
    }
  }
}
}; // namespace TextureZoo