/****************************************************************************** * The MIT License (MIT) * * Copyright (c) 2019-2026 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. ******************************************************************************/ #include #include "test_common.h" namespace TextureZoo { void MakePixel(byte *data, const TexConfig &cfg, uint32_t x, uint32_t y, uint32_t z, uint32_t mip, uint32_t slice) { // each 3D slice cycles the x x += z; x %= std::max(1U, texWidth >> mip); if(cfg.data == DataType::Float || cfg.data == DataType::UNorm || cfg.data == DataType::SNorm) { // start points for each component const float vals[] = { 0.1f, 0.35f, 0.6f, 0.85f, }; for(uint32_t c = 0; c < cfg.componentCount; c++) { uint32_t idx = c; // pixels off the diagonal invert the colors if(x != y) idx = 3 - idx; // subsequent slices add a coarse checkerboard pattern of inverted colors if((slice % 2 > 0) && (((x / 2) % 2) != ((y / 2) % 2))) idx = 3 - idx; float f = vals[idx]; // subsequent mips are shifted up a bit f += 0.075f * mip; // Signed normals are negative if(cfg.data == DataType::SNorm) f = -f; // if it's a full float, just copy if(cfg.componentBytes == 4) { memcpy(data, &f, cfg.componentBytes); } else if(cfg.componentBytes == 2) { uint16_t h; if(cfg.data == DataType::Float) h = MakeHalf(f); else if(cfg.data == DataType::UNorm) h = uint16_t(f * 0xffff); else if(cfg.data == DataType::SNorm) h = f < 0 ? int16_t(roundf(f * 0x8000)) : int16_t(roundf(f * 0x7fff)); memcpy(data, &h, cfg.componentBytes); } else if(cfg.componentBytes == 1) { uint8_t b; if(cfg.data == DataType::UNorm) b = uint8_t(f * 0xff); else if(cfg.data == DataType::SNorm) b = f < 0 ? int8_t(roundf(f * 0x80)) : int8_t(roundf(f * 0x7f)); memcpy(data, &b, cfg.componentBytes); } else { TEST_ERROR("Unexpected component bytes %d in float", cfg.componentBytes); } data += cfg.componentBytes; } } else if(cfg.data == DataType::UInt || cfg.data == DataType::SInt) { // same pattern as above but with integer values const int32_t vals[] = { 10, 40, 70, 100, }; for(uint32_t c = 0; c < cfg.componentCount; c++) { uint32_t idx = c; // pixels off the diagonal invert the colors if(x != y) idx = 3 - idx; if((slice % 3 > 0) && (((x / 2) % 2) != ((y / 2) % 2))) idx = 3 - idx; int32_t val = vals[idx]; val += 10 * mip; // Signed ints are negative if(cfg.data == DataType::SInt) val = -val; // because the values are below one byte and we're little-endian we can just copy the // right number of bytes from val memcpy(data, &val, cfg.componentBytes); data += cfg.componentBytes; } } } void MakeData(TexData &data, const TexConfig &cfg, Vec4i dimensions, uint32_t mip, uint32_t slice) { uint32_t mipWidth = std::max(1, dimensions.x >> mip); uint32_t mipHeight = std::max(1, dimensions.y >> mip); uint32_t mipDepth = std::max(1, dimensions.z >> mip); if(cfg.type == TextureType::Unknown) { data = TexData(); return; } else if(cfg.type == TextureType::Regular) { uint32_t pixelPitch = cfg.componentBytes * cfg.componentCount; data.rowPitch = pixelPitch * mipWidth; data.slicePitch = data.rowPitch * mipHeight; data.byteData.resize(data.slicePitch * mipDepth); byte *out = data.byteData.data(); for(uint32_t z = 0; z < mipDepth; z++) { for(uint32_t y = 0; y < mipHeight; y++) { for(uint32_t x = 0; x < mipWidth; x++) { MakePixel(out, cfg, x, y, z, mip, slice); out += pixelPitch; } } } } else { bool bc1 = false, bc2alpha = false, bc3alpha = false, bc6 = false, bc7 = false, sharedExp = false; int bc4channels = 0; uint32_t nybblePattern = 0; bool rgb5 = false; int alphabitPlace = 0; bool rgb10a2 = false; switch(cfg.type) { case TextureType::BC1: bc1 = true; break; case TextureType::BC2: bc1 = true; bc2alpha = true; break; case TextureType::BC3: bc1 = true; bc3alpha = true; break; case TextureType::BC4: bc4channels = 1; break; case TextureType::BC5: bc4channels = 2; break; case TextureType::BC6: bc6 = true; break; case TextureType::BC7: bc7 = true; break; case TextureType::R9G9B9E5: sharedExp = true; break; case TextureType::G4R4: nybblePattern = 0x12; break; case TextureType::A4R4G4B4: nybblePattern = 0x3214; break; case TextureType::R4G4B4A4: nybblePattern = 0x4321; break; case TextureType::R5G6B5: rgb5 = true; alphabitPlace = 0; break; case TextureType::R5G5B5A1: rgb5 = true; alphabitPlace = 1; break; case TextureType::A1R5G5B5: rgb5 = true; alphabitPlace = 2; break; case TextureType::RGB10A2: rgb10a2 = true; break; default: data = TexData(); return; } // get float data so we can do the best possible job of truncating to the desired bit width TexConfig floatcfg = {TextureType::Regular, 4, 4, DataType::Float}; TexData floatdata; if(rgb10a2 && cfg.data == DataType::UInt) floatcfg.data = cfg.data; MakeData(floatdata, floatcfg, dimensions, mip, slice); Vec4f *srcPixels = (Vec4f *)floatdata.byteData.data(); Vec4i *srcPixelsI = (Vec4i *)floatdata.byteData.data(); if(rgb10a2) { uint32_t pixelPitch = 4; data.rowPitch = pixelPitch * mipWidth; data.slicePitch = data.rowPitch * mipHeight; data.byteData.resize(data.slicePitch * mipDepth); uint32_t *out = (uint32_t *)data.byteData.data(); for(uint32_t z = 0; z < mipDepth; z++) { for(uint32_t y = 0; y < mipHeight; y++) { for(uint32_t x = 0; x < mipWidth; x++) { uint32_t encodedPixel = 0; if(cfg.data == DataType::UInt) { int32_t rgba[4]; rgba[0] = srcPixelsI[y * mipWidth + x].x; rgba[1] = srcPixelsI[y * mipWidth + x].y; rgba[2] = srcPixelsI[y * mipWidth + x].z; rgba[3] = srcPixelsI[y * mipWidth + x].w; encodedPixel |= (rgba[0] & 0x3ff) << 0; encodedPixel |= (rgba[1] & 0x3ff) << 10; encodedPixel |= (rgba[2] & 0x3ff) << 20; encodedPixel |= (std::min(rgba[3], 3) & 0x3) << 30; } else { float rgba[4]; rgba[0] = srcPixels[y * mipWidth + x].x; rgba[1] = srcPixels[y * mipWidth + x].y; rgba[2] = srcPixels[y * mipWidth + x].z; rgba[3] = srcPixels[y * mipWidth + x].w; encodedPixel |= uint32_t(round(rgba[0] * 0x3ff)) << 0; encodedPixel |= uint32_t(round(rgba[1] * 0x3ff)) << 10; encodedPixel |= uint32_t(round(rgba[2] * 0x3ff)) << 20; encodedPixel |= uint32_t(round(rgba[3] * 0x3)) << 30; } *out = encodedPixel; out++; } } srcPixels += mipWidth * mipHeight; srcPixelsI += mipWidth * mipHeight; } } else if(nybblePattern || rgb5) { uint32_t pixelPitch = 2; data.rowPitch = pixelPitch * mipWidth; data.slicePitch = data.rowPitch * mipHeight; data.byteData.resize(data.slicePitch * mipDepth); uint8_t *out = data.byteData.data(); for(uint32_t z = 0; z < mipDepth; z++) { for(uint32_t y = 0; y < mipHeight; y++) { for(uint32_t x = 0; x < mipWidth; x++) { float rgb[4]; rgb[0] = srcPixels[y * mipWidth + x].x; rgb[1] = srcPixels[y * mipWidth + x].y; rgb[2] = srcPixels[y * mipWidth + x].z; rgb[3] = srcPixels[y * mipWidth + x].w; if(rgb5) { bool alpha = rgb[3] >= 0.5f; uint16_t encodedPixel = 0; if(alphabitPlace == 0) { encodedPixel |= uint16_t(rgb[0] * 31) << 0; encodedPixel |= uint16_t(rgb[1] * 63) << 5; encodedPixel |= uint16_t(rgb[2] * 31) << 11; } else { encodedPixel |= uint16_t(rgb[0] * 31) << 0; encodedPixel |= uint16_t(rgb[1] * 31) << 5; encodedPixel |= uint16_t(rgb[2] * 31) << 10; if(alphabitPlace == 1) { if(alpha) encodedPixel |= 0x8000; } else { encodedPixel <<= 1; if(alpha) encodedPixel |= 0x1; } } memcpy(out, &encodedPixel, sizeof(encodedPixel)); out += 2; } else { uint8_t encodedPixel = 0; encodedPixel |= uint8_t(rgb[((nybblePattern & 0x000f) >> 0) - 1] * 15) << 0; encodedPixel |= uint8_t(rgb[((nybblePattern & 0x00f0) >> 4) - 1] * 15) << 4; *out = encodedPixel; out++; if(nybblePattern & 0xff00) { encodedPixel = 0; encodedPixel |= uint8_t(rgb[((nybblePattern & 0x0f00) >> 8) - 1] * 15) << 0; encodedPixel |= uint8_t(rgb[((nybblePattern & 0xf000) >> 12) - 1] * 15) << 4; *out = encodedPixel; out++; } } } } srcPixels += mipWidth * mipHeight; } } else if(sharedExp) { uint32_t pixelPitch = 4; data.rowPitch = pixelPitch * mipWidth; data.slicePitch = data.rowPitch * mipHeight; data.byteData.resize(data.slicePitch * mipDepth); uint32_t *out = (uint32_t *)data.byteData.data(); for(uint32_t z = 0; z < mipDepth; z++) { for(uint32_t y = 0; y < mipHeight; y++) { for(uint32_t x = 0; x < mipWidth; x++) { float rgb[3]; rgb[0] = srcPixels[y * mipWidth + x].x; rgb[1] = srcPixels[y * mipWidth + x].y; rgb[2] = srcPixels[y * mipWidth + x].z; uint32_t encodedPixel = 0; int exp = -10; // we pick the highest exponent, losing bits off the bottom of any value that // needs a lower one, rather than picking a lower one and having to saturate // values that need a higher one for(int channel = 0; channel < 3; channel++) { int e = 0; frexpf(rgb[channel], &e); exp = std::max(exp, e); } for(int channel = 0; channel < 3; channel++) encodedPixel |= uint32_t(rgb[channel] * 511.0 / (1 << exp)) << (9 * channel); encodedPixel |= (exp + 15) << 27; *out = encodedPixel; out++; } } srcPixels += mipWidth * mipHeight; } } else { // these don't change, but make the code easier to read const uint32_t blockWidth = 4; const uint32_t blockHeight = 4; uint32_t blockSize; // 0.5 byte per pixel if(cfg.type == TextureType::BC1 || cfg.type == TextureType::BC4) blockSize = 8; else blockSize = 16; data.rowPitch = blockSize * std::max(1U, mipWidth / blockWidth); data.slicePitch = data.rowPitch * std::max(1U, mipHeight / blockHeight); data.byteData.resize(data.slicePitch * mipDepth); byte *out = (byte *)data.byteData.data(); const Vec4f invalid(999001.0f, 999002.0f, -999003.0f, -999004.0f); // compress each slice separately for(uint32_t z = 0; z < mipDepth; z++) { // block compressed - iterate over the pixels in block size for(uint32_t y = 0; y < mipHeight; y += blockHeight) { for(uint32_t x = 0; x < mipWidth; x += blockWidth) { Vec4f blockPixels[blockWidth * blockHeight] = {}; // copy all the in-range pixels into the block data for(uint32_t by = 0; by < blockHeight; by++) { for(uint32_t bx = 0; bx < blockWidth; bx++) { if(x + bx >= mipWidth || y + by >= mipHeight) { blockPixels[by * blockWidth + bx] = invalid; } else { blockPixels[by * blockWidth + bx] = srcPixels[(y + by) * mipWidth + (x + bx)]; } } } // we should have at most two unique pixels. The pattern is structured to allow // that, since any other colour can't be uniquely represented in all compressed // formats (even interpolated values) Vec4f a = invalid, b = invalid; uint32_t bc1bitmask = 0; uint64_t bc4bitmask = 0; // BC1 and BC4 both share A = 0 and B = 0 codes enum class BCCode : uint64_t { A = 0, B = 1, }; // iterate the pixels in the block in ascending bitmask order for(uint32_t p = 0; p < blockWidth * blockHeight; p++) { if(blockPixels[p] == invalid) { // out of bounds pixel (think of a 2x2 mip), store as A - whatever A is. bc1bitmask |= uint32_t(BCCode::A) << (p * 2); bc4bitmask |= uint64_t(BCCode::A) << (p * 3); } else if(a == invalid) { // A hasn't been found yet, let's use this pixel for that a = blockPixels[p]; bc1bitmask |= uint32_t(BCCode::A) << (p * 2); bc4bitmask |= uint64_t(BCCode::A) << (p * 3); } else if(blockPixels[p] == a) { // if A has been found then re-use it before assigning to B bc1bitmask |= uint32_t(BCCode::A) << (p * 2); bc4bitmask |= uint64_t(BCCode::A) << (p * 3); } else if(b == invalid) { // B hasn't been found yet, let's use this pixel for that b = blockPixels[p]; bc1bitmask |= uint32_t(BCCode::B) << (p * 2); bc4bitmask |= uint64_t(BCCode::B) << (p * 3); } else if(blockPixels[p] == b) { bc1bitmask |= uint32_t(BCCode::B) << (p * 2); bc4bitmask |= uint64_t(BCCode::B) << (p * 3); } else { TEST_ERROR("Found pixel that isn't A, or B!"); } } byte a8[4], b8[4]; uint16_t aHalf[4], bHalf[4]; int16_t *aHalfS = (int16_t *)aHalf; int16_t *bHalfS = (int16_t *)bHalf; uint16_t a565 = 0; uint16_t b565 = 0; if(cfg.data == DataType::SNorm) { int8_t *ia8 = (int8_t *)a8; int8_t *ib8 = (int8_t *)b8; ia8[0] = int8_t(round(a.x * -127.0f)); ia8[1] = int8_t(round(a.y * -127.0f)); ia8[2] = int8_t(round(a.z * -127.0f)); ia8[3] = int8_t(round(a.w * -127.0f)); ib8[0] = int8_t(round(b.x * -127.0f)); ib8[1] = int8_t(round(b.y * -127.0f)); ib8[2] = int8_t(round(b.z * -127.0f)); ib8[3] = int8_t(round(b.w * -127.0f)); aHalf[0] = MakeHalf(-a.x); aHalf[1] = MakeHalf(-a.y); aHalf[2] = MakeHalf(-a.z); aHalf[3] = MakeHalf(-a.w); bHalf[0] = MakeHalf(-b.x); bHalf[1] = MakeHalf(-b.y); bHalf[2] = MakeHalf(-b.z); bHalf[3] = MakeHalf(-b.w); } else { a8[0] = byte(round(a.x * 255.0f)); a8[1] = byte(round(a.y * 255.0f)); a8[2] = byte(round(a.z * 255.0f)); a8[3] = byte(round(a.w * 255.0f)); // red a565 |= byte(round(a.x * 31.0f)) << 11; // green a565 |= byte(round(a.y * 63.0f)) << 5; // blue a565 |= byte(round(a.z * 31.0f)) << 0; b8[0] = byte(round(b.x * 255.0f)); b8[1] = byte(round(b.y * 255.0f)); b8[2] = byte(round(b.z * 255.0f)); b8[3] = byte(round(b.w * 255.0f)); // red b565 |= byte(round(b.x * 31.0f)) << 11; // green b565 |= byte(round(b.y * 63.0f)) << 5; // blue b565 |= byte(round(b.z * 31.0f)) << 0; aHalf[0] = MakeHalf(a.x); aHalf[1] = MakeHalf(a.y); aHalf[2] = MakeHalf(a.z); aHalf[3] = MakeHalf(a.w); bHalf[0] = MakeHalf(b.x); bHalf[1] = MakeHalf(b.y); bHalf[2] = MakeHalf(b.z); bHalf[3] = MakeHalf(b.w); } struct BC1 { uint16_t a565; uint16_t b565; uint32_t bitmask; }; static_assert(sizeof(BC1) == 8, "BC1 struct is mis-sized"); struct BC4 { uint64_t a : 8; uint64_t b : 8; uint64_t bitmask : 48; }; static_assert(sizeof(BC4) == 8, "BC4 struct is mis-sized"); if(bc2alpha) { uint64_t alphaBits = 0; for(uint32_t p = 0; p < blockWidth * blockHeight; p++) { BCCode code = BCCode((bc1bitmask & (0x3 << (p * 2))) >> (p * 2)); if(code == BCCode::A) alphaBits |= uint64_t(a8[3] >> 4) << (p * 4); else if(code == BCCode::B) alphaBits |= uint64_t(b8[3] >> 4) << (p * 4); } memcpy(out, &alphaBits, sizeof(alphaBits)); out += sizeof(alphaBits); } else if(bc3alpha) { // basically the same layout just a different meaning for codes above 1, which // we // don't use BC4 *alpha = (BC4 *)out; alpha->a = a8[3]; alpha->b = b8[3]; alpha->bitmask = bc4bitmask; out += sizeof(BC4); } if(bc1) { BC1 *rgb = (BC1 *)out; // we don't care about color0 <= color1 order rgb->a565 = a565; rgb->b565 = b565; rgb->bitmask = bc1bitmask; out += sizeof(BC1); } for(int ch = 0; ch < bc4channels; ch++) { BC4 *alpha = (BC4 *)out; alpha->a = a8[ch]; alpha->b = b8[ch]; alpha->bitmask = bc4bitmask; out += sizeof(BC4); } uint64_t bc67indexbits = 0; if(bc6 || bc7) { for(uint32_t p = 0; p < blockWidth * blockHeight; p++) { BCCode code = BCCode((bc1bitmask & (0x3 << (p * 2))) >> (p * 2)); if(p == 0) { // the first colour we came across should have been assigned code A. We // require this, because we're missing a bit from the first index TEST_ASSERT(code == BCCode::A, "First code must be code A when encoding BC6"); } else { if(code == BCCode::A) { bc67indexbits |= uint64_t(0) << ((p * 4) - 1); } else if(code == BCCode::B) { bc67indexbits |= uint64_t(15) << ((p * 4) - 1); } } } } if(bc6) { byte mode = 0x03; // mode 3: no transformed endpoints, 0 partition bits, 10 endpoint bits per // channel, no delta bits. uint16_t bias = 0; if(cfg.data == DataType::SNorm) { // final quantize step, the absolute value gets scaled a little for(int ch = 0; ch < 3; ch++) { bool negA = (aHalf[ch] & 0x8000) != 0; bool negB = (bHalf[ch] & 0x8000) != 0; int16_t valA = int16_t(((aHalf[ch] & 0x7fff) * 32) / 31); int16_t valB = int16_t(((bHalf[ch] & 0x7fff) * 32) / 31); aHalfS[ch] = (negA ? -valA : valA); bHalfS[ch] = (negB ? -valB : valB); } bias = 63; } else { // final quantize step, such that max representable half float is 65504.0 // (which gets mapped to 0xffff) for(int ch = 0; ch < 3; ch++) { aHalf[ch] = uint32_t(aHalf[ch] * 64) / 31; bHalf[ch] = uint32_t(bHalf[ch] * 64) / 31; } bias = 15; } uint64_t colorbits = 0; byte colorbit65 = 0; // 10 bits for each value, RGB for A then RGB for B colorbits |= uint64_t((aHalf[0] + bias) >> 6) << 0; colorbits |= uint64_t((aHalf[1] + bias) >> 6) << 10; colorbits |= uint64_t((aHalf[2] + bias) >> 6) << 20; colorbits |= uint64_t((bHalf[0] + bias) >> 6) << 30; colorbits |= uint64_t((bHalf[1] + bias) >> 6) << 40; colorbits |= uint64_t((bHalf[2] + bias) >> 6) << 50; // overflows by 1 bit colorbit65 = (bHalf[2] >> 15) & 0x1; uint64_t block[2]; // first 64 bits are mode, and 59 of the color bits. block[0] = mode << 0; block[0] |= colorbits << 5; // second 64-bit is the top bit of the colors bits, then the index bits block[1] = (bc67indexbits << 1) | colorbit65; memcpy(out, block, sizeof(block)); out += sizeof(block); } #define ROUND_7BIT(x) ((x) >> 1) #define LO_BIT(x) ((x)&0x1) if(bc7) { byte mode = 0x40; // x1000000 = mode 6: no partition bits, no rotation bits, no index selection // bit. // 7 color bits, 7 alpha bits, 1 endpoint p-bit, 0 shared p-bits, 4 index bits, // 0 secondary index bits // color is stored R0, R1, G0, G1, B0, B1 because we only have one subset uint64_t colorbits = 0; colorbits |= uint64_t(ROUND_7BIT(a8[0])) << 0; colorbits |= uint64_t(ROUND_7BIT(b8[0])) << 7; colorbits |= uint64_t(ROUND_7BIT(a8[1])) << 14; colorbits |= uint64_t(ROUND_7BIT(b8[1])) << 21; colorbits |= uint64_t(ROUND_7BIT(a8[2])) << 28; colorbits |= uint64_t(ROUND_7BIT(b8[2])) << 35; uint64_t alphabits = 0; alphabits |= uint64_t(ROUND_7BIT(a8[3])) << 0; alphabits |= uint64_t(ROUND_7BIT(b8[3])) << 7; byte endpointA = 0; byte endpointB = 0; // take a vote, if more than two of the original values have the low bit set, // set // the endpoint. The tie-break is towards zero because we're wanting *more* than // two (so exactly two means 0) if(LO_BIT(a8[0]) + LO_BIT(a8[1]) + LO_BIT(a8[2]) + LO_BIT(a8[3]) > 2) endpointA = 1; if(LO_BIT(b8[0]) + LO_BIT(b8[1]) + LO_BIT(b8[2]) + LO_BIT(b8[3]) > 2) endpointB = 1; uint64_t block[2]; // first 64 bits are mode, color, alpha, and endpoint A block[0] = mode << 0; block[0] |= colorbits << 7; block[0] |= alphabits << (7 + 42); block[0] |= uint64_t(endpointA & 0x1) << (7 + 42 + 14); // second 64-bit is endpoint B, then the index bits block[1] = (bc67indexbits << 1) | endpointB; memcpy(out, block, sizeof(block)); out += sizeof(block); } } } srcPixels += floatdata.slicePitch / sizeof(Vec4f); } } } } }; // namespace TextureZoo