Add bit reader to decode low-level LLVM bitstream

2026-07-31 20:01:12 +00:00 · 2019-10-18 20:57:35 +01:00
parent 6140701358
commit a9a094bcdd
5 changed files with 1004 additions and 0 deletions
@@ -0,0 +1,336 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#pragma once
+
+#include "common/common.h"
+
+namespace LLVMBC
+{
+// Reads values out of an in-memory LLVM bitstream. Bits are consumed from the least significant
+// bit of each byte upwards, and multi-bit values are assembled low bits first.
+//
+// The reader only stores pointers into the caller's buffer - it takes no copy of the data.
+// Reading past the end of the stream logs an error, returns zeros, and leaves the reader at the
+// end of the stream.
+class BitReader
+{
+public:
+  // bits points at the first byte of the stream, length is its size in bytes.
+  BitReader(const byte *bits, size_t length)
+      : m_Bits(bits), m_Start(bits), m_End(bits + length), m_Offset(0)
+  {
+  }
+  // current position in whole bytes; bits consumed from a partially read byte are not counted
+  size_t ByteOffset() const { return m_Bits - m_Start; }
+  // current position in bits from the start of the stream
+  size_t BitOffset() const { return ByteOffset() * 8 + m_Offset; }
+  // total size of the stream in bytes
+  size_t ByteLength() const { return m_End - m_Start; }
+  // total size of the stream in bits
+  size_t BitLength() const { return (m_End - m_Start) * 8; }
+  // true once the current byte pointer has reached (or passed) the end of the stream
+  bool AtEndOfStream() const { return m_Bits >= m_End; }
+  // move to the start of the given byte. The offset is not range checked.
+  void SeekByte(size_t byteOffset)
+  {
+    m_Bits = m_Start + byteOffset;
+    m_Offset = 0;
+  }
+  // move to the given absolute bit. The offset is not range checked.
+  void SeekBit(size_t bitOffset)
+  {
+    m_Bits = m_Start + (bitOffset / 8);
+    m_Offset = (bitOffset % 8);
+  }
+  // read one 6-bit character: 0-25 are a-z, 26-51 are A-Z, 52-61 are 0-9, 62 is '.', 63 is '_'
+  char c6()
+  {
+    byte c = 0;
+    ReadBits(6, &c);
+
+    // c is unsigned so the lower bound of the first range doesn't need checking
+    if(c <= 25)
+      return char('a' + c);
+    else if(c >= 26 && c <= 51)
+      return char('A' + c - 26);
+    else if(c >= 52 && c <= 61)
+      return char('0' + c - 52);
+    else if(c == 62)
+      return '.';
+    else if(c == 63)
+      return '_';
+
+    RDCERR("Unexpected 6-bit char: %x", (uint32_t)c);
+
+    return '?';
+  }
+
+  // read a fixed-width value of bitWidth bits (at most 64) and return its low sizeof(T) bytes.
+  // Bits above bitWidth are returned as 0.
+  template <typename T>
+  T fixed(const size_t bitWidth)
+  {
+    byte scratch[8] = {};
+
+    RDCASSERT(bitWidth <= 64);
+
+    // in case the assert above does not halt execution, never ask for more bits than scratch can
+    // hold - otherwise ReadBits would write past the end of it
+    ReadBits(bitWidth <= 64 ? bitWidth : 64, scratch);
+
+    T ret;
+    memcpy(&ret, scratch, sizeof(T));
+    return ret;
+  }
+
+  // read a variable-width value made of groups of groupBitSize bits (2 to 8). In each group the
+  // top bit says whether another group follows and the remaining bits are payload, with the
+  // first group read holding the lowest payload bits.
+  template <typename T>
+  T vbr(const size_t groupBitSize)
+  {
+    uint64_t ret = 0;
+
+    RDCASSERT(groupBitSize > 1 && "chunk size must be greater than 1");
+    RDCASSERT(groupBitSize <= 8 && "Only chunk sizes up to 8 supported");
+
+    // in case the asserts above do not halt execution: a size of 0 would make the shift below
+    // undefined, a size of 1 has no payload bits, and a size over 8 would overflow the one-byte
+    // scratch. Decode nothing for those.
+    if(groupBitSize < 2 || groupBitSize > 8)
+      return T(0);
+
+    byte scratch = 0;
+
+    const byte hibit = 1 << (groupBitSize - 1);
+    const byte lobits = hibit - 1;
+
+    uint64_t shift = 0;
+    do
+    {
+      ReadBits(groupBitSize, &scratch);
+
+      RDCASSERT(shift <= 63);
+
+      // shifting a 64-bit value by 64 or more is undefined, so payload that can't fit in the
+      // result is dropped. The group is still consumed so the stream position stays in sync.
+      if(shift <= 63)
+        ret += (uint64_t(scratch & lobits) << shift);
+
+      shift += uint64_t(groupBitSize - 1);
+    } while(scratch & hibit);
+
+    // check for overflow of the return type
+    const uint64_t mask = ((1ULL << (sizeof(T) * 8 - 1)) - 1) << 1 | 1;
+    RDCASSERT((ret & mask) == ret);
+
+    return T(ret);
+  }
+
+  // read a signed variable-width value: a vbr whose lowest bit is the sign and whose remaining
+  // bits are the magnitude.
+  template <typename T>
+  T svbr(size_t groupBitSize)
+  {
+    // the value will fit in a uint64_t because any negative values with the high bit set, which
+    // would overflow when shifted, no longer have the high bit set after being negated.
+    uint64_t var = vbr<uint64_t>(groupBitSize);
+
+    // NOTE(review): an encoded value of exactly 1 ("negative zero") decodes to 0 here. LLVM's own
+    // reader is believed to treat that encoding as INT64_MIN - confirm whether that matters.
+
+    // if the low bit is set, it's negative
+    if(var & 0x1)
+    {
+      return T(-int64_t(var >> 1));
+    }
+    else
+    {
+      return T(var >> 1);
+    }
+  }
+
+  // read sizeof(T) * 8 bits and return them as a T
+  template <typename T>
+  T Read()
+  {
+    byte scratch[sizeof(T)] = {};
+
+    ReadBits(sizeof(T) * 8, scratch);
+
+    T ret;
+    memcpy(&ret, scratch, sizeof(T));
+    return ret;
+  }
+
+  // read a blob: a vbr6 byte length, padding up to a 32-bit boundary, the bytes themselves, then
+  // padding up to a 32-bit boundary again. blobptr is set to point directly into the stream (no
+  // copy is made) and bloblen to the number of bytes available there. If the encoded length runs
+  // past the end of the stream an error is logged and bloblen is clamped to what remains.
+  void ReadBlob(const byte *&blobptr, size_t &bloblen)
+  {
+    // get the blob length
+    bloblen = vbr<size_t>(6);
+
+    // align to dword boundary
+    align32bits();
+
+    // the blob is at m_Bits now
+    blobptr = m_Bits;
+
+    // the length comes from the stream itself, so don't trust it beyond the data we really have
+    const size_t remaining = m_Bits < m_End ? size_t(m_End - m_Bits) : 0;
+    if(bloblen > remaining)
+    {
+      RDCERR("Blob extends off end of bitstream");
+      bloblen = remaining;
+    }
+
+    // advance by the length, and align up as well
+    m_Bits += bloblen;
+    align32bits();
+  }
+
+  // skip forward to the next 32-bit boundary, measured from the start of the stream. Does nothing
+  // if the reader is already on one.
+  void align32bits()
+  {
+    // skip the rest of the current byte, if we're part-way through
+    if(m_Offset > 0)
+      Advance(8 - m_Offset);
+
+    const size_t byteOffs = ByteOffset();
+    const size_t alignedByteOffs = (byteOffs + 0x3) & ~0x3;
+
+    // advance by N bytes to dword align the stream
+    m_Bits += (alignedByteOffs - byteOffs);
+  }
+
+private:
+  // m_Bits is the byte currently being read, m_Start/m_End bound the stream
+  const byte *m_Bits, *m_Start, *m_End;
+  // number of bits already consumed from *m_Bits, always in the range 0-7 between calls
+  size_t m_Offset;
+
+  // consume N bits from the current byte. N must not go beyond the end of that byte.
+  void Advance(size_t N)
+  {
+    m_Offset += N;
+    // shouldn't read more than this byte
+    RDCASSERT(m_Offset <= 8);
+
+    // roll over to next byte after consuming all 8 bits
+    if(m_Offset == 8)
+    {
+      m_Bits++;
+      m_Offset = 0;
+    }
+  }
+
+  // read bitsToRead bits into dst, filling dst from bit 0 of its first byte upwards. dst must
+  // have room for (bitsToRead + 7) / 8 bytes, and no byte beyond that is written. If the read
+  // would go past the end of the stream nothing is read: zeros are written and the reader is
+  // moved to the end of the stream.
+  void ReadBits(size_t bitsToRead, byte *dst)
+  {
+    if(BitOffset() + bitsToRead > BitLength())
+    {
+      RDCERR("Reading off end of bitstream");
+
+      // read 0s off the end of the stream.
+      // set any whole bytes to 0:
+      while(bitsToRead >= 8)
+      {
+        *dst = 0;
+        dst++;
+        bitsToRead -= 8;
+      }
+
+      // set any remaining bits to 0:
+      if(bitsToRead)
+        *dst &= ~((1 << bitsToRead) - 1);
+
+      m_Bits = m_End;
+      m_Offset = 0;
+      return;
+    }
+
+    // number of bits already filled in the current destination byte
+    size_t dstoffs = 0;
+
+    // if we're already partway through a byte, read as many bits as we need and we can
+    if(m_Offset != 0)
+    {
+      const size_t avail = 8 - m_Offset;
+
+      if(avail == bitsToRead)
+      {
+        // if we have just enough in this byte, great! shift and mask off, and update the offset
+
+        // grab the bits into the low end
+        *dst = (*m_Bits >> m_Offset);
+
+        Advance(bitsToRead);
+
+        return;
+      }
+      else if(avail > bitsToRead)
+      {
+        // we have more than enough. Similar to above but we need to mask out only the bits we need.
+
+        // grab the bits into the low end and mask
+        *dst = (*m_Bits >> m_Offset) & ((1 << bitsToRead) - 1);
+
+        Advance(bitsToRead);
+
+        return;
+      }
+      else
+      {
+        // we don't have enough. Consume what we can then continue
+        *dst = (*m_Bits >> m_Offset);
+
+        dstoffs = avail;
+        bitsToRead -= avail;
+
+        Advance(avail);
+      }
+    }
+    else
+    {
+      // ensure if we didn't read any bits that the byte is zeroed out, so we can OR on bits below
+      // without needing to worry
+      *dst = 0;
+    }
+
+    // we're now at the start of a byte since we read any remainder above.
+    RDCASSERT(m_Offset == 0);
+
+    // if we have to read whole bytes, do that here.
+    if(bitsToRead >= 8)
+    {
+      if(dstoffs == 0)
+      {
+        // if dstoffs is 0 then it's an easy case, we can just copy all the whole bytes into *dst
+        memcpy(dst, m_Bits, bitsToRead / 8);
+
+        // manual advance
+        m_Bits += (bitsToRead / 8);
+        dst += (bitsToRead / 8);
+
+        // make sure we read any sub-byte remainder
+        bitsToRead &= 0x7;
+      }
+      else
+      {
+        while(bitsToRead >= 8)
+        {
+          // manual advance
+          const byte cur = *m_Bits;
+          m_Bits++;
+
+          bitsToRead -= 8;
+
+          // dstoffs doesn't change because we wrap around to the same offset in the next byte.
+          // However we do need to shuffle the bits in cur around to add what will fit into the
+          // current byte, and then the remainder into the next byte.
+          *dst |= (cur << dstoffs);
+          dst++;
+          *dst = (cur >> (8 - dstoffs));
+        }
+      }
+    }
+
+    // if nothing remains, return
+    if(bitsToRead == 0)
+      return;
+
+    // we should now have no more than 7 bits to read
+    RDCASSERT(bitsToRead < 8);
+
+    // this is the mask to get only the bits we want
+    const byte mask = ((1 << bitsToRead) - 1);
+
+    // take the bits that we want from the next byte (knowing we want the low-order bits), and shift
+    // them into where they should go.
+    byte data = *m_Bits & mask;
+
+    // whatever fits goes into the current destination byte
+    *dst |= data << dstoffs;
+
+    // only spill into the next destination byte if bits are genuinely left over. When the bits
+    // exactly fill the current byte there is nothing to spill, and the next byte may not belong
+    // to the caller's buffer at all, so it must not be touched.
+    if(dstoffs + bitsToRead > 8)
+    {
+      dst++;
+      *dst = data >> (8 - dstoffs);
+    }
+
+    // consume the bits we used
+    Advance(bitsToRead);
+  }
+};
+
+};    // namespace LLVMBC
@@ -0,0 +1,631 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#include "llvm_decoder.h"
+
+namespace LLVMBC
+{    // nothing is defined in this namespace here; the rest of this file is the bit reader tests
+};    // namespace LLVMBC
+
+#if ENABLED(ENABLE_UNIT_TESTS)
+
+#include "3rdparty/catch/catch.hpp"
+
+TEST_CASE("Check LLVM bitreader", "[llvm]")
+{
+  SECTION("Check simple reading of bytes")
+  {
+    byte bits[] = {0x01, 0x02, 0x40, 0x80, 0xff};
+    // a freshly constructed reader should start at offset 0 and not report end-of-stream
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+    // ensure we can read it all out again in whole bytes. The offsets should advance by one byte
+    // per read, and end-of-stream should only be reported once the last byte has been consumed
+    for(size_t i = 0; i < sizeof(bits); i++)
+    {
+      byte val = b.Read<byte>();
+      CHECK(val == bits[i]);
+      if(i + 1 < sizeof(bits))
+        CHECK(!b.AtEndOfStream());
+      else
+        CHECK(b.AtEndOfStream());
+      CHECK(b.ByteOffset() == i + 1);
+      CHECK(b.BitOffset() == (i + 1) * 8);
+    }
+  }
+
+  SECTION("Check seeking within the stream")
+  {
+    byte bits[] = {0x01, 0x4f, 0x8c, 0xff};
+    byte val;
+    // 4 bytes == 32 bits, so byte offset 4 / bit offset 32 is exactly the end of the stream
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+    // seeking to one past the last byte is allowed and reports end-of-stream
+    b.SeekByte(4);
+
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 4);
+    CHECK(b.BitOffset() == 32);
+    // the same position expressed in bits should behave identically
+    b.SeekBit(32);
+
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 4);
+    CHECK(b.BitOffset() == 32);
+    // bit 29 is bit 5 of the last byte (0xff), which leaves exactly 3 bits unread
+    b.SeekBit(29);
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 3);
+    CHECK(b.BitOffset() == 29);
+    // reading those 3 bits returns the top 3 bits of 0xff and lands exactly on the end
+    val = b.fixed<byte>(3);
+
+    CHECK(val == 0x7);
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 4);
+    CHECK(b.BitOffset() == 32);
+    // seeking back to the start means we are no longer at the end of the stream
+    b.SeekBit(0);
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+  }
+
+  SECTION("Check with empty bitstream")
+  {
+    byte bits[] = {0};
+    // the array only provides a valid pointer - the reader is given a length of 0
+    LLVMBC::BitReader b(bits, 0);
+
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+  }
+
+  SECTION("Check out of bounds behaviour")
+  {
+    byte bits[] = {0x40, 0x80, 0xff};
+    // 3 bytes == 24 bits: a 17-bit read fits, but a following 16-bit read runs off the end
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+
+    // first read is fully satisfied, we get the low 17 bits of the little-endian value 0xff8040
+    uint32_t val1 = b.fixed<uint32_t>(17);
+    CHECK(val1 == 0x18040);
+
+    // second read is partially out of bounds, we should read all 0s
+    uint32_t val2 = b.fixed<uint32_t>(16);
+    CHECK(val2 == 0);
+
+    // the out of bounds read should leave us exactly at the end of the stream
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == sizeof(bits));
+    CHECK(b.BitOffset() == sizeof(bits) * 8);
+  }
+
+  SECTION("Check fixed encoding")
+  {
+    // 0x96 = 0b 1001 0110
+    // 0xF0 = 0b 1111 0000
+    // 0xA5 = 0b 1010 0101
+    // 0x3C = 0b 0011 1100
+
+    // we pad out with 0s so we don't read off the end of the stream when reading up to 4 32-bit
+    // values
+    byte bits[] = {
+        // dword 1
+        0x96, 0xf0, 0xA5, 0x3C,
+        // padding dword
+        0x00, 0x00, 0x00, 0x00,
+        // padding dword
+        0x00, 0x00, 0x00, 0x00,
+        // padding dword
+        0x00, 0x00, 0x00, 0x00,
+    };
+
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    // for each of the bit widths, 1 to 32, read 4 values.
+    // This should decode from the LSB to MSB in the bitstream - in the commented values above that
+    // is right-to-left then top-to-bottom
+    uint32_t expected[32][4] = {
+        // i_1
+        {0x00, 0x01, 0x01, 0x00},
+        // i_2
+        {0x02, 0x01, 0x01, 0x02},
+        // i_3
+        {0x06, 0x02, 0x02, 0x00},
+        // i_4
+        {0x06, 0x09, 0x00, 0x0f},
+        // i_5
+        {0x16, 0x04, 0x1C, 0x0B},
+        // i_6
+        {0x16, 0x02, 0x1F, 0x29},
+        // i_7
+        {0x16, 0x61, 0x17, 0x65},
+        // i_8
+        {0x96, 0xF0, 0xA5, 0x3C},
+
+        // i_9
+        {0x0096, 0x00F8, 0x0129, 0x0007},
+        // i_10
+        {0x0096, 0x017C, 0x03CA, 0x0000},
+        // i_11
+        {0x0096, 0x04BE, 0x00F2, 0x0000},
+        // i_12
+        {0x0096, 0x0A5F, 0x003C, 0x0000},
+        // i_13
+        {0x1096, 0x052F, 0x000F, 0x0000},
+        // i_14
+        {0x3096, 0x3297, 0x0003, 0x0000},
+        // i_15
+        {0x7096, 0x794B, 0x0000, 0x0000},
+        // i_16
+        {0xF096, 0x3CA5, 0x0000, 0x0000},
+        // from i_17 onwards only two values are listed; the last two are zero-initialised
+        // i_17
+        {0x0001F096, 0x00001E52},
+        // i_18
+        {0x0001F096, 0x00000F29},
+        // i_19
+        {0x0005F096, 0x00000794},
+        // i_20
+        {0x0005F096, 0x000003CA},
+        // i_21
+        {0x0005F096, 0x000001E5},
+        // i_22
+        {0x0025F096, 0x000000F2},
+        // i_23
+        {0x0025F096, 0x00000079},
+        // i_24
+        {0x00A5F096, 0x0000003C},
+        // i_25
+        {0x00A5F096, 0x0000001E},
+        // i_26
+        {0x00A5F096, 0x0000000F},
+        // i_27
+        {0x04A5F096, 0x00000007},
+        // i_28
+        {0x0CA5F096, 0x00000003},
+        // i_29
+        {0x1CA5F096, 0x00000001},
+        // i_30
+        {0x3CA5F096, 0x00000000},
+        // i_31
+        {0x3CA5F096, 0x00000000},
+        // i_32
+        {0x3CA5F096, 0x00000000},
+    };
+    // every width restarts from bit 0 and reads 4 consecutive values of that width
+    for(size_t i = 0; i < 32; i++)
+    {
+      b.SeekBit(0);
+      uint32_t read;
+
+      INFO("Bit width: " << (i + 1));
+
+      read = b.fixed<uint32_t>(i + 1);
+      CHECK(read == expected[i][0]);
+
+      read = b.fixed<uint32_t>(i + 1);
+      CHECK(read == expected[i][1]);
+
+      read = b.fixed<uint32_t>(i + 1);
+      CHECK(read == expected[i][2]);
+
+      read = b.fixed<uint32_t>(i + 1);
+      CHECK(read == expected[i][3]);
+    }
+
+    // the last iteration read 4 32-bit values, so we should be exactly at the end of the stream
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == sizeof(bits));
+    CHECK(b.BitOffset() == sizeof(bits) * 8);
+  }
+
+  SECTION("Check variable encoding")
+  {
+    SECTION("Single chunk, no extension")
+    {
+      // just set as many bits as we can in one chunk, so all 1s except the MSB
+
+      byte bits[] = {
+          // i_vbr0 (padding)
+          0,
+          // i_vbr1 (padding)
+          0,
+          // i_vbr2
+          0x01,
+          // i_vbr3
+          0x03,
+          // i_vbr4
+          0x07,
+          // i_vbr5
+          0x0f,
+          // i_vbr6
+          0x1f,
+          // i_vbr7
+          0x3f,
+          // i_vbr8
+          0x7f,
+      };
+      // the encoding for group size i lives in byte i, so it can be reached with SeekByte(i)
+      LLVMBC::BitReader b(bits, sizeof(bits));
+
+      for(size_t i = 2; i <= 8; i++)
+      {
+        INFO("VBR group size: " << i);
+        b.SeekByte(i);
+        // with no continuation bit set, the decoded value is just the byte as stored
+        uint64_t val = b.vbr<uint64_t>(i);
+        CHECK(val == bits[i]);
+      }
+
+      // the last read (vbr8) consumed the final byte, so we should be exactly at the end
+      CHECK(b.AtEndOfStream());
+      CHECK(b.ByteOffset() == sizeof(bits));
+      CHECK(b.BitOffset() == sizeof(bits) * 8);
+    }
+
+    SECTION("Two chunks, one extension")
+    {
+      // set all bits that we can from two chunks - that means the first chunk is all 1s, the second
+      // is all 1s except the leading 0
+
+      byte bits[] = {
+          // i_vbr0 (padding)
+          0, 0,
+          // i_vbr1 (padding)
+          0, 0,
+          // i_vbr2
+          0x07, 0x00,    // 0b 01 11
+          // i_vbr3
+          0x1f, 0x00,    // 0b 011 111
+          // i_vbr4
+          0x7f, 0x00,    // 0b 0111 1111
+          // i_vbr5
+          0xff, 0x01,    // 0b 01111 11111
+          // i_vbr6
+          0xff, 0x07,    // 0b 011111 111111
+          // i_vbr7
+          0xff, 0x1f,    // 0b 0111111 1111111
+          // i_vbr8
+          0xff, 0x7f,    // 0b 01111111 11111111
+      };
+      // expected[i] is the decoded value for group size i: 2 * (i - 1) payload bits, all set
+      uint64_t expected[] = {
+          0, 0,
+          // i_vbr2
+          0x0003,
+          // i_vbr3
+          0x000f,
+          // i_vbr4
+          0x003f,
+          // i_vbr5
+          0x00ff,
+          // i_vbr6
+          0x03ff,
+          // i_vbr7
+          0x0fff,
+          // i_vbr8
+          0x3fff,
+      };
+      // each encoding occupies two bytes, so group size i starts at byte i * 2
+      LLVMBC::BitReader b(bits, sizeof(bits));
+
+      for(size_t i = 2; i <= 8; i++)
+      {
+        INFO("VBR group size: " << i);
+        b.SeekByte(i * 2);
+
+        uint64_t val = b.vbr<uint64_t>(i);
+        CHECK(val == expected[i]);
+      }
+
+      // the last read (vbr8) consumed the final two bytes, so we should be exactly at the end
+      CHECK(b.AtEndOfStream());
+      CHECK(b.ByteOffset() == sizeof(bits));
+      CHECK(b.BitOffset() == sizeof(bits) * 8);
+    }
+
+    SECTION("Five chunks, four extensions")
+    {
+      // set an alternating 10 pattern from the top bit. Each group except the last has a leading 1
+
+      byte bits[] = {
+          // i_vbr0 (padding)
+          0, 0, 0, 0, 0,
+          // i_vbr1 (padding)
+          0, 0, 0, 0, 0,
+          // i_vbr2
+          0xBB, 0x01, 0x00, 0x00, 0x00,    // 0b 01 10 11 10 11
+          // i_vbr3
+          0xB6, 0x2D, 0x00, 0x00, 0x00,    // 0b 010 110 110 110 110
+          // i_vbr4
+          0xAD, 0xAD, 0x05, 0x00, 0x00,    // 0b 0101 1010 1101 1010 1101
+          // i_vbr5
+          0x5A, 0x6B, 0xAD, 0x00, 0x00,    // 0b 01010 11010 11010 11010 11010
+          // i_vbr6
+          0xB5, 0x5A, 0xAB, 0x15, 0x00,    // 0b 010101 101010 110101 101010 110101
+          // i_vbr7
+          0x6A, 0xB5, 0x5A, 0xAD, 0x02,    // 0b 0101010 1101010 1101010 1101010 1101010
+          // i_vbr8
+          0xD5, 0xAA, 0xD5, 0xAA, 0x55,    // 0b 01010101 10101010 11010101 10101010 11010101
+      };
+      // expected[i] is the 5 * (i - 1) payload bits put back together, first group read lowest
+      uint64_t expected[] = {
+          0, 0,
+          // i_vbr2
+          0x0000000015ULL,    // 0b 1 0 1 0 1
+          // i_vbr3
+          0x00000002AAULL,    // 0b 10 10 10 10 10
+          // i_vbr4
+          0x0000005555ULL,    // 0b 101 010 101 010 101
+          // i_vbr5
+          0x00000AAAAAULL,    // 0b 1010 1010 1010 1010 1010
+          // i_vbr6
+          0x0001555555ULL,    // 0b 10101 01010 10101 01010 10101
+          // i_vbr7
+          0x002AAAAAAAULL,    // 0b 101010 101010 101010 101010 101010
+          // i_vbr8
+          0x0555555555ULL,    // 0b 1010101 0101010 1010101 0101010 1010101
+      };
+      // each encoding occupies five bytes, so group size i starts at byte i * 5
+      LLVMBC::BitReader b(bits, sizeof(bits));
+
+      for(size_t i = 2; i <= 8; i++)
+      {
+        INFO("VBR group size: " << i);
+        b.SeekByte(i * 5);
+
+        uint64_t val = b.vbr<uint64_t>(i);
+        CHECK(val == expected[i]);
+      }
+
+      // the last read (vbr8) consumed the final five bytes, so we should be exactly at the end
+      CHECK(b.AtEndOfStream());
+      CHECK(b.ByteOffset() == sizeof(bits));
+      CHECK(b.BitOffset() == sizeof(bits) * 8);
+    }
+
+    SECTION("Check signed vbr decoding")
+    {
+      // we don't check every possible bit width since this is decoded the same as vbr except for a
+      // post-check and shift. Instead we use vbr4 since it's convenient for hex literals
+      byte bits[] = {
+          0x04,                // 0b 0100 = +2
+          0x05,                // 0b 0101 = -2
+          0xBA, 0x9E, 0x68,    // 0b 0110 1000 1001 1110 1011 1010 = +98765
+          0xBB, 0x9E, 0x68,    // 0b 0110 1000 1001 1110 1011 1011 = -98765
+          // INT64_MAX. 64-bits encoded in 3-bit groups is 22 groups, so 22 * 4-bit encoded groups
+          // is 88 bits, meaning 11 bytes
+          0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F,
+          // -INT64_MAX. Same as above but with the LSB (which is the sign bit) set to 1
+          0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F,
+          // one more value just to check that we didn't overrun above
+          0x06,    // 0b 0110 = +3
+      };
+
+      LLVMBC::BitReader b(bits, sizeof(bits));
+      // a byte holding a single 4-bit value also yields a second value of 0 from its upper nibble
+      int64_t val;
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 2);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 0);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == -2);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 0);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 98765);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == -98765);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == INT64_MAX);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == -INT64_MAX);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 3);
+
+      val = b.svbr<int64_t>(4);
+      CHECK(val == 0);
+
+      // should be exactly at the end of the stream
+      CHECK(b.AtEndOfStream());
+      CHECK(b.ByteOffset() == sizeof(bits));
+      CHECK(b.BitOffset() == sizeof(bits) * 8);
+    }
+  }
+
+  SECTION("Check char6 encoding")
+  {
+    byte bits[64] = {};
+    for(size_t i = 0; i < sizeof(bits); i++)
+      bits[i] = i & 0xff;
+
+    // this is the char6 encoding
+    const char string[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._";
+
+    RDCCOMPILE_ASSERT(sizeof(string) - 1 == sizeof(bits),
+                      "bits byte array and string should be same size.");
+    // byte i stores the 6-bit value i in its low bits; the top two bits are 0 padding
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    for(size_t i = 0; i < sizeof(bits); i++)
+    {
+      char c = b.c6();
+      // for simplicity we read padding too
+      byte pad = b.fixed<byte>(2);
+
+      CHECK(c == string[i]);
+      CHECK(pad == 0);
+    }
+  }
+
+  SECTION("Check 32-bit aligning")
+  {
+    byte bits[] = {
+        // first i_4 value
+        0x04,
+        // padding for alignment
+        0x00, 0x00, 0x00,
+
+        // second two i_4 values
+        0xF5,
+        // i_24 value
+        0xCA, 0x99, 0x23,
+
+        // no padding - already aligned
+
+        // i_6 value and i_2 value
+        0xBF,
+    };
+
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+
+    uint32_t val;
+
+    // first read is fully satisfied, we get the value we expect
+    val = b.fixed<uint32_t>(4);
+    CHECK(val == 0x4);
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 4);
+    // aligning from bit 4 skips the rest of byte 0 and the three padding bytes
+    b.align32bits();
+
+    CHECK(b.ByteOffset() == 4);
+    CHECK(b.BitOffset() == 32);
+
+    val = b.fixed<uint32_t>(4);
+    CHECK(val == 0x5);
+
+    val = b.fixed<uint32_t>(4);
+    CHECK(val == 0xf);
+
+    val = b.fixed<uint32_t>(24);
+    CHECK(val == 0x2399CA);
+
+    CHECK(b.ByteOffset() == 8);
+    CHECK(b.BitOffset() == 64);
+
+    // should be a no-op because we're already aligned
+    b.align32bits();
+
+    CHECK(b.ByteOffset() == 8);
+    CHECK(b.BitOffset() == 64);
+    // 0xBF = 0b 10 111111: the low 6 bits are read first, then the top 2 bits
+    val = b.fixed<uint32_t>(6);
+    CHECK(val == 0x3f);
+
+    val = b.fixed<uint32_t>(2);
+    CHECK(val == 0x2);
+
+    // should be exactly at the end of the stream
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == sizeof(bits));
+    CHECK(b.BitOffset() == sizeof(bits) * 8);
+  }
+
+  SECTION("Check blob fetch")
+  {
+    // size = 16 bytes for encoded data and first blob, 70 bytes for second blob, 2 bytes trailing
+    // padding
+    byte bits[16 + 70 + 2] = {
+        // first vbr_6 length
+        0x06,
+        // padding for alignment
+        0x00, 0x00, 0x00,
+
+        // blob data
+        0xF5, 0x00, 0xCA, 0x40, 0x99, 0x23,
+
+        // padding for trailing alignment
+        0x00, 0x00,
+
+        // i_20 dummy to get us to the point where two vbr_6 chunks would be aligned
+        // we choose a length of 70, which is 0b10 00110, then vbr_6 encoded it becomes
+        // 0b000010 100110 which is 0xA6, over 12 bits. That leaves 4 bits in the upper part of
+        // the last byte of the i_20, and the remaining 8 in the next byte
+        0x5B, 0xC2, 0x64, 0x0A,
+    };
+
+    LLVMBC::BitReader b(bits, sizeof(bits));
+
+    CHECK(!b.AtEndOfStream());
+    CHECK(b.ByteOffset() == 0);
+    CHECK(b.BitOffset() == 0);
+
+    const byte *ptr = NULL;
+    size_t size = 0;
+    // the vbr_6 length is followed by alignment padding, so the blob itself starts at byte 4
+    b.ReadBlob(ptr, size);
+
+    CHECK(size == 6);
+    CHECK(ptr == &bits[4]);
+    // after the blob and its trailing padding we're at byte 12, where the i_20 dummy starts
+    uint32_t val = b.fixed<uint32_t>(20);
+    CHECK(val == 0x4C25B);
+
+    ptr = NULL;
+    size = 0;
+    // the second length ends exactly on a dword boundary, so its blob starts at byte 16 unpadded
+    b.ReadBlob(ptr, size);
+
+    CHECK(size == 70);
+    CHECK(ptr == &bits[16]);
+
+    // should be exactly at the end of the stream
+    CHECK(b.AtEndOfStream());
+    CHECK(b.ByteOffset() == sizeof(bits));
+    CHECK(b.BitOffset() == sizeof(bits) * 8);
+  }
+}
+
+#endif
@@ -0,0 +1,31 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#pragma once
+
+#include "llvm_bitreader.h"
+
+namespace LLVMBC
+{    // nothing is declared here; the only other content of this header is the bit reader include
+};    // namespace LLVMBC
@@ -100,12 +100,15 @@
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="dxil_bytecode.cpp" />
+    <ClCompile Include="llvm_decoder.cpp" />
    <ClCompile Include="precompiled.cpp">
      <PrecompiledHeader>Create</PrecompiledHeader>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="dxil_bytecode.h" />
+    <ClInclude Include="llvm_bitreader.h" />
+    <ClInclude Include="llvm_decoder.h" />
    <ClInclude Include="precompiled.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
@@ -5,12 +5,15 @@
    <ClCompile Include="precompiled.cpp">
      <Filter>PCH</Filter>
    </ClCompile>
+    <ClCompile Include="llvm_decoder.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="dxil_inspect.h" />
    <ClInclude Include="precompiled.h">
      <Filter>PCH</Filter>
    </ClInclude>
+    <ClInclude Include="llvm_bitreader.h" />
+    <ClInclude Include="llvm_decoder.h" />
  </ItemGroup>
  <ItemGroup>
    <Filter Include="PCH">