Remap entire index buffer for post-vs data, instead of just shifting

* Previously, for post-VS data, we maintained the index buffer by doing
  the stream-out/transform feedback as a point list containing each
  unique index. To keep the original index buffer usable we padded out
  any gaps in the indices with a single value, so that the indices only
  needed to be shifted. This causes severe problems if the indices start
  at 0 and contain an invalid value like 0xcccccccc - we'd allocate a
  huge array and perform a massively wasteful stream-out.
* Instead, we now stream out only the tightly packed list of unique
  indices, then remap each 'real' index to its position in the tightly
  packed output buffer.
This commit is contained in:
baldurk
2015-03-10 21:05:59 +00:00
parent 0677b102ee
commit c3e381a76a
3 changed files with 60 additions and 65 deletions
+15 -19
View File
@@ -3940,23 +3940,19 @@ void D3D11DebugManager::InitPostVSBuffers(uint32_t frameID, uint32_t eventID)
// data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
// 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
//
// Since we want the indices to be preserved in order to easily match up inputs to outputs,
// but shifted, fill in gaps in our streamout vertex buffer with the lowest index value.
// (use the lowest index value so that even the gaps are a 'valid' vertex, rather than
// potentially garbage data).
uint32_t minindex = indices.empty() ? 0 : indices[0];
// Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
// which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
// We just stream-out a tightly packed list of unique indices, and then remap the index buffer
// so that what did point to 500 points to 0 (accounting for rebasing), and what did point
// to 510 now points to 3 (accounting for the unique sort).
// indices[] contains ascending unique vertex indices referenced. Fill gaps with minindex
for(size_t i=1; i < indices.size(); i++)
// we use a map here since the indices may be sparse. In particular, if an index is
// 'invalid', like 0xcccccccc, we don't want to allocate an array of 3.4 billion entries.
map<uint32_t,size_t> indexRemap;
for(size_t i=0; i < indices.size(); i++)
{
if(indices[i]-1 > indices[i-1])
{
size_t gapsize = size_t( (indices[i]-1) - indices[i-1] );
indices.insert(indices.begin()+i, gapsize, minindex);
i += gapsize;
}
// by definition, this index will only appear once in indices[]
indexRemap[ indices[i] ] = i;
}
D3D11_BUFFER_DESC desc = { UINT(sizeof(uint32_t)*indices.size()), D3D11_USAGE_IMMUTABLE, D3D11_BIND_INDEX_BUFFER, 0, 0, 0 };
@@ -3979,17 +3975,17 @@ void D3D11DebugManager::InitPostVSBuffers(uint32_t frameID, uint32_t eventID)
m_pImmediateContext->IASetPrimitiveTopology(topo);
m_pImmediateContext->IASetIndexBuffer(UNWRAP(WrappedID3D11Buffer, origBuf), idxFmt, idxOffs);
// rebase existing index buffer to point from 0 onwards (which will index into our
// stream-out'd vertex buffer)
// rebase existing index buffer to point to the right elements in our stream-out'd
// vertex buffer
if(index16)
{
for(uint32_t i=0; i < numIndices; i++)
idx16[i] -= uint16_t(minindex&0xffff);
idx16[i] = uint16_t(indexRemap[ idx16[i] ]);
}
else
{
for(uint32_t i=0; i < numIndices; i++)
idx32[i] -= minindex;
idx32[i] = uint32_t(indexRemap[ idx32[i] ]);
}
desc.ByteWidth = (UINT)idxdata.size();
+16 -20
View File
@@ -2307,23 +2307,19 @@ void GLReplay::InitPostVSBuffers(uint32_t frameID, uint32_t eventID)
// data with padding. Instead we rebase the indices based on the smallest vertex so it becomes
// 0, 1, 2, 1, 3, 2 and then that matches our stream-out'd buffer.
//
// Since we want the indices to be preserved in order to easily match up inputs to outputs,
// but shifted, fill in gaps in our streamout vertex buffer with the lowest index value.
// (use the lowest index value so that even the gaps are a 'valid' vertex, rather than
// potentially garbage data).
uint32_t minindex = indices.empty() ? 0 : indices[0];
// Note that there could also be gaps, like: 500, 501, 502, 510, 511, 512
// which would become 0, 1, 2, 3, 4, 5 and so the old index buffer would no longer be valid.
// We just stream-out a tightly packed list of unique indices, and then remap the index buffer
// so that what did point to 500 points to 0 (accounting for rebasing), and what did point
// to 510 now points to 3 (accounting for the unique sort).
// indices[] contains ascending unique vertex indices referenced. Fill gaps with minindex
for(size_t i=1; i < indices.size(); i++)
// we use a map here since the indices may be sparse. In particular, if an index is
// 'invalid', like 0xcccccccc, we don't want to allocate an array of 3.4 billion entries.
map<uint32_t,size_t> indexRemap;
for(size_t i=0; i < indices.size(); i++)
{
if(indices[i]-1 > indices[i-1])
{
size_t gapsize = size_t( (indices[i]-1) - indices[i-1] );
indices.insert(indices.begin()+i, gapsize, minindex);
i += gapsize;
}
// by definition, this index will only appear once in indices[]
indexRemap[ indices[i] ] = i;
}
// generate a temporary index buffer with our 'unique index set' indices,
@@ -2352,22 +2348,22 @@ void GLReplay::InitPostVSBuffers(uint32_t frameID, uint32_t eventID)
if(drawcall->indexByteWidth == 1)
{
for(uint32_t i=0; i < numIndices; i++)
idx8[i] -= uint8_t(minindex&0xff);
idx8[i] = uint8_t(indexRemap[ idx8[i] ]);
}
else if(drawcall->indexByteWidth == 2)
{
for(uint32_t i=0; i < numIndices; i++)
idx16[i] -= uint16_t(minindex&0xffff);
idx16[i] = uint16_t(indexRemap[ idx16[i] ]);
}
else
{
for(uint32_t i=0; i < numIndices; i++)
idx32[i] -= minindex;
idx32[i] = uint32_t(indexRemap[ idx32[i] ]);
}
// make the index buffer that can be used to render this postvs data - the original
// indices, rebased with minindex being 0 (since we transform feedback to the start
// of our feedback buffer).
// indices, repointed (since we transform feedback to the start of our feedback
// buffer and only tightly packed unique indices).
if(!idxdata.empty())
{
gl.glGenBuffers(1, &idxBuf);
+29 -26
View File
@@ -85,14 +85,14 @@ namespace renderdocui.Windows
private class Dataset
{
public uint IndexCount = 0;
public uint IndexAdd = 0;
public MeshFormat PostVS;
public PrimitiveTopology Topology = PrimitiveTopology.Unknown;
public byte[][] Buffers = null;
public uint[] Indices = null;
public uint[] Indices = null; // 'displayed' indices from index buffer
public uint[] DataIndices = null; // where to find the data, different only for PostVS
}
// we generate a UIState object with everything needed to populate the actual
@@ -848,7 +848,6 @@ namespace renderdocui.Windows
Dataset ret = new Dataset();
ret.IndexCount = 0;
ret.IndexAdd = 0;
if (input == null)
return ret;
@@ -865,6 +864,7 @@ namespace renderdocui.Windows
ret.Buffers[0] = r.GetTextureData(input.Buffers[1], 0, 0);
ret.Indices = null;
ret.DataIndices = null;
ret.IndexCount = (uint)ret.Buffers[0].Length / input.Strides[0];
}
@@ -904,6 +904,7 @@ namespace renderdocui.Windows
}
ret.Indices = null;
ret.DataIndices = null;
if (ret.PostVS.buf != ResourceId.Null && type == MeshDataStage.VSOut &&
(input.Drawcall.flags & DrawcallFlags.UseIBuffer) > 0 && input.IndexBuffer != ResourceId.Null)
@@ -939,21 +940,17 @@ namespace renderdocui.Windows
}
}
uint minIndex = ret.Indices.Length > 0 ? ret.Indices[0] : 0;
foreach (var i in ret.Indices)
rawidxs = r.GetBufferData(ret.PostVS.idxbuf, 0, 0);
if (input.Drawcall.indexByteWidth == 0 || rawidxs == null || rawidxs.Length == 0)
{
if (input.Drawcall.indexByteWidth == 2 && i == input.IndexRestartValue && input.IndexRestart)
continue;
if (input.Drawcall.indexByteWidth == 4 && i == input.IndexRestartValue && input.IndexRestart)
continue;
minIndex = Math.Min(minIndex, i);
ret.DataIndices = new uint[0] { };
}
else
{
ret.DataIndices = new uint[rawidxs.Length / sizeof(uint)];
Buffer.BlockCopy(rawidxs, 0, ret.DataIndices, 0, rawidxs.Length);
}
for (int idx = 0; idx < ret.Indices.Length; idx++)
ret.Indices[idx] -= minIndex;
ret.IndexAdd = minIndex;
}
return ret;
@@ -1007,6 +1004,8 @@ namespace renderdocui.Windows
}
}
ret.DataIndices = ret.Indices;
ret.Buffers = new byte[input.Buffers.Length][];
for (int i = 0; i < input.Buffers.Length; i++)
{
@@ -1602,30 +1601,34 @@ namespace renderdocui.Windows
return;
}
uint index = (uint)rowIdx;
uint dataIndex = (uint)rowIdx;
bool outOfBoundsIdx = false;
if (data.Indices != null)
if (data.DataIndices != null)
{
if (rowIdx >= data.Indices.Length)
if (rowIdx >= data.DataIndices.Length)
{
index = 0;
dataIndex = 0;
outOfBoundsIdx = true;
}
else
{
index = data.Indices[rowIdx];
dataIndex = data.DataIndices[rowIdx];
}
}
else if (input.Drawcall != null && (input.Drawcall.flags & DrawcallFlags.UseIBuffer) != 0 &&
(state == m_VSIn || state == m_VSOut))
{
// no index buffer, but indexed drawcall
index = 0;
dataIndex = 0;
outOfBoundsIdx = true;
}
uint displayIndex = dataIndex;
if (data.Indices != null && rowIdx < data.Indices.Length)
displayIndex = data.Indices[rowIdx];
object[] rowdata = null;
int x = 0;
@@ -1637,16 +1640,16 @@ namespace renderdocui.Windows
if (outOfBoundsIdx)
rowdata[1] = "-";
else
rowdata[1] = index + data.IndexAdd;
rowdata[1] = displayIndex;
bool strip = state.m_Data.Topology == PrimitiveTopology.LineStrip ||
state.m_Data.Topology == PrimitiveTopology.LineStrip_Adj ||
state.m_Data.Topology == PrimitiveTopology.TriangleStrip ||
state.m_Data.Topology == PrimitiveTopology.TriangleStrip_Adj;
if (state.m_Input.Drawcall.indexByteWidth == 2 && index == state.m_Input.IndexRestartValue && state.m_Input.IndexRestart && strip)
if (state.m_Input.Drawcall.indexByteWidth == 2 && dataIndex == state.m_Input.IndexRestartValue && state.m_Input.IndexRestart && strip)
rowdata[1] = "-1";
if (state.m_Input.Drawcall.indexByteWidth == 4 && index == state.m_Input.IndexRestartValue && state.m_Input.IndexRestart && strip)
if (state.m_Input.Drawcall.indexByteWidth == 4 && dataIndex == state.m_Input.IndexRestartValue && state.m_Input.IndexRestart && strip)
rowdata[1] = "-1";
x = 2;
@@ -1679,7 +1682,7 @@ namespace renderdocui.Windows
BinaryReader read = state.m_Reader[bufferFormats[el].buffer];
uint offs = input.Strides[bufferFormats[el].buffer] *
(bufferFormats[el].perinstance ? (instance / (uint)bufferFormats[el].instancerate) : index)
(bufferFormats[el].perinstance ? (instance / (uint)bufferFormats[el].instancerate) : dataIndex)
+ bufferFormats[el].offset;
if (!MeshView)