Track bytes written for hidden SO counters. Closes #2662

* The D3D11 spec is unclear but checking sources including D3D11On12 it looks
  like the defined behaviour for SO counters is to calculate the number of bytes
  written and divide that by the VB stride on draw.
* Old captures can't be updated to work with this because the stride is unknown,
  but new captures will work correctly, as will any data that is streamed out
  mid-capture.
This commit is contained in:
baldurk
2022-07-22 16:09:46 +01:00
parent aafd0ec788
commit 0b187934de
12 changed files with 373 additions and 70 deletions
+4
View File
@@ -524,6 +524,10 @@ bool D3D11InitParams::IsSupportedVersion(uint64_t ver)
if(ver == 0x11)
return true;
// 0x12 -> 0x13 - added stride from stream-out to hidden counter data
if(ver == 0x12)
return true;
return false;
}
+34 -18
View File
@@ -213,11 +213,6 @@ WrappedID3D11DeviceContext::~WrappedID3D11DeviceContext()
if(m_pRealContext && GetType() != D3D11_DEVICE_CONTEXT_IMMEDIATE)
m_pDevice->RemoveDeferredContext(this);
for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it)
{
SAFE_RELEASE(it->second.query);
}
SAFE_DELETE(m_FrameReader);
SAFE_RELEASE(m_WrappedVideo.m_pReal);
@@ -266,6 +261,7 @@ struct HiddenCounter
{
ResourceId id;
uint64_t counterValue;
uint32_t stride;
};
DECLARE_REFLECTION_STRUCT(HiddenCounter);
@@ -275,6 +271,14 @@ void DoSerialise(SerialiserType &ser, HiddenCounter &el)
{
SERIALISE_MEMBER(id);
SERIALISE_MEMBER(counterValue);
if(ser.VersionAtLeast(0x13))
{
SERIALISE_MEMBER(stride);
}
else
{
el.stride = 0;
}
}
template <typename SerialiserType>
@@ -316,12 +320,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
if(buf)
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
if(m_StreamOutCounters[id].running)
if(so.running)
{
m_pRealContext->End(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = false;
m_pRealContext->End(so.query);
so.running = false;
}
restart[b] = true;
@@ -331,7 +335,8 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
D3D11_QUERY_DATA_SO_STATISTICS numPrims;
// readback all known counters
for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it)
for(auto it = m_pDevice->GetSOHiddenCounters().begin();
it != m_pDevice->GetSOHiddenCounters().end(); ++it)
{
RDCEraseEl(numPrims);
@@ -350,7 +355,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
ToStr(it->first).c_str());
}
HiddenStreamOutCounters.push_back({it->first, (uint64_t)numPrims.NumPrimitivesWritten});
HiddenCounter h;
h.id = it->first;
h.counterValue = (uint64_t)numPrims.NumPrimitivesWritten;
h.stride = it->second.stride;
HiddenStreamOutCounters.push_back(h);
}
// restart any counters we were forced to stop
@@ -360,10 +370,10 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
if(buf && restart[b])
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
// release any previous query as the hidden counter is overwritten
SAFE_RELEASE(m_StreamOutCounters[id].query);
SAFE_RELEASE(so.query);
D3D11_QUERY queryTypes[] = {
D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1,
@@ -374,10 +384,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
qdesc.MiscFlags = 0;
qdesc.Query = queryTypes[b];
m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query);
m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query);
m_pRealContext->Begin(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = true;
m_pRealContext->Begin(so.query);
so.running = true;
// stride doesn't change as the shader hasn't changed
}
}
}
@@ -406,8 +418,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser
for(const HiddenCounter &c : HiddenStreamOutCounters)
{
if(m_pDevice->GetResourceManager()->HasLiveResource(c.id))
m_StreamOutCounters[m_pDevice->GetResourceManager()->GetLiveID(c.id)].numPrims =
c.counterValue;
{
StreamOutData &so =
m_pDevice->GetSOHiddenCounterForBuffer(m_pDevice->GetResourceManager()->GetLiveID(c.id));
so.numPrims = c.counterValue;
so.stride = c.stride;
}
}
}
+2 -11
View File
@@ -125,17 +125,7 @@ private:
std::set<ResourceId> m_HighTrafficResources;
std::map<MappedResource, MapIntercept> m_OpenMaps;
struct StreamOutData
{
StreamOutData() : query(NULL), running(false), numPrims(0) {}
ID3D11Query *query;
bool running;
uint64_t numPrims;
};
std::map<ResourceId, StreamOutData> m_StreamOutCounters;
std::map<ResourceId, rdcarray<EventUsage> > m_ResourceUses;
std::map<ResourceId, rdcarray<EventUsage>> m_ResourceUses;
WrappedID3D11Device *m_pDevice;
ID3D11DeviceContext *m_pRealContext;
@@ -228,6 +218,7 @@ private:
void Serialise_DebugMessages(SerialiserType &ser);
void DrainAnnotationQueue();
void LatchSOProperties();
void AddUsage(const ActionDescription &a);
+132 -39
View File
@@ -2209,12 +2209,14 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN
if(buf)
{
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
ResourceId id = GetIDForDeviceChild(buf);
if(m_StreamOutCounters[id].running)
if(so.running)
{
m_pRealContext->End(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = false;
m_pRealContext->End(so.query);
so.running = false;
}
}
}
@@ -2226,10 +2228,10 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN
if(buf)
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
// release any previous query as the hidden counter is overwritten
SAFE_RELEASE(m_StreamOutCounters[id].query);
SAFE_RELEASE(so.query);
D3D11_QUERY queryTypes[] = {
D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1,
@@ -2240,13 +2242,18 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN
qdesc.MiscFlags = 0;
qdesc.Query = queryTypes[b];
HRESULT hr = m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query);
HRESULT hr = m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query);
if(FAILED(hr))
RDCERR("Couldn't create streamout query: %s", ToStr(hr).c_str());
m_pRealContext->Begin(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = true;
m_pRealContext->Begin(so.query);
so.running = true;
// since we don't know the binding order (SO targets before GS, or GS before SO targets)
// we'll set this to 0 now and latch it at draw time. We assume these don't change over the
// course of the stream out.
so.stride = 0;
}
}
@@ -2321,12 +2328,12 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con
if(buf)
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
if(m_StreamOutCounters[id].running)
if(so.running)
{
m_pRealContext->End(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = false;
m_pRealContext->End(so.query);
so.running = false;
}
}
}
@@ -2338,10 +2345,10 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con
if(buf)
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
// release any previous query as the hidden counter is overwritten
SAFE_RELEASE(m_StreamOutCounters[id].query);
SAFE_RELEASE(so.query);
D3D11_QUERY queryTypes[] = {
D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1,
@@ -2352,10 +2359,15 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con
qdesc.MiscFlags = 0;
qdesc.Query = queryTypes[b];
m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query);
m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query);
m_pRealContext->Begin(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = true;
m_pRealContext->Begin(so.query);
so.running = true;
// since we don't know the binding order (SO targets before GS, or GS before SO targets)
// we'll set this to 0 now and latch it at draw time. We assume these don't change over the
// course of the stream out.
so.stride = 0;
}
}
@@ -3795,6 +3807,27 @@ void WrappedID3D11DeviceContext::Serialise_DebugMessages(SerialiserType &ser)
}
}
void WrappedID3D11DeviceContext::LatchSOProperties()
{
for(UINT b = 0; b < D3D11_SO_STREAM_COUNT; b++)
{
ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b];
if(buf)
{
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
if(so.running && so.stride == 0)
{
const SOShaderData &shad =
m_pDevice->GetSOShaderData(GetIDForDeviceChild(m_CurrentPipelineState->GS.Object));
so.stride = shad.strides[b];
}
}
}
}
template <typename SerialiserType>
bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstanced(
SerialiserType &ser, UINT IndexCountPerInstance, UINT InstanceCount, UINT StartIndexLocation,
@@ -3815,6 +3848,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstanced(
m_pRealContext->DrawIndexedInstanced(IndexCountPerInstance, InstanceCount, StartIndexLocation,
BaseVertexLocation, StartInstanceLocation);
LatchSOProperties();
if(IsLoading(m_State))
{
RecordDrawStats(true, false, InstanceCount);
@@ -3853,6 +3888,8 @@ void WrappedID3D11DeviceContext::DrawIndexedInstanced(UINT IndexCountPerInstance
StartIndexLocation, BaseVertexLocation,
StartInstanceLocation));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -3888,6 +3925,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawInstanced(SerialiserType &ser,
m_pRealContext->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation,
StartInstanceLocation);
LatchSOProperties();
if(IsLoading(m_State))
{
RecordDrawStats(true, false, InstanceCount);
@@ -3923,6 +3962,8 @@ void WrappedID3D11DeviceContext::DrawInstanced(UINT VertexCountPerInstance, UINT
SERIALISE_TIME_CALL(m_pRealContext->DrawInstanced(VertexCountPerInstance, InstanceCount,
StartVertexLocation, StartInstanceLocation));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -3955,6 +3996,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexed(SerialiserType &ser, UINT
{
m_pRealContext->DrawIndexed(IndexCount, StartIndexLocation, BaseVertexLocation);
LatchSOProperties();
if(IsLoading(m_State))
{
RecordDrawStats(false, false, 1);
@@ -3988,6 +4031,8 @@ void WrappedID3D11DeviceContext::DrawIndexed(UINT IndexCount, UINT StartIndexLoc
SERIALISE_TIME_CALL(m_pRealContext->DrawIndexed(IndexCount, StartIndexLocation, BaseVertexLocation));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -4017,6 +4062,8 @@ bool WrappedID3D11DeviceContext::Serialise_Draw(SerialiserType &ser, UINT Vertex
{
m_pRealContext->Draw(VertexCount, StartVertexLocation);
LatchSOProperties();
if(IsLoading(m_State))
{
RecordDrawStats(false, false, 1);
@@ -4048,6 +4095,8 @@ void WrappedID3D11DeviceContext::Draw(UINT VertexCount, UINT StartVertexLocation
SERIALISE_TIME_CALL(m_pRealContext->Draw(VertexCount, StartVertexLocation));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -4069,7 +4118,7 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser)
SERIALISE_CHECK_READ_ERRORS();
uint64_t numVerts = 0;
uint64_t numVertsToDraw = 0;
if(IsReplayingAndReading())
{
@@ -4084,7 +4133,7 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser)
{
ResourceId id = GetIDForDeviceChild(m_CurrentPipelineState->IA.VBs[0]);
StreamOutData &data = m_StreamOutCounters[id];
StreamOutData &data = m_pDevice->GetSOHiddenCounterForBuffer(id);
// if we have a query, the stream-out data for this DrawAuto was generated
// in the captured frame, so we can do a legitimate DrawAuto()
@@ -4103,27 +4152,61 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser)
sizeof(D3D11_QUERY_DATA_SO_STATISTICS), 0);
} while(hr == S_FALSE);
if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST)
numVerts = numPrims.NumPrimitivesWritten;
else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST)
numVerts = numPrims.NumPrimitivesWritten * 2;
if(data.stride != 0)
{
  // the hidden counter is tracked in primitives, data.stride is the number of bytes written
  // per primitive, so this recovers the number of bytes streamed out
  const uint64_t bytesWritten = numPrims.NumPrimitivesWritten * data.stride;
  const uint64_t vbOffset = m_CurrentPipelineState->IA.Offsets[0];
  const uint64_t vbStride = m_CurrentPipelineState->IA.Strides[0];

  // guard the unsigned subtraction (an offset past the written data would wrap to a huge
  // count) and the division (a zero vertex stride would fault)
  if(vbStride != 0 && bytesWritten > vbOffset)
    numVertsToDraw = uint32_t((bytesWritten - vbOffset) / vbStride);
  else
    numVertsToDraw = 0;
}
else
numVerts = numPrims.NumPrimitivesWritten * 3;
{
RDCERR("Unexpected 0 stride on DrawAuto, no SO shader bound properly?");
// fallback to the mostly-accurate estimate
if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST)
numVertsToDraw = numPrims.NumPrimitivesWritten;
else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST)
numVertsToDraw = numPrims.NumPrimitivesWritten * 2;
else
numVertsToDraw = numPrims.NumPrimitivesWritten * 3;
}
m_pRealContext->DrawAuto();
}
else
{
// otherwise use the cached value from the previous frame.
// in older captures we only stored the number of primitives, so use the old behaviour of
// taking the current topology and assuming it's the same, so behaviour doesn't change.
// newer captures store enough information that we can do a proper byte-wise calculation
if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST)
numVerts = data.numPrims;
else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST)
numVerts = data.numPrims * 2;
if(data.stride != 0)
{
  // the cached counter is stored in primitives, data.stride is the number of bytes written
  // per primitive, so this recovers the number of bytes streamed out before the frame
  const uint64_t bytesWritten = data.numPrims * data.stride;
  const uint64_t vbOffset = m_CurrentPipelineState->IA.Offsets[0];
  const uint64_t vbStride = m_CurrentPipelineState->IA.Strides[0];

  // guard the unsigned subtraction (an offset past the written data would wrap to a huge
  // count that is then passed to Draw) and the division (a zero vertex stride would fault)
  if(vbStride != 0 && bytesWritten > vbOffset)
    numVertsToDraw = uint32_t((bytesWritten - vbOffset) / vbStride);
  else
    numVertsToDraw = 0;
}
else
numVerts = data.numPrims * 3;
{
m_pDevice->AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
MessageSource::IncorrectAPIUse,
"Call to DrawAuto may be inaccurate if topology or vertex "
"stride has changed between stream-out and draw.\n"
"Recapture with this version of RenderDoc to fix this "
"problem, this capture was created with an older version.");
m_pRealContext->Draw((UINT)numVerts, 0);
if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST)
numVertsToDraw = data.numPrims;
else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST)
numVertsToDraw = data.numPrims * 2;
else
numVertsToDraw = data.numPrims * 3;
}
m_pRealContext->Draw((UINT)numVertsToDraw, 0);
}
}
@@ -4134,9 +4217,9 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser)
AddEvent();
ActionDescription action;
action.customName = StringFormat::Fmt("DrawAuto(<%u>)", numVerts);
action.customName = StringFormat::Fmt("DrawAuto(<%u>)", numVertsToDraw);
action.flags |= ActionFlags::Drawcall | ActionFlags::Auto;
action.numIndices = (uint32_t)numVerts;
action.numIndices = (uint32_t)numVertsToDraw;
action.vertexOffset = 0;
action.indexOffset = 0;
action.instanceOffset = 0;
@@ -4195,6 +4278,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstancedIndirect(Serialis
AlignedByteOffsetForArgs);
}
LatchSOProperties();
if(IsLoading(m_State))
{
AddEvent();
@@ -4289,6 +4374,8 @@ void WrappedID3D11DeviceContext::DrawIndexedInstancedIndirect(ID3D11Buffer *pBuf
SERIALISE_TIME_CALL(m_pRealContext->DrawIndexedInstancedIndirect(
UNWRAP(WrappedID3D11Buffer, pBufferForArgs), AlignedByteOffsetForArgs));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -4326,6 +4413,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawInstancedIndirect(SerialiserType
AlignedByteOffsetForArgs);
}
LatchSOProperties();
if(IsLoading(m_State))
{
AddEvent();
@@ -4417,6 +4506,8 @@ void WrappedID3D11DeviceContext::DrawInstancedIndirect(ID3D11Buffer *pBufferForA
SERIALISE_TIME_CALL(m_pRealContext->DrawInstancedIndirect(
UNWRAP(WrappedID3D11Buffer, pBufferForArgs), AlignedByteOffsetForArgs));
LatchSOProperties();
if(IsActiveCapturing(m_State))
{
USE_SCRATCH_SERIALISER();
@@ -6455,12 +6546,12 @@ bool WrappedID3D11DeviceContext::Serialise_ClearState(SerialiserType &ser)
if(buf)
{
ResourceId id = GetIDForDeviceChild(buf);
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
if(m_StreamOutCounters[id].running)
if(so.running)
{
m_pRealContext->End(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = false;
m_pRealContext->End(so.query);
so.running = false;
}
}
}
@@ -6499,12 +6590,14 @@ void WrappedID3D11DeviceContext::ClearState()
if(buf)
{
StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf));
ResourceId id = GetIDForDeviceChild(buf);
if(m_StreamOutCounters[id].running)
if(so.running)
{
m_pRealContext->End(m_StreamOutCounters[id].query);
m_StreamOutCounters[id].running = false;
m_pRealContext->End(so.query);
so.running = false;
}
}
}
+3
View File
@@ -265,6 +265,9 @@ WrappedID3D11Device::~WrappedID3D11Device()
RenderDoc::Inst().RemoveDeviceFrameCapturer((ID3D11Device *)this);
for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it)
SAFE_RELEASE(it->second.query);
for(auto it = m_CachedStateObjects.begin(); it != m_CachedStateObjects.end(); ++it)
if(*it)
IntRelease(*it);
+20 -1
View File
@@ -46,6 +46,19 @@ class D3D11Replay;
#define D3D11_1_UAV_SLOT_COUNT 64
#endif
// Hidden stream-out counter state tracked per stream-out buffer (keyed by buffer ResourceId).
struct StreamOutData
{
  // SO statistics query created when the buffer is bound as a stream-out target
  ID3D11Query *query = nullptr;
  // true between Begin() and End() on the query
  bool running = false;
  // primitive count restored from a capture's serialised hidden counter
  uint64_t numPrims = 0;
  // bytes written per primitive, latched at draw time; 0 means not known (yet)
  uint32_t stride = 0;
};
// Stream-out properties recorded for a geometry shader created with stream output.
struct SOShaderData
{
  // per-slot stride in bytes for a whole primitive (vertex stride * vertices per primitive)
  uint32_t strides[4] = {0, 0, 0, 0};
};
enum TextureDisplayType
{
TEXDISPLAY_UNKNOWN = 0,
@@ -68,7 +81,7 @@ struct D3D11InitParams
uint32_t VendorUAV = ~0U;
// check if a frame capture section version is supported
static const uint64_t CurrentVersion = 0x12;
static const uint64_t CurrentVersion = 0x13;
static bool IsSupportedVersion(uint64_t ver);
};
@@ -585,6 +598,9 @@ private:
std::map<ID3D11InputLayout *, rdcarray<D3D11_INPUT_ELEMENT_DESC> > m_LayoutDescs;
std::map<ID3D11InputLayout *, WrappedShader *> m_LayoutShaders;
std::map<ResourceId, StreamOutData> m_StreamOutCounters;
std::map<ResourceId, SOShaderData> m_SOShaders;
static WrappedID3D11Device *m_pCurrentWrappedDevice;
std::map<IDXGISwapper *, ID3D11RenderTargetView *> m_SwapChains;
@@ -652,6 +668,9 @@ public:
void RemoveDeferredContext(WrappedID3D11DeviceContext *defctx);
WrappedID3D11DeviceContext *GetDeferredContext(size_t idx);
const std::map<ResourceId, StreamOutData> &GetSOHiddenCounters() { return m_StreamOutCounters; }
StreamOutData &GetSOHiddenCounterForBuffer(ResourceId id) { return m_StreamOutCounters[id]; }
const SOShaderData &GetSOShaderData(ResourceId id) { return m_SOShaders[id]; }
ResourceId GetResourceID() { return m_ResourceID; }
const ActionDescription *GetAction(uint32_t eventId);
ResourceDescription &GetResourceDesc(ResourceId id);
@@ -1724,6 +1724,40 @@ bool WrappedID3D11Device::Serialise_CreateGeometryShaderWithStreamOutput(
GetResourceManager()->AddLiveResource(pShader, ret);
}
D3D_PRIMITIVE_TOPOLOGY topo =
    DXBC::DXBCContainer::GetOutputTopology(pShaderBytecode, BytecodeLength);

// Number of vertices written to the stream-out buffer for each primitive. Stream-out writes
// complete primitives as lists, so a line strip output still produces two vertices per
// primitive. Anything else (including an undefined topology) is treated as triangles.
uint32_t vertsPerPrim = 1;
if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST)
  vertsPerPrim = 1;
else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST || topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP)
  vertsPerPrim = 2;
else
  vertsPerPrim = 3;
SOShaderData &soshader = m_SOShaders[GetIDForDeviceChild(ret)];
for(UINT i = 0; i < NumStrides; i++)
soshader.strides[i] = pBufferStrides[i] * vertsPerPrim;
// Undocumented, but D3D11 auto-calculates tight strides if they are not specified, based on the
// declarations (which are tightly packed)
for(UINT i = NumStrides; i < D3D11_SO_STREAM_COUNT; i++)
{
// count the entries writing to this slot
for(size_t decl = 0; decl < NumEntries; decl++)
{
if(pSODeclaration[decl].OutputSlot == i)
{
// all components are written as 32-bit values
soshader.strides[i] += pSODeclaration[decl].ComponentCount * sizeof(uint32_t);
}
}
// still want the stride per-primitive not per-vertex
soshader.strides[i] *= vertsPerPrim;
}
AddResource(pShader, ResourceType::Shader, "Geometry Shader");
// if this shader was initialised with a shader ext UAV, pull in that chunk as one of ours
// and unset it (there will be one for each create that actually used vendor extensions)
@@ -1768,6 +1802,40 @@ HRESULT WrappedID3D11Device::CreateGeometryShaderWithStreamOutput(
wrapped = new WrappedID3D11Shader<ID3D11GeometryShader>(
real, ResourceId(), (const byte *)pShaderBytecode, BytecodeLength, this);
D3D_PRIMITIVE_TOPOLOGY topo =
    DXBC::DXBCContainer::GetOutputTopology(pShaderBytecode, BytecodeLength);

// Number of vertices written to the stream-out buffer for each primitive. Stream-out writes
// complete primitives as lists, so a line strip output still produces two vertices per
// primitive. Anything else (including an undefined topology) is treated as triangles.
uint32_t vertsPerPrim = 1;
if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST)
  vertsPerPrim = 1;
else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST || topo == D3D_PRIMITIVE_TOPOLOGY_LINESTRIP)
  vertsPerPrim = 2;
else
  vertsPerPrim = 3;
SOShaderData &soshader = m_SOShaders[GetIDForDeviceChild(wrapped)];
for(UINT i = 0; i < NumStrides; i++)
soshader.strides[i] = pBufferStrides[i] * vertsPerPrim;
// Undocumented, but D3D11 auto-calculates tight strides if they are not specified, based on the
// declarations (which are tightly packed)
for(UINT i = NumStrides; i < D3D11_SO_STREAM_COUNT; i++)
{
// count the entries writing to this slot
for(size_t decl = 0; decl < NumEntries; decl++)
{
if(pSODeclaration[decl].OutputSlot == i)
{
// all components are written as 32-bit values
soshader.strides[i] += pSODeclaration[decl].ComponentCount * sizeof(uint32_t);
}
}
// still want the stride per-primitive not per-vertex
soshader.strides[i] *= vertsPerPrim;
}
if(IsCaptureMode(m_State))
{
Chunk *vendorChunk = NULL;
@@ -589,6 +589,47 @@ D3D_PRIMITIVE_TOPOLOGY Program::GetOutputTopology()
return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
}
// Scans a raw shader token stream (the contents of a SHDR/SHEX chunk) for the geometry shader
// output topology declaration, without building a full Program.
//
// bytes/length: the token stream and its size in bytes.
// Returns the declared output topology, or D3D_PRIMITIVE_TOPOLOGY_UNDEFINED if no such
// declaration is found or the stream is truncated/malformed.
D3D_PRIMITIVE_TOPOLOGY Program::GetOutputTopology(const byte *bytes, size_t length)
{
  // need at least the version and length tokens
  if(bytes == NULL || length < 2 * sizeof(uint32_t))
    return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

  const uint32_t *begin = (const uint32_t *)bytes;
  const uint32_t *end = begin + (length / sizeof(uint32_t));

  // skip version and length
  const uint32_t *cur = begin + 2;

  while(cur < end)
  {
    const uint32_t OpcodeToken0 = cur[0];

    const OpcodeType op = Opcode::Type.Get(OpcodeToken0);

    if(op == OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY)
    {
      // this declaration is a single token: the topology is packed into the opcode token
      // itself, not into a following operand token.
      // NOTE(review): assumes Decl::OutputPrimitiveTopology masks the opcode token - confirm
      // against its declaration.
      return Decl::OutputPrimitiveTopology.Get(OpcodeToken0);
    }

    uint32_t tokenCount = 0;

    if(op == OPCODE_CUSTOMDATA)
    {
      // length in opcode token is 0, full length is in second dword
      if(cur + 1 >= end)
        break;

      tokenCount = cur[1];
    }
    else
    {
      tokenCount = Opcode::Length.Get(OpcodeToken0);
    }

    // a zero length would never advance, and a length running past the end is malformed
    if(tokenCount == 0 || tokenCount > size_t(end - cur))
      break;

    cur += tokenCount;
  }

  return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
}
void Program::SetupRegisterFile(rdcarray<ShaderVariable> &registers) const
{
size_t numRegisters = m_NumTemps + m_IndexTempSizes.size() + m_NumOutputs;
@@ -1235,6 +1235,7 @@ public:
rdcstr GetRegisterName(OperandType oper, uint32_t index) const;
static bool UsesExtensionUAV(uint32_t slot, uint32_t space, const byte *bytes, size_t length);
static D3D_PRIMITIVE_TOPOLOGY GetOutputTopology(const byte *bytes, size_t length);
protected:
friend class Program;
@@ -458,6 +458,34 @@ D3D_PRIMITIVE_TOPOLOGY DXBCContainer::GetOutputTopology()
return m_OutputTopology;
}
// Looks up the geometry shader output topology directly from a DXBC container blob, without
// constructing a DXBCContainer.
//
// ByteCode/ByteCodeLength: the container blob and its size in bytes.
// Returns the topology found in the first SHEX/SHDR chunk, or
// D3D_PRIMITIVE_TOPOLOGY_UNDEFINED if the blob is not a well-formed DXBC container of the
// given length or has no such chunk.
D3D_PRIMITIVE_TOPOLOGY DXBCContainer::GetOutputTopology(const void *ByteCode, size_t ByteCodeLength)
{
  // the header must fit before any of its fields can be read
  if(ByteCode == NULL || ByteCodeLength < sizeof(FileHeader))
    return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

  const FileHeader *header = (const FileHeader *)ByteCode;

  const byte *data = (const byte *)ByteCode;    // just for convenience

  if(header->fourcc != FOURCC_DXBC)
    return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

  if(header->fileLength != (uint32_t)ByteCodeLength)
    return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

  // the chunk offset table sits right after the header and must fit inside the blob
  if(header->numChunks > (ByteCodeLength - sizeof(FileHeader)) / sizeof(uint32_t))
    return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;

  const uint32_t *chunkOffsets = (const uint32_t *)(header + 1);

  // every chunk starts with a fourcc and a size
  const size_t chunkHeaderSize = 2 * sizeof(uint32_t);

  for(uint32_t chunkIdx = 0; chunkIdx < header->numChunks; chunkIdx++)
  {
    const size_t offset = chunkOffsets[chunkIdx];

    // skip chunks whose header would lie outside the blob
    if(offset > ByteCodeLength || ByteCodeLength - offset < chunkHeaderSize)
      continue;

    const uint32_t *fourcc = (const uint32_t *)(data + offset);
    const uint32_t *chunkSize = (const uint32_t *)(fourcc + 1);

    const byte *chunkContents = (const byte *)(chunkSize + 1);

    if(*fourcc == FOURCC_SHEX || *fourcc == FOURCC_SHDR)
    {
      // never let the scan run past the end of the blob, whatever the chunk claims
      const size_t available = ByteCodeLength - offset - chunkHeaderSize;
      const size_t contentsSize = *chunkSize < available ? (size_t)*chunkSize : available;

      return DXBCBytecode::Program::GetOutputTopology(chunkContents, contentsSize);
    }
  }

  return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
}
const rdcstr &DXBCContainer::GetDisassembly()
{
if(m_Disassembly.empty())
@@ -220,6 +220,7 @@ public:
static bool CheckForDebugInfo(const void *ByteCode, size_t ByteCodeLength);
static bool CheckForDXIL(const void *ByteCode, size_t ByteCodeLength);
static rdcstr GetDebugBinaryPath(const void *ByteCode, size_t ByteCodeLength);
static D3D_PRIMITIVE_TOPOLOGY GetOutputTopology(const void *ByteCode, size_t ByteCodeLength);
private:
void TryFetchSeparateDebugInfo(bytebuf &byteCode, const rdcstr &debugInfoPath);
+39 -1
View File
@@ -73,8 +73,9 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest)
ID3D11BufferPtr vb = MakeBuffer().Vertex().Data(DefaultTri);
ID3D11BufferPtr so[2] = {
ID3D11BufferPtr so[3] = {
MakeBuffer().StreamOut().Vertex().Size(2048), MakeBuffer().StreamOut().Vertex().Size(2048),
MakeBuffer().StreamOut().Vertex().Size(2048),
};
D3D11_INPUT_ELEMENT_DESC layoutdesc[] = {
@@ -93,6 +94,29 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest)
CHECK_HR(dev->CreateInputLayout(layoutdesc, ARRAY_COUNT(layoutdesc), vsblob->GetBufferPointer(),
vsblob->GetBufferSize(), &streamoutLayout));
// pre fill buffer 2 with pre-frame data
{
ctx->ClearState();
IASetVertexBuffer(vb, sizeof(DefaultA2V), 0);
ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ctx->IASetInputLayout(defaultLayout);
ctx->VSSetShader(vs, NULL, 0);
ctx->GSSetShader(gs, NULL, 0);
ctx->PSSetShader(ps, NULL, 0);
RSSetViewport({0.0f, 0.0f, (float)screenWidth, (float)screenHeight, 0.0f, 1.0f});
ctx->OMSetRenderTargets(1, &bbRTV.GetInterfacePtr(), NULL);
ID3D11Buffer *bufs[] = {so[2], so[1]};
UINT offs[2] = {0};
ctx->SOSetTargets(2, bufs, offs);
ctx->Draw(3, 0);
}
while(Running())
{
ctx->ClearState();
@@ -156,7 +180,9 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest)
ctx->IASetVertexBuffers(0, 2, bufs, &strides[0], offs);
ctx->IASetInputLayout(streamoutLayout);
ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
ctx->DrawAuto();
ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
RSSetViewport({0.0f, 0.0f, (float)screenWidth, (float)screenHeight, 0.0f, 1.0f});
@@ -203,8 +229,20 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest)
ctx->IASetInputLayout(streamoutLayout);
ctx->DrawAuto();
ctx->SOSetTargets(0, NULL, NULL);
RSSetViewport({(screenWidth * 3.0f) / 4.0f, 0.0f, (float)screenWidth / 4.0f,
(float)screenHeight / 4.0f, 0.0f, 1.0f});
bufs[0] = so[2];
ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST);
ctx->IASetVertexBuffers(0, 2, bufs, &strides[0], offs);
ctx->DrawAuto();
// leave stream-out buffers bound at the end of the frame
ctx->ClearState();
bufs[0] = so[1];
bufs[1] = so[0];
ctx->SOSetTargets(2, bufs, offs);
Present();