From ce0a67e8f11ad205fdf87d63da1ede80d3fcedef Mon Sep 17 00:00:00 2001 From: baldurk Date: Sun, 9 Nov 2014 16:17:47 +0000 Subject: [PATCH] Fetch and serialise hidden stream-out buffer counters. Closes #105 * Essentially when streamout'ing to a buffer a hidden counter is saved with it that contains the number of primitives written. * With this change we issue queries over each buffer's duration as a stream-out target so we have those counters available when necessary. * At the start of frame capture, we save out the current value of all those queries, and use them if that buffer is used for a DrawAuto() without being streamout'd to again in the captured frame. --- renderdoc/driver/d3d11/d3d11_context.cpp | 109 +++++++++++++ renderdoc/driver/d3d11/d3d11_context.h | 10 ++ renderdoc/driver/d3d11/d3d11_context_wrap.cpp | 150 +++++++++++++++++- renderdoc/driver/d3d11/d3d11_debug.cpp | 8 +- 4 files changed, 270 insertions(+), 7 deletions(-) diff --git a/renderdoc/driver/d3d11/d3d11_context.cpp b/renderdoc/driver/d3d11/d3d11_context.cpp index 0c9285497..3986986d7 100644 --- a/renderdoc/driver/d3d11/d3d11_context.cpp +++ b/renderdoc/driver/d3d11/d3d11_context.cpp @@ -178,6 +178,11 @@ WrappedID3D11DeviceContext::~WrappedID3D11DeviceContext() if(m_pRealContext->GetType() != D3D11_DEVICE_CONTEXT_IMMEDIATE) m_pDevice->RemoveDeferredContext(this); + + for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it) + { + SAFE_RELEASE(it->second.query); + } if(m_State >= WRITING) { @@ -226,6 +231,110 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(bool applyInitialSt VerifyState(); } + // stream-out hidden counters need to be saved, in case their results are used + // for a DrawAuto() somewhere. Each buffer used as a stream-out target has a hidden + // counter saved with it that stores the number of primitives written, which is then + // used for a DrawAuto(). If the stream-out happens in frame we don't need to worry, + // but if it references a buffer from before we need to have that counter available + // on replay to 'fake' the DrawAuto() just as a Draw() with known values + if(m_State >= WRITING) + { + // this may break API guarantees, but we need to fetch the hidden counters + // so we need to restart any queries for currently set SO targets. + // Potentially to be more correct we could defer fetching the results of queries + // that are still running until they get detached (as they must be detached + // before being used for any DrawAuto calls - if we're in CAPFRAME we could + // serialise the data then. If they're never detached, we don't need the results) + + bool restart[4] = { false }; + + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b]; + + if(buf) + { + ResourceId id = GetIDForResource(buf); + + m_pRealContext->End(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = false; + + restart[b] = true; + } + } + + D3D11_QUERY_DATA_SO_STATISTICS numPrims; + + // readback all known counters + SERIALISE_ELEMENT(uint32_t, numStreamOutCounters, (uint32_t)m_StreamOutCounters.size()); + for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it) + { + SERIALISE_ELEMENT(ResourceId, id, it->first); + + RDCEraseEl(numPrims); + + HRESULT hr = S_FALSE; + + do + { + hr = m_pRealContext->GetData(it->second.query, &numPrims, sizeof(D3D11_QUERY_DATA_SO_STATISTICS), 0); + } while(hr == S_FALSE); + + if(hr != S_OK) + { + numPrims.NumPrimitivesWritten = 0; + RDCERR("Couldn't retrieve hidden buffer counter for streamout on buffer %llx", id); + } + + SERIALISE_ELEMENT(uint64_t, hiddenCounter, (uint64_t)numPrims.NumPrimitivesWritten); + } + + // restart any counters we were forced to stop + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b]; + + if(buf && restart[b]) + { + ResourceId id = GetIDForResource(buf); + + // release any previous query as the hidden counter is overwritten + SAFE_RELEASE(m_StreamOutCounters[id].query); + + D3D11_QUERY queryTypes[] = { + D3D11_QUERY_SO_STATISTICS_STREAM0, + D3D11_QUERY_SO_STATISTICS_STREAM1, + D3D11_QUERY_SO_STATISTICS_STREAM2, + D3D11_QUERY_SO_STATISTICS_STREAM3, + }; + + D3D11_QUERY_DESC qdesc; + qdesc.MiscFlags = 0; + qdesc.Query = queryTypes[b]; + + m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + + m_pRealContext->Begin(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = true; + } + } + } + else + { + // read in the known stream-out counters at the start of the frame. + // any stream-out that happens in the captured frame will be replayed + // and those counters will override this value when it comes to a + // DrawAuto() + SERIALISE_ELEMENT(uint32_t, numStreamOutCounters, 0); + for(uint32_t i=0; i < numStreamOutCounters; i++) + { + SERIALISE_ELEMENT(ResourceId, id, ResourceId()); + SERIALISE_ELEMENT(uint64_t, hiddenCounter, 0); + + m_StreamOutCounters[m_pDevice->GetResourceManager()->GetLiveID(id)].numPrims = hiddenCounter; + } + } + return true; } diff --git a/renderdoc/driver/d3d11/d3d11_context.h b/renderdoc/driver/d3d11/d3d11_context.h index e0d85b5d6..49a6418a4 100644 --- a/renderdoc/driver/d3d11/d3d11_context.h +++ b/renderdoc/driver/d3d11/d3d11_context.h @@ -157,6 +157,16 @@ private: set m_HighTrafficResources; map m_OpenMaps; + struct StreamOutData + { + StreamOutData() : query(NULL), running(false), numPrims(0) {} + ID3D11Query *query; + bool running; + uint64_t numPrims; + }; + + map m_StreamOutCounters; + map > m_ResourceUses; WrappedID3D11Device* m_pDevice; diff --git a/renderdoc/driver/d3d11/d3d11_context_wrap.cpp b/renderdoc/driver/d3d11/d3d11_context_wrap.cpp index 6bb00c676..fa8acedc4 100644 --- a/renderdoc/driver/d3d11/d3d11_context_wrap.cpp +++ b/renderdoc/driver/d3d11/d3d11_context_wrap.cpp @@ -1952,6 +1952,50 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(UINT NumBuffers_, ID3D11 setoffs[b] = Offsets[b]; } + // end stream-out queries for outgoing targets + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b]; + + if(buf) + { + ResourceId id = GetIDForResource(buf); + + m_pRealContext->End(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = false; + } + } + + // start new queries for incoming targets + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = setbufs[b]; + + if(buf) + { + ResourceId id = GetIDForResource(buf); + + // release any previous query as the hidden counter is overwritten + SAFE_RELEASE(m_StreamOutCounters[id].query); + + D3D11_QUERY queryTypes[] = { + D3D11_QUERY_SO_STATISTICS_STREAM0, + D3D11_QUERY_SO_STATISTICS_STREAM1, + D3D11_QUERY_SO_STATISTICS_STREAM2, + D3D11_QUERY_SO_STATISTICS_STREAM3, + }; + + D3D11_QUERY_DESC qdesc; + qdesc.MiscFlags = 0; + qdesc.Query = queryTypes[b]; + + m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + + m_pRealContext->Begin(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = true; + } + } + m_CurrentPipelineState->ChangeRefWrite(m_CurrentPipelineState->SO.Buffers, setbufs, 0, 4); m_CurrentPipelineState->Change(m_CurrentPipelineState->SO.Offsets, setoffs, 0, 4); } @@ -1996,6 +2040,50 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con setoffs[b] = pOffsets[b]; } + // end stream-out queries for outgoing targets + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b]; + + if(buf) + { + ResourceId id = GetIDForResource(buf); + + m_pRealContext->End(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = false; + } + } + + // start new queries for incoming targets + for(UINT b=0; b < 4; b++) + { + ID3D11Buffer *buf = setbufs[b]; + + if(buf) + { + ResourceId id = GetIDForResource(buf); + + // release any previous query as the hidden counter is overwritten + SAFE_RELEASE(m_StreamOutCounters[id].query); + + D3D11_QUERY queryTypes[] = { + D3D11_QUERY_SO_STATISTICS_STREAM0, + D3D11_QUERY_SO_STATISTICS_STREAM1, + D3D11_QUERY_SO_STATISTICS_STREAM2, + D3D11_QUERY_SO_STATISTICS_STREAM3, + }; + + D3D11_QUERY_DESC qdesc; + qdesc.MiscFlags = 0; + qdesc.Query = queryTypes[b]; + + m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + + m_pRealContext->Begin(m_StreamOutCounters[id].query); + m_StreamOutCounters[id].running = true; + } + } + // "If less than four [buffers] are defined by the call, the remaining buffer slots are set to NULL." m_CurrentPipelineState->ChangeRefWrite(m_CurrentPipelineState->SO.Buffers, setbufs, 0, 4); m_CurrentPipelineState->Change(m_CurrentPipelineState->SO.Offsets, setoffs, 0, 4); @@ -3498,9 +3586,58 @@ void WrappedID3D11DeviceContext::Draw(UINT VertexCount, UINT StartVertexLocation bool WrappedID3D11DeviceContext::Serialise_DrawAuto() { + uint64_t numVerts = 0; + if(m_State <= EXECUTING) { - m_pRealContext->DrawAuto(); + // spec says that only the first vertex buffer is used + if(m_CurrentPipelineState->IA.VBs[0] == NULL) + { + RDCERR("DrawAuto() with VB 0 set to NULL!"); + } + else + { + ResourceId id = GetIDForResource(m_CurrentPipelineState->IA.VBs[0]); + + StreamOutData &data = m_StreamOutCounters[id]; + + // if we have a query, the stream-out data for this DrawAuto was generated + // in the captured frame, so we can do a legitimate DrawAuto() + if(data.query) + { + // shouldn't still be bound on output + RDCASSERT(!data.running); + + D3D11_QUERY_DATA_SO_STATISTICS numPrims; + + HRESULT hr = S_FALSE; + + do + { + hr = m_pRealContext->GetData(data.query, &numPrims, sizeof(D3D11_QUERY_DATA_SO_STATISTICS), 0); + } while(hr == S_FALSE); + + if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) + numVerts = numPrims.NumPrimitivesWritten; + else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) + numVerts = numPrims.NumPrimitivesWritten*2; + else + numVerts = numPrims.NumPrimitivesWritten*3; + + m_pRealContext->DrawAuto(); + } + else + { + if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) + numVerts = data.numPrims; + else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) + numVerts = data.numPrims*2; + else + numVerts = data.numPrims*3; + + m_pRealContext->Draw((UINT)numVerts, 0); + } + } } const string desc = m_pSerialiser->GetDebugStr(); @@ -3510,15 +3647,16 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto() if(m_State == READING) { AddEvent(DRAW_AUTO, desc); - string name = "DrawAuto()"; - - // Not implemented. Need to D3D11_QUERY_SO_STATISTICS to find out the - // index count etc to fill out FetchDrawcall - RDCUNIMPLEMENTED("Not fetching draw data for DrawAuto() display"); + string name = "DrawAuto(<" + ToStr::Get(numVerts) + ">)"; FetchDrawcall draw; draw.name = widen(name); draw.flags |= eDraw_Drawcall|eDraw_Auto; + draw.numIndices = (uint32_t)numVerts; + draw.vertexOffset = 0; + draw.indexOffset = 0; + draw.instanceOffset = 0; + draw.numInstances = 1; draw.debugMessages = debugMessages; diff --git a/renderdoc/driver/d3d11/d3d11_debug.cpp b/renderdoc/driver/d3d11/d3d11_debug.cpp index 8749b41ec..8a15e9d76 100644 --- a/renderdoc/driver/d3d11/d3d11_debug.cpp +++ b/renderdoc/driver/d3d11/d3d11_debug.cpp @@ -4192,7 +4192,13 @@ void D3D11DebugManager::InitPostVSBuffers(uint32_t frameID, uint32_t eventID) m_pImmediateContext->SOSetTargets( 1, &m_SOBuffer, &offset ); m_pImmediateContext->Begin(m_SOStatsQuery); - m_WrappedDevice->ReplayLog(frameID, 0, eventID, eReplay_OnlyDraw); + + // trying to stream out a stream-out-auto based drawcall would be bad! + // instead just draw the number of verts we pre-calculated + if(drawcall->flags & eDraw_Auto) + m_pImmediateContext->Draw(drawcall->numIndices, 0); + else + m_WrappedDevice->ReplayLog(frameID, 0, eventID, eReplay_OnlyDraw); m_pImmediateContext->End(m_SOStatsQuery); m_pImmediateContext->GSSetShader(NULL, NULL, 0);