diff --git a/renderdoc/driver/d3d11/d3d11_common.cpp b/renderdoc/driver/d3d11/d3d11_common.cpp index 214b1462b..cc8657c12 100644 --- a/renderdoc/driver/d3d11/d3d11_common.cpp +++ b/renderdoc/driver/d3d11/d3d11_common.cpp @@ -524,6 +524,10 @@ bool D3D11InitParams::IsSupportedVersion(uint64_t ver) if(ver == 0x11) return true; + // 0x12 -> 0x13 - added stride from stream-out to hidden counter data + if(ver == 0x12) + return true; + return false; } diff --git a/renderdoc/driver/d3d11/d3d11_context.cpp b/renderdoc/driver/d3d11/d3d11_context.cpp index a06fe91ff..13465d7f1 100644 --- a/renderdoc/driver/d3d11/d3d11_context.cpp +++ b/renderdoc/driver/d3d11/d3d11_context.cpp @@ -213,11 +213,6 @@ WrappedID3D11DeviceContext::~WrappedID3D11DeviceContext() if(m_pRealContext && GetType() != D3D11_DEVICE_CONTEXT_IMMEDIATE) m_pDevice->RemoveDeferredContext(this); - for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it) - { - SAFE_RELEASE(it->second.query); - } - SAFE_DELETE(m_FrameReader); SAFE_RELEASE(m_WrappedVideo.m_pReal); @@ -266,6 +261,7 @@ struct HiddenCounter { ResourceId id; uint64_t counterValue; + uint32_t stride; }; DECLARE_REFLECTION_STRUCT(HiddenCounter); @@ -275,6 +271,14 @@ void DoSerialise(SerialiserType &ser, HiddenCounter &el) { SERIALISE_MEMBER(id); SERIALISE_MEMBER(counterValue); + if(ser.VersionAtLeast(0x13)) + { + SERIALISE_MEMBER(stride); + } + else + { + el.stride = 0; + } } template @@ -316,12 +320,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser if(buf) { - ResourceId id = GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); - if(m_StreamOutCounters[id].running) + if(so.running) { - m_pRealContext->End(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = false; + m_pRealContext->End(so.query); + so.running = false; } restart[b] = true; @@ -331,7 +335,8 @@ bool 
WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser D3D11_QUERY_DATA_SO_STATISTICS numPrims; // readback all known counters - for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it) + for(auto it = m_pDevice->GetSOHiddenCounters().begin(); + it != m_pDevice->GetSOHiddenCounters().end(); ++it) { RDCEraseEl(numPrims); @@ -350,7 +355,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser ToStr(it->first).c_str()); } - HiddenStreamOutCounters.push_back({it->first, (uint64_t)numPrims.NumPrimitivesWritten}); + HiddenCounter h; + h.id = it->first; + h.counterValue = (uint64_t)numPrims.NumPrimitivesWritten; + h.stride = it->second.stride; + + HiddenStreamOutCounters.push_back(h); } // restart any counters we were forced to stop @@ -360,10 +370,10 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser if(buf && restart[b]) { - ResourceId id = GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); // release any previous query as the hidden counter is overwritten - SAFE_RELEASE(m_StreamOutCounters[id].query); + SAFE_RELEASE(so.query); D3D11_QUERY queryTypes[] = { D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1, @@ -374,10 +384,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser qdesc.MiscFlags = 0; qdesc.Query = queryTypes[b]; - m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query); - m_pRealContext->Begin(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = true; + m_pRealContext->Begin(so.query); + so.running = true; + + // stride doesn't change as the shader hasn't changed } } } @@ -406,8 +418,12 @@ bool WrappedID3D11DeviceContext::Serialise_BeginCaptureFrame(SerialiserType &ser for(const HiddenCounter &c : HiddenStreamOutCounters) { 
if(m_pDevice->GetResourceManager()->HasLiveResource(c.id)) - m_StreamOutCounters[m_pDevice->GetResourceManager()->GetLiveID(c.id)].numPrims = - c.counterValue; + { + StreamOutData &so = + m_pDevice->GetSOHiddenCounterForBuffer(m_pDevice->GetResourceManager()->GetLiveID(c.id)); + so.numPrims = c.counterValue; + so.stride = c.stride; + } } } diff --git a/renderdoc/driver/d3d11/d3d11_context.h b/renderdoc/driver/d3d11/d3d11_context.h index bfdb335d5..bc9474e2b 100644 --- a/renderdoc/driver/d3d11/d3d11_context.h +++ b/renderdoc/driver/d3d11/d3d11_context.h @@ -125,17 +125,7 @@ private: std::set m_HighTrafficResources; std::map m_OpenMaps; - struct StreamOutData - { - StreamOutData() : query(NULL), running(false), numPrims(0) {} - ID3D11Query *query; - bool running; - uint64_t numPrims; - }; - - std::map m_StreamOutCounters; - - std::map > m_ResourceUses; + std::map> m_ResourceUses; WrappedID3D11Device *m_pDevice; ID3D11DeviceContext *m_pRealContext; @@ -228,6 +218,7 @@ private: void Serialise_DebugMessages(SerialiserType &ser); void DrainAnnotationQueue(); + void LatchSOProperties(); void AddUsage(const ActionDescription &a); diff --git a/renderdoc/driver/d3d11/d3d11_context_wrap.cpp b/renderdoc/driver/d3d11/d3d11_context_wrap.cpp index c70377854..3b19ae983 100644 --- a/renderdoc/driver/d3d11/d3d11_context_wrap.cpp +++ b/renderdoc/driver/d3d11/d3d11_context_wrap.cpp @@ -2209,12 +2209,14 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN if(buf) { + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); + ResourceId id = GetIDForDeviceChild(buf); - if(m_StreamOutCounters[id].running) + if(so.running) { - m_pRealContext->End(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = false; + m_pRealContext->End(so.query); + so.running = false; } } } @@ -2226,10 +2228,10 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN if(buf) { - ResourceId id = 
GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); // release any previous query as the hidden counter is overwritten - SAFE_RELEASE(m_StreamOutCounters[id].query); + SAFE_RELEASE(so.query); D3D11_QUERY queryTypes[] = { D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1, @@ -2240,13 +2242,18 @@ bool WrappedID3D11DeviceContext::Serialise_SOSetTargets(SerialiserType &ser, UIN qdesc.MiscFlags = 0; qdesc.Query = queryTypes[b]; - HRESULT hr = m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + HRESULT hr = m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query); if(FAILED(hr)) RDCERR("Couldn't create streamout query: %s", ToStr(hr).c_str()); - m_pRealContext->Begin(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = true; + m_pRealContext->Begin(so.query); + so.running = true; + + // since we don't know the binding order (SO targets before GS, or GS before SO targets) + // we'll set this to 0 now and latch it at draw time. We assume these don't change over the + // course of the stream out. 
+ so.stride = 0; } } @@ -2321,12 +2328,12 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con if(buf) { - ResourceId id = GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); - if(m_StreamOutCounters[id].running) + if(so.running) { - m_pRealContext->End(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = false; + m_pRealContext->End(so.query); + so.running = false; } } } @@ -2338,10 +2345,10 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con if(buf) { - ResourceId id = GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); // release any previous query as the hidden counter is overwritten - SAFE_RELEASE(m_StreamOutCounters[id].query); + SAFE_RELEASE(so.query); D3D11_QUERY queryTypes[] = { D3D11_QUERY_SO_STATISTICS_STREAM0, D3D11_QUERY_SO_STATISTICS_STREAM1, @@ -2352,10 +2359,15 @@ void WrappedID3D11DeviceContext::SOSetTargets(UINT NumBuffers, ID3D11Buffer *con qdesc.MiscFlags = 0; qdesc.Query = queryTypes[b]; - m_pDevice->GetReal()->CreateQuery(&qdesc, &m_StreamOutCounters[id].query); + m_pDevice->GetReal()->CreateQuery(&qdesc, &so.query); - m_pRealContext->Begin(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = true; + m_pRealContext->Begin(so.query); + so.running = true; + + // since we don't know the binding order (SO targets before GS, or GS before SO targets) + // we'll set this to 0 now and latch it at draw time. We assume these don't change over the + // course of the stream out. 
+ so.stride = 0; } } @@ -3795,6 +3807,27 @@ void WrappedID3D11DeviceContext::Serialise_DebugMessages(SerialiserType &ser) } } +void WrappedID3D11DeviceContext::LatchSOProperties() +{ + for(UINT b = 0; b < D3D11_SO_STREAM_COUNT; b++) + { + ID3D11Buffer *buf = m_CurrentPipelineState->SO.Buffers[b]; + + if(buf) + { + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); + + if(so.running && so.stride == 0) + { + const SOShaderData &shad = + m_pDevice->GetSOShaderData(GetIDForDeviceChild(m_CurrentPipelineState->GS.Object)); + + so.stride = shad.strides[b]; + } + } + } +} + template bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstanced( SerialiserType &ser, UINT IndexCountPerInstance, UINT InstanceCount, UINT StartIndexLocation, @@ -3815,6 +3848,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstanced( m_pRealContext->DrawIndexedInstanced(IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation); + LatchSOProperties(); + if(IsLoading(m_State)) { RecordDrawStats(true, false, InstanceCount); @@ -3853,6 +3888,8 @@ void WrappedID3D11DeviceContext::DrawIndexedInstanced(UINT IndexCountPerInstance StartIndexLocation, BaseVertexLocation, StartInstanceLocation)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -3888,6 +3925,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawInstanced(SerialiserType &ser, m_pRealContext->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation); + LatchSOProperties(); + if(IsLoading(m_State)) { RecordDrawStats(true, false, InstanceCount); @@ -3923,6 +3962,8 @@ void WrappedID3D11DeviceContext::DrawInstanced(UINT VertexCountPerInstance, UINT SERIALISE_TIME_CALL(m_pRealContext->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -3955,6 +3996,8 
@@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexed(SerialiserType &ser, UINT { m_pRealContext->DrawIndexed(IndexCount, StartIndexLocation, BaseVertexLocation); + LatchSOProperties(); + if(IsLoading(m_State)) { RecordDrawStats(false, false, 1); @@ -3988,6 +4031,8 @@ void WrappedID3D11DeviceContext::DrawIndexed(UINT IndexCount, UINT StartIndexLoc SERIALISE_TIME_CALL(m_pRealContext->DrawIndexed(IndexCount, StartIndexLocation, BaseVertexLocation)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -4017,6 +4062,8 @@ bool WrappedID3D11DeviceContext::Serialise_Draw(SerialiserType &ser, UINT Vertex { m_pRealContext->Draw(VertexCount, StartVertexLocation); + LatchSOProperties(); + if(IsLoading(m_State)) { RecordDrawStats(false, false, 1); @@ -4048,6 +4095,8 @@ void WrappedID3D11DeviceContext::Draw(UINT VertexCount, UINT StartVertexLocation SERIALISE_TIME_CALL(m_pRealContext->Draw(VertexCount, StartVertexLocation)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -4069,7 +4118,7 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser) SERIALISE_CHECK_READ_ERRORS(); - uint64_t numVerts = 0; + uint64_t numVertsToDraw = 0; if(IsReplayingAndReading()) { @@ -4084,7 +4133,7 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser) { ResourceId id = GetIDForDeviceChild(m_CurrentPipelineState->IA.VBs[0]); - StreamOutData &data = m_StreamOutCounters[id]; + StreamOutData &data = m_pDevice->GetSOHiddenCounterForBuffer(id); // if we have a query, the stream-out data for this DrawAuto was generated // in the captured frame, so we can do a legitimate DrawAuto() @@ -4103,27 +4152,61 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser) sizeof(D3D11_QUERY_DATA_SO_STATISTICS), 0); } while(hr == S_FALSE); - if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) - numVerts = numPrims.NumPrimitivesWritten; - else 
if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) - numVerts = numPrims.NumPrimitivesWritten * 2; + if(data.stride != 0) + { + uint64_t bytesWritten = numPrims.NumPrimitivesWritten * data.stride; + + numVertsToDraw = uint32_t((bytesWritten - m_CurrentPipelineState->IA.Offsets[0]) / + m_CurrentPipelineState->IA.Strides[0]); + } else - numVerts = numPrims.NumPrimitivesWritten * 3; + { + RDCERR("Unexpected 0 stride on DrawAuto, no SO shader bound properly?"); + + // fallback to the mostly-accurate estimate + + if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) + numVertsToDraw = numPrims.NumPrimitivesWritten; + else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) + numVertsToDraw = numPrims.NumPrimitivesWritten * 2; + else + numVertsToDraw = numPrims.NumPrimitivesWritten * 3; + } m_pRealContext->DrawAuto(); } else { // otherwise use the cached value from the previous frame. + // in older captures we only stored the number of primitives, so use the old behaviour of + // taking the current topology and assuming it's the same, so behaviour doesn't change. 
+ // newer captures store enough information that we can do a proper byte-wise calculation - if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) - numVerts = data.numPrims; - else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) - numVerts = data.numPrims * 2; + if(data.stride != 0) + { + uint64_t bytesWritten = data.numPrims * data.stride; + + numVertsToDraw = uint32_t((bytesWritten - m_CurrentPipelineState->IA.Offsets[0]) / + m_CurrentPipelineState->IA.Strides[0]); + } else - numVerts = data.numPrims * 3; + { + m_pDevice->AddDebugMessage(MessageCategory::Execution, MessageSeverity::High, + MessageSource::IncorrectAPIUse, + "Call to DrawAuto may be inaccurate if topology or vertex " + "stride has changed between stream-out and draw.\n" + "Recapture with this version of RenderDoc to fix this " + "problem, this capture was created with an older version."); - m_pRealContext->Draw((UINT)numVerts, 0); + if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_POINTLIST) + numVertsToDraw = data.numPrims; + else if(m_CurrentPipelineState->IA.Topo == D3D11_PRIMITIVE_TOPOLOGY_LINELIST) + numVertsToDraw = data.numPrims * 2; + else + numVertsToDraw = data.numPrims * 3; + } + + m_pRealContext->Draw((UINT)numVertsToDraw, 0); } } @@ -4134,9 +4217,9 @@ bool WrappedID3D11DeviceContext::Serialise_DrawAuto(SerialiserType &ser) AddEvent(); ActionDescription action; - action.customName = StringFormat::Fmt("DrawAuto(<%u>)", numVerts); + action.customName = StringFormat::Fmt("DrawAuto(<%u>)", numVertsToDraw); action.flags |= ActionFlags::Drawcall | ActionFlags::Auto; - action.numIndices = (uint32_t)numVerts; + action.numIndices = (uint32_t)numVertsToDraw; action.vertexOffset = 0; action.indexOffset = 0; action.instanceOffset = 0; @@ -4195,6 +4278,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawIndexedInstancedIndirect(Serialis AlignedByteOffsetForArgs); } + LatchSOProperties(); + if(IsLoading(m_State)) { AddEvent(); @@ -4289,6 
+4374,8 @@ void WrappedID3D11DeviceContext::DrawIndexedInstancedIndirect(ID3D11Buffer *pBuf SERIALISE_TIME_CALL(m_pRealContext->DrawIndexedInstancedIndirect( UNWRAP(WrappedID3D11Buffer, pBufferForArgs), AlignedByteOffsetForArgs)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -4326,6 +4413,8 @@ bool WrappedID3D11DeviceContext::Serialise_DrawInstancedIndirect(SerialiserType AlignedByteOffsetForArgs); } + LatchSOProperties(); + if(IsLoading(m_State)) { AddEvent(); @@ -4417,6 +4506,8 @@ void WrappedID3D11DeviceContext::DrawInstancedIndirect(ID3D11Buffer *pBufferForA SERIALISE_TIME_CALL(m_pRealContext->DrawInstancedIndirect( UNWRAP(WrappedID3D11Buffer, pBufferForArgs), AlignedByteOffsetForArgs)); + LatchSOProperties(); + if(IsActiveCapturing(m_State)) { USE_SCRATCH_SERIALISER(); @@ -6455,12 +6546,12 @@ bool WrappedID3D11DeviceContext::Serialise_ClearState(SerialiserType &ser) if(buf) { - ResourceId id = GetIDForDeviceChild(buf); + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); - if(m_StreamOutCounters[id].running) + if(so.running) { - m_pRealContext->End(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = false; + m_pRealContext->End(so.query); + so.running = false; } } } @@ -6499,12 +6590,14 @@ void WrappedID3D11DeviceContext::ClearState() if(buf) { + StreamOutData &so = m_pDevice->GetSOHiddenCounterForBuffer(GetIDForDeviceChild(buf)); + ResourceId id = GetIDForDeviceChild(buf); - if(m_StreamOutCounters[id].running) + if(so.running) { - m_pRealContext->End(m_StreamOutCounters[id].query); - m_StreamOutCounters[id].running = false; + m_pRealContext->End(so.query); + so.running = false; } } } diff --git a/renderdoc/driver/d3d11/d3d11_device.cpp b/renderdoc/driver/d3d11/d3d11_device.cpp index 32f823eb2..37be59ded 100644 --- a/renderdoc/driver/d3d11/d3d11_device.cpp +++ b/renderdoc/driver/d3d11/d3d11_device.cpp @@ -265,6 +265,9 @@ WrappedID3D11Device::~WrappedID3D11Device() 
RenderDoc::Inst().RemoveDeviceFrameCapturer((ID3D11Device *)this); + for(auto it = m_StreamOutCounters.begin(); it != m_StreamOutCounters.end(); ++it) + SAFE_RELEASE(it->second.query); + for(auto it = m_CachedStateObjects.begin(); it != m_CachedStateObjects.end(); ++it) if(*it) IntRelease(*it); diff --git a/renderdoc/driver/d3d11/d3d11_device.h b/renderdoc/driver/d3d11/d3d11_device.h index c4cb13a8c..6f1b92fc1 100644 --- a/renderdoc/driver/d3d11/d3d11_device.h +++ b/renderdoc/driver/d3d11/d3d11_device.h @@ -46,6 +46,19 @@ class D3D11Replay; #define D3D11_1_UAV_SLOT_COUNT 64 #endif +struct StreamOutData +{ + ID3D11Query *query = NULL; + bool running = false; + uint64_t numPrims = 0; + uint32_t stride = 0; +}; + +struct SOShaderData +{ + uint32_t strides[4] = {}; +}; + enum TextureDisplayType { TEXDISPLAY_UNKNOWN = 0, @@ -68,7 +81,7 @@ struct D3D11InitParams uint32_t VendorUAV = ~0U; // check if a frame capture section version is supported - static const uint64_t CurrentVersion = 0x12; + static const uint64_t CurrentVersion = 0x13; static bool IsSupportedVersion(uint64_t ver); }; @@ -585,6 +598,9 @@ private: std::map > m_LayoutDescs; std::map m_LayoutShaders; + std::map m_StreamOutCounters; + std::map m_SOShaders; + static WrappedID3D11Device *m_pCurrentWrappedDevice; std::map m_SwapChains; @@ -652,6 +668,9 @@ public: void RemoveDeferredContext(WrappedID3D11DeviceContext *defctx); WrappedID3D11DeviceContext *GetDeferredContext(size_t idx); + const std::map &GetSOHiddenCounters() { return m_StreamOutCounters; } + StreamOutData &GetSOHiddenCounterForBuffer(ResourceId id) { return m_StreamOutCounters[id]; } + const SOShaderData &GetSOShaderData(ResourceId id) { return m_SOShaders[id]; } ResourceId GetResourceID() { return m_ResourceID; } const ActionDescription *GetAction(uint32_t eventId); ResourceDescription &GetResourceDesc(ResourceId id); diff --git a/renderdoc/driver/d3d11/d3d11_device_wrap.cpp b/renderdoc/driver/d3d11/d3d11_device_wrap.cpp index 
4d3d68e88..b27aae03c 100644 --- a/renderdoc/driver/d3d11/d3d11_device_wrap.cpp +++ b/renderdoc/driver/d3d11/d3d11_device_wrap.cpp @@ -1724,6 +1724,40 @@ bool WrappedID3D11Device::Serialise_CreateGeometryShaderWithStreamOutput( GetResourceManager()->AddLiveResource(pShader, ret); } + D3D_PRIMITIVE_TOPOLOGY topo = + DXBC::DXBCContainer::GetOutputTopology(pShaderBytecode, BytecodeLength); + + uint32_t vertsPerPrim = 1; + if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST) + vertsPerPrim = 1; + else if(topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST) + vertsPerPrim = 2; + else + vertsPerPrim = 3; + + SOShaderData &soshader = m_SOShaders[GetIDForDeviceChild(ret)]; + + for(UINT i = 0; i < NumStrides; i++) + soshader.strides[i] = pBufferStrides[i] * vertsPerPrim; + + // Undocumented, but D3D11 auto-calculates tight strides if they are not specified, based on the + // declarations (which are tightly packed) + for(UINT i = NumStrides; i < D3D11_SO_STREAM_COUNT; i++) + { + // count the entries writing to this slot + for(size_t decl = 0; decl < NumEntries; decl++) + { + if(pSODeclaration[decl].OutputSlot == i) + { + // all components are written as 32-bit values + soshader.strides[i] += pSODeclaration[decl].ComponentCount * sizeof(uint32_t); + } + } + + // still want the stride per-primitive not per-vertex + soshader.strides[i] *= vertsPerPrim; + } + AddResource(pShader, ResourceType::Shader, "Geometry Shader"); // if this shader was initialised with a shader ext UAV, pull in that chunk as one of ours // and unset it (there will be one for each create that actually used vendor extensions) @@ -1768,6 +1802,40 @@ HRESULT WrappedID3D11Device::CreateGeometryShaderWithStreamOutput( wrapped = new WrappedID3D11Shader( real, ResourceId(), (const byte *)pShaderBytecode, BytecodeLength, this); + D3D_PRIMITIVE_TOPOLOGY topo = + DXBC::DXBCContainer::GetOutputTopology(pShaderBytecode, BytecodeLength); + + uint32_t vertsPerPrim = 1; + if(topo == D3D_PRIMITIVE_TOPOLOGY_POINTLIST) + vertsPerPrim = 1; + else
if(topo == D3D_PRIMITIVE_TOPOLOGY_LINELIST) + vertsPerPrim = 2; + else + vertsPerPrim = 3; + + SOShaderData &soshader = m_SOShaders[GetIDForDeviceChild(wrapped)]; + + for(UINT i = 0; i < NumStrides; i++) + soshader.strides[i] = pBufferStrides[i] * vertsPerPrim; + + // Undocumented, but D3D11 auto-calculates tight strides if they are not specified, based on the + // declarations (which are tightly packed) + for(UINT i = NumStrides; i < D3D11_SO_STREAM_COUNT; i++) + { + // count the entries writing to this slot + for(size_t decl = 0; decl < NumEntries; decl++) + { + if(pSODeclaration[decl].OutputSlot == i) + { + // all components are written as 32-bit values + soshader.strides[i] += pSODeclaration[decl].ComponentCount * sizeof(uint32_t); + } + } + + // still want the stride per-primitive not per-vertex + soshader.strides[i] *= vertsPerPrim; + } + if(IsCaptureMode(m_State)) { Chunk *vendorChunk = NULL; diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp index 7fcffe903..270269af8 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp @@ -589,6 +589,47 @@ D3D_PRIMITIVE_TOPOLOGY Program::GetOutputTopology() return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; } +D3D_PRIMITIVE_TOPOLOGY Program::GetOutputTopology(const byte *bytes, size_t length) +{ + uint32_t *begin = (uint32_t *)bytes; + uint32_t *cur = begin; + uint32_t *end = begin + (length / sizeof(uint32_t)); + + // skip version and length + cur += 2; + + while(cur < end) + { + uint32_t OpcodeToken0 = cur[0]; + + OpcodeType op = Opcode::Type.Get(OpcodeToken0); + + // the GS output topology declaration is the only opcode of interest here, + // every other opcode is stepped over by its length below + if(op == OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY) + { + uint32_t *tokenStream = cur; + + // skip opcode and length + tokenStream++; + + return Decl::OutputPrimitiveTopology.Get(tokenStream[0]); + } + + if(op == OPCODE_CUSTOMDATA) + { + // length in opcode token is
0, full length is in second dword + cur += cur[1]; + } + else + { + cur += Opcode::Length.Get(OpcodeToken0); + } + } + + return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; +} + void Program::SetupRegisterFile(rdcarray &registers) const { size_t numRegisters = m_NumTemps + m_IndexTempSizes.size() + m_NumOutputs; diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h index 345285536..f22173d34 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h +++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.h @@ -1235,6 +1235,7 @@ public: rdcstr GetRegisterName(OperandType oper, uint32_t index) const; static bool UsesExtensionUAV(uint32_t slot, uint32_t space, const byte *bytes, size_t length); + static D3D_PRIMITIVE_TOPOLOGY GetOutputTopology(const byte *bytes, size_t length); protected: friend class Program; diff --git a/renderdoc/driver/shaders/dxbc/dxbc_container.cpp b/renderdoc/driver/shaders/dxbc/dxbc_container.cpp index ac7f03d03..29bb1697b 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_container.cpp +++ b/renderdoc/driver/shaders/dxbc/dxbc_container.cpp @@ -458,6 +458,34 @@ D3D_PRIMITIVE_TOPOLOGY DXBCContainer::GetOutputTopology() return m_OutputTopology; } +D3D_PRIMITIVE_TOPOLOGY DXBCContainer::GetOutputTopology(const void *ByteCode, size_t ByteCodeLength) +{ + const FileHeader *header = (const FileHeader *)ByteCode; + + const byte *data = (const byte *)ByteCode; // just for convenience + + if(header->fourcc != FOURCC_DXBC) + return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + + if(header->fileLength != (uint32_t)ByteCodeLength) + return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; + + const uint32_t *chunkOffsets = (const uint32_t *)(header + 1); // right after the header + + for(uint32_t chunkIdx = 0; chunkIdx < header->numChunks; chunkIdx++) + { + const uint32_t *fourcc = (const uint32_t *)(data + chunkOffsets[chunkIdx]); + const uint32_t *chunkSize = (const uint32_t *)(fourcc + 1); + + const byte *chunkContents = (const byte *)(chunkSize +
1); + + if(*fourcc == FOURCC_SHEX || *fourcc == FOURCC_SHDR) + return DXBCBytecode::Program::GetOutputTopology(chunkContents, *chunkSize); + } + + return D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; +} + const rdcstr &DXBCContainer::GetDisassembly() { if(m_Disassembly.empty()) diff --git a/renderdoc/driver/shaders/dxbc/dxbc_container.h b/renderdoc/driver/shaders/dxbc/dxbc_container.h index 216a8b5ad..db0d6638c 100644 --- a/renderdoc/driver/shaders/dxbc/dxbc_container.h +++ b/renderdoc/driver/shaders/dxbc/dxbc_container.h @@ -220,6 +220,7 @@ public: static bool CheckForDebugInfo(const void *ByteCode, size_t ByteCodeLength); static bool CheckForDXIL(const void *ByteCode, size_t ByteCodeLength); static rdcstr GetDebugBinaryPath(const void *ByteCode, size_t ByteCodeLength); + static D3D_PRIMITIVE_TOPOLOGY GetOutputTopology(const void *ByteCode, size_t ByteCodeLength); private: void TryFetchSeparateDebugInfo(bytebuf &byteCode, const rdcstr &debugInfoPath); diff --git a/util/test/demos/d3d11/d3d11_stream_out.cpp b/util/test/demos/d3d11/d3d11_stream_out.cpp index e636514a7..6abeff944 100644 --- a/util/test/demos/d3d11/d3d11_stream_out.cpp +++ b/util/test/demos/d3d11/d3d11_stream_out.cpp @@ -73,8 +73,9 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest) ID3D11BufferPtr vb = MakeBuffer().Vertex().Data(DefaultTri); - ID3D11BufferPtr so[2] = { + ID3D11BufferPtr so[3] = { MakeBuffer().StreamOut().Vertex().Size(2048), MakeBuffer().StreamOut().Vertex().Size(2048), + MakeBuffer().StreamOut().Vertex().Size(2048), }; D3D11_INPUT_ELEMENT_DESC layoutdesc[] = { @@ -93,6 +94,29 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest) CHECK_HR(dev->CreateInputLayout(layoutdesc, ARRAY_COUNT(layoutdesc), vsblob->GetBufferPointer(), vsblob->GetBufferSize(), &streamoutLayout)); + // pre fill buffer 2 with pre-frame data + { + ctx->ClearState(); + + IASetVertexBuffer(vb, sizeof(DefaultA2V), 0); + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ctx->IASetInputLayout(defaultLayout); + + 
ctx->VSSetShader(vs, NULL, 0); + ctx->GSSetShader(gs, NULL, 0); + ctx->PSSetShader(ps, NULL, 0); + + RSSetViewport({0.0f, 0.0f, (float)screenWidth, (float)screenHeight, 0.0f, 1.0f}); + + ctx->OMSetRenderTargets(1, &bbRTV.GetInterfacePtr(), NULL); + + ID3D11Buffer *bufs[] = {so[2], so[1]}; + UINT offs[2] = {0}; + ctx->SOSetTargets(2, bufs, offs); + + ctx->Draw(3, 0); + } + while(Running()) { ctx->ClearState(); @@ -156,7 +180,9 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest) ctx->IASetVertexBuffers(0, 2, bufs, &strides[0], offs); ctx->IASetInputLayout(streamoutLayout); + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); ctx->DrawAuto(); + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); RSSetViewport({0.0f, 0.0f, (float)screenWidth, (float)screenHeight, 0.0f, 1.0f}); @@ -203,8 +229,20 @@ RD_TEST(D3D11_Stream_Out, D3D11GraphicsTest) ctx->IASetInputLayout(streamoutLayout); ctx->DrawAuto(); + ctx->SOSetTargets(0, NULL, NULL); + + RSSetViewport({(screenWidth * 3.0f) / 4.0f, 0.0f, (float)screenWidth / 4.0f, + (float)screenHeight / 4.0f, 0.0f, 1.0f}); + + bufs[0] = so[2]; + ctx->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_POINTLIST); + ctx->IASetVertexBuffers(0, 2, bufs, &strides[0], offs); + ctx->DrawAuto(); + // leave stream-out buffers bound at the end of the frame ctx->ClearState(); + bufs[0] = so[1]; + bufs[1] = so[0]; ctx->SOSetTargets(2, bufs, offs); Present();