diff --git a/renderdoc/data/hlsl/misc.hlsl b/renderdoc/data/hlsl/misc.hlsl index 271def16d..d9413210f 100644 --- a/renderdoc/data/hlsl/misc.hlsl +++ b/renderdoc/data/hlsl/misc.hlsl @@ -133,3 +133,113 @@ uint4 RENDERDOC_DiscardIntPS(float4 pos : SV_Position, out float depth : SV_Dept return val.xxxx; } + +cbuffer executepatchdata : register(b0) +{ + uint argCount; + uint bufCount; + uint argStride; + uint4 argOffsets[32]; +}; + +cbuffer countbuffer : register(b1) +{ + uint numExecutes; +}; + +cbuffer countbuffer : register(b2) +{ + uint maxNumExecutes; +}; + +struct buffermapping +{ + // {.x = LSB, .y = MSB} to match uint64 order + uint2 origBase; + uint2 origEnd; + uint2 newBase; + uint2 pad; +}; + +StructuredBuffer buffers : register(t0); +RWByteAddressBuffer arguments : register(u0); + +bool uint64LessThan(uint2 a, uint2 b) +{ + // either MSB is less, or MSB is equal and LSB is strictly less + return a.y < b.y || (a.y == b.y && a.x < b.x); +} + +bool uint64LessEqual(uint2 a, uint2 b) +{ + return uint64LessThan(a, b) || (a.y == b.y && a.x == b.x); +} + +uint2 uint64Add(uint2 a, uint2 b) +{ + uint msb = 0, lsb = 0; + if(b.x > 0 && a.x > 0xffffffff - b.x) + { + uint x = max(a.x, b.x) - 0x80000000; + uint y = min(a.x, b.x); + + uint sum = x + y; + + msb = a.y + b.y + 1; + lsb = sum - 0x80000000; + } + else + { + msb = a.y + b.y; + lsb = a.x + b.x; + } + + return uint2(lsb, msb); +} + +uint2 uint64Sub(uint2 a, uint2 b) +{ + uint msb = 0, lsb = 0; + if(a.x < b.x) + { + uint diff = b.x - a.x; + + msb = a.y - b.y - 1; + lsb = 0xffffffff - (diff - 1); + } + else + { + msb = a.y - b.y; + lsb = a.x - b.x; + } + + return uint2(lsb, msb); +} + +uint2 PatchAddress(uint2 addr) +{ + for(uint i = 0; i < bufCount; i++) + { + buffermapping b = buffers[i]; + + if(uint64LessEqual(b.origBase, addr) && uint64LessThan(addr, b.origEnd)) + { + return uint64Add(b.newBase, uint64Sub(addr, b.origBase)); + } + } + + return addr; +} + +[numthreads(128, 1, 1)] void 
RENDERDOC_ExecuteIndirectPatchCS(uint idx + : SV_GroupIndex) { + if(idx < argCount) + { + for(uint i = 0; i < min(numExecutes, maxNumExecutes); i++) + { + uint offs = argStride * i + argOffsets[idx / 4][idx % 4]; + + arguments.Store2(offs, PatchAddress(arguments.Load2(offs))); + } + } +} diff --git a/renderdoc/driver/d3d12/d3d12_command_list.h b/renderdoc/driver/d3d12/d3d12_command_list.h index 88cd8c3f0..829b06484 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list.h +++ b/renderdoc/driver/d3d12/d3d12_command_list.h @@ -212,17 +212,17 @@ public: WrappedID3D12Device *GetWrappedDevice() { return m_pDevice; } D3D12ResourceRecord *GetResourceRecord() { return m_ListRecord; } D3D12ResourceRecord *GetCreationRecord() { return m_CreationRecord; } - ID3D12GraphicsCommandList *GetCrackedList(); - ID3D12GraphicsCommandList1 *GetCrackedList1(); - ID3D12GraphicsCommandList2 *GetCrackedList2(); - ID3D12GraphicsCommandList3 *GetCrackedList3(); - ID3D12GraphicsCommandList4 *GetCrackedList4(); - ID3D12GraphicsCommandList5 *GetCrackedList5(); - ID3D12GraphicsCommandList6 *GetCrackedList6(); - ID3D12GraphicsCommandList7 *GetCrackedList7(); - ID3D12GraphicsCommandList8 *GetCrackedList8(); - ID3D12GraphicsCommandList9 *GetCrackedList9(); - ID3D12GraphicsCommandListX *GetWrappedCrackedList(); + + void FinaliseExecuteIndirectEvents(BakedCmdListInfo &info, BakedCmdListInfo::ExecuteData &exec); + void SaveExecuteIndirectParameters(ID3D12GraphicsCommandListX *list, + ID3D12CommandSignature *pCommandSignature, UINT MaxCommandCount, + ID3D12Resource *pArgumentBuffer, UINT64 ArgumentBufferOffset, + ID3D12Resource *pCountBuffer, UINT64 CountBufferOffset); + void ResetAndRecordExecuteIndirectStates(ID3D12GraphicsCommandListX *list, uint32_t baseEventID, + uint32_t execCount, + ID3D12CommandSignature *pCommandSignature, + ID3D12Resource *pArgumentBuffer, + UINT64 ArgumentBufferOffset, uint32_t argumentsReplayed); void SetAMDMarkerInterface(IAmdExtD3DCommandListMarker *marker) { m_AMDMarkers 
= marker; } void SetCommandData(D3D12CommandData *cmd) { m_Cmd = cmd; } @@ -477,11 +477,6 @@ public: IMPLEMENT_FUNCTION_SERIALISED(virtual void STDMETHODCALLTYPE, EndEvent, ); - void ReserveExecuteIndirect(ID3D12GraphicsCommandList *list, - WrappedID3D12CommandSignature *comSig, UINT maxCount); - void PatchExecuteIndirect(BakedCmdListInfo &info, uint32_t executeIndex); - void ReplayExecuteIndirect(ID3D12GraphicsCommandList *list); - IMPLEMENT_FUNCTION_SERIALISED(virtual void STDMETHODCALLTYPE, ExecuteIndirect, ID3D12CommandSignature *pCommandSignature, UINT MaxCommandCount, ID3D12Resource *pArgumentBuffer, UINT64 ArgumentBufferOffset, diff --git a/renderdoc/driver/d3d12/d3d12_command_list1_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list1_wrap.cpp index 24e18dde9..ae2bcb57d 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list1_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list1_wrap.cpp @@ -79,9 +79,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_AtomicCopyBufferUINT( Unwrap1(pCommandList) ->AtomicCopyBufferUINT(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), SrcOffset, Dependencies, deps.data(), pDependentSubresourceRanges); - GetCrackedList1()->AtomicCopyBufferUINT(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), - SrcOffset, Dependencies, deps.data(), - pDependentSubresourceRanges); { m_Cmd->AddEvent(); @@ -195,9 +192,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_AtomicCopyBufferUINT64( Unwrap1(pCommandList) ->AtomicCopyBufferUINT64(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), SrcOffset, Dependencies, deps.data(), pDependentSubresourceRanges); - GetCrackedList1()->AtomicCopyBufferUINT64(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), - SrcOffset, Dependencies, deps.data(), - pDependentSubresourceRanges); { m_Cmd->AddEvent(); @@ -313,7 +307,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_OMSetDepthBounds(SerialiserType else { Unwrap1(pCommandList)->OMSetDepthBounds(Min, Max); - GetCrackedList1()->OMSetDepthBounds(Min, 
Max); stateUpdate = true; } @@ -403,7 +396,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetSamplePositions( else { Unwrap1(pCommandList)->SetSamplePositions(NumSamplesPerPixel, NumPixels, pSamplePositions); - GetCrackedList1()->SetSamplePositions(NumSamplesPerPixel, NumPixels, pSamplePositions); stateUpdate = true; } @@ -483,9 +475,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ResolveSubresourceRegion( ->ResolveSubresourceRegion(Unwrap(pDstResource), DstSubresource, DstX, DstY, Unwrap(pSrcResource), SrcSubresource, pSrcRect, Format, ResolveMode); - GetCrackedList1()->ResolveSubresourceRegion(Unwrap(pDstResource), DstSubresource, DstX, DstY, - Unwrap(pSrcResource), SrcSubresource, pSrcRect, - Format, ResolveMode); { m_Cmd->AddEvent(); @@ -595,7 +584,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetViewInstanceMask(SerialiserT else { Unwrap1(pCommandList)->SetViewInstanceMask(Mask); - GetCrackedList1()->SetViewInstanceMask(Mask); stateUpdate = true; } diff --git a/renderdoc/driver/d3d12/d3d12_command_list2_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list2_wrap.cpp index 8881e46be..88bf745da 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list2_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list2_wrap.cpp @@ -59,7 +59,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_WriteBufferImmediate( else { Unwrap2(pCommandList)->WriteBufferImmediate(Count, pParams, pModes); - GetCrackedList2()->WriteBufferImmediate(Count, pParams, pModes); } } diff --git a/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp index 127547e1b..17a8f223f 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp @@ -349,12 +349,9 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BeginRenderPass( // need to unwrap here, as FromPortableHandle unwraps too. 
Unwrap(pCommandList) ->OMSetRenderTargets(NumRenderTargets, rtHandles, FALSE, dsvHandle.ptr ? &dsvHandle : NULL); - GetCrackedList()->OMSetRenderTargets(NumRenderTargets, rtHandles, FALSE, - dsvHandle.ptr ? &dsvHandle : NULL); // Unwrap4(pCommandList)->BeginRenderPass(NumRenderTargets, pRenderTargets, pDepthStencil, // Flags); - // GetCrackedList4()->BeginRenderPass(NumRenderTargets, pRenderTargets, pDepthStencil, Flags); m_Cmd->AddEvent(); @@ -663,7 +660,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_EndRenderPass(SerialiserType &s else { // Unwrap4(pCommandList)->EndRenderPass(); - // GetCrackedList4()->EndRenderPass(); m_Cmd->AddEvent(); diff --git a/renderdoc/driver/d3d12/d3d12_command_list5_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list5_wrap.cpp index c7f84d765..8b297d852 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list5_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list5_wrap.cpp @@ -86,7 +86,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_RSSetShadingRate( else { Unwrap5(pCommandList)->RSSetShadingRate(baseShadingRate, combiners); - GetCrackedList5()->RSSetShadingRate(baseShadingRate, combiners); stateUpdate = true; } @@ -181,7 +180,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_RSSetShadingRateImage(Serialise else { Unwrap5(pCommandList)->RSSetShadingRateImage(Unwrap(shadingRateImage)); - GetCrackedList5()->RSSetShadingRateImage(Unwrap(shadingRateImage)); stateUpdate = true; } diff --git a/renderdoc/driver/d3d12/d3d12_command_list7_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list7_wrap.cpp index 247b84b7d..ef06fa4bb 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list7_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list7_wrap.cpp @@ -172,7 +172,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_Barrier(SerialiserType &ser, UI if(!filteredUnwrapped.empty()) { Unwrap7(pCommandList)->Barrier((UINT)filteredUnwrapped.size(), filteredUnwrapped.data()); - 
GetCrackedList7()->Barrier((UINT)filteredUnwrapped.size(), filteredUnwrapped.data()); } } diff --git a/renderdoc/driver/d3d12/d3d12_command_list8_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list8_wrap.cpp index 8e4a599d4..350b82e25 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list8_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list8_wrap.cpp @@ -66,7 +66,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_OMSetFrontAndBackStencilRef(Ser else { Unwrap8(pCommandList)->OMSetFrontAndBackStencilRef(FrontStencilRef, BackStencilRef); - GetCrackedList8()->OMSetFrontAndBackStencilRef(FrontStencilRef, BackStencilRef); stateUpdate = true; } diff --git a/renderdoc/driver/d3d12/d3d12_command_list9_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list9_wrap.cpp index c4530c352..18190975a 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list9_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list9_wrap.cpp @@ -67,7 +67,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_RSSetDepthBias(SerialiserType & else { Unwrap9(pCommandList)->RSSetDepthBias(DepthBias, DepthBiasClamp, SlopeScaledDepthBias); - GetCrackedList9()->RSSetDepthBias(DepthBias, DepthBiasClamp, SlopeScaledDepthBias); stateUpdate = true; } @@ -140,7 +139,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_IASetIndexBufferStripCutValue( else { Unwrap9(pCommandList)->IASetIndexBufferStripCutValue(IBStripCutValue); - GetCrackedList9()->IASetIndexBufferStripCutValue(IBStripCutValue); stateUpdate = true; } diff --git a/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp index 68913f341..f11537374 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list_wrap.cpp @@ -29,61 +29,6 @@ #include "d3d12_command_queue.h" #include "d3d12_debug.h" -ID3D12GraphicsCommandList *WrappedID3D12GraphicsCommandList::GetCrackedList() -{ - return 
Unwrap(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList1 *WrappedID3D12GraphicsCommandList::GetCrackedList1() -{ - return Unwrap1(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList2 *WrappedID3D12GraphicsCommandList::GetCrackedList2() -{ - return Unwrap2(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList3 *WrappedID3D12GraphicsCommandList::GetCrackedList3() -{ - return Unwrap3(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList4 *WrappedID3D12GraphicsCommandList::GetCrackedList4() -{ - return Unwrap4(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList5 *WrappedID3D12GraphicsCommandList::GetCrackedList5() -{ - return Unwrap5(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList6 *WrappedID3D12GraphicsCommandList::GetCrackedList6() -{ - return Unwrap6(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList7 *WrappedID3D12GraphicsCommandList::GetCrackedList7() -{ - return Unwrap7(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList8 *WrappedID3D12GraphicsCommandList::GetCrackedList8() -{ - return Unwrap8(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandList9 *WrappedID3D12GraphicsCommandList::GetCrackedList9() -{ - return Unwrap9(m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back()); -} - -ID3D12GraphicsCommandListX *WrappedID3D12GraphicsCommandList::GetWrappedCrackedList() -{ - return m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.back(); -} - template bool WrappedID3D12GraphicsCommandList::Serialise_Close(SerialiserType &ser) { @@ -141,11 +86,6 @@ bool 
WrappedID3D12GraphicsCommandList::Serialise_Close(SerialiserType &ser) { GetResourceManager()->GetLiveAs(CommandList)->Close(); - if(!m_Cmd->m_BakedCmdListInfo[BakedCommandList].crackedLists.empty()) - { - GetCrackedList()->Close(); - } - { if(m_Cmd->GetActionStack().size() > 1) m_Cmd->GetActionStack().pop_back(); @@ -375,27 +315,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_Reset(SerialiserType &ser, D3D12ActionTreeNode *action = new D3D12ActionTreeNode; m_Cmd->m_BakedCmdListInfo[BakedCommandList].action = action; - { - if(m_Cmd->m_CrackedAllocators[GetResID(pAllocator)] == NULL) - { - HRESULT hr = m_pDevice->CreateCommandAllocator( - type, __uuidof(ID3D12CommandAllocator), - (void **)&m_Cmd->m_CrackedAllocators[GetResID(pAllocator)]); - RDCASSERTEQUAL(hr, S_OK); - } - - ID3D12GraphicsCommandList *listptr = NULL; - m_pDevice->CreateCommandList( - nodeMask, type, m_Cmd->m_CrackedAllocators[GetResID(pAllocator)], pInitialState, - __uuidof(ID3D12GraphicsCommandList), (void **)&listptr); - - // this is a safe upcast because it's a wrapped object - ID3D12GraphicsCommandListX *list = (ID3D12GraphicsCommandListX *)listptr; - - RDCASSERT(m_Cmd->m_BakedCmdListInfo[BakedCommandList].crackedLists.empty()); - m_Cmd->m_BakedCmdListInfo[BakedCommandList].crackedLists.push_back(list); - } - m_Cmd->m_BakedCmdListInfo[CommandList].type = m_Cmd->m_BakedCmdListInfo[BakedCommandList].type = type; m_Cmd->m_BakedCmdListInfo[CommandList].nodeMask = @@ -595,10 +514,7 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ResourceBarrier( else { if(!filtered.empty()) - { Unwrap(pCommandList)->ResourceBarrier((UINT)filtered.size(), &filtered[0]); - GetCrackedList()->ResourceBarrier((UINT)filtered.size(), &filtered[0]); - } } if(pCommandList) @@ -691,7 +607,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ClearState(SerialiserType &ser, else { Unwrap(pCommandList)->ClearState(Unwrap(pPipelineState)); - GetCrackedList()->ClearState(Unwrap(pPipelineState)); stateUpdate = true; } @@ 
-769,7 +684,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_IASetPrimitiveTopology( else { Unwrap(pCommandList)->IASetPrimitiveTopology(PrimitiveTopology); - GetCrackedList()->IASetPrimitiveTopology(PrimitiveTopology); stateUpdate = true; } @@ -833,7 +747,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_RSSetViewports(SerialiserType & else { Unwrap(pCommandList)->RSSetViewports(NumViewports, pViewports); - GetCrackedList()->RSSetViewports(NumViewports, pViewports); stateUpdate = true; } @@ -901,7 +814,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_RSSetScissorRects(SerialiserTyp else { Unwrap(pCommandList)->RSSetScissorRects(NumRects, pRects); - GetCrackedList()->RSSetScissorRects(NumRects, pRects); stateUpdate = true; } @@ -967,7 +879,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_OMSetBlendFactor(SerialiserType else { Unwrap(pCommandList)->OMSetBlendFactor(BlendFactor); - GetCrackedList()->OMSetBlendFactor(BlendFactor); stateUpdate = true; } @@ -1025,7 +936,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_OMSetStencilRef(SerialiserType else { Unwrap(pCommandList)->OMSetStencilRef(StencilRef); - GetCrackedList()->OMSetStencilRef(StencilRef); stateUpdate = true; } @@ -1098,7 +1008,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetDescriptorHeaps( else { Unwrap(pCommandList)->SetDescriptorHeaps(NumDescriptorHeaps, heaps.data()); - GetCrackedList()->SetDescriptorHeaps(NumDescriptorHeaps, heaps.data()); stateUpdate = true; } @@ -1169,7 +1078,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_IASetIndexBuffer(SerialiserType ID3D12GraphicsCommandList *list = pCommandList; list->IASetIndexBuffer(pView); - GetCrackedList()->IASetIndexBuffer(pView); stateUpdate = true; } @@ -1249,7 +1157,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_IASetVertexBuffers( else { Unwrap(pCommandList)->IASetVertexBuffers(StartSlot, NumViews, pViews); - GetCrackedList()->IASetVertexBuffers(StartSlot, NumViews, pViews); stateUpdate = true; } @@ 
-1328,7 +1235,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SOSetTargets( else { Unwrap(pCommandList)->SOSetTargets(StartSlot, NumViews, pViews); - GetCrackedList()->SOSetTargets(StartSlot, NumViews, pViews); stateUpdate = true; } @@ -1408,7 +1314,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetPipelineState(SerialiserType else { Unwrap(pCommandList)->SetPipelineState(Unwrap(pPipelineState)); - GetCrackedList()->SetPipelineState(Unwrap(pPipelineState)); stateUpdate = true; } @@ -1591,8 +1496,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_OMSetRenderTargets( Unwrap(pCommandList) ->OMSetRenderTargets((UINT)unwrappedRTs.size(), unwrappedRTs.data(), FALSE, unwrappedDSV.ptr ? &unwrappedDSV : NULL); - GetCrackedList()->OMSetRenderTargets((UINT)unwrappedRTs.size(), unwrappedRTs.data(), FALSE, - unwrappedDSV.ptr ? &unwrappedDSV : NULL); stateUpdate = true; } @@ -1701,7 +1604,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRootSignature( else { Unwrap(pCommandList)->SetComputeRootSignature(Unwrap(pRootSignature)); - GetCrackedList()->SetComputeRootSignature(Unwrap(pRootSignature)); stateUpdate = true; } @@ -1778,7 +1680,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRootDescriptorTable( else { Unwrap(pCommandList)->SetComputeRootDescriptorTable(RootParameterIndex, Unwrap(BaseDescriptor)); - GetCrackedList()->SetComputeRootDescriptorTable(RootParameterIndex, Unwrap(BaseDescriptor)); stateUpdate = true; } @@ -1887,8 +1788,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRoot32BitConstant( { Unwrap(pCommandList) ->SetComputeRoot32BitConstant(RootParameterIndex, SrcData, DestOffsetIn32BitValues); - GetCrackedList()->SetComputeRoot32BitConstant(RootParameterIndex, SrcData, - DestOffsetIn32BitValues); stateUpdate = true; } @@ -1966,8 +1865,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRoot32BitConstants( Unwrap(pCommandList) ->SetComputeRoot32BitConstants(RootParameterIndex, 
Num32BitValuesToSet, pValidSrcData, DestOffsetIn32BitValues); - GetCrackedList()->SetComputeRoot32BitConstants(RootParameterIndex, Num32BitValuesToSet, - pValidSrcData, DestOffsetIn32BitValues); stateUpdate = true; } @@ -2049,7 +1946,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRootConstantBufferVie else { Unwrap(pCommandList)->SetComputeRootConstantBufferView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetComputeRootConstantBufferView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -2129,7 +2025,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRootShaderResourceVie else { Unwrap(pCommandList)->SetComputeRootShaderResourceView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetComputeRootShaderResourceView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -2209,7 +2104,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetComputeRootUnorderedAccessVi else { Unwrap(pCommandList)->SetComputeRootUnorderedAccessView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetComputeRootUnorderedAccessView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -2284,7 +2178,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRootSignature( else { Unwrap(pCommandList)->SetGraphicsRootSignature(Unwrap(pRootSignature)); - GetCrackedList()->SetGraphicsRootSignature(Unwrap(pRootSignature)); stateUpdate = true; } @@ -2361,7 +2254,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRootDescriptorTable( else { Unwrap(pCommandList)->SetGraphicsRootDescriptorTable(RootParameterIndex, Unwrap(BaseDescriptor)); - GetCrackedList()->SetGraphicsRootDescriptorTable(RootParameterIndex, Unwrap(BaseDescriptor)); stateUpdate = true; } @@ -2470,8 +2362,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRoot32BitConstant( { Unwrap(pCommandList) ->SetGraphicsRoot32BitConstant(RootParameterIndex, SrcData, DestOffsetIn32BitValues); - 
GetCrackedList()->SetGraphicsRoot32BitConstant(RootParameterIndex, SrcData, - DestOffsetIn32BitValues); stateUpdate = true; } @@ -2549,8 +2439,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRoot32BitConstants( Unwrap(pCommandList) ->SetGraphicsRoot32BitConstants(RootParameterIndex, Num32BitValuesToSet, pValidSrcData, DestOffsetIn32BitValues); - GetCrackedList()->SetGraphicsRoot32BitConstants(RootParameterIndex, Num32BitValuesToSet, - pValidSrcData, DestOffsetIn32BitValues); stateUpdate = true; } @@ -2632,7 +2520,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRootConstantBufferVi else { Unwrap(pCommandList)->SetGraphicsRootConstantBufferView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetGraphicsRootConstantBufferView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -2712,7 +2599,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRootShaderResourceVi else { Unwrap(pCommandList)->SetGraphicsRootShaderResourceView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetGraphicsRootShaderResourceView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -2792,7 +2678,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetGraphicsRootUnorderedAccessV else { Unwrap(pCommandList)->SetGraphicsRootUnorderedAccessView(RootParameterIndex, BufferLocation); - GetCrackedList()->SetGraphicsRootUnorderedAccessView(RootParameterIndex, BufferLocation); stateUpdate = true; } @@ -3066,7 +2951,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_SetMarker(SerialiserType &ser, else { D3D12MarkerRegion::Set(pCommandList, MarkerText); - D3D12MarkerRegion::Set(GetWrappedCrackedList(), MarkerText); ActionDescription action; action.customName = MarkerText; @@ -3140,7 +3024,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BeginEvent(SerialiserType &ser, else { D3D12MarkerRegion::Begin(pCommandList, MarkerText); - D3D12MarkerRegion::Begin(GetWrappedCrackedList(), MarkerText); ActionDescription 
action; action.customName = MarkerText; @@ -3203,7 +3086,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_EndEvent(SerialiserType &ser) else { D3D12MarkerRegion::End(pCommandList); - D3D12MarkerRegion::End(GetWrappedCrackedList()); ActionDescription action; action.flags = ActionFlags::PopMarker; @@ -3280,8 +3162,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_DrawInstanced(SerialiserType &s Unwrap(pCommandList) ->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation); - GetCrackedList()->DrawInstanced(VertexCountPerInstance, InstanceCount, StartVertexLocation, - StartInstanceLocation); m_Cmd->AddEvent(); @@ -3361,8 +3241,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_DrawIndexedInstanced( Unwrap(pCommandList) ->DrawIndexedInstanced(IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation); - GetCrackedList()->DrawIndexedInstanced(IndexCountPerInstance, InstanceCount, StartIndexLocation, - BaseVertexLocation, StartInstanceLocation); m_Cmd->AddEvent(); @@ -3439,7 +3317,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_Dispatch(SerialiserType &ser, U else { Unwrap(pCommandList)->Dispatch(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); - GetCrackedList()->Dispatch(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); m_Cmd->AddEvent(); @@ -3507,7 +3384,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ExecuteBundle(SerialiserType &s else { Unwrap(pCommandList)->ExecuteBundle(Unwrap(pBundle)); - GetCrackedList()->ExecuteBundle(Unwrap(pBundle)); m_Cmd->AddEvent(); @@ -3545,85 +3421,140 @@ void WrappedID3D12GraphicsCommandList::ExecuteBundle(ID3D12GraphicsCommandList * } } -/* - * ExecuteIndirect needs special handling - whenever we encounter an ExecuteIndirect during loading - * time we crack the list into two, and copy off the argument buffer in the first part and execute - * with the copy destination in the second part. 
- * - * Then when we come to ExecuteCommandLists this list, we go step by step through the cracked lists, - * executing the first, then syncing to the GPU and patching the argument buffer before continuing. - * - * At loading time we reserve a maxCount number of actions and events, and later on when patching - * the argument buffer we fill in the parameters/names and remove any excess actions that weren't - * actually executed. - * - * During active replaying we read the patched argument buffer and execute any commands needed by - * hand on the CPU. - */ - -void WrappedID3D12GraphicsCommandList::ReserveExecuteIndirect(ID3D12GraphicsCommandList *list, - WrappedID3D12CommandSignature *comSig, - UINT maxCount) +void WrappedID3D12GraphicsCommandList::SaveExecuteIndirectParameters( + ID3D12GraphicsCommandListX *list, ID3D12CommandSignature *pCommandSignature, + UINT MaxCommandCount, ID3D12Resource *pArgumentBuffer, UINT64 ArgumentBufferOffset, + ID3D12Resource *pCountBuffer, UINT64 CountBufferOffset) { - const bool multiaction = (maxCount > 1 || comSig->sig.numActions > 1); - const uint32_t sigSize = (uint32_t)comSig->sig.arguments.size(); + WrappedID3D12CommandSignature *comSig = (WrappedID3D12CommandSignature *)pCommandSignature; - RDCASSERT(IsLoading(m_State)); + BakedCmdListInfo &cmdListInfo = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID]; - BakedCmdListInfo &cmdInfo = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID]; + const size_t argsSize = comSig->sig.ByteStride * (MaxCommandCount - 1) + comSig->sig.PackedByteSize; + const size_t countSize = 16; - for(uint32_t i = 0; i < maxCount; i++) + // reserve room for the count slot (countSize bytes) 
followed by the arguments for every + // one of the MaxCommandCount executes, which are copied below + ID3D12Resource *buf = NULL; + uint64_t offs = 0; + m_Cmd->GetIndirectBuffer(argsSize + countSize, &buf, &offs); + + if(pCountBuffer) + Unwrap(list)->CopyBufferRegion(Unwrap(buf), offs, Unwrap(pCountBuffer), CountBufferOffset, 4); + Unwrap(list)->CopyBufferRegion(Unwrap(buf), offs + countSize, Unwrap(pArgumentBuffer), + ArgumentBufferOffset, argsSize); + + BakedCmdListInfo::ExecuteData exec = {}; + exec.baseEvent = cmdListInfo.curEventID; + exec.sig = comSig; + exec.maxCount = MaxCommandCount; + if(pCountBuffer) { - for(uint32_t a = 0; a < sigSize; a++) - { - const D3D12_INDIRECT_ARGUMENT_DESC &arg = comSig->sig.arguments[a]; + exec.countBuf = buf; + exec.countOffs = offs; + } + exec.argBuf = buf; + exec.argOffs = offs + 16; + cmdListInfo.executeEvents.push_back(exec); +} + +void WrappedID3D12GraphicsCommandList::ResetAndRecordExecuteIndirectStates( + ID3D12GraphicsCommandListX *list, uint32_t baseEventID, uint32_t execCount, + ID3D12CommandSignature *pCommandSignature, ID3D12Resource *pArgumentBuffer, + UINT64 ArgumentBufferOffset, uint32_t argumentsReplayed) +{ + WrappedID3D12CommandSignature *comSig = (WrappedID3D12CommandSignature *)pCommandSignature; + + BakedCmdListInfo &cmdListInfo = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID]; + D3D12RenderState &state = cmdListInfo.state; + + const uint32_t numArgsPerExec = (uint32_t)comSig->sig.arguments.size(); + + if(m_Cmd->m_LastEventID > baseEventID + execCount * comSig->sig.arguments.size() + 1) + { + // reset states to 0, we've replayed past this EI + for(const D3D12_INDIRECT_ARGUMENT_DESC &arg : comSig->sig.arguments) + { switch(arg.Type) { - case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: - // add dummy event and action - m_Cmd->AddEvent(); - m_Cmd->AddAction(ActionDescription()); - m_Cmd->GetActionStack().back()->children.back().state = 
new D3D12RenderState(cmdInfo.state); - cmdInfo.curEventID++; - break; case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + if(arg.VertexBuffer.Slot < state.vbuffers.size()) + state.vbuffers[arg.VertexBuffer.Slot] = {}; + break; + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: state.ibuffer = {}; break; case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + if(arg.Constant.RootParameterIndex < state.graphics.sigelems.size()) + state.graphics.sigelems[arg.Constant.RootParameterIndex].constants.clear(); + + if(arg.Constant.RootParameterIndex < state.compute.sigelems.size()) + state.compute.sigelems[arg.Constant.RootParameterIndex].constants.clear(); + break; case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: - // add dummy event - m_Cmd->AddEvent(); - cmdInfo.curEventID++; + // ConstantBufferView, ShaderResourceView and UnorderedAccessView all have one member - + // RootParameterIndex + if(arg.ConstantBufferView.RootParameterIndex < state.graphics.sigelems.size()) + { + state.graphics.sigelems[arg.ConstantBufferView.RootParameterIndex].id = ResourceId(); + state.graphics.sigelems[arg.ConstantBufferView.RootParameterIndex].offset = 0; + } + + if(arg.ConstantBufferView.RootParameterIndex < state.compute.sigelems.size()) + { + state.compute.sigelems[arg.ConstantBufferView.RootParameterIndex].id = ResourceId(); + state.compute.sigelems[arg.ConstantBufferView.RootParameterIndex].offset = 0; + } break; - default: RDCERR("Unexpected argument type! %d", arg.Type); break; + default: break; } } + + return; } - if(multiaction) + if(m_Cmd->m_LastEventID > baseEventID) { - m_Cmd->AddEvent(); - ActionDescription action; - action.flags = ActionFlags::PopMarker; - m_Cmd->AddAction(action); - } - else - { - cmdInfo.curEventID--; + // at most we need to copy two executes. 
The last may be partial and so contain some state set + // in the previous execute + ID3D12Resource *buf = NULL; + uint64_t offs = 0; + m_Cmd->GetIndirectBuffer(comSig->sig.ByteStride + comSig->sig.PackedByteSize, &buf, &offs); + + state.indirectState.argsBuf = buf; + state.indirectState.argsOffs = offs; + state.indirectState.comSig = comSig; + + UINT64 BytesToRead = comSig->sig.PackedByteSize; + + if(argumentsReplayed <= numArgsPerExec) + { + state.indirectState.argsToProcess = argumentsReplayed; + } + else + { + state.indirectState.argsToProcess = argumentsReplayed % numArgsPerExec + numArgsPerExec; + BytesToRead += comSig->sig.ByteStride; + } + + // skip all but the last executes we care about + while(argumentsReplayed > state.indirectState.argsToProcess) + { + ArgumentBufferOffset += comSig->sig.ByteStride; + argumentsReplayed -= numArgsPerExec; + } + + Unwrap(list)->CopyBufferRegion(Unwrap(buf), offs, Unwrap(pArgumentBuffer), ArgumentBufferOffset, + BytesToRead); + + // this is processed in D3D12RenderState::ResolvePendingIndirectState() } } -void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &info, - uint32_t executeIndex) +void WrappedID3D12GraphicsCommandList::FinaliseExecuteIndirectEvents(BakedCmdListInfo &info, + BakedCmdListInfo::ExecuteData &exec) { - BakedCmdListInfo::ExecuteData &exec = info.executeEvents[executeIndex]; - - exec.patched = true; - WrappedID3D12CommandSignature *comSig = exec.sig; uint32_t count = exec.maxCount; @@ -3639,14 +3570,8 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in count = RDCMIN(count, *(uint32_t *)&data[0]); } - exec.realCount = count; - - const bool multiaction = (count > 1 || comSig->sig.numActions > 1); const uint32_t sigSize = (uint32_t)comSig->sig.arguments.size(); - // + 1 is because baseEvent refers to the marker before the commands - exec.lastEvent = exec.baseEvent + 1 + sigSize * count; - D3D12_RANGE range = {0, D3D12CommandData::m_IndirectSize}; byte 
*mapPtr = NULL; m_pDevice->CheckHRESULT(exec.argBuf->Map(0, &range, (void **)&mapPtr)); @@ -3659,9 +3584,11 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in size_t idx = 0; uint32_t eid = exec.baseEvent; + uint32_t firstActionEid = eid; + // find the action where our execute begins for(; idx < actions.size(); idx++) - if(actions[idx].action.eventId == eid) + if(actions[idx].action.eventId == firstActionEid) break; RDCASSERTMSG("Couldn't find base event action!", idx < actions.size(), idx, actions.size()); @@ -3669,10 +3596,6 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in // patch the name for the base action actions[idx].action.customName = StringFormat::Fmt("ExecuteIndirect(maxCount %u, count <%u>)", exec.maxCount, count); - // if there's only one command running, remove its pushmarker flag - if(!multiaction) - actions[idx].action.flags = - (actions[idx].action.flags & ~ActionFlags::PushMarker) | ActionFlags::SetMarker; // move to the first actual action of the commands idx++; @@ -3715,7 +3638,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in APIEvent dummy; if(!curEvent) { - RDCWARN("Couldn't find EID %u in current action while patching ExecuteIndirect", eid); + RDCERR("Couldn't find EID %u in current action while patching ExecuteIndirect", eid); // assign a dummy so we don't have to NULL-check below curEvent = &dummy; } @@ -3745,6 +3668,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in curAction.vertexOffset = args->StartVertexLocation; curAction.instanceOffset = args->StartInstanceLocation; curAction.flags |= ActionFlags::Drawcall | ActionFlags::Instanced | ActionFlags::Indirect; + curAction.customName = StringFormat::Fmt("[%u] arg%u: IndirectDraw(<%u, %u>)", i, a, curAction.numIndices, curAction.numInstances); @@ -3858,7 +3782,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in 
ResourceId id; uint64_t offs = 0; - m_pDevice->GetResIDFromAddr(vb->BufferLocation, id, offs); + m_pDevice->GetResIDFromOrigAddr(vb->BufferLocation, id, offs); ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); RDCASSERT(res); @@ -3889,7 +3813,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in ResourceId id; uint64_t offs = 0; - m_pDevice->GetResIDFromAddr(ib->BufferLocation, id, offs); + m_pDevice->GetResIDFromOrigAddr(ib->BufferLocation, id, offs); ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); RDCASSERT(res); @@ -3919,7 +3843,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in ResourceId id; uint64_t offs = 0; - m_pDevice->GetResIDFromAddr(*addr, id, offs); + m_pDevice->GetResIDFromOrigAddr(*addr, id, offs); ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); if(res) @@ -3970,6 +3894,7 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in } } + range.End = range.Begin = 0; exec.argBuf->Unmap(0, &range); // remove excesss actions if count < maxCount @@ -3989,404 +3914,6 @@ void WrappedID3D12GraphicsCommandList::PatchExecuteIndirect(BakedCmdListInfo &in // shift all subsequent EIDs and action IDs so they're contiguous info.ShiftForRemoved(shiftActionID, shiftEID, idx); } - - if(!multiaction && exec.maxCount > 1) - { - // remove pop event - actions.erase(idx); - - info.ShiftForRemoved(1, 1, idx); - } -} - -void WrappedID3D12GraphicsCommandList::ReplayExecuteIndirect(ID3D12GraphicsCommandList *list) -{ - BakedCmdListInfo &cmdInfo = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID]; - - size_t executeIndex = cmdInfo.executeEvents.size(); - - for(size_t i = 0; i < cmdInfo.executeEvents.size(); i++) - { - if(cmdInfo.executeEvents[i].baseEvent <= cmdInfo.curEventID && - cmdInfo.curEventID < cmdInfo.executeEvents[i].lastEvent) - { - executeIndex = i; - break; - } - } - - if(executeIndex >= cmdInfo.executeEvents.size()) - { - RDCERR("Couldn't find 
ExecuteIndirect to replay!"); - return; - } - - BakedCmdListInfo::ExecuteData &exec = cmdInfo.executeEvents[executeIndex]; - - WrappedID3D12CommandSignature *comSig = exec.sig; - - uint32_t count = exec.realCount; - uint32_t origCount = exec.realCount; - - const bool multiaction = (count > 1 || comSig->sig.numActions > 1); - - const bool gfx = comSig->sig.graphics; - const uint32_t sigSize = (uint32_t)comSig->sig.arguments.size(); - - // if we're partial then continue to emulate & replay, otherwise use the patched buffer - if(!m_Cmd->IsPartialCmdList(m_Cmd->m_LastCmdListID)) - { - list->ExecuteIndirect(comSig->GetReal(), exec.maxCount, Unwrap(exec.argBuf), exec.argOffs, - Unwrap(exec.countBuf), exec.countOffs); - - // skip past all the events - cmdInfo.curEventID += origCount * sigSize; - - // skip past the pop event - if(multiaction) - cmdInfo.curEventID++; - - return; - } - - bytebuf data; - m_pDevice->GetDebugManager()->GetBufferData(exec.argBuf, exec.argOffs, - count * comSig->sig.ByteStride, data); - - byte *dataPtr = &data[0]; - - D3D12RenderState &state = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].state; - - rdcarray &sigelems = - gfx ? state.graphics.sigelems : state.compute.sigelems; - - // while executing, decide where to start and stop. We do this by modifying the max count and - // noting which arg we should start working, and which arg in the last execute we should get up - // to. Since we don't actually replay as indirect executes to save on having to allocate and - // manage indirect buffers across the command list, we can just skip commands we don't want to - // execute. 
- - uint32_t firstCommand = 0; - uint32_t firstArg = 0; - uint32_t lastArg = ~0U; - - { - uint32_t curEID = m_Cmd->m_RootEventID; - - if(m_Cmd->m_FirstEventID <= 1) - { - curEID = cmdInfo.curEventID; - - if(m_Cmd->m_Partial[D3D12CommandData::Primary].partialParent == m_Cmd->m_LastCmdListID) - curEID += m_Cmd->m_Partial[D3D12CommandData::Primary].baseEvent; - else if(m_Cmd->m_Partial[D3D12CommandData::Secondary].partialParent == m_Cmd->m_LastCmdListID) - curEID += m_Cmd->m_Partial[D3D12CommandData::Secondary].baseEvent; - } - - D3D12CommandData::ActionUse use(m_Cmd->m_CurChunkOffset, 0); - auto it = std::lower_bound(m_Cmd->m_ActionUses.begin(), m_Cmd->m_ActionUses.end(), use); - - if(it == m_Cmd->m_ActionUses.end()) - { - RDCERR("Unexpected action not found in uses vector, offset %llu", m_Cmd->m_CurChunkOffset); - } - else - { - uint32_t baseEventID = it->eventId; - - // TODO when using an action callback, we should submit every action individually - if(m_Cmd->m_ActionCallback) - { - firstCommand = 0; - firstArg = 0; - lastArg = ~0U; - } - // To add the execute, we made an event N that is the 'parent' marker, then - // N+1, N+2, N+3, ... for each of the arguments. If the first sub-argument is selected - // then we'll replay up to N but not N+1, so just do nothing - we DON'T want to draw - // the first sub-draw in that range. - else if(m_Cmd->m_LastEventID > baseEventID) - { - if(m_Cmd->m_FirstEventID <= 1) - { - // one event per arg, and N args per command - uint32_t numArgs = m_Cmd->m_LastEventID - baseEventID; - - // play all commands up to the one we want - firstCommand = 0; - - // how many commands? - uint32_t numCmds = numArgs / sigSize + 1; - count = RDCMIN(count, numCmds); - - // play all args in the fnial commmad up to the one we want - firstArg = 0; - - // how many args in the final command - if(numCmds > count) - lastArg = ~0U; - else - lastArg = numArgs % sigSize; - } - else - { - // note we'll never be asked to do e.g. 3rd-7th commands of an execute. 
Only ever 0th-nth - // or - // a single argument. - uint32_t argIdx = (curEID - baseEventID - 1); - - firstCommand = argIdx / sigSize; - count = RDCMIN(count, firstCommand + 1); - - firstArg = argIdx % sigSize; - lastArg = firstArg + 1; - } - } - else - { - // don't do anything, we've selected the base event - count = 0; - } - } - } - - bool executing = true; - - for(uint32_t i = 0; i < count; i++) - { - byte *src = dataPtr; - dataPtr += comSig->sig.ByteStride; - - // don't have to do an upper bound on commands, count was modified - if(i < firstCommand) - continue; - - for(uint32_t a = 0; a < sigSize; a++) - { - const D3D12_INDIRECT_ARGUMENT_DESC &arg = comSig->sig.arguments[a]; - - // only execute things while we're in the range we want - // on the last command count, stop executing once we're past the last arg. - if(i == count - 1) - executing = (a >= firstArg && a < lastArg); - - switch(arg.Type) - { - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: - { - D3D12_DRAW_ARGUMENTS *args = (D3D12_DRAW_ARGUMENTS *)src; - src += sizeof(D3D12_DRAW_ARGUMENTS); - - if(executing) - list->DrawInstanced(args->VertexCountPerInstance, args->InstanceCount, - args->StartVertexLocation, args->StartInstanceLocation); - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: - { - D3D12_DRAW_INDEXED_ARGUMENTS *args = (D3D12_DRAW_INDEXED_ARGUMENTS *)src; - src += sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); - - if(executing) - list->DrawIndexedInstanced(args->IndexCountPerInstance, args->InstanceCount, - args->StartIndexLocation, args->BaseVertexLocation, - args->StartInstanceLocation); - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: - { - D3D12_DISPATCH_ARGUMENTS *args = (D3D12_DISPATCH_ARGUMENTS *)src; - src += sizeof(D3D12_DISPATCH_ARGUMENTS); - - if(executing) - list->Dispatch(args->ThreadGroupCountX, args->ThreadGroupCountY, args->ThreadGroupCountZ); - - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: - { - size_t argSize = sizeof(uint32_t) * 
arg.Constant.Num32BitValuesToSet; - uint32_t *values = (uint32_t *)src; - src += argSize; - - if(executing) - { - if(sigelems.size() < arg.Constant.RootParameterIndex + 1) - sigelems.resize(arg.Constant.RootParameterIndex + 1); - - sigelems[arg.Constant.RootParameterIndex].SetConstants( - arg.Constant.Num32BitValuesToSet, values, arg.Constant.DestOffsetIn32BitValues); - - if(gfx) - list->SetGraphicsRoot32BitConstants(arg.Constant.RootParameterIndex, - arg.Constant.Num32BitValuesToSet, values, - arg.Constant.DestOffsetIn32BitValues); - else - list->SetComputeRoot32BitConstants(arg.Constant.RootParameterIndex, - arg.Constant.Num32BitValuesToSet, values, - arg.Constant.DestOffsetIn32BitValues); - } - - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: - { - D3D12_VERTEX_BUFFER_VIEW *srcVB = (D3D12_VERTEX_BUFFER_VIEW *)src; - src += sizeof(D3D12_VERTEX_BUFFER_VIEW); - - ResourceId id; - uint64_t offs = 0; - WrappedID3D12Resource::GetResIDFromAddr(srcVB->BufferLocation, id, offs); - RDCASSERT(id != ResourceId()); - - if(executing) - { - if(state.vbuffers.size() < arg.VertexBuffer.Slot + 1) - state.vbuffers.resize(arg.VertexBuffer.Slot + 1); - - state.vbuffers[arg.VertexBuffer.Slot].buf = id; - state.vbuffers[arg.VertexBuffer.Slot].offs = offs; - state.vbuffers[arg.VertexBuffer.Slot].size = srcVB->SizeInBytes; - state.vbuffers[arg.VertexBuffer.Slot].stride = srcVB->StrideInBytes; - } - - if(executing && id != ResourceId()) - list->IASetVertexBuffers(arg.VertexBuffer.Slot, 1, srcVB); - - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: - { - D3D12_INDEX_BUFFER_VIEW *srcIB = (D3D12_INDEX_BUFFER_VIEW *)src; - src += sizeof(D3D12_INDEX_BUFFER_VIEW); - - ResourceId id; - uint64_t offs = 0; - WrappedID3D12Resource::GetResIDFromAddr(srcIB->BufferLocation, id, offs); - RDCASSERT(id != ResourceId()); - - if(executing) - { - state.ibuffer.buf = id; - state.ibuffer.offs = offs; - state.ibuffer.size = srcIB->SizeInBytes; - state.ibuffer.bytewidth = 
(srcIB->Format == DXGI_FORMAT_R32_UINT ? 4 : 2); - } - - if(executing && id != ResourceId()) - list->IASetIndexBuffer(srcIB); - - break; - } - case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: - case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: - { - D3D12_GPU_VIRTUAL_ADDRESS *srcAddr = (D3D12_GPU_VIRTUAL_ADDRESS *)src; - src += sizeof(D3D12_GPU_VIRTUAL_ADDRESS); - - ResourceId id; - uint64_t offs = 0; - WrappedID3D12Resource::GetResIDFromAddr(*srcAddr, id, offs); - RDCASSERT(*srcAddr == 0 || id != ResourceId()); - - const uint32_t rootIdx = arg.Constant.RootParameterIndex; - - SignatureElementType elemType = eRootUnknown; - - if(gfx) - { - if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW) - { - elemType = eRootCBV; - - if(executing) - { - if(id != ResourceId()) - list->SetGraphicsRootConstantBufferView(rootIdx, *srcAddr); - else - list->SetGraphicsRootConstantBufferView(rootIdx, 0); - } - } - else if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW) - { - elemType = eRootSRV; - - if(executing) - { - if(id != ResourceId()) - list->SetGraphicsRootShaderResourceView(rootIdx, *srcAddr); - else - list->SetGraphicsRootShaderResourceView(rootIdx, 0); - } - } - else if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW) - { - elemType = eRootUAV; - - if(executing) - { - if(id != ResourceId()) - list->SetGraphicsRootUnorderedAccessView(rootIdx, *srcAddr); - else - list->SetGraphicsRootUnorderedAccessView(rootIdx, 0); - } - } - else - { - RDCERR("Unexpected argument type! 
%d", arg.Type); - } - } - else - { - if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW) - { - elemType = eRootCBV; - - if(executing && id != ResourceId()) - list->SetComputeRootConstantBufferView(rootIdx, *srcAddr); - } - else if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW) - { - elemType = eRootSRV; - - if(executing && id != ResourceId()) - list->SetComputeRootShaderResourceView(rootIdx, *srcAddr); - } - else if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW) - { - elemType = eRootUAV; - - if(executing && id != ResourceId()) - list->SetComputeRootUnorderedAccessView(rootIdx, *srcAddr); - } - else - { - RDCERR("Unexpected argument type! %d", arg.Type); - } - } - - if(executing) - { - if(sigelems.size() < rootIdx + 1) - sigelems.resize(rootIdx + 1); - - sigelems[rootIdx] = D3D12RenderState::SignatureElement(elemType, id, offs); - } - - break; - } - default: RDCERR("Unexpected argument type! %d", arg.Type); break; - } - } - } - - // skip past all the events - cmdInfo.curEventID += origCount * sigSize; - - // skip past the pop event - if(multiaction) - cmdInfo.curEventID++; } template @@ -4410,175 +3937,259 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ExecuteIndirect( { m_Cmd->m_LastCmdListID = GetResourceManager()->GetOriginalID(GetResID(pCommandList)); + BakedCmdListInfo &cmdInfo = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID]; + if(IsActiveReplaying(m_State)) { + WrappedID3D12CommandSignature *comSig = (WrappedID3D12CommandSignature *)pCommandSignature; + + uint32_t count = MaxCommandCount; + if(m_Cmd->InRerecordRange(m_Cmd->m_LastCmdListID)) { ID3D12GraphicsCommandListX *list = m_Cmd->RerecordCmdList(m_Cmd->m_LastCmdListID); - uint32_t eventId = m_Cmd->HandlePreCallback(list, ActionFlags::MultiAction); - ReplayExecuteIndirect(Unwrap(list)); - if(eventId && m_Cmd->m_ActionCallback->PostMisc(eventId, ActionFlags::MultiAction, list)) + uint32_t curEID = m_Cmd->m_RootEventID; + + if(m_Cmd->m_FirstEventID <= 
1) { - ReplayExecuteIndirect(Unwrap(list)); - m_Cmd->m_ActionCallback->PostRemisc(eventId, ActionFlags::MultiAction, list); + curEID = cmdInfo.curEventID; + + if(m_Cmd->m_Partial[D3D12CommandData::Primary].partialParent == m_Cmd->m_LastCmdListID) + curEID += m_Cmd->m_Partial[D3D12CommandData::Primary].baseEvent; + else if(m_Cmd->m_Partial[D3D12CommandData::Secondary].partialParent == + m_Cmd->m_LastCmdListID) + curEID += m_Cmd->m_Partial[D3D12CommandData::Secondary].baseEvent; + } + + D3D12CommandData::ActionUse use(m_Cmd->m_CurChunkOffset, 0); + auto it = std::lower_bound(m_Cmd->m_ActionUses.begin(), m_Cmd->m_ActionUses.end(), use); + + uint32_t baseEventID = it->eventId; + + if(count > 1) + { + // get the number of draws by looking at how many children the parent action has. + const rdcarray &children = m_pDevice->GetAction(it->eventId)->children; + count = (uint32_t)children.size(); + + // don't count the popmarker child + if(!children.empty() && children.back().flags & ActionFlags::PopMarker) + count--; + } + + uint32_t argumentsReplayed = + RDCMIN(m_Cmd->m_LastEventID - baseEventID, count * comSig->sig.arguments.count()); + uint32_t executesReplayed = argumentsReplayed / comSig->sig.arguments.count(); + + BarrierSet barriers; + + barriers.Configure(pArgumentBuffer, cmdInfo.GetState(m_pDevice, GetResID(pArgumentBuffer)), + BarrierSet::CopySourceAccess); + if(pCountBuffer) + barriers.Configure(pCountBuffer, cmdInfo.GetState(m_pDevice, GetResID(pCountBuffer)), + BarrierSet::CopySourceAccess); + + barriers.Apply(list); + + // the spec says that any root arguments of VB/IBs set are reset to 0. 
We reset the ones + // replayed here (accounting for selecting within the first few events), then record the + // arguments so that if the last event ends mid-way through this execute we can later + // set the state with the correct arguments + ResetAndRecordExecuteIndirectStates(list, baseEventID, count, pCommandSignature, + pArgumentBuffer, ArgumentBufferOffset, argumentsReplayed); + + barriers.Unapply(list); + + // when we have a callback, submit every action individually to the callback + if(m_Cmd->m_ActionCallback) + { + D3D12MarkerRegion::Begin( + list, StringFormat::Fmt("ExecuteIndirect callback replay (drawCount=%u)", count)); + + rdcpair patched = + m_pDevice->GetDebugManager()->PatchExecuteIndirect( + list, m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].state, pCommandSignature, + pArgumentBuffer, ArgumentBufferOffset, + (pCountBuffer ? pCountBuffer->GetGPUVirtualAddress() : 0) + CountBufferOffset, + MaxCommandCount); + for(uint32_t i = 0; i < count; i++) + { + uint32_t eventId = m_Cmd->HandlePreCallback(list, ActionFlags::Drawcall, + (i + 1) * comSig->sig.arguments.count()); + + // action up to and including i. The previous draws will be nop'd out + Unwrap(list)->ExecuteIndirect(Unwrap(pCommandSignature), 1, Unwrap(patched.first), + patched.second, NULL, 0); + + if(eventId && m_Cmd->m_ActionCallback->PostDraw(eventId, list)) + { + Unwrap(list)->ExecuteIndirect(Unwrap(pCommandSignature), 1, Unwrap(patched.first), + patched.second, NULL, 0); + m_Cmd->m_ActionCallback->PostRedraw(eventId, list); + } + + patched.second += comSig->sig.ByteStride; + } + + D3D12MarkerRegion::End(list); + } + else if(m_Cmd->m_LastEventID > baseEventID) + { + rdcpair patched = + m_pDevice->GetDebugManager()->PatchExecuteIndirect( + list, m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].state, pCommandSignature, + pArgumentBuffer, ArgumentBufferOffset, + (pCountBuffer ? 
pCountBuffer->GetGPUVirtualAddress() : 0) + CountBufferOffset, + MaxCommandCount); + + if(m_Cmd->m_FirstEventID <= 1) + { + // if we're replaying part-way into a multidraw we just clamp the count + // ExecuteIndirect requires that there is precisely one dispatch/draw, and it comes + // last. So after accounting for state setting above in + // ResetAndRecordExecuteIndirectStates we can 'round down' to the nearest whole number + // of executes, as if we select e.g. partway but not to the end of the second execute + // there's no need to replay anything more than the first execute. + count = RDCMIN(count, executesReplayed); + } + else + { + const uint32_t argidx = (curEID - baseEventID - 1); + const uint32_t execidx = argidx / comSig->sig.arguments.count(); + + // don't do anything when selecting the final popmarker as well - everything will have + // been done in previous replays so this is a no-op. + if(argidx >= count * comSig->sig.arguments.count()) + { + count = 0; + } + // we also know that only the last argument actually does anything - previous are just + // state setting. 
So if argIdx isn't the last one, we can skip this + else if((argidx + 1) % comSig->sig.arguments.count() != 0) + { + count = 0; + } + else + { + // slightly more complex, we're replaying only one execute later on as a single draw + // fortunately ExecuteIndirect has no 'draw' builtin, so we can just offset the + // argument buffer and set count to 1 + count = 1; + patched.second += comSig->sig.ByteStride * execidx; + } + } + + if(count > 0) + Unwrap(list)->ExecuteIndirect(Unwrap(pCommandSignature), count, Unwrap(patched.first), + patched.second, NULL, 0); } } + + // executes skip the event ID past the whole thing + uint32_t numEvents = count * (uint32_t)comSig->sig.arguments.size() + 1; + if(m_Cmd->m_FirstEventID > 1) + m_Cmd->m_RootEventID += numEvents; + else + m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].curEventID += numEvents; } else { WrappedID3D12CommandSignature *comSig = (WrappedID3D12CommandSignature *)pCommandSignature; - m_Cmd->AddEvent(); + BarrierSet barriers; - ActionDescription action; - action.customName = "ExecuteIndirect"; - - action.flags |= ActionFlags::MultiAction; - - if(MaxCommandCount > 1 || comSig->sig.numActions > 1) - action.flags |= ActionFlags::PushMarker; - - m_Cmd->AddAction(action); - - D3D12ActionTreeNode &actionNode = m_Cmd->GetActionStack().back()->children.back(); - - actionNode.resourceUsage.push_back(make_rdcpair( - GetResID(pArgumentBuffer), EventUsage(actionNode.action.eventId, ResourceUsage::Indirect))); + barriers.Configure(pArgumentBuffer, cmdInfo.GetState(m_pDevice, GetResID(pArgumentBuffer)), + BarrierSet::CopySourceAccess); if(pCountBuffer) - actionNode.resourceUsage.push_back(make_rdcpair( - GetResID(pCountBuffer), EventUsage(actionNode.action.eventId, ResourceUsage::Indirect))); + barriers.Configure(pCountBuffer, cmdInfo.GetState(m_pDevice, GetResID(pCountBuffer)), + BarrierSet::CopySourceAccess); - ID3D12GraphicsCommandList *cracked = GetCrackedList(); + ID3D12GraphicsCommandListX *list = 
((ID3D12GraphicsCommandListX *)pCommandList); - BakedCmdListInfo::ExecuteData exec; - exec.baseEvent = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].curEventID; - exec.sig = comSig; - exec.maxCount = MaxCommandCount; - exec.countBuf = pCountBuffer; - exec.countOffs = CountBufferOffset; + barriers.Apply(list); - // allocate space for patched indirect buffer - m_Cmd->GetIndirectBuffer(comSig->sig.ByteStride * MaxCommandCount, &exec.argBuf, &exec.argOffs); + SaveExecuteIndirectParameters(list, pCommandSignature, MaxCommandCount, pArgumentBuffer, + ArgumentBufferOffset, pCountBuffer, CountBufferOffset); - // transition buffer to COPY_SOURCE/COPY_DEST, copy, and back to INDIRECT_ARG - D3D12_RESOURCE_BARRIER barriers[2] = {}; - barriers[0].Transition.pResource = Unwrap(exec.argBuf); - barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; - barriers[1].Transition.pResource = Unwrap(pArgumentBuffer); - barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers.Unapply(list); - UINT barrierCount = 2; + rdcpair patched = m_pDevice->GetDebugManager()->PatchExecuteIndirect( + list, cmdInfo.state, pCommandSignature, pArgumentBuffer, ArgumentBufferOffset, + (pCountBuffer ? pCountBuffer->GetGPUVirtualAddress() : 0) + CountBufferOffset, + MaxCommandCount); - D3D12ResourceLayout layout = m_pDevice->GetSubresourceStates(GetResID(pArgumentBuffer))[0]; - // with new barriers (layouts) we don't need a layout change but we do need a new-style - // barrier - if(layout.IsLayout()) + Unwrap(list)->ExecuteIndirect(comSig->GetReal(), MaxCommandCount, Unwrap(patched.first), + patched.second, Unwrap(pCountBuffer), CountBufferOffset); + + const uint32_t sigSize = (uint32_t)comSig->sig.arguments.size(); + + // add base PushMarker. 
We always push for even single-event indirects, for consistency { - barrierCount = 1; + m_Cmd->AddEvent(); - ID3D12GraphicsCommandList7 *list7 = GetCrackedList7(); + ActionDescription action; + action.customName = "ExecuteIndirect"; - if(list7) - { - D3D12_BUFFER_BARRIER buf; - buf.pResource = Unwrap(pArgumentBuffer); - buf.Offset = 0; - buf.Size = UINT64_MAX; - buf.SyncBefore = D3D12_BARRIER_SYNC_ALL; - buf.SyncAfter = D3D12_BARRIER_SYNC_COPY; - buf.AccessBefore = D3D12_BARRIER_ACCESS_COMMON; - buf.AccessAfter = D3D12_BARRIER_ACCESS_COPY_SOURCE; + action.flags |= ActionFlags::MultiAction | ActionFlags::PushMarker; - D3D12_BARRIER_GROUP group; - group.NumBarriers = 1; - group.Type = D3D12_BARRIER_TYPE_BUFFER; - group.pBufferBarriers = &buf; - list7->Barrier(1, &group); - } - else - { - RDCERR("Encountered new layout at ExecuteIndirect time but couldn't get cracked list 7"); - } - } - else if(layout.ToStates() & D3D12_RESOURCE_STATE_COPY_SOURCE) - { - barrierCount = 1; + m_Cmd->AddAction(action); + cmdInfo.curEventID++; + + D3D12ActionTreeNode &actionNode = m_Cmd->GetActionStack().back()->children.back(); + + actionNode.resourceUsage.push_back( + make_rdcpair(GetResID(pArgumentBuffer), + EventUsage(actionNode.action.eventId, ResourceUsage::Indirect))); + if(pCountBuffer) + actionNode.resourceUsage.push_back(make_rdcpair( + GetResID(pCountBuffer), EventUsage(actionNode.action.eventId, ResourceUsage::Indirect))); } - cracked->ResourceBarrier(barrierCount, barriers); - - cracked->CopyBufferRegion(Unwrap(exec.argBuf), exec.argOffs, Unwrap(pArgumentBuffer), - ArgumentBufferOffset, comSig->sig.ByteStride * MaxCommandCount); - - std::swap(barriers[0].Transition.StateBefore, barriers[0].Transition.StateAfter); - std::swap(barriers[1].Transition.StateBefore, barriers[1].Transition.StateAfter); - cracked->ResourceBarrier(barrierCount, barriers); - - if(layout.IsLayout()) + for(uint32_t i = 0; i < MaxCommandCount; i++) { - ID3D12GraphicsCommandList7 *list7 = GetCrackedList7(); 
- - if(list7) + for(uint32_t a = 0; a < sigSize; a++) { - D3D12_BUFFER_BARRIER buf; - buf.pResource = Unwrap(pArgumentBuffer); - buf.Offset = 0; - buf.Size = UINT64_MAX; - buf.SyncBefore = D3D12_BARRIER_SYNC_COPY; - buf.SyncAfter = D3D12_BARRIER_SYNC_ALL; - buf.AccessBefore = D3D12_BARRIER_ACCESS_COPY_SOURCE; - buf.AccessAfter = D3D12_BARRIER_ACCESS_COMMON; + const D3D12_INDIRECT_ARGUMENT_DESC &arg = comSig->sig.arguments[a]; - D3D12_BARRIER_GROUP group; - group.NumBarriers = 1; - group.Type = D3D12_BARRIER_TYPE_BUFFER; - group.pBufferBarriers = &buf; - list7->Barrier(1, &group); + switch(arg.Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + { + // add dummy event and action + m_Cmd->AddEvent(); + ActionDescription action; + action.customName = "ExecuteIndirect"; + m_Cmd->AddAction(action); + m_Cmd->GetActionStack().back()->children.back().state = + new D3D12RenderState(cmdInfo.state); + cmdInfo.curEventID++; + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: + // add dummy event + m_Cmd->AddEvent(); + cmdInfo.curEventID++; + break; + default: RDCERR("Unexpected argument type! 
%d", arg.Type); break; + } } } - cracked->Close(); - - // open new cracked list and re-apply the current state { - D3D12_COMMAND_LIST_TYPE type = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].type; - UINT nodeMask = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].nodeMask; - - nodeMask = 0; - - ResourceId allocid = m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].allocator; - ID3D12CommandAllocator *allocator = m_Cmd->m_CrackedAllocators[allocid]; - - ID3D12GraphicsCommandList *listptr = NULL; - m_pDevice->CreateCommandList(nodeMask, type, allocator, NULL, - __uuidof(ID3D12GraphicsCommandList), (void **)&listptr); - - // this is a safe upcast because it's a wrapped object - ID3D12GraphicsCommandListX *list = (ID3D12GraphicsCommandListX *)listptr; - - m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].crackedLists.push_back(list); - - m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].state.ApplyState(m_pDevice, list); + m_Cmd->AddEvent(); + ActionDescription action; + action.customName = "ExecuteIndirect()"; + action.flags = ActionFlags::PopMarker; + m_Cmd->AddAction(action); } - - // perform indirect action, but from patched buffer. 
It will be patched between the above list - // and this list during the first execution of the command list - Unwrap(pCommandList) - ->ExecuteIndirect(comSig->GetReal(), MaxCommandCount, Unwrap(exec.argBuf), exec.argOffs, - Unwrap(pCountBuffer), CountBufferOffset); - GetCrackedList()->ExecuteIndirect(comSig->GetReal(), MaxCommandCount, Unwrap(exec.argBuf), - exec.argOffs, Unwrap(pCountBuffer), CountBufferOffset); - - m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].executeEvents.push_back(exec); - - m_Cmd->m_BakedCmdListInfo[m_Cmd->m_LastCmdListID].curEventID++; - - // reserve the right number of actions and events, to later be patched up with the actual - // details - ReserveExecuteIndirect(pCommandList, comSig, MaxCommandCount); } } @@ -4674,8 +4285,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ClearDepthStencilView( Unwrap(pCommandList) ->ClearDepthStencilView(Unwrap(DepthStencilView), ClearFlags, Depth, Stencil, NumRects, pRects); - GetCrackedList()->ClearDepthStencilView(Unwrap(DepthStencilView), ClearFlags, Depth, Stencil, - NumRects, pRects); { m_Cmd->AddEvent(); @@ -4777,7 +4386,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ClearRenderTargetView( else { Unwrap(pCommandList)->ClearRenderTargetView(Unwrap(RenderTargetView), ColorRGBA, NumRects, pRects); - GetCrackedList()->ClearRenderTargetView(Unwrap(RenderTargetView), ColorRGBA, NumRects, pRects); { m_Cmd->AddEvent(); @@ -4884,9 +4492,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ClearUnorderedAccessViewUint( Unwrap(pCommandList) ->ClearUnorderedAccessViewUint(Unwrap(ViewGPUHandleInCurrentHeap), Unwrap(ViewCPUHandle), Unwrap(pResource), Values, NumRects, pRects); - GetCrackedList()->ClearUnorderedAccessViewUint(Unwrap(ViewGPUHandleInCurrentHeap), - Unwrap(ViewCPUHandle), Unwrap(pResource), - Values, NumRects, pRects); { m_Cmd->AddEvent(); @@ -4998,9 +4603,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ClearUnorderedAccessViewFloat( Unwrap(pCommandList) 
->ClearUnorderedAccessViewFloat(Unwrap(ViewGPUHandleInCurrentHeap), Unwrap(ViewCPUHandle), Unwrap(pResource), Values, NumRects, pRects); - GetCrackedList()->ClearUnorderedAccessViewFloat(Unwrap(ViewGPUHandleInCurrentHeap), - Unwrap(ViewCPUHandle), Unwrap(pResource), - Values, NumRects, pRects); { m_Cmd->AddEvent(); @@ -5090,7 +4692,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_DiscardResource(SerialiserType else { Unwrap(pCommandList)->DiscardResource(Unwrap(pResource), pRegion); - GetCrackedList()->DiscardResource(Unwrap(pResource), pRegion); { m_Cmd->AddEvent(); @@ -5175,8 +4776,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_CopyBufferRegion(SerialiserType { Unwrap(pCommandList) ->CopyBufferRegion(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), SrcOffset, NumBytes); - GetCrackedList()->CopyBufferRegion(Unwrap(pDstBuffer), DstOffset, Unwrap(pSrcBuffer), - SrcOffset, NumBytes); { m_Cmd->AddEvent(); @@ -5275,7 +4874,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_CopyTextureRegion( else { Unwrap(pCommandList)->CopyTextureRegion(&unwrappedDst, DstX, DstY, DstZ, &unwrappedSrc, pSrcBox); - GetCrackedList()->CopyTextureRegion(&unwrappedDst, DstX, DstY, DstZ, &unwrappedSrc, pSrcBox); { m_Cmd->AddEvent(); @@ -5390,7 +4988,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_CopyResource(SerialiserType &se else { Unwrap(pCommandList)->CopyResource(Unwrap(pDstResource), Unwrap(pSrcResource)); - GetCrackedList()->CopyResource(Unwrap(pDstResource), Unwrap(pSrcResource)); { m_Cmd->AddEvent(); @@ -5477,8 +5074,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_ResolveSubresource( Unwrap(pCommandList) ->ResolveSubresource(Unwrap(pDstResource), DstSubresource, Unwrap(pSrcResource), SrcSubresource, Format); - GetCrackedList()->ResolveSubresource(Unwrap(pDstResource), DstSubresource, - Unwrap(pSrcResource), SrcSubresource, Format); { m_Cmd->AddEvent(); @@ -5587,8 +5182,6 @@ bool WrappedID3D12GraphicsCommandList::Serialise_CopyTiles( Unwrap(pCommandList) 
->CopyTiles(Unwrap(pTiledResource), &TileRegionStartCoordinate, &TileRegionSize, Unwrap(pBuffer), BufferStartOffsetInBytes, Flags); - GetCrackedList()->CopyTiles(Unwrap(pTiledResource), &TileRegionStartCoordinate, - &TileRegionSize, Unwrap(pBuffer), BufferStartOffsetInBytes, Flags); { m_Cmd->AddEvent(); diff --git a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp index 8d55053c6..16eba0ec3 100644 --- a/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_queue_wrap.cpp @@ -487,46 +487,24 @@ bool WrappedID3D12CommandQueue::Serialise_ExecuteCommandLists(SerialiserType &se { ResourceId cmd = GetResourceManager()->GetOriginalID(GetResID(ppCommandLists[i])); - if(m_Cmd.m_BakedCmdListInfo[cmd].executeEvents.empty() || - m_Cmd.m_BakedCmdListInfo[cmd].executeEvents[0].patched) + ID3D12CommandList *list = Unwrap(ppCommandLists[i]); + real->ExecuteCommandLists(1, &list); + if(D3D12_Debug_SingleSubmitFlushing()) + m_pDevice->GPUSync(); + + BakedCmdListInfo &info = m_Cmd.m_BakedCmdListInfo[cmd]; + + if(!info.executeEvents.empty()) { - ID3D12CommandList *list = Unwrap(ppCommandLists[i]); - real->ExecuteCommandLists(1, &list); - if(D3D12_Debug_SingleSubmitFlushing()) - m_pDevice->GPUSync(); - } - else - { - BakedCmdListInfo &info = m_Cmd.m_BakedCmdListInfo[cmd]; + // ensure all GPU work has finished for readback of arguments + m_pDevice->GPUSync(); - // execute the first half of the cracked list - ID3D12CommandList *list = Unwrap(info.crackedLists[0]); - real->ExecuteCommandLists(1, &list); + if(m_pDevice->HasFatalError()) + return false; - for(size_t c = 1; c < info.crackedLists.size(); c++) - { - // ensure all GPU work has finished - m_pDevice->GPUSync(); - - if(m_pDevice->HasFatalError()) - return false; - - // readback the patch buffer and perform patching - m_ReplayList->PatchExecuteIndirect(info, uint32_t(c - 1)); - - if(m_pDevice->HasFatalError()) - return false; - - // 
execute next list with this indirect. - list = Unwrap(info.crackedLists[c]); - real->ExecuteCommandLists(1, &list); - - if(m_pDevice->HasFatalError()) - return false; - } - - if(D3D12_Debug_SingleSubmitFlushing()) - m_pDevice->GPUSync(); + // readback the patch buffer and update recorded events + for(size_t c = 0; c < info.executeEvents.size(); c++) + m_ReplayList->FinaliseExecuteIndirectEvents(info, info.executeEvents[c]); } } diff --git a/renderdoc/driver/d3d12/d3d12_commands.cpp b/renderdoc/driver/d3d12/d3d12_commands.cpp index 1fe50cdf0..c50056cc0 100644 --- a/renderdoc/driver/d3d12/d3d12_commands.cpp +++ b/renderdoc/driver/d3d12/d3d12_commands.cpp @@ -535,9 +535,6 @@ WrappedID3D12CommandQueue::~WrappedID3D12CommandQueue() SAFE_RELEASE(m_pDownlevel); - for(size_t i = 0; i < m_Cmd.m_IndirectBuffers.size(); i++) - SAFE_RELEASE(m_Cmd.m_IndirectBuffers[i]); - SAFE_RELEASE(m_WrappedCompat.m_pReal); SAFE_RELEASE(m_WrappedDebug.m_pReal); SAFE_RELEASE(m_WrappedDebug.m_pReal1); @@ -1115,6 +1112,13 @@ RDResult WrappedID3D12CommandQueue::ReplayLog(CaptureState readType, uint32_t st m_Cmd.m_LastEventID = ~0U; } + if(IsReplayMode(m_State)) + { + for(size_t i = 0; i < m_Cmd.m_IndirectBuffers.size(); i++) + SAFE_RELEASE(m_Cmd.m_IndirectBuffers[i]); + m_Cmd.m_IndirectBuffers.clear(); + } + uint64_t startOffset = ser.GetReader()->GetOffset(); for(;;) @@ -1631,12 +1635,7 @@ void BakedCmdListInfo::ShiftForRemoved(uint32_t shiftActionID, uint32_t shiftEID for(size_t i = 0; i < executeEvents.size(); i++) { if(executeEvents[i].baseEvent >= lastEID) - { executeEvents[i].baseEvent -= shiftEID; - - if(executeEvents[i].lastEvent > 0) - executeEvents[i].lastEvent -= shiftEID; - } } } @@ -1646,6 +1645,17 @@ void BakedCmdListInfo::ShiftForRemoved(uint32_t shiftActionID, uint32_t shiftEID } } +SubresourceStateVector BakedCmdListInfo::GetState(WrappedID3D12Device *device, ResourceId id) +{ + std::map data; + + data[id] = device->GetSubresourceStates(id); + + 
device->GetResourceManager()->ApplyBarriers(barriers, data); + + return data[id]; +} + D3D12CommandData::D3D12CommandData() { m_CurChunkOffset = 0; @@ -1686,22 +1696,20 @@ void D3D12CommandData::GetIndirectBuffer(size_t size, ID3D12Resource **buf, uint indirectDesc.SampleDesc.Quality = 0; indirectDesc.Width = RDCMAX(AlignUp((uint64_t)size, 64ULL), m_IndirectSize); - // create a custom heap that sits in CPU memory and is mappable, but we can - // use for indirect args (unlike upload and readback). D3D12_HEAP_PROPERTIES heapProps; - heapProps.Type = D3D12_HEAP_TYPE_CUSTOM; - heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; - heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; heapProps.CreationNodeMask = 1; heapProps.VisibleNodeMask = 1; ID3D12Resource *argbuf = NULL; HRESULT hr = m_pDevice->CreateCommittedResource(&heapProps, D3D12_HEAP_FLAG_NONE, &indirectDesc, - D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, NULL, + D3D12_RESOURCE_STATE_COPY_DEST, NULL, __uuidof(ID3D12Resource), (void **)&argbuf); - SetObjName(argbuf, StringFormat::Fmt("Indirect Arg Buf (%llu bytes)", (uint64_t)size)); + SetObjName(argbuf, StringFormat::Fmt("Indirect Readback Buf (%llu bytes)", (uint64_t)size)); if(FAILED(hr)) RDCERR("Failed to create indirect buffer, HRESULT: %s", ToStr(hr).c_str()); diff --git a/renderdoc/driver/d3d12/d3d12_commands.h b/renderdoc/driver/d3d12/d3d12_commands.h index 581184dd2..d7360cdf8 100644 --- a/renderdoc/driver/d3d12/d3d12_commands.h +++ b/renderdoc/driver/d3d12/d3d12_commands.h @@ -178,21 +178,19 @@ struct BakedCmdListInfo ~BakedCmdListInfo() { SAFE_DELETE(action); } void ShiftForRemoved(uint32_t shiftActionID, uint32_t shiftEID, size_t idx); + SubresourceStateVector GetState(WrappedID3D12Device *device, ResourceId id); + struct ExecuteData { uint32_t baseEvent = 0; - uint32_t 
lastEvent = 0; - bool patched = false; ID3D12Resource *argBuf = NULL; ID3D12Resource *countBuf = NULL; uint64_t argOffs = 0; uint64_t countOffs = 0; WrappedID3D12CommandSignature *sig = NULL; UINT maxCount = 0; - UINT realCount = 0; }; - rdcarray crackedLists; rdcarray executeEvents; rdcarray curEvents; @@ -237,8 +235,6 @@ struct D3D12CommandData RDResult m_FailedReplayResult = ResultCode::APIReplayFailed; - std::map m_CrackedAllocators; - rdcarray m_IndirectBuffers; static const uint64_t m_IndirectSize = 4 * 1024 * 1024; uint64_t m_IndirectOffset; diff --git a/renderdoc/driver/d3d12/d3d12_common.h b/renderdoc/driver/d3d12/d3d12_common.h index 5c196e135..d33c5c3d9 100644 --- a/renderdoc/driver/d3d12/d3d12_common.h +++ b/renderdoc/driver/d3d12/d3d12_common.h @@ -457,8 +457,8 @@ DECLARE_REFLECTION_STRUCT(D3D12RootSignature); struct D3D12CommandSignature { bool graphics = true; - UINT numActions = 0; UINT ByteStride = 0; + UINT PackedByteSize = 0; rdcarray arguments; }; diff --git a/renderdoc/driver/d3d12/d3d12_debug.cpp b/renderdoc/driver/d3d12/d3d12_debug.cpp index a4fd6b0d1..cc75696ea 100644 --- a/renderdoc/driver/d3d12/d3d12_debug.cpp +++ b/renderdoc/driver/d3d12/d3d12_debug.cpp @@ -53,6 +53,32 @@ inline static D3D12_ROOT_PARAMETER1 cbvParam(D3D12_SHADER_VISIBILITY vis, UINT s return ret; } +inline static D3D12_ROOT_PARAMETER1 srvParam(D3D12_SHADER_VISIBILITY vis, UINT space, UINT reg) +{ + D3D12_ROOT_PARAMETER1 ret; + + ret.ShaderVisibility = vis; + ret.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + ret.Descriptor.RegisterSpace = space; + ret.Descriptor.ShaderRegister = reg; + ret.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + + return ret; +} + +inline static D3D12_ROOT_PARAMETER1 uavParam(D3D12_SHADER_VISIBILITY vis, UINT space, UINT reg) +{ + D3D12_ROOT_PARAMETER1 ret; + + ret.ShaderVisibility = vis; + ret.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + ret.Descriptor.RegisterSpace = space; + ret.Descriptor.ShaderRegister = reg; + 
ret.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + + return ret; +} + inline static D3D12_ROOT_PARAMETER1 constParam(D3D12_SHADER_VISIBILITY vis, UINT space, UINT reg, UINT num) { @@ -528,11 +554,11 @@ bool D3D12DebugManager::CreateShaderDebugResources() range.OffsetInDescriptorsFromTableStart = 0; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - D3D12RootSignatureParameter srvParam; - srvParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; - srvParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - srvParam.ranges.push_back(range); - rootSig.Parameters.push_back(srvParam); + D3D12RootSignatureParameter srv; + srv.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + srv.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + srv.ranges.push_back(range); + rootSig.Parameters.push_back(srv); range.NumDescriptors = 2; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; @@ -861,6 +887,132 @@ void D3D12DebugManager::ResetDebugAlloc() m_DebugAlloc->Reset(); } +rdcpair D3D12DebugManager::PatchExecuteIndirect( + ID3D12GraphicsCommandListX *cmd, const D3D12RenderState &state, ID3D12CommandSignature *comSig, + ID3D12Resource *argBuf, UINT64 argBufOffset, D3D12_GPU_VIRTUAL_ADDRESS countBufAddr, + UINT maxCount) +{ + rdcarray argOffsets; + + WrappedID3D12CommandSignature *wrappedComSig = (WrappedID3D12CommandSignature *)comSig; + uint32_t offset = 0; + for(const D3D12_INDIRECT_ARGUMENT_DESC &arg : wrappedComSig->sig.arguments) + { + switch(arg.Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + { + offset += sizeof(D3D12_DRAW_ARGUMENTS); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + { + offset += sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + { + offset += sizeof(D3D12_DISPATCH_ARGUMENTS); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + { + offset += sizeof(uint32_t) * arg.Constant.Num32BitValuesToSet; + break; + } + case 
D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: + { + argOffsets.push_back(offset); + offset += sizeof(D3D12_VERTEX_BUFFER_VIEW); + + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + { + argOffsets.push_back(offset); + offset += sizeof(D3D12_INDEX_BUFFER_VIEW); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: + { + argOffsets.push_back(offset); + offset += sizeof(D3D12_GPU_VIRTUAL_ADDRESS); + break; + } + default: RDCERR("Unexpected argument type! %d", arg.Type); break; + } + } + + // early out if the command signature doesn't reference anything with addresses + if(argOffsets.empty()) + return {argBuf, argBufOffset}; + + // only handle patching 128 address based arguments... + RDCASSERT(argOffsets.size() <= 128); + + D3D12MarkerRegion marker(cmd, "Patch execute indirect"); + + argOffsets.insert(0, (uint32_t)argOffsets.size()); + argOffsets.insert(1, m_EIPatchBufferCount); + argOffsets.insert(2, wrappedComSig->sig.ByteStride); + argOffsets.resize(128 + 3); + // argOffsets is now the executepatchdata cbuffer + + const UINT64 argDataSize = + wrappedComSig->sig.ByteStride * (maxCount - 1) + wrappedComSig->sig.PackedByteSize; + + if(m_EIPatchScratchOffset + argDataSize > m_EIPatchScratchBuffer->GetDesc().Width) + m_EIPatchScratchOffset = 0; + + RDCASSERT(m_EIPatchScratchOffset + argDataSize < m_EIPatchScratchBuffer->GetDesc().Width, + wrappedComSig->sig.ByteStride, wrappedComSig->sig.PackedByteSize, maxCount); + + rdcpair ret = {m_EIPatchScratchBuffer, m_EIPatchScratchOffset}; + + D3D12_RESOURCE_BARRIER b = {}; + b.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + b.Transition.pResource = m_EIPatchScratchBuffer; + b.Transition.StateBefore = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + b.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + + cmd->ResourceBarrier(1, &b); + + 
cmd->CopyBufferRegion(m_EIPatchScratchBuffer, m_EIPatchScratchOffset, argBuf, argBufOffset, + argDataSize); + + b.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + b.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + cmd->ResourceBarrier(1, &b); + + cmd->SetPipelineState(m_EIPatchPso); + cmd->SetComputeRootSignature(m_EIPatchRootSig); + cmd->SetComputeRootConstantBufferView(0, UploadConstants(argOffsets.data(), argOffsets.byteSize())); + if(countBufAddr == 0) + cmd->SetComputeRootConstantBufferView(1, UploadConstants(&maxCount, sizeof(uint32_t))); + else + cmd->SetComputeRootConstantBufferView(1, countBufAddr); + cmd->SetComputeRoot32BitConstant(2, maxCount, 0); + cmd->SetComputeRootShaderResourceView(3, m_EIPatchBufferData->GetGPUVirtualAddress()); + cmd->SetComputeRootUnorderedAccessView(4, ret.first->GetGPUVirtualAddress() + ret.second); + cmd->Dispatch(1, 1, 1); + + b.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + b.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + cmd->ResourceBarrier(1, &b); + + b.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + b.UAV.pResource = m_EIPatchScratchBuffer; + cmd->ResourceBarrier(1, &b); + + state.ApplyState(m_pDevice, cmd); + + m_EIPatchScratchOffset += wrappedComSig->sig.ByteStride * maxCount; + + return ret; +} + void D3D12DebugManager::FillWithDiscardPattern(ID3D12GraphicsCommandListX *cmd, const D3D12RenderState &state, DiscardType type, ID3D12Resource *res, @@ -1523,6 +1675,131 @@ D3D12_CPU_DESCRIPTOR_HANDLE D3D12DebugManager::GetUAVClearHandle(CBVUAVSRVSlot s return ret; } +void D3D12DebugManager::PrepareExecuteIndirectPatching(const GPUAddressRangeTracker &origAddresses) +{ + D3D12ShaderCache *shaderCache = m_pDevice->GetShaderCache(); + + shaderCache->SetCaching(true); + + HRESULT hr = S_OK; + + { + ID3DBlob *root = shaderCache->MakeRootSig({ + cbvParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0), + cbvParam(D3D12_SHADER_VISIBILITY_ALL, 0, 1), + 
constParam(D3D12_SHADER_VISIBILITY_ALL, 0, 2, 1), + srvParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0), + uavParam(D3D12_SHADER_VISIBILITY_ALL, 0, 0), + }); + + RDCASSERT(root); + + hr = m_pDevice->CreateRootSignature(0, root->GetBufferPointer(), root->GetBufferSize(), + __uuidof(ID3D12RootSignature), (void **)&m_EIPatchRootSig); + + if(FAILED(hr)) + { + RDCERR("Couldn't create execute indirect patching RootSig! HRESULT: %s", ToStr(hr).c_str()); + } + + SAFE_RELEASE(root); + } + + { + rdcstr mischlsl = GetEmbeddedResource(misc_hlsl); + + ID3DBlob *eiPatchCS; + + shaderCache->GetShaderBlob(mischlsl.c_str(), "RENDERDOC_ExecuteIndirectPatchCS", + D3DCOMPILE_WARNINGS_ARE_ERRORS, {}, "cs_5_0", &eiPatchCS); + + RDCASSERT(eiPatchCS); + + D3D12_COMPUTE_PIPELINE_STATE_DESC compPipeDesc = {}; + compPipeDesc.pRootSignature = m_EIPatchRootSig; + compPipeDesc.CS.BytecodeLength = eiPatchCS->GetBufferSize(); + compPipeDesc.CS.pShaderBytecode = eiPatchCS->GetBufferPointer(); + + hr = m_pDevice->CreateComputePipelineState(&compPipeDesc, __uuidof(ID3D12PipelineState), + (void **)&m_EIPatchPso); + + if(FAILED(hr)) + { + RDCERR("Couldn't create m_MeshPickPipe! 
HRESULT: %s", ToStr(hr).c_str()); + } + + SAFE_RELEASE(eiPatchCS); + } + + shaderCache->SetCaching(false); + + struct buffermapping + { + uint64_t origBase; + uint64_t origEnd; + uint64_t newBase; + uint64_t pad; + }; + rdcarray buffers; + + for(const GPUAddressRange &addr : origAddresses.addresses) + { + buffermapping b = {}; + b.origBase = addr.start; + b.origEnd = addr.end; + b.newBase = + m_pDevice->GetResourceManager()->GetLiveAs(addr.id)->GetGPUVirtualAddress(); + buffers.push_back(b); + } + + m_EIPatchBufferCount = (uint32_t)buffers.size(); + + if(!buffers.empty()) + { + m_EIPatchBufferData = MakeCBuffer(buffers.byteSize()); + FillBuffer(m_EIPatchBufferData, 0, buffers.data(), buffers.byteSize()); + } + + // estimated sizing for scratch buffers: + // 65536 maxcount + // 128 bytes command signature + // = 8MB per EI + // 64MB = ring for 8 such executes (or many more smaller) + { + D3D12_RESOURCE_DESC desc; + desc.Alignment = 0; + desc.DepthOrArraySize = 1; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.Height = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.MipLevels = 1; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Width = 64 * 1024 * 1024; + + D3D12_HEAP_PROPERTIES heapProps; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + hr = m_pDevice->CreateCommittedResource( + &heapProps, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, NULL, + __uuidof(ID3D12Resource), (void **)&m_EIPatchScratchBuffer); + + m_EIPatchScratchBuffer->SetName(L"m_EIPatchScratchBuffer"); + + if(FAILED(hr)) + { + RDCERR("Failed to create scratch buffer, HRESULT: %s", ToStr(hr).c_str()); + return; + } + } +} + void 
D3D12DebugManager::GetBufferData(ID3D12Resource *buffer, uint64_t offset, uint64_t length, bytebuf &ret) { diff --git a/renderdoc/driver/d3d12/d3d12_debug.h b/renderdoc/driver/d3d12/d3d12_debug.h index 7668e1896..d40218a61 100644 --- a/renderdoc/driver/d3d12/d3d12_debug.h +++ b/renderdoc/driver/d3d12/d3d12_debug.h @@ -155,6 +155,13 @@ public: DiscardType type, ID3D12Resource *res, const D3D12_DISCARD_REGION *region, D3D12_BARRIER_LAYOUT LayoutAfter); + rdcpair PatchExecuteIndirect(ID3D12GraphicsCommandListX *cmd, + const D3D12RenderState &state, + ID3D12CommandSignature *comSig, + ID3D12Resource *argBuf, UINT64 argBufOffset, + D3D12_GPU_VIRTUAL_ADDRESS countBufAddr, + UINT maxCount); + D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(CBVUAVSRVSlot slot); D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(RTVSlot slot); D3D12_CPU_DESCRIPTOR_HANDLE GetCPUHandle(DSVSlot slot); @@ -174,6 +181,7 @@ public: void PrepareTextureSampling(ID3D12Resource *resource, CompType typeCast, int &resType, BarrierSet &barrierSet); + void PrepareExecuteIndirectPatching(const GPUAddressRangeTracker &origAddresses); MeshDisplayPipelines CacheMeshDisplayPipelines(const MeshFormat &primary, const MeshFormat &secondary); @@ -249,6 +257,14 @@ private: ID3D12Resource *m_DiscardConstantsUndefined = NULL; ID3D12RootSignature *m_DiscardRootSig = NULL; + // Execute Indirect patching + ID3D12RootSignature *m_EIPatchRootSig = NULL; + ID3D12Resource *m_EIPatchBufferData = NULL; + uint32_t m_EIPatchBufferCount = 0; + ID3D12PipelineState *m_EIPatchPso = NULL; + ID3D12Resource *m_EIPatchScratchBuffer = NULL; + uint64_t m_EIPatchScratchOffset = 0; + std::map, ID3D12PipelineState *> m_DiscardPipes; std::map, ID3D12Resource *> m_DiscardPatterns; rdcarray m_DiscardBuffers; diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp b/renderdoc/driver/d3d12/d3d12_device.cpp index 61c264f99..7326007b6 100644 --- a/renderdoc/driver/d3d12/d3d12_device.cpp +++ b/renderdoc/driver/d3d12/d3d12_device.cpp @@ -36,6 +36,7 @@ #include 
"strings/string_utils.h" #include "d3d12_command_list.h" #include "d3d12_command_queue.h" +#include "d3d12_debug.h" #include "d3d12_rendertext.h" #include "d3d12_replay.h" #include "d3d12_resources.h" @@ -2452,6 +2453,8 @@ bool WrappedID3D12Device::Serialise_BeginCaptureFrame(SerialiserType &ser) if(IsReplayingAndReading() && IsLoading(m_State)) { m_InitialResourceStates = m_ResourceStates; + + GetDebugManager()->PrepareExecuteIndirectPatching(m_OrigGPUAddresses); } std::map initialStates; @@ -3321,7 +3324,7 @@ rdcarray WrappedID3D12Device::GetDebugMessages() msg.description = rdcstr(message->pDescription); // during capture add all messages. Otherwise only add this message if it's different to the - // last one - due to our replay with real and cracked lists we get many duplicated messages + // last one - we can sometimes get duplicated messages if(!IsLoading(m_State) || ret.empty() || !(ret.back() == msg)) ret.push_back(msg); @@ -4578,18 +4581,6 @@ RDResult WrappedID3D12Device::ReadLogInitialisation(RDCFile *rdc, bool storeStru m_Queue->GetParentAction().children.clear(); SetupActionPointers(m_Actions, GetReplay()->WriteFrameRecord().actionList); - - D3D12CommandData &cmd = *m_Queue->GetCommandData(); - - for(auto it = cmd.m_BakedCmdListInfo.begin(); it != cmd.m_BakedCmdListInfo.end(); it++) - { - for(size_t i = 0; i < it->second.crackedLists.size(); i++) - SAFE_RELEASE(it->second.crackedLists[i]); - it->second.crackedLists.clear(); - } - - for(auto it = cmd.m_CrackedAllocators.begin(); it != cmd.m_CrackedAllocators.end(); it++) - SAFE_RELEASE(it->second); } { @@ -4790,6 +4781,7 @@ void WrappedID3D12Device::ReplayLog(uint32_t startEventID, uint32_t endEventID, cmd.m_RenderState = cmd.m_BakedCmdListInfo[cmd.m_Partial[D3D12CommandData::Primary].partialParent].state; + cmd.m_RenderState.ResolvePendingIndirectState(this); if(D3D12_Debug_SingleSubmitFlushing()) { diff --git a/renderdoc/driver/d3d12/d3d12_device.h b/renderdoc/driver/d3d12/d3d12_device.h index 
16150ce69..c8d8739b1 100644 --- a/renderdoc/driver/d3d12/d3d12_device.h +++ b/renderdoc/driver/d3d12/d3d12_device.h @@ -748,7 +748,7 @@ private: Threading::CriticalSection m_DynDescLock; rdcarray m_DynamicDescriptorRefs; - GPUAddressRangeTracker m_GPUAddresses; + GPUAddressRangeTracker m_OrigGPUAddresses; // used both on capture and replay side to track resource states. Only locked // in capture @@ -910,9 +910,9 @@ public: m_DynamicDescriptorRefs.swap(refs); } - void GetResIDFromAddr(D3D12_GPU_VIRTUAL_ADDRESS addr, ResourceId &id, UINT64 &offs) + void GetResIDFromOrigAddr(D3D12_GPU_VIRTUAL_ADDRESS addr, ResourceId &id, UINT64 &offs) { - m_GPUAddresses.GetResIDFromAddr(addr, id, offs); + m_OrigGPUAddresses.GetResIDFromAddr(addr, id, offs); } bool IsCubemap(ResourceId id) { return m_Cubemaps.find(id) != m_Cubemaps.end(); } diff --git a/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp b/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp index 9b197beca..7f6924ce1 100644 --- a/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_device_rescreate_wrap.cpp @@ -78,7 +78,7 @@ bool WrappedID3D12Device::Serialise_CreateResource( range.end = gpuAddress + desc.Width; range.id = pResource; - m_GPUAddresses.AddTo(range); + m_OrigGPUAddresses.AddTo(range); } // check for device requirement diff --git a/renderdoc/driver/d3d12/d3d12_device_wrap.cpp b/renderdoc/driver/d3d12/d3d12_device_wrap.cpp index dea8bf3b1..9cd343192 100644 --- a/renderdoc/driver/d3d12/d3d12_device_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_device_wrap.cpp @@ -1756,7 +1756,7 @@ bool WrappedID3D12Device::Serialise_CreateCommandSignature(SerialiserType &ser, wrapped->sig.arguments.assign(Descriptor.pArgumentDescs, Descriptor.NumArgumentDescs); wrapped->sig.graphics = true; - wrapped->sig.numActions = 0; + wrapped->sig.PackedByteSize = 0; // From MSDN, command signatures are either graphics or compute so just search for dispatches: // "A given command signature is 
either an action or a compute command signature. If a command @@ -1765,13 +1765,49 @@ bool WrappedID3D12Device::Serialise_CreateCommandSignature(SerialiserType &ser, // signature." for(uint32_t i = 0; i < Descriptor.NumArgumentDescs; i++) { - if(Descriptor.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH) - wrapped->sig.graphics = false; - - if(Descriptor.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH || - Descriptor.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_DRAW || - Descriptor.pArgumentDescs[i].Type == D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED) - wrapped->sig.numActions++; + switch(Descriptor.pArgumentDescs[i].Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_DRAW_ARGUMENTS); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_DISPATCH_ARGUMENTS); + wrapped->sig.graphics = false; + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + { + wrapped->sig.PackedByteSize += + sizeof(uint32_t) * Descriptor.pArgumentDescs[i].Constant.Num32BitValuesToSet; + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_VERTEX_BUFFER_VIEW); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_INDEX_BUFFER_VIEW); + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: + { + wrapped->sig.PackedByteSize += sizeof(D3D12_GPU_VIRTUAL_ADDRESS); + break; + } + default: RDCERR("Unexpected argument type! 
%d", Descriptor.pArgumentDescs[i].Type); break; + } } ret = wrapped; diff --git a/renderdoc/driver/d3d12/d3d12_outputwindow.cpp b/renderdoc/driver/d3d12/d3d12_outputwindow.cpp index 739c7b487..5b87e3398 100644 --- a/renderdoc/driver/d3d12/d3d12_outputwindow.cpp +++ b/renderdoc/driver/d3d12/d3d12_outputwindow.cpp @@ -491,6 +491,7 @@ void D3D12Replay::GetOutputWindowData(uint64_t id, bytebuf &retData) // transition back std::swap(barrier.Transition.StateBefore, barrier.Transition.StateAfter); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; list->ResourceBarrier(1, &barrier); list->Close(); diff --git a/renderdoc/driver/d3d12/d3d12_state.cpp b/renderdoc/driver/d3d12/d3d12_state.cpp index 04f114537..8286dd621 100644 --- a/renderdoc/driver/d3d12/d3d12_state.cpp +++ b/renderdoc/driver/d3d12/d3d12_state.cpp @@ -62,6 +62,147 @@ ResourceId D3D12RenderState::GetDSVID() const return dsv.GetResResourceId(); } +void D3D12RenderState::ResolvePendingIndirectState(WrappedID3D12Device *device) +{ + if(indirectState.argsBuf == NULL) + return; + + device->GPUSync(); + + D3D12_RANGE range = {0, D3D12CommandData::m_IndirectSize}; + byte *mapPtr = NULL; + device->CheckHRESULT(indirectState.argsBuf->Map(0, &range, (void **)&mapPtr)); + + if(device->HasFatalError()) + return; + + WrappedID3D12CommandSignature *comSig = (WrappedID3D12CommandSignature *)indirectState.comSig; + + { + byte *data = mapPtr + indirectState.argsOffs; + mapPtr += comSig->sig.ByteStride; + + for(uint32_t argIdx = 0; argIdx < indirectState.argsToProcess; argIdx++) + { + uint32_t a = argIdx % comSig->sig.arguments.size(); + const D3D12_INDIRECT_ARGUMENT_DESC &arg = comSig->sig.arguments[a]; + + switch(arg.Type) + { + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW: + case D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED: + case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH: break; + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT: + { + size_t argSize = sizeof(uint32_t) * arg.Constant.Num32BitValuesToSet; + const uint32_t 
*data32 = (uint32_t *)data; + data += argSize; + + if(comSig->sig.graphics) + { + graphics.sigelems.resize_for_index(arg.ConstantBufferView.RootParameterIndex); + graphics.sigelems[arg.Constant.RootParameterIndex].constants.assign( + data32, arg.Constant.Num32BitValuesToSet); + } + else + { + compute.sigelems.resize_for_index(arg.ConstantBufferView.RootParameterIndex); + compute.sigelems[arg.Constant.RootParameterIndex].constants.assign( + data32, arg.Constant.Num32BitValuesToSet); + } + + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW: + { + const D3D12_VERTEX_BUFFER_VIEW *vb = (D3D12_VERTEX_BUFFER_VIEW *)data; + data += sizeof(D3D12_VERTEX_BUFFER_VIEW); + + ResourceId id; + uint64_t offs = 0; + D3D12_GPU_VIRTUAL_ADDRESS va = vb->BufferLocation; + device->GetResIDFromOrigAddr(va, id, offs); + + ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); + RDCASSERT(res); + + if(arg.VertexBuffer.Slot >= vbuffers.size()) + vbuffers.resize(arg.VertexBuffer.Slot + 1); + + vbuffers[arg.VertexBuffer.Slot].buf = GetResID(res); + vbuffers[arg.VertexBuffer.Slot].offs = offs; + vbuffers[arg.VertexBuffer.Slot].size = vb->SizeInBytes; + vbuffers[arg.VertexBuffer.Slot].stride = vb->StrideInBytes; + + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW: + { + const D3D12_INDEX_BUFFER_VIEW *ib = (D3D12_INDEX_BUFFER_VIEW *)data; + data += sizeof(D3D12_INDEX_BUFFER_VIEW); + + ResourceId id; + uint64_t offs = 0; + device->GetResIDFromOrigAddr(ib->BufferLocation, id, offs); + + ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); + RDCASSERT(res); + + ibuffer.buf = GetResID(res); + ibuffer.offs = offs; + ibuffer.size = ib->SizeInBytes; + ibuffer.bytewidth = ib->Format == DXGI_FORMAT_R32_UINT ? 
4 : 2; + + break; + } + case D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW: + case D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW: + { + const D3D12_GPU_VIRTUAL_ADDRESS *addr = (D3D12_GPU_VIRTUAL_ADDRESS *)data; + data += sizeof(D3D12_GPU_VIRTUAL_ADDRESS); + + ResourceId id; + uint64_t offs = 0; + device->GetResIDFromOrigAddr(*addr, id, offs); + + ID3D12Resource *res = GetResourceManager()->GetLiveAs(id); + + SignatureElementType t = eRootCBV; + if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW) + t = eRootSRV; + if(arg.Type == D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW) + t = eRootUAV; + + // ConstantBufferView, ShaderResourceView and UnorderedAccessView all have one member - + // RootParameterIndex + if(comSig->sig.graphics) + { + graphics.sigelems.resize_for_index(arg.ConstantBufferView.RootParameterIndex); + graphics.sigelems[arg.ConstantBufferView.RootParameterIndex] = + D3D12RenderState::SignatureElement(t, GetResID(res), offs); + } + else + { + compute.sigelems.resize_for_index(arg.ConstantBufferView.RootParameterIndex); + compute.sigelems[arg.ConstantBufferView.RootParameterIndex] = + D3D12RenderState::SignatureElement(t, GetResID(res), offs); + } + + break; + } + default: RDCERR("Unexpected argument type! 
%d", arg.Type); break; + } + } + } + + indirectState.argsBuf->Unmap(0, &range); + indirectState.argsBuf = NULL; + indirectState.argsOffs = 0; + indirectState.comSig = NULL; + indirectState.argsToProcess = 0; +} + void D3D12RenderState::ApplyState(WrappedID3D12Device *dev, ID3D12GraphicsCommandListX *cmd) const { D3D12_COMMAND_LIST_TYPE type = cmd->GetType(); diff --git a/renderdoc/driver/d3d12/d3d12_state.h b/renderdoc/driver/d3d12/d3d12_state.h index aea776478..0a3037a9e 100644 --- a/renderdoc/driver/d3d12/d3d12_state.h +++ b/renderdoc/driver/d3d12/d3d12_state.h @@ -218,4 +218,13 @@ struct D3D12RenderState D3D12DebugManager *GetDebugManager() const { return m_DebugManager; } D3D12DebugManager *m_DebugManager = NULL; + + struct IndirectPendingState + { + ID3D12Resource *argsBuf = NULL; + uint64_t argsOffs = 0; + ID3D12CommandSignature *comSig = NULL; + uint32_t argsToProcess = 0; + } indirectState; + void ResolvePendingIndirectState(WrappedID3D12Device *device); };