diff --git a/renderdoc/data/hlsl/raytracing.hlsl b/renderdoc/data/hlsl/raytracing.hlsl index 18351ec90..104352215 100644 --- a/renderdoc/data/hlsl/raytracing.hlsl +++ b/renderdoc/data/hlsl/raytracing.hlsl @@ -29,32 +29,29 @@ StructuredBuffer oldNewAddressesPair : register(t0); bool InRange(BlasAddressRange addressRange, GPUAddress address) { - if(lessEqual(addressRange.start, address) && lessThan(address, addressRange.end)) - { - return true; - } - - return false; + return (lessEqual(addressRange.start, address) && lessThan(address, addressRange.end)); } -// Each SV_GroupId corresponds to each of the BLAS (instance) in TLAS -[numthreads(1, 1, 1)] void RENDERDOC_PatchAccStructAddressCS(uint3 dispatchGroup - : SV_GroupId) { - GPUAddress instanceBlasAddress = instanceDescs[dispatchGroup.x].blasAddress; - +GPUAddress RemapCaptureToReplayAddress(GPUAddress instanceBlasAddress) +{ for(uint i = 0; i < addressCount; i++) { if(InRange(oldNewAddressesPair[i].oldAddress, instanceBlasAddress)) { GPUAddress offset = sub(instanceBlasAddress, oldNewAddressesPair[i].oldAddress.start); - instanceDescs[dispatchGroup.x].blasAddress = - add(oldNewAddressesPair[i].newAddress.start, offset); - return; + return add(oldNewAddressesPair[i].newAddress.start, offset); } } - // This might cause device hang but at least we won't access incorrect addresses - instanceDescs[dispatchGroup.x].blasAddress = 0; + // This might cause device hang but at least we won't access incorrect addresses + return 0; +} + +// Each SV_GroupId corresponds to each of the BLAS (instance) in TLAS +[numthreads(1, 1, 1)] void RENDERDOC_PatchAccStructAddressCS(uint3 dispatchGroup + : SV_GroupId) { + instanceDescs[dispatchGroup.x].blasAddress = + RemapCaptureToReplayAddress(instanceDescs[dispatchGroup.x].blasAddress); } StructuredBuffer stateObjects : register(t1); @@ -450,13 +447,16 @@ GPUAddress AlignRecordAddress(GPUAddress x) internalExecuteCount.Store(0, dispatchIndex); } -StructuredBuffer applicationBLASPointers : 
register(t0); +StructuredBuffer applicationBLASPointers : register(t1); RWStructuredBuffer internalTLASCopyArguments : register(u0); [numthreads(1, 1, 1)] void RENDERDOC_PrepareTLASCopyIndirectExecuteCS(uint3 dispatchThread : SV_DispatchThreadID) { TLASCopyExecute execute = (TLASCopyExecute)0; - execute.blasPointer = applicationBLASPointers[dispatchThread.x]; + if(addressCount > 0) + execute.blasPointer = RemapCaptureToReplayAddress(applicationBLASPointers[dispatchThread.x]); + else + execute.blasPointer = applicationBLASPointers[dispatchThread.x]; execute.index = dispatchThread.x; execute.dispatchDim = uint3(1, 1, 1); @@ -464,7 +464,7 @@ RWStructuredBuffer internalTLASCopyArguments : register(u0); } // this is from the EI argument above, so we always copy from [0] to indirect the pointer -StructuredBuffer copySource : register(t0); +StructuredBuffer copySource : register(t1); // also from the EI argument above cbuffer TLASCopyExecuteCB : register(b0) diff --git a/renderdoc/driver/d3d12/d3d12_command_list.h b/renderdoc/driver/d3d12/d3d12_command_list.h index db89e7969..d936321c9 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list.h +++ b/renderdoc/driver/d3d12/d3d12_command_list.h @@ -590,7 +590,7 @@ public: const void *pExecutionParametersData, _In_ SIZE_T ExecutionParametersDataSizeInBytes); - bool PatchAccStructBlasAddress(const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *accStructInput, + bool PatchAccStructBlasAddress(D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC &accStructInput, ID3D12GraphicsCommandList4 *unwrappedList, BakedCmdListInfo::PatchRaytracing *patchRaytracing); diff --git a/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp b/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp index 754a34536..43c633db3 100644 --- a/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp +++ b/renderdoc/driver/d3d12/d3d12_command_list4_wrap.cpp @@ -804,11 +804,11 @@ bool WrappedID3D12GraphicsCommandList::ProcessASBuildAfterSubmission(ResourceId } bool 
WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress( - const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *accStructInput, + D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC &accStructInput, ID3D12GraphicsCommandList4 *unwrappedList, BakedCmdListInfo::PatchRaytracing *patchRaytracing) { - if(accStructInput->Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL && - accStructInput->Inputs.NumDescs > 0) + if(accStructInput.Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL && + accStructInput.Inputs.NumDescs > 0) { // Here, we are uploading the old BLAS addresses, and comparing the BLAS // addresses in the TLAS and patching it with the corresponding new address. @@ -818,26 +818,34 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress( // Create a resource for patched instance desc; we don't // need a resource of same size but of same number of instances in the TLAS with uav uint64_t totalInstancesSize = - accStructInput->Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC); + accStructInput.Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC); totalInstancesSize = AlignUp(totalInstancesSize, D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT); ResourceId instanceResourceId = - WrappedID3D12Resource::GetResIDFromAddr(accStructInput->Inputs.InstanceDescs); + WrappedID3D12Resource::GetResIDFromAddr(accStructInput.Inputs.InstanceDescs); ID3D12Resource *instanceResource = GetResourceManager()->GetCurrentAs(instanceResourceId)->GetReal(); D3D12_GPU_VIRTUAL_ADDRESS instanceGpuAddress = instanceResource->GetGPUVirtualAddress(); - uint64_t instanceResOffset = accStructInput->Inputs.InstanceDescs - instanceGpuAddress; + uint64_t instanceResOffset = accStructInput.Inputs.InstanceDescs - instanceGpuAddress; - D3D12_RESOURCE_STATES instanceResState = - m_pDevice->GetSubresourceStates(instanceResourceId)[0].ToStates(); + D3D12_RESOURCE_STATES instanceResState = D3D12_RESOURCE_STATES(); bool needInitialTransition 
= false; - if(!(instanceResState & D3D12_RESOURCE_STATE_COPY_SOURCE)) + // our unwrapping of array-of-pointers will read from this as an SRV so we don't need to transition + if(accStructInput.Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS) { - needInitialTransition = true; + needInitialTransition = false; + } + else + { + instanceResState = m_pDevice->GetSubresourceStates(instanceResourceId)[0].ToStates(); + if(!(instanceResState & D3D12_RESOURCE_STATE_COPY_SOURCE)) + { + needInitialTransition = true; + } } { @@ -869,9 +877,33 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress( unwrappedList->ResourceBarrier((UINT)resBarriers.size(), resBarriers.data()); } - unwrappedList->CopyBufferRegion(patchRaytracing->m_patchedInstanceBuffer->Resource(), - patchRaytracing->m_patchedInstanceBuffer->Offset(), - instanceResource, instanceResOffset, totalInstancesSize); + ID3D12Resource *addressPairRes = m_pDevice->GetBLASAddressBufferResource(); + D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress = addressPairRes->GetGPUVirtualAddress(); + + uint64_t addressCount = m_pDevice->GetBLASAddressCount(); + + if(accStructInput.Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS) + { + // unroll the instances list into a flat array (which will then get patched below in-place) + D3D12GpuBuffer *tempBuffer = rtManager->UnrollBLASInstancesList( + unwrappedList, accStructInput.Inputs, addressPairResAddress, addressCount, + patchRaytracing->m_patchedInstanceBuffer); + + accStructInput.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY; + + // keep this buffer around until the parent cmd executes even if we reallocate soon + tempBuffer->AddRef(); + AddSubmissionASBuildCallback(true, [tempBuffer]() { + tempBuffer->Release(); + return true; + }); + } + else + { + unwrappedList->CopyBufferRegion(patchRaytracing->m_patchedInstanceBuffer->Resource(), + patchRaytracing->m_patchedInstanceBuffer->Offset(), + instanceResource, instanceResOffset,
totalInstancesSize); + } D3D12AccStructPatchInfo patchInfo = rtManager->GetAccStructPatchInfo(); @@ -920,11 +952,6 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress( unwrappedList->ResourceBarrier(1, &resBarrier); } - ID3D12Resource *addressPairRes = m_pDevice->GetBLASAddressBufferResource(); - D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress = addressPairRes->GetGPUVirtualAddress(); - - uint64_t addressCount = m_pDevice->GetBLASAddressCount(); - unwrappedList->SetPipelineState(patchInfo.m_pipeline); unwrappedList->SetComputeRootSignature(patchInfo.m_rootSignature); unwrappedList->SetComputeRoot32BitConstant((UINT)D3D12PatchTLASBuildParam::RootConstantBuffer, @@ -934,7 +961,7 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress( unwrappedList->SetComputeRootUnorderedAccessView( (UINT)D3D12PatchTLASBuildParam::RootPatchedAddressUav, patchRaytracing->m_patchedInstanceBuffer->Address()); - unwrappedList->Dispatch(accStructInput->Inputs.NumDescs, 1, 1); + unwrappedList->Dispatch(accStructInput.Inputs.NumDescs, 1, 1); { D3D12_RESOURCE_BARRIER resBarrier; @@ -996,7 +1023,7 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BuildRaytracingAccelerationStru AccStructDesc.Inputs.NumDescs > 0) { patchInfo.m_patched = false; - PatchAccStructBlasAddress(&AccStructDesc, Unwrap4(list), &patchInfo); + PatchAccStructBlasAddress(AccStructDesc, Unwrap4(list), &patchInfo); if(patchInfo.m_patched) { AccStructDesc.Inputs.InstanceDescs = patchInfo.m_patchedInstanceBuffer->Address(); @@ -1031,7 +1058,7 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BuildRaytracingAccelerationStru totalInstancesSize, D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT, &patchInfo.m_patchedInstanceBuffer)) { - PatchAccStructBlasAddress(&AccStructDesc, Unwrap4(pCommandList), &patchInfo); + PatchAccStructBlasAddress(AccStructDesc, Unwrap4(pCommandList), &patchInfo); if(patchInfo.m_patched) { diff --git a/renderdoc/driver/d3d12/d3d12_device.cpp 
b/renderdoc/driver/d3d12/d3d12_device.cpp index a8f43f1f5..5aabcebcd 100644 --- a/renderdoc/driver/d3d12/d3d12_device.cpp +++ b/renderdoc/driver/d3d12/d3d12_device.cpp @@ -3228,7 +3228,6 @@ void WrappedID3D12Device::UploadBLASBufferAddresses() if(resManager->HasLiveResource(resId)) { WrappedID3D12Resource *wrappedRes = (WrappedID3D12Resource *)resManager->GetLiveResource(resId); - if(wrappedRes->IsAccelerationStructureResource()) { BlasAddressPair addressPair; addressPair.oldAddress.start = addressRange.start; @@ -3236,7 +3235,15 @@ void WrappedID3D12Device::UploadBLASBufferAddresses() addressPair.newAddress.start = wrappedRes->GetGPUVirtualAddress(); addressPair.newAddress.end = addressPair.newAddress.start + wrappedRes->GetDesc().Width; - blasAddressPair.push_back(addressPair); + + // ASB addresses are far more likely to be used so put them at the front to be found first + // as this isn't sorted. + // The only time we are looking up 'normal' buffers on the GPU to patch is when we're + // unrolling an ARRAY_OF_POINTERS list on replay when building a TLAS + if(wrappedRes->IsAccelerationStructureResource()) + blasAddressPair.insert(0, addressPair); + else + blasAddressPair.push_back(addressPair); } } } diff --git a/renderdoc/driver/d3d12/d3d12_manager.cpp b/renderdoc/driver/d3d12/d3d12_manager.cpp index 0af76148b..056589c68 100644 --- a/renderdoc/driver/d3d12/d3d12_manager.cpp +++ b/renderdoc/driver/d3d12/d3d12_manager.cpp @@ -784,11 +784,7 @@ void D3D12RTManager::InitInternalResources() { InitReplayBlasPatchingResources(); } - else - { - // only needed during capture - InitTLASInstanceCopyingResources(); - } + InitTLASInstanceCopyingResources(); InitRayDispatchPatchingResources(); } @@ -1319,67 +1315,21 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( // {BLASDescAddr, BLASDescAddr, BlasDescAddr...} to // {BLASDescAddr, dispatch(1,1,1), BLASDescAddr, dispatch(1,1,1), ...} - const uint64_t argSize = AlignUp(sizeof(TLASCopyExecute) * ret->NumBLAS, 256ULL); - - 
if(m_TLASCopyingData.ArgsBuffer == NULL || m_TLASCopyingData.ArgsBuffer->Size() < argSize) - { - // needs to be dedicated so we can sure it's not shared with anything when we transition it... - m_GPUBufferAllocator.Alloc( - D3D12GpuBufferHeapType::DefaultHeapWithUav, D3D12GpuBufferHeapMemoryFlag::Dedicated, - argSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, &m_TLASCopyingData.ArgsBuffer); - } + const uint64_t unpackedLayoutSize = byteSize; if(m_TLASCopyingData.ScratchBuffer == NULL || - m_TLASCopyingData.ScratchBuffer->Size() < byteSize) + m_TLASCopyingData.ScratchBuffer->Size() < unpackedLayoutSize) { m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::DefaultHeapWithUav, - D3D12GpuBufferHeapMemoryFlag::Default, byteSize, + D3D12GpuBufferHeapMemoryFlag::Default, unpackedLayoutSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, &m_TLASCopyingData.ScratchBuffer); } - // keep these buffers around until the parent cmd executes even if we reallocate soon - m_TLASCopyingData.ArgsBuffer->AddRef(); - m_TLASCopyingData.ScratchBuffer->AddRef(); - ret->cleanupCallback = [this]() { - m_TLASCopyingData.ArgsBuffer->Release(); - m_TLASCopyingData.ScratchBuffer->Release(); - return true; - }; + UnrollBLASInstancesList(unwrappedCmd, inputs, 0, 0, m_TLASCopyingData.ScratchBuffer); - // do a normal dispatch to set up the EI argument buffer in temporary scratch memory - unwrappedCmd->SetPipelineState(m_TLASCopyingData.PreparePipe); - unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig); - // dummy, will be set by the EI argument - unwrappedCmd->SetComputeRoot32BitConstant( - (UINT)D3D12TLASInstanceCopyParam::IndirectArgumentIndex, 0, 0); - unwrappedCmd->SetComputeRootShaderResourceView((UINT)D3D12TLASInstanceCopyParam::SourceSRV, - inputs.InstanceDescs); - unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV, - m_TLASCopyingData.ArgsBuffer->Address()); - unwrappedCmd->Dispatch(ret->NumBLAS, 1, 1); - - // make sure the argument 
buffer is ready + // copy to readback buffer (can't write to it directly) D3D12_RESOURCE_BARRIER barrier = {}; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = m_TLASCopyingData.ArgsBuffer->Resource(); - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; - - unwrappedCmd->ResourceBarrier(1, &barrier); - - unwrappedCmd->SetPipelineState(m_TLASCopyingData.CopyPipe); - unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig); - // dummy, will be set by the EI argument - unwrappedCmd->SetComputeRoot32BitConstant( - (UINT)D3D12TLASInstanceCopyParam::IndirectArgumentIndex, 0, 0); - unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV, - m_TLASCopyingData.ScratchBuffer->Address()); - // the EI takes care of both setting the source SRV and the index constant - unwrappedCmd->ExecuteIndirect(m_TLASCopyingData.IndirectSig, ret->NumBLAS, - m_TLASCopyingData.ArgsBuffer->Resource(), - m_TLASCopyingData.ArgsBuffer->Offset(), NULL, 0); - barrier.Transition.pResource = m_TLASCopyingData.ScratchBuffer->Resource(); barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; @@ -1387,7 +1337,16 @@ ASBuildData *D3D12RTManager::CopyBuildInputs( unwrappedCmd->CopyBufferRegion(ret->buffer->Resource(), ret->buffer->Offset(), m_TLASCopyingData.ScratchBuffer->Resource(), - m_TLASCopyingData.ScratchBuffer->Offset(), byteSize); + m_TLASCopyingData.ScratchBuffer->Offset(), unpackedLayoutSize); + + // keep these buffers around until the parent cmd executes even if we reallocate soon + m_TLASCopyingData.ArgsBuffer->AddRef(); + m_TLASCopyingData.ScratchBuffer->AddRef(); + ret->cleanupCallback = [this]() { + m_TLASCopyingData.ArgsBuffer->Release(); + m_TLASCopyingData.ScratchBuffer->Release(); + return true; + }; } } } @@ -1599,6 +1558,63 @@
ASBuildData *D3D12RTManager::CopyBuildInputs( return ret; } +D3D12GpuBuffer *D3D12RTManager::UnrollBLASInstancesList( + ID3D12GraphicsCommandList4 *unwrappedCmd, + const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs, + D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress, uint64_t addressCount, + D3D12GpuBuffer *copyDestUAV) +{ + const uint64_t indirectArgSize = AlignUp(sizeof(TLASCopyExecute) * inputs.NumDescs, 256ULL); + + if(m_TLASCopyingData.ArgsBuffer == NULL || m_TLASCopyingData.ArgsBuffer->Size() < indirectArgSize) + { + // needs to be dedicated so we can be sure it's not shared with anything when we transition it... + m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::DefaultHeapWithUav, + D3D12GpuBufferHeapMemoryFlag::Dedicated, indirectArgSize, + D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, + &m_TLASCopyingData.ArgsBuffer); + } + + // do a normal dispatch to set up the EI argument buffer in temporary scratch memory + unwrappedCmd->SetPipelineState(m_TLASCopyingData.PreparePipe); + unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig); + unwrappedCmd->SetComputeRoot32BitConstant((UINT)D3D12TLASInstanceCopyParam::RootCB, + (UINT)addressCount, 0); + unwrappedCmd->SetComputeRootShaderResourceView((UINT)D3D12TLASInstanceCopyParam::SourceSRV, + inputs.InstanceDescs); + unwrappedCmd->SetComputeRootShaderResourceView( + (UINT)D3D12TLASInstanceCopyParam::RootAddressPairSrv, + addressPairResAddress ?
addressPairResAddress : inputs.InstanceDescs); + unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV, + m_TLASCopyingData.ArgsBuffer->Address()); + unwrappedCmd->Dispatch(inputs.NumDescs, 1, 1); + + // make sure the argument buffer is ready + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = m_TLASCopyingData.ArgsBuffer->Resource(); + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + + unwrappedCmd->ResourceBarrier(1, &barrier); + + unwrappedCmd->SetPipelineState(m_TLASCopyingData.CopyPipe); + unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig); + // dummy, will be set by the EI argument + unwrappedCmd->SetComputeRoot32BitConstant((UINT)D3D12TLASInstanceCopyParam::RootCB, 0, 0); + unwrappedCmd->SetComputeRootShaderResourceView( + (UINT)D3D12TLASInstanceCopyParam::RootAddressPairSrv, + addressPairResAddress ? 
addressPairResAddress : inputs.InstanceDescs); + unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV, + copyDestUAV->Address()); + // the EI takes care of both setting the source SRV and the index constant + unwrappedCmd->ExecuteIndirect(m_TLASCopyingData.IndirectSig, inputs.NumDescs, + m_TLASCopyingData.ArgsBuffer->Resource(), + m_TLASCopyingData.ArgsBuffer->Offset(), NULL, 0); + + return m_TLASCopyingData.ArgsBuffer; +} + void D3D12RTManager::CopyFromVA(ID3D12GraphicsCommandList4 *unwrappedCmd, ID3D12Resource *dstRes, uint64_t dstOffset, D3D12_GPU_VIRTUAL_ADDRESS sourceVA, uint64_t byteSize) @@ -1923,7 +1939,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources() rdcarray rootParameters; rootParameters.reserve((uint16_t)D3D12TLASInstanceCopyParam::Count); - // only used in the EI + // used as an index in the EI, and as an address count in the prepare step { D3D12_ROOT_PARAMETER1 rootParam; rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; @@ -1938,7 +1954,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources() D3D12_ROOT_PARAMETER1 rootParam; rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - rootParam.Descriptor.ShaderRegister = 0; + rootParam.Descriptor.ShaderRegister = 1; rootParam.Descriptor.RegisterSpace = 0; rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; rootParameters.push_back(rootParam); @@ -1954,6 +1970,16 @@ void D3D12RTManager::InitTLASInstanceCopyingResources() rootParameters.push_back(rootParam); } + { + D3D12_ROOT_PARAMETER1 rootParam; + rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParam.Descriptor.ShaderRegister = 0; + rootParam.Descriptor.RegisterSpace = 0; + rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + rootParameters.push_back(rootParam); + } + RDCASSERT(rootParameters.size() == 
uint32_t(D3D12TLASInstanceCopyParam::Count)); bytebuf rootSig = EncodeRootSig(m_wrappedDevice->RootSigVersion(), rootParameters, @@ -2029,7 +2055,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources() args[0].Constant.DestOffsetIn32BitValues = 0; args[0].Constant.Num32BitValuesToSet = 1; - args[0].Constant.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::IndirectArgumentIndex; + args[0].Constant.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::RootCB; args[1].ShaderResourceView.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::SourceSRV; diff --git a/renderdoc/driver/d3d12/d3d12_manager.h b/renderdoc/driver/d3d12/d3d12_manager.h index 0e7f7bb4f..7313942f6 100644 --- a/renderdoc/driver/d3d12/d3d12_manager.h +++ b/renderdoc/driver/d3d12/d3d12_manager.h @@ -981,9 +981,10 @@ enum class D3D12PatchTLASBuildParam enum class D3D12TLASInstanceCopyParam { - IndirectArgumentIndex, + RootCB, SourceSRV, DestUAV, + RootAddressPairSrv, Count }; @@ -1013,9 +1014,8 @@ enum class D3D12PatchRayDispatchParam struct D3D12AccStructPatchInfo { - D3D12AccStructPatchInfo() : m_rootSignature(NULL), m_pipeline(NULL) {} - ID3D12RootSignature *m_rootSignature; - ID3D12PipelineState *m_pipeline; + ID3D12RootSignature *m_rootSignature = NULL; + ID3D12PipelineState *m_pipeline = NULL; }; struct PatchedRayDispatch @@ -1210,6 +1210,12 @@ public: ASBuildData *CopyBuildInputs(ID3D12GraphicsCommandList4 *unwrappedCmd, const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs); + D3D12GpuBuffer *UnrollBLASInstancesList( + ID3D12GraphicsCommandList4 *unwrappedCmd, + const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs, + D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress, uint64_t addressCount, + D3D12GpuBuffer *copyDestUAV); + PatchedRayDispatch PatchRayDispatch(ID3D12GraphicsCommandList4 *unwrappedCmd, rdcarray heaps, const D3D12_DISPATCH_RAYS_DESC &desc);