mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-13 05:20:45 +00:00
Handle unrolling array-of-pointers for replay-time AS builds
This commit is contained in:
@@ -29,32 +29,29 @@ StructuredBuffer<BlasAddressPair> oldNewAddressesPair : register(t0);
|
||||
|
||||
// Returns true when `address` falls inside `addressRange`, i.e. when both
// lessEqual(addressRange.start, address) and lessThan(address, addressRange.end) hold.
// Assuming those helpers implement <= and < on GPUAddress (they are defined elsewhere - confirm),
// this is a half-open [start, end) containment test.
bool InRange(BlasAddressRange addressRange, GPUAddress address)
{
  // single expression; the previous if/return-true/return-false form left an unreachable
  // trailing return behind it
  return (lessEqual(addressRange.start, address) && lessThan(address, addressRange.end));
}
|
||||
|
||||
// Each SV_GroupId corresponds to each of the BLAS (instance) in TLAS
|
||||
[numthreads(1, 1, 1)] void RENDERDOC_PatchAccStructAddressCS(uint3 dispatchGroup
|
||||
: SV_GroupId) {
|
||||
GPUAddress instanceBlasAddress = instanceDescs[dispatchGroup.x].blasAddress;
|
||||
|
||||
GPUAddress RemapCaptureToReplayAddress(GPUAddress instanceBlasAddress)
|
||||
{
|
||||
for(uint i = 0; i < addressCount; i++)
|
||||
{
|
||||
if(InRange(oldNewAddressesPair[i].oldAddress, instanceBlasAddress))
|
||||
{
|
||||
GPUAddress offset = sub(instanceBlasAddress, oldNewAddressesPair[i].oldAddress.start);
|
||||
instanceDescs[dispatchGroup.x].blasAddress =
|
||||
add(oldNewAddressesPair[i].newAddress.start, offset);
|
||||
return;
|
||||
return add(oldNewAddressesPair[i].newAddress.start, offset);
|
||||
}
|
||||
}
|
||||
|
||||
// This might cause device hang but at least we won't access incorrect addresses
|
||||
instanceDescs[dispatchGroup.x].blasAddress = 0;
|
||||
// This might cause device hang but at least we won't access incorrect addresses
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Each SV_GroupId corresponds to each of the BLAS (instance) in TLAS
|
||||
[numthreads(1, 1, 1)] void RENDERDOC_PatchAccStructAddressCS(uint3 dispatchGroup
|
||||
: SV_GroupId) {
|
||||
instanceDescs[dispatchGroup.x].blasAddress =
|
||||
RemapCaptureToReplayAddress(instanceDescs[dispatchGroup.x].blasAddress);
|
||||
}
|
||||
|
||||
StructuredBuffer<StateObjectLookup> stateObjects : register(t1);
|
||||
@@ -450,13 +447,16 @@ GPUAddress AlignRecordAddress(GPUAddress x)
|
||||
internalExecuteCount.Store(0, dispatchIndex);
|
||||
}
|
||||
|
||||
StructuredBuffer<uint2> applicationBLASPointers : register(t0);
|
||||
StructuredBuffer<uint2> applicationBLASPointers : register(t1);
|
||||
RWStructuredBuffer<TLASCopyExecute> internalTLASCopyArguments : register(u0);
|
||||
|
||||
[numthreads(1, 1, 1)] void RENDERDOC_PrepareTLASCopyIndirectExecuteCS(uint3 dispatchThread
|
||||
: SV_DispatchThreadID) {
|
||||
TLASCopyExecute execute = (TLASCopyExecute)0;
|
||||
execute.blasPointer = applicationBLASPointers[dispatchThread.x];
|
||||
if(addressCount > 0)
|
||||
execute.blasPointer = RemapCaptureToReplayAddress(applicationBLASPointers[dispatchThread.x]);
|
||||
else
|
||||
execute.blasPointer = applicationBLASPointers[dispatchThread.x];
|
||||
execute.index = dispatchThread.x;
|
||||
execute.dispatchDim = uint3(1, 1, 1);
|
||||
|
||||
@@ -464,7 +464,7 @@ RWStructuredBuffer<TLASCopyExecute> internalTLASCopyArguments : register(u0);
|
||||
}
|
||||
|
||||
// this is from the EI argument above, so we always copy from [0] to indirect the pointer
|
||||
StructuredBuffer<InstanceDesc> copySource : register(t0);
|
||||
StructuredBuffer<InstanceDesc> copySource : register(t1);
|
||||
|
||||
// also from the EI argument above
|
||||
cbuffer TLASCopyExecuteCB : register(b0)
|
||||
|
||||
@@ -590,7 +590,7 @@ public:
|
||||
const void *pExecutionParametersData,
|
||||
_In_ SIZE_T ExecutionParametersDataSizeInBytes);
|
||||
|
||||
bool PatchAccStructBlasAddress(const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *accStructInput,
|
||||
bool PatchAccStructBlasAddress(D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC &accStructInput,
|
||||
ID3D12GraphicsCommandList4 *unwrappedList,
|
||||
BakedCmdListInfo::PatchRaytracing *patchRaytracing);
|
||||
|
||||
|
||||
@@ -804,11 +804,11 @@ bool WrappedID3D12GraphicsCommandList::ProcessASBuildAfterSubmission(ResourceId
|
||||
}
|
||||
|
||||
bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
|
||||
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *accStructInput,
|
||||
D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC &accStructInput,
|
||||
ID3D12GraphicsCommandList4 *unwrappedList, BakedCmdListInfo::PatchRaytracing *patchRaytracing)
|
||||
{
|
||||
if(accStructInput->Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL &&
|
||||
accStructInput->Inputs.NumDescs > 0)
|
||||
if(accStructInput.Inputs.Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL &&
|
||||
accStructInput.Inputs.NumDescs > 0)
|
||||
{
|
||||
// Here, we are uploading the old BLAS addresses, and comparing the BLAS
|
||||
// addresses in the TLAS and patching it with the corresponding new address.
|
||||
@@ -818,26 +818,34 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
|
||||
// Create a resource for patched instance desc; we don't
|
||||
// need a resource of same size but of same number of instances in the TLAS with uav
|
||||
uint64_t totalInstancesSize =
|
||||
accStructInput->Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC);
|
||||
accStructInput.Inputs.NumDescs * sizeof(D3D12_RAYTRACING_INSTANCE_DESC);
|
||||
|
||||
totalInstancesSize =
|
||||
AlignUp<uint64_t>(totalInstancesSize, D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT);
|
||||
|
||||
ResourceId instanceResourceId =
|
||||
WrappedID3D12Resource::GetResIDFromAddr(accStructInput->Inputs.InstanceDescs);
|
||||
WrappedID3D12Resource::GetResIDFromAddr(accStructInput.Inputs.InstanceDescs);
|
||||
|
||||
ID3D12Resource *instanceResource =
|
||||
GetResourceManager()->GetCurrentAs<WrappedID3D12Resource>(instanceResourceId)->GetReal();
|
||||
D3D12_GPU_VIRTUAL_ADDRESS instanceGpuAddress = instanceResource->GetGPUVirtualAddress();
|
||||
uint64_t instanceResOffset = accStructInput->Inputs.InstanceDescs - instanceGpuAddress;
|
||||
uint64_t instanceResOffset = accStructInput.Inputs.InstanceDescs - instanceGpuAddress;
|
||||
|
||||
D3D12_RESOURCE_STATES instanceResState =
|
||||
m_pDevice->GetSubresourceStates(instanceResourceId)[0].ToStates();
|
||||
D3D12_RESOURCE_STATES instanceResState = D3D12_RESOURCE_STATES();
|
||||
|
||||
bool needInitialTransition = false;
|
||||
if(!(instanceResState & D3D12_RESOURCE_STATE_COPY_SOURCE))
|
||||
// our unwrapping of array-of-pointers will read from this as an SRV so we don't need to transition
|
||||
if(accStructInput.Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
|
||||
{
|
||||
needInitialTransition = true;
|
||||
needInitialTransition = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
instanceResState = m_pDevice->GetSubresourceStates(instanceResourceId)[0].ToStates();
|
||||
if(!(instanceResState & D3D12_RESOURCE_STATE_COPY_SOURCE))
|
||||
{
|
||||
needInitialTransition = true;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@@ -869,9 +877,33 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
|
||||
unwrappedList->ResourceBarrier((UINT)resBarriers.size(), resBarriers.data());
|
||||
}
|
||||
|
||||
unwrappedList->CopyBufferRegion(patchRaytracing->m_patchedInstanceBuffer->Resource(),
|
||||
patchRaytracing->m_patchedInstanceBuffer->Offset(),
|
||||
instanceResource, instanceResOffset, totalInstancesSize);
|
||||
ID3D12Resource *addressPairRes = m_pDevice->GetBLASAddressBufferResource();
|
||||
D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress = addressPairRes->GetGPUVirtualAddress();
|
||||
|
||||
uint64_t addressCount = m_pDevice->GetBLASAddressCount();
|
||||
|
||||
if(accStructInput.Inputs.DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
|
||||
{
|
||||
// unroll the instances list into a flat array (which will then get patched below in-place)
|
||||
D3D12GpuBuffer *tempBuffer = rtManager->UnrollBLASInstancesList(
|
||||
unwrappedList, accStructInput.Inputs, addressPairResAddress, addressCount,
|
||||
patchRaytracing->m_patchedInstanceBuffer);
|
||||
|
||||
accStructInput.Inputs.DescsLayout = D3D12_ELEMENTS_LAYOUT_ARRAY;
|
||||
|
||||
// keep this buffer around until the parent cmd executes even if we reallocate soon
|
||||
tempBuffer->AddRef();
|
||||
AddSubmissionASBuildCallback(true, [tempBuffer]() {
|
||||
tempBuffer->Release();
|
||||
return true;
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
unwrappedList->CopyBufferRegion(patchRaytracing->m_patchedInstanceBuffer->Resource(),
|
||||
patchRaytracing->m_patchedInstanceBuffer->Offset(),
|
||||
instanceResource, instanceResOffset, totalInstancesSize);
|
||||
}
|
||||
|
||||
D3D12AccStructPatchInfo patchInfo = rtManager->GetAccStructPatchInfo();
|
||||
|
||||
@@ -920,11 +952,6 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
|
||||
unwrappedList->ResourceBarrier(1, &resBarrier);
|
||||
}
|
||||
|
||||
ID3D12Resource *addressPairRes = m_pDevice->GetBLASAddressBufferResource();
|
||||
D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress = addressPairRes->GetGPUVirtualAddress();
|
||||
|
||||
uint64_t addressCount = m_pDevice->GetBLASAddressCount();
|
||||
|
||||
unwrappedList->SetPipelineState(patchInfo.m_pipeline);
|
||||
unwrappedList->SetComputeRootSignature(patchInfo.m_rootSignature);
|
||||
unwrappedList->SetComputeRoot32BitConstant((UINT)D3D12PatchTLASBuildParam::RootConstantBuffer,
|
||||
@@ -934,7 +961,7 @@ bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
|
||||
unwrappedList->SetComputeRootUnorderedAccessView(
|
||||
(UINT)D3D12PatchTLASBuildParam::RootPatchedAddressUav,
|
||||
patchRaytracing->m_patchedInstanceBuffer->Address());
|
||||
unwrappedList->Dispatch(accStructInput->Inputs.NumDescs, 1, 1);
|
||||
unwrappedList->Dispatch(accStructInput.Inputs.NumDescs, 1, 1);
|
||||
|
||||
{
|
||||
D3D12_RESOURCE_BARRIER resBarrier;
|
||||
@@ -996,7 +1023,7 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BuildRaytracingAccelerationStru
|
||||
AccStructDesc.Inputs.NumDescs > 0)
|
||||
{
|
||||
patchInfo.m_patched = false;
|
||||
PatchAccStructBlasAddress(&AccStructDesc, Unwrap4(list), &patchInfo);
|
||||
PatchAccStructBlasAddress(AccStructDesc, Unwrap4(list), &patchInfo);
|
||||
if(patchInfo.m_patched)
|
||||
{
|
||||
AccStructDesc.Inputs.InstanceDescs = patchInfo.m_patchedInstanceBuffer->Address();
|
||||
@@ -1031,7 +1058,7 @@ bool WrappedID3D12GraphicsCommandList::Serialise_BuildRaytracingAccelerationStru
|
||||
totalInstancesSize, D3D12_RAYTRACING_INSTANCE_DESCS_BYTE_ALIGNMENT,
|
||||
&patchInfo.m_patchedInstanceBuffer))
|
||||
{
|
||||
PatchAccStructBlasAddress(&AccStructDesc, Unwrap4(pCommandList), &patchInfo);
|
||||
PatchAccStructBlasAddress(AccStructDesc, Unwrap4(pCommandList), &patchInfo);
|
||||
|
||||
if(patchInfo.m_patched)
|
||||
{
|
||||
|
||||
@@ -3228,7 +3228,6 @@ void WrappedID3D12Device::UploadBLASBufferAddresses()
|
||||
if(resManager->HasLiveResource(resId))
|
||||
{
|
||||
WrappedID3D12Resource *wrappedRes = (WrappedID3D12Resource *)resManager->GetLiveResource(resId);
|
||||
if(wrappedRes->IsAccelerationStructureResource())
|
||||
{
|
||||
BlasAddressPair addressPair;
|
||||
addressPair.oldAddress.start = addressRange.start;
|
||||
@@ -3236,7 +3235,15 @@ void WrappedID3D12Device::UploadBLASBufferAddresses()
|
||||
|
||||
addressPair.newAddress.start = wrappedRes->GetGPUVirtualAddress();
|
||||
addressPair.newAddress.end = addressPair.newAddress.start + wrappedRes->GetDesc().Width;
|
||||
blasAddressPair.push_back(addressPair);
|
||||
|
||||
// ASB addresses are far more likely to be used so put them at the front to be found first
|
||||
// as this isn't sorted.
|
||||
// The only time we are looking up 'normal' buffers on the GPU to patch is when we're
|
||||
// unrolling an ARRAY_OF_POINTERS list on replay when building a TLAS
|
||||
if(wrappedRes->IsAccelerationStructureResource())
|
||||
blasAddressPair.insert(0, addressPair);
|
||||
else
|
||||
blasAddressPair.push_back(addressPair);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -784,11 +784,7 @@ void D3D12RTManager::InitInternalResources()
|
||||
{
|
||||
InitReplayBlasPatchingResources();
|
||||
}
|
||||
else
|
||||
{
|
||||
// only needed during capture
|
||||
InitTLASInstanceCopyingResources();
|
||||
}
|
||||
InitTLASInstanceCopyingResources();
|
||||
InitRayDispatchPatchingResources();
|
||||
}
|
||||
|
||||
@@ -1319,67 +1315,21 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
// {BLASDescAddr, BLASDescAddr, BlasDescAddr...} to
|
||||
// {BLASDescAddr, dispatch(1,1,1), BLASDescAddr, dispatch(1,1,1), ...}
|
||||
|
||||
const uint64_t argSize = AlignUp(sizeof(TLASCopyExecute) * ret->NumBLAS, 256ULL);
|
||||
|
||||
if(m_TLASCopyingData.ArgsBuffer == NULL || m_TLASCopyingData.ArgsBuffer->Size() < argSize)
|
||||
{
|
||||
// needs to be dedicated so we can be sure it's not shared with anything when we transition it...
|
||||
m_GPUBufferAllocator.Alloc(
|
||||
D3D12GpuBufferHeapType::DefaultHeapWithUav, D3D12GpuBufferHeapMemoryFlag::Dedicated,
|
||||
argSize, D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT, &m_TLASCopyingData.ArgsBuffer);
|
||||
}
|
||||
const uint64_t unpackedLayoutSize = byteSize;
|
||||
|
||||
if(m_TLASCopyingData.ScratchBuffer == NULL ||
|
||||
m_TLASCopyingData.ScratchBuffer->Size() < byteSize)
|
||||
m_TLASCopyingData.ScratchBuffer->Size() < unpackedLayoutSize)
|
||||
{
|
||||
m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::DefaultHeapWithUav,
|
||||
D3D12GpuBufferHeapMemoryFlag::Default, byteSize,
|
||||
D3D12GpuBufferHeapMemoryFlag::Default, unpackedLayoutSize,
|
||||
D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT,
|
||||
&m_TLASCopyingData.ScratchBuffer);
|
||||
}
|
||||
|
||||
// keep these buffers around until the parent cmd executes even if we reallocate soon
|
||||
m_TLASCopyingData.ArgsBuffer->AddRef();
|
||||
m_TLASCopyingData.ScratchBuffer->AddRef();
|
||||
ret->cleanupCallback = [this]() {
|
||||
m_TLASCopyingData.ArgsBuffer->Release();
|
||||
m_TLASCopyingData.ScratchBuffer->Release();
|
||||
return true;
|
||||
};
|
||||
UnrollBLASInstancesList(unwrappedCmd, inputs, 0, 0, m_TLASCopyingData.ScratchBuffer);
|
||||
|
||||
// do a normal dispatch to set up the EI argument buffer in temporary scratch memory
|
||||
unwrappedCmd->SetPipelineState(m_TLASCopyingData.PreparePipe);
|
||||
unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig);
|
||||
// dummy, will be set by the EI argument
|
||||
unwrappedCmd->SetComputeRoot32BitConstant(
|
||||
(UINT)D3D12TLASInstanceCopyParam::IndirectArgumentIndex, 0, 0);
|
||||
unwrappedCmd->SetComputeRootShaderResourceView((UINT)D3D12TLASInstanceCopyParam::SourceSRV,
|
||||
inputs.InstanceDescs);
|
||||
unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV,
|
||||
m_TLASCopyingData.ArgsBuffer->Address());
|
||||
unwrappedCmd->Dispatch(ret->NumBLAS, 1, 1);
|
||||
|
||||
// make sure the argument buffer is ready
|
||||
// copy to readback buffer (can't write to it directly)
|
||||
D3D12_RESOURCE_BARRIER barrier = {};
|
||||
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
|
||||
barrier.Transition.pResource = m_TLASCopyingData.ArgsBuffer->Resource();
|
||||
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;
|
||||
|
||||
unwrappedCmd->ResourceBarrier(1, &barrier);
|
||||
|
||||
unwrappedCmd->SetPipelineState(m_TLASCopyingData.CopyPipe);
|
||||
unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig);
|
||||
// dummy, will be set by the EI argument
|
||||
unwrappedCmd->SetComputeRoot32BitConstant(
|
||||
(UINT)D3D12TLASInstanceCopyParam::IndirectArgumentIndex, 0, 0);
|
||||
unwrappedCmd->SetComputeRootUnorderedAccessView((UINT)D3D12TLASInstanceCopyParam::DestUAV,
|
||||
m_TLASCopyingData.ScratchBuffer->Address());
|
||||
// the EI takes care of both setting the source SRV and the index constant
|
||||
unwrappedCmd->ExecuteIndirect(m_TLASCopyingData.IndirectSig, ret->NumBLAS,
|
||||
m_TLASCopyingData.ArgsBuffer->Resource(),
|
||||
m_TLASCopyingData.ArgsBuffer->Offset(), NULL, 0);
|
||||
|
||||
barrier.Transition.pResource = m_TLASCopyingData.ScratchBuffer->Resource();
|
||||
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
@@ -1387,7 +1337,16 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
|
||||
unwrappedCmd->CopyBufferRegion(ret->buffer->Resource(), ret->buffer->Offset(),
|
||||
m_TLASCopyingData.ScratchBuffer->Resource(),
|
||||
m_TLASCopyingData.ScratchBuffer->Offset(), byteSize);
|
||||
m_TLASCopyingData.ScratchBuffer->Offset(), unpackedLayoutSize);
|
||||
|
||||
// keep these buffers around until the parent cmd executes even if we reallocate soon
|
||||
m_TLASCopyingData.ArgsBuffer->AddRef();
|
||||
m_TLASCopyingData.ScratchBuffer->AddRef();
|
||||
ret->cleanupCallback = [this]() {
|
||||
m_TLASCopyingData.ArgsBuffer->Release();
|
||||
m_TLASCopyingData.ScratchBuffer->Release();
|
||||
return true;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1599,6 +1558,63 @@ ASBuildData *D3D12RTManager::CopyBuildInputs(
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Records, on unwrappedCmd, the two GPU steps that copy the instance list referenced by
// inputs.InstanceDescs into copyDestUAV:
//   1. a Dispatch with PreparePipe (inputs.NumDescs groups) that writes the ExecuteIndirect
//      arguments into m_TLASCopyingData.ArgsBuffer, growing that buffer first if it is missing or
//      too small;
//   2. after transitioning the argument buffer from UNORDERED_ACCESS to INDIRECT_ARGUMENT, an
//      ExecuteIndirect with CopyPipe (inputs.NumDescs commands) whose destination UAV is
//      copyDestUAV.
//
// addressCount is passed to the prepare step through the RootCB root constant (truncated to UINT).
// addressPairResAddress is bound to the RootAddressPairSrv root parameter for both steps; when it
// is 0, inputs.InstanceDescs is bound in that slot instead (presumably just so the slot holds a
// valid address - confirm).
//
// Returns m_TLASCopyingData.ArgsBuffer. No reference is added to it here.
D3D12GpuBuffer *D3D12RTManager::UnrollBLASInstancesList(
    ID3D12GraphicsCommandList4 *unwrappedCmd,
    const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs,
    D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress, uint64_t addressCount,
    D3D12GpuBuffer *copyDestUAV)
{
  // root parameter slots shared by the prepare and copy passes
  const UINT rootCBParam = (UINT)D3D12TLASInstanceCopyParam::RootCB;
  const UINT sourceSRVParam = (UINT)D3D12TLASInstanceCopyParam::SourceSRV;
  const UINT destUAVParam = (UINT)D3D12TLASInstanceCopyParam::DestUAV;
  const UINT addressPairSRVParam = (UINT)D3D12TLASInstanceCopyParam::RootAddressPairSrv;

  // fall back to the instance list address when no address pair buffer was supplied
  const D3D12_GPU_VIRTUAL_ADDRESS addressPairSRV =
      addressPairResAddress ? addressPairResAddress : inputs.InstanceDescs;

  // one TLASCopyExecute per instance, rounded up to a multiple of 256 bytes
  const uint64_t requiredArgBytes = AlignUp(sizeof(TLASCopyExecute) * inputs.NumDescs, 256ULL);

  const bool needNewArgsBuffer = m_TLASCopyingData.ArgsBuffer == NULL ||
                                 m_TLASCopyingData.ArgsBuffer->Size() < requiredArgBytes;
  if(needNewArgsBuffer)
  {
    // needs to be dedicated so we can be sure it's not shared with anything when we transition it
    m_GPUBufferAllocator.Alloc(D3D12GpuBufferHeapType::DefaultHeapWithUav,
                               D3D12GpuBufferHeapMemoryFlag::Dedicated, requiredArgBytes,
                               D3D12_RAYTRACING_SHADER_TABLE_BYTE_ALIGNMENT,
                               &m_TLASCopyingData.ArgsBuffer);
  }

  D3D12GpuBuffer *argsBuffer = m_TLASCopyingData.ArgsBuffer;

  // pass 1: a plain dispatch fills the ExecuteIndirect argument buffer
  unwrappedCmd->SetPipelineState(m_TLASCopyingData.PreparePipe);
  unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig);
  unwrappedCmd->SetComputeRoot32BitConstant(rootCBParam, (UINT)addressCount, 0);
  unwrappedCmd->SetComputeRootShaderResourceView(sourceSRVParam, inputs.InstanceDescs);
  unwrappedCmd->SetComputeRootShaderResourceView(addressPairSRVParam, addressPairSRV);
  unwrappedCmd->SetComputeRootUnorderedAccessView(destUAVParam, argsBuffer->Address());
  unwrappedCmd->Dispatch(inputs.NumDescs, 1, 1);

  // the arguments written above must be visible before they are consumed as indirect arguments
  D3D12_RESOURCE_BARRIER argsReady = {};
  argsReady.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
  argsReady.Transition.pResource = argsBuffer->Resource();
  argsReady.Transition.StateBefore = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
  argsReady.Transition.StateAfter = D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT;

  unwrappedCmd->ResourceBarrier(1, &argsReady);

  // pass 2: the indirect copy into the caller's destination buffer
  unwrappedCmd->SetPipelineState(m_TLASCopyingData.CopyPipe);
  unwrappedCmd->SetComputeRootSignature(m_TLASCopyingData.RootSig);
  // placeholder value only, the EI argument overwrites this constant
  unwrappedCmd->SetComputeRoot32BitConstant(rootCBParam, 0, 0);
  unwrappedCmd->SetComputeRootShaderResourceView(addressPairSRVParam, addressPairSRV);
  unwrappedCmd->SetComputeRootUnorderedAccessView(destUAVParam, copyDestUAV->Address());
  // the source SRV and the index constant both come from the EI arguments
  unwrappedCmd->ExecuteIndirect(m_TLASCopyingData.IndirectSig, inputs.NumDescs,
                                argsBuffer->Resource(), argsBuffer->Offset(), NULL, 0);

  return argsBuffer;
}
|
||||
|
||||
void D3D12RTManager::CopyFromVA(ID3D12GraphicsCommandList4 *unwrappedCmd, ID3D12Resource *dstRes,
|
||||
uint64_t dstOffset, D3D12_GPU_VIRTUAL_ADDRESS sourceVA,
|
||||
uint64_t byteSize)
|
||||
@@ -1923,7 +1939,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources()
|
||||
rdcarray<D3D12_ROOT_PARAMETER1> rootParameters;
|
||||
rootParameters.reserve((uint16_t)D3D12TLASInstanceCopyParam::Count);
|
||||
|
||||
// only used in the EI
|
||||
// used as an index in the EI, and as an address count in the prepare step
|
||||
{
|
||||
D3D12_ROOT_PARAMETER1 rootParam;
|
||||
rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
@@ -1938,7 +1954,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources()
|
||||
D3D12_ROOT_PARAMETER1 rootParam;
|
||||
rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParam.Descriptor.ShaderRegister = 0;
|
||||
rootParam.Descriptor.ShaderRegister = 1;
|
||||
rootParam.Descriptor.RegisterSpace = 0;
|
||||
rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE;
|
||||
rootParameters.push_back(rootParam);
|
||||
@@ -1954,6 +1970,16 @@ void D3D12RTManager::InitTLASInstanceCopyingResources()
|
||||
rootParameters.push_back(rootParam);
|
||||
}
|
||||
|
||||
{
|
||||
D3D12_ROOT_PARAMETER1 rootParam;
|
||||
rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rootParam.Descriptor.ShaderRegister = 0;
|
||||
rootParam.Descriptor.RegisterSpace = 0;
|
||||
rootParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE;
|
||||
rootParameters.push_back(rootParam);
|
||||
}
|
||||
|
||||
RDCASSERT(rootParameters.size() == uint32_t(D3D12TLASInstanceCopyParam::Count));
|
||||
|
||||
bytebuf rootSig = EncodeRootSig(m_wrappedDevice->RootSigVersion(), rootParameters,
|
||||
@@ -2029,7 +2055,7 @@ void D3D12RTManager::InitTLASInstanceCopyingResources()
|
||||
|
||||
args[0].Constant.DestOffsetIn32BitValues = 0;
|
||||
args[0].Constant.Num32BitValuesToSet = 1;
|
||||
args[0].Constant.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::IndirectArgumentIndex;
|
||||
args[0].Constant.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::RootCB;
|
||||
|
||||
args[1].ShaderResourceView.RootParameterIndex = (uint32_t)D3D12TLASInstanceCopyParam::SourceSRV;
|
||||
|
||||
|
||||
@@ -981,9 +981,10 @@ enum class D3D12PatchTLASBuildParam
|
||||
|
||||
enum class D3D12TLASInstanceCopyParam
|
||||
{
|
||||
IndirectArgumentIndex,
|
||||
RootCB,
|
||||
SourceSRV,
|
||||
DestUAV,
|
||||
RootAddressPairSrv,
|
||||
Count
|
||||
};
|
||||
|
||||
@@ -1013,9 +1014,8 @@ enum class D3D12PatchRayDispatchParam
|
||||
|
||||
struct D3D12AccStructPatchInfo
|
||||
{
|
||||
D3D12AccStructPatchInfo() : m_rootSignature(NULL), m_pipeline(NULL) {}
|
||||
ID3D12RootSignature *m_rootSignature;
|
||||
ID3D12PipelineState *m_pipeline;
|
||||
ID3D12RootSignature *m_rootSignature = NULL;
|
||||
ID3D12PipelineState *m_pipeline = NULL;
|
||||
};
|
||||
|
||||
struct PatchedRayDispatch
|
||||
@@ -1210,6 +1210,12 @@ public:
|
||||
ASBuildData *CopyBuildInputs(ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs);
|
||||
|
||||
D3D12GpuBuffer *UnrollBLASInstancesList(
|
||||
ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS &inputs,
|
||||
D3D12_GPU_VIRTUAL_ADDRESS addressPairResAddress, uint64_t addressCount,
|
||||
D3D12GpuBuffer *copyDestUAV);
|
||||
|
||||
PatchedRayDispatch PatchRayDispatch(ID3D12GraphicsCommandList4 *unwrappedCmd,
|
||||
rdcarray<ResourceId> heaps,
|
||||
const D3D12_DISPATCH_RAYS_DESC &desc);
|
||||
|
||||
Reference in New Issue
Block a user