Add post-submission AS build callbacks that need to wait for GPU sync

* This will be needed for copies where the size may not be known on the CPU -
  currently for AS builds we know the size directly.
* We also store only the size of an AS, not the whole pre-build info, since we
  don't need the scratch data and it won't be available in full for copies.
This commit is contained in:
baldurk
2024-05-07 14:40:05 +01:00
parent 75810b983c
commit cc3b2f6443
8 changed files with 124 additions and 121 deletions
+20 -6
View File
@@ -200,7 +200,8 @@ private:
static rdcstr GetChunkName(uint32_t idx);
D3D12ResourceManager *GetResourceManager() { return m_pDevice->GetResourceManager(); }
rdcarray<std::function<bool()>> m_accStructPostBuildQueueFunc;
rdcarray<std::function<bool()>> m_ImmediateASCallbacks;
rdcarray<std::function<bool()>> m_PendingASCallbacks;
public:
ALLOCATE_WITH_WRAPPED_POOL(WrappedID3D12GraphicsCommandList);
@@ -249,24 +250,33 @@ public:
bool ValidateRootGPUVA(D3D12_GPU_VIRTUAL_ADDRESS buffer);
// Registers a callback to run for an acceleration structure build recorded on this list.
//   waitForSubmission - when true the callback is stored in m_PendingASCallbacks, otherwise it
//                       is stored in m_ImmediateASCallbacks.
//   postBldExec       - the callback; it returns true on success. It is copied into the list.
void EnqueueAccStructPostBuild(const std::function<bool()> &postBldExec)
void AddSubmissionASBuildCallback(bool waitForSubmission, const std::function<bool()> &postBldExec)
{
m_accStructPostBuildQueueFunc.push_back(postBldExec);
// pending callbacks are handed out via TakeWaitingASBuildCallbacks(), immediate ones are run by
// ExecuteImmediateASBuildCallbacks()
if(waitForSubmission)
m_PendingASCallbacks.push_back(postBldExec);
else
m_ImmediateASCallbacks.push_back(postBldExec);
}
// Runs every callback currently stored in the immediate list, in insertion order, then empties
// the list. Returns true only if every callback returned true.
bool ExecuteAccStructPostBuilds()
bool ExecuteImmediateASBuildCallbacks()
{
bool success = true;
for(std::function<bool()> &func : m_accStructPostBuildQueueFunc)
for(std::function<bool()> &func : m_ImmediateASCallbacks)
{
// '&=' does not short-circuit, so all callbacks still run after one has failed
success &= func();
}
m_accStructPostBuildQueueFunc.clear();
m_ImmediateASCallbacks.clear();
return success;
}
// Moves responsibility for the pending callbacks to the caller: the contents of
// m_PendingASCallbacks are appended to 'callbacks' (existing entries in 'callbacks' are kept) and
// the internal list is then emptied.
void TakeWaitingASBuildCallbacks(rdcarray<std::function<bool()>> &callbacks)
{
callbacks.append(m_PendingASCallbacks);
m_PendingASCallbacks.clear();
}
//////////////////////////////
// implement IUnknown
ULONG STDMETHODCALLTYPE AddRef() { return m_RefCounter.SoftRef(m_pDevice); }
@@ -582,6 +592,10 @@ public:
ID3D12GraphicsCommandList4 *dxrCmd,
BakedCmdListInfo::PatchRaytracing *patchRaytracing);
bool ProcessASBuildAfterSubmission(ResourceId asbWrappedResourceId,
D3D12BufferOffset asbWrappedResourceBufferOffset,
UINT64 byteSize);
IMPLEMENT_FUNCTION_SERIALISED(
virtual void STDMETHODCALLTYPE, BuildRaytracingAccelerationStructure,
_In_ const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *pDesc,
@@ -727,6 +727,72 @@ void WrappedID3D12GraphicsCommandList::ExecuteMetaCommand(
RDCERR("ExecuteMetaCommand called but no meta commands reported!");
}
// Tracks the acceleration structure (AS) that a build placed at destASBOffset inside the buffer
// identified by destASBId.
//
//   destASBId     - resource ID of the wrapped buffer that backs the AS.
//   destASBOffset - offset of the AS within that buffer.
//   byteSize      - size in bytes of the AS.
//
// If an AS of the same size is already tracked at that offset this is treated as an in-place
// rebuild and nothing is created. Otherwise any AS at that offset is deleted and a new one is
// created, given a resource record parented to the backing buffer's record, marked dirty, and
// registered with the device.
//
// Returns true on success (including the in-place rebuild case), false if the destination buffer
// could not be found or the AS could not be created.
bool WrappedID3D12GraphicsCommandList::ProcessASBuildAfterSubmission(ResourceId destASBId,
                                                                     D3D12BufferOffset destASBOffset,
                                                                     UINT64 byteSize)
{
  D3D12ResourceManager *resManager = m_pDevice->GetResourceManager();

  WrappedID3D12Resource *dstASB = resManager->GetCurrentAs<WrappedID3D12Resource>(destASBId);

  // this runs at submission time rather than record time, so don't assume the lookup succeeded
  if(dstASB == NULL)
  {
    RDCERR("Unable to find destination buffer for acceleration structure");
    return false;
  }

  D3D12AccelerationStructure *accStructAtDestOffset = NULL;

  // See if an AS already exists at the given offset
  if(dstASB->GetAccStructIfExist(destASBOffset, &accStructAtDestOffset))
  {
    // if the AS is being rebuilt in place, that's also successful
    if(accStructAtDestOffset == NULL || accStructAtDestOffset->Size() == byteSize)
      return true;

    // the size changed, so drop the stale entry and fall through to create a fresh one
    dstASB->DeleteAccStructAtOffset(destASBOffset);
  }

  // CreateAccStruct also deletes any previous overlapping ASs on the ASB
  if(!dstASB->CreateAccStruct(destASBOffset, byteSize, &accStructAtDestOffset))
  {
    RDCERR("Unable to create acceleration structure");
    return false;
  }

  D3D12ResourceRecord *record =
      resManager->AddResourceRecord(accStructAtDestOffset->GetResourceID());
  record->type = Resource_AccelerationStructure;
  record->Length = 0;
  accStructAtDestOffset->SetResourceRecord(record);
  resManager->MarkDirtyResource(accStructAtDestOffset->GetResourceID());

  // parent the AS record to the record of the buffer that backs it
  record->AddParent(
      resManager->GetResourceRecord(accStructAtDestOffset->GetBackingBufferResourceId()));

  // register this AS so its resource can be created during replay
  m_pDevice->CreateAS(dstASB, destASBOffset, byteSize, accStructAtDestOffset);
  m_pDevice->AddForcedReference(record);

  return true;
}
bool WrappedID3D12GraphicsCommandList::PatchAccStructBlasAddress(
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *accStructInput,
ID3D12GraphicsCommandList4 *dxrCmd, BakedCmdListInfo::PatchRaytracing *patchRaytracing)
@@ -1016,74 +1082,13 @@ void WrappedID3D12GraphicsCommandList::BuildRaytracingAccelerationStructure(
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO preBldInfo;
m_pDevice->GetRaytracingAccelerationStructurePrebuildInfo(&pDesc->Inputs, &preBldInfo);
auto PostBldExecute = [this, asbWrappedResourceId, asbWrappedResourceBufferOffset,
preBldInfo]() -> bool {
bool success = false;
D3D12AccelerationStructure *accStructAtOffset = NULL;
UINT64 byteSize = preBldInfo.ResultDataMaxSizeInBytes;
D3D12ResourceManager *resManager = m_pDevice->GetResourceManager();
WrappedID3D12Resource *asbWrappedResource =
resManager->GetCurrentAs<WrappedID3D12Resource>(asbWrappedResourceId);
// See if acc already exist at the given offset
bool accStructExistAtOffset = asbWrappedResource->GetAccStructIfExist(
asbWrappedResourceBufferOffset, &accStructAtOffset);
bool createAccStruct = false;
if(accStructExistAtOffset)
{
if(accStructAtOffset && accStructAtOffset->Size() != preBldInfo.ResultDataMaxSizeInBytes)
{
asbWrappedResource->DeleteAccStructAtOffset(asbWrappedResourceBufferOffset);
createAccStruct = true;
}
else
{
// if the AS is being rebuilt in place, that's also successful
success = true;
}
}
else
{
createAccStruct = true;
}
if(createAccStruct)
{
// CreateAccStruct also deletes any previous overlapping ASs on the ASB
if(asbWrappedResource->CreateAccStruct(asbWrappedResourceBufferOffset, preBldInfo,
&accStructAtOffset))
{
success = true;
D3D12ResourceRecord *record =
resManager->AddResourceRecord(accStructAtOffset->GetResourceID());
record->type = Resource_AccelerationStructure;
record->Length = 0;
accStructAtOffset->SetResourceRecord(record);
resManager->MarkDirtyResource(accStructAtOffset->GetResourceID());
record->AddParent(
resManager->GetResourceRecord(accStructAtOffset->GetBackingBufferResourceId()));
// register this AS so its resource can be created during replay
m_pDevice->CreateAS(asbWrappedResource, asbWrappedResourceBufferOffset, preBldInfo,
accStructAtOffset);
m_pDevice->AddForcedReference(record);
}
else
{
RDCERR("Unable to create acceleration structure");
success = false;
}
}
return success;
};
EnqueueAccStructPostBuild(PostBldExecute);
AddSubmissionASBuildCallback(
false, [this, asbWrappedResourceId, asbWrappedResourceBufferOffset, byteSize]() {
return ProcessASBuildAfterSubmission(asbWrappedResourceId, asbWrappedResourceBufferOffset,
byteSize);
});
}
}
@@ -744,7 +744,7 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
WrappedID3D12GraphicsCommandList *wrapped =
(WrappedID3D12GraphicsCommandList *)(ppCommandLists[i]);
if(!wrapped->ExecuteAccStructPostBuilds())
if(!wrapped->ExecuteImmediateASBuildCallbacks())
{
RDCERR("Unable to execute post build for acc struct");
}
@@ -753,6 +753,8 @@ void WrappedID3D12CommandQueue::ExecuteCommandListsInternal(UINT NumCommandLists
if(IsCaptureMode(m_State))
{
CheckAndFreeRayDispatches();
rdcarray<PatchedRayDispatch::Resources> rayDispatches;
if(!InFrameCaptureBoundary)
+13 -17
View File
@@ -3666,14 +3666,13 @@ void WrappedID3D12Device::SetName(ID3D12DeviceChild *pResource, const char *Name
}
template <typename SerialiserType>
bool WrappedID3D12Device::Serialise_CreateAS(
SerialiserType &ser, ID3D12Resource *pResource, UINT64 resourceOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure *as)
bool WrappedID3D12Device::Serialise_CreateAS(SerialiserType &ser, ID3D12Resource *pResource,
UINT64 resourceOffset, UINT64 byteSize,
D3D12AccelerationStructure *as)
{
SERIALISE_ELEMENT(pResource);
SERIALISE_ELEMENT(resourceOffset);
SERIALISE_ELEMENT(preBldInfo);
SERIALISE_ELEMENT(byteSize);
SERIALISE_ELEMENT_LOCAL(asId, as->GetResourceID());
SERIALISE_CHECK_READ_ERRORS();
@@ -3682,7 +3681,7 @@ bool WrappedID3D12Device::Serialise_CreateAS(
{
WrappedID3D12Resource *asbWrappedResource = (WrappedID3D12Resource *)pResource;
D3D12AccelerationStructure *accStructAtOffset = NULL;
if(asbWrappedResource->CreateAccStruct(resourceOffset, preBldInfo, &accStructAtOffset))
if(asbWrappedResource->CreateAccStruct(resourceOffset, byteSize, &accStructAtOffset))
{
GetResourceManager()->AddLiveResource(asId, accStructAtOffset);
@@ -3701,18 +3700,15 @@ bool WrappedID3D12Device::Serialise_CreateAS(
return true;
}
template bool WrappedID3D12Device::Serialise_CreateAS(
ReadSerialiser &ser, ID3D12Resource *pResource, UINT64 resourceOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure *as);
template bool WrappedID3D12Device::Serialise_CreateAS(
WriteSerialiser &ser, ID3D12Resource *pResource, UINT64 resourceOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure *as);
template bool WrappedID3D12Device::Serialise_CreateAS(ReadSerialiser &ser, ID3D12Resource *pResource,
UINT64 resourceOffset, UINT64 byteSize,
D3D12AccelerationStructure *as);
template bool WrappedID3D12Device::Serialise_CreateAS(WriteSerialiser &ser, ID3D12Resource *pResource,
UINT64 resourceOffset, UINT64 byteSize,
D3D12AccelerationStructure *as);
void WrappedID3D12Device::CreateAS(ID3D12Resource *pResource, UINT64 resourceOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure *as)
UINT64 byteSize, D3D12AccelerationStructure *as)
{
if(IsCaptureMode(m_State))
{
@@ -3723,7 +3719,7 @@ void WrappedID3D12Device::CreateAS(ID3D12Resource *pResource, UINT64 resourceOff
{
WriteSerialiser &ser = GetThreadSerialiser();
SCOPED_SERIALISE_CHUNK(D3D12Chunk::CreateAS);
Serialise_CreateAS(ser, pResource, resourceOffset, preBldInfo, as);
Serialise_CreateAS(ser, pResource, resourceOffset, byteSize, as);
record->AddChunk(scope.Get());
}
}
+3 -4
View File
@@ -1284,10 +1284,9 @@ public:
IMPLEMENT_FUNCTION_THREAD_SERIALISED(HRESULT, SetShaderDebugPath, ID3D12DeviceChild *pResource,
const char *Path);
IMPLEMENT_FUNCTION_THREAD_SERIALISED(
void, CreateAS, ID3D12Resource *pResource, UINT64 resourceOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure *as);
IMPLEMENT_FUNCTION_THREAD_SERIALISED(void, CreateAS, ID3D12Resource *pResource,
UINT64 resourceOffset, UINT64 byteSize,
D3D12AccelerationStructure *as);
// IHV APIs
IMPLEMENT_FUNCTION_SERIALISED(void, SetShaderExtUAV, GPUVendor vendor, uint32_t reg,
+1 -10
View File
@@ -32,14 +32,6 @@ bool WrappedID3D12Device::Serialise_AddToStateObject(SerialiserType &ser,
REFIID riid,
_COM_Outptr_ void **ppNewStateObject)
{
// AMD TODO - //Serialize Members
if(IsReplayingAndReading())
{
// AMD TODO
// Handle reading, and replaying
}
return false;
}
@@ -47,8 +39,7 @@ HRESULT STDMETHODCALLTYPE WrappedID3D12Device::AddToStateObject(
const D3D12_STATE_OBJECT_DESC *pAddition, ID3D12StateObject *pStateObjectToGrowFrom,
REFIID riid, _COM_Outptr_ void **ppNewStateObject)
{
// TODO AMD
RDCERR("AddToStateObject called but raytracing is not supported!");
D3D12NOTIMP("AddToStateObject");
return E_INVALIDARG;
}
+8 -10
View File
@@ -361,14 +361,14 @@ HRESULT STDMETHODCALLTYPE WrappedID3D12Resource::WriteToSubresource(UINT DstSubr
WRAPPED_POOL_INST(D3D12AccelerationStructure);
D3D12AccelerationStructure::D3D12AccelerationStructure(
WrappedID3D12Device *wrappedDevice, WrappedID3D12Resource *bufferRes,
D3D12BufferOffset bufferOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo)
D3D12AccelerationStructure::D3D12AccelerationStructure(WrappedID3D12Device *wrappedDevice,
WrappedID3D12Resource *bufferRes,
D3D12BufferOffset bufferOffset,
UINT64 byteSize)
: WrappedDeviceChild12(NULL, wrappedDevice),
m_asbWrappedResource(bufferRes),
m_asbWrappedResourceBufferOffset(bufferOffset),
m_preBldInfo(preBldInfo)
byteSize(byteSize)
{
}
@@ -377,16 +377,14 @@ D3D12AccelerationStructure::~D3D12AccelerationStructure()
Shutdown();
}
bool WrappedID3D12Resource::CreateAccStruct(
D3D12BufferOffset bufferOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
D3D12AccelerationStructure **accStruct)
bool WrappedID3D12Resource::CreateAccStruct(D3D12BufferOffset bufferOffset, UINT64 byteSize,
D3D12AccelerationStructure **accStruct)
{
SCOPED_LOCK(m_accStructResourcesCS);
if(m_accelerationStructMap.find(bufferOffset) == m_accelerationStructMap.end())
{
m_accelerationStructMap[bufferOffset] =
new D3D12AccelerationStructure(m_pDevice, this, bufferOffset, preBldInfo);
new D3D12AccelerationStructure(m_pDevice, this, bufferOffset, byteSize);
if(accStruct)
{
+4 -6
View File
@@ -1187,8 +1187,7 @@ public:
return this->GetResourceID();
}
bool CreateAccStruct(D3D12BufferOffset bufferOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo,
bool CreateAccStruct(D3D12BufferOffset bufferOffset, UINT64 byteSize,
D3D12AccelerationStructure **accStruct);
bool GetAccStructIfExist(D3D12BufferOffset bufferOffset,
@@ -1519,12 +1518,11 @@ public:
ALLOCATE_WITH_WRAPPED_POOL(D3D12AccelerationStructure);
D3D12AccelerationStructure(WrappedID3D12Device *wrappedDevice, WrappedID3D12Resource *bufferRes,
D3D12BufferOffset bufferOffset,
const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO &preBldInfo);
D3D12BufferOffset bufferOffset, UINT64 byteSize);
~D3D12AccelerationStructure();
uint64_t Size() const { return m_preBldInfo.ResultDataMaxSizeInBytes; }
uint64_t Size() const { return byteSize; }
ResourceId GetBackingBufferResourceId() const { return m_asbWrappedResource->GetResourceID(); }
D3D12_GPU_VIRTUAL_ADDRESS GetVirtualAddress() const
{
@@ -1534,7 +1532,7 @@ public:
private:
WrappedID3D12Resource *m_asbWrappedResource;
D3D12BufferOffset m_asbWrappedResourceBufferOffset;
D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO m_preBldInfo;
UINT64 byteSize;
};
#define ALL_D3D12_TYPES \