mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-06 01:50:38 +00:00
New memory scope that clears on after first Apply()
Also: * Use GPUBuffer instead of own radback memory mechanism, but mark as internal * Use GPU local buffer for AS builds instead of upload memory to improve performance
This commit is contained in:
committed by
Baldur Karlsson
parent
c1e7655309
commit
8e5ab5dd41
@@ -24,7 +24,6 @@
|
||||
|
||||
#include "vk_acceleration_structure.h"
|
||||
#include "core/settings.h"
|
||||
#include "limits"
|
||||
#include "vk_core.h"
|
||||
#include "vk_manager.h"
|
||||
|
||||
@@ -99,15 +98,9 @@ void DoSerialise(SerialiserType &ser, VkAccelerationStructureInfo &el)
|
||||
}
|
||||
INSTANTIATE_SERIALISE_TYPE(VkAccelerationStructureInfo);
|
||||
|
||||
uint64_t VkAccelerationStructureInfo::GeometryData::GetSerialisedSize() const
|
||||
{
|
||||
return sizeof(GeometryData);
|
||||
}
|
||||
|
||||
VkAccelerationStructureInfo::~VkAccelerationStructureInfo()
|
||||
{
|
||||
if(readbackMem != VK_NULL_HANDLE)
|
||||
ObjDisp(device)->FreeMemory(Unwrap(device), readbackMem, NULL);
|
||||
readbackMem.Destroy();
|
||||
}
|
||||
|
||||
void VkAccelerationStructureInfo::Release()
|
||||
@@ -120,9 +113,7 @@ void VkAccelerationStructureInfo::Release()
|
||||
|
||||
uint64_t VkAccelerationStructureInfo::GetSerialisedSize() const
|
||||
{
|
||||
uint64_t geomDataSize = 0;
|
||||
for(const GeometryData &geoData : geometryData)
|
||||
geomDataSize += geoData.GetSerialisedSize();
|
||||
const uint64_t geomDataSize = geometryData.byteSize();
|
||||
|
||||
const uint64_t size = sizeof(VkAccelerationStructureTypeKHR) + // type
|
||||
sizeof(VkBuildAccelerationStructureFlagsKHR) + // flags
|
||||
@@ -134,10 +125,10 @@ uint64_t VkAccelerationStructureInfo::GetSerialisedSize() const
|
||||
return size + bufferSize;
|
||||
}
|
||||
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> VkAccelerationStructureInfo::convertGeometryData() const
|
||||
void VkAccelerationStructureInfo::convertGeometryData(
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> &geometry) const
|
||||
{
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> result;
|
||||
result.reserve(geometryData.size());
|
||||
geometry.clear();
|
||||
|
||||
for(const VkAccelerationStructureInfo::GeometryData &g : geometryData)
|
||||
{
|
||||
@@ -157,11 +148,9 @@ rdcarray<VkAccelerationStructureGeometryKHR> VkAccelerationStructureInfo::conver
|
||||
// vkGetAccelerationStructureBuildSizesKHR just checks if the transform BDA is non-null,
|
||||
// so fudge that here
|
||||
VkDeviceOrHostAddressConstKHR tData;
|
||||
tData.deviceAddress = g.buildRangeInfo.transformOffset
|
||||
? g.memOffset
|
||||
: std::numeric_limits<VkDeviceAddress>::max();
|
||||
tData.deviceAddress = g.buildRangeInfo.transformOffset ? g.memOffset : ~0ULL;
|
||||
|
||||
geoUnion.triangles = VkAccelerationStructureGeometryTrianglesDataKHR{
|
||||
geoUnion.triangles = {
|
||||
VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR,
|
||||
NULL,
|
||||
g.tris.vertexFormat,
|
||||
@@ -200,14 +189,12 @@ rdcarray<VkAccelerationStructureGeometryKHR> VkAccelerationStructureInfo::conver
|
||||
};
|
||||
break;
|
||||
}
|
||||
default: RDCERR("Unhandled geometry type: %d", g.geometryType); return {};
|
||||
default: RDCERR("Unhandled geometry type: %d", g.geometryType); return;
|
||||
}
|
||||
|
||||
result.push_back({VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, NULL, g.geometryType,
|
||||
geoUnion, g.flags});
|
||||
geometry.push_back({VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, NULL, g.geometryType,
|
||||
geoUnion, g.flags});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
rdcarray<VkAccelerationStructureBuildRangeInfoKHR> VkAccelerationStructureInfo::getBuildRanges() const
|
||||
@@ -245,7 +232,6 @@ RDResult VulkanAccelerationStructureManager::CopyInputBuffers(
|
||||
}
|
||||
|
||||
VkDevice device = cmdRecord->cmdInfo->device;
|
||||
metadata->device = device;
|
||||
metadata->type = info.type;
|
||||
metadata->flags = info.flags;
|
||||
|
||||
@@ -536,44 +522,44 @@ RDResult VulkanAccelerationStructureManager::CopyInputBuffers(
|
||||
}
|
||||
}
|
||||
|
||||
bool skipBarrier = false;
|
||||
if(currentDstOffset == 0)
|
||||
{
|
||||
RDCWARN("Cannot copy empty AS input buffers, ignoring");
|
||||
return {};
|
||||
// Rather than deal with empty buffers, for empty ASes just create a min-sized one
|
||||
const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize;
|
||||
currentDstOffset = nonCoherentAtomSize;
|
||||
skipBarrier = true;
|
||||
}
|
||||
|
||||
// Allocate the required memory block
|
||||
Allocation readbackmem = CreateReadBackMemory(device, currentDstOffset);
|
||||
if(readbackmem.mem == VK_NULL_HANDLE)
|
||||
metadata->readbackMem = CreateTempReadBackBuffer(device, currentDstOffset);
|
||||
if(metadata->readbackMem.mem == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Unable to allocate AS input buffer readback memory (size: %u bytes)", currentDstOffset);
|
||||
return {};
|
||||
}
|
||||
|
||||
metadata->readbackMem = readbackmem.mem;
|
||||
metadata->memSize = currentDstOffset;
|
||||
|
||||
// Queue the copying
|
||||
for(const BufferData &bufData : inputBuffersData)
|
||||
ObjDisp(device)->CmdCopyBuffer(Unwrap(commandBuffer), bufData.buf, readbackmem.buf, 1,
|
||||
&bufData.region);
|
||||
|
||||
// Make sure nothing writes to our source buffers before we finish copying them
|
||||
VkMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
NULL,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_MEMORY_WRITE_BIT,
|
||||
};
|
||||
ObjDisp(device)->CmdPipelineBarrier(Unwrap(commandBuffer), VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 1, &barrier, 0,
|
||||
VK_NULL_HANDLE, 0, VK_NULL_HANDLE);
|
||||
if(!skipBarrier)
|
||||
{
|
||||
// Queue the copying
|
||||
for(const BufferData &bufData : inputBuffersData)
|
||||
ObjDisp(device)->CmdCopyBuffer(Unwrap(commandBuffer), bufData.buf,
|
||||
Unwrap(metadata->readbackMem.buf), 1, &bufData.region);
|
||||
|
||||
// We can schedule buffer deletion now as it isn't needed anymore
|
||||
cmdRecord->cmdInfo->pendingSubmissionCompleteCallbacks->callbacks.push_back(
|
||||
[device, buffer = readbackmem.buf]() {
|
||||
ObjDisp(device)->DestroyBuffer(Unwrap(device), buffer, NULL);
|
||||
});
|
||||
VkMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
NULL,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_NONE,
|
||||
};
|
||||
ObjDisp(device)->CmdPipelineBarrier(Unwrap(commandBuffer), VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_HOST_BIT |
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
|
||||
0, 1, &barrier, 0, VK_NULL_HANDLE, 0, VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
@@ -615,89 +601,72 @@ bool VulkanAccelerationStructureManager::Serialise(SerialiserType &ser, Resource
|
||||
VkDevice d = !IsStructuredExporting(state) ? m_pDriver->GetDev() : VK_NULL_HANDLE;
|
||||
VkResult vkr = VK_SUCCESS;
|
||||
|
||||
VkAccelerationStructureInfo *asInfo =
|
||||
initial ? initial->accelerationStructureInfo : new VkAccelerationStructureInfo();
|
||||
RDCASSERT(asInfo);
|
||||
|
||||
byte *contents = NULL;
|
||||
|
||||
const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize;
|
||||
Allocation uploadMemory;
|
||||
|
||||
SERIALISE_ELEMENT(*asInfo).Hidden();
|
||||
|
||||
if(ser.IsWriting())
|
||||
{
|
||||
VkAccelerationStructureInfo *asInfo = initial->accelerationStructureInfo;
|
||||
RDCASSERT(asInfo != NULL);
|
||||
SERIALISE_ELEMENT(*asInfo).Hidden();
|
||||
|
||||
RDCASSERT(asInfo->readbackMem != VK_NULL_HANDLE);
|
||||
|
||||
// The input buffers have already been copied into readable memory, so they just need
|
||||
// mapping and serialising
|
||||
vkr = ObjDisp(d)->MapMemory(Unwrap(d), asInfo->readbackMem, 0, asInfo->memSize, 0,
|
||||
(void **)&contents);
|
||||
contents = (byte *)asInfo->readbackMem.Map();
|
||||
}
|
||||
else if(IsReplayMode(state) && !ser.IsErrored())
|
||||
{
|
||||
uploadMemory = CreateTempReplayBuffer(MemoryType::Upload, asInfo->memSize, 0);
|
||||
if(uploadMemory.memAlloc.mem == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Failed to allocate AS build data upload buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
vkr = ObjDisp(d)->MapMemory(
|
||||
Unwrap(d), Unwrap(uploadMemory.memAlloc.mem), uploadMemory.memAlloc.offs,
|
||||
AlignUp(asInfo->memSize, nonCoherentAtomSize), 0, (void **)&contents);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
// invalidate the cpu cache for this memory range to avoid reading stale data
|
||||
const VkMappedMemoryRange range = {
|
||||
VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, asInfo->readbackMem, 0, asInfo->memSize,
|
||||
};
|
||||
vkr = ObjDisp(d)->InvalidateMappedMemoryRanges(Unwrap(d), 1, &range);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
if(!contents)
|
||||
{
|
||||
RDCERR("Manually reporting failed memory map");
|
||||
CHECK_VKR(m_pDriver, VK_ERROR_MEMORY_MAP_FAILED);
|
||||
return false;
|
||||
}
|
||||
|
||||
ser.Serialise("AS Input"_lit, contents, asInfo->memSize, SerialiserFlags::NoFlags).Hidden();
|
||||
if(vkr != VK_SUCCESS)
|
||||
return false;
|
||||
}
|
||||
|
||||
ObjDisp(d)->UnmapMemory(Unwrap(d), asInfo->readbackMem);
|
||||
ser.Serialise("AS Input"_lit, contents, asInfo->memSize, SerialiserFlags::NoFlags).Hidden();
|
||||
|
||||
if(ser.IsWriting())
|
||||
{
|
||||
asInfo->readbackMem.Unmap();
|
||||
}
|
||||
else
|
||||
{
|
||||
const VkDeviceSize nonCoherentAtomSize = m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize;
|
||||
|
||||
VkAccelerationStructureInfo *asInfo = new VkAccelerationStructureInfo();
|
||||
SERIALISE_ELEMENT(*asInfo).Hidden();
|
||||
|
||||
Allocation uploadMemory;
|
||||
|
||||
if(IsReplayMode(state) && !ser.IsErrored())
|
||||
{
|
||||
uploadMemory =
|
||||
CreateReplayMemory(MemoryType::Upload, asInfo->memSize,
|
||||
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR);
|
||||
if(uploadMemory.mem == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Failed to allocate AS build data upload buffer");
|
||||
return false;
|
||||
}
|
||||
|
||||
vkr = ObjDisp(d)->MapMemory(Unwrap(d), uploadMemory.mem, 0,
|
||||
AlignUp(asInfo->memSize, nonCoherentAtomSize), 0,
|
||||
(void **)&contents);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
if(!contents)
|
||||
{
|
||||
RDCERR("Manually reporting failed memory map");
|
||||
CHECK_VKR(m_pDriver, VK_ERROR_MEMORY_MAP_FAILED);
|
||||
return false;
|
||||
}
|
||||
|
||||
if(vkr != VK_SUCCESS)
|
||||
return false;
|
||||
}
|
||||
|
||||
// not using SERIALISE_ELEMENT_ARRAY so we can deliberately avoid allocation - we serialise
|
||||
// directly into upload memory
|
||||
ser.Serialise("AS Input"_lit, contents, asInfo->memSize, SerialiserFlags::NoFlags).Hidden();
|
||||
|
||||
if(!IsStructuredExporting(state) && uploadMemory.mem != VK_NULL_HANDLE)
|
||||
if(!IsStructuredExporting(state) && uploadMemory.memAlloc.mem != VK_NULL_HANDLE)
|
||||
{
|
||||
// first ensure we flush the writes from the cpu to gpu memory
|
||||
const VkMappedMemoryRange range = {
|
||||
VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, //
|
||||
NULL,
|
||||
uploadMemory.mem,
|
||||
0,
|
||||
Unwrap(uploadMemory.memAlloc.mem),
|
||||
uploadMemory.memAlloc.offs,
|
||||
AlignUp(asInfo->memSize, nonCoherentAtomSize),
|
||||
};
|
||||
vkr = ObjDisp(d)->FlushMappedMemoryRanges(Unwrap(d), 1, &range);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
ObjDisp(d)->UnmapMemory(Unwrap(d), uploadMemory.mem);
|
||||
ObjDisp(d)->UnmapMemory(Unwrap(d), Unwrap(uploadMemory.memAlloc.mem));
|
||||
|
||||
asInfo->uploadMem = uploadMemory.mem;
|
||||
asInfo->uploadAlloc = uploadMemory.memAlloc;
|
||||
asInfo->uploadBuf = uploadMemory.buf;
|
||||
}
|
||||
|
||||
@@ -734,18 +703,49 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
VkAccelerationStructureInfo *asInfo = initial.accelerationStructureInfo;
|
||||
RDCASSERT(asInfo);
|
||||
|
||||
VkCommandBuffer cmd;
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
VkCommandBuffer cmd = m_pDriver->GetInitStateCmd();
|
||||
if(cmd == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Couldn't acquire command buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
// If our 'base' AS has not been created yet, build it now
|
||||
if(asInfo->replayAS == VK_NULL_HANDLE)
|
||||
{
|
||||
rdcarray<VkAccelerationStructureBuildRangeInfoKHR> buildRangeInfos = asInfo->getBuildRanges();
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> asGeomData = asInfo->convertGeometryData();
|
||||
RDCASSERT(!asGeomData.empty());
|
||||
RDCASSERT(asInfo->geometryData.size() == asGeomData.size());
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> geometry;
|
||||
asInfo->convertGeometryData(geometry);
|
||||
RDCASSERT(!geometry.empty());
|
||||
RDCASSERT(asInfo->geometryData.size() == geometry.size());
|
||||
|
||||
if(!FixUpReplayBDAs(asInfo, asGeomData))
|
||||
// Copy over the input data from the upload mem to GPU local to increase build speed
|
||||
Allocation inputGpuMemory =
|
||||
CreateTempReplayBuffer(MemoryType::GPULocal, asInfo->memSize, 0,
|
||||
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR);
|
||||
|
||||
VkBufferCopy toGpuCopy = {0, 0, asInfo->memSize};
|
||||
ObjDisp(d)->CmdCopyBuffer(Unwrap(cmd), asInfo->uploadBuf, inputGpuMemory.buf, 1, &toGpuCopy);
|
||||
|
||||
const VkMemoryBarrier copyBarrier = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
NULL,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT,
|
||||
};
|
||||
ObjDisp(d)->CmdPipelineBarrier(Unwrap(cmd), VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1,
|
||||
©Barrier, 0, NULL, 0, NULL);
|
||||
|
||||
// We can clean up the buffers now, the backing mem will be freed after the first Apply()
|
||||
m_pDriver->AddPendingObjectCleanup(
|
||||
[d, gpuBuf = inputGpuMemory.buf, uploadBuf = asInfo->uploadBuf]() {
|
||||
ObjDisp(d)->DestroyBuffer(Unwrap(d), uploadBuf, NULL);
|
||||
ObjDisp(d)->DestroyBuffer(Unwrap(d), gpuBuf, NULL);
|
||||
});
|
||||
|
||||
if(!FixUpReplayBDAs(asInfo, inputGpuMemory.buf, geometry))
|
||||
return;
|
||||
|
||||
// Allocate the scratch buffer which involves working out how big it should be
|
||||
@@ -761,13 +761,13 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
|
||||
VK_NULL_HANDLE,
|
||||
VK_NULL_HANDLE,
|
||||
(uint32_t)asGeomData.size(),
|
||||
asGeomData.data(),
|
||||
(uint32_t)geometry.size(),
|
||||
geometry.data(),
|
||||
VK_NULL_HANDLE,
|
||||
};
|
||||
|
||||
rdcarray<uint32_t> counts;
|
||||
counts.reserve(asGeomData.size());
|
||||
counts.reserve(geometry.size());
|
||||
for(VkAccelerationStructureBuildRangeInfoKHR numPrims : buildRangeInfos)
|
||||
counts.push_back(numPrims.primitiveCount);
|
||||
|
||||
@@ -777,13 +777,6 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
}
|
||||
UpdateScratch(sizeResult.buildScratchSize);
|
||||
|
||||
cmd = m_pDriver->GetInitStateCmd();
|
||||
if(cmd == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Couldn't acquire command buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
// Create the base AS
|
||||
const VkBufferCreateInfo gpuBufInfo = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
@@ -828,8 +821,8 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR,
|
||||
VK_NULL_HANDLE,
|
||||
Unwrap(asInfo->replayAS),
|
||||
(uint32_t)asGeomData.size(),
|
||||
asGeomData.data(),
|
||||
(uint32_t)geometry.size(),
|
||||
geometry.data(),
|
||||
NULL,
|
||||
scratchAddressUnion,
|
||||
};
|
||||
@@ -837,30 +830,16 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
const VkAccelerationStructureBuildRangeInfoKHR *pBuildInfo = buildRangeInfos.data();
|
||||
ObjDisp(d)->CmdBuildAccelerationStructuresKHR(Unwrap(cmd), 1, &asGeomInfo, &pBuildInfo);
|
||||
|
||||
m_pDriver->AddPendingObjectCleanup(
|
||||
[d, uploadMem = asInfo->uploadMem, uploadBuf = asInfo->uploadBuf]() {
|
||||
ObjDisp(d)->DestroyBuffer(Unwrap(d), uploadBuf, NULL);
|
||||
ObjDisp(d)->FreeMemory(Unwrap(d), uploadMem, NULL);
|
||||
});
|
||||
|
||||
// Make sure the AS builds are serialised as the scratch mem is shared
|
||||
VkMemoryBarrier barrier = {
|
||||
const VkMemoryBarrier barrier = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||
NULL,
|
||||
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||
VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
|
||||
};
|
||||
ObjDisp(d)->CmdPipelineBarrier(Unwrap(cmd),
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1,
|
||||
&barrier, 0, VK_NULL_HANDLE, 0, VK_NULL_HANDLE);
|
||||
}
|
||||
|
||||
cmd = m_pDriver->GetInitStateCmd();
|
||||
if(cmd == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Couldn't acquire command buffer");
|
||||
return;
|
||||
ObjDisp(d)->CmdPipelineBarrier(
|
||||
Unwrap(cmd), VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR,
|
||||
VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, 0, 1, &barrier, 0, NULL, 0, NULL);
|
||||
}
|
||||
|
||||
// Copy the base AS to the captured one to reset it
|
||||
@@ -881,79 +860,24 @@ void VulkanAccelerationStructureManager::Apply(ResourceId id, VkInitialContents
|
||||
}
|
||||
}
|
||||
|
||||
VulkanAccelerationStructureManager::Allocation VulkanAccelerationStructureManager::CreateReadBackMemory(
|
||||
VkDevice device, VkDeviceSize size, VkDeviceSize alignment)
|
||||
GPUBuffer VulkanAccelerationStructureManager::CreateTempReadBackBuffer(VkDevice device,
|
||||
VkDeviceSize size)
|
||||
{
|
||||
VkBufferCreateInfo bufInfo = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
NULL,
|
||||
0,
|
||||
size,
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
|
||||
};
|
||||
GPUBuffer result;
|
||||
result.Create(m_pDriver, device, size, 1,
|
||||
GPUBuffer::eGPUBufferReadback | GPUBuffer::eGPUBufferAddressable);
|
||||
|
||||
// we make the buffer concurrently accessible by all queue families to not invalidate the
|
||||
// contents of the memory we're reading back from.
|
||||
bufInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
bufInfo.queueFamilyIndexCount = (uint32_t)m_pDriver->GetQueueFamilyIndices().size();
|
||||
bufInfo.pQueueFamilyIndices = m_pDriver->GetQueueFamilyIndices().data();
|
||||
m_pDriver->GetResourceManager()->SetInternalResource(GetResID(result.mem));
|
||||
m_pDriver->GetResourceManager()->SetInternalResource(GetResID(result.buf));
|
||||
|
||||
// spec requires that CONCURRENT must specify more than one queue family. If there is only one
|
||||
// queue family, we can safely use exclusive.
|
||||
if(bufInfo.queueFamilyIndexCount == 1)
|
||||
bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
|
||||
Allocation readbackmem;
|
||||
VkResult vkr = ObjDisp(device)->CreateBuffer(Unwrap(device), &bufInfo, NULL, &readbackmem.buf);
|
||||
if(vkr != VK_SUCCESS)
|
||||
{
|
||||
RDCERR("Failed to create readback buffer");
|
||||
return {};
|
||||
}
|
||||
|
||||
VkMemoryRequirements mrq = {};
|
||||
ObjDisp(device)->GetBufferMemoryRequirements(Unwrap(device), readbackmem.buf, &mrq);
|
||||
|
||||
if(alignment != 0)
|
||||
mrq.alignment = RDCMAX(mrq.alignment, alignment);
|
||||
|
||||
readbackmem.size = AlignUp(mrq.size, mrq.alignment);
|
||||
readbackmem.size =
|
||||
AlignUp(readbackmem.size, m_pDriver->GetDeviceProps().limits.nonCoherentAtomSize);
|
||||
|
||||
VkMemoryAllocateFlagsInfo flagsInfo = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
|
||||
NULL,
|
||||
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
|
||||
};
|
||||
VkMemoryAllocateInfo info = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
&flagsInfo,
|
||||
readbackmem.size,
|
||||
m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits),
|
||||
};
|
||||
|
||||
vkr = ObjDisp(device)->AllocateMemory(Unwrap(device), &info, NULL, &readbackmem.mem);
|
||||
if(vkr != VK_SUCCESS)
|
||||
{
|
||||
RDCERR("Failed to allocate readback memory");
|
||||
return {};
|
||||
}
|
||||
|
||||
vkr = ObjDisp(device)->BindBufferMemory(Unwrap(device), readbackmem.buf, readbackmem.mem, 0);
|
||||
if(vkr != VK_SUCCESS)
|
||||
{
|
||||
RDCERR("Failed to bind readback memory");
|
||||
return {};
|
||||
}
|
||||
|
||||
return readbackmem;
|
||||
return result;
|
||||
}
|
||||
|
||||
VulkanAccelerationStructureManager::Allocation VulkanAccelerationStructureManager::CreateReplayMemory(
|
||||
MemoryType memType, VkDeviceSize size, VkBufferUsageFlags extraUsageFlags)
|
||||
VulkanAccelerationStructureManager::Allocation VulkanAccelerationStructureManager::CreateTempReplayBuffer(
|
||||
MemoryType memType, VkDeviceSize size, VkDeviceSize alignment, VkBufferUsageFlags extraUsageFlags)
|
||||
{
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
|
||||
const VkBufferCreateInfo bufInfo = {
|
||||
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
NULL,
|
||||
@@ -963,54 +887,29 @@ VulkanAccelerationStructureManager::Allocation VulkanAccelerationStructureManage
|
||||
VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | extraUsageFlags,
|
||||
};
|
||||
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
|
||||
Allocation result;
|
||||
result.size = size;
|
||||
|
||||
VkResult vkr = ObjDisp(d)->CreateBuffer(Unwrap(d), &bufInfo, NULL, &result.buf);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
VkMemoryRequirements mrq = {};
|
||||
ObjDisp(d)->GetBufferMemoryRequirements(Unwrap(d), result.buf, &mrq);
|
||||
mrq.alignment = RDCMAX(mrq.alignment, alignment);
|
||||
|
||||
uint32_t memoryTypeIndex = 0;
|
||||
switch(memType)
|
||||
{
|
||||
case MemoryType::Upload:
|
||||
memoryTypeIndex = m_pDriver->GetUploadMemoryIndex(mrq.memoryTypeBits);
|
||||
break;
|
||||
case MemoryType::GPULocal:
|
||||
memoryTypeIndex = m_pDriver->GetGPULocalMemoryIndex(mrq.memoryTypeBits);
|
||||
break;
|
||||
case MemoryType::Readback:
|
||||
memoryTypeIndex = m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits);
|
||||
break;
|
||||
}
|
||||
result.memAlloc = m_pDriver->AllocateMemoryForResource(
|
||||
true, mrq, MemoryScope::InitialContentsFirstApplyOnly, memType);
|
||||
if(result.memAlloc.mem == VK_NULL_HANDLE)
|
||||
return {};
|
||||
|
||||
VkMemoryAllocateFlagsInfo flagsInfo = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
|
||||
NULL,
|
||||
VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT,
|
||||
};
|
||||
VkMemoryAllocateInfo info = {
|
||||
VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
&flagsInfo,
|
||||
size,
|
||||
memoryTypeIndex,
|
||||
};
|
||||
|
||||
vkr = ObjDisp(d)->AllocateMemory(Unwrap(d), &info, NULL, &result.mem);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
vkr = ObjDisp(d)->BindBufferMemory(Unwrap(d), result.buf, result.mem, 0);
|
||||
vkr = ObjDisp(d)->BindBufferMemory(Unwrap(d), result.buf, Unwrap(result.memAlloc.mem),
|
||||
result.memAlloc.offs);
|
||||
CHECK_VKR(m_pDriver, vkr);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool VulkanAccelerationStructureManager::FixUpReplayBDAs(
|
||||
VkAccelerationStructureInfo *asInfo, rdcarray<VkAccelerationStructureGeometryKHR> &geoms)
|
||||
VkAccelerationStructureInfo *asInfo, VkBuffer buf,
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> &geoms)
|
||||
{
|
||||
RDCASSERT(asInfo);
|
||||
RDCASSERT(asInfo->geometryData.size() == geoms.size());
|
||||
@@ -1018,7 +917,7 @@ bool VulkanAccelerationStructureManager::FixUpReplayBDAs(
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
|
||||
const VkBufferDeviceAddressInfo addrInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, NULL,
|
||||
asInfo->uploadBuf};
|
||||
buf};
|
||||
const VkDeviceAddress bufAddr = ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &addrInfo);
|
||||
|
||||
for(size_t i = 0; i < geoms.size(); ++i)
|
||||
@@ -1035,7 +934,7 @@ bool VulkanAccelerationStructureManager::FixUpReplayBDAs(
|
||||
if(tri.indexType != VK_INDEX_TYPE_NONE_KHR)
|
||||
tri.indexData.deviceAddress += bufAddr;
|
||||
|
||||
if(tri.transformData.deviceAddress != std::numeric_limits<VkDeviceAddress>::max())
|
||||
if(tri.transformData.deviceAddress != ~0ULL)
|
||||
tri.transformData.deviceAddress += bufAddr;
|
||||
else
|
||||
tri.transformData.deviceAddress = 0x0;
|
||||
@@ -1061,43 +960,27 @@ bool VulkanAccelerationStructureManager::FixUpReplayBDAs(
|
||||
|
||||
void VulkanAccelerationStructureManager::UpdateScratch(VkDeviceSize requiredSize)
|
||||
{
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
const VkPhysicalDevice physDev = m_pDriver->GetPhysDev();
|
||||
|
||||
VkPhysicalDeviceAccelerationStructurePropertiesKHR asProps = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR,
|
||||
};
|
||||
VkPhysicalDeviceProperties2 asPropsBase = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
|
||||
&asProps,
|
||||
};
|
||||
ObjDisp(physDev)->GetPhysicalDeviceProperties2(Unwrap(physDev), &asPropsBase);
|
||||
|
||||
requiredSize =
|
||||
AlignUp(requiredSize, (VkDeviceSize)asProps.minAccelerationStructureScratchOffsetAlignment);
|
||||
|
||||
// We serialise the AS builds, so reuse the existing scratch
|
||||
if(requiredSize > scratch.size || scratch.mem == VK_NULL_HANDLE)
|
||||
// We serialise the AS and OMM builds, so reuse the existing scratch
|
||||
if(requiredSize > scratch.memAlloc.size || scratch.memAlloc.mem == VK_NULL_HANDLE)
|
||||
{
|
||||
// Delete the previous
|
||||
if(scratch.mem != VK_NULL_HANDLE)
|
||||
{
|
||||
m_pDriver->AddPendingObjectCleanup([d, tmp = scratch]() {
|
||||
ObjDisp(d)->DestroyBuffer(Unwrap(d), tmp.buf, NULL);
|
||||
ObjDisp(d)->FreeMemory(Unwrap(d), tmp.mem, NULL);
|
||||
});
|
||||
const VkDevice d = m_pDriver->GetDev();
|
||||
const VkPhysicalDevice physDev = m_pDriver->GetPhysDev();
|
||||
|
||||
RDCDEBUG("AS build shared scratch changed to size %llu, flushing", requiredSize);
|
||||
m_pDriver->CloseInitStateCmd();
|
||||
m_pDriver->SubmitCmds();
|
||||
m_pDriver->FlushQ();
|
||||
}
|
||||
VkPhysicalDeviceAccelerationStructurePropertiesKHR asProps = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR,
|
||||
};
|
||||
VkPhysicalDeviceProperties2 asPropsBase = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
|
||||
&asProps,
|
||||
};
|
||||
ObjDisp(physDev)->GetPhysicalDeviceProperties2(Unwrap(physDev), &asPropsBase);
|
||||
|
||||
scratch =
|
||||
CreateReplayMemory(MemoryType::GPULocal, requiredSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
|
||||
if(scratch.mem == VK_NULL_HANDLE)
|
||||
scratch = CreateTempReplayBuffer(MemoryType::GPULocal, requiredSize,
|
||||
asProps.minAccelerationStructureScratchOffsetAlignment,
|
||||
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
|
||||
if(scratch.memAlloc.mem == VK_NULL_HANDLE)
|
||||
{
|
||||
RDCERR("Failed to allocate AS build data scratch buffer");
|
||||
RDCERR("Failed to allocate AS build scratch buffer");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1109,6 +992,10 @@ void VulkanAccelerationStructureManager::UpdateScratch(VkDeviceSize requiredSize
|
||||
|
||||
scratchAddressUnion.deviceAddress =
|
||||
ObjDisp(d)->GetBufferDeviceAddressKHR(Unwrap(d), &scratchAddressInfo);
|
||||
|
||||
// We do not need the buffer object, only the mem address
|
||||
m_pDriver->AddPendingObjectCleanup(
|
||||
[d, buf = scratch.buf]() { ObjDisp(d)->DestroyBuffer(Unwrap(d), buf, NULL); });
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,8 +46,6 @@ struct VkAccelerationStructureInfo
|
||||
VkDeviceSize stride;
|
||||
};
|
||||
|
||||
uint64_t GetSerialisedSize() const;
|
||||
|
||||
VkGeometryTypeKHR geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
|
||||
VkGeometryFlagsKHR flags;
|
||||
|
||||
@@ -65,21 +63,19 @@ struct VkAccelerationStructureInfo
|
||||
|
||||
uint64_t GetSerialisedSize() const;
|
||||
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> convertGeometryData() const;
|
||||
void convertGeometryData(rdcarray<VkAccelerationStructureGeometryKHR> &geometry) const;
|
||||
rdcarray<VkAccelerationStructureBuildRangeInfoKHR> getBuildRanges() const;
|
||||
|
||||
VkDevice device = VK_NULL_HANDLE;
|
||||
|
||||
VkAccelerationStructureTypeKHR type =
|
||||
VkAccelerationStructureTypeKHR::VK_ACCELERATION_STRUCTURE_TYPE_GENERIC_KHR;
|
||||
VkBuildAccelerationStructureFlagsKHR flags = 0;
|
||||
|
||||
rdcarray<GeometryData> geometryData;
|
||||
|
||||
VkDeviceMemory readbackMem = VK_NULL_HANDLE;
|
||||
GPUBuffer readbackMem;
|
||||
VkDeviceSize memSize = 0;
|
||||
|
||||
VkDeviceMemory uploadMem = VK_NULL_HANDLE;
|
||||
MemoryAllocation uploadAlloc;
|
||||
VkBuffer uploadBuf = VK_NULL_HANDLE;
|
||||
VkAccelerationStructureKHR replayAS = VK_NULL_HANDLE;
|
||||
|
||||
@@ -118,8 +114,7 @@ public:
|
||||
private:
|
||||
struct Allocation
|
||||
{
|
||||
VkDeviceMemory mem = VK_NULL_HANDLE;
|
||||
VkDeviceSize size = 0;
|
||||
MemoryAllocation memAlloc;
|
||||
VkBuffer buf = VK_NULL_HANDLE;
|
||||
};
|
||||
|
||||
@@ -130,11 +125,11 @@ private:
|
||||
VkDeviceSize offset = 0;
|
||||
};
|
||||
|
||||
Allocation CreateReadBackMemory(VkDevice device, VkDeviceSize size, VkDeviceSize alignment = 0);
|
||||
Allocation CreateReplayMemory(MemoryType memType, VkDeviceSize size,
|
||||
VkBufferUsageFlags extraUsageFlags = 0);
|
||||
GPUBuffer CreateTempReadBackBuffer(VkDevice device, VkDeviceSize size);
|
||||
Allocation CreateTempReplayBuffer(MemoryType memType, VkDeviceSize size, VkDeviceSize alignment,
|
||||
VkBufferUsageFlags extraUsageFlags = 0);
|
||||
|
||||
bool FixUpReplayBDAs(VkAccelerationStructureInfo *asInfo,
|
||||
bool FixUpReplayBDAs(VkAccelerationStructureInfo *asInfo, VkBuffer buf,
|
||||
rdcarray<VkAccelerationStructureGeometryKHR> &geoms);
|
||||
|
||||
void UpdateScratch(VkDeviceSize requiredSize);
|
||||
|
||||
@@ -454,6 +454,8 @@ enum class MemoryScope : uint8_t
|
||||
// allocated the same way
|
||||
ImmutableReplayDebug = InitialContents,
|
||||
IndirectReadback,
|
||||
// Same as initial contents but freed after first Serialise/Apply cycle
|
||||
InitialContentsFirstApplyOnly,
|
||||
Count,
|
||||
};
|
||||
|
||||
|
||||
@@ -2305,6 +2305,8 @@ void WrappedVulkan::StartFrameCapture(DeviceOwnedWindow devWnd)
|
||||
GetResourceManager()->ClearReferencedResources();
|
||||
GetResourceManager()->ClearReferencedMemory();
|
||||
|
||||
CheckPendingCommandBufferCallbacks();
|
||||
|
||||
// need to do all this atomically so that no other commands
|
||||
// will check to see if they need to markdirty or markpendingdirty
|
||||
// and go into the frame record.
|
||||
@@ -2346,7 +2348,6 @@ void WrappedVulkan::StartFrameCapture(DeviceOwnedWindow devWnd)
|
||||
}
|
||||
|
||||
m_PreparedNotSerialisedInitStates.clear();
|
||||
CheckPendingCommandBufferCallbacks();
|
||||
GetResourceManager()->PrepareInitialContents();
|
||||
|
||||
{
|
||||
@@ -3264,7 +3265,7 @@ RDResult WrappedVulkan::ReadLogInitialisation(RDCFile *rdc, bool storeStructured
|
||||
GetReplay()->WriteFrameRecord().frameInfo.initDataSize =
|
||||
chunkInfos[(VulkanChunk)SystemChunk::InitialContents].totalsize;
|
||||
|
||||
RDCDEBUG("Allocating %llu persistant bytes of memory for the log.",
|
||||
RDCDEBUG("Allocating %llu persistent bytes of memory for the log.",
|
||||
GetReplay()->WriteFrameRecord().frameInfo.persistentSize);
|
||||
|
||||
// ensure the capture at least created a device and fetched a queue.
|
||||
@@ -3780,6 +3781,8 @@ void WrappedVulkan::ApplyInitialContents()
|
||||
SubmitCmds();
|
||||
FlushQ();
|
||||
}
|
||||
|
||||
FreeAllMemory(MemoryScope::InitialContentsFirstApplyOnly);
|
||||
}
|
||||
|
||||
bool WrappedVulkan::ContextProcessChunk(ReadSerialiser &ser, VulkanChunk chunk)
|
||||
|
||||
@@ -588,10 +588,6 @@ bool WrappedVulkan::Prepare_InitialState(WrappedVkRes *res)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Skip empty AS input data (BLASes are force ref-ed)
|
||||
if(record->accelerationStructureInfo->memSize == 0)
|
||||
return true;
|
||||
|
||||
// The input buffers and metadata have all been created by this point, so we just need to
|
||||
// assemble a VkInitialContents
|
||||
VkInitialContents ic;
|
||||
|
||||
@@ -298,6 +298,7 @@ rdcstr DoStringise(const MemoryScope &el)
|
||||
{
|
||||
STRINGISE_ENUM_CLASS(InitialContents);
|
||||
STRINGISE_ENUM_CLASS(IndirectReadback);
|
||||
STRINGISE_ENUM_CLASS(InitialContentsFirstApplyOnly);
|
||||
}
|
||||
END_ENUM_STRINGISE()
|
||||
}
|
||||
|
||||
@@ -1014,6 +1014,7 @@ void WrappedVulkan::Shutdown()
|
||||
}
|
||||
|
||||
FreeAllMemory(MemoryScope::InitialContents);
|
||||
FreeAllMemory(MemoryScope::InitialContentsFirstApplyOnly);
|
||||
|
||||
if(m_MemoryFreeThread)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user