Vulkan AS rebuild-on-replay: BDA tracking

In order to unify the input structs between host and device builds, the AS API uses addresses rather than VkBuffer handles, but there's no way of retrieving a handle from an address, so we have to track all VkBuffer binds to map their addresses back to the owning handle. This is done in TrackInputBuffer(..) and UntrackInputBuffer(..), the latter for when the resource is destroyed. A complexity here is that users are allowed to manipulate the buffer device address (BDA), so when doing the lookup we have to find the buffer the address lies in and return the offset too.
This commit is contained in:
Cam Mannett
2024-08-12 16:29:39 +01:00
committed by Baldur Karlsson
parent 808a14dede
commit 647313ae14
18 changed files with 303 additions and 156 deletions
+2
View File
@@ -139,6 +139,8 @@ set(sources
core/image_viewer.cpp
core/core.h
core/crash_handler.h
core/gpu_address_range_tracker.cpp
core/gpu_address_range_tracker.h
core/target_control.cpp
core/remote_server.cpp
core/remote_server.h
@@ -0,0 +1,135 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include "core/gpu_address_range_tracker.h"
// Registers a GPU address range with the tracker.
//
// The insertion index is chosen with std::lower_bound on range.start, using GPUAddressRange's
// comparison against an address, so the array keeps the ordering that comparison defines and the
// lookup functions can binary search it. The write lock is held for the whole operation.
void GPUAddressRangeTracker::AddTo(const GPUAddressRange &range)
{
  SCOPED_WRITELOCK(addressLock);

  const size_t insertIdx =
      std::lower_bound(addresses.begin(), addresses.end(), range.start) - addresses.begin();

  addresses.insert(insertIdx, range);
}
// Unregisters a previously added range.
//
// The entry removed is the one whose start AND id both match the given range; realEnd/oobEnd are
// not compared. If no such entry exists an error is logged (after the lock has been released) and
// the tracker is left unchanged.
void GPUAddressRangeTracker::RemoveFrom(const GPUAddressRange &range)
{
  {
    SCOPED_WRITELOCK(addressLock);

    const size_t firstIdx =
        std::lower_bound(addresses.begin(), addresses.end(), range.start) - addresses.begin();

    // several buffers can share the same start address, so walk every entry with this start
    // until we hit the one belonging to this exact resource
    for(size_t idx = firstIdx; idx < addresses.size() && addresses[idx].start == range.start; ++idx)
    {
      if(addresses[idx].id != range.id)
        continue;

      addresses.erase(idx);
      return;
    }
  }

  RDCERR("Couldn't find matching range to remove for %s", ToStr(range.id).c_str());
}
// Finds the tracked resource whose [start, realEnd) range contains addr.
//
// On success id receives the resource's ID and offs the distance of addr from the start of that
// range. If addr is 0, or no tracked range contains it, id is left as a default-constructed
// ResourceId and offs as 0.
void GPUAddressRangeTracker::GetResIDFromAddr(GPUAddressRange::Address addr, ResourceId &id,
                                              uint64_t &offs)
{
  id = ResourceId();
  offs = 0;

  if(addr == 0)
    return;

  GPUAddressRange range;

  {
    SCOPED_READLOCK(addressLock);

    auto it = std::lower_bound(addresses.begin(), addresses.end(), addr);
    if(it == addresses.end())
      return;

    range = *it;

    // find the largest resource containing this address - not perfect but helps with trivially bad
    // aliases where a tiny resource and a large resource are co-situated and the larger resource
    // needs to be used for validity.
    //
    // The end() comparison has to come first: without it, when 'it' is the last tracked range the
    // loop condition would dereference one-past-the-end of the array.
    while((it + 1) != addresses.end() && (it + 1)->start <= addr &&
          (it + 1)->realEnd > range.realEnd)
    {
      ++it;
      range = *it;
    }
  }

  // the candidate was picked by start address only, so confirm addr really lies inside it
  if(addr < range.start || addr >= range.realEnd)
    return;

  id = range.id;
  offs = addr - range.start;
}
// Variant of GetResIDFromAddr that tolerates addresses past the end of the resource itself.
//
// The address is accepted as long as it lies in [start, oobEnd) of the selected range, instead of
// [start, realEnd). On success id receives the resource's ID and offs the distance of addr from
// the start of the range. If addr is 0 or no range qualifies, id is left as a default-constructed
// ResourceId and offs as 0.
void GPUAddressRangeTracker::GetResIDFromAddrAllowOutOfBounds(GPUAddressRange::Address addr,
                                                              ResourceId &id, uint64_t &offs)
{
  id = ResourceId();
  offs = 0;

  if(addr == 0)
    return;

  GPUAddressRange range;

  {
    SCOPED_READLOCK(addressLock);

    auto it = std::lower_bound(addresses.begin(), addresses.end(), addr);
    if(it == addresses.end())
      return;

    range = *it;

    // find the largest resource containing this address - not perfect but helps with trivially bad
    // aliases where a tiny resource and a large resource are co-situated and the larger resource
    // needs to be used for validity.
    //
    // The end() comparison has to come first: without it, when 'it' is the last tracked range the
    // loop condition would dereference one-past-the-end of the array.
    while((it + 1) != addresses.end() && (it + 1)->start <= addr &&
          (it + 1)->realEnd > range.realEnd)
    {
      ++it;
      range = *it;
    }
  }

  if(addr < range.start)
    return;

  // still enforce the OOB end on ranges - which is the remaining range in the backing store.
  // Otherwise we could end up passing through invalid addresses stored in stale descriptors
  if(addr >= range.oobEnd)
    return;

  id = range.id;
  offs = addr - range.start;
}
@@ -0,0 +1,63 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2024 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
#include <functional>
#include "api/replay/resourceid.h"
#include "common/threading.h"
// The span of GPU virtual addresses associated with one tracked resource.
struct GPUAddressRange
{
  using Address = uint64_t;

  // first address of the range
  Address start;
  // exclusive upper bound checked by GPUAddressRangeTracker::GetResIDFromAddr
  Address realEnd;
  // exclusive upper bound checked by GPUAddressRangeTracker::GetResIDFromAddrAllowOutOfBounds
  Address oobEnd;
  // resource the range belongs to
  ResourceId id;

  // Comparison against a bare address, as used by the std::lower_bound calls in the tracker.
  // Note the direction: a range is "less than" an address when that address is below the
  // range's start.
  bool operator<(const Address &o) const { return o < start; }
};
// Keeps a searchable list of GPU address ranges so that a raw GPU address can be mapped back to
// the resource it belongs to. The member functions take addressLock around their accesses to
// the addresses array.
struct GPUAddressRangeTracker
{
  GPUAddressRangeTracker() = default;

  // no copying
  GPUAddressRangeTracker(const GPUAddressRangeTracker &) = delete;
  GPUAddressRangeTracker &operator=(const GPUAddressRangeTracker &) = delete;

  // tracked ranges, ordered as required by the binary searches in the member functions
  rdcarray<GPUAddressRange> addresses;
  // taken for writing by AddTo/RemoveFrom and for reading by the lookups
  Threading::RWLock addressLock;

  // register / unregister a range
  void AddTo(const GPUAddressRange &range);
  void RemoveFrom(const GPUAddressRange &range);

  // map an address to its resource ID and offset within that resource's range; the
  // AllowOutOfBounds variant bounds the address by oobEnd rather than realEnd
  void GetResIDFromAddr(GPUAddressRange::Address addr, ResourceId &id, uint64_t &offs);
  void GetResIDFromAddrAllowOutOfBounds(GPUAddressRange::Address addr, ResourceId &id,
                                        uint64_t &offs);
};
-112
View File
@@ -2306,118 +2306,6 @@ bool D3D12ResourceManager::ResourceTypeRelease(ID3D12DeviceChild *res)
return true;
}
// Registers a GPU address range with the tracker, inserting it at the index std::lower_bound
// reports for range.start so the array stays ordered for later binary searches. The write lock
// is held for the whole operation.
void GPUAddressRangeTracker::AddTo(const GPUAddressRange &range)
{
  SCOPED_WRITELOCK(addressLock);

  const size_t insertIdx =
      std::lower_bound(addresses.begin(), addresses.end(), range.start) - addresses.begin();

  addresses.insert(insertIdx, range);
}
// Unregisters the entry whose start and id both match the given range. If there is no such
// entry, an error is logged once the lock has been released and nothing is removed.
void GPUAddressRangeTracker::RemoveFrom(const GPUAddressRange &range)
{
  {
    SCOPED_WRITELOCK(addressLock);

    const size_t firstIdx =
        std::lower_bound(addresses.begin(), addresses.end(), range.start) - addresses.begin();

    // several buffers can share the same start address, so walk every entry with this start
    // until we hit the one belonging to this exact resource
    for(size_t idx = firstIdx; idx < addresses.size() && addresses[idx].start == range.start; ++idx)
    {
      if(addresses[idx].id != range.id)
        continue;

      addresses.erase(idx);
      return;
    }
  }

  RDCERR("Couldn't find matching range to remove for %s", ToStr(range.id).c_str());
}
// Finds the tracked resource whose [start, realEnd) range contains addr.
//
// On success id receives the resource's ID and offs the distance of addr from the start of that
// range. If addr is 0, or no tracked range contains it, id is left as a default-constructed
// ResourceId and offs as 0.
void GPUAddressRangeTracker::GetResIDFromAddr(D3D12_GPU_VIRTUAL_ADDRESS addr, ResourceId &id,
                                              UINT64 &offs)
{
  id = ResourceId();
  offs = 0;

  if(addr == 0)
    return;

  GPUAddressRange range;

  // the array is only read here, so the read side of the lock is taken
  {
    SCOPED_READLOCK(addressLock);

    auto it = std::lower_bound(addresses.begin(), addresses.end(), addr);
    if(it == addresses.end())
      return;

    range = *it;

    // find the largest resource containing this address - not perfect but helps with trivially bad
    // aliases where a tiny resource and a large resource are co-situated and the larger resource
    // needs to be used for validity.
    //
    // The end() comparison has to come first: without it, when 'it' is the last tracked range the
    // loop condition would dereference one-past-the-end of the array.
    while((it + 1) != addresses.end() && (it + 1)->start <= addr &&
          (it + 1)->realEnd > range.realEnd)
    {
      ++it;
      range = *it;
    }
  }

  // the candidate was picked by start address only, so confirm addr really lies inside it
  if(addr < range.start || addr >= range.realEnd)
    return;

  id = range.id;
  offs = addr - range.start;
}
// Variant of GetResIDFromAddr that tolerates addresses past the end of the resource itself.
//
// The address is accepted as long as it lies in [start, oobEnd) of the selected range, instead of
// [start, realEnd). On success id receives the resource's ID and offs the distance of addr from
// the start of the range. If addr is 0 or no range qualifies, id is left as a default-constructed
// ResourceId and offs as 0.
void GPUAddressRangeTracker::GetResIDFromAddrAllowOutOfBounds(D3D12_GPU_VIRTUAL_ADDRESS addr,
                                                              ResourceId &id, UINT64 &offs)
{
  id = ResourceId();
  offs = 0;

  if(addr == 0)
    return;

  GPUAddressRange range;

  // the array is only read here, so the read side of the lock is taken
  {
    SCOPED_READLOCK(addressLock);

    auto it = std::lower_bound(addresses.begin(), addresses.end(), addr);
    if(it == addresses.end())
      return;

    range = *it;

    // find the largest resource containing this address - not perfect but helps with trivially bad
    // aliases where a tiny resource and a large resource are co-situated and the larger resource
    // needs to be used for validity.
    //
    // The end() comparison has to come first: without it, when 'it' is the last tracked range the
    // loop condition would dereference one-past-the-end of the array.
    while((it + 1) != addresses.end() && (it + 1)->start <= addr &&
          (it + 1)->realEnd > range.realEnd)
    {
      ++it;
      range = *it;
    }
  }

  if(addr < range.start)
    return;

  // still enforce the OOB end on ranges - which is the remaining range in the backing store.
  // Otherwise we could end up passing through invalid addresses stored in stale descriptors
  if(addr >= range.oobEnd)
    return;

  id = range.id;
  offs = addr - range.start;
}
void D3D12GpuBuffer::AddRef()
{
InterlockedIncrement(&m_RefCount);
+1 -30
View File
@@ -26,6 +26,7 @@
#include "common/wrapped_pool.h"
#include "core/core.h"
#include "core/gpu_address_range_tracker.h"
#include "core/intervals.h"
#include "core/resource_manager.h"
#include "core/sparse_page_table.h"
@@ -510,36 +511,6 @@ struct CmdListRecordingInfo
class WrappedID3D12Resource;
using D3D12BufferOffset = UINT64;
// The span of GPU virtual addresses associated with one tracked resource.
struct GPUAddressRange
{
  // first address of the range
  D3D12_GPU_VIRTUAL_ADDRESS start;
  // exclusive upper bound checked by GPUAddressRangeTracker::GetResIDFromAddr
  D3D12_GPU_VIRTUAL_ADDRESS realEnd;
  // exclusive upper bound checked by GPUAddressRangeTracker::GetResIDFromAddrAllowOutOfBounds
  D3D12_GPU_VIRTUAL_ADDRESS oobEnd;
  // resource the range belongs to
  ResourceId id;

  // Comparison against a bare address, as used by the std::lower_bound calls in the tracker.
  // Note the direction: a range is "less than" an address when that address is below the
  // range's start.
  bool operator<(const D3D12_GPU_VIRTUAL_ADDRESS &o) const { return o < start; }
};
// Keeps a searchable list of GPU address ranges so that a raw GPU virtual address can be mapped
// back to the resource it belongs to.
struct GPUAddressRangeTracker
{
  GPUAddressRangeTracker() {}

  // no copying - explicitly deleted so that an accidental copy is rejected at compile time
  // instead of only surfacing as an unresolved symbol at link time
  GPUAddressRangeTracker(const GPUAddressRangeTracker &) = delete;
  GPUAddressRangeTracker &operator=(const GPUAddressRangeTracker &) = delete;

  // tracked ranges, ordered as required by the binary searches in the member functions
  rdcarray<GPUAddressRange> addresses;
  // taken for writing by AddTo/RemoveFrom and for reading by the lookups
  Threading::RWLock addressLock;

  // register / unregister a range
  void AddTo(const GPUAddressRange &range);
  void RemoveFrom(const GPUAddressRange &range);

  // map an address to its resource ID and offset within that resource's range; the
  // AllowOutOfBounds variant bounds the address by oobEnd rather than realEnd
  void GetResIDFromAddr(D3D12_GPU_VIRTUAL_ADDRESS addr, ResourceId &id, UINT64 &offs);
  void GetResIDFromAddrAllowOutOfBounds(D3D12_GPU_VIRTUAL_ADDRESS addr, ResourceId &id,
                                        UINT64 &offs);
};
struct MapState
{
ID3D12Resource *res;
@@ -39,6 +39,11 @@ constexpr VkDeviceSize handleCountSize = 8;
constexpr VkDeviceSize asBufferAlignment = 256;
}
// Constructs the manager, storing the given driver pointer in m_pDriver. No other work is done
// at construction time.
VulkanAccelerationStructureManager::VulkanAccelerationStructureManager(WrappedVulkan *driver)
: m_pDriver(driver)
{
}
bool VulkanAccelerationStructureManager::Prepare(VkAccelerationStructureKHR unwrappedAs,
const rdcarray<uint32_t> &queueFamilyIndices,
ASMemory &result)
@@ -28,6 +28,12 @@
class WrappedVulkan;
// Per-acceleration-structure tracking data. For now this is only the built flag; it will
// eventually hold all the AS build data.
struct VkAccelerationStructureInfo
{
  // starts out false on construction
  bool accelerationStructureBuilt{false};
};
class VulkanAccelerationStructureManager
{
public:
@@ -37,7 +43,7 @@ public:
bool isTLAS;
};
VulkanAccelerationStructureManager(WrappedVulkan *driver) : m_pDriver(driver) {}
explicit VulkanAccelerationStructureManager(WrappedVulkan *driver);
// Called when the initial state is prepared. Any TLAS and BLAS data is copied into temporary
// buffers and the handles for that memory and the buffers is stored in the init state
+6 -6
View File
@@ -191,14 +191,14 @@ WrappedVulkan::~WrappedVulkan()
SAFE_DELETE(m_StoredStructuredData);
SAFE_DELETE(m_ASManager);
// in case the application leaked some objects, avoid crashing trying
// to release them ourselves by clearing the resource manager.
// In a well-behaved application, this should be a no-op.
m_ResourceManager->ClearWithoutReleasing();
SAFE_DELETE(m_ResourceManager);
SAFE_DELETE(m_ASManager);
SAFE_DELETE(m_FrameReader);
for(size_t i = 0; i < m_ThreadSerialisers.size(); i++)
@@ -1332,16 +1332,16 @@ static const VkExtensionProperties supportedExtensions[] = {
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_8BIT_STORAGE_SPEC_VERSION,
},
{
VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION,
},
#ifdef VK_KHR_android_surface
{
VK_KHR_ANDROID_SURFACE_EXTENSION_NAME,
VK_KHR_ANDROID_SURFACE_SPEC_VERSION,
},
#endif
{
VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME,
VK_KHR_ACCELERATION_STRUCTURE_SPEC_VERSION,
},
{
VK_KHR_BIND_MEMORY_2_EXTENSION_NAME,
VK_KHR_BIND_MEMORY_2_SPEC_VERSION,
+9 -2
View File
@@ -25,6 +25,7 @@
#pragma once
#include "common/timing.h"
#include "core/gpu_address_range_tracker.h"
#include "serialise/serialiser.h"
#include "vk_acceleration_structure.h"
#include "vk_common.h"
@@ -978,6 +979,9 @@ private:
bytebuf m_MaskedMapData;
GPUAddressRangeTracker m_AddressTracker;
GPUAddressRange CreateAddressRange(VkDevice device, VkBuffer buffer);
// on replay we may need to allocate several bits of temporary memory, so the single-region
// doesn't work as well. We're not quite as performance-sensitive so we allocate 4MB per thread
// and use it in a ring-buffer fashion. This allows multiple allocations to live at once as long
@@ -1131,8 +1135,8 @@ private:
const VulkanRenderState &renderState);
// no copy semantics
WrappedVulkan(const WrappedVulkan &);
WrappedVulkan &operator=(const WrappedVulkan &);
WrappedVulkan(const WrappedVulkan &) = delete;
WrappedVulkan &operator=(const WrappedVulkan &) = delete;
VkBool32 DebugCallback(MessageSeverity severity, MessageCategory category, int messageCode,
const char *pMessageId, const char *pMessage);
@@ -1254,6 +1258,9 @@ public:
void ChooseMemoryIndices();
void TrackBufferAddress(VkDevice device, VkBuffer buffer);
void UntrackBufferAddress(VkDevice device, VkBuffer buffer);
// Returns the m_EventFlags entry indexed by eid. No validation of eid is done here.
EventFlags GetEventFlags(uint32_t eid) { return m_EventFlags[eid]; }
// Returns, by value, the usage list stored in m_ResourceUses for id.
// NOTE(review): if m_ResourceUses is a map type, operator[] may insert an empty entry for an
// unknown id - confirm against the member's declaration.
rdcarray<EventUsage> GetUsage(ResourceId id) { return m_ResourceUses[id]; }
// return the pre-selected device and queue
+2 -1
View File
@@ -577,7 +577,8 @@ bool WrappedVulkan::Prepare_InitialState(WrappedVkRes *res)
else if(type == eResAccelerationStructureKHR)
{
VkResourceRecord *record = GetResourceManager()->GetResourceRecord(id);
if(!record->accelerationStructureBuilt)
if(!record->accelerationStructureInfo->accelerationStructureBuilt)
{
RDCDEBUG("Skipping AS %s as it has not been built", ToStr(id).c_str());
return true;
+49
View File
@@ -28,6 +28,55 @@
RDOC_CONFIG(bool, Vulkan_Debug_MemoryAllocationLogging, false,
"Output verbose debug logging messages when allocating internal memory.");
// Builds the device-address range used to track a buffer in m_AddressTracker.
//
// Returns a value-initialised range (whose id compares equal to ResourceId()) when the buffer is
// not registered in m_DeviceAddressResources, or when no resource record / bound-memory record is
// available for it. Callers treat such a range as "nothing to track".
//
// Otherwise the returned range is:
//   start   - the address reported by GetBufferDeviceAddressKHR
//   realEnd - start + record->memSize
//   oobEnd  - start + (memrecord->memSize - record->memOffset), i.e. up to the end of the memory
//             record the buffer is based on
//   id      - the buffer record's resource ID
GPUAddressRange WrappedVulkan::CreateAddressRange(VkDevice device, VkBuffer buffer)
{
  bool isBDA = false;
  {
    SCOPED_LOCK(m_DeviceAddressResourcesLock);
    isBDA = m_DeviceAddressResources.IDs.contains(GetResID(buffer));
  }

  if(!isBDA)
    return {};

  VkResourceRecord *record = GetRecord(buffer);
  if(record == NULL)
    return {};

  // This is also reached from the destroy path, where the buffer may never have had memory bound.
  // In that case there is no memory record to size the range with, and the buffer has no address
  // worth querying, so bail out before touching the driver rather than dereferencing NULL.
  // NOTE(review): assumes GetResourceRecord returns NULL for an unknown/empty ID - confirm.
  VkResourceRecord *memrecord = GetResourceManager()->GetResourceRecord(record->baseResourceMem);
  if(memrecord == NULL)
    return {};

  const VkBufferDeviceAddressInfo addrInfo = {
      VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      NULL,
      Unwrap(buffer),
  };
  const VkDeviceAddress address =
      ObjDisp(device)->GetBufferDeviceAddressKHR(Unwrap(device), &addrInfo);

  return {
      address,
      address + record->memSize,
      address + (memrecord->memSize - record->memOffset),
      record->GetResourceID(),
  };
}
// Starts tracking the device address range of a buffer, so that addresses inside it can later be
// mapped back to the buffer through m_AddressTracker. Does nothing when CreateAddressRange
// yields a range with an empty resource ID.
void WrappedVulkan::TrackBufferAddress(VkDevice device, VkBuffer buffer)
{
  const GPUAddressRange bufferRange = CreateAddressRange(device, buffer);

  const bool nothingToTrack = (bufferRange.id == ResourceId());
  if(!nothingToTrack)
    m_AddressTracker.AddTo(bufferRange);
}
// Stops tracking the device address range of a buffer. The range is recomputed with
// CreateAddressRange and removed from m_AddressTracker; nothing happens when that range has an
// empty resource ID.
void WrappedVulkan::UntrackBufferAddress(VkDevice device, VkBuffer buffer)
{
  const GPUAddressRange bufferRange = CreateAddressRange(device, buffer);

  const bool nothingToUntrack = (bufferRange.id == ResourceId());
  if(!nothingToUntrack)
    m_AddressTracker.RemoveFrom(bufferRange);
}
void WrappedVulkan::ChooseMemoryIndices()
{
// we need to do this little dance because Get*MemoryIndex checks to see if the existing
+4
View File
@@ -24,6 +24,7 @@
#include "vk_resources.h"
#include "maths/vec.h"
#include "vk_acceleration_structure.h"
#include "vk_info.h"
WRAPPED_POOL_INST(WrappedVkInstance)
@@ -3981,6 +3982,9 @@ VkResourceRecord::~VkResourceRecord()
if(resType == eResCommandPool)
SAFE_DELETE(cmdPoolInfo);
if(resType == eResAccelerationStructureKHR)
SAFE_DELETE(accelerationStructureInfo);
}
void VkResourceRecord::MarkImageFrameReferenced(VkResourceRecord *img, const ImageRange &range,
+2 -1
View File
@@ -2194,6 +2194,7 @@ inline FrameRefType MarkMemoryReferenced(std::unordered_map<ResourceId, MemRefs>
struct DescUpdateTemplate;
struct ImageLayouts;
struct VkAccelerationStructureInfo;
struct VkResourceRecord : public ResourceRecord
{
@@ -2285,7 +2286,7 @@ public:
DescPoolInfo *descPoolInfo; // only for descriptor pools
CmdPoolInfo *cmdPoolInfo; // only for command pools
uint32_t queueFamilyIndex; // only for queues
bool accelerationStructureBuilt; // only for acceleration structures
VkAccelerationStructureInfo *accelerationStructureInfo; // only for acceleration structures
};
VkResourceRecord *bakedCommands;
@@ -238,6 +238,9 @@ void WrappedVulkan::vkDestroyBuffer(VkDevice device, VkBuffer buffer, const VkAl
if(buffer == VK_NULL_HANDLE)
return;
if(IsCaptureMode(m_State))
UntrackBufferAddress(device, buffer);
// artificially extend the lifespan of buffer device address memory or buffers, to ensure their
// opaque capture address isn't re-used before the capture completes
{
@@ -1336,9 +1336,7 @@ void WrappedVulkan::CaptureQueueSubmit(VkQueue queue,
capDescriptors.clear();
for(VkResourceRecord *asRecord : accelerationStructures)
{
asRecord->accelerationStructureBuilt = true;
}
asRecord->accelerationStructureInfo->accelerationStructureBuilt = true;
}
template <typename SerialiserType>
@@ -1525,6 +1525,8 @@ VkResult WrappedVulkan::vkBindBufferMemory(VkDevice device, VkBuffer buffer, VkD
// memory that has been allocated but not used, but that will be skipped or postponed as
// appropriate.
GetResourceManager()->MarkDirtyResource(GetResID(memory));
TrackBufferAddress(device, buffer);
}
return ret;
@@ -3026,6 +3028,8 @@ VkResult WrappedVulkan::vkBindBufferMemory2(VkDevice device, uint32_t bindInfoCo
// memory that has been allocated but not used, but that will be skipped or postponed as
// appropriate.
GetResourceManager()->MarkDirtyResource(GetResID(pBindInfos[i].memory));
TrackBufferAddress(device, pBindInfos[i].buffer);
}
}
@@ -3379,6 +3383,8 @@ VkResult WrappedVulkan::vkCreateAccelerationStructureKHR(
record->AddChunk(chunk);
record->AddParent(bufferRecord);
record->accelerationStructureInfo = new VkAccelerationStructureInfo();
// store the base resource
record->baseResource = bufferRecord->GetResourceID();
record->baseResourceMem = bufferRecord->baseResource;
+2
View File
@@ -206,6 +206,7 @@
<ClInclude Include="common\timing.h" />
<ClInclude Include="common\wrapped_pool.h" />
<ClInclude Include="core\bit_flag_iterator.h" />
<ClInclude Include="core\gpu_address_range_tracker.h" />
<ClInclude Include="core\settings.h" />
<ClInclude Include="core\core.h" />
<ClInclude Include="core\crash_handler.h" />
@@ -533,6 +534,7 @@
<ClCompile Include="common\dds_readwrite.cpp" />
<ClCompile Include="common\threading_tests.cpp" />
<ClCompile Include="core\bit_flag_iterator_tests.cpp" />
<ClCompile Include="core\gpu_address_range_tracker.cpp" />
<ClCompile Include="core\settings.cpp" />
<ClCompile Include="core\core.cpp">
<AdditionalOptions>/bigobj %(AdditionalOptions)</AdditionalOptions>
+6
View File
@@ -561,6 +561,9 @@
<ClInclude Include="common\result.h">
<Filter>Common</Filter>
</ClInclude>
<ClInclude Include="core\gpu_address_range_tracker.h">
<Filter>Core</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="maths\camera.cpp">
@@ -956,6 +959,9 @@
<ClCompile Include="replay\dummy_driver.cpp">
<Filter>Replay</Filter>
</ClCompile>
<ClCompile Include="core\gpu_address_range_tracker.cpp">
<Filter>Core</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="os\win32\comexport.def">