diff --git a/renderdoc/core/resource_manager.h b/renderdoc/core/resource_manager.h
index 72d62e639..4f2c5cd21 100644
--- a/renderdoc/core/resource_manager.h
+++ b/renderdoc/core/resource_manager.h
@@ -262,6 +262,11 @@ struct ResourceRecord
 	void MarkResourceFrameReferenced(ResourceId id, FrameRefType refType);
 	void AddResourceReferences(ResourceRecordHandler *mgr);
 
+	void AddReferencedIDs(std::set<ResourceId> &ids)
+	{
+		for(auto it=m_FrameRefs.begin(); it != m_FrameRefs.end(); ++it)
+			ids.insert(it->first);
+	}
 
 	int UpdateCount;
diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp
index 38d0fd382..1c2f9d431 100644
--- a/renderdoc/driver/vulkan/vk_core.cpp
+++ b/renderdoc/driver/vulkan/vk_core.cpp
@@ -618,9 +618,12 @@ void WrappedVulkan::FinishCapture()
 	ObjDisp(GetDev())->DeviceWaitIdle(Unwrap(GetDev()));
 
 	{
-		SCOPED_LOCK(m_CurrentMapsLock);
-		for(auto it = m_CurrentMaps.begin(); it != m_CurrentMaps.end(); ++it)
-			SAFE_DELETE_ARRAY(it->second.refData);
+		SCOPED_LOCK(m_CoherentMapsLock);
+		for(auto it = m_CoherentMaps.begin(); it != m_CoherentMaps.end(); ++it)
+		{
+			Serialiser::FreeAlignedBuffer((*it)->memMapState->refData);
+			(*it)->memMapState->refData = NULL;
+		}
 	}
 }
diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h
index d3f485f8e..6ee49d3be 100644
--- a/renderdoc/driver/vulkan/vk_core.h
+++ b/renderdoc/driver/vulkan/vk_core.h
@@ -173,6 +173,7 @@ private:
 		uint32_t uploadMemIndex;
 		uint32_t GPULocalMemIndex;
 
+		VkPhysicalDeviceMemoryProperties *fakeMemProps;
 		uint32_t *memIdxMap;
 
 		VkPhysicalDeviceFeatures features;
@@ -388,13 +389,9 @@ private:
 	// capture-side data
 
-	// holds the current list of mapped memory. Locked against concurrent use
-	// VKTODOLOW once maps are handled properly we will know which maps must be
-	// treated as coherent and this will be a vector of VkResourceRecords to
-	// iterate over and flush changes out at any queuesubmit. Then the main
-	// MapState can be moved into the resource record
-	map<ResourceId, MapState> m_CurrentMaps;
-	Threading::CriticalSection m_CurrentMapsLock;
+	// holds the current list of coherent mapped memory. Locked against concurrent use
+	vector<VkResourceRecord*> m_CoherentMaps;
+	Threading::CriticalSection m_CoherentMapsLock;
 
 	// used both on capture and replay side to track image layouts. Only locked
 	// in capture
diff --git a/renderdoc/driver/vulkan/vk_resources.cpp b/renderdoc/driver/vulkan/vk_resources.cpp
index 8b50d12b8..8a1665d2f 100644
--- a/renderdoc/driver/vulkan/vk_resources.cpp
+++ b/renderdoc/driver/vulkan/vk_resources.cpp
@@ -497,4 +497,10 @@ VkResourceRecord::~VkResourceRecord()
 	SAFE_DELETE(layout);
 	SAFE_DELETE(swapInfo);
 	SAFE_DELETE(cmdInfo);
+	if(memMapState)
+	{
+		Serialiser::FreeAlignedBuffer(memMapState->refData);
+
+		SAFE_DELETE(memMapState);
+	}
 }
diff --git a/renderdoc/driver/vulkan/vk_resources.h b/renderdoc/driver/vulkan/vk_resources.h
index f23dff536..dff3759ac 100644
--- a/renderdoc/driver/vulkan/vk_resources.h
+++ b/renderdoc/driver/vulkan/vk_resources.h
@@ -592,17 +592,16 @@
 	set<ResourceId> boundDescSets;
 };
 
-struct MapState
+struct MemMapState
 {
-	MapState()
-		: mapOffset(0), mapSize(0), mapFlags(0)
-		, mapFrame(0), mapFlushed(false), mappedPtr(NULL), refData(NULL)
+	MemMapState()
+		: mapOffset(0), mapSize(0)
+		, mapFlushed(false), mapCoherent(false), mappedPtr(NULL), refData(NULL)
 	{}
 
 	VkDeviceSize mapOffset, mapSize;
-	VkMemoryMapFlags mapFlags;
-	uint32_t mapFrame;
 	bool mapFlushed;
-	void *mappedPtr;
+	bool mapCoherent;
+	byte *mappedPtr;
 	byte *refData;
 };
@@ -624,7 +623,8 @@ struct VkResourceRecord : public ResourceRecord
 		memProps(NULL),
 		layout(NULL),
 		swapInfo(NULL),
-		cmdInfo(NULL)
+		cmdInfo(NULL),
+		memMapState(NULL)
 	{
 	}
@@ -696,6 +696,7 @@ struct VkResourceRecord : public ResourceRecord
 
 	SwapchainInfo *swapInfo;
 	CmdBufferRecordingInfo *cmdInfo;
+	MemMapState *memMapState;
 
 	// pointer to either the pool this item is allocated from, or the children allocated
 	// from this pool. Protected by the chunk lock
diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
index 8702610ff..53830d902 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
@@ -282,6 +282,8 @@ VkResult WrappedVulkan::vkEnumeratePhysicalDevices(
 	vkr = ObjDisp(instance)->EnumeratePhysicalDevices(Unwrap(instance), &count, devices);
 	RDCASSERT(vkr == VK_SUCCESS);
 
+	m_PhysicalDevices.resize(count);
+
 	for(uint32_t i=0; i < count; i++)
 	{
@@ -307,6 +309,8 @@
 			ObjDisp(devices[i])->GetPhysicalDeviceMemoryProperties(Unwrap(devices[i]), record->memProps);
 
+			m_PhysicalDevices[i] = devices[i];
+
 			// we remap memory indices to discourage coherent maps as much as possible
 			RemapMemoryIndices(record->memProps, &record->memIdxMap);
@@ -630,6 +634,8 @@ VkResult WrappedVulkan::vkCreateDevice(
 	m_PhysicalDeviceData.uploadMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, 0);
 	m_PhysicalDeviceData.GPULocalMemIndex = m_PhysicalDeviceData.GetMemoryIndex(~0U, VK_MEMORY_PROPERTY_DEVICE_ONLY, 0);
 
+	m_PhysicalDeviceData.fakeMemProps = GetRecord(physicalDevice)->memProps;
+
 	m_DebugManager = new VulkanDebugManager(this, device);
 }
diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
index f02c84496..48ea823ab 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp
@@ -349,6 +349,7 @@ VkResult WrappedVulkan::vkQueueSubmit(
 	VkResult ret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), cmdBufferCount, unwrapped, Unwrap(fence));
 
 	bool capframe = false;
+	set<ResourceId> refdIDs;
 
 	for(uint32_t i=0; i < cmdBufferCount; i++)
 	{
@@ -395,11 +396,15 @@
 				VkResourceRecord *setrecord = GetRecord(*it);
 
 				for(auto refit = setrecord->bindFrameRefs.begin(); refit != setrecord->bindFrameRefs.end(); ++refit)
+				{
+					refdIDs.insert(refit->first);
 					GetResourceManager()->MarkResourceFrameReferenced(refit->first, refit->second.second);
+				}
 			}
 
 			// pull in frame refs from this baked command buffer
 			record->bakedCommands->AddResourceReferences(GetResourceManager());
+			record->bakedCommands->AddReferencedIDs(refdIDs);
 
 			// ref the parent command buffer by itself, this will pull in the cmd buffer pool
 			GetResourceManager()->MarkResourceFrameReferenced(record->GetResourceID(), eFrameRef_Read);
@@ -422,32 +427,36 @@ VkResult WrappedVulkan::vkQueueSubmit(
 	if(capframe)
 	{
-		map<ResourceId, MapState> maps;
+		vector<VkResourceRecord*> maps;
 		{
-			SCOPED_LOCK(m_CurrentMapsLock);
-			maps = m_CurrentMaps;
+			SCOPED_LOCK(m_CoherentMapsLock);
+			maps = m_CoherentMaps;
 		}
-
-		// VKTODOHIGH when maps are intercepted with local buffers, this will have to be
-		// done when not in capframe :(.
+
 		for(auto it = maps.begin(); it != maps.end(); ++it)
 		{
-			// potential persistent map, force a full flush
-			// VKTODOHIGH need better detection than just 'has not been flushed and has
-			// been mapped for many frames'. Once we are duplicating coherent memory types
-			// to offer a non-coherent version, we'll have to treat all maps into coherent
-			// memory the same.
-			if(it->second.mappedPtr && !it->second.mapFlushed && it->second.mapFrame + 4 < m_FrameCounter)
+			VkResourceRecord *record = *it;
+			MemMapState &state = *record->memMapState;
+
+			// potential persistent map
+			if(state.mapCoherent && state.mappedPtr && !state.mapFlushed)
 			{
+				// only need to flush memory that could affect this submitted batch of work
+				if(refdIDs.find(record->GetResourceID()) == refdIDs.end())
+				{
+					RDCDEBUG("Map of memory %llu not referenced in this queue - not flushing", record->GetResourceID());
+					continue;
+				}
+
 				size_t diffStart = 0, diffEnd = 0;
 				bool found = true;
 
 				// if we have a previous set of data, compare.
 				// otherwise just serialise it all
-				if(it->second.refData)
-					found = FindDiffRange((byte *)it->second.mappedPtr, it->second.refData, (size_t)it->second.mapSize, diffStart, diffEnd);
+				if(state.refData)
+					found = FindDiffRange((byte *)state.mappedPtr, state.refData, (size_t)state.mapSize, diffStart, diffEnd);
 				else
-					diffEnd = (size_t)it->second.mapSize;
+					diffEnd = (size_t)state.mapSize;
 
 				if(found)
 				{
@@ -456,23 +465,26 @@ VkResult WrappedVulkan::vkQueueSubmit(
 					VkDevice dev = GetDev();
 
 					{
-						RDCLOG("Persistent map flush forced for %llu (%llu -> %llu) [mapped in %u, flushed %u]", it->first, (uint64_t)diffStart, (uint64_t)diffEnd, it->second.mapFrame, it->second.mapFlushed);
-						VkMappedMemoryRange range = { VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL, GetResourceManager()->GetCurrentHandle<VkDeviceMemory>(it->first), it->second.mapOffset+diffStart, diffEnd-diffStart };
+						RDCLOG("Persistent map flush forced for %llu (%llu -> %llu)", record->GetResourceID(), (uint64_t)diffStart, (uint64_t)diffEnd);
+						VkMappedMemoryRange range = {
+							VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, NULL,
+							(VkDeviceMemory)(uint64_t)record->Resource,
+							state.mapOffset+diffStart, diffEnd-diffStart
+						};
 						vkFlushMappedMemoryRanges(dev, 1, &range);
+						state.mapFlushed = false;
 					}
 
-					GetResourceManager()->MarkPendingDirty(it->first);
+					GetResourceManager()->MarkPendingDirty(record->GetResourceID());
 
 					// allocate ref data so we can compare next time to minimise serialised data
-					if(it->second.refData == NULL)
-						it->second.refData = new byte[(size_t)it->second.mapSize];
-					memcpy(it->second.refData, it->second.mappedPtr, (size_t)it->second.mapSize);
-
-					{
-						SCOPED_LOCK(m_CurrentMapsLock);
-						m_CurrentMaps[it->first].mapFlushed = false;
-						m_CurrentMaps[it->first].refData = it->second.refData;
-					}
+					if(state.refData == NULL)
+						state.refData = Serialiser::AllocAlignedBuffer((size_t)state.mapSize, 64);
+					memcpy(state.refData, state.mappedPtr, (size_t)state.mapSize);
+				}
+				else
+				{
+					RDCDEBUG("Persistent map flush not needed for %llu", record->GetResourceID());
 				}
 			}
 		}
diff --git a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
index 7ed1026bd..e6a6111ab 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_resource_funcs.cpp
@@ -213,6 +213,16 @@ VkResult WrappedVulkan::vkAllocMemory(
 			// VKTODOLOW Change record->Length to at least int64_t (maybe uint64_t)
 			record->Length = (int32_t)pAllocInfo->allocationSize;
 			RDCASSERT(pAllocInfo->allocationSize < 0x7FFFFFFF);
+
+			uint32_t memProps = m_PhysicalDeviceData.fakeMemProps->memoryTypes[pAllocInfo->memoryTypeIndex].propertyFlags;
+
+			// if memory is not host visible, so not mappable, don't create map state at all
+			if((memProps & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0)
+			{
+				record->memMapState = new MemMapState();
+				record->memMapState->mapCoherent = (memProps & VK_MEMORY_PROPERTY_HOST_NON_COHERENT_BIT) == 0;
+				record->memMapState->refData = NULL;
+			}
 		}
 		else
 		{
@@ -247,30 +257,45 @@ VkResult WrappedVulkan::vkMapMemory(
 		VkMemoryMapFlags                            flags,
 		void**                                      ppData)
 {
-	VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), offset, size, flags, ppData);
+	void *realData = NULL;
+	VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), offset, size, flags, &realData);
 
-	if(ret == VK_SUCCESS && ppData)
+	if(ret == VK_SUCCESS && realData)
 	{
 		ResourceId id = GetResID(mem);
 
 		if(m_State >= WRITING)
 		{
-			MapState state;
-			state.mappedPtr = *ppData;
-			state.mapOffset = offset;
-			state.mapSize = size == 0 ? GetRecord(mem)->Length : size;
-			state.mapFrame = m_FrameCounter;
-			state.mapFlags = flags;
-			state.mapFlushed = false;
+			VkResourceRecord *memrecord = GetRecord(mem);
+
+			// must have map state, only non host visible memories have no map
+			// state, and they can't be mapped!
+			RDCASSERT(memrecord->memMapState);
+			MemMapState &state = *memrecord->memMapState;
+
+			// ensure size is valid
+			RDCASSERT(size == 0 || size <= (VkDeviceSize)memrecord->Length);
+
+			state.mappedPtr = (byte *)realData;
 			state.refData = NULL;
 
+			state.mapOffset = offset;
+			state.mapSize = size == 0 ? memrecord->Length : size;
+			state.mapFlushed = false;
+
+			*ppData = realData;
+
+			if(state.mapCoherent)
 			{
-				SCOPED_LOCK(m_CurrentMapsLock);
-				RDCASSERT(m_CurrentMaps.find(id) == m_CurrentMaps.end());
-				m_CurrentMaps[id] = state;
+				SCOPED_LOCK(m_CoherentMapsLock);
+				m_CoherentMaps.push_back(memrecord);
 			}
 		}
 	}
+	else
+	{
+		*ppData = NULL;
+	}
 
 	return ret;
 }
@@ -283,24 +308,13 @@ bool WrappedVulkan::Serialise_vkUnmapMemory(
 	SERIALISE_ELEMENT(ResourceId, devId, GetResID(device));
 	SERIALISE_ELEMENT(ResourceId, id, GetResID(mem));
 
-	MapState state;
+	MemMapState *state;
 	if(m_State >= WRITING)
-	{
-		SCOPED_LOCK(m_CurrentMapsLock);
-		state = m_CurrentMaps[id];
-	}
+		state = GetRecord(mem)->memMapState;
 
-	SERIALISE_ELEMENT(VkMemoryMapFlags, flags, state.mapFlags);
-	SERIALISE_ELEMENT(uint64_t, memOffset, state.mapOffset);
-	SERIALISE_ELEMENT(uint64_t, memSize, state.mapSize);
-
-	// VKTODOHIGH: this is really horrible - this could be write-combined memory that we're
-	// reading from to get the latest data. This saves on having to fetch the data some
-	// other way and provide an interception buffer to the app, but is awful.
-	// we're also not doing any diff range checks, just serialising the whole memory region.
-	// In vulkan the common case will be one memory region for a large number of distinct
-	// bits of data so most maps will not change the whole region.
-	SERIALISE_ELEMENT_BUF(byte*, data, (byte *)state.mappedPtr + state.mapOffset, (size_t)memSize);
+	SERIALISE_ELEMENT(uint64_t, memOffset, state->mapOffset);
+	SERIALISE_ELEMENT(uint64_t, memSize, state->mapSize);
+	SERIALISE_ELEMENT_BUF(byte*, data, (byte *)state->mappedPtr + state->mapOffset, (size_t)memSize);
 
 	if(m_State < WRITING)
 	{
@@ -308,10 +322,10 @@
 		mem = GetResourceManager()->GetLiveHandle<VkDeviceMemory>(id);
 
 		// VKTODOLOW figure out what alignments there are on mapping, so we only map the region
-		// we're going to modify. For no, offset/size is handled in the memcpy before and we
+		// we're going to modify. For now, offset/size is handled in the memcpy before and we
 		// map the whole region
 		void *mapPtr = NULL;
-		VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), 0, 0, flags, &mapPtr);
+		VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), 0, 0, 0, &mapPtr);
 
 		if(ret != VK_SUCCESS)
 		{
@@ -334,23 +348,14 @@ void WrappedVulkan::vkUnmapMemory(
 		VkDevice                                    device,
 		VkDeviceMemory                              mem)
 {
-	ObjDisp(device)->UnmapMemory(Unwrap(device), Unwrap(mem));
-
 	if(m_State >= WRITING)
 	{
 		ResourceId id = GetResID(mem);
 
-		MapState state;
+		VkResourceRecord *memrecord = GetRecord(mem);
 
-		{
-			SCOPED_LOCK(m_CurrentMapsLock);
-
-			auto it = m_CurrentMaps.find(id);
-			if(it == m_CurrentMaps.end())
-				RDCERR("vkUnmapMemory for memory handle that's not currently mapped");
-
-			state = it->second;
-		}
+		RDCASSERT(memrecord->memMapState);
+		MemMapState &state = *memrecord->memMapState;
 
 		{
 			// decide atomically if this chunk should be in-frame or not
@@ -363,7 +368,7 @@
 			capframe = (m_State == WRITING_CAPFRAME);
 
 			if(!capframe)
-				GetResourceManager()->MarkDirtyResource(GetResID(mem));
+				GetResourceManager()->MarkDirtyResource(id);
 		}
 
 		if(capframe)
@@ -384,7 +389,7 @@
 			else
 			{
 				m_FrameCaptureRecord->AddChunk(scope.Get());
-				GetResourceManager()->MarkResourceFrameReferenced(GetResID(mem), eFrameRef_Write);
+				GetResourceManager()->MarkResourceFrameReferenced(id, eFrameRef_Write);
 			}
 		}
 		else
@@ -395,20 +400,23 @@
 			}
 
 			state.mappedPtr = NULL;
-			SAFE_DELETE_ARRAY(state.refData);
 		}
 
-		{
-			SCOPED_LOCK(m_CurrentMapsLock);
+		Serialiser::FreeAlignedBuffer(state.refData);
 
-			auto it = m_CurrentMaps.find(id);
-			if(it == m_CurrentMaps.end())
+		if(state.mapCoherent)
+		{
+			SCOPED_LOCK(m_CoherentMapsLock);
+
+			auto it = std::find(m_CoherentMaps.begin(), m_CoherentMaps.end(), memrecord);
+			if(it == m_CoherentMaps.end())
 				RDCERR("vkUnmapMemory for memory handle that's not currently mapped");
 
-			state = it->second;
-			m_CurrentMaps.erase(it);
+			m_CoherentMaps.erase(it);
+		}
 	}
+
+	ObjDisp(device)->UnmapMemory(Unwrap(device), Unwrap(mem));
 }
@@ -420,27 +428,18 @@ bool WrappedVulkan::Serialise_vkFlushMappedMemoryRanges(
 	SERIALISE_ELEMENT(ResourceId, devId, GetResID(device));
 	SERIALISE_ELEMENT(ResourceId, id, GetResID(pMemRanges->mem));
 
-	MapState state;
+	MemMapState *state;
 	if(m_State >= WRITING)
 	{
-		SCOPED_LOCK(m_CurrentMapsLock);
-		state = m_CurrentMaps[id];
+		state = GetRecord(pMemRanges->mem)->memMapState;
 
 		// don't support any extensions on VkMappedMemoryRange
 		RDCASSERT(pMemRanges->pNext == NULL);
 	}
 
-	SERIALISE_ELEMENT(VkMemoryMapFlags, flags, state.mapFlags);
 	SERIALISE_ELEMENT(uint64_t, memOffset, pMemRanges->offset);
 	SERIALISE_ELEMENT(uint64_t, memSize, pMemRanges->size);
-
-	// VKTODOHIGH: this is really horrible - this could be write-combined memory that we're
-	// reading from to get the latest data. This saves on having to fetch the data some
-	// other way and provide an interception buffer to the app, but is awful.
-	// we're also not doing any diff range checks, just serialising the whole memory region.
-	// In vulkan the common case will be one memory region for a large number of distinct
-	// bits of data so most maps will not change the whole region.
-	SERIALISE_ELEMENT_BUF(byte*, data, (byte *)state.mappedPtr + (size_t)memOffset, (size_t)memSize);
+	SERIALISE_ELEMENT_BUF(byte*, data, state->mappedPtr + (size_t)memOffset, (size_t)memSize);
 
 	if(m_State < WRITING)
 	{
@@ -451,7 +450,7 @@
 		// we're going to modify. For no, offset/size is handled in the memcpy before and we
 		// map the whole region
 		void *mapPtr = NULL;
-		VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), 0, 0, flags, &mapPtr);
+		VkResult ret = ObjDisp(device)->MapMemory(Unwrap(device), Unwrap(mem), 0, 0, 0, &mapPtr);
 
 		if(ret != VK_SUCCESS)
 		{
@@ -475,6 +474,34 @@ VkResult WrappedVulkan::vkFlushMappedMemoryRanges(
 		uint32_t                                    memRangeCount,
 		const VkMappedMemoryRange*                  pMemRanges)
 {
+	if(m_State >= WRITING)
+	{
+		bool capframe = false;
+		{
+			SCOPED_LOCK(m_CapTransitionLock);
+			capframe = (m_State == WRITING_CAPFRAME);
+		}
+
+		for(uint32_t i = 0; i < memRangeCount; i++)
+		{
+			ResourceId memid = GetResID(pMemRanges[i].mem);
+
+			MemMapState *state = GetRecord(pMemRanges[i].mem)->memMapState;
+			state->mapFlushed = true;
+
+			if(capframe)
+			{
+				CACHE_THREAD_SERIALISER();
+
+				SCOPED_SERIALISE_CONTEXT(FLUSH_MEM);
+				Serialise_vkFlushMappedMemoryRanges(localSerialiser, device, 1, pMemRanges + i);
+
+				m_FrameCaptureRecord->AddChunk(scope.Get());
+				GetResourceManager()->MarkResourceFrameReferenced(GetResID(pMemRanges[i].mem), eFrameRef_Write);
+			}
+		}
+	}
+
 	VkMappedMemoryRange *unwrapped = new VkMappedMemoryRange[memRangeCount];
 	for(uint32_t i=0; i < memRangeCount; i++)
 	{
@@ -486,29 +513,6 @@
 
 	SAFE_DELETE_ARRAY(unwrapped);
 
-	for(uint32_t i=0; i < memRangeCount; i++)
-	{
-		ResourceId memid = GetResID(pMemRanges[i].mem);
-
-		{
-			SCOPED_LOCK(m_CurrentMapsLock);
-			auto it = m_CurrentMaps.find(memid);
-			RDCASSERT(it != m_CurrentMaps.end());
-			it->second.mapFlushed = true;
-		}
-
-		if(ret == VK_SUCCESS && m_State >= WRITING_CAPFRAME)
-		{
-			CACHE_THREAD_SERIALISER();
-
-			SCOPED_SERIALISE_CONTEXT(FLUSH_MEM);
-			Serialise_vkFlushMappedMemoryRanges(localSerialiser, device, 1, pMemRanges+i);
-
-			m_FrameCaptureRecord->AddChunk(scope.Get());
-			GetResourceManager()->MarkResourceFrameReferenced(GetResID(pMemRanges[i].mem), eFrameRef_Write);
-		}
-	}
-
 	return ret;
 }