mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-26 11:50:59 +00:00
Add thread-local temp memory pool for storing temp unwrapped structs
* Avoids needing to do lots of small allocations
This commit is contained in:
@@ -412,6 +412,35 @@ const char * WrappedVulkan::GetChunkName(uint32_t idx)
|
||||
return VkChunkNames[idx-FIRST_CHUNK_ID];
|
||||
}
|
||||
|
||||
// Returns a scratch buffer of at least s bytes belonging to the calling thread.
//
// The buffer is looked up through the tempMemoryTLSSlot TLS value, so each thread
// gets its own TempMem record. If the existing buffer is already large enough it
// is returned as-is; otherwise it is thrown away (contents are NOT preserved) and
// replaced with a new allocation of exactly s bytes. A pointer returned from an
// earlier call is therefore invalid once a later, larger request has been made
// on the same thread.
//
// Every TempMem record that gets created is also appended to m_ThreadTempMem
// (under m_ThreadTempMemLock) so that it can be found again for deletion on shutdown.
byte *WrappedVulkan::GetTempMemory(size_t s)
{
	TempMem *mem = (TempMem *)Threading::GetTLSValue(tempMemoryTLSSlot);

	// fast path: this thread already has enough space
	if(mem && mem->size >= s) return mem->memory;

	if(!mem)
	{
		// first request on this thread. Publish the (still empty) record to TLS and
		// to the shutdown list *before* allocating the buffer, so that a failed
		// allocation below can't leak the record.
		mem = new TempMem();

		Threading::SetTLSValue(tempMemoryTLSSlot, (void *)mem);

		{
			SCOPED_LOCK(m_ThreadTempMemLock);
			m_ThreadTempMem.push_back(mem);
		}
	}
	else
	{
		// growing: free old memory, don't need to keep contents. Reset the record
		// to empty first so that if the allocation below fails we aren't left with
		// a dangling pointer and a size claiming it is valid.
		delete[] mem->memory;
		mem->memory = NULL;
		mem->size = 0;
	}

	// alloc new memory, only recording the size once the allocation has succeeded
	mem->memory = new byte[s];
	mem->size = s;

	return mem->memory;
}
|
||||
|
||||
Serialiser *WrappedVulkan::GetThreadSerialiser()
|
||||
{
|
||||
Serialiser *ser = (Serialiser *)Threading::GetTLSValue(threadSerialiserTLSSlot);
|
||||
|
||||
@@ -116,6 +116,16 @@ private:
|
||||
|
||||
Threading::CriticalSection m_ThreadSerialisersLock;
|
||||
vector<Serialiser *> m_ThreadSerialisers;
|
||||
|
||||
uint64_t tempMemoryTLSSlot;
|
||||
struct TempMem
|
||||
{
|
||||
TempMem() : memory(NULL), size(0) {}
|
||||
byte *memory;
|
||||
size_t size;
|
||||
};
|
||||
Threading::CriticalSection m_ThreadTempMemLock;
|
||||
vector<TempMem*> m_ThreadTempMem;
|
||||
|
||||
VulkanReplay m_Replay;
|
||||
|
||||
@@ -379,6 +389,10 @@ private:
|
||||
|
||||
static const char *GetChunkName(uint32_t idx);
|
||||
|
||||
// returns thread-local temporary memory
|
||||
byte *GetTempMemory(size_t s);
|
||||
template<class T> T *GetTempArray(uint32_t arraycount) { return (T*)GetTempMemory(sizeof(T)*arraycount); }
|
||||
|
||||
Serialiser *GetThreadSerialiser();
|
||||
Serialiser *GetMainSerialiser() { return m_pSerialiser; }
|
||||
|
||||
|
||||
@@ -819,16 +819,11 @@ void WrappedVulkan::vkCmdBindDescriptorSets(
|
||||
uint32_t dynamicOffsetCount,
|
||||
const uint32_t* pDynamicOffsets)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkDescriptorSet *unwrapped = new VkDescriptorSet[setCount];
|
||||
for(uint32_t i=0; i < setCount; i++)
|
||||
unwrapped[i] = Unwrap(pDescriptorSets[i]);
|
||||
VkDescriptorSet *unwrapped = GetTempArray<VkDescriptorSet>(setCount);
|
||||
for(uint32_t i=0; i < setCount; i++) unwrapped[i] = Unwrap(pDescriptorSets[i]);
|
||||
|
||||
ObjDisp(cmdBuffer)->CmdBindDescriptorSets(Unwrap(cmdBuffer), pipelineBindPoint, Unwrap(layout), firstSet, setCount, unwrapped, dynamicOffsetCount, pDynamicOffsets);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
if(m_State >= WRITING)
|
||||
{
|
||||
VkResourceRecord *record = GetRecord(cmdBuffer);
|
||||
@@ -1132,16 +1127,11 @@ void WrappedVulkan::vkCmdBindVertexBuffers(
|
||||
const VkBuffer* pBuffers,
|
||||
const VkDeviceSize* pOffsets)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkBuffer *unwrapped = new VkBuffer[bindingCount];
|
||||
for(uint32_t i=0; i < bindingCount; i++)
|
||||
unwrapped[i] = Unwrap(pBuffers[i]);
|
||||
VkBuffer *unwrapped = GetTempArray<VkBuffer>(bindingCount);
|
||||
for(uint32_t i=0; i < bindingCount; i++) unwrapped[i] = Unwrap(pBuffers[i]);
|
||||
|
||||
ObjDisp(cmdBuffer)->CmdBindVertexBuffers(Unwrap(cmdBuffer), startBinding, bindingCount, unwrapped, pOffsets);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
if(m_State >= WRITING)
|
||||
{
|
||||
VkResourceRecord *record = GetRecord(cmdBuffer);
|
||||
@@ -1313,16 +1303,16 @@ void WrappedVulkan::vkCmdPipelineBarrier(
|
||||
{
|
||||
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
vector<VkImageMemoryBarrier> im;
|
||||
vector<VkBufferMemoryBarrier> buf;
|
||||
// conservatively request memory for worst case to avoid needing to iterate
|
||||
// twice to count
|
||||
byte *memory = GetTempMemory( ( sizeof(void*) + sizeof(VkImageMemoryBarrier) + sizeof(VkBufferMemoryBarrier) )*memBarrierCount);
|
||||
|
||||
// ensure we don't resize while looping so we can take pointers
|
||||
im.reserve(memBarrierCount);
|
||||
buf.reserve(memBarrierCount);
|
||||
VkImageMemoryBarrier *im = (VkImageMemoryBarrier *)memory;
|
||||
VkBufferMemoryBarrier *buf = (VkBufferMemoryBarrier *)(im + memBarrierCount);
|
||||
|
||||
void **unwrappedBarriers = new void*[memBarrierCount];
|
||||
size_t imCount = 0, bufCount = 0;
|
||||
|
||||
void **unwrappedBarriers = (void **)(buf + memBarrierCount);
|
||||
|
||||
for(uint32_t i=0; i < memBarrierCount; i++)
|
||||
{
|
||||
@@ -1330,17 +1320,21 @@ void WrappedVulkan::vkCmdPipelineBarrier(
|
||||
|
||||
if(header->sType == VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER)
|
||||
{
|
||||
VkImageMemoryBarrier barrier = *(VkImageMemoryBarrier *)header;
|
||||
VkImageMemoryBarrier &barrier = im[imCount];
|
||||
barrier = *(VkImageMemoryBarrier *)header;
|
||||
barrier.image = Unwrap(barrier.image);
|
||||
im.push_back(barrier);
|
||||
unwrappedBarriers[i] = &im.back();
|
||||
unwrappedBarriers[i] = &im[imCount];
|
||||
|
||||
imCount++;
|
||||
}
|
||||
else if(header->sType == VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER)
|
||||
{
|
||||
VkBufferMemoryBarrier barrier = *(VkBufferMemoryBarrier *)header;
|
||||
VkBufferMemoryBarrier &barrier = buf[bufCount];
|
||||
barrier = *(VkBufferMemoryBarrier *)header;
|
||||
barrier.buffer = Unwrap(barrier.buffer);
|
||||
buf.push_back(barrier);
|
||||
unwrappedBarriers[i] = &buf.back();
|
||||
unwrappedBarriers[i] = &buf[bufCount];
|
||||
|
||||
bufCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -1349,8 +1343,6 @@ void WrappedVulkan::vkCmdPipelineBarrier(
|
||||
}
|
||||
|
||||
ObjDisp(cmdBuffer)->CmdPipelineBarrier(Unwrap(cmdBuffer), srcStageMask, destStageMask, byRegion, memBarrierCount, unwrappedBarriers);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrappedBarriers);
|
||||
}
|
||||
|
||||
if(m_State >= WRITING)
|
||||
|
||||
@@ -139,16 +139,24 @@ VkResult WrappedVulkan::vkCreateDescriptorSetLayout(
|
||||
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
|
||||
VkDescriptorSetLayout* pSetLayout)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkDescriptorSetLayoutBinding *unwrapped = new VkDescriptorSetLayoutBinding[pCreateInfo->count];
|
||||
size_t tempmemSize = sizeof(VkDescriptorSetLayoutBinding)*pCreateInfo->count;
|
||||
|
||||
// need to count how many VkSampler arrays to allocate for
|
||||
for(uint32_t i=0; i < pCreateInfo->count; i++)
|
||||
if(pCreateInfo->pBinding[i].pImmutableSamplers) tempmemSize += pCreateInfo->pBinding[i].arraySize;
|
||||
|
||||
byte *memory = GetTempMemory(tempmemSize);
|
||||
|
||||
VkDescriptorSetLayoutBinding *unwrapped = (VkDescriptorSetLayoutBinding *)memory;
|
||||
VkSampler *nextSampler = (VkSampler *)(unwrapped + pCreateInfo->count);
|
||||
|
||||
for(uint32_t i=0; i < pCreateInfo->count; i++)
|
||||
{
|
||||
unwrapped[i] = pCreateInfo->pBinding[i];
|
||||
|
||||
if(unwrapped[i].pImmutableSamplers)
|
||||
{
|
||||
VkSampler *unwrappedSamplers = new VkSampler[unwrapped[i].arraySize];
|
||||
VkSampler *unwrappedSamplers = nextSampler; nextSampler += unwrapped[i].arraySize;
|
||||
for(uint32_t j=0; j < unwrapped[i].arraySize; j++)
|
||||
unwrappedSamplers[j] = Unwrap(unwrapped[i].pImmutableSamplers[j]);
|
||||
unwrapped[i].pImmutableSamplers = unwrappedSamplers;
|
||||
@@ -158,10 +166,6 @@ VkResult WrappedVulkan::vkCreateDescriptorSetLayout(
|
||||
VkDescriptorSetLayoutCreateInfo unwrappedInfo = *pCreateInfo;
|
||||
unwrappedInfo.pBinding = unwrapped;
|
||||
VkResult ret = ObjDisp(device)->CreateDescriptorSetLayout(Unwrap(device), &unwrappedInfo, pSetLayout);
|
||||
|
||||
for(uint32_t i=0; i < pCreateInfo->count; i++)
|
||||
delete[] unwrapped[i].pImmutableSamplers;
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
{
|
||||
@@ -245,15 +249,10 @@ VkResult WrappedVulkan::vkAllocDescriptorSets(
|
||||
VkDescriptorSet* pDescriptorSets,
|
||||
uint32_t* pCount)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkDescriptorSetLayout *unwrapped = new VkDescriptorSetLayout[count];
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
unwrapped[i] = Unwrap(pSetLayouts[i]);
|
||||
VkDescriptorSetLayout *unwrapped = GetTempArray<VkDescriptorSetLayout>(count);
|
||||
for(uint32_t i=0; i < count; i++) unwrapped[i] = Unwrap(pSetLayouts[i]);
|
||||
|
||||
VkResult ret = ObjDisp(device)->AllocDescriptorSets(Unwrap(device), Unwrap(descriptorPool), setUsage, count, unwrapped, pDescriptorSets, pCount);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
RDCASSERT(pCount == NULL || *pCount == count); // VKTODOMED: find out what *pCount < count means
|
||||
|
||||
@@ -309,11 +308,8 @@ VkResult WrappedVulkan::vkFreeDescriptorSets(
|
||||
uint32_t count,
|
||||
const VkDescriptorSet* pDescriptorSets)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkDescriptorSet *unwrapped = new VkDescriptorSet[count];
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
unwrapped[i] = Unwrap(pDescriptorSets[i]);
|
||||
VkDescriptorSet *unwrapped = GetTempArray<VkDescriptorSet>(count);
|
||||
for(uint32_t i=0; i < count; i++) unwrapped[i] = Unwrap(pDescriptorSets[i]);
|
||||
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
GetResourceManager()->ReleaseWrappedResource(pDescriptorSets[i]);
|
||||
@@ -451,27 +447,24 @@ VkResult WrappedVulkan::vkUpdateDescriptorSets(
|
||||
VkResult ret = VK_SUCCESS;
|
||||
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
vector<VkDescriptorInfo> desc;
|
||||
|
||||
// need to count up number of descriptor infos, to be able to alloc enough space
|
||||
uint32_t numInfos = 0;
|
||||
for(uint32_t i=0; i < writeCount; i++) numInfos += pDescriptorWrites[i].count;
|
||||
|
||||
// ensure we don't resize while looping so we can take pointers
|
||||
desc.resize(numInfos);
|
||||
|
||||
VkWriteDescriptorSet *unwrappedWrites = new VkWriteDescriptorSet[writeCount];
|
||||
VkCopyDescriptorSet *unwrappedCopies = new VkCopyDescriptorSet[copyCount];
|
||||
|
||||
uint32_t curInfo = 0;
|
||||
byte *memory = GetTempMemory(sizeof(VkDescriptorInfo)*numInfos +
|
||||
sizeof(VkWriteDescriptorSet)*writeCount + sizeof(VkCopyDescriptorSet)*copyCount);
|
||||
|
||||
VkWriteDescriptorSet *unwrappedWrites = (VkWriteDescriptorSet *)memory;
|
||||
VkCopyDescriptorSet *unwrappedCopies = (VkCopyDescriptorSet *)(unwrappedWrites + writeCount);
|
||||
VkDescriptorInfo *nextDescriptors = (VkDescriptorInfo *)(unwrappedCopies + copyCount);
|
||||
|
||||
for(uint32_t i=0; i < writeCount; i++)
|
||||
{
|
||||
unwrappedWrites[i] = pDescriptorWrites[i];
|
||||
unwrappedWrites[i].destSet = Unwrap(unwrappedWrites[i].destSet);
|
||||
|
||||
VkDescriptorInfo *unwrappedInfos = &desc[curInfo];
|
||||
curInfo += pDescriptorWrites[i].count;
|
||||
VkDescriptorInfo *unwrappedInfos = nextDescriptors;
|
||||
nextDescriptors += pDescriptorWrites[i].count;
|
||||
|
||||
for(uint32_t j=0; j < pDescriptorWrites[i].count; j++)
|
||||
{
|
||||
@@ -493,9 +486,6 @@ VkResult WrappedVulkan::vkUpdateDescriptorSets(
|
||||
}
|
||||
|
||||
ret = ObjDisp(device)->UpdateDescriptorSets(Unwrap(device), writeCount, unwrappedWrites, copyCount, unwrappedCopies);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrappedWrites);
|
||||
SAFE_DELETE_ARRAY(unwrappedCopies);
|
||||
}
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
|
||||
@@ -39,10 +39,11 @@ VkResult WrappedVulkan::vkCreateInstance(
|
||||
|
||||
VkResult ret = GetInstanceDispatchTable(*pInstance)->CreateInstance(pCreateInfo, &inst);
|
||||
|
||||
// VKTODOHIGH need to deallocate this
|
||||
// VKTODOHIGH need to deallocate these
|
||||
threadSerialiserTLSSlot = Threading::AllocateTLSSlot();
|
||||
tempMemoryTLSSlot = Threading::AllocateTLSSlot();
|
||||
|
||||
// VKTODOHIGH need to deallocate m_ThreadSerialisers
|
||||
// VKTODOHIGH need to deallocate m_ThreadSerialisers and m_ThreadTempMem
|
||||
|
||||
GetResourceManager()->WrapResource(inst, inst);
|
||||
|
||||
|
||||
@@ -499,9 +499,7 @@ VkResult WrappedVulkan::vkCreateFramebuffer(
|
||||
const VkFramebufferCreateInfo* pCreateInfo,
|
||||
VkFramebuffer* pFramebuffer)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkAttachmentBindInfo *unwrapped = new VkAttachmentBindInfo[pCreateInfo->attachmentCount];
|
||||
VkAttachmentBindInfo *unwrapped = GetTempArray<VkAttachmentBindInfo>(pCreateInfo->attachmentCount);
|
||||
for(uint32_t i=0; i < pCreateInfo->attachmentCount; i++)
|
||||
{
|
||||
unwrapped[i] = pCreateInfo->pAttachments[i];
|
||||
@@ -514,8 +512,6 @@ VkResult WrappedVulkan::vkCreateFramebuffer(
|
||||
|
||||
VkResult ret = ObjDisp(device)->CreateFramebuffer(Unwrap(device), &unwrappedInfo, pFramebuffer);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
{
|
||||
ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pFramebuffer);
|
||||
|
||||
@@ -325,16 +325,11 @@ VkResult WrappedVulkan::vkQueueSubmit(
|
||||
const VkCmdBuffer* pCmdBuffers,
|
||||
VkFence fence)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkCmdBuffer *unwrapped = new VkCmdBuffer[cmdBufferCount];
|
||||
for(uint32_t i=0; i < cmdBufferCount; i++)
|
||||
unwrapped[i] = Unwrap(pCmdBuffers[i]);
|
||||
VkCmdBuffer *unwrapped = GetTempArray<VkCmdBuffer>(cmdBufferCount);
|
||||
for(uint32_t i=0; i < cmdBufferCount; i++) unwrapped[i] = Unwrap(pCmdBuffers[i]);
|
||||
|
||||
VkResult ret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), cmdBufferCount, unwrapped, Unwrap(fence));
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
// VKTODOHIGH when maps are intercepted with local buffers, this will have to be
|
||||
// done when not in capframe :(.
|
||||
if(m_State == WRITING_CAPFRAME)
|
||||
|
||||
@@ -64,19 +64,14 @@ VkResult WrappedVulkan::vkCreatePipelineLayout(
|
||||
const VkPipelineLayoutCreateInfo* pCreateInfo,
|
||||
VkPipelineLayout* pPipelineLayout)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkDescriptorSetLayout *unwrapped = new VkDescriptorSetLayout[pCreateInfo->descriptorSetCount];
|
||||
for(uint32_t i=0; i < pCreateInfo->descriptorSetCount; i++)
|
||||
unwrapped[i] = Unwrap(pCreateInfo->pSetLayouts[i]);
|
||||
VkDescriptorSetLayout *unwrapped = GetTempArray<VkDescriptorSetLayout>(pCreateInfo->descriptorSetCount);
|
||||
for(uint32_t i=0; i < pCreateInfo->descriptorSetCount; i++) unwrapped[i] = Unwrap(pCreateInfo->pSetLayouts[i]);
|
||||
|
||||
VkPipelineLayoutCreateInfo unwrappedInfo = *pCreateInfo;
|
||||
unwrappedInfo.pSetLayouts = unwrapped;
|
||||
|
||||
VkResult ret = ObjDisp(device)->CreatePipelineLayout(Unwrap(device), &unwrappedInfo, pPipelineLayout);
|
||||
|
||||
SAFE_DELETE_ARRAY(unwrapped);
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
{
|
||||
ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pPipelineLayout);
|
||||
@@ -378,12 +373,17 @@ VkResult WrappedVulkan::vkCreateGraphicsPipelines(
|
||||
const VkGraphicsPipelineCreateInfo* pCreateInfos,
|
||||
VkPipeline* pPipelines)
|
||||
{
|
||||
// VKTODOLOW this should be a persistent per-thread array that resizes up
|
||||
// to a high water mark, so we don't have to allocate
|
||||
VkGraphicsPipelineCreateInfo *unwrappedInfos = new VkGraphicsPipelineCreateInfo[count];
|
||||
// conservatively request memory for 5 stages on each pipeline
|
||||
// (worst case - can't have compute stage). Avoids needing to count
|
||||
byte *unwrapped = GetTempMemory(sizeof(VkGraphicsPipelineCreateInfo)*count + sizeof(VkPipelineShaderStageCreateInfo)*count*5);
|
||||
|
||||
// keep pipelines first in the memory, then the stages
|
||||
VkGraphicsPipelineCreateInfo *unwrappedInfos = (VkGraphicsPipelineCreateInfo *)unwrapped;
|
||||
VkPipelineShaderStageCreateInfo *nextUnwrappedStages = (VkPipelineShaderStageCreateInfo *)(unwrappedInfos + count);
|
||||
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
{
|
||||
VkPipelineShaderStageCreateInfo *unwrappedStages = new VkPipelineShaderStageCreateInfo[pCreateInfos[i].stageCount];
|
||||
VkPipelineShaderStageCreateInfo *unwrappedStages = nextUnwrappedStages; nextUnwrappedStages += pCreateInfos[i].stageCount;
|
||||
for(uint32_t j=0; j < pCreateInfos[i].stageCount; j++)
|
||||
{
|
||||
unwrappedStages[j] = pCreateInfos[i].pStages[j];
|
||||
@@ -399,10 +399,6 @@ VkResult WrappedVulkan::vkCreateGraphicsPipelines(
|
||||
|
||||
VkResult ret = ObjDisp(device)->CreateGraphicsPipelines(Unwrap(device), Unwrap(pipelineCache), count, unwrappedInfos, pPipelines);
|
||||
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
delete[] unwrappedInfos[i].pStages;
|
||||
SAFE_DELETE_ARRAY(unwrappedInfos);
|
||||
|
||||
if(ret == VK_SUCCESS)
|
||||
{
|
||||
for(uint32_t i=0; i < count; i++)
|
||||
|
||||
Reference in New Issue
Block a user