Add thread-local temp memory pool for storing temp unwrapped structs

* Avoids needing to do lots of small allocations
This commit is contained in:
baldurk
2015-10-08 11:20:20 +02:00
parent 36eaa937d0
commit 27aec9b277
8 changed files with 108 additions and 95 deletions
+29
View File
@@ -412,6 +412,35 @@ const char * WrappedVulkan::GetChunkName(uint32_t idx)
return VkChunkNames[idx-FIRST_CHUNK_ID];
}
byte *WrappedVulkan::GetTempMemory(size_t s)
{
TempMem *mem = (TempMem *)Threading::GetTLSValue(tempMemoryTLSSlot);
if(mem && mem->size >= s) return mem->memory;
// alloc or grow alloc
TempMem *newmem = mem;
if(!newmem) newmem = new TempMem();
// free old memory, don't need to keep contents
if(newmem->memory) delete[] newmem->memory;
// alloc new memory
newmem->size = s;
newmem->memory = new byte[s];
Threading::SetTLSValue(tempMemoryTLSSlot, (void *)newmem);
// if this is entirely new, save it for deletion on shutdown
if(!mem)
{
SCOPED_LOCK(m_ThreadTempMemLock);
m_ThreadTempMem.push_back(newmem);
}
return newmem->memory;
}
Serialiser *WrappedVulkan::GetThreadSerialiser()
{
Serialiser *ser = (Serialiser *)Threading::GetTLSValue(threadSerialiserTLSSlot);
+14
View File
@@ -116,6 +116,16 @@ private:
Threading::CriticalSection m_ThreadSerialisersLock;
vector<Serialiser *> m_ThreadSerialisers;
uint64_t tempMemoryTLSSlot;
struct TempMem
{
TempMem() : memory(NULL), size(0) {}
byte *memory;
size_t size;
};
Threading::CriticalSection m_ThreadTempMemLock;
vector<TempMem*> m_ThreadTempMem;
VulkanReplay m_Replay;
@@ -379,6 +389,10 @@ private:
static const char *GetChunkName(uint32_t idx);
// returns thread-local temporary memory
byte *GetTempMemory(size_t s);
template<class T> T *GetTempArray(uint32_t arraycount) { return (T*)GetTempMemory(sizeof(T)*arraycount); }
Serialiser *GetThreadSerialiser();
Serialiser *GetMainSerialiser() { return m_pSerialiser; }
@@ -819,16 +819,11 @@ void WrappedVulkan::vkCmdBindDescriptorSets(
uint32_t dynamicOffsetCount,
const uint32_t* pDynamicOffsets)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkDescriptorSet *unwrapped = new VkDescriptorSet[setCount];
for(uint32_t i=0; i < setCount; i++)
unwrapped[i] = Unwrap(pDescriptorSets[i]);
VkDescriptorSet *unwrapped = GetTempArray<VkDescriptorSet>(setCount);
for(uint32_t i=0; i < setCount; i++) unwrapped[i] = Unwrap(pDescriptorSets[i]);
ObjDisp(cmdBuffer)->CmdBindDescriptorSets(Unwrap(cmdBuffer), pipelineBindPoint, Unwrap(layout), firstSet, setCount, unwrapped, dynamicOffsetCount, pDynamicOffsets);
SAFE_DELETE_ARRAY(unwrapped);
if(m_State >= WRITING)
{
VkResourceRecord *record = GetRecord(cmdBuffer);
@@ -1132,16 +1127,11 @@ void WrappedVulkan::vkCmdBindVertexBuffers(
const VkBuffer* pBuffers,
const VkDeviceSize* pOffsets)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkBuffer *unwrapped = new VkBuffer[bindingCount];
for(uint32_t i=0; i < bindingCount; i++)
unwrapped[i] = Unwrap(pBuffers[i]);
VkBuffer *unwrapped = GetTempArray<VkBuffer>(bindingCount);
for(uint32_t i=0; i < bindingCount; i++) unwrapped[i] = Unwrap(pBuffers[i]);
ObjDisp(cmdBuffer)->CmdBindVertexBuffers(Unwrap(cmdBuffer), startBinding, bindingCount, unwrapped, pOffsets);
SAFE_DELETE_ARRAY(unwrapped);
if(m_State >= WRITING)
{
VkResourceRecord *record = GetRecord(cmdBuffer);
@@ -1313,16 +1303,16 @@ void WrappedVulkan::vkCmdPipelineBarrier(
{
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
vector<VkImageMemoryBarrier> im;
vector<VkBufferMemoryBarrier> buf;
// conservatively request memory for worst case to avoid needing to iterate
// twice to count
byte *memory = GetTempMemory( ( sizeof(void*) + sizeof(VkImageMemoryBarrier) + sizeof(VkBufferMemoryBarrier) )*memBarrierCount);
// ensure we don't resize while looping so we can take pointers
im.reserve(memBarrierCount);
buf.reserve(memBarrierCount);
VkImageMemoryBarrier *im = (VkImageMemoryBarrier *)memory;
VkBufferMemoryBarrier *buf = (VkBufferMemoryBarrier *)(im + memBarrierCount);
void **unwrappedBarriers = new void*[memBarrierCount];
size_t imCount = 0, bufCount = 0;
void **unwrappedBarriers = (void **)(buf + memBarrierCount);
for(uint32_t i=0; i < memBarrierCount; i++)
{
@@ -1330,17 +1320,21 @@ void WrappedVulkan::vkCmdPipelineBarrier(
if(header->sType == VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER)
{
VkImageMemoryBarrier barrier = *(VkImageMemoryBarrier *)header;
VkImageMemoryBarrier &barrier = im[imCount];
barrier = *(VkImageMemoryBarrier *)header;
barrier.image = Unwrap(barrier.image);
im.push_back(barrier);
unwrappedBarriers[i] = &im.back();
unwrappedBarriers[i] = &im[imCount];
imCount++;
}
else if(header->sType == VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER)
{
VkBufferMemoryBarrier barrier = *(VkBufferMemoryBarrier *)header;
VkBufferMemoryBarrier &barrier = buf[bufCount];
barrier = *(VkBufferMemoryBarrier *)header;
barrier.buffer = Unwrap(barrier.buffer);
buf.push_back(barrier);
unwrappedBarriers[i] = &buf.back();
unwrappedBarriers[i] = &buf[bufCount];
bufCount++;
}
else
{
@@ -1349,8 +1343,6 @@ void WrappedVulkan::vkCmdPipelineBarrier(
}
ObjDisp(cmdBuffer)->CmdPipelineBarrier(Unwrap(cmdBuffer), srcStageMask, destStageMask, byRegion, memBarrierCount, unwrappedBarriers);
SAFE_DELETE_ARRAY(unwrappedBarriers);
}
if(m_State >= WRITING)
@@ -139,16 +139,24 @@ VkResult WrappedVulkan::vkCreateDescriptorSetLayout(
const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
VkDescriptorSetLayout* pSetLayout)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkDescriptorSetLayoutBinding *unwrapped = new VkDescriptorSetLayoutBinding[pCreateInfo->count];
size_t tempmemSize = sizeof(VkDescriptorSetLayoutBinding)*pCreateInfo->count;
// need to count how many VkSampler arrays to allocate for
for(uint32_t i=0; i < pCreateInfo->count; i++)
if(pCreateInfo->pBinding[i].pImmutableSamplers) tempmemSize += pCreateInfo->pBinding[i].arraySize;
byte *memory = GetTempMemory(tempmemSize);
VkDescriptorSetLayoutBinding *unwrapped = (VkDescriptorSetLayoutBinding *)memory;
VkSampler *nextSampler = (VkSampler *)(unwrapped + pCreateInfo->count);
for(uint32_t i=0; i < pCreateInfo->count; i++)
{
unwrapped[i] = pCreateInfo->pBinding[i];
if(unwrapped[i].pImmutableSamplers)
{
VkSampler *unwrappedSamplers = new VkSampler[unwrapped[i].arraySize];
VkSampler *unwrappedSamplers = nextSampler; nextSampler += unwrapped[i].arraySize;
for(uint32_t j=0; j < unwrapped[i].arraySize; j++)
unwrappedSamplers[j] = Unwrap(unwrapped[i].pImmutableSamplers[j]);
unwrapped[i].pImmutableSamplers = unwrappedSamplers;
@@ -158,10 +166,6 @@ VkResult WrappedVulkan::vkCreateDescriptorSetLayout(
VkDescriptorSetLayoutCreateInfo unwrappedInfo = *pCreateInfo;
unwrappedInfo.pBinding = unwrapped;
VkResult ret = ObjDisp(device)->CreateDescriptorSetLayout(Unwrap(device), &unwrappedInfo, pSetLayout);
for(uint32_t i=0; i < pCreateInfo->count; i++)
delete[] unwrapped[i].pImmutableSamplers;
SAFE_DELETE_ARRAY(unwrapped);
if(ret == VK_SUCCESS)
{
@@ -245,15 +249,10 @@ VkResult WrappedVulkan::vkAllocDescriptorSets(
VkDescriptorSet* pDescriptorSets,
uint32_t* pCount)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkDescriptorSetLayout *unwrapped = new VkDescriptorSetLayout[count];
for(uint32_t i=0; i < count; i++)
unwrapped[i] = Unwrap(pSetLayouts[i]);
VkDescriptorSetLayout *unwrapped = GetTempArray<VkDescriptorSetLayout>(count);
for(uint32_t i=0; i < count; i++) unwrapped[i] = Unwrap(pSetLayouts[i]);
VkResult ret = ObjDisp(device)->AllocDescriptorSets(Unwrap(device), Unwrap(descriptorPool), setUsage, count, unwrapped, pDescriptorSets, pCount);
SAFE_DELETE_ARRAY(unwrapped);
RDCASSERT(pCount == NULL || *pCount == count); // VKTODOMED: find out what *pCount < count means
@@ -309,11 +308,8 @@ VkResult WrappedVulkan::vkFreeDescriptorSets(
uint32_t count,
const VkDescriptorSet* pDescriptorSets)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkDescriptorSet *unwrapped = new VkDescriptorSet[count];
for(uint32_t i=0; i < count; i++)
unwrapped[i] = Unwrap(pDescriptorSets[i]);
VkDescriptorSet *unwrapped = GetTempArray<VkDescriptorSet>(count);
for(uint32_t i=0; i < count; i++) unwrapped[i] = Unwrap(pDescriptorSets[i]);
for(uint32_t i=0; i < count; i++)
GetResourceManager()->ReleaseWrappedResource(pDescriptorSets[i]);
@@ -451,27 +447,24 @@ VkResult WrappedVulkan::vkUpdateDescriptorSets(
VkResult ret = VK_SUCCESS;
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
vector<VkDescriptorInfo> desc;
// need to count up number of descriptor infos, to be able to alloc enough space
uint32_t numInfos = 0;
for(uint32_t i=0; i < writeCount; i++) numInfos += pDescriptorWrites[i].count;
// ensure we don't resize while looping so we can take pointers
desc.resize(numInfos);
VkWriteDescriptorSet *unwrappedWrites = new VkWriteDescriptorSet[writeCount];
VkCopyDescriptorSet *unwrappedCopies = new VkCopyDescriptorSet[copyCount];
uint32_t curInfo = 0;
byte *memory = GetTempMemory(sizeof(VkDescriptorInfo)*numInfos +
sizeof(VkWriteDescriptorSet)*writeCount + sizeof(VkCopyDescriptorSet)*copyCount);
VkWriteDescriptorSet *unwrappedWrites = (VkWriteDescriptorSet *)memory;
VkCopyDescriptorSet *unwrappedCopies = (VkCopyDescriptorSet *)(unwrappedWrites + writeCount);
VkDescriptorInfo *nextDescriptors = (VkDescriptorInfo *)(unwrappedCopies + copyCount);
for(uint32_t i=0; i < writeCount; i++)
{
unwrappedWrites[i] = pDescriptorWrites[i];
unwrappedWrites[i].destSet = Unwrap(unwrappedWrites[i].destSet);
VkDescriptorInfo *unwrappedInfos = &desc[curInfo];
curInfo += pDescriptorWrites[i].count;
VkDescriptorInfo *unwrappedInfos = nextDescriptors;
nextDescriptors += pDescriptorWrites[i].count;
for(uint32_t j=0; j < pDescriptorWrites[i].count; j++)
{
@@ -493,9 +486,6 @@ VkResult WrappedVulkan::vkUpdateDescriptorSets(
}
ret = ObjDisp(device)->UpdateDescriptorSets(Unwrap(device), writeCount, unwrappedWrites, copyCount, unwrappedCopies);
SAFE_DELETE_ARRAY(unwrappedWrites);
SAFE_DELETE_ARRAY(unwrappedCopies);
}
if(ret == VK_SUCCESS)
@@ -39,10 +39,11 @@ VkResult WrappedVulkan::vkCreateInstance(
VkResult ret = GetInstanceDispatchTable(*pInstance)->CreateInstance(pCreateInfo, &inst);
// VKTODOHIGH need to deallocate this
// VKTODOHIGH need to deallocate these
threadSerialiserTLSSlot = Threading::AllocateTLSSlot();
tempMemoryTLSSlot = Threading::AllocateTLSSlot();
// VKTODOHIGH need to deallocate m_ThreadSerialisers
// VKTODOHIGH need to deallocate m_ThreadSerialisers and m_ThreadTempMem
GetResourceManager()->WrapResource(inst, inst);
@@ -499,9 +499,7 @@ VkResult WrappedVulkan::vkCreateFramebuffer(
const VkFramebufferCreateInfo* pCreateInfo,
VkFramebuffer* pFramebuffer)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkAttachmentBindInfo *unwrapped = new VkAttachmentBindInfo[pCreateInfo->attachmentCount];
VkAttachmentBindInfo *unwrapped = GetTempArray<VkAttachmentBindInfo>(pCreateInfo->attachmentCount);
for(uint32_t i=0; i < pCreateInfo->attachmentCount; i++)
{
unwrapped[i] = pCreateInfo->pAttachments[i];
@@ -514,8 +512,6 @@ VkResult WrappedVulkan::vkCreateFramebuffer(
VkResult ret = ObjDisp(device)->CreateFramebuffer(Unwrap(device), &unwrappedInfo, pFramebuffer);
SAFE_DELETE_ARRAY(unwrapped);
if(ret == VK_SUCCESS)
{
ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pFramebuffer);
@@ -325,16 +325,11 @@ VkResult WrappedVulkan::vkQueueSubmit(
const VkCmdBuffer* pCmdBuffers,
VkFence fence)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkCmdBuffer *unwrapped = new VkCmdBuffer[cmdBufferCount];
for(uint32_t i=0; i < cmdBufferCount; i++)
unwrapped[i] = Unwrap(pCmdBuffers[i]);
VkCmdBuffer *unwrapped = GetTempArray<VkCmdBuffer>(cmdBufferCount);
for(uint32_t i=0; i < cmdBufferCount; i++) unwrapped[i] = Unwrap(pCmdBuffers[i]);
VkResult ret = ObjDisp(queue)->QueueSubmit(Unwrap(queue), cmdBufferCount, unwrapped, Unwrap(fence));
SAFE_DELETE_ARRAY(unwrapped);
// VKTODOHIGH when maps are intercepted with local buffers, this will have to be
// done when not in capframe :(.
if(m_State == WRITING_CAPFRAME)
@@ -64,19 +64,14 @@ VkResult WrappedVulkan::vkCreatePipelineLayout(
const VkPipelineLayoutCreateInfo* pCreateInfo,
VkPipelineLayout* pPipelineLayout)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkDescriptorSetLayout *unwrapped = new VkDescriptorSetLayout[pCreateInfo->descriptorSetCount];
for(uint32_t i=0; i < pCreateInfo->descriptorSetCount; i++)
unwrapped[i] = Unwrap(pCreateInfo->pSetLayouts[i]);
VkDescriptorSetLayout *unwrapped = GetTempArray<VkDescriptorSetLayout>(pCreateInfo->descriptorSetCount);
for(uint32_t i=0; i < pCreateInfo->descriptorSetCount; i++) unwrapped[i] = Unwrap(pCreateInfo->pSetLayouts[i]);
VkPipelineLayoutCreateInfo unwrappedInfo = *pCreateInfo;
unwrappedInfo.pSetLayouts = unwrapped;
VkResult ret = ObjDisp(device)->CreatePipelineLayout(Unwrap(device), &unwrappedInfo, pPipelineLayout);
SAFE_DELETE_ARRAY(unwrapped);
if(ret == VK_SUCCESS)
{
ResourceId id = GetResourceManager()->WrapResource(Unwrap(device), *pPipelineLayout);
@@ -378,12 +373,17 @@ VkResult WrappedVulkan::vkCreateGraphicsPipelines(
const VkGraphicsPipelineCreateInfo* pCreateInfos,
VkPipeline* pPipelines)
{
// VKTODOLOW this should be a persistent per-thread array that resizes up
// to a high water mark, so we don't have to allocate
VkGraphicsPipelineCreateInfo *unwrappedInfos = new VkGraphicsPipelineCreateInfo[count];
// conservatively request memory for 5 stages on each pipeline
// (worst case - can't have compute stage). Avoids needing to count
byte *unwrapped = GetTempMemory(sizeof(VkGraphicsPipelineCreateInfo)*count + sizeof(VkPipelineShaderStageCreateInfo)*count*5);
// keep pipelines first in the memory, then the stages
VkGraphicsPipelineCreateInfo *unwrappedInfos = (VkGraphicsPipelineCreateInfo *)unwrapped;
VkPipelineShaderStageCreateInfo *nextUnwrappedStages = (VkPipelineShaderStageCreateInfo *)(unwrappedInfos + count);
for(uint32_t i=0; i < count; i++)
{
VkPipelineShaderStageCreateInfo *unwrappedStages = new VkPipelineShaderStageCreateInfo[pCreateInfos[i].stageCount];
VkPipelineShaderStageCreateInfo *unwrappedStages = nextUnwrappedStages; nextUnwrappedStages += pCreateInfos[i].stageCount;
for(uint32_t j=0; j < pCreateInfos[i].stageCount; j++)
{
unwrappedStages[j] = pCreateInfos[i].pStages[j];
@@ -399,10 +399,6 @@ VkResult WrappedVulkan::vkCreateGraphicsPipelines(
VkResult ret = ObjDisp(device)->CreateGraphicsPipelines(Unwrap(device), Unwrap(pipelineCache), count, unwrappedInfos, pPipelines);
for(uint32_t i=0; i < count; i++)
delete[] unwrappedInfos[i].pStages;
SAFE_DELETE_ARRAY(unwrappedInfos);
if(ret == VK_SUCCESS)
{
for(uint32_t i=0; i < count; i++)