From 360d845ce08093074639de6cc0b1c13b9bfd69be Mon Sep 17 00:00:00 2001 From: baldurk Date: Mon, 5 Oct 2015 21:25:41 +0200 Subject: [PATCH] Add TLS handling to os specific layer --- renderdoc/core/core.cpp | 4 ++ renderdoc/driver/vulkan/vk_core.cpp | 22 ++++++- renderdoc/driver/vulkan/vk_core.h | 5 ++ .../vulkan/wrappers/vk_device_funcs.cpp | 5 ++ renderdoc/os/linux/linux_threading.cpp | 65 +++++++++++++++++++ renderdoc/os/os_specific.h | 7 ++ renderdoc/os/win32/win32_threading.cpp | 65 +++++++++++++++++++ 7 files changed, 172 insertions(+), 1 deletion(-) diff --git a/renderdoc/core/core.cpp b/renderdoc/core/core.cpp index cc65a3878..6da9ef939 100644 --- a/renderdoc/core/core.cpp +++ b/renderdoc/core/core.cpp @@ -189,6 +189,8 @@ void RenderDoc::Initialise() Network::Init(); + Threading::Init(); + m_RemoteIdent = 0; if(!IsReplayApp()) @@ -298,6 +300,8 @@ RenderDoc::~RenderDoc() Network::Shutdown(); + Threading::Shutdown(); + FileIO::Delete(m_LoggingFilename.c_str()); } diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 8e169ef67..85b673c3a 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -412,7 +412,27 @@ const char * WrappedVulkan::GetChunkName(uint32_t idx) Serialiser *WrappedVulkan::GetThreadSerialiser() { - return m_pSerialiser; + Serialiser *ser = (Serialiser *)Threading::GetTLSValue(threadSerialiserTLSSlot); + if(ser) return ser; + + // slow path, but rare + +#if defined(RELEASE) + const bool debugSerialiser = false; +#else + const bool debugSerialiser = true; +#endif + + ser = new Serialiser(NULL, Serialiser::WRITING, debugSerialiser); + + Threading::SetTLSValue(threadSerialiserTLSSlot, (void *)ser); + + { + SCOPED_LOCK(m_ThreadSerialisersLock); + m_ThreadSerialisers.push_back(ser); + } + + return ser; } void WrappedVulkan::Serialise_CaptureScope(uint64_t offset) diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index 449d578be..01d64e567 
100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -111,6 +111,11 @@ private: Serialiser *m_pSerialiser; LogState m_State; + + uint64_t threadSerialiserTLSSlot; + + Threading::CriticalSection m_ThreadSerialisersLock; + vector<Serialiser *> m_ThreadSerialisers; VulkanReplay m_Replay; diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 24d3a1037..9f3bfe2ab 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -39,6 +39,11 @@ VkResult WrappedVulkan::vkCreateInstance( VkResult ret = GetInstanceDispatchTable(*pInstance)->CreateInstance(pCreateInfo, &inst); + // VKTODOHIGH need to deallocate this + threadSerialiserTLSSlot = Threading::AllocateTLSSlot(); + + // VKTODOHIGH need to deallocate m_ThreadSerialisers + GetResourceManager()->WrapResource(inst, inst); if(ret != VK_SUCCESS) diff --git a/renderdoc/os/linux/linux_threading.cpp b/renderdoc/os/linux/linux_threading.cpp index 8e8859788..01fe8d34f 100644 --- a/renderdoc/os/linux/linux_threading.cpp +++ b/renderdoc/os/linux/linux_threading.cpp @@ -122,6 +122,71 @@ namespace Threading return NULL; } + // to not exhaust OS slots, we only allocate one that points + // to our own array + pthread_key_t OSTLSHandle; + int64_t nextTLSSlot = 0; + + struct TLSData + { + vector<void *> data; + }; + + void Init() + { + int err = pthread_key_create(&OSTLSHandle, NULL); + if(err != 0) + RDCFATAL("Can't allocate OS TLS slot"); + } + + void Shutdown() + { + // let the TLS data leak. It's not great, but it's only a few kb per thread + // that we actually use (ie. not short-lived threads that don't use our TLS). + // We don't have a realistic alternative as the threads aren't ours when in-app + // and there may not be a way to have something call on thread death. 
+ pthread_key_delete(OSTLSHandle); + } + + // allocate a TLS slot in our per-thread vectors with an atomic increment. + // Note this is going to be 1-indexed because Inc64 returns the post-increment + // value + uint64_t AllocateTLSSlot() + { + return Atomic::Inc64(&nextTLSSlot); + } + + // look up our per-thread vector. + void *GetTLSValue(uint64_t slot) + { + TLSData *slots = (TLSData *)pthread_getspecific(OSTLSHandle); + if(slots == NULL || slot-1 >= slots->data.size()) + return NULL; + return slots->data[slot-1]; + } + + void SetTLSValue(uint64_t slot, void *value) + { + TLSData *slots = (TLSData *)pthread_getspecific(OSTLSHandle); + + // resize or allocate slot data if needed. + // We don't need to lock this, as it is by definition thread local so we are + // blocking on the only possible concurrent access. + if(slots == NULL || slot-1 >= slots->data.size()) + { + if(slots == NULL) + { + slots = new TLSData; + pthread_setspecific(OSTLSHandle, slots); + } + + if(slot-1 >= slots->data.size()) + slots->data.resize(slot); + } + + slots->data[slot-1] = value; + } + ThreadHandle CreateThread(ThreadEntry entryFunc, void *userData) { pthread_t thread; diff --git a/renderdoc/os/os_specific.h b/renderdoc/os/os_specific.h index 7db01c1cb..34b6ca739 100644 --- a/renderdoc/os/os_specific.h +++ b/renderdoc/os/os_specific.h @@ -82,6 +82,13 @@ namespace Threading data m_Data; }; + void Init(); + void Shutdown(); + uint64_t AllocateTLSSlot(); + + void *GetTLSValue(uint64_t slot); + void SetTLSValue(uint64_t slot, void *value); + // must typedef CriticalSectionTemplate CriticalSection typedef void (*ThreadEntry)(void *); diff --git a/renderdoc/os/win32/win32_threading.cpp b/renderdoc/os/win32/win32_threading.cpp index 7e0df24a7..649783628 100644 --- a/renderdoc/os/win32/win32_threading.cpp +++ b/renderdoc/os/win32/win32_threading.cpp @@ -116,6 +116,71 @@ namespace Threading return 0; } + + // to not exhaust OS slots, we only allocate one that points + // to our own array + 
DWORD OSTLSHandle; + int64_t nextTLSSlot = 0; + + struct TLSData + { + vector<void *> data; + }; + + void Init() + { + OSTLSHandle = TlsAlloc(); + if(OSTLSHandle == TLS_OUT_OF_INDEXES) + RDCFATAL("Can't allocate OS TLS slot"); + } + + void Shutdown() + { + // let the TLS data leak. It's not great, but it's only a few kb per thread + // that we actually use (ie. not short-lived threads that don't use our TLS). + // We don't have a realistic alternative as the threads aren't ours when in-app + // and there may not be a way to have something call on thread death. + TlsFree(OSTLSHandle); + } + + // allocate a TLS slot in our per-thread vectors with an atomic increment. + // Note this is going to be 1-indexed because Inc64 returns the post-increment + // value + uint64_t AllocateTLSSlot() + { + return Atomic::Inc64(&nextTLSSlot); + } + + // look up our per-thread vector. + void *GetTLSValue(uint64_t slot) + { + TLSData *slots = (TLSData *)TlsGetValue(OSTLSHandle); + if(slots == NULL || slot-1 >= slots->data.size()) + return NULL; + return slots->data[slot-1]; + } + + void SetTLSValue(uint64_t slot, void *value) + { + TLSData *slots = (TLSData *)TlsGetValue(OSTLSHandle); + + // resize or allocate slot data if needed. + // We don't need to lock this, as it is by definition thread local so we are + // blocking on the only possible concurrent access. + if(slots == NULL || slot-1 >= slots->data.size()) + { + if(slots == NULL) + { + slots = new TLSData; + TlsSetValue(OSTLSHandle, slots); + } + + if(slot-1 >= slots->data.size()) + slots->data.resize(slot); + } + + slots->data[slot-1] = value; + } ThreadHandle CreateThread(ThreadEntry entryFunc, void *userData) {