diff --git a/qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp b/qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp index f4c04c7e7..582445410 100644 --- a/qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp +++ b/qrenderdoc/Windows/Dialogs/PerformanceCounterSelection.cpp @@ -50,6 +50,7 @@ enum class CounterFamily AMD, Intel, NVIDIA, + VulkanExtended, }; CounterFamily GetCounterFamily(GPUCounter counter) @@ -66,6 +67,10 @@ CounterFamily GetCounterFamily(GPUCounter counter) { return CounterFamily::NVIDIA; } + else if(IsVulkanExtendedCounter(counter)) + { + return CounterFamily::VulkanExtended; + } return CounterFamily::Generic; } @@ -78,12 +83,13 @@ QString ToString(CounterFamily family) case CounterFamily::Generic: return lit("Generic"); case CounterFamily::Intel: return lit("Intel"); case CounterFamily::NVIDIA: return lit("NVIDIA"); + case CounterFamily::VulkanExtended: return lit("Vulkan Extended"); case CounterFamily::Unknown: return lit("Unknown"); } return QString(); } -} +} // namespace const int PerformanceCounterSelection::CounterDescriptionRole = Qt::UserRole + 1; const int PerformanceCounterSelection::CounterIdRole = Qt::UserRole + 2; diff --git a/renderdoc/api/replay/replay_enums.h b/renderdoc/api/replay/replay_enums.h index 900772c9c..180273e47 100644 --- a/renderdoc/api/replay/replay_enums.h +++ b/renderdoc/api/replay/replay_enums.h @@ -3040,7 +3040,10 @@ enum class GPUCounter : uint32_t FirstNvidia = 3000000, LastIntel = FirstNvidia - 1, - LastNvidia = 4000000, + FirstVulkanExtended = 4000000, + LastNvidia = FirstVulkanExtended - 1, + + LastVulkanExtended = 5000000, }; ITERABLE_OPERATORS(GPUCounter); @@ -3090,6 +3093,17 @@ inline constexpr bool IsNvidiaCounter(GPUCounter c) return c >= GPUCounter::FirstNvidia && c <= GPUCounter::LastNvidia; } +DOCUMENT(R"(Check whether or not this is a KHR counter. + +:param GPUCounter c: The counter. 
+:return: ``True`` if it is a Vulkan counter reported through the VK_KHR_performance_query extension, ``False`` if it's not. +:rtype: ``bool`` +)"); +inline constexpr bool IsVulkanExtendedCounter(GPUCounter c) +{ + return c >= GPUCounter::FirstVulkanExtended && c <= GPUCounter::LastVulkanExtended; +} + DOCUMENT(R"(The unit that GPU counter data is returned in. .. data:: Absolute @@ -3932,7 +3946,7 @@ DOCUMENT(R"(A set of flags giving details of the current status of vulkan layer also set then the entire process can be done un-elevated if user-local is desired. .. note:: - + If the :data:`NeedElevation` flag is set then elevation is required to fix the layer registration, even if a user-local registration is desired. @@ -4009,4 +4023,4 @@ DECLARE_REFLECTION_ENUM(AndroidFlags); #if defined(DISABLE_PYTHON_FLAG_ENUMS) DISABLE_PYTHON_FLAG_ENUMS; -#endif \ No newline at end of file +#endif diff --git a/renderdoc/driver/vulkan/vk_common.h b/renderdoc/driver/vulkan/vk_common.h index 047e9a8e4..f80e2c3ee 100644 --- a/renderdoc/driver/vulkan/vk_common.h +++ b/renderdoc/driver/vulkan/vk_common.h @@ -638,6 +638,7 @@ SERIALISE_VK_HANDLES(); // pNext structs - always have deserialise for the next chain DECLARE_REFLECTION_STRUCT(VkAcquireNextImageInfoKHR); +DECLARE_REFLECTION_STRUCT(VkAcquireProfilingLockInfoKHR); DECLARE_REFLECTION_STRUCT(VkApplicationInfo); DECLARE_REFLECTION_STRUCT(VkAttachmentDescription2KHR); DECLARE_REFLECTION_STRUCT(VkAttachmentReference2KHR); @@ -757,6 +758,9 @@ DECLARE_REFLECTION_STRUCT(VkMemoryPriorityAllocateInfoEXT); DECLARE_REFLECTION_STRUCT(VkMemoryRequirements2); DECLARE_REFLECTION_STRUCT(VkMultisamplePropertiesEXT); DECLARE_REFLECTION_STRUCT(VkPastPresentationTimingGOOGLE); +DECLARE_REFLECTION_STRUCT(VkPerformanceCounterKHR); +DECLARE_REFLECTION_STRUCT(VkPerformanceCounterDescriptionKHR); +DECLARE_REFLECTION_STRUCT(VkPerformanceQuerySubmitInfoKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDevice16BitStorageFeatures); 
DECLARE_REFLECTION_STRUCT(VkPhysicalDevice8BitStorageFeaturesKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceASTCDecodeFeaturesEXT); @@ -796,6 +800,8 @@ DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMemoryProperties2); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMultiviewFeatures); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMultiviewProperties); DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePCIBusInfoPropertiesEXT); +DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePerformanceQueryFeaturesKHR); +DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePerformanceQueryPropertiesKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePointClippingProperties); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceProperties2); @@ -864,6 +870,7 @@ DECLARE_REFLECTION_STRUCT(VkPresentTimeGOOGLE); DECLARE_REFLECTION_STRUCT(VkPresentTimesInfoGOOGLE); DECLARE_REFLECTION_STRUCT(VkProtectedSubmitInfo); DECLARE_REFLECTION_STRUCT(VkQueryPoolCreateInfo); +DECLARE_REFLECTION_STRUCT(VkQueryPoolPerformanceCreateInfoKHR); DECLARE_REFLECTION_STRUCT(VkQueueFamilyProperties2); DECLARE_REFLECTION_STRUCT(VkRefreshCycleDurationGOOGLE); DECLARE_REFLECTION_STRUCT(VkRenderPassAttachmentBeginInfoKHR); @@ -1220,6 +1227,7 @@ DECLARE_REFLECTION_STRUCT(VkMemoryRequirements); DECLARE_REFLECTION_STRUCT(VkMemoryType); DECLARE_REFLECTION_STRUCT(VkOffset2D); DECLARE_REFLECTION_STRUCT(VkOffset3D); +DECLARE_REFLECTION_STRUCT(VkPerformanceCounterResultKHR); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceFeatures); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceLimits); DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMemoryProperties); @@ -1306,6 +1314,7 @@ DECLARE_DESERIALISE_TYPE(VkWin32KeyedMutexAcquireReleaseInfoNV); // enums DECLARE_REFLECTION_ENUM(VkAccessFlagBits); +DECLARE_REFLECTION_ENUM(VkAcquireProfilingLockFlagBitsKHR); DECLARE_REFLECTION_ENUM(VkAttachmentDescriptionFlagBits); DECLARE_REFLECTION_ENUM(VkAttachmentLoadOp); DECLARE_REFLECTION_ENUM(VkAttachmentStoreOp); @@ -1374,6 
+1383,10 @@ DECLARE_REFLECTION_ENUM(VkLogicOp); DECLARE_REFLECTION_ENUM(VkMemoryAllocateFlagBits); DECLARE_REFLECTION_ENUM(VkMemoryHeapFlagBits); DECLARE_REFLECTION_ENUM(VkMemoryPropertyFlagBits); +DECLARE_REFLECTION_ENUM(VkPerformanceCounterDescriptionFlagBitsKHR); +DECLARE_REFLECTION_ENUM(VkPerformanceCounterScopeKHR); +DECLARE_REFLECTION_ENUM(VkPerformanceCounterStorageKHR); +DECLARE_REFLECTION_ENUM(VkPerformanceCounterUnitKHR); DECLARE_REFLECTION_ENUM(VkPhysicalDeviceType); DECLARE_REFLECTION_ENUM(VkPipelineBindPoint); DECLARE_REFLECTION_ENUM(VkPipelineCreateFlagBits); diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 678972781..89225d5fe 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -130,6 +130,7 @@ WrappedVulkan::WrappedVulkan() : m_RenderState(this, &m_CreationInfo) m_LastEventID = ~0U; m_DrawcallCallback = NULL; + m_SubmitChain = NULL; m_CurChunkOffset = 0; m_AddedDrawcall = false; @@ -267,7 +268,7 @@ void WrappedVulkan::SubmitCmds(VkSemaphore *unwrappedWaitSemaphores, VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, - NULL, + m_SubmitChain, waitSemaphoreCount, unwrappedWaitSemaphores, waitStageMask, @@ -395,7 +396,7 @@ void WrappedVulkan::SubmitAndFlushExtQueue(uint32_t queueFamilyIdx) VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, - NULL, + m_SubmitChain, 0, NULL, NULL, // wait semaphores @@ -955,6 +956,9 @@ static const VkExtensionProperties supportedExtensions[] = { { VK_KHR_MULTIVIEW_EXTENSION_NAME, VK_KHR_MULTIVIEW_SPEC_VERSION, }, + { + VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME, VK_KHR_PERFORMANCE_QUERY_SPEC_VERSION, + }, { VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME, VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_SPEC_VERSION, @@ -2417,7 +2421,7 @@ ReplayStatus WrappedVulkan::ContextReplayLog(CaptureState readType, uint32_t sta { VkSubmitInfo submitInfo = { VK_STRUCTURE_TYPE_SUBMIT_INFO, - NULL, + m_SubmitChain, 0, NULL, NULL, // wait 
semaphores diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index a68f36072..9a547a949 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -328,6 +328,7 @@ private: Threading::RWLock m_CapTransitionLock; VulkanDrawcallCallback *m_DrawcallCallback; + void *m_SubmitChain; SDFile *m_StructuredFile; SDFile m_StoredStructuredData; @@ -377,6 +378,8 @@ private: VkFormatProperties fmtprops[VK_FORMAT_RANGE_SIZE] = {}; VkDriverInfo driverInfo = VkDriverInfo(props); + VkPhysicalDevicePerformanceQueryFeaturesKHR performanceQueryFeatures = {}; + uint32_t queueCount = 0; VkQueueFamilyProperties queueProps[16] = {}; }; @@ -1008,6 +1011,7 @@ public: VulkanRenderState &GetRenderState() { return m_RenderState; } void SetDrawcallCB(VulkanDrawcallCallback *cb) { m_DrawcallCallback = cb; } + void SetSubmitChain(void *submitChain) { m_SubmitChain = submitChain; } static bool IsSupportedExtension(const char *extName); static void FilterToSupportedExtensions(std::vector &exts, std::vector &filtered); @@ -1024,6 +1028,10 @@ public: const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; } const VkPhysicalDeviceProperties &GetDeviceProps() { return m_PhysicalDeviceData.props; } + const VkPhysicalDevicePerformanceQueryFeaturesKHR &GetPhysicalDevicePerformanceQueryFeatures() + { + return m_PhysicalDeviceData.performanceQueryFeatures; + } VkDriverInfo GetDriverInfo() { return m_PhysicalDeviceData.driverInfo; } // Device initialization @@ -2184,4 +2192,15 @@ public: IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkSignalSemaphoreKHR, VkDevice device, const VkSemaphoreSignalInfoKHR *pSignalInfo); + + // VK_KHR_performance_query + + VkResult vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount, + VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions); + void 
vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( + VkPhysicalDevice physicalDevice, + const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses); + VkResult vkAcquireProfilingLockKHR(VkDevice device, const VkAcquireProfilingLockInfoKHR *pInfo); + void vkReleaseProfilingLockKHR(VkDevice device); };
diff --git a/renderdoc/driver/vulkan/vk_counters.cpp b/renderdoc/driver/vulkan/vk_counters.cpp
index 057398c98..ddaafd6b8 100644
--- a/renderdoc/driver/vulkan/vk_counters.cpp
+++ b/renderdoc/driver/vulkan/vk_counters.cpp
@@ -30,6 +30,142 @@
 
 #include "driver/ihv/amd/amd_counters.h"
 #include "driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h"
+#include "strings/string_utils.h"
+
+// Converts a GPUCounter in the VulkanExtended range into a zero-based counter index.
+static uint32_t FromKHRCounter(GPUCounter counterID)
+{
+  return (uint32_t)counterID - (uint32_t)GPUCounter::FirstVulkanExtended;
+}
+
+// Inverse of FromKHRCounter(): maps a zero-based counter index to its GPUCounter value.
+static GPUCounter ToKHRCounter(uint32_t idx)
+{
+  return (GPUCounter)((uint32_t)GPUCounter::FirstVulkanExtended + idx);
+}
+
+// Translates a VK_KHR_performance_query unit into the unit, component type and byte width
+// advertised for the counter. Every unit is described as an 8 byte value holding either an
+// unsigned integer or a double. All three outputs are written for every input, including
+// unrecognised units.
+static void GetKHRUnitDescription(const VkPerformanceCounterUnitKHR khrUnit, CounterUnit &unit,
+                                  CompType &type, uint32_t &byteWidth)
+{
+  // the width is the same for every unit, only the unit and component type vary
+  byteWidth = 8;
+
+  switch(khrUnit)
+  {
+    case VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR:
+      unit = CounterUnit::Percentage;
+      type = CompType::Double;
+      return;
+    case VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR:
+      // convertKhrCounterResult() rescales nanoseconds to seconds to match this unit
+      unit = CounterUnit::Seconds;
+      type = CompType::Double;
+      return;
+    case VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR:
+      unit = CounterUnit::Bytes;
+      type = CompType::UInt;
+      return;
+    case VK_PERFORMANCE_COUNTER_UNIT_BYTES_PER_SECOND_KHR:
+      unit = CounterUnit::Ratio;
+      type = CompType::Double;
+      return;
+    case VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR:
+      unit = CounterUnit::Cycles;
+      type = CompType::UInt;
+      return;
+    // the remaining known units are all reported as plain absolute integer values
+    case VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR:
+    case VK_PERFORMANCE_COUNTER_UNIT_KELVIN_KHR:
+    case VK_PERFORMANCE_COUNTER_UNIT_WATTS_KHR:
+    case VK_PERFORMANCE_COUNTER_UNIT_VOLTS_KHR:
+    case VK_PERFORMANCE_COUNTER_UNIT_AMPS_KHR:
+    case VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR:
+      unit = CounterUnit::Absolute;
+      type = CompType::UInt;
+      return;
+    default: break;
+  }
+
+  // unrecognised unit: report it, but still fill in the outputs so that callers never read
+  // indeterminate values
+  RDCERR("Invalid performance counter unit %d", khrUnit);
+  unit = CounterUnit::Absolute;
+  type = CompType::UInt;
+}
+
+// Converts one raw VK_KHR_performance_query result into a CounterResult value. The value is
+// read from the union member selected by khrStorage and written to the member matching the
+// component type GetKHRUnitDescription() advertises for khrUnit. Nanoseconds become seconds.
+void VulkanReplay::convertKhrCounterResult(CounterResult &rdcResult,
+                                           const VkPerformanceCounterResultKHR &khrResult,
+                                           VkPerformanceCounterUnitKHR khrUnit,
+                                           VkPerformanceCounterStorageKHR khrStorage)
+{
+  CounterUnit unit;
+  CompType type;
+  uint32_t byteWidth;
+  GetKHRUnitDescription(khrUnit, unit, type, byteWidth);
+
+  // the storage type is passed independently of the unit, so e.g. a nanosecond counter may
+  // arrive as an integer even though it is advertised as a double
+  const bool asDouble = (type == CompType::Double);
+
+  switch(khrStorage)
+  {
+    case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR:
+      if(asDouble)
+        rdcResult.value.d = (double)khrResult.int32;
+      else
+        rdcResult.value.u64 = (uint64_t)khrResult.int32;
+      break;
+    case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR:
+      if(asDouble)
+        rdcResult.value.d = (double)khrResult.uint32;
+      else
+        rdcResult.value.u64 = (uint64_t)khrResult.uint32;
+      break;
+    case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR:
+      if(asDouble)
+        rdcResult.value.d = (double)khrResult.int64;
+      else
+        rdcResult.value.u64 = (uint64_t)khrResult.int64;
+      break;
+    case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR:
+      if(asDouble)
+        rdcResult.value.d = (double)khrResult.uint64;
+      else
+        rdcResult.value.u64 = khrResult.uint64;
+      break;
+    case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR:
+      if(asDouble)
+        rdcResult.value.d = (double)khrResult.float32;
+      else
+        rdcResult.value.u64 = (uint64_t)khrResult.float32;
+      break;
+    case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR:
+      if(asDouble)
+        rdcResult.value.d = khrResult.float64;
+      else
+        rdcResult.value.u64 = (uint64_t)khrResult.float64;
+      break;
+    default:
+      // give the result a defined value instead of leaving it untouched
+      rdcResult.value.u64 = 0;
+      RDCERR("Wrong counter storage type %d", khrStorage);
+      break;
+  }
+
+  // Special case for time units, renderdoc only has a Seconds type.
+  if(khrUnit == VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR)
+  {
+    RDCASSERT(type == CompType::Double);
+    rdcResult.value.d /= 1000.0 * 1000.0 * 1000.0;
+  }
+}
 
 std::vector VulkanReplay::EnumerateCounters()
 {
@@ -60,6 +196,47 @@ std::vector VulkanReplay::EnumerateCounters()
     ret.push_back(GPUCounter::CSInvocations);
   }
 
+  if(m_pDriver->GetPhysicalDevicePerformanceQueryFeatures().performanceCounterQueryPools)
+  {
+    VkPhysicalDevice physDev = m_pDriver->GetPhysDev();
+    uint32_t khrCounters = 0;
+
+    // first call only queries how many counters exist. Only queue family 0 is enumerated.
+    VkResult vkr =
+        ObjDisp(physDev)->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+            Unwrap(physDev), 0, &khrCounters, NULL, NULL);
+    if(vkr != VK_SUCCESS)
+      khrCounters = 0;
+
+    m_KHRCounters.resize(khrCounters);
+    m_KHRCountersDescriptions.resize(khrCounters);
+
+    // the arrays are outputs, but every element still has to carry a valid sType
+    for(uint32_t c = 0; c < khrCounters; c++)
+    {
+      m_KHRCounters[c].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR;
+      m_KHRCountersDescriptions[c].sType =
+          VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
+    }
+
+    // only fetch when there is something to fetch: indexing an empty vector is undefined
+    if(khrCounters > 0)
+    {
+      vkr = ObjDisp(physDev)->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
+          Unwrap(physDev), 0, &khrCounters, &m_KHRCounters[0], &m_KHRCountersDescriptions[0]);
+
+      // on failure expose no counters, otherwise keep only the entries that were written
+      if(vkr != VK_SUCCESS && vkr != VK_INCOMPLETE)
+        khrCounters = 0;
+
+      m_KHRCounters.resize(khrCounters);
+      m_KHRCountersDescriptions.resize(khrCounters);
+    }
+
+    for(uint32_t c = 0; c < khrCounters; c++)
+      ret.push_back(ToKHRCounter(c));
+  }
+
   if(m_pAMDCounters)
   {
     std::vector amdCounters = m_pAMDCounters->GetPublicCounterIds();
@@ -85,6 +262,30 @@ CounterDescription VulkanReplay::DescribeCounter(GPUCounter counterID)
     }
   }
 
+  if(IsVulkanExtendedCounter(counterID))
+  {
+    const VkPerformanceCounterKHR &khrCounter = m_KHRCounters[FromKHRCounter(counterID)];
+    const VkPerformanceCounterDescriptionKHR &khrCounterDesc =
+        m_KHRCountersDescriptions[FromKHRCounter(counterID)];
+
+    CounterDescription rdcDesc;
+    rdcDesc.counter = counterID;
+    rdcDesc.name = khrCounterDesc.name;
+    rdcDesc.category = khrCounterDesc.category;
+    rdcDesc.description = khrCounterDesc.description;
+
+    // copy the counter's UUID into the description that this branch actually returns
+    const uint32_t *uuid_dwords = (const uint32_t *)khrCounter.uuid;
+    rdcDesc.uuid.words[0] = uuid_dwords[0];
+    rdcDesc.uuid.words[1] = uuid_dwords[1];
+    rdcDesc.uuid.words[2] = uuid_dwords[2];
+    rdcDesc.uuid.words[3] = uuid_dwords[3];
+
+    GetKHRUnitDescription(khrCounter.unit, rdcDesc.unit, rdcDesc.resultType, rdcDesc.resultByteWidth);
+
+    return rdcDesc;
+  }
+
   // 6839CB5B-FBD2-4550-B606-8C65157C684C
desc.uuid.words[0] = 0x6839CB5B; desc.uuid.words[1] = 0xFBD24550; @@ -376,6 +523,201 @@ std::vector VulkanReplay::FetchCountersAMD(const std::vectorSetDrawcallCB(this); + } + ~VulkanKHRCallback() { m_pDriver->SetDrawcallCB(NULL); } + void PreDraw(uint32_t eid, VkCommandBuffer cmd) override + { + ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_QueryPool, (uint32_t)m_Results.size(), 0); + } + + bool PostDraw(uint32_t eid, VkCommandBuffer cmd) override + { + ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_QueryPool, (uint32_t)m_Results.size()); + m_Results.push_back(eid); + return false; + } + + void PostRedraw(uint32_t eid, VkCommandBuffer cmd) override {} + // we don't need to distinguish, call the Draw functions + void PreDispatch(uint32_t eid, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) override { return PostDraw(eid, cmd); } + void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) override { PostRedraw(eid, cmd); } + void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override { PreDraw(eid, cmd); } + bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + return PostDraw(eid, cmd); + } + void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override + { + PostRedraw(eid, cmd); + } + void AliasEvent(uint32_t primary, uint32_t alias) override + { + m_AliasEvents.push_back(std::make_pair(primary, alias)); + } + + void PreEndCommandBuffer(VkCommandBuffer cmd) override {} + WrappedVulkan *m_pDriver; + VulkanReplay *m_pReplay; + VkQueryPool m_QueryPool; + std::vector m_Results; + // events which are the 'same' from being the same command buffer resubmitted + // multiple times in the frame. We will only get the full callback when we're + // recording the command buffer, and will be given the first EID. After that + // we'll just be told which other EIDs alias this event.
+ std::vector > m_AliasEvents; +}; +
+// Fetches results for the given VK_KHR_performance_query counters. The whole capture is
+// replayed once per pass reported by the implementation, with VulkanKHRCallback wrapping each
+// draw, dispatch and misc event in a performance query. Returns no results if counters is
+// empty, or if the profiling lock or the query pool cannot be obtained.
+std::vector<CounterResult> VulkanReplay::FetchCountersKHR(const std::vector<GPUCounter> &counters)
+{
+  // nothing to fetch, and an empty list has no first element to point the create info at
+  if(counters.empty())
+    return std::vector<CounterResult>();
+
+  std::vector<uint32_t> counterIndices;
+  for(const GPUCounter &c : counters)
+    counterIndices.push_back(FromKHRCounter(c));
+
+  VkQueryPoolPerformanceCreateInfoKHR perfCreateInfo = {
+      VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, NULL, 0,
+      (uint32_t)counterIndices.size(), &counterIndices[0]};
+  uint32_t passCount = 0;
+  ObjDisp(m_pDriver->GetInstance())
+      ->GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(Unwrap(m_pDriver->GetPhysDev()),
+                                                              &perfCreateInfo, &passCount);
+
+  VkDevice dev = m_pDriver->GetDev();
+  VkAcquireProfilingLockInfoKHR acquireLockInfo = {
+      VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR, NULL, 0, 50 * 1000 * 1000 /* 50ms */};
+  VkResult vkr = ObjDisp(dev)->AcquireProfilingLockKHR(Unwrap(dev), &acquireLockInfo);
+  if(vkr != VK_SUCCESS)
+  {
+    RDCWARN("Unable to acquire profiling lock: %s", ToStr(vkr).c_str());
+    return std::vector<CounterResult>();
+  }
+
+  uint32_t maxEID = m_pDriver->GetMaxEID();
+  VkQueryPoolCreateInfo queryPoolCreateInfo = {
+      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, &perfCreateInfo, 0,
+      VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, maxEID, 0};
+
+  VkQueryPool queryPool;
+  vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &queryPoolCreateInfo, NULL, &queryPool);
+  if(vkr != VK_SUCCESS)
+  {
+    // without a pool no query can be recorded: give the profiling lock back and bail out
+    RDCERR("Unable to create performance query pool: %s", ToStr(vkr).c_str());
+    ObjDisp(dev)->ReleaseProfilingLockKHR(Unwrap(dev));
+    return std::vector<CounterResult>();
+  }
+
+  // Reset query pool
+  VkCommandBuffer cmd = m_pDriver->GetNextCmd();
+
+  VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
+                                        VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
+
+  vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
+  RDCASSERTEQUAL(vkr, VK_SUCCESS);
+
+  ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), queryPool, 0, maxEID);
+
+  vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
+  RDCASSERTEQUAL(vkr, VK_SUCCESS);
+
+  m_pDriver->SubmitCmds();
+
+  VulkanKHRCallback cb(m_pDriver, this, queryPool);
+
+  // replay the events to perform all the queries
+  for(uint32_t i = 0; i < passCount; i++)
+  {
+    VkPerformanceQuerySubmitInfoKHR perfSubmitInfo = {
+        VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, i};
+
+    cb.m_Results.clear();
+
+    m_pDriver->SetSubmitChain(&perfSubmitInfo);
+    m_pDriver->ReplayLog(0, maxEID, eReplay_Full);
+    m_pDriver->SetSubmitChain(NULL);
+  }
+
+  std::vector<VkPerformanceCounterResultKHR> perfResults;
+  perfResults.resize(cb.m_Results.size() * counters.size());
+
+  // with no recorded queries there is nothing to read back, and no storage to read it into
+  if(!perfResults.empty())
+  {
+    vkr = ObjDisp(dev)->GetQueryPoolResults(
+        Unwrap(dev), queryPool, 0, (uint32_t)cb.m_Results.size(),
+        sizeof(VkPerformanceCounterResultKHR) * perfResults.size(), &perfResults[0],
+        sizeof(VkPerformanceCounterResultKHR) * counters.size(), VK_QUERY_RESULT_WAIT_BIT);
+    RDCASSERTEQUAL(vkr, VK_SUCCESS);
+  }
+
+  ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), queryPool, NULL);
+
+  ObjDisp(dev)->ReleaseProfilingLockKHR(Unwrap(dev));
+
+  std::vector<CounterResult> ret;
+  for(size_t i = 0; i < cb.m_Results.size(); i++)
+  {
+    for(size_t c = 0; c < counters.size(); c++)
+    {
+      CounterResult result;
+
+      result.eventId = cb.m_Results[i];
+      result.counter = counters[c];
+
+      const VkPerformanceCounterKHR &khrCounter = m_KHRCounters[counterIndices[c]];
+
+      convertKhrCounterResult(result, perfResults[counters.size() * i + c], khrCounter.unit,
+                              khrCounter.storage);
+      ret.push_back(result);
+    }
+  }
+
+  for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
+  {
+    for(size_t c = 0; c < counters.size(); c++)
+    {
+      CounterResult search;
+      search.counter = counters[c];
+      search.eventId = cb.m_AliasEvents[i].first;
+
+      // find the result we're aliasing
+      auto it = std::find(ret.begin(), ret.end(), search);
+      if(it != ret.end())
+      {
+        // duplicate the result and append
+        CounterResult aliased = *it;
+        aliased.eventId = cb.m_AliasEvents[i].second;
+        ret.push_back(aliased);
+      }
+      else
+      {
+        RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't",
+               search.eventId, search.counter);
+      }
+    }
+  }
+
+  // sort so that the alias results appear in the right places
+  std::sort(ret.begin(), ret.end());
+
+  return ret;
+}
+
 struct VulkanGPUTimerCallback : public VulkanDrawcallCallback { VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool tsqp, VkQueryPool occqp, @@ -468,9 +810,13 @@ std::vector VulkanReplay::FetchCounters(const std::vector vkKHRCounters; + std::copy_if(counters.begin(), counters.end(), std::back_inserter(vkKHRCounters), + [](const GPUCounter &c) { return IsVulkanExtendedCounter(c); }); + if(!vkKHRCounters.empty()) { - return ret; + std::vector khrResults = FetchCountersKHR(vkKHRCounters); + ret.insert(ret.end(), khrResults.begin(), khrResults.end()); } VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures(); diff --git a/renderdoc/driver/vulkan/vk_hookset_defs.h b/renderdoc/driver/vulkan/vk_hookset_defs.h index c57810267..b6fc75c67 100644 --- a/renderdoc/driver/vulkan/vk_hookset_defs.h +++ b/renderdoc/driver/vulkan/vk_hookset_defs.h @@ -452,7 +452,8 @@ DeclExt(AMD_negative_viewport_height); \ DeclExt(EXT_line_rasterization); \ DeclExt(GOOGLE_display_timing); \ - DeclExt(KHR_timeline_semaphore); + DeclExt(KHR_timeline_semaphore); \ + DeclExt(KHR_performance_query); // for simplicity and since the check itself is platform agnostic, // these aren't protected in platform defines @@ -484,7 +485,8 @@ CheckExt(EXT_full_screen_exclusive, VKXX); \ CheckExt(EXT_headless_surface, VKXX); \ CheckExt(EXT_metal_surface, VKXX); \ - CheckExt(KHR_wayland_surface, VKXX); + CheckExt(KHR_wayland_surface, VKXX); \ + CheckExt(KHR_performance_query, VKXX); #define CheckDeviceExts() \ CheckExt(EXT_debug_marker, VKXX); \ @@ -538,7 +540,8 @@ CheckExt(AMD_negative_viewport_height, VKXX); \ CheckExt(EXT_line_rasterization, VKXX); \ CheckExt(GOOGLE_display_timing, VKXX); \ - CheckExt(KHR_timeline_semaphore, VKXX); + CheckExt(KHR_timeline_semaphore, VKXX); \ + CheckExt(KHR_performance_query, VKXX); #define HookInitVulkanInstanceExts() \ HookInitExtension(KHR_surface, DestroySurfaceKHR); \ @@ -595,6 +598,9 @@ HookInitExtension(EXT_sample_locations,
GetPhysicalDeviceMultisamplePropertiesEXT); \ HookInitExtension(EXT_calibrated_timestamps, GetPhysicalDeviceCalibrateableTimeDomainsEXT); \ HookInitExtension(EXT_headless_surface, CreateHeadlessSurfaceEXT); \ + HookInitExtension(KHR_performance_query, \ + EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR); \ + HookInitExtension(KHR_performance_query, GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR); \ HookInitInstance_PlatformSpecific() #define HookInitVulkanDeviceExts() \ @@ -687,6 +693,8 @@ HookInitExtension(KHR_timeline_semaphore, GetSemaphoreCounterValueKHR); \ HookInitExtension(KHR_timeline_semaphore, WaitSemaphoresKHR); \ HookInitExtension(KHR_timeline_semaphore, SignalSemaphoreKHR); \ + HookInitExtension(KHR_performance_query, AcquireProfilingLockKHR); \ + HookInitExtension(KHR_performance_query, ReleaseProfilingLockKHR); \ HookInitDevice_PlatformSpecific() #define DefineHooks() \ @@ -1289,4 +1297,14 @@ pWaitInfo, uint64_t, timeout); \ HookDefine2(VkResult, vkSignalSemaphoreKHR, VkDevice, device, const VkSemaphoreSignalInfoKHR *, \ pSignalInfo); \ + HookDefine5(VkResult, vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR, \ + VkPhysicalDevice, physicalDevice, uint32_t, queueFamilyIndex, uint32_t *, \ + pCounterCount, VkPerformanceCounterKHR *, pCounters, \ + VkPerformanceCounterDescriptionKHR *, pCounterDescriptions); \ + HookDefine3(void, vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR, VkPhysicalDevice, \ + physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR *, \ + pPerformanceQueryCreateInfo, uint32_t *, pNumPasses); \ + HookDefine2(VkResult, vkAcquireProfilingLockKHR, VkDevice, device, \ + const VkAcquireProfilingLockInfoKHR *, pInfo); \ + HookDefine1(void, vkReleaseProfilingLockKHR, VkDevice, device); \ HookDefine_PlatformSpecific() diff --git a/renderdoc/driver/vulkan/vk_replay.h b/renderdoc/driver/vulkan/vk_replay.h index 0cbc79f8a..f55ca668a 100644 --- a/renderdoc/driver/vulkan/vk_replay.h +++ 
b/renderdoc/driver/vulkan/vk_replay.h @@ -727,4 +727,14 @@ private: AMDRGPControl *m_RGP = NULL; VulkanAMDDrawCallback *m_pAMDDrawCallback = NULL; + + std::vector FetchCountersKHR(const std::vector &counters); + + std::vector m_KHRCounters; + std::vector m_KHRCountersDescriptions; + + void convertKhrCounterResult(CounterResult &rdcResult, + const VkPerformanceCounterResultKHR &khrResult, + VkPerformanceCounterUnitKHR khrUnit, + VkPerformanceCounterStorageKHR khrStorage); }; diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp index 762db1f8e..2854eb13f 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp @@ -1589,6 +1589,15 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi "bindless shader access will use less reliable fallback"); } + bool perfQuery = false; + + if(supportedExtensions.find(VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME) != supportedExtensions.end()) + { + perfQuery = true; + Extensions.push_back(VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME); + RDCLOG("Enabling VK_KHR_performance_query"); + } + VkDevice device; uint32_t qCount = 0; @@ -2538,6 +2547,48 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi } } + VkPhysicalDevicePerformanceQueryFeaturesKHR perfFeatures = { + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR, + }; + + if(perfQuery) + { + VkPhysicalDeviceFeatures2 availBase = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2}; + m_PhysicalDeviceData.performanceQueryFeatures.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR; + availBase.pNext = &perfFeatures; + ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures2(Unwrap(physicalDevice), &availBase); + + m_PhysicalDeviceData.performanceQueryFeatures = perfFeatures; + + if(perfFeatures.performanceCounterQueryPools) + { + VkPhysicalDevicePerformanceQueryFeaturesKHR 
*existing = + (VkPhysicalDevicePerformanceQueryFeaturesKHR *)FindNextStruct( + &createInfo, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR); + + if(existing) + { + existing->performanceCounterQueryPools = VK_TRUE; + } + else + { + perfFeatures.performanceCounterQueryPools = VK_TRUE; + perfFeatures.performanceCounterMultipleQueryPools = VK_FALSE; + + perfFeatures.pNext = (void *)createInfo.pNext; + createInfo.pNext = &perfFeatures; + } + } + else + { + auto it = + std::find(Extensions.begin(), Extensions.end(), VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME); + RDCASSERT(it != Extensions.end()); + Extensions.erase(it); + } + } + std::vector layerArray(Layers.size()); for(size_t i = 0; i < Layers.size(); i++) layerArray[i] = Layers[i].c_str(); @@ -2580,6 +2631,7 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi CheckDeviceExts(); } + InitInstanceExtensionTables(m_Instance, &m_EnabledExtensions); InitDeviceExtensionTables(device, &m_EnabledExtensions); RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE diff --git a/renderdoc/driver/vulkan/wrappers/vk_misc_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_misc_funcs.cpp index 8bf17c76a..306b10b2c 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_misc_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_misc_funcs.cpp @@ -2073,3 +2073,32 @@ VkResult WrappedVulkan::vkGetPastPresentationTimingGOOGLE( return ObjDisp(device)->GetPastPresentationTimingGOOGLE( Unwrap(device), Unwrap(swapchain), pPresentationTimingCount, pPresentationTimings); } + +VkResult WrappedVulkan::vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount, + VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions) +{ + return ObjDisp(physicalDevice) + ->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR( + Unwrap(physicalDevice), queueFamilyIndex, pCounterCount, 
pCounters, pCounterDescriptions); +} + +void WrappedVulkan::vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( + VkPhysicalDevice physicalDevice, + const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses) +{ + ObjDisp(physicalDevice) + ->GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR( + Unwrap(physicalDevice), pPerformanceQueryCreateInfo, pNumPasses); +} + +VkResult WrappedVulkan::vkAcquireProfilingLockKHR(VkDevice device, + const VkAcquireProfilingLockInfoKHR *pInfo) +{ + return ObjDisp(device)->AcquireProfilingLockKHR(Unwrap(device), pInfo); +} + +void WrappedVulkan::vkReleaseProfilingLockKHR(VkDevice device) +{ + ObjDisp(device)->ReleaseProfilingLockKHR(Unwrap(device)); +} diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp index a2436203e..9b46640a0 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp @@ -169,6 +169,13 @@ void WrappedVulkan::vkGetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex, } } +static void appendChain(VkBaseInStructure *chain, void *item) +{ + while(chain->pNext != NULL) + chain = (VkBaseInStructure *)chain->pNext; + chain->pNext = (VkBaseInStructure *)item; +} + template bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence) @@ -242,6 +249,7 @@ bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, tempMem += unwrapped.commandBufferCount * sizeof(VkCommandBuffer); UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&unwrapped); + appendChain((VkBaseInStructure *)&unwrapped, m_SubmitChain); ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &unwrapped, VK_NULL_HANDLE); @@ -411,6 +419,7 @@ bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, byte *tempMem = 
GetTempMemory(GetNextPatchSize(rerecordedSubmit.pNext)); UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&rerecordedSubmit); + appendChain((VkBaseInStructure *)&rerecordedSubmit, m_SubmitChain); rerecordedSubmit.commandBufferCount = (uint32_t)rerecordedCmds.size(); rerecordedSubmit.pCommandBuffers = &rerecordedCmds[0]; @@ -785,6 +794,7 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount, unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]); UnwrapNextChain(m_State, "VkSubmitInfo", memory, (VkBaseInStructure *)&unwrappedSubmits[i]); + appendChain((VkBaseInStructure *)&unwrappedSubmits[i], m_SubmitChain); } VkResult ret;