Expose Vulkan performance counters from VK_KHR_performance_query

This commit is contained in:
Lionel Landwerlin
2018-11-09 22:10:26 +00:00
committed by Baldur Karlsson
parent a1c6cc1411
commit 80183c068a
11 changed files with 515 additions and 12 deletions
@@ -50,6 +50,7 @@ enum class CounterFamily
AMD,
Intel,
NVIDIA,
VulkanExtended,
};
CounterFamily GetCounterFamily(GPUCounter counter)
@@ -66,6 +67,10 @@ CounterFamily GetCounterFamily(GPUCounter counter)
{
return CounterFamily::NVIDIA;
}
else if(IsVulkanExtendedCounter(counter))
{
return CounterFamily::VulkanExtended;
}
return CounterFamily::Generic;
}
@@ -78,12 +83,13 @@ QString ToString(CounterFamily family)
case CounterFamily::Generic: return lit("Generic");
case CounterFamily::Intel: return lit("Intel");
case CounterFamily::NVIDIA: return lit("NVIDIA");
case CounterFamily::VulkanExtended: return lit("Vulkan Extended");
case CounterFamily::Unknown: return lit("Unknown");
}
return QString();
}
}
} // namespace
const int PerformanceCounterSelection::CounterDescriptionRole = Qt::UserRole + 1;
const int PerformanceCounterSelection::CounterIdRole = Qt::UserRole + 2;
+17 -3
View File
@@ -3040,7 +3040,10 @@ enum class GPUCounter : uint32_t
FirstNvidia = 3000000,
LastIntel = FirstNvidia - 1,
LastNvidia = 4000000,
FirstVulkanExtended = 4000000,
LastNvidia = FirstVulkanExtended - 1,
LastVulkanExtended = 5000000,
};
ITERABLE_OPERATORS(GPUCounter);
@@ -3090,6 +3093,17 @@ inline constexpr bool IsNvidiaCounter(GPUCounter c)
return c >= GPUCounter::FirstNvidia && c <= GPUCounter::LastNvidia;
}
DOCUMENT(R"(Check whether or not this is a KHR counter.
:param GPUCounter c: The counter.
:return: ``True`` if it is a Vulkan counter reported through the VK_KHR_performance_query extension, ``False`` if it's not.
:rtype: ``bool``
)");
inline constexpr bool IsVulkanExtendedCounter(GPUCounter c)
{
  // closed-interval test: both FirstVulkanExtended and LastVulkanExtended count as members
  return GPUCounter::FirstVulkanExtended <= c && GPUCounter::LastVulkanExtended >= c;
}
DOCUMENT(R"(The unit that GPU counter data is returned in.
.. data:: Absolute
@@ -3932,7 +3946,7 @@ DOCUMENT(R"(A set of flags giving details of the current status of vulkan layer
also set then the entire process can be done un-elevated if user-local is desired.
.. note::
If the :data:`NeedElevation` flag is set then elevation is required to fix the layer
registration, even if a user-local registration is desired.
@@ -4009,4 +4023,4 @@ DECLARE_REFLECTION_ENUM(AndroidFlags);
#if defined(DISABLE_PYTHON_FLAG_ENUMS)
DISABLE_PYTHON_FLAG_ENUMS;
#endif
#endif
+13
View File
@@ -638,6 +638,7 @@ SERIALISE_VK_HANDLES();
// pNext structs - always have deserialise for the next chain
DECLARE_REFLECTION_STRUCT(VkAcquireNextImageInfoKHR);
DECLARE_REFLECTION_STRUCT(VkAcquireProfilingLockInfoKHR);
DECLARE_REFLECTION_STRUCT(VkApplicationInfo);
DECLARE_REFLECTION_STRUCT(VkAttachmentDescription2KHR);
DECLARE_REFLECTION_STRUCT(VkAttachmentReference2KHR);
@@ -757,6 +758,9 @@ DECLARE_REFLECTION_STRUCT(VkMemoryPriorityAllocateInfoEXT);
DECLARE_REFLECTION_STRUCT(VkMemoryRequirements2);
DECLARE_REFLECTION_STRUCT(VkMultisamplePropertiesEXT);
DECLARE_REFLECTION_STRUCT(VkPastPresentationTimingGOOGLE);
DECLARE_REFLECTION_STRUCT(VkPerformanceCounterKHR);
DECLARE_REFLECTION_STRUCT(VkPerformanceCounterDescriptionKHR);
DECLARE_REFLECTION_STRUCT(VkPerformanceQuerySubmitInfoKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevice16BitStorageFeatures);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevice8BitStorageFeaturesKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceASTCDecodeFeaturesEXT);
@@ -796,6 +800,8 @@ DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMemoryProperties2);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMultiviewFeatures);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMultiviewProperties);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePCIBusInfoPropertiesEXT);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePerformanceQueryFeaturesKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePerformanceQueryPropertiesKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDevicePointClippingProperties);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceProperties2);
@@ -864,6 +870,7 @@ DECLARE_REFLECTION_STRUCT(VkPresentTimeGOOGLE);
DECLARE_REFLECTION_STRUCT(VkPresentTimesInfoGOOGLE);
DECLARE_REFLECTION_STRUCT(VkProtectedSubmitInfo);
DECLARE_REFLECTION_STRUCT(VkQueryPoolCreateInfo);
DECLARE_REFLECTION_STRUCT(VkQueryPoolPerformanceCreateInfoKHR);
DECLARE_REFLECTION_STRUCT(VkQueueFamilyProperties2);
DECLARE_REFLECTION_STRUCT(VkRefreshCycleDurationGOOGLE);
DECLARE_REFLECTION_STRUCT(VkRenderPassAttachmentBeginInfoKHR);
@@ -1220,6 +1227,7 @@ DECLARE_REFLECTION_STRUCT(VkMemoryRequirements);
DECLARE_REFLECTION_STRUCT(VkMemoryType);
DECLARE_REFLECTION_STRUCT(VkOffset2D);
DECLARE_REFLECTION_STRUCT(VkOffset3D);
DECLARE_REFLECTION_STRUCT(VkPerformanceCounterResultKHR);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceFeatures);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceLimits);
DECLARE_REFLECTION_STRUCT(VkPhysicalDeviceMemoryProperties);
@@ -1306,6 +1314,7 @@ DECLARE_DESERIALISE_TYPE(VkWin32KeyedMutexAcquireReleaseInfoNV);
// enums
DECLARE_REFLECTION_ENUM(VkAccessFlagBits);
DECLARE_REFLECTION_ENUM(VkAcquireProfilingLockFlagBitsKHR);
DECLARE_REFLECTION_ENUM(VkAttachmentDescriptionFlagBits);
DECLARE_REFLECTION_ENUM(VkAttachmentLoadOp);
DECLARE_REFLECTION_ENUM(VkAttachmentStoreOp);
@@ -1374,6 +1383,10 @@ DECLARE_REFLECTION_ENUM(VkLogicOp);
DECLARE_REFLECTION_ENUM(VkMemoryAllocateFlagBits);
DECLARE_REFLECTION_ENUM(VkMemoryHeapFlagBits);
DECLARE_REFLECTION_ENUM(VkMemoryPropertyFlagBits);
DECLARE_REFLECTION_ENUM(VkPerformanceCounterDescriptionFlagBitsKHR);
DECLARE_REFLECTION_ENUM(VkPerformanceCounterScopeKHR);
DECLARE_REFLECTION_ENUM(VkPerformanceCounterStorageKHR);
DECLARE_REFLECTION_ENUM(VkPerformanceCounterUnitKHR);
DECLARE_REFLECTION_ENUM(VkPhysicalDeviceType);
DECLARE_REFLECTION_ENUM(VkPipelineBindPoint);
DECLARE_REFLECTION_ENUM(VkPipelineCreateFlagBits);
+7 -3
View File
@@ -130,6 +130,7 @@ WrappedVulkan::WrappedVulkan() : m_RenderState(this, &m_CreationInfo)
m_LastEventID = ~0U;
m_DrawcallCallback = NULL;
m_SubmitChain = NULL;
m_CurChunkOffset = 0;
m_AddedDrawcall = false;
@@ -267,7 +268,7 @@ void WrappedVulkan::SubmitCmds(VkSemaphore *unwrappedWaitSemaphores,
VkSubmitInfo submitInfo = {
VK_STRUCTURE_TYPE_SUBMIT_INFO,
NULL,
m_SubmitChain,
waitSemaphoreCount,
unwrappedWaitSemaphores,
waitStageMask,
@@ -395,7 +396,7 @@ void WrappedVulkan::SubmitAndFlushExtQueue(uint32_t queueFamilyIdx)
VkSubmitInfo submitInfo = {
VK_STRUCTURE_TYPE_SUBMIT_INFO,
NULL,
m_SubmitChain,
0,
NULL,
NULL, // wait semaphores
@@ -955,6 +956,9 @@ static const VkExtensionProperties supportedExtensions[] = {
{
VK_KHR_MULTIVIEW_EXTENSION_NAME, VK_KHR_MULTIVIEW_SPEC_VERSION,
},
{
VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME, VK_KHR_PERFORMANCE_QUERY_SPEC_VERSION,
},
{
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_EXTENSION_NAME,
VK_KHR_PIPELINE_EXECUTABLE_PROPERTIES_SPEC_VERSION,
@@ -2417,7 +2421,7 @@ ReplayStatus WrappedVulkan::ContextReplayLog(CaptureState readType, uint32_t sta
{
VkSubmitInfo submitInfo = {
VK_STRUCTURE_TYPE_SUBMIT_INFO,
NULL,
m_SubmitChain,
0,
NULL,
NULL, // wait semaphores
+19
View File
@@ -328,6 +328,7 @@ private:
Threading::RWLock m_CapTransitionLock;
VulkanDrawcallCallback *m_DrawcallCallback;
void *m_SubmitChain;
SDFile *m_StructuredFile;
SDFile m_StoredStructuredData;
@@ -377,6 +378,8 @@ private:
VkFormatProperties fmtprops[VK_FORMAT_RANGE_SIZE] = {};
VkDriverInfo driverInfo = VkDriverInfo(props);
VkPhysicalDevicePerformanceQueryFeaturesKHR performanceQueryFeatures = {};
uint32_t queueCount = 0;
VkQueueFamilyProperties queueProps[16] = {};
};
@@ -1008,6 +1011,7 @@ public:
VulkanRenderState &GetRenderState() { return m_RenderState; }
void SetDrawcallCB(VulkanDrawcallCallback *cb) { m_DrawcallCallback = cb; }
// Stores (without copying) a pointer that the driver places in the pNext chain of the
// VkSubmitInfo structures it submits. Pass NULL to stop chaining anything.
void SetSubmitChain(void *submitChain)
{
  m_SubmitChain = submitChain;
}
static bool IsSupportedExtension(const char *extName);
static void FilterToSupportedExtensions(std::vector<VkExtensionProperties> &exts,
std::vector<VkExtensionProperties> &filtered);
@@ -1024,6 +1028,10 @@ public:
const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; }
const VkPhysicalDeviceProperties &GetDeviceProps() { return m_PhysicalDeviceData.props; }
// Read-only access to the VK_KHR_performance_query feature struct cached in the physical
// device data.
const VkPhysicalDevicePerformanceQueryFeaturesKHR &GetPhysicalDevicePerformanceQueryFeatures()
{
  const VkPhysicalDevicePerformanceQueryFeaturesKHR &cachedFeatures =
      m_PhysicalDeviceData.performanceQueryFeatures;
  return cachedFeatures;
}
VkDriverInfo GetDriverInfo() { return m_PhysicalDeviceData.driverInfo; }
// Device initialization
@@ -2184,4 +2192,15 @@ public:
IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkSignalSemaphoreKHR, VkDevice device,
const VkSemaphoreSignalInfoKHR *pSignalInfo);
// VK_KHR_performance_query
VkResult vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount,
VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions);
void vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
VkPhysicalDevice physicalDevice,
const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses);
VkResult vkAcquireProfilingLockKHR(VkDevice device, const VkAcquireProfilingLockInfoKHR *pInfo);
void vkReleaseProfilingLockKHR(VkDevice device);
};
+330 -2
View File
@@ -30,6 +30,113 @@
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/amd/official/GPUPerfAPI/Include/GPUPerfAPI-VK.h"
#include "strings/string_utils.h"
// Maps a GPUCounter in the Vulkan-extended range to its zero-based offset from
// GPUCounter::FirstVulkanExtended. The subtraction is unsigned, so an ID below the range wraps.
static uint32_t FromKHRCounter(GPUCounter counterID)
{
  const uint32_t rangeStart = static_cast<uint32_t>(GPUCounter::FirstVulkanExtended);
  const uint32_t rawId = static_cast<uint32_t>(counterID);
  return rawId - rangeStart;
}
// Inverse of FromKHRCounter: turns a zero-based index into the GPUCounter ID at that offset from
// GPUCounter::FirstVulkanExtended.
static GPUCounter ToKHRCounter(uint32_t idx)
{
  const uint32_t rangeStart = static_cast<uint32_t>(GPUCounter::FirstVulkanExtended);
  return static_cast<GPUCounter>(rangeStart + idx);
}
// Translates a VK_KHR_performance_query unit into the unit / result type / byte width used to
// describe a counter.
//
// khrUnit:   unit reported by the driver for the counter.
// unit:      [out] matching CounterUnit (units without a dedicated CounterUnit map to Absolute).
// type:      [out] CompType the counter value is exposed as.
// byteWidth: [out] size in bytes of the exposed value; every mapping here uses 8.
//
// All three outputs are always written. An unrecognised unit is logged and described as an
// absolute 64-bit unsigned integer so callers never read uninitialised values.
static void GetKHRUnitDescription(const VkPerformanceCounterUnitKHR khrUnit, CounterUnit &unit,
                                  CompType &type, uint32_t &byteWidth)
{
  // start from the most common description; the cases below only override what differs
  unit = CounterUnit::Absolute;
  type = CompType::UInt;
  byteWidth = 8;

  switch(khrUnit)
  {
    // plain quantities with no dedicated CounterUnit
    case VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR:
    case VK_PERFORMANCE_COUNTER_UNIT_KELVIN_KHR:
    case VK_PERFORMANCE_COUNTER_UNIT_WATTS_KHR:
    case VK_PERFORMANCE_COUNTER_UNIT_VOLTS_KHR:
    case VK_PERFORMANCE_COUNTER_UNIT_AMPS_KHR:
    case VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR: return;
    case VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR:
      unit = CounterUnit::Percentage;
      type = CompType::Double;
      return;
    case VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR:
      // exposed as (fractional) seconds, hence a double
      unit = CounterUnit::Seconds;
      type = CompType::Double;
      return;
    case VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR: unit = CounterUnit::Bytes; return;
    case VK_PERFORMANCE_COUNTER_UNIT_BYTES_PER_SECOND_KHR:
      unit = CounterUnit::Ratio;
      type = CompType::Double;
      return;
    case VK_PERFORMANCE_COUNTER_UNIT_CYCLES_KHR: unit = CounterUnit::Cycles; return;
    default:
      // keep the Absolute/UInt/8 fallback assigned above
      RDCERR("Invalid performance counter unit %d", khrUnit);
      return;
  }
}
// Converts one raw VK_KHR_performance_query result into a CounterResult value.
//
// rdcResult:  [out] only rdcResult.value is written.
// khrResult:  raw result union returned by the driver.
// khrUnit:    unit of the counter, which decides whether the value is exposed as u64 or double.
// khrStorage: which member of khrResult holds the value.
//
// The value is converted from its storage type to the type reported by GetKHRUnitDescription, so
// that the member of rdcResult.value that gets written is the one consumers are told to read.
// Nanosecond counters are rescaled to seconds.
void VulkanReplay::convertKhrCounterResult(CounterResult &rdcResult,
                                           const VkPerformanceCounterResultKHR &khrResult,
                                           VkPerformanceCounterUnitKHR khrUnit,
                                           VkPerformanceCounterStorageKHR khrStorage)
{
  // initialised so that 'type' is well-defined even if the unit is not recognised
  CounterUnit unit = CounterUnit::Absolute;
  CompType type = CompType::UInt;
  uint32_t byteWidth = 8;
  GetKHRUnitDescription(khrUnit, unit, type, byteWidth);

  const bool asDouble = (type == CompType::Double);

  switch(khrStorage)
  {
    case VK_PERFORMANCE_COUNTER_STORAGE_INT32_KHR:
      if(asDouble)
        rdcResult.value.d = (double)khrResult.int32;
      else
        rdcResult.value.u64 = (uint64_t)khrResult.int32;
      break;
    case VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR:
      if(asDouble)
        rdcResult.value.d = (double)khrResult.uint32;
      else
        rdcResult.value.u64 = (uint64_t)khrResult.uint32;
      break;
    case VK_PERFORMANCE_COUNTER_STORAGE_INT64_KHR:
      if(asDouble)
        rdcResult.value.d = (double)khrResult.int64;
      else
        rdcResult.value.u64 = (uint64_t)khrResult.int64;
      break;
    case VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR:
      if(asDouble)
        rdcResult.value.d = (double)khrResult.uint64;
      else
        rdcResult.value.u64 = khrResult.uint64;
      break;
    case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR:
      if(asDouble)
        rdcResult.value.d = (double)khrResult.float32;
      else
        rdcResult.value.u64 = (uint64_t)khrResult.float32;
      break;
    case VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR:
      if(asDouble)
        rdcResult.value.d = khrResult.float64;
      else
        rdcResult.value.u64 = (uint64_t)khrResult.float64;
      break;
    default:
      RDCERR("Wrong counter storage type %d", khrStorage);
      // report a well-defined zero rather than leaving the value untouched
      if(asDouble)
        rdcResult.value.d = 0.0;
      else
        rdcResult.value.u64 = 0;
      break;
  }

  // Special case for time units, renderdoc only has a Seconds type.
  if(khrUnit == VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR)
  {
    RDCASSERT(type == CompType::Double);
    rdcResult.value.d /= 1000.0 * 1000.0 * 1000.0;
  }
}
std::vector<GPUCounter> VulkanReplay::EnumerateCounters()
{
@@ -60,6 +167,23 @@ std::vector<GPUCounter> VulkanReplay::EnumerateCounters()
ret.push_back(GPUCounter::CSInvocations);
}
if(m_pDriver->GetPhysicalDevicePerformanceQueryFeatures().performanceCounterQueryPools)
{
  VkPhysicalDevice physDev = m_pDriver->GetPhysDev();

  // NOTE(review): counters are only enumerated for queue family 0 - confirm that is the family
  // the replay submits on.
  const uint32_t queueFamilyIndex = 0;

  // first call only fetches how many counters there are
  uint32_t khrCounters = 0;
  VkResult enumResult =
      ObjDisp(physDev)->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
          Unwrap(physDev), queueFamilyIndex, &khrCounters, NULL, NULL);

  if(enumResult != VK_SUCCESS)
  {
    RDCWARN("Unable to count performance query counters: %s", ToStr(enumResult).c_str());
    khrCounters = 0;
  }

  // drop anything left over from a previous enumeration so every entry starts zeroed
  m_KHRCounters.clear();
  m_KHRCountersDescriptions.clear();
  m_KHRCounters.resize(khrCounters);
  m_KHRCountersDescriptions.resize(khrCounters);

  // only touch element 0 when there is one
  if(khrCounters > 0)
  {
    // the output structures are extensible, so sType must be filled in before the call
    for(uint32_t c = 0; c < khrCounters; c++)
    {
      m_KHRCounters[c].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_KHR;
      m_KHRCountersDescriptions[c].sType = VK_STRUCTURE_TYPE_PERFORMANCE_COUNTER_DESCRIPTION_KHR;
    }

    enumResult = ObjDisp(physDev)->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
        Unwrap(physDev), queueFamilyIndex, &khrCounters, &m_KHRCounters[0],
        &m_KHRCountersDescriptions[0]);

    if(enumResult != VK_SUCCESS && enumResult != VK_INCOMPLETE)
    {
      RDCWARN("Unable to enumerate performance query counters: %s", ToStr(enumResult).c_str());
      khrCounters = 0;
    }

    // the driver reports how many entries it actually wrote
    m_KHRCounters.resize(khrCounters);
    m_KHRCountersDescriptions.resize(khrCounters);
  }

  for(uint32_t c = 0; c < khrCounters; c++)
    ret.push_back(ToKHRCounter(c));
}
if(m_pAMDCounters)
{
std::vector<GPUCounter> amdCounters = m_pAMDCounters->GetPublicCounterIds();
@@ -85,6 +209,29 @@ CounterDescription VulkanReplay::DescribeCounter(GPUCounter counterID)
}
}
if(IsVulkanExtendedCounter(counterID))
{
  CounterDescription rdcDesc;
  rdcDesc.counter = counterID;

  // the counter tables are filled by EnumerateCounters; refuse to index past what was enumerated
  const uint32_t khrIdx = FromKHRCounter(counterID);
  if(khrIdx >= m_KHRCounters.size() || khrIdx >= m_KHRCountersDescriptions.size())
  {
    RDCERR("Unknown VK_KHR_performance_query counter index %u", khrIdx);
    return rdcDesc;
  }

  const VkPerformanceCounterKHR &khrCounter = m_KHRCounters[khrIdx];
  const VkPerformanceCounterDescriptionKHR &khrCounterDesc = m_KHRCountersDescriptions[khrIdx];

  rdcDesc.name = khrCounterDesc.name;
  rdcDesc.category = khrCounterDesc.category;
  rdcDesc.description = khrCounterDesc.description;

  // the UUID belongs to the description being returned, so it is written to rdcDesc
  const uint32_t *uuid_dwords = (const uint32_t *)khrCounter.uuid;
  rdcDesc.uuid.words[0] = uuid_dwords[0];
  rdcDesc.uuid.words[1] = uuid_dwords[1];
  rdcDesc.uuid.words[2] = uuid_dwords[2];
  rdcDesc.uuid.words[3] = uuid_dwords[3];

  GetKHRUnitDescription(khrCounter.unit, rdcDesc.unit, rdcDesc.resultType, rdcDesc.resultByteWidth);

  return rdcDesc;
}
// 6839CB5B-FBD2-4550-B606-8C65157C684C
desc.uuid.words[0] = 0x6839CB5B;
desc.uuid.words[1] = 0xFBD24550;
@@ -376,6 +523,183 @@ std::vector<CounterResult> VulkanReplay::FetchCountersAMD(const std::vector<GPUC
return ret;
}
// Drawcall callback that brackets each replayed event with a begin/end query on m_QueryPool.
// Registers itself with the driver on construction and unregisters on destruction.
struct VulkanKHRCallback : public VulkanDrawcallCallback
{
  VulkanKHRCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool qp)
      : m_pDriver(vk), m_pReplay(rp), m_QueryPool(qp)
  {
    m_pDriver->SetDrawcallCB(this);
  }

  ~VulkanKHRCallback()
  {
    m_pDriver->SetDrawcallCB(NULL);
  }

  void PreDraw(uint32_t eid, VkCommandBuffer cmd) override
  {
    // the next free query slot is the number of events recorded so far
    const uint32_t querySlot = (uint32_t)m_Results.size();
    ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_QueryPool, querySlot, 0);
  }

  bool PostDraw(uint32_t eid, VkCommandBuffer cmd) override
  {
    const uint32_t querySlot = (uint32_t)m_Results.size();
    ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_QueryPool, querySlot);

    // remember which event this slot belongs to; this also advances the slot index
    m_Results.push_back(eid);
    return false;
  }

  void PostRedraw(uint32_t eid, VkCommandBuffer cmd) override
  {
    // nothing to do
  }

  // dispatches and misc events are measured exactly like draws
  void PreDispatch(uint32_t eid, VkCommandBuffer cmd) override
  {
    PreDraw(eid, cmd);
  }

  bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) override
  {
    return PostDraw(eid, cmd);
  }

  void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) override
  {
    PostRedraw(eid, cmd);
  }

  void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override
  {
    PreDraw(eid, cmd);
  }

  bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override
  {
    return PostDraw(eid, cmd);
  }

  void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) override
  {
    PostRedraw(eid, cmd);
  }

  void AliasEvent(uint32_t primary, uint32_t alias) override
  {
    m_AliasEvents.push_back(std::pair<uint32_t, uint32_t>(primary, alias));
  }

  void PreEndCommandBuffer(VkCommandBuffer cmd) override
  {
    // nothing to do
  }

  WrappedVulkan *m_pDriver;
  VulkanReplay *m_pReplay;
  VkQueryPool m_QueryPool;

  // event ID for each query slot, in the order the queries were recorded
  std::vector<uint32_t> m_Results;

  // (primary, alias) pairs: when the same command buffer is resubmitted several times in a
  // frame, the full callbacks only fire while it is recorded and report the first EID. The
  // remaining EIDs are reported here as aliases of that first one.
  std::vector<std::pair<uint32_t, uint32_t> > m_AliasEvents;
};
std::vector<CounterResult> VulkanReplay::FetchCountersKHR(const std::vector<GPUCounter> &counters)
{
std::vector<uint32_t> counterIndices;
for(const GPUCounter &c : counters)
counterIndices.push_back(FromKHRCounter(c));
VkQueryPoolPerformanceCreateInfoKHR perfCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, NULL, 0,
(uint32_t)counterIndices.size(), &counterIndices[0]};
uint32_t passCount = 0;
ObjDisp(m_pDriver->GetInstance())
->GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(Unwrap(m_pDriver->GetPhysDev()),
&perfCreateInfo, &passCount);
VkDevice dev = m_pDriver->GetDev();
VkAcquireProfilingLockInfoKHR acquireLockInfo = {
VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR, NULL, 0, 50 * 1000 * 1000 /* 50ms */};
VkResult vkr = ObjDisp(dev)->AcquireProfilingLockKHR(Unwrap(dev), &acquireLockInfo);
if(vkr != VK_SUCCESS)
{
RDCWARN("Unable to acquire profiling lock: %s", ToStr(vkr).c_str());
return std::vector<CounterResult>();
}
uint32_t maxEID = m_pDriver->GetMaxEID();
VkQueryPoolCreateInfo queryPoolCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, &perfCreateInfo, 0,
VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR, maxEID, 0};
VkQueryPool queryPool;
vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &queryPoolCreateInfo, NULL, &queryPool);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
// Reset query pool
VkCommandBuffer cmd = m_pDriver->GetNextCmd();
VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), queryPool, 0, maxEID);
vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
RDCASSERTEQUAL(vkr, VK_SUCCESS);
m_pDriver->SubmitCmds();
VulkanKHRCallback cb(m_pDriver, this, queryPool);
// replay the events to perform all the queries
for(uint32_t i = 0; i < passCount; i++)
{
VkPerformanceQuerySubmitInfoKHR perfSubmitInfo = {
VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, NULL, i};
cb.m_Results.clear();
m_pDriver->SetSubmitChain(&perfSubmitInfo);
m_pDriver->ReplayLog(0, maxEID, eReplay_Full);
m_pDriver->SetSubmitChain(NULL);
}
std::vector<VkPerformanceCounterResultKHR> perfResults;
perfResults.resize(cb.m_Results.size() * counters.size());
vkr = ObjDisp(dev)->GetQueryPoolResults(
Unwrap(dev), queryPool, 0, (uint32_t)cb.m_Results.size(),
sizeof(VkPerformanceCounterResultKHR) * perfResults.size(), &perfResults[0],
sizeof(VkPerformanceCounterResultKHR) * counters.size(), VK_QUERY_RESULT_WAIT_BIT);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), queryPool, NULL);
ObjDisp(dev)->ReleaseProfilingLockKHR(Unwrap(dev));
std::vector<CounterResult> ret;
for(size_t i = 0; i < cb.m_Results.size(); i++)
{
for(size_t c = 0; c < counters.size(); c++)
{
CounterResult result;
result.eventId = cb.m_Results[i];
result.counter = counters[c];
const VkPerformanceCounterKHR &khrCounter = m_KHRCounters[counterIndices[c]];
convertKhrCounterResult(result, perfResults[counters.size() * i + c], khrCounter.unit,
khrCounter.storage);
ret.push_back(result);
}
}
for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
{
for(size_t c = 0; c < counters.size(); c++)
{
CounterResult search;
search.counter = counters[c];
search.eventId = cb.m_AliasEvents[i].first;
// find the result we're aliasing
auto it = std::find(ret.begin(), ret.end(), search);
if(it != ret.end())
{
// duplicate the result and append
CounterResult aliased = *it;
aliased.eventId = cb.m_AliasEvents[i].second;
ret.push_back(aliased);
}
else
{
RDCERR("Expected to find alias-target result for EID %u counter %u, but didn't",
search.eventId, search.counter);
}
}
}
// sort so that the alias results appear in the right places
std::sort(ret.begin(), ret.end());
return ret;
}
struct VulkanGPUTimerCallback : public VulkanDrawcallCallback
{
VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool tsqp, VkQueryPool occqp,
@@ -468,9 +792,13 @@ std::vector<CounterResult> VulkanReplay::FetchCounters(const std::vector<GPUCoun
}
}
if(vkCounters.empty())
std::vector<GPUCounter> vkKHRCounters;
std::copy_if(counters.begin(), counters.end(), std::back_inserter(vkKHRCounters),
[](const GPUCounter &c) { return IsVulkanExtendedCounter(c); });
if(!vkKHRCounters.empty())
{
return ret;
std::vector<CounterResult> khrResults = FetchCountersKHR(vkKHRCounters);
ret.insert(ret.end(), khrResults.begin(), khrResults.end());
}
VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();
+21 -3
View File
@@ -452,7 +452,8 @@
DeclExt(AMD_negative_viewport_height); \
DeclExt(EXT_line_rasterization); \
DeclExt(GOOGLE_display_timing); \
DeclExt(KHR_timeline_semaphore);
DeclExt(KHR_timeline_semaphore); \
DeclExt(KHR_performance_query);
// for simplicity and since the check itself is platform agnostic,
// these aren't protected in platform defines
@@ -484,7 +485,8 @@
CheckExt(EXT_full_screen_exclusive, VKXX); \
CheckExt(EXT_headless_surface, VKXX); \
CheckExt(EXT_metal_surface, VKXX); \
CheckExt(KHR_wayland_surface, VKXX);
CheckExt(KHR_wayland_surface, VKXX); \
CheckExt(KHR_performance_query, VKXX);
#define CheckDeviceExts() \
CheckExt(EXT_debug_marker, VKXX); \
@@ -538,7 +540,8 @@
CheckExt(AMD_negative_viewport_height, VKXX); \
CheckExt(EXT_line_rasterization, VKXX); \
CheckExt(GOOGLE_display_timing, VKXX); \
CheckExt(KHR_timeline_semaphore, VKXX);
CheckExt(KHR_timeline_semaphore, VKXX); \
CheckExt(KHR_performance_query, VKXX);
#define HookInitVulkanInstanceExts() \
HookInitExtension(KHR_surface, DestroySurfaceKHR); \
@@ -595,6 +598,9 @@
HookInitExtension(EXT_sample_locations, GetPhysicalDeviceMultisamplePropertiesEXT); \
HookInitExtension(EXT_calibrated_timestamps, GetPhysicalDeviceCalibrateableTimeDomainsEXT); \
HookInitExtension(EXT_headless_surface, CreateHeadlessSurfaceEXT); \
HookInitExtension(KHR_performance_query, \
EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR); \
HookInitExtension(KHR_performance_query, GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR); \
HookInitInstance_PlatformSpecific()
#define HookInitVulkanDeviceExts() \
@@ -687,6 +693,8 @@
HookInitExtension(KHR_timeline_semaphore, GetSemaphoreCounterValueKHR); \
HookInitExtension(KHR_timeline_semaphore, WaitSemaphoresKHR); \
HookInitExtension(KHR_timeline_semaphore, SignalSemaphoreKHR); \
HookInitExtension(KHR_performance_query, AcquireProfilingLockKHR); \
HookInitExtension(KHR_performance_query, ReleaseProfilingLockKHR); \
HookInitDevice_PlatformSpecific()
#define DefineHooks() \
@@ -1289,4 +1297,14 @@
pWaitInfo, uint64_t, timeout); \
HookDefine2(VkResult, vkSignalSemaphoreKHR, VkDevice, device, const VkSemaphoreSignalInfoKHR *, \
pSignalInfo); \
HookDefine5(VkResult, vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR, \
VkPhysicalDevice, physicalDevice, uint32_t, queueFamilyIndex, uint32_t *, \
pCounterCount, VkPerformanceCounterKHR *, pCounters, \
VkPerformanceCounterDescriptionKHR *, pCounterDescriptions); \
HookDefine3(void, vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR, VkPhysicalDevice, \
physicalDevice, const VkQueryPoolPerformanceCreateInfoKHR *, \
pPerformanceQueryCreateInfo, uint32_t *, pNumPasses); \
HookDefine2(VkResult, vkAcquireProfilingLockKHR, VkDevice, device, \
const VkAcquireProfilingLockInfoKHR *, pInfo); \
HookDefine1(void, vkReleaseProfilingLockKHR, VkDevice, device); \
HookDefine_PlatformSpecific()
+10
View File
@@ -727,4 +727,14 @@ private:
AMDRGPControl *m_RGP = NULL;
VulkanAMDDrawCallback *m_pAMDDrawCallback = NULL;
std::vector<CounterResult> FetchCountersKHR(const std::vector<GPUCounter> &counters);
std::vector<VkPerformanceCounterKHR> m_KHRCounters;
std::vector<VkPerformanceCounterDescriptionKHR> m_KHRCountersDescriptions;
void convertKhrCounterResult(CounterResult &rdcResult,
const VkPerformanceCounterResultKHR &khrResult,
VkPerformanceCounterUnitKHR khrUnit,
VkPerformanceCounterStorageKHR khrStorage);
};
@@ -1589,6 +1589,15 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi
"bindless shader access will use less reliable fallback");
}
bool perfQuery = false;
if(supportedExtensions.find(VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME) != supportedExtensions.end())
{
perfQuery = true;
Extensions.push_back(VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME);
RDCLOG("Enabling VK_KHR_performance_query");
}
VkDevice device;
uint32_t qCount = 0;
@@ -2538,6 +2547,48 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi
}
}
VkPhysicalDevicePerformanceQueryFeaturesKHR perfFeatures = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR,
};
if(perfQuery)
{
VkPhysicalDeviceFeatures2 availBase = {VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2};
m_PhysicalDeviceData.performanceQueryFeatures.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR;
availBase.pNext = &perfFeatures;
ObjDisp(physicalDevice)->GetPhysicalDeviceFeatures2(Unwrap(physicalDevice), &availBase);
m_PhysicalDeviceData.performanceQueryFeatures = perfFeatures;
if(perfFeatures.performanceCounterQueryPools)
{
VkPhysicalDevicePerformanceQueryFeaturesKHR *existing =
(VkPhysicalDevicePerformanceQueryFeaturesKHR *)FindNextStruct(
&createInfo, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR);
if(existing)
{
existing->performanceCounterQueryPools = VK_TRUE;
}
else
{
perfFeatures.performanceCounterQueryPools = VK_TRUE;
perfFeatures.performanceCounterMultipleQueryPools = VK_FALSE;
perfFeatures.pNext = (void *)createInfo.pNext;
createInfo.pNext = &perfFeatures;
}
}
else
{
auto it =
std::find(Extensions.begin(), Extensions.end(), VK_KHR_PERFORMANCE_QUERY_EXTENSION_NAME);
RDCASSERT(it != Extensions.end());
Extensions.erase(it);
}
}
std::vector<const char *> layerArray(Layers.size());
for(size_t i = 0; i < Layers.size(); i++)
layerArray[i] = Layers[i].c_str();
@@ -2580,6 +2631,7 @@ bool WrappedVulkan::Serialise_vkCreateDevice(SerialiserType &ser, VkPhysicalDevi
CheckDeviceExts();
}
InitInstanceExtensionTables(m_Instance, &m_EnabledExtensions);
InitDeviceExtensionTables(device, &m_EnabledExtensions);
RDCASSERT(m_Device == VK_NULL_HANDLE); // MULTIDEVICE
@@ -2073,3 +2073,32 @@ VkResult WrappedVulkan::vkGetPastPresentationTimingGOOGLE(
return ObjDisp(device)->GetPastPresentationTimingGOOGLE(
Unwrap(device), Unwrap(swapchain), pPresentationTimingCount, pPresentationTimings);
}
// Pass-through: forwards the call with the unwrapped physical device via the dispatch table
// returned by ObjDisp, and returns its result unchanged.
VkResult WrappedVulkan::vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
    VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex, uint32_t *pCounterCount,
    VkPerformanceCounterKHR *pCounters, VkPerformanceCounterDescriptionKHR *pCounterDescriptions)
{
  const VkResult forwarded =
      ObjDisp(physicalDevice)
          ->EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(
              Unwrap(physicalDevice), queueFamilyIndex, pCounterCount, pCounters,
              pCounterDescriptions);
  return forwarded;
}
// Pass-through: forwards the call with the unwrapped physical device via the dispatch table
// returned by ObjDisp. The remaining arguments are handed on untouched.
void WrappedVulkan::vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
    VkPhysicalDevice physicalDevice,
    const VkQueryPoolPerformanceCreateInfoKHR *pPerformanceQueryCreateInfo, uint32_t *pNumPasses)
{
  VkPhysicalDevice unwrappedPhysDev = Unwrap(physicalDevice);

  ObjDisp(physicalDevice)
      ->GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(
          unwrappedPhysDev, pPerformanceQueryCreateInfo, pNumPasses);
}
// Pass-through: forwards the call with the unwrapped device via the dispatch table returned by
// ObjDisp, and returns its result unchanged.
VkResult WrappedVulkan::vkAcquireProfilingLockKHR(VkDevice device,
                                                  const VkAcquireProfilingLockInfoKHR *pInfo)
{
  const VkResult forwarded = ObjDisp(device)->AcquireProfilingLockKHR(Unwrap(device), pInfo);
  return forwarded;
}
// Pass-through: forwards the call with the unwrapped device via the dispatch table returned by
// ObjDisp.
void WrappedVulkan::vkReleaseProfilingLockKHR(VkDevice device)
{
  VkDevice unwrappedDevice = Unwrap(device);

  ObjDisp(device)->ReleaseProfilingLockKHR(unwrappedDevice);
}
@@ -169,6 +169,13 @@ void WrappedVulkan::vkGetDeviceQueue(VkDevice device, uint32_t queueFamilyIndex,
}
}
// Links 'item' onto the end of the pNext chain that starts at 'chain'.
// 'chain' itself must be non-NULL; 'item' may be NULL, which leaves the chain terminated as before.
static void appendChain(VkBaseInStructure *chain, void *item)
{
  // walk to the last structure, i.e. the one whose pNext is NULL
  VkBaseInStructure *tail = chain;
  for(; tail->pNext != NULL; tail = (VkBaseInStructure *)tail->pNext)
  {
  }

  tail->pNext = (VkBaseInStructure *)item;
}
template <typename SerialiserType>
bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue, uint32_t submitCount,
const VkSubmitInfo *pSubmits, VkFence fence)
@@ -242,6 +249,7 @@ bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue,
tempMem += unwrapped.commandBufferCount * sizeof(VkCommandBuffer);
UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&unwrapped);
appendChain((VkBaseInStructure *)&unwrapped, m_SubmitChain);
ObjDisp(queue)->QueueSubmit(Unwrap(queue), 1, &unwrapped, VK_NULL_HANDLE);
@@ -411,6 +419,7 @@ bool WrappedVulkan::Serialise_vkQueueSubmit(SerialiserType &ser, VkQueue queue,
byte *tempMem = GetTempMemory(GetNextPatchSize(rerecordedSubmit.pNext));
UnwrapNextChain(m_State, "VkSubmitInfo", tempMem, (VkBaseInStructure *)&rerecordedSubmit);
appendChain((VkBaseInStructure *)&rerecordedSubmit, m_SubmitChain);
rerecordedSubmit.commandBufferCount = (uint32_t)rerecordedCmds.size();
rerecordedSubmit.pCommandBuffers = &rerecordedCmds[0];
@@ -785,6 +794,7 @@ VkResult WrappedVulkan::vkQueueSubmit(VkQueue queue, uint32_t submitCount,
unwrappedSignalSems[o] = Unwrap(pSubmits[i].pSignalSemaphores[o]);
UnwrapNextChain(m_State, "VkSubmitInfo", memory, (VkBaseInStructure *)&unwrappedSubmits[i]);
appendChain((VkBaseInStructure *)&unwrappedSubmits[i], m_SubmitChain);
}
VkResult ret;