Implemented Vulkan counters

Implemented support for Vulkan counters in FetchCounters, based on the
Vulkan pipeline statistics query and the precise occlusion query.
This commit is contained in:
Victor Moya
2017-03-13 10:21:48 +01:00
committed by Baldur Karlsson
parent e6d2f1b7a6
commit 0964034b4b
3 changed files with 279 additions and 53 deletions
+1 -1
View File
@@ -367,7 +367,6 @@ private:
vector<VkDeviceMemory> m_CleanupMems;
vector<VkEvent> m_CleanupEvents;
const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; }
const VkPhysicalDeviceProperties &GetDeviceProps() { return m_PhysicalDeviceData.props; }
VkDriverInfo GetDriverVersion() { return VkDriverInfo(m_PhysicalDeviceData.props); }
const VkFormatProperties &GetFormatProperties(VkFormat f)
@@ -740,6 +739,7 @@ public:
static VkResult GetProvidedExtensionProperties(uint32_t *pPropertyCount,
VkExtensionProperties *pProperties);
const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; }
// Device initialization
IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkCreateInstance, const VkInstanceCreateInfo *pCreateInfo,
+268 -52
View File
@@ -46,7 +46,30 @@ vector<uint32_t> VulkanReplay::EnumerateCounters()
{
// Builds the list of counter IDs that can be reported for the current device.
// The IDs are appended in a fixed order; callers receive them in exactly this order.
vector<uint32_t> ret;
// Local copy of the physical-device feature flags exposed by the driver wrapper.
VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();
// GPU duration is listed unconditionally; it is not gated on any feature flag checked here.
ret.push_back(eCounter_EventGPUDuration);
// Input-assembly, geometry and rasterizer counters are only listed when the
// pipelineStatisticsQuery feature is reported.
if(availableFeatures.pipelineStatisticsQuery)
{
ret.push_back(eCounter_InputVerticesRead);
ret.push_back(eCounter_IAPrimitives);
ret.push_back(eCounter_GSPrimitives);
ret.push_back(eCounter_RasterizerInvocations);
ret.push_back(eCounter_RasterizedPrimitives);
}
// Samples written is only listed when the occlusionQueryPrecise feature is reported.
if(availableFeatures.occlusionQueryPrecise)
ret.push_back(eCounter_SamplesWritten);
// Per-stage shader invocation counters, gated on the same pipelineStatisticsQuery feature.
// NOTE(review): presumably split from the group above so the returned IDs follow the order of
// the counter enum - confirm against the enum declaration.
if(availableFeatures.pipelineStatisticsQuery)
{
ret.push_back(eCounter_VSInvocations);
ret.push_back(eCounter_TCSInvocations);
ret.push_back(eCounter_TESInvocations);
ret.push_back(eCounter_GSInvocations);
ret.push_back(eCounter_PSInvocations);
ret.push_back(eCounter_CSInvocations);
}
return ret;
}
@@ -55,43 +78,142 @@ void VulkanReplay::DescribeCounter(uint32_t counterID, CounterDescription &desc)
{
desc.counterID = counterID;
if(counterID == eCounter_EventGPUDuration)
switch(counterID)
{
desc.name = "GPU Duration";
desc.description =
"Time taken for this event on the GPU, as measured by delta between two GPU timestamps, "
"top to bottom of the pipe.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_Double;
desc.units = eUnits_Seconds;
}
else
{
desc.name = "Unknown";
desc.description = "Unknown counter ID";
desc.resultByteWidth = 0;
desc.resultCompType = eCompType_None;
desc.units = eUnits_Absolute;
case eCounter_EventGPUDuration:
desc.name = "GPU Duration";
desc.description =
"Time taken for this event on the GPU, as measured by delta between two GPU timestamps.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_Double;
desc.units = eUnits_Seconds;
break;
case eCounter_InputVerticesRead:
desc.name = "Input Vertices Read";
desc.description = "Number of vertices read by input assembler.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_IAPrimitives:
desc.name = "Input Primitives";
desc.description = "Number of primitives read by the input assembler.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_GSPrimitives:
desc.name = "GS Primitives";
desc.description = "Number of primitives output by a geometry shader.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_RasterizerInvocations:
desc.name = "Rasterizer Invocations";
desc.description = "Number of primitives that were sent to the rasterizer.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_RasterizedPrimitives:
desc.name = "Rasterized Primitives";
desc.description = "Number of primitives that were rendered.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_SamplesWritten:
desc.name = "Samples Written";
desc.description = "Number of samples that passed depth/stencil test.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_VSInvocations:
desc.name = "VS Invocations";
desc.description = "Number of times a vertex shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_GSInvocations:
desc.name = "GS Invocations";
desc.description = "Number of times a geometry shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
// Tessellation stage counters. The descriptions are user-facing text, so they use the
// spelling "tessellation" (as in the VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_* names).
case eCounter_TCSInvocations:
desc.name = "TCS Invocations";
desc.description = "Number of times a tessellation control shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_TESInvocations:
desc.name = "TES Invocations";
desc.description = "Number of times a tessellation evaluation shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_PSInvocations:
desc.name = "PS Invocations";
desc.description = "Number of times a pixel shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
case eCounter_CSInvocations:
desc.name = "CS Invocations";
desc.description = "Number of times a compute shader was invoked.";
desc.resultByteWidth = 8;
desc.resultCompType = eCompType_UInt;
desc.units = eUnits_Absolute;
break;
default:
desc.name = "Unknown";
desc.description = "Unknown counter ID";
desc.resultByteWidth = 0;
desc.resultCompType = eCompType_None;
desc.units = eUnits_Absolute;
break;
}
}
struct VulkanGPUTimerCallback : public VulkanDrawcallCallback
{
VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool qp)
: m_pDriver(vk), m_pReplay(rp), m_QueryPool(qp)
VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool tsqp, VkQueryPool occqp,
VkQueryPool psqp)
: m_pDriver(vk),
m_pReplay(rp),
m_TimeStampQueryPool(tsqp),
m_OcclusionQueryPool(occqp),
m_PipeStatsQueryPool(psqp)
{
m_pDriver->SetDrawcallCB(this);
}
~VulkanGPUTimerCallback() { m_pDriver->SetDrawcallCB(NULL); }
void PreDraw(uint32_t eid, VkCommandBuffer cmd)
{
ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_QueryPool,
(uint32_t)(m_Results.size() * 2 + 0));
if(m_OcclusionQueryPool != VK_NULL_HANDLE)
ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionQueryPool, (uint32_t)m_Results.size(),
VK_QUERY_CONTROL_PRECISE_BIT);
if(m_PipeStatsQueryPool != VK_NULL_HANDLE)
ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_PipeStatsQueryPool, (uint32_t)m_Results.size(), 0);
ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 0));
}
bool PostDraw(uint32_t eid, VkCommandBuffer cmd)
{
ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_QueryPool,
(uint32_t)(m_Results.size() * 2 + 1));
ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 1));
if(m_OcclusionQueryPool != VK_NULL_HANDLE)
ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_OcclusionQueryPool, (uint32_t)m_Results.size());
if(m_PipeStatsQueryPool != VK_NULL_HANDLE)
ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_PipeStatsQueryPool, (uint32_t)m_Results.size());
m_Results.push_back(eid);
return false;
}
@@ -115,7 +237,9 @@ struct VulkanGPUTimerCallback : public VulkanDrawcallCallback
WrappedVulkan *m_pDriver;
VulkanReplay *m_pReplay;
VkQueryPool m_QueryPool;
VkQueryPool m_TimeStampQueryPool;
VkQueryPool m_OcclusionQueryPool;
VkQueryPool m_PipeStatsQueryPool;
vector<uint32_t> m_Results;
// events which are the 'same' from being the same command buffer resubmitted
// multiple times in the frame. We will only get the full callback when we're
@@ -128,15 +252,52 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
{
uint32_t maxEID = m_pDriver->GetMaxEID();
VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();
VkDevice dev = m_pDriver->GetDev();
VkQueryPoolCreateInfo poolCreateInfo = {
VkQueryPoolCreateInfo timeStampPoolCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_TIMESTAMP, maxEID * 2, 0};
VkQueryPool pool;
VkResult vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &poolCreateInfo, NULL, &pool);
VkQueryPoolCreateInfo occlusionPoolCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_OCCLUSION, maxEID, 0};
VkQueryPipelineStatisticFlags pipeStatsFlags =
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
VkQueryPoolCreateInfo pipeStatsPoolCreateInfo = {
VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0,
VK_QUERY_TYPE_PIPELINE_STATISTICS, maxEID, pipeStatsFlags};
VkQueryPool timeStampPool;
VkResult vkr =
ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &timeStampPoolCreateInfo, NULL, &timeStampPool);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
VkQueryPool occlusionPool = NULL;
if(availableFeatures.occlusionQueryPrecise)
{
vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &occlusionPoolCreateInfo, NULL, &occlusionPool);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
VkQueryPool pipeStatsPool = NULL;
if(availableFeatures.pipelineStatisticsQuery)
{
vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &pipeStatsPoolCreateInfo, NULL, &pipeStatsPool);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
VkCommandBuffer cmd = m_pDriver->GetNextCmd();
VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
@@ -145,7 +306,11 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pool, 0, maxEID * 2);
ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), timeStampPool, 0, maxEID * 2);
if(occlusionPool != VK_NULL_HANDLE)
ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), occlusionPool, 0, maxEID);
if(pipeStatsPool != VK_NULL_HANDLE)
ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pipeStatsPool, 0, maxEID);
vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
RDCASSERTEQUAL(vkr, VK_SUCCESS);
@@ -154,52 +319,103 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
m_pDriver->SubmitCmds();
#endif
VulkanGPUTimerCallback cb(m_pDriver, this, pool);
VulkanGPUTimerCallback cb(m_pDriver, this, timeStampPool, occlusionPool, pipeStatsPool);
// replay the events to perform all the queries
m_pDriver->ReplayLog(0, maxEID, eReplay_Full);
vector<uint64_t> m_Data;
m_Data.resize(cb.m_Results.size() * 2);
vector<uint64_t> m_TimeStampData;
m_TimeStampData.resize(cb.m_Results.size() * 2);
vkr = ObjDisp(dev)->GetQueryPoolResults(
Unwrap(dev), pool, 0, (uint32_t)m_Data.size(), sizeof(uint64_t) * m_Data.size(), &m_Data[0],
sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
Unwrap(dev), timeStampPool, 0, (uint32_t)m_TimeStampData.size(),
sizeof(uint64_t) * m_TimeStampData.size(), &m_TimeStampData[0], sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pool, NULL);
ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), timeStampPool, NULL);
vector<uint64_t> m_OcclusionData;
m_OcclusionData.resize(cb.m_Results.size());
if(occlusionPool != VK_NULL_HANDLE)
{
vkr = ObjDisp(dev)->GetQueryPoolResults(
Unwrap(dev), occlusionPool, 0, (uint32_t)m_OcclusionData.size(),
sizeof(uint64_t) * m_OcclusionData.size(), &m_OcclusionData[0], sizeof(uint64_t),
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), occlusionPool, NULL);
}
vector<uint64_t> m_PipeStatsData;
m_PipeStatsData.resize(cb.m_Results.size() * 11);
if(pipeStatsPool != VK_NULL_HANDLE)
{
vkr = ObjDisp(dev)->GetQueryPoolResults(
Unwrap(dev), pipeStatsPool, 0, (uint32_t)cb.m_Results.size(),
sizeof(uint64_t) * m_PipeStatsData.size(), &m_PipeStatsData[0], sizeof(uint64_t) * 11,
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pipeStatsPool, NULL);
}
vector<CounterResult> ret;
for(size_t i = 0; i < cb.m_Results.size(); i++)
{
CounterResult result;
for(size_t c = 0; c < counters.size(); c++)
{
CounterResult result;
uint64_t delta = m_Data[i * 2 + 1] - m_Data[i * 2 + 0];
result.eventID = cb.m_Results[i];
result.counterID = counters[c];
result.eventID = cb.m_Results[i];
result.counterID = eCounter_EventGPUDuration;
result.value.d =
(double(m_pDriver->GetDeviceProps().limits.timestampPeriod) * double(delta)) // nanoseconds
/ (1000.0 * 1000.0 * 1000.0); // to seconds
ret.push_back(result);
// Translate the raw query data recorded for draw i into the value of the requested counter.
//
// Each pipeline-statistics result occupies 11 consecutive 64-bit slots (stride 11 in
// m_PipeStatsData), one per statistic enabled in pipeStatsFlags. Vulkan writes the enabled
// statistics in ascending bit order of VkQueryPipelineStatisticFlagBits, which gives:
//    0  input assembly vertices         1  input assembly primitives
//    2  vertex shader invocations       3  geometry shader invocations
//    4  geometry shader primitives      5  clipping invocations
//    6  clipping primitives             7  fragment shader invocations
//    8  tess. control shader patches    9  tess. evaluation shader invocations
//   10  compute shader invocations
switch(counters[c])
{
case eCounter_EventGPUDuration:
{
// Timestamps are stored as (start, end) pairs per draw; the difference is in device ticks.
uint64_t delta = m_TimeStampData[i * 2 + 1] - m_TimeStampData[i * 2 + 0];
result.value.d = (double(m_pDriver->GetDeviceProps().limits.timestampPeriod) *
double(delta)) // nanoseconds
/ (1000.0 * 1000.0 * 1000.0); // to seconds
}
break;
case eCounter_InputVerticesRead: result.value.u64 = m_PipeStatsData[i * 11 + 0]; break;
case eCounter_IAPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 1]; break;
case eCounter_GSPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 4]; break;
case eCounter_RasterizerInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 5]; break;
case eCounter_RasterizedPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 6]; break;
case eCounter_SamplesWritten: result.value.u64 = m_OcclusionData[i]; break;
case eCounter_VSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 2]; break;
// Slot 8 is the tessellation control shader *patches* statistic.
case eCounter_TCSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 8]; break;
case eCounter_TESInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 9]; break;
case eCounter_GSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 3]; break;
// Fragment shader invocations live in slot 7; slot 9 is the tessellation evaluation count.
case eCounter_PSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 7]; break;
case eCounter_CSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 10]; break;
}
ret.push_back(result);
}
}
for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
{
CounterResult search;
search.counterID = eCounter_EventGPUDuration;
search.eventID = cb.m_AliasEvents[i].first;
for(size_t c = 0; c < counters.size(); c++)
{
CounterResult search;
search.counterID = counters[c];
search.eventID = cb.m_AliasEvents[i].first;
// find the result we're aliasing
auto it = std::find(ret.begin(), ret.end(), search);
RDCASSERT(it != ret.end());
// find the result we're aliasing
auto it = std::find(ret.begin(), ret.end(), search);
RDCASSERT(it != ret.end());
// duplicate the result and append
CounterResult aliased = *it;
aliased.eventID = cb.m_AliasEvents[i].second;
ret.push_back(aliased);
// duplicate the result and append
CounterResult aliased = *it;
aliased.eventID = cb.m_AliasEvents[i].second;
ret.push_back(aliased);
}
}
// sort so that the alias results appear in the right places
@@ -1247,6 +1247,16 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
"sampleRateShading = false, save/load from depth 2DMS textures will not be "
"possible");
if(availFeatures.occlusionQueryPrecise)
enabledFeatures.occlusionQueryPrecise = true;
else
RDCWARN("occlusionQueryPrecise = false, samples written counter will not work");
if(availFeatures.pipelineStatisticsQuery)
enabledFeatures.pipelineStatisticsQuery = true;
else
RDCWARN("pipelineStatisticsQuery = false, pipeline counters will not work");
createInfo.pEnabledFeatures = &enabledFeatures;
VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice);