Vulkan: add occlusion and pipeline-statistics counters to the replay

What this patch does:
  * vk_core.h: moves GetDeviceFeatures() out of the private: section and into the
    public: section so that vk_counters.cpp can call it through m_pDriver.
  * vk_counters.cpp: EnumerateCounters() lists the new counters only when the matching
    device feature is reported; DescribeCounter() describes them; FetchCounters()
    records one occlusion query and one pipeline-statistics query per event alongside
    the existing timestamp pair, and returns one result per requested counter per event.
  * vk_device_funcs.cpp: vkCreateDevice() enables occlusionQueryPrecise and
    pipelineStatisticsQuery when the physical device reports them, and warns otherwise.

Review changes relative to the patch as submitted:
  * eCounter_PSInvocations read m_PipeStatsData[i * 11 + 9], the same slot as
    eCounter_TESInvocations. Fragment shader invocations are bit 7 of
    VkQueryPipelineStatisticFlagBits, so the slot is now 7.
  * occlusionPool / pipeStatsPool are initialised with VK_NULL_HANDLE, matching the
    VK_NULL_HANDLE comparisons used everywhere else in the patch.
  * A comment documents how the pipeline-statistics slots are derived (the last
    vk_counters.cpp hunk grows from 103 to 106 new-side lines).

NOTE(review): the copy of this patch under review had been flattened (newlines and
indentation lost, vector<...> template arguments stripped). Line breaks, indentation,
blank context lines and template arguments were reconstructed so that every hunk matches
its @@ line counts; exact column alignment is a best guess - confirm against the tree
before applying. The "index" hashes are those of the original patch.

diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h
index ca7bebe75..784528de6 100644
--- a/renderdoc/driver/vulkan/vk_core.h
+++ b/renderdoc/driver/vulkan/vk_core.h
@@ -367,7 +367,6 @@ private:
   vector<VkDeviceMemory> m_CleanupMems;
   vector<VkEvent> m_CleanupEvents;
 
-  const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; }
   const VkPhysicalDeviceProperties &GetDeviceProps() { return m_PhysicalDeviceData.props; }
   VkDriverInfo GetDriverVersion() { return VkDriverInfo(m_PhysicalDeviceData.props); }
   const VkFormatProperties &GetFormatProperties(VkFormat f)
@@ -740,6 +739,7 @@ public:
   static VkResult GetProvidedExtensionProperties(uint32_t *pPropertyCount,
                                                  VkExtensionProperties *pProperties);
 
+  const VkPhysicalDeviceFeatures &GetDeviceFeatures() { return m_PhysicalDeviceData.features; }
   // Device initialization
 
   IMPLEMENT_FUNCTION_SERIALISED(VkResult, vkCreateInstance, const VkInstanceCreateInfo *pCreateInfo,
diff --git a/renderdoc/driver/vulkan/vk_counters.cpp b/renderdoc/driver/vulkan/vk_counters.cpp
index 4e7d37798..4e6e6a43f 100644
--- a/renderdoc/driver/vulkan/vk_counters.cpp
+++ b/renderdoc/driver/vulkan/vk_counters.cpp
@@ -46,7 +46,30 @@ vector<uint32_t> VulkanReplay::EnumerateCounters()
 {
   vector<uint32_t> ret;
 
+  VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();
+
   ret.push_back(eCounter_EventGPUDuration);
+  if(availableFeatures.pipelineStatisticsQuery)
+  {
+    ret.push_back(eCounter_InputVerticesRead);
+    ret.push_back(eCounter_IAPrimitives);
+    ret.push_back(eCounter_GSPrimitives);
+    ret.push_back(eCounter_RasterizerInvocations);
+    ret.push_back(eCounter_RasterizedPrimitives);
+  }
+
+  if(availableFeatures.occlusionQueryPrecise)
+    ret.push_back(eCounter_SamplesWritten);
+
+  if(availableFeatures.pipelineStatisticsQuery)
+  {
+    ret.push_back(eCounter_VSInvocations);
+    ret.push_back(eCounter_TCSInvocations);
+    ret.push_back(eCounter_TESInvocations);
+    ret.push_back(eCounter_GSInvocations);
+    ret.push_back(eCounter_PSInvocations);
+    ret.push_back(eCounter_CSInvocations);
+  }
 
   return ret;
 }
@@ -55,43 +78,142 @@ void VulkanReplay::DescribeCounter(uint32_t counterID, CounterDescription &desc)
 {
   desc.counterID = counterID;
 
-  if(counterID == eCounter_EventGPUDuration)
+  switch(counterID)
   {
-    desc.name = "GPU Duration";
-    desc.description =
-        "Time taken for this event on the GPU, as measured by delta between two GPU timestamps, "
-        "top to bottom of the pipe.";
-    desc.resultByteWidth = 8;
-    desc.resultCompType = eCompType_Double;
-    desc.units = eUnits_Seconds;
-  }
-  else
-  {
-    desc.name = "Unknown";
-    desc.description = "Unknown counter ID";
-    desc.resultByteWidth = 0;
-    desc.resultCompType = eCompType_None;
-    desc.units = eUnits_Absolute;
+    case eCounter_EventGPUDuration:
+      desc.name = "GPU Duration";
+      desc.description =
+          "Time taken for this event on the GPU, as measured by delta between two GPU timestamps.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_Double;
+      desc.units = eUnits_Seconds;
+      break;
+    case eCounter_InputVerticesRead:
+      desc.name = "Input Vertices Read";
+      desc.description = "Number of vertices read by input assembler.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_IAPrimitives:
+      desc.name = "Input Primitives";
+      desc.description = "Number of primitives read by the input assembler.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_GSPrimitives:
+      desc.name = "GS Primitives";
+      desc.description = "Number of primitives output by a geometry shader.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_RasterizerInvocations:
+      desc.name = "Rasterizer Invocations";
+      desc.description = "Number of primitives that were sent to the rasterizer.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_RasterizedPrimitives:
+      desc.name = "Rasterized Primitives";
+      desc.description = "Number of primitives that were rendered.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_SamplesWritten:
+      desc.name = "Samples Written";
+      desc.description = "Number of samples that passed depth/stencil test.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_VSInvocations:
+      desc.name = "VS Invocations";
+      desc.description = "Number of times a vertex shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_GSInvocations:
+      desc.name = "GS Invocations";
+      desc.description = "Number of times a geometry shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_TCSInvocations:
+      desc.name = "TCS Invocations";
+      desc.description = "Number of times a tesselation control shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_TESInvocations:
+      desc.name = "TES Invocations";
+      desc.description = "Number of times a tesselation evaluation shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_PSInvocations:
+      desc.name = "PS Invocations";
+      desc.description = "Number of times a pixel shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    case eCounter_CSInvocations:
+      desc.name = "CS Invocations";
+      desc.description = "Number of times a compute shader was invoked.";
+      desc.resultByteWidth = 8;
+      desc.resultCompType = eCompType_UInt;
+      desc.units = eUnits_Absolute;
+      break;
+    default:
+      desc.name = "Unknown";
+      desc.description = "Unknown counter ID";
+      desc.resultByteWidth = 0;
+      desc.resultCompType = eCompType_None;
+      desc.units = eUnits_Absolute;
+      break;
   }
 }
+
 struct VulkanGPUTimerCallback : public VulkanDrawcallCallback
 {
-  VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool qp)
-      : m_pDriver(vk), m_pReplay(rp), m_QueryPool(qp)
+  VulkanGPUTimerCallback(WrappedVulkan *vk, VulkanReplay *rp, VkQueryPool tsqp, VkQueryPool occqp,
+                         VkQueryPool psqp)
+      : m_pDriver(vk),
+        m_pReplay(rp),
+        m_TimeStampQueryPool(tsqp),
+        m_OcclusionQueryPool(occqp),
+        m_PipeStatsQueryPool(psqp)
   {
     m_pDriver->SetDrawcallCB(this);
   }
   ~VulkanGPUTimerCallback() { m_pDriver->SetDrawcallCB(NULL); }
   void PreDraw(uint32_t eid, VkCommandBuffer cmd)
   {
-    ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_QueryPool,
-                                    (uint32_t)(m_Results.size() * 2 + 0));
+    if(m_OcclusionQueryPool != VK_NULL_HANDLE)
+      ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_OcclusionQueryPool, (uint32_t)m_Results.size(),
+                                  VK_QUERY_CONTROL_PRECISE_BIT);
+    if(m_PipeStatsQueryPool != VK_NULL_HANDLE)
+      ObjDisp(cmd)->CmdBeginQuery(Unwrap(cmd), m_PipeStatsQueryPool, (uint32_t)m_Results.size(), 0);
+    ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                    m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 0));
   }
 
   bool PostDraw(uint32_t eid, VkCommandBuffer cmd)
   {
-    ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, m_QueryPool,
-                                    (uint32_t)(m_Results.size() * 2 + 1));
+    ObjDisp(cmd)->CmdWriteTimestamp(Unwrap(cmd), VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                                    m_TimeStampQueryPool, (uint32_t)(m_Results.size() * 2 + 1));
+    if(m_OcclusionQueryPool != VK_NULL_HANDLE)
+      ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_OcclusionQueryPool, (uint32_t)m_Results.size());
+    if(m_PipeStatsQueryPool != VK_NULL_HANDLE)
+      ObjDisp(cmd)->CmdEndQuery(Unwrap(cmd), m_PipeStatsQueryPool, (uint32_t)m_Results.size());
     m_Results.push_back(eid);
     return false;
   }
@@ -115,7 +237,9 @@ struct VulkanGPUTimerCallback : public VulkanDrawcallCallback
 
   WrappedVulkan *m_pDriver;
   VulkanReplay *m_pReplay;
-  VkQueryPool m_QueryPool;
+  VkQueryPool m_TimeStampQueryPool;
+  VkQueryPool m_OcclusionQueryPool;
+  VkQueryPool m_PipeStatsQueryPool;
   vector<uint32_t> m_Results;
   // events which are the 'same' from being the same command buffer resubmitted
   // multiple times in the frame. We will only get the full callback when we're
@@ -128,15 +252,52 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
 {
   uint32_t maxEID = m_pDriver->GetMaxEID();
 
+  VkPhysicalDeviceFeatures availableFeatures = m_pDriver->GetDeviceFeatures();
+
   VkDevice dev = m_pDriver->GetDev();
 
-  VkQueryPoolCreateInfo poolCreateInfo = {
+  VkQueryPoolCreateInfo timeStampPoolCreateInfo = {
       VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_TIMESTAMP, maxEID * 2, 0};
 
-  VkQueryPool pool;
-  VkResult vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &poolCreateInfo, NULL, &pool);
+  VkQueryPoolCreateInfo occlusionPoolCreateInfo = {
+      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0, VK_QUERY_TYPE_OCCLUSION, maxEID, 0};
+
+  VkQueryPipelineStatisticFlags pipeStatsFlags =
+      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_VERTICES_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_INPUT_ASSEMBLY_PRIMITIVES_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_VERTEX_SHADER_INVOCATIONS_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_INVOCATIONS_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_CLIPPING_INVOCATIONS_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_CLIPPING_PRIMITIVES_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_FRAGMENT_SHADER_INVOCATIONS_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT |
+      VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
+
+  VkQueryPoolCreateInfo pipeStatsPoolCreateInfo = {
+      VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, NULL, 0,
+      VK_QUERY_TYPE_PIPELINE_STATISTICS, maxEID, pipeStatsFlags};
+
+  VkQueryPool timeStampPool;
+  VkResult vkr =
+      ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &timeStampPoolCreateInfo, NULL, &timeStampPool);
   RDCASSERTEQUAL(vkr, VK_SUCCESS);
 
+  VkQueryPool occlusionPool = VK_NULL_HANDLE;
+  if(availableFeatures.occlusionQueryPrecise)
+  {
+    vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &occlusionPoolCreateInfo, NULL, &occlusionPool);
+    RDCASSERTEQUAL(vkr, VK_SUCCESS);
+  }
+
+  VkQueryPool pipeStatsPool = VK_NULL_HANDLE;
+  if(availableFeatures.pipelineStatisticsQuery)
+  {
+    vkr = ObjDisp(dev)->CreateQueryPool(Unwrap(dev), &pipeStatsPoolCreateInfo, NULL, &pipeStatsPool);
+    RDCASSERTEQUAL(vkr, VK_SUCCESS);
+  }
+
   VkCommandBuffer cmd = m_pDriver->GetNextCmd();
 
   VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
@@ -145,7 +306,11 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
   vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
   RDCASSERTEQUAL(vkr, VK_SUCCESS);
 
-  ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pool, 0, maxEID * 2);
+  ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), timeStampPool, 0, maxEID * 2);
+  if(occlusionPool != VK_NULL_HANDLE)
+    ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), occlusionPool, 0, maxEID);
+  if(pipeStatsPool != VK_NULL_HANDLE)
+    ObjDisp(dev)->CmdResetQueryPool(Unwrap(cmd), pipeStatsPool, 0, maxEID);
 
   vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
   RDCASSERTEQUAL(vkr, VK_SUCCESS);
@@ -154,52 +319,106 @@ vector<CounterResult> VulkanReplay::FetchCounters(const vector<uint32_t> &counte
   m_pDriver->SubmitCmds();
 #endif
 
-  VulkanGPUTimerCallback cb(m_pDriver, this, pool);
+  VulkanGPUTimerCallback cb(m_pDriver, this, timeStampPool, occlusionPool, pipeStatsPool);
 
   // replay the events to perform all the queries
   m_pDriver->ReplayLog(0, maxEID, eReplay_Full);
 
-  vector<uint64_t> m_Data;
-  m_Data.resize(cb.m_Results.size() * 2);
+  vector<uint64_t> m_TimeStampData;
+  m_TimeStampData.resize(cb.m_Results.size() * 2);
 
   vkr = ObjDisp(dev)->GetQueryPoolResults(
-      Unwrap(dev), pool, 0, (uint32_t)m_Data.size(), sizeof(uint64_t) * m_Data.size(), &m_Data[0],
-      sizeof(uint64_t), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+      Unwrap(dev), timeStampPool, 0, (uint32_t)m_TimeStampData.size(),
+      sizeof(uint64_t) * m_TimeStampData.size(), &m_TimeStampData[0], sizeof(uint64_t),
+      VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
   RDCASSERTEQUAL(vkr, VK_SUCCESS);
 
-  ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pool, NULL);
+  ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), timeStampPool, NULL);
+
+  vector<uint64_t> m_OcclusionData;
+  m_OcclusionData.resize(cb.m_Results.size());
+  if(occlusionPool != VK_NULL_HANDLE)
+  {
+    vkr = ObjDisp(dev)->GetQueryPoolResults(
+        Unwrap(dev), occlusionPool, 0, (uint32_t)m_OcclusionData.size(),
+        sizeof(uint64_t) * m_OcclusionData.size(), &m_OcclusionData[0], sizeof(uint64_t),
+        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+    RDCASSERTEQUAL(vkr, VK_SUCCESS);
+
+    ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), occlusionPool, NULL);
+  }
+
+  vector<uint64_t> m_PipeStatsData;
+  m_PipeStatsData.resize(cb.m_Results.size() * 11);
+  if(pipeStatsPool != VK_NULL_HANDLE)
+  {
+    vkr = ObjDisp(dev)->GetQueryPoolResults(
+        Unwrap(dev), pipeStatsPool, 0, (uint32_t)cb.m_Results.size(),
+        sizeof(uint64_t) * m_PipeStatsData.size(), &m_PipeStatsData[0], sizeof(uint64_t) * 11,
+        VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);
+    RDCASSERTEQUAL(vkr, VK_SUCCESS);
+
+    ObjDisp(dev)->DestroyQueryPool(Unwrap(dev), pipeStatsPool, NULL);
+  }
 
   vector<CounterResult> ret;
 
   for(size_t i = 0; i < cb.m_Results.size(); i++)
   {
-    CounterResult result;
+    for(size_t c = 0; c < counters.size(); c++)
+    {
+      CounterResult result;
 
-    uint64_t delta = m_Data[i * 2 + 1] - m_Data[i * 2 + 0];
+      result.eventID = cb.m_Results[i];
+      result.counterID = counters[c];
 
-    result.eventID = cb.m_Results[i];
-    result.counterID = eCounter_EventGPUDuration;
-    result.value.d =
-        (double(m_pDriver->GetDeviceProps().limits.timestampPeriod) * double(delta))    // nanoseconds
-        / (1000.0 * 1000.0 * 1000.0);                                                   // to seconds
-
-    ret.push_back(result);
+      // Pipeline statistics results are written in ascending bit order of
+      // VkQueryPipelineStatisticFlagBits; all 11 bits are requested, so the index
+      // within each event's block of 11 values is the bit number of the statistic.
+      switch(counters[c])
+      {
+        case eCounter_EventGPUDuration:
+        {
+          uint64_t delta = m_TimeStampData[i * 2 + 1] - m_TimeStampData[i * 2 + 0];
+          result.value.d = (double(m_pDriver->GetDeviceProps().limits.timestampPeriod) *
+                            double(delta))                  // nanoseconds
+                           / (1000.0 * 1000.0 * 1000.0);    // to seconds
+        }
+        break;
+        case eCounter_InputVerticesRead: result.value.u64 = m_PipeStatsData[i * 11 + 0]; break;
+        case eCounter_IAPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 1]; break;
+        case eCounter_GSPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 4]; break;
+        case eCounter_RasterizerInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 5]; break;
+        case eCounter_RasterizedPrimitives: result.value.u64 = m_PipeStatsData[i * 11 + 6]; break;
+        case eCounter_SamplesWritten: result.value.u64 = m_OcclusionData[i]; break;
+        case eCounter_VSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 2]; break;
+        case eCounter_TCSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 8]; break;
+        case eCounter_TESInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 9]; break;
+        case eCounter_GSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 3]; break;
+        case eCounter_PSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 7]; break;
+        case eCounter_CSInvocations: result.value.u64 = m_PipeStatsData[i * 11 + 10]; break;
+      }
+      ret.push_back(result);
+    }
   }
 
   for(size_t i = 0; i < cb.m_AliasEvents.size(); i++)
   {
-    CounterResult search;
-    search.counterID = eCounter_EventGPUDuration;
-    search.eventID = cb.m_AliasEvents[i].first;
+    for(size_t c = 0; c < counters.size(); c++)
+    {
+      CounterResult search;
+      search.counterID = counters[c];
+      search.eventID = cb.m_AliasEvents[i].first;
 
-    // find the result we're aliasing
-    auto it = std::find(ret.begin(), ret.end(), search);
-    RDCASSERT(it != ret.end());
+      // find the result we're aliasing
+      auto it = std::find(ret.begin(), ret.end(), search);
+      RDCASSERT(it != ret.end());
 
-    // duplicate the result and append
-    CounterResult aliased = *it;
-    aliased.eventID = cb.m_AliasEvents[i].second;
-    ret.push_back(aliased);
+      // duplicate the result and append
+      CounterResult aliased = *it;
+      aliased.eventID = cb.m_AliasEvents[i].second;
+      ret.push_back(aliased);
+    }
   }
 
   // sort so that the alias results appear in the right places
diff --git a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
index 024fb8aef..486f45b32 100644
--- a/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
+++ b/renderdoc/driver/vulkan/wrappers/vk_device_funcs.cpp
@@ -1247,6 +1247,16 @@ VkResult WrappedVulkan::vkCreateDevice(VkPhysicalDevice physicalDevice,
         "sampleRateShading = false, save/load from depth 2DMS textures will not be "
         "possible");
 
+  if(availFeatures.occlusionQueryPrecise)
+    enabledFeatures.occlusionQueryPrecise = true;
+  else
+    RDCWARN("occlusionQueryPrecise = false, samples written counter will not work");
+
+  if(availFeatures.pipelineStatisticsQuery)
+    enabledFeatures.pipelineStatisticsQuery = true;
+  else
+    RDCWARN("pipelineStatisticsQuery = false, pipeline counters will not work");
+
   createInfo.pEnabledFeatures = &enabledFeatures;
 
   VkResult ret = createFunc(Unwrap(physicalDevice), &createInfo, pAllocator, pDevice);