Add GL Intel performance counters support

Intel published a performance query extension (INTEL_performance_query)
back in 2013. It is available on Windows & Linux (in the Mesa driver) and
should provide the same types of counters as the MDAPI backend for
DX11.

Frameretrace [1] (a fork/branch of Apitrace) uses the same extension.

v2: Fix build without OpenGL
    Simplify logic to enable counters
    Warn about non enabled counters on Linux/Mesa
    Generate counter Uuid

v3: Turn asserts into errors
    Don't load perf entry points manually

v4: More clang-format

v5: Fix some Windows conversion warnings

v6: Fix errors on Windows where the driver reports an error on
    glGetPerfQueryInfoINTEL as a means of saying that the queryId cannot
    be used through the extension

v7: clang-format

v8: Initialize variable passed by pointers to GL entry points

v9: Only try to use the INTEL_performance_query on Mesa, experience
    shows the Intel Windows driver doesn't report anything useful.

[1]: https://github.com/janesma/apitrace/wiki/screen-shots
This commit is contained in:
Lionel Landwerlin
2018-10-20 11:54:06 +01:00
committed by Baldur Karlsson
parent 550d5477d8
commit 12ce67b228
14 changed files with 744 additions and 28 deletions
+6 -1
View File
@@ -430,6 +430,12 @@ endif()
add_subdirectory(driver/ihv/amd)
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_amd>)
# pull in the intel folder for GL perf queries
if(ENABLE_GL OR ENABLE_GLES)
add_subdirectory(driver/ihv/intel)
list(APPEND renderdoc_objects $<TARGET_OBJECTS:rdoc_intel>)
endif()
add_library(rdoc OBJECT ${sources})
target_compile_definitions(rdoc ${RDOC_DEFINITIONS})
target_include_directories(rdoc ${RDOC_INCLUDES})
@@ -491,4 +497,3 @@ install (TARGETS renderdoc DESTINATION lib${LIB_SUFFIX}/${LIB_SUBFOLDER})
# Copy in application API header to include
install (FILES api/app/renderdoc_app.h DESTINATION include RENAME renderdoc.h)
+12
View File
@@ -689,6 +689,7 @@ extern bool IsGLES;
EXT_TO_CHECK(99, 30, EXT_texture_swizzle) \
EXT_TO_CHECK(99, 99, KHR_blend_equation_advanced_coherent) \
EXT_TO_CHECK(99, 99, EXT_texture_sRGB_decode) \
EXT_TO_CHECK(99, 99, INTEL_performance_query) \
/* OpenGL ES extensions */ \
EXT_TO_CHECK(99, 32, EXT_color_buffer_float) \
EXT_TO_CHECK(99, 32, EXT_primitive_bounding_box) \
@@ -2084,6 +2085,17 @@ enum class GLChunk : uint32_t
CoherentMapWrite,
glBeginPerfQueryINTEL,
glCreatePerfQueryINTEL,
glDeletePerfQueryINTEL,
glEndPerfQueryINTEL,
glGetFirstPerfQueryIdINTEL,
glGetNextPerfQueryIdINTEL,
glGetPerfCounterInfoINTEL,
glGetPerfQueryDataINTEL,
glGetPerfQueryIdByNameINTEL,
glGetPerfQueryInfoINTEL,
Max,
};
+107
View File
@@ -25,6 +25,7 @@
#include <algorithm>
#include <iterator>
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/intel/intel_gl_counters.h"
#include "gl_driver.h"
#include "gl_replay.h"
#include "gl_resources.h"
@@ -59,6 +60,12 @@ vector<GPUCounter> GLReplay::EnumerateCounters()
ret.insert(ret.end(), amdCounters.begin(), amdCounters.end());
}
if(m_pIntelCounters)
{
vector<GPUCounter> intelCounters = m_pIntelCounters->GetPublicCounterIds();
ret.insert(ret.end(), intelCounters.begin(), intelCounters.end());
}
return ret;
}
@@ -79,6 +86,17 @@ CounterDescription GLReplay::DescribeCounter(GPUCounter counterID)
}
}
/////Intel/////
if(IsIntelCounter(counterID))
{
if(m_pIntelCounters)
{
desc = m_pIntelCounters->GetCounterDescription(counterID);
return desc;
}
}
// FFBA5548-FBF8-405D-BA18-F0329DA370A0
desc.uuid.words[0] = 0xFFBA5548;
desc.uuid.words[1] = 0xFBF8405D;
@@ -368,6 +386,82 @@ vector<CounterResult> GLReplay::FetchCountersAMD(const vector<GPUCounter> &count
return ret;
}
void GLReplay::FillTimersIntel(uint32_t *eventStartID, uint32_t *sampleIndex,
vector<uint32_t> *eventIDs, const DrawcallDescription &drawnode)
{
if(drawnode.children.empty())
return;
for(size_t i = 0; i < drawnode.children.size(); i++)
{
const DrawcallDescription &d = drawnode.children[i];
FillTimersIntel(eventStartID, sampleIndex, eventIDs, drawnode.children[i]);
if(d.events.empty())
continue;
eventIDs->push_back(d.eventId);
m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_WithoutDraw);
m_pIntelCounters->BeginSample(*sampleIndex);
m_pDriver->ReplayLog(*eventStartID, d.eventId, eReplay_OnlyDraw);
m_pIntelCounters->EndSample();
*eventStartID = d.eventId + 1;
++*sampleIndex;
}
}
// Collects results for the given Intel counters by replaying the capture once
// per pass reported by the Intel counter backend.
//
// counters - counters to fetch; every entry is expected to satisfy
//            IsIntelCounter() (asserted below).
// Returns whatever IntelGlCounters::GetCounterData() produces for the samples
// recorded during the replay passes.
vector<CounterResult> GLReplay::FetchCountersIntel(const vector<GPUCounter> &counters)
{
  m_pIntelCounters->DisableAllCounters();

  // Enable exactly the requested counters.
  for(size_t i = 0; i < counters.size(); i++)
  {
    // This function is only called internally, so a failure here means the
    // caller passed a counter that does not belong to this backend.
    RDCASSERT(IsIntelCounter(counters[i]));

    m_pIntelCounters->EnableCounter(counters[i]);
  }

  m_pIntelCounters->BeginSession();

  const uint32_t passCount = m_pIntelCounters->GetPassCount();

  // Both are rebuilt on every pass; the values left by the last pass are the
  // ones handed to GetCounterData().
  uint32_t sampleCount = 0;
  vector<uint32_t> sampledEvents;

  m_pDriver->ReplayMarkers(false);

  uint32_t pass = 0;
  while(pass < passCount)
  {
    m_pIntelCounters->BeginPass(pass);

    uint32_t firstEvent = 0;
    sampleCount = 0;
    sampledEvents.clear();

    FillTimersIntel(&firstEvent, &sampleCount, &sampledEvents, m_pDriver->GetRootDraw());

    m_pIntelCounters->EndPass();
    ++pass;
  }

  m_pDriver->ReplayMarkers(true);

  vector<CounterResult> results =
      m_pIntelCounters->GetCounterData(sampleCount, sampledEvents, counters);

  m_pIntelCounters->EndSession();

  return results;
}
vector<CounterResult> GLReplay::FetchCounters(const vector<GPUCounter> &allCounters)
{
vector<CounterResult> ret;
@@ -397,6 +491,19 @@ vector<CounterResult> GLReplay::FetchCounters(const vector<GPUCounter> &allCount
}
}
if(m_pIntelCounters)
{
// Filter out the Intel counters
vector<GPUCounter> intelCounters;
std::copy_if(allCounters.begin(), allCounters.end(), std::back_inserter(intelCounters),
[](const GPUCounter &c) { return IsIntelCounter(c); });
if(!intelCounters.empty())
{
ret = FetchCountersIntel(intelCounters);
}
}
if(counters.empty())
{
return ret;
+13 -1
View File
@@ -942,7 +942,19 @@ struct GLDispatchTable
PFNGLGETQUERYBUFFEROBJECTIVPROC glGetQueryBufferObjectiv;
PFNGLGETQUERYBUFFEROBJECTUI64VPROC glGetQueryBufferObjectui64v;
PFNGLGETQUERYBUFFEROBJECTUIVPROC glGetQueryBufferObjectuiv;
// INTEL_performance_query
PFNGLBEGINPERFQUERYINTELPROC glBeginPerfQueryINTEL;
PFNGLCREATEPERFQUERYINTELPROC glCreatePerfQueryINTEL;
PFNGLDELETEPERFQUERYINTELPROC glDeletePerfQueryINTEL;
PFNGLENDPERFQUERYINTELPROC glEndPerfQueryINTEL;
PFNGLGETFIRSTPERFQUERYIDINTELPROC glGetFirstPerfQueryIdINTEL;
PFNGLGETNEXTPERFQUERYIDINTELPROC glGetNextPerfQueryIdINTEL;
PFNGLGETPERFCOUNTERINFOINTELPROC glGetPerfCounterInfoINTEL;
PFNGLGETPERFQUERYDATAINTELPROC glGetPerfQueryDataINTEL;
PFNGLGETPERFQUERYIDBYNAMEINTELPROC glGetPerfQueryIdByNameINTEL;
PFNGLGETPERFQUERYINFOINTELPROC glGetPerfQueryInfoINTEL;
// stubbed on all non-windows platforms
PFNWGLDXSETRESOURCESHAREHANDLENVPROC wglDXSetResourceShareHandleNV;
PFNWGLDXOPENDEVICENVPROC wglDXOpenDeviceNV;
+20 -20
View File
@@ -1271,6 +1271,16 @@
FUNC(glGetQueryBufferObjectiv, glGetQueryBufferObjectiv); \
FUNC(glGetQueryBufferObjectui64v, glGetQueryBufferObjectui64v); \
FUNC(glGetQueryBufferObjectuiv, glGetQueryBufferObjectuiv); \
FUNC(glBeginPerfQueryINTEL, glBeginPerfQueryINTEL); \
FUNC(glCreatePerfQueryINTEL, glCreatePerfQueryINTEL); \
FUNC(glDeletePerfQueryINTEL, glDeletePerfQueryINTEL); \
FUNC(glEndPerfQueryINTEL, glEndPerfQueryINTEL); \
FUNC(glGetFirstPerfQueryIdINTEL, glGetFirstPerfQueryIdINTEL); \
FUNC(glGetNextPerfQueryIdINTEL, glGetNextPerfQueryIdINTEL); \
FUNC(glGetPerfCounterInfoINTEL, glGetPerfCounterInfoINTEL); \
FUNC(glGetPerfQueryDataINTEL, glGetPerfQueryDataINTEL); \
FUNC(glGetPerfQueryIdByNameINTEL, glGetPerfQueryIdByNameINTEL); \
FUNC(glGetPerfQueryInfoINTEL, glGetPerfQueryInfoINTEL); \
FUNC(wglDXSetResourceShareHandleNV, wglDXSetResourceShareHandleNV); \
FUNC(wglDXOpenDeviceNV, wglDXOpenDeviceNV); \
FUNC(wglDXCloseDeviceNV, wglDXCloseDeviceNV); \
@@ -2520,6 +2530,16 @@
FuncWrapper4(void, glGetQueryBufferObjectiv, GLuint, id, GLuint, buffer, GLenum, pname, GLintptr, offset); \
FuncWrapper4(void, glGetQueryBufferObjectui64v, GLuint, id, GLuint, buffer, GLenum, pname, GLintptr, offset); \
FuncWrapper4(void, glGetQueryBufferObjectuiv, GLuint, id, GLuint, buffer, GLenum, pname, GLintptr, offset); \
FuncWrapper1(void, glBeginPerfQueryINTEL, GLuint, queryHandle); \
FuncWrapper2(void, glCreatePerfQueryINTEL, GLuint, queryId, GLuint *, queryHandle); \
FuncWrapper1(void, glDeletePerfQueryINTEL, GLuint, queryHandle); \
FuncWrapper1(void, glEndPerfQueryINTEL, GLuint, queryHandle); \
FuncWrapper1(void, glGetFirstPerfQueryIdINTEL, GLuint *, queryId); \
FuncWrapper2(void, glGetNextPerfQueryIdINTEL, GLuint, queryId, GLuint *, nextQueryId); \
FuncWrapper11(void, glGetPerfCounterInfoINTEL, GLuint, queryId, GLuint, counterId, GLuint, counterNameLength, GLchar *, counterName, GLuint, counterDescLength, GLchar *, counterDesc, GLuint *, counterOffset, GLuint *, counterDataSize, GLuint *, counterTypeEnum, GLuint *, counterDataTypeEnum, GLuint64 *, rawCounterMaxValue); \
FuncWrapper5(void, glGetPerfQueryDataINTEL, GLuint, queryHandle, GLuint, flags, GLsizei, dataSize, void *, data, GLuint *, bytesWritten); \
FuncWrapper2(void, glGetPerfQueryIdByNameINTEL, GLchar *, queryName, GLuint *, queryId); \
FuncWrapper7(void, glGetPerfQueryInfoINTEL, GLuint, queryId, GLuint, queryNameLength, GLchar *, queryName, GLuint *, dataSize, GLuint *, noCounters, GLuint *, noInstances, GLuint *, capsMask); \
FuncWrapper2(BOOL, wglDXSetResourceShareHandleNV, void *, dxObject, HANDLE, shareHandle); \
FuncWrapper1(HANDLE, wglDXOpenDeviceNV, void *, dxDevice); \
FuncWrapper1(BOOL, wglDXCloseDeviceNV, HANDLE, hDevice); \
@@ -2560,7 +2580,6 @@
FUNC(glBeginFragmentShaderATI); \
FUNC(glBeginOcclusionQueryNV); \
FUNC(glBeginPerfMonitorAMD); \
FUNC(glBeginPerfQueryINTEL); \
FUNC(glBegin); \
FUNC(glBeginTransformFeedbackNV); \
FUNC(glBeginVertexShaderEXT); \
@@ -2760,7 +2779,6 @@
FUNC(glCoverStrokePathInstancedNV); \
FUNC(glCoverStrokePathNV); \
FUNC(glCreateCommandListsNV); \
FUNC(glCreatePerfQueryINTEL); \
FUNC(glCreateProgramObjectARB); \
FUNC(glCreateShaderObjectARB); \
FUNC(glCreateShaderProgramEXT); \
@@ -2786,7 +2804,6 @@
FUNC(glDeleteOcclusionQueriesNV); \
FUNC(glDeletePathsNV); \
FUNC(glDeletePerfMonitorsAMD); \
FUNC(glDeletePerfQueryINTEL); \
FUNC(glDeleteProgramsARB); \
FUNC(glDeleteProgramsNV); \
FUNC(glDeleteQueryResourceTagNV); \
@@ -2857,7 +2874,6 @@
FUNC(glEndList); \
FUNC(glEndOcclusionQueryNV); \
FUNC(glEndPerfMonitorAMD); \
FUNC(glEndPerfQueryINTEL); \
FUNC(glEnd); \
FUNC(glEndTilingQCOM); \
FUNC(glEndTransformFeedbackNV); \
@@ -3022,7 +3038,6 @@
FUNC(glGetFenceivNV); \
FUNC(glGetFinalCombinerInputParameterfvNV); \
FUNC(glGetFinalCombinerInputParameterivNV); \
FUNC(glGetFirstPerfQueryIdINTEL); \
FUNC(glGetFixedvOES); \
FUNC(glGetFogFuncSGIS); \
FUNC(glGetFragDataIndexEXT); \
@@ -3098,7 +3113,6 @@
FUNC(glGetnColorTable); \
FUNC(glGetnConvolutionFilterARB); \
FUNC(glGetnConvolutionFilter); \
FUNC(glGetNextPerfQueryIdINTEL); \
FUNC(glGetnHistogramARB); \
FUNC(glGetnHistogram); \
FUNC(glGetnMapdvARB); \
@@ -3144,16 +3158,12 @@
FUNC(glGetPathSpacingNV); \
FUNC(glGetPathTexGenfvNV); \
FUNC(glGetPathTexGenivNV); \
FUNC(glGetPerfCounterInfoINTEL); \
FUNC(glGetPerfMonitorCounterDataAMD); \
FUNC(glGetPerfMonitorCounterInfoAMD); \
FUNC(glGetPerfMonitorCountersAMD); \
FUNC(glGetPerfMonitorCounterStringAMD); \
FUNC(glGetPerfMonitorGroupsAMD); \
FUNC(glGetPerfMonitorGroupStringAMD); \
FUNC(glGetPerfQueryDataINTEL); \
FUNC(glGetPerfQueryIdByNameINTEL); \
FUNC(glGetPerfQueryInfoINTEL); \
FUNC(glGetPixelMapfv); \
FUNC(glGetPixelMapuiv); \
FUNC(glGetPixelMapusv); \
@@ -4483,7 +4493,6 @@
UnsupportedWrapper0(void, glBeginFragmentShaderATI); \
UnsupportedWrapper1(void, glBeginOcclusionQueryNV, GLuint, id); \
UnsupportedWrapper1(void, glBeginPerfMonitorAMD, GLuint, monitor); \
UnsupportedWrapper1(void, glBeginPerfQueryINTEL, GLuint, queryHandle); \
UnsupportedWrapper1(void, glBegin, GLenum, mode); \
UnsupportedWrapper1(void, glBeginTransformFeedbackNV, GLenum, primitiveMode); \
UnsupportedWrapper0(void, glBeginVertexShaderEXT); \
@@ -4683,7 +4692,6 @@
UnsupportedWrapper7(void, glCoverStrokePathInstancedNV, GLsizei, numPaths, GLenum, pathNameType, const void *, paths, GLuint, pathBase, GLenum, coverMode, GLenum, transformType, const GLfloat *, transformValues); \
UnsupportedWrapper2(void, glCoverStrokePathNV, GLuint, path, GLenum, coverMode); \
UnsupportedWrapper2(void, glCreateCommandListsNV, GLsizei, n, GLuint *, lists); \
UnsupportedWrapper2(void, glCreatePerfQueryINTEL, GLuint, queryId, GLuint *, queryHandle); \
UnsupportedWrapper0(GLhandleARB, glCreateProgramObjectARB); \
UnsupportedWrapper1(GLhandleARB, glCreateShaderObjectARB, GLenum, shaderType); \
UnsupportedWrapper2(GLuint, glCreateShaderProgramEXT, GLenum, type, const GLchar *, string); \
@@ -4709,7 +4717,6 @@
UnsupportedWrapper2(void, glDeleteOcclusionQueriesNV, GLsizei, n, const GLuint *, ids); \
UnsupportedWrapper2(void, glDeletePathsNV, GLuint, path, GLsizei, range); \
UnsupportedWrapper2(void, glDeletePerfMonitorsAMD, GLsizei, n, GLuint *, monitors); \
UnsupportedWrapper1(void, glDeletePerfQueryINTEL, GLuint, queryHandle); \
UnsupportedWrapper2(void, glDeleteProgramsARB, GLsizei, n, const GLuint *, programs); \
UnsupportedWrapper2(void, glDeleteProgramsNV, GLsizei, n, const GLuint *, programs); \
UnsupportedWrapper2(void, glDeleteQueryResourceTagNV, GLsizei, n, const GLint *, tagIds); \
@@ -4780,7 +4787,6 @@
UnsupportedWrapper0(void, glEndList); \
UnsupportedWrapper0(void, glEndOcclusionQueryNV); \
UnsupportedWrapper1(void, glEndPerfMonitorAMD, GLuint, monitor); \
UnsupportedWrapper1(void, glEndPerfQueryINTEL, GLuint, queryHandle); \
UnsupportedWrapper0(void, glEnd); \
UnsupportedWrapper1(void, glEndTilingQCOM, GLbitfield, preserveMask); \
UnsupportedWrapper0(void, glEndTransformFeedbackNV); \
@@ -4945,7 +4951,6 @@
UnsupportedWrapper3(void, glGetFenceivNV, GLuint, fence, GLenum, pname, GLint *, params); \
UnsupportedWrapper3(void, glGetFinalCombinerInputParameterfvNV, GLenum, variable, GLenum, pname, GLfloat *, params); \
UnsupportedWrapper3(void, glGetFinalCombinerInputParameterivNV, GLenum, variable, GLenum, pname, GLint *, params); \
UnsupportedWrapper1(void, glGetFirstPerfQueryIdINTEL, GLuint *, queryId); \
UnsupportedWrapper2(void, glGetFixedvOES, GLenum, pname, GLfixed *, params); \
UnsupportedWrapper1(void, glGetFogFuncSGIS, GLfloat *, points); \
UnsupportedWrapper2(GLint, glGetFragDataIndexEXT, GLuint, program, const GLchar *, name); \
@@ -5021,7 +5026,6 @@
UnsupportedWrapper5(void, glGetnColorTable, GLenum, target, GLenum, format, GLenum, type, GLsizei, bufSize, void *, table); \
UnsupportedWrapper5(void, glGetnConvolutionFilterARB, GLenum, target, GLenum, format, GLenum, type, GLsizei, bufSize, void *, image); \
UnsupportedWrapper5(void, glGetnConvolutionFilter, GLenum, target, GLenum, format, GLenum, type, GLsizei, bufSize, void *, image); \
UnsupportedWrapper2(void, glGetNextPerfQueryIdINTEL, GLuint, queryId, GLuint *, nextQueryId); \
UnsupportedWrapper6(void, glGetnHistogramARB, GLenum, target, GLboolean, reset, GLenum, format, GLenum, type, GLsizei, bufSize, void *, values); \
UnsupportedWrapper6(void, glGetnHistogram, GLenum, target, GLboolean, reset, GLenum, format, GLenum, type, GLsizei, bufSize, void *, values); \
UnsupportedWrapper4(void, glGetnMapdvARB, GLenum, target, GLenum, query, GLsizei, bufSize, GLdouble *, v); \
@@ -5067,16 +5071,12 @@
UnsupportedWrapper9(void, glGetPathSpacingNV, GLenum, pathListMode, GLsizei, numPaths, GLenum, pathNameType, const void *, paths, GLuint, pathBase, GLfloat, advanceScale, GLfloat, kerningScale, GLenum, transformType, GLfloat *, returnedSpacing); \
UnsupportedWrapper3(void, glGetPathTexGenfvNV, GLenum, texCoordSet, GLenum, pname, GLfloat *, value); \
UnsupportedWrapper3(void, glGetPathTexGenivNV, GLenum, texCoordSet, GLenum, pname, GLint *, value); \
UnsupportedWrapper11(void, glGetPerfCounterInfoINTEL, GLuint, queryId, GLuint, counterId, GLuint, counterNameLength, GLchar *, counterName, GLuint, counterDescLength, GLchar *, counterDesc, GLuint *, counterOffset, GLuint *, counterDataSize, GLuint *, counterTypeEnum, GLuint *, counterDataTypeEnum, GLuint64 *, rawCounterMaxValue); \
UnsupportedWrapper5(void, glGetPerfMonitorCounterDataAMD, GLuint, monitor, GLenum, pname, GLsizei, dataSize, GLuint *, data, GLint *, bytesWritten); \
UnsupportedWrapper4(void, glGetPerfMonitorCounterInfoAMD, GLuint, group, GLuint, counter, GLenum, pname, void *, data); \
UnsupportedWrapper5(void, glGetPerfMonitorCountersAMD, GLuint, group, GLint *, numCounters, GLint *, maxActiveCounters, GLsizei, counterSize, GLuint *, counters); \
UnsupportedWrapper5(void, glGetPerfMonitorCounterStringAMD, GLuint, group, GLuint, counter, GLsizei, bufSize, GLsizei *, length, GLchar *, counterString); \
UnsupportedWrapper3(void, glGetPerfMonitorGroupsAMD, GLint *, numGroups, GLsizei, groupsSize, GLuint *, groups); \
UnsupportedWrapper4(void, glGetPerfMonitorGroupStringAMD, GLuint, group, GLsizei, bufSize, GLsizei *, length, GLchar *, groupString); \
UnsupportedWrapper5(void, glGetPerfQueryDataINTEL, GLuint, queryHandle, GLuint, flags, GLsizei, dataSize, void *, data, GLuint *, bytesWritten); \
UnsupportedWrapper2(void, glGetPerfQueryIdByNameINTEL, GLchar *, queryName, GLuint *, queryId); \
UnsupportedWrapper7(void, glGetPerfQueryInfoINTEL, GLuint, queryId, GLuint, queryNameLength, GLchar *, queryName, GLuint *, dataSize, GLuint *, noCounters, GLuint *, noInstances, GLuint *, capsMask); \
UnsupportedWrapper2(void, glGetPixelMapfv, GLenum, map, GLfloat *, values); \
UnsupportedWrapper2(void, glGetPixelMapuiv, GLenum, map, GLuint *, values); \
UnsupportedWrapper2(void, glGetPixelMapusv, GLenum, map, GLushort *, values); \
+10
View File
@@ -4368,6 +4368,16 @@ bool WrappedOpenGL::ProcessChunk(ReadSerialiser &ser, GLChunk chunk)
case GLChunk::glDeleteSemaphoresEXT:
case GLChunk::glIsSemaphoreEXT:
case GLChunk::glGetSemaphoreParameterui64vEXT:
case GLChunk::glBeginPerfQueryINTEL:
case GLChunk::glCreatePerfQueryINTEL:
case GLChunk::glDeletePerfQueryINTEL:
case GLChunk::glEndPerfQueryINTEL:
case GLChunk::glGetFirstPerfQueryIdINTEL:
case GLChunk::glGetNextPerfQueryIdINTEL:
case GLChunk::glGetPerfCounterInfoINTEL:
case GLChunk::glGetPerfQueryDataINTEL:
case GLChunk::glGetPerfQueryIdByNameINTEL:
case GLChunk::glGetPerfQueryInfoINTEL:
case GLChunk::Max:
RDCERR("Unexpected chunk %s, or missing case for processing! Skipping...",
+20
View File
@@ -2359,6 +2359,26 @@ public:
IMPLEMENT_FUNCTION_SERIALISED(GLboolean, glAcquireKeyedMutexWin32EXT, GLuint memory, GLuint64 key,
GLuint timeout);
IMPLEMENT_FUNCTION_SERIALISED(GLboolean, glReleaseKeyedMutexWin32EXT, GLuint memory, GLuint64 key);
// INTEL_performance_query
IMPLEMENT_FUNCTION_SERIALISED(void, glBeginPerfQueryINTEL, GLuint queryHandle);
IMPLEMENT_FUNCTION_SERIALISED(void, glCreatePerfQueryINTEL, GLuint queryId, GLuint *queryHandle);
IMPLEMENT_FUNCTION_SERIALISED(void, glDeletePerfQueryINTEL, GLuint queryHandle);
IMPLEMENT_FUNCTION_SERIALISED(void, glEndPerfQueryINTEL, GLuint queryHandle);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetFirstPerfQueryIdINTEL, GLuint *queryId);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetNextPerfQueryIdINTEL, GLuint queryId, GLuint *nextQueryId);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetPerfCounterInfoINTEL, GLuint queryId, GLuint counterId,
GLuint counterNameLength, GLchar *counterName,
GLuint counterDescLength, GLchar *counterDesc, GLuint *counterOffset,
GLuint *counterDataSize, GLuint *counterTypeEnum,
GLuint *counterDataTypeEnum, GLuint64 *rawCounterMaxValue);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetPerfQueryDataINTEL, GLuint queryHandle, GLuint flags,
GLsizei dataSize, GLvoid *data, GLuint *bytesWritten);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetPerfQueryIdByNameINTEL, GLchar *queryName,
GLuint *queryId);
IMPLEMENT_FUNCTION_SERIALISED(void, glGetPerfQueryInfoINTEL, GLuint queryId,
GLuint queryNameLength, GLchar *queryName, GLuint *dataSize,
GLuint *noCounters, GLuint *noInstances, GLuint *capsMask);
};
class ScopedDebugContext
+25 -6
View File
@@ -25,6 +25,7 @@
#include "gl_replay.h"
#include "driver/ihv/amd/amd_counters.h"
#include "driver/ihv/intel/intel_gl_counters.h"
#include "maths/matrix.h"
#include "serialise/rdcfile.h"
#include "strings/string_utils.h"
@@ -60,6 +61,7 @@ GLReplay::GLReplay()
void GLReplay::Shutdown()
{
SAFE_DELETE(m_pAMDCounters);
SAFE_DELETE(m_pIntelCounters);
DeleteDebugData();
@@ -222,7 +224,8 @@ void GLReplay::SetReplayData(GLWindowingData data)
InitDebugData();
AMDCounters *counters = NULL;
AMDCounters *countersAMD = NULL;
IntelGlCounters *countersIntel = NULL;
bool isMesa = false;
@@ -256,14 +259,20 @@ void GLReplay::SetReplayData(GLWindowingData data)
if(isMesa)
{
RDCLOG("Mesa driver detected - skipping IHV counter initialisation");
if(m_DriverInfo.vendor == GPUVendor::Intel)
{
RDCLOG("Intel GPU detected - trying to initialise Intel GL counters");
countersIntel = new IntelGlCounters();
}
else
RDCLOG("Non Intel Mesa driver detected - skipping IHV counter initialisation");
}
else
{
if(m_DriverInfo.vendor == GPUVendor::AMD)
{
RDCLOG("AMD GPU detected - trying to initialise AMD counters");
counters = new AMDCounters();
countersAMD = new AMDCounters();
}
else
{
@@ -271,15 +280,25 @@ void GLReplay::SetReplayData(GLWindowingData data)
}
}
if(counters && counters->Init(AMDCounters::ApiType::Ogl, m_ReplayCtx.ctx))
if(countersAMD && countersAMD->Init(AMDCounters::ApiType::Ogl, m_ReplayCtx.ctx))
{
m_pAMDCounters = counters;
m_pAMDCounters = countersAMD;
}
else
{
delete counters;
delete countersAMD;
m_pAMDCounters = NULL;
}
if(countersIntel && countersIntel->Init())
{
m_pIntelCounters = countersIntel;
}
else
{
delete countersIntel;
m_pIntelCounters = NULL;
}
}
void GLReplay::GetBufferData(ResourceId buff, uint64_t offset, uint64_t len, bytebuf &ret)
+9
View File
@@ -34,6 +34,7 @@ using std::map;
using std::pair;
class AMDCounters;
class IntelGlCounters;
class WrappedOpenGL;
struct GLCounterContext;
@@ -438,4 +439,12 @@ private:
const DrawcallDescription &drawnode);
vector<CounterResult> FetchCountersAMD(const vector<GPUCounter> &counters);
// Intel counter instance
IntelGlCounters *m_pIntelCounters = NULL;
void FillTimersIntel(uint32_t *eventStartID, uint32_t *sampleIndex, vector<uint32_t> *eventIDs,
const DrawcallDescription &drawnode);
vector<CounterResult> FetchCountersIntel(const vector<GPUCounter> &counters);
};
@@ -573,6 +573,67 @@ void WrappedOpenGL::glDeleteQueries(GLsizei n, const GLuint *ids)
GL.glDeleteQueries(n, ids);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; this wrapper does nothing else.
void WrappedOpenGL::glBeginPerfQueryINTEL(GLuint queryHandle)
{
GL.glBeginPerfQueryINTEL(queryHandle);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; queryHandle is written by the callee, not by this wrapper.
void WrappedOpenGL::glCreatePerfQueryINTEL(GLuint queryId, GLuint *queryHandle)
{
GL.glCreatePerfQueryINTEL(queryId, queryHandle);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; this wrapper does nothing else.
void WrappedOpenGL::glDeletePerfQueryINTEL(GLuint queryHandle)
{
GL.glDeletePerfQueryINTEL(queryHandle);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; this wrapper does nothing else.
void WrappedOpenGL::glEndPerfQueryINTEL(GLuint queryHandle)
{
GL.glEndPerfQueryINTEL(queryHandle);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; queryId is written by the callee, not by this wrapper.
void WrappedOpenGL::glGetFirstPerfQueryIdINTEL(GLuint *queryId)
{
GL.glGetFirstPerfQueryIdINTEL(queryId);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; nextQueryId is written by the callee, not by this wrapper.
void WrappedOpenGL::glGetNextPerfQueryIdINTEL(GLuint queryId, GLuint *nextQueryId)
{
GL.glGetNextPerfQueryIdINTEL(queryId, nextQueryId);
}
// INTEL_performance_query: forwards all eleven arguments, in the same order,
// to the GL dispatch entry. Every output pointer is filled by the callee; this
// wrapper neither validates nor modifies them.
void WrappedOpenGL::glGetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId,
GLuint counterNameLength, GLchar *counterName,
GLuint counterDescLength, GLchar *counterDesc,
GLuint *counterOffset, GLuint *counterDataSize,
GLuint *counterTypeEnum, GLuint *counterDataTypeEnum,
GLuint64 *rawCounterMaxValue)
{
GL.glGetPerfCounterInfoINTEL(queryId, counterId, counterNameLength, counterName,
counterDescLength, counterDesc, counterOffset, counterDataSize,
counterTypeEnum, counterDataTypeEnum, rawCounterMaxValue);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; data and bytesWritten are filled by the callee, not by this wrapper.
void WrappedOpenGL::glGetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags, GLsizei dataSize,
GLvoid *data, GLuint *bytesWritten)
{
GL.glGetPerfQueryDataINTEL(queryHandle, flags, dataSize, data, bytesWritten);
}
// INTEL_performance_query: forwards the call unchanged to the GL dispatch
// entry; queryId is written by the callee, not by this wrapper.
void WrappedOpenGL::glGetPerfQueryIdByNameINTEL(GLchar *queryName, GLuint *queryId)
{
GL.glGetPerfQueryIdByNameINTEL(queryName, queryId);
}
// INTEL_performance_query: forwards all seven arguments, in the same order, to
// the GL dispatch entry. Every output pointer is filled by the callee; this
// wrapper neither validates nor modifies them.
void WrappedOpenGL::glGetPerfQueryInfoINTEL(GLuint queryId, GLuint queryNameLength,
GLchar *queryName, GLuint *dataSize, GLuint *noCounters,
GLuint *noInstances, GLuint *capsMask)
{
GL.glGetPerfQueryInfoINTEL(queryId, queryNameLength, queryName, dataSize, noCounters, noInstances,
capsMask);
}
INSTANTIATE_FUNCTION_SERIALISED(void, glFenceSync, GLsync real, GLenum condition, GLbitfield flags);
INSTANTIATE_FUNCTION_SERIALISED(void, glClientWaitSync, GLsync sync_, GLbitfield flags,
GLuint64 timeout);
@@ -0,0 +1,9 @@
# Source files that make up the Intel GL performance counter backend.
set(sources
intel_gl_counters.cpp
intel_gl_counters.h)
# Reuse the include directories of the main renderdoc target.
set(include_dirs ${RDOC_INCLUDES})
# Built as an OBJECT library so the parent CMakeLists can pull the objects in
# through $<TARGET_OBJECTS:rdoc_intel>.
add_library(rdoc_intel OBJECT ${sources})
# NOTE(review): no PRIVATE/PUBLIC keyword is given here; presumably
# RDOC_DEFINITIONS and RDOC_INCLUDES already carry one - confirm in the parent.
target_compile_definitions(rdoc_intel ${RDOC_DEFINITIONS})
target_include_directories(rdoc_intel ${include_dirs})
+2
View File
@@ -96,9 +96,11 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="intel_counters.cpp" />
<ClCompile Include="intel_gl_counters.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="intel_counters.h" />
<ClInclude Include="intel_gl_counters.h" />
<ClInclude Include="official\DriverStorePath.h" />
<ClInclude Include="official\metrics_discovery_api.h" />
</ItemGroup>
@@ -0,0 +1,318 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2018 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include <algorithm>
#include <fstream>
#include "common/common.h"
#include "driver/gl/gl_dispatch_table.h"
#include "driver/gl/gl_driver.h"
#include "strings/string_utils.h"
#include "intel_gl_counters.h"
// Names of driver metric sets that must never be exposed as counters.
static const std::vector<std::string> metricSetBlacklist = {
    // Used for testing HW is programmed correctly.
    "TestOa",
    // Used to plumb raw data from the GL driver to metrics-discovery.
    "Intel_Raw_Hardware_Counters_Set_0_Query",
    "Intel_Raw_Pipeline_Statistics_Query",
};
// Constructs the counter manager with the current pass index set to 0. No GL
// calls are made here; counter discovery happens in Init().
IntelGlCounters::IntelGlCounters() : m_passIndex(0)
{
}
// Empty destructor: it does not delete any GL perf query handles itself
// (EndSession() is the function that deletes them).
IntelGlCounters::~IntelGlCounters()
{
}
std::vector<GPUCounter> IntelGlCounters::GetPublicCounterIds() const
{
vector<GPUCounter> counters;
for(const IntelGlCounter &c : m_Counters)
counters.push_back(c.desc.counter);
return counters;
}
// Returns a copy of the description stored for the given counter.
// No range check is performed on the index derived from `index`, so the caller
// must pass a counter that this object registered.
CounterDescription IntelGlCounters::GetCounterDescription(GPUCounter index) const
{
  const IntelGlCounter &entry = m_Counters[GPUCounterToCounterIndex(index)];
  return entry.desc;
}
// Maps a GL_PERFQUERY_COUNTER_DATA_*_INTEL data type onto the CompType used
// for counter results. The 32-bit, 64-bit and boolean integer types all map to
// CompType::UInt. Unknown values are logged and yield CompType::Typeless.
static CompType glToRdcCounterType(GLuint glDataType)
{
  if(glDataType == GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL)
    return CompType::Float;

  if(glDataType == GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL)
    return CompType::Double;

  const bool isUnsignedInt = glDataType == GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL ||
                             glDataType == GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL ||
                             glDataType == GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL;
  if(isUnsignedInt)
    return CompType::UInt;

  RDCERR("Wrong counter data type: %u", glDataType);
  return CompType::Typeless;
}
// Queries the driver for one counter of `query` and registers it, unless a
// counter with the same name has already been registered.
//
// query     - the metric set the counter belongs to; its name becomes the
//             counter's category.
// counterId - ID of the counter within the query, as passed to
//             glGetPerfCounterInfoINTEL.
void IntelGlCounters::addCounter(const IntelGlQuery &query, GLuint counterId)
{
IntelGlCounter counter;
counter.queryId = query.queryId;
// Public counter IDs are handed out sequentially starting at FirstIntel, so
// the ID doubles as an index into m_Counters.
counter.desc.counter = (GPUCounter)((int)GPUCounter::FirstIntel + m_Counters.size());
counter.desc.category = query.name;
// Size the name/description buffers to the maximum lengths reported by the
// driver before letting it write into them.
GLint len = 0;
GL.glGetIntegerv(eGL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL, &len);
counter.desc.name.resize(len);
GL.glGetIntegerv(eGL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL, &len);
counter.desc.description.resize(len);
// NOTE(review): name/description keep the maximum length after this call, so
// they presumably carry trailing NULs past the real text - confirm how the
// string type used by CounterDescription treats them.
// NOTE(review): no glGetError() check follows this call, unlike addQuery().
GL.glGetPerfCounterInfoINTEL(
query.queryId, counterId, (GLuint)counter.desc.name.size(), &counter.desc.name[0],
(GLuint)counter.desc.description.size(), &counter.desc.description[0], &counter.offset,
&counter.desc.resultByteWidth, &counter.type, &counter.dataType, NULL);
// The first counter registered under a given name wins; later duplicates
// (e.g. the same counter in another metric set) are dropped.
if(m_CounterNames.find(counter.desc.name) != m_CounterNames.end())
return;
// Build the UUID from hashes of the query name, counter name and description,
// prefixed with 0x8086.
uint32_t query_hash = strhash(query.name.c_str());
uint32_t name_hash = strhash(counter.desc.name.c_str());
uint32_t desc_hash = strhash(counter.desc.description.c_str());
counter.desc.uuid = Uuid(0x8086, query_hash, name_hash, desc_hash);
counter.desc.resultType = glToRdcCounterType(counter.dataType);
m_Counters.push_back(counter);
m_CounterNames[counter.desc.name] = counter;
}
// Registers one driver metric set (query) and all of its counters.
//
// queryId - ID obtained from glGetFirstPerfQueryIdINTEL /
//           glGetNextPerfQueryIdINTEL.
//
// The query is silently skipped when the driver raises a GL error for it or
// when its name is on metricSetBlacklist.
void IntelGlCounters::addQuery(GLuint queryId)
{
  IntelGlQuery query;
  query.queryId = queryId;

  // Size the name buffer to the maximum length reported by the driver. A
  // non-positive value (e.g. the glGet failed) must not reach resize().
  GLint len = 0;
  GL.glGetIntegerv(eGL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL, &len);
  query.name.resize(len > 0 ? (size_t)len : 0);

  GLuint nCounters = 0;
  GL.glGetPerfQueryInfoINTEL(queryId, (GLuint)query.name.size(), &query.name[0], &query.size,
                             &nCounters, NULL, NULL);

  // Some drivers raise an error when we query some of its IDs because those
  // are used to plumb external library with raw counter data.
  if(GL.glGetError() != eGL_NONE)
    return;

  // The buffer is still padded with NULs up to the maximum length. Trim it to
  // the real text, otherwise the comparison against the blacklist (which takes
  // the string length into account) can never match.
  query.name = std::string(query.name.c_str());

  if(std::find(metricSetBlacklist.begin(), metricSetBlacklist.end(), query.name) !=
     metricSetBlacklist.end())
    return;

  m_Queries[query.queryId] = query;

  // Counter IDs within a query are 1-based.
  for(GLuint c = 1; c <= nCounters; c++)
    addCounter(query, c);
}
bool IntelGlCounters::Init()
{
if(!HasExt[INTEL_performance_query])
return false;
GLuint queryId;
GL.glGetFirstPerfQueryIdINTEL(&queryId);
GLenum err = GL.glGetError();
if(err != eGL_NONE)
return false;
#if defined(RENDERDOC_PLATFORM_ANDROID) || defined(RENDERDOC_PLATFORM_LINUX)
std::ifstream f("/proc/sys/dev/i915/perf_stream_paranoid");
std::string contents;
std::getline(f, contents);
if(!contents.empty())
{
int paranoid = std::stoi(contents);
if(paranoid)
{
RDCWARN(
"Not all counters available, run "
"'sudo sysctl dev.i915.perf_stream_paranoid=0' to enable more counters!");
}
}
#endif
do
{
addQuery(queryId);
GL.glGetNextPerfQueryIdINTEL(queryId, &queryId);
} while(queryId != 0);
return true;
}
// Marks the query that owns the given counter as enabled. Each query is listed
// at most once, however many of its counters are requested.
void IntelGlCounters::EnableCounter(GPUCounter index)
{
  const IntelGlCounter &entry = m_Counters[GPUCounterToCounterIndex(index)];

  const bool alreadyEnabled = std::find(m_EnabledQueries.begin(), m_EnabledQueries.end(),
                                        entry.queryId) != m_EnabledQueries.end();

  if(!alreadyEnabled)
    m_EnabledQueries.push_back(entry.queryId);
}
// Forgets every query enabled through EnableCounter(). Only the enabled list
// is cleared; registered counters and existing query handles are untouched.
void IntelGlCounters::DisableAllCounters()
{
m_EnabledQueries.clear();
}
// Returns the number of replay passes needed: one per enabled query.
uint32_t IntelGlCounters::GetPassCount()
{
return (uint32_t)m_EnabledQueries.size();
}
// Starts a sampling session. Nothing is set up here; this only asserts that no
// query handles are left over from an earlier session (see EndSession()).
void IntelGlCounters::BeginSession()
{
RDCASSERT(m_glQueries.empty());
}
// Ends a sampling session by deleting every perf query handle created by
// BeginSample() and emptying the handle list.
void IntelGlCounters::EndSession()
{
  for(uint32_t queryHandle : m_glQueries)
  {
    // BeginSample() stores 0 as a placeholder when the driver failed to create
    // a query; there is nothing to delete for those entries.
    if(queryHandle == 0)
      continue;

    GL.glDeletePerfQueryINTEL(queryHandle);
  }

  m_glQueries.clear();
}
// Records which pass is being replayed. BeginSample() and EndPass() use this
// value as an index into the list of enabled queries.
void IntelGlCounters::BeginPass(uint32_t passID)
{
m_passIndex = passID;
}
// Finishes the current pass by waiting for the results of every query that was
// recorded during it. The data read here is discarded; it is fetched again in
// CopyData().
void IntelGlCounters::EndPass()
{
  // Nothing to flush if the pass does not correspond to an enabled query.
  if(m_passIndex >= m_EnabledQueries.size())
    return;

  // Flush all of the pass' queries to ensure we can begin further samples
  // with a different pass.
  std::vector<uint8_t> data(m_Queries[m_EnabledQueries[m_passIndex]].size);
  GLuint len = 0;

  // Every completed pass contributes the same number of handles, so the
  // handles of the current pass start at nSamples * m_passIndex.
  uint32_t nSamples = (uint32_t)m_glQueries.size() / (m_passIndex + 1);

  for(uint32_t q = nSamples * m_passIndex; q < m_glQueries.size(); q++)
  {
    // BeginSample() stores 0 as a placeholder when query creation failed.
    if(m_glQueries[q] == 0)
      continue;

    // data.data() stays valid even when the query reports a size of 0.
    GL.glGetPerfQueryDataINTEL(m_glQueries[q], GL_PERFQUERY_WAIT_INTEL, (GLsizei)data.size(),
                               data.data(), &len);
  }
}
void IntelGlCounters::BeginSample(uint32_t sampleID)
{
GLuint queryId = m_EnabledQueries[m_passIndex];
GLuint queryHandle = 0;
GL.glCreatePerfQueryINTEL(queryId, &queryHandle);
m_glQueries.push_back(queryHandle);
GLenum err = GL.glGetError();
if(err != eGL_NONE)
return;
GL.glBeginPerfQueryINTEL(m_glQueries.back());
}
void IntelGlCounters::EndSample()
{
GLuint queryHandle = m_glQueries.back();
if(queryHandle == 0)
return;
GL.glEndPerfQueryINTEL(queryHandle);
}
// Returns the pass in which the given counter is measured, i.e. the position
// of its owning query within m_EnabledQueries.
//
// Logs an error and returns 0 when the query is not enabled.
uint32_t IntelGlCounters::CounterPass(const IntelGlCounter &counter)
{
  uint32_t pass = 0;

  while(pass < m_EnabledQueries.size())
  {
    if(counter.queryId == m_EnabledQueries[pass])
      return pass;

    ++pass;
  }

  RDCERR("Counters not enabled");
  return 0;
}
void IntelGlCounters::CopyData(void *dest, const IntelGlCounter &counter, uint32_t sample,
uint32_t maxSampleIndex)
{
uint32_t pass = CounterPass(counter);
uint32_t queryHandle = m_glQueries[maxSampleIndex * pass + sample];
std::vector<uint8_t> data(m_Queries[m_EnabledQueries[pass]].size);
GLuint len;
GL.glGetPerfQueryDataINTEL(queryHandle, 0, (GLsizei)data.size(), &data[0], &len);
memcpy(dest, &data[counter.offset], counter.desc.resultByteWidth);
}
std::vector<CounterResult> IntelGlCounters::GetCounterData(uint32_t maxSampleIndex,
const std::vector<uint32_t> &eventIDs,
const std::vector<GPUCounter> &counters)
{
std::vector<CounterResult> ret;
RDCASSERT((maxSampleIndex * m_EnabledQueries.size()) == m_glQueries.size());
for(uint32_t s = 0; s < maxSampleIndex; s++)
{
for(const GPUCounter &c : counters)
{
const IntelGlCounter &counter = m_Counters[GPUCounterToCounterIndex(c)];
switch(counter.desc.resultType)
{
case CompType::Double:
{
double r;
CopyData(&r, counter, s, maxSampleIndex);
ret.push_back(CounterResult(eventIDs[s], counter.desc.counter, r));
break;
}
case CompType::Float:
{
float r;
CopyData(&r, counter, s, maxSampleIndex);
ret.push_back(CounterResult(eventIDs[s], counter.desc.counter, r));
break;
}
case CompType::UInt:
{
uint64_t r;
CopyData(&r, counter, s, maxSampleIndex);
ret.push_back(CounterResult(eventIDs[s], counter.desc.counter, r));
break;
}
default: RDCERR("Wrong counter result type: %u", counter.desc.resultType);
}
}
}
return ret;
}
@@ -0,0 +1,132 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2018 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
#include <map>
#include <vector>
#include "api/replay/renderdoc_replay.h"
#include "driver/gl/gl_common.h"
#include "replay/replay_driver.h"
class WrappedOpenGL;
// Converts a zero-based index into the counter list to its public GPUCounter
// value by offsetting it from GPUCounter::FirstIntel.
inline constexpr GPUCounter MakeIntelGlCounter(int index)
{
  return static_cast<GPUCounter>(static_cast<int>(GPUCounter::FirstIntel) + index);
}
// Collects GPU performance counters through the GL INTEL_performance_query
// extension.
//
// Counters belong to driver-defined queries and a query is measured as a
// whole, so each enabled query costs one pass. Within a pass every sample gets
// its own query instance, whose data is read back once all passes are done.
class IntelGlCounters
{
public:
  IntelGlCounters();

  // Enumerates the driver's queries. Returns false if the extension cannot be used.
  bool Init();
  ~IntelGlCounters();

  std::vector<GPUCounter> GetPublicCounterIds() const;
  CounterDescription GetCounterDescription(GPUCounter index) const;

  // Enables the query owning the given counter (no-op if already enabled).
  void EnableCounter(GPUCounter index);
  // Clears the list of enabled queries.
  void DisableAllCounters();

  // Number of passes required: one per enabled query.
  uint32_t GetPassCount();

  // A session spans all passes; EndSession() deletes the query instances.
  void BeginSession();
  void EndSession();

  // passID indexes the list of enabled queries.
  void BeginPass(uint32_t passID);
  void EndPass();

  // Each BeginSample() creates one query instance for the current pass.
  void BeginSample(uint32_t sampleID);
  void EndSample();

  // Returns one result per (sample, counter) pair; eventIDs[s] labels sample s.
  std::vector<CounterResult> GetCounterData(uint32_t maxSampleIndex,
                                            const std::vector<uint32_t> &eventIDs,
                                            const std::vector<GPUCounter> &counters);

private:
  // Inverse of MakeIntelGlCounter(): public counter value -> index into m_Counters.
  static uint32_t GPUCounterToCounterIndex(GPUCounter counter)
  {
    return (uint32_t)(counter) - (uint32_t)(GPUCounter::FirstIntel);
  }

  // One counter of a performance query. Copy/move operations are left to the
  // compiler: every member knows how to copy itself, and declaring a copy
  // constructor by hand would suppress the implicit move operations.
  struct IntelGlCounter
  {
    CounterDescription desc = CounterDescription();
    GLuint queryId = 0;     // id of the query this counter belongs to
    GLuint offset = 0;      // byte offset of the value inside the query's data block
    GLuint type = 0;        // presumably the driver-reported counter type - TODO confirm
    GLuint dataType = 0;    // presumably the driver-reported data type - TODO confirm
  };

  // Indexed by GPUCounterToCounterIndex().
  std::vector<IntelGlCounter> m_Counters;
  // Presumably keyed by counter name; not used by the code visible here - TODO confirm.
  std::map<std::string, IntelGlCounter> m_CounterNames;

  // One driver-defined performance query. Copy/move operations are left to the
  // compiler for the same reason as IntelGlCounter.
  struct IntelGlQuery
  {
    GLuint queryId = 0;
    std::string name;
    GLuint size = 0;    // size in bytes of the buffer used to read the query's data
  };

  // Keyed by query id.
  std::map<GLuint, IntelGlQuery> m_Queries;

  void addCounter(const IntelGlQuery &query, GLuint counterId);
  void addQuery(GLuint queryId);

  // Pass in which a counter is measured: position of its query in m_EnabledQueries.
  uint32_t CounterPass(const IntelGlCounter &counter);
  // Copies counter.desc.resultByteWidth bytes of one sample's value into dest.
  void CopyData(void *dest, const IntelGlCounter &counter, uint32_t sample, uint32_t maxSampleIndex);

  // Ids of the enabled queries; the index in this list is the pass number.
  std::vector<uint32_t> m_EnabledQueries;
  // Pass currently being recorded, set by BeginPass().
  uint32_t m_passIndex = 0;
  // Query instance handles in creation order: all samples of pass 0, then pass 1, ...
  std::vector<GLuint> m_glQueries;
};