From 5a4b4dca031a969c6ff86fefbf7987342daace95 Mon Sep 17 00:00:00 2001 From: baldurk Date: Fri, 14 Nov 2025 13:39:48 +0000 Subject: [PATCH] Add GL API wrapper skeleton, based on vulkan wrapper * We keep some of the threading code rather than removing it, but this will run single threaded. --- renderdoc/driver/gl/gl_common.h | 1 + renderdoc/driver/gl/gl_overlay.cpp | 102 ++-- renderdoc/driver/gl/gl_pixelhistory.cpp | 12 +- renderdoc/driver/gl/gl_replay.h | 4 +- renderdoc/driver/gl/gl_shaderdebug.cpp | 681 +++++++++++++++++++++++- 5 files changed, 747 insertions(+), 53 deletions(-) diff --git a/renderdoc/driver/gl/gl_common.h b/renderdoc/driver/gl/gl_common.h index d2a25666b..ad414b491 100644 --- a/renderdoc/driver/gl/gl_common.h +++ b/renderdoc/driver/gl/gl_common.h @@ -790,6 +790,7 @@ extern bool IsGLES; EXT_TO_CHECK(99, 99, EXT_texture_sRGB_decode) \ EXT_TO_CHECK(99, 99, INTEL_performance_query) \ EXT_TO_CHECK(99, 99, EXT_texture_buffer) \ + EXT_TO_CHECK(99, 99, KHR_shader_subgroup) \ /* OpenGL ES extensions */ \ EXT_TO_CHECK(99, 32, EXT_color_buffer_float) \ EXT_TO_CHECK(99, 99, EXT_color_buffer_half_float) \ diff --git a/renderdoc/driver/gl/gl_overlay.cpp b/renderdoc/driver/gl/gl_overlay.cpp index 88e515bec..cf5e607fb 100644 --- a/renderdoc/driver/gl/gl_overlay.cpp +++ b/renderdoc/driver/gl/gl_overlay.cpp @@ -36,42 +36,48 @@ #define OPENGL 1 #include "data/glsl/glsl_ubos_cpp.h" -bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint replacementProgram, - GLuint pipeline, GLuint fragShader, - GLuint fragShaderSPIRV) +bool GLReplay::CreateShaderReplacementProgram(GLuint srcProgram, GLuint srcPipeline, + GLuint dstProgram, ShaderStage stage, + GLuint newShader, GLuint newShaderSPIRV) { WrappedOpenGL &drv = *m_pDriver; ContextPair &ctx = drv.GetCtx(); // these are the shaders to attach, and the programs to copy details from - GLuint shaders[4] = {0}; - GLuint programs[4] = {0}; + GLuint shaders[5] = {0}; + GLuint programs[5] = {0}; // temporary programs created as needed if the original program was created with // glCreateShaderProgramv and we don't have a shader to attach - GLuint tmpShaders[4] = {0}; + GLuint tmpShaders[5] = {0}; // the reflection for the vertex shader, used to copy vertex bindings const ShaderReflection *vsRefl = NULL; + // the reflection for the vertex shader, used to copy fragment bindings + const ShaderReflection *fsRefl = NULL; bool HasSPIRVShaders = false; bool HasGLSLShaders = false; - if(program == 0) + if(srcProgram == 0) { - if(pipeline == 0) + if(srcPipeline == 0) { return false; } else { - ResourceId id = m_pDriver->GetResourceManager()->GetResID(ProgramPipeRes(ctx, pipeline)); + ResourceId id = m_pDriver->GetResourceManager()->GetResID(ProgramPipeRes(ctx, srcPipeline)); const WrappedOpenGL::PipelineData &pipeDetails = m_pDriver->GetPipeline(id); // fetch the corresponding shaders and programs for each stage - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) { + // don't copy the stage we're replacing + if(ShaderStage(i) == stage) + continue; + if(pipeDetails.stageShaders[i] != ResourceId()) { const WrappedOpenGL::ShaderData &shadDetails = @@ -117,8 +123,10 @@ bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint rep } } - if(i == 0) + if(i == (size_t)ShaderStage::Vertex) vsRefl = GetShader(ResourceId(), pipeDetails.stageShaders[i], ShaderEntryPoint()); + if(i == (size_t)ShaderStage::Pixel) + fsRefl = GetShader(ResourceId(), pipeDetails.stageShaders[i], ShaderEntryPoint()); } } } @@ -126,14 +134,18 @@ bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint rep else { const WrappedOpenGL::ProgramData &progDetails = - m_pDriver->GetProgram(m_pDriver->GetResourceManager()->GetResID(ProgramRes(ctx, program))); + m_pDriver->GetProgram(m_pDriver->GetResourceManager()->GetResID(ProgramRes(ctx, srcProgram))); // fetch any and all non-fragment shader shaders - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) { + // don't copy the stage we're replacing + if(ShaderStage(i) == stage) + continue; + if(progDetails.stageShaders[i] != ResourceId()) { - programs[i] = program; + programs[i] = srcProgram; shaders[i] = m_pDriver->GetResourceManager()->GetCurrentResource(progDetails.stageShaders[i]).name; @@ -145,8 +157,10 @@ bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint rep else HasGLSLShaders = true; - if(i == 0) - vsRefl = GetShader(ResourceId(), progDetails.stageShaders[0], ShaderEntryPoint()); + if(i == (size_t)ShaderStage::Vertex) + vsRefl = GetShader(ResourceId(), progDetails.stageShaders[i], ShaderEntryPoint()); + if(i == (size_t)ShaderStage::Pixel) + fsRefl = GetShader(ResourceId(), progDetails.stageShaders[i], ShaderEntryPoint()); } } } @@ -155,50 +169,54 @@ bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint rep RDCERR("Unsupported - mixed GLSL and SPIR-V shaders in pipeline"); // attach the shaders - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) if(shaders[i]) - drv.glAttachShader(replacementProgram, shaders[i]); + drv.glAttachShader(dstProgram, shaders[i]); if(HasSPIRVShaders) { - RDCASSERT(fragShaderSPIRV); - drv.glAttachShader(replacementProgram, fragShaderSPIRV); + RDCASSERT(newShaderSPIRV); + drv.glAttachShader(dstProgram, newShaderSPIRV); } else { - drv.glAttachShader(replacementProgram, fragShader); + drv.glAttachShader(dstProgram, newShader); } // copy the vertex attribs over from the source program - if(vsRefl && programs[0] && !HasSPIRVShaders) - CopyProgramAttribBindings(programs[0], replacementProgram, vsRefl); + if(vsRefl && programs[(uint32_t)ShaderStage::Vertex] && !HasSPIRVShaders) + CopyProgramAttribBindings(programs[(uint32_t)ShaderStage::Vertex], dstProgram, vsRefl); + if(fsRefl && programs[(uint32_t)ShaderStage::Pixel] && !HasSPIRVShaders) + CopyProgramFragDataBindings(programs[(uint32_t)ShaderStage::Pixel], dstProgram, fsRefl); // link the overlay program - drv.glLinkProgram(replacementProgram); + drv.glLinkProgram(dstProgram); // detach the shaders - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) if(shaders[i]) - drv.glDetachShader(replacementProgram, shaders[i]); + drv.glDetachShader(dstProgram, shaders[i]); if(HasSPIRVShaders) - drv.glDetachShader(replacementProgram, fragShaderSPIRV); + drv.glDetachShader(dstProgram, newShaderSPIRV); else - drv.glDetachShader(replacementProgram, fragShader); + drv.glDetachShader(dstProgram, newShader); // delete any temporaries - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) if(tmpShaders[i]) drv.glDeleteShader(tmpShaders[i]); // check that the link succeeded - char buffer[1024] = {}; GLint status = 0; - drv.glGetProgramiv(replacementProgram, eGL_LINK_STATUS, &status); + drv.glGetProgramiv(dstProgram, eGL_LINK_STATUS, &status); if(status == 0) { - drv.glGetProgramInfoLog(replacementProgram, 1024, NULL, buffer); - RDCERR("Error linking overlay program: %s", buffer); + rdcstr buffer; + drv.glGetProgramiv(dstProgram, eGL_INFO_LOG_LENGTH, &status); + buffer.resize(status); + drv.glGetProgramInfoLog(dstProgram, status, NULL, buffer.data()); + RDCERR("Error linking overlay program: %s", buffer.c_str()); return false; } @@ -206,16 +224,16 @@ bool GLReplay::CreateFragmentShaderReplacementProgram(GLuint program, GLuint rep // same program is bound to multiple stages. It's just inefficient { PerStageReflections dstStages; - m_pDriver->FillReflectionArray(ProgramRes(ctx, replacementProgram), dstStages); + m_pDriver->FillReflectionArray(ProgramRes(ctx, dstProgram), dstStages); - for(size_t i = 0; i < 4; i++) + for(size_t i = 0; i < 5; i++) { if(programs[i]) { PerStageReflections stages; m_pDriver->FillReflectionArray(ProgramRes(ctx, programs[i]), stages); - CopyProgramUniforms(stages, programs[i], dstStages, replacementProgram); + CopyProgramUniforms(stages, programs[i], dstStages, dstProgram); } } } @@ -487,9 +505,9 @@ ResourceId GLReplay::RenderOverlay(ResourceId texid, FloatVector clearCol, Debug // we bind the separable program created for each shader, and copy // uniforms and attrib bindings from the 'real' programs, wherever // they are. - bool spirvOverlay = CreateFragmentShaderReplacementProgram( - rs.Program.name, DebugData.overlayProg, rs.Pipeline.name, DebugData.fixedcolFragShader, - DebugData.fixedcolFragShaderSPIRV); + bool spirvOverlay = CreateShaderReplacementProgram( + rs.Program.name, rs.Pipeline.name, DebugData.overlayProg, ShaderStage::Pixel, + DebugData.fixedcolFragShader, DebugData.fixedcolFragShaderSPIRV); drv.glUseProgram(DebugData.overlayProg); GLint overlayFixedColLocation = 0; @@ -2207,9 +2225,9 @@ ResourceId GLReplay::RenderOverlay(ResourceId texid, FloatVector clearCol, Debug // replace fragment shader. This is exactly what we did // at the start of this function for the single-event case, but now we have // to do it for every event - spirvOverlay = CreateFragmentShaderReplacementProgram( - prog, DebugData.overlayProg, pipe, DebugData.quadoverdrawFragShader, - DebugData.quadoverdrawFragShaderSPIRV); + spirvOverlay = CreateShaderReplacementProgram( + prog, pipe, DebugData.overlayProg, ShaderStage::Pixel, + DebugData.quadoverdrawFragShader, DebugData.quadoverdrawFragShaderSPIRV); drv.glUseProgram(DebugData.overlayProg); drv.glBindProgramPipeline(0); diff --git a/renderdoc/driver/gl/gl_pixelhistory.cpp b/renderdoc/driver/gl/gl_pixelhistory.cpp index e23c05ae5..03bf52486 100644 --- a/renderdoc/driver/gl/gl_pixelhistory.cpp +++ b/renderdoc/driver/gl/gl_pixelhistory.cpp @@ -146,9 +146,9 @@ GLuint GetFixedColProgram(WrappedOpenGL *driver, GLReplay *replay, rs.FetchState(driver); GLuint ret = driver->glCreateProgram(); - replay->CreateFragmentShaderReplacementProgram(rs.Program.name, ret, rs.Pipeline.name, - resources.fixedColFragmentShader, - resources.fixedColFragmentShaderSPIRV); + replay->CreateShaderReplacementProgram(rs.Program.name, rs.Pipeline.name, ret, ShaderStage::Pixel, + resources.fixedColFragmentShader, + resources.fixedColFragmentShaderSPIRV); resources.fixedColPrograms[currentProgram] = ret; @@ -168,9 +168,9 @@ GLuint GetPrimitiveIdProgram(WrappedOpenGL *driver, GLReplay *replay, rs.FetchState(driver); GLuint ret = driver->glCreateProgram(); - replay->CreateFragmentShaderReplacementProgram(rs.Program.name, ret, rs.Pipeline.name, - resources.primitiveIdFragmentShader, - resources.primitiveIdFragmentShaderSPIRV); + replay->CreateShaderReplacementProgram(rs.Program.name, rs.Pipeline.name, ret, ShaderStage::Pixel, + resources.primitiveIdFragmentShader, + resources.primitiveIdFragmentShaderSPIRV); resources.primIdPrograms[currentProgram] = ret; diff --git a/renderdoc/driver/gl/gl_replay.h b/renderdoc/driver/gl/gl_replay.h index 9f4281d79..b3f1d62b1 100644 --- a/renderdoc/driver/gl/gl_replay.h +++ b/renderdoc/driver/gl/gl_replay.h @@ -357,8 +357,8 @@ public: GLint yoffset = 0, GLint zoffset = 0, GLsizei width = 65536, GLsizei height = 65536, GLsizei depth = 65536); - bool CreateFragmentShaderReplacementProgram(GLuint program, GLuint replacedProgram, GLuint pipeline, - GLuint fragShader, GLuint fragShaderSPIRV); + bool CreateShaderReplacementProgram(GLuint srcProgram, GLuint srcPipeline, GLuint replacedProgram, + ShaderStage stage, GLuint newShader, GLuint newShaderSPIRV); GLuint GetShaderDebugMathProg(); GLuint MakeShaderDebugSampleProg(const SamplingProgramConfig &config); diff --git a/renderdoc/driver/gl/gl_shaderdebug.cpp b/renderdoc/driver/gl/gl_shaderdebug.cpp index eceb9cdd4..f0f0b30d9 100644 --- a/renderdoc/driver/gl/gl_shaderdebug.cpp +++ b/renderdoc/driver/gl/gl_shaderdebug.cpp @@ -23,6 +23,7 @@ ******************************************************************************/ #include "gl_shaderdebug.h" +#include "core/settings.h" #include "driver/shaders/spirv/spirv_debug.h" #include "driver/shaders/spirv/spirv_editor.h" #include "driver/shaders/spirv/spirv_op_helpers.h" @@ -31,6 +32,11 @@ #include "gl_driver.h" #include "gl_replay.h" +RDOC_CONFIG(rdcstr, OpenGL_Debug_ShaderDebugDumpDirPath, "", + "Path to dump shader debugging generated SPIR-V files."); +RDOC_CONFIG(bool, OpenGL_Debug_ShaderDebugLogging, false, + "Output verbose debug logging messages when debugging shaders."); + #define OPENGL 1 #include "data/glsl/glsl_ubos_cpp.h" @@ -1688,6 +1694,122 @@ private: const uint64_t deviceThreadID; }; +enum class SubgroupSupport : uint32_t +{ + None = 0, + NoBallot, + All, +}; + +static const uint32_t validMagicNumber = 12345; + +static GLuint CreateInputFetcher(const WrappedOpenGL::ShaderData &shadDetails, + uint32_t storageBufferBinding, bool usePrimitiveID, bool useSampleID, + SubgroupSupport subgroupSupport, uint32_t maxSubgroupSize) +{ + rdcstr source; + + if(shadDetails.spirvWords.empty()) + return CreateShader(eGL_FRAGMENT_SHADER, source); + + return CreateSPIRVShader(eGL_FRAGMENT_SHADER, source); +} + +void CalculateSubgroupProperties(uint32_t &maxSubgroupSize, SubgroupSupport &subgroupSupport) +{ + if(HasExt[KHR_shader_subgroup]) + { + GL.glGetIntegerv(eGL_SUBGROUP_SIZE_KHR, (GLint *)&maxSubgroupSize); + + subgroupSupport = SubgroupSupport::All; + + GLbitfield features = 0; + GL.glGetIntegerv(eGL_SUBGROUP_SUPPORTED_FEATURES_KHR, (GLint *)&features); + + if((features & eGL_SUBGROUP_FEATURE_BALLOT_BIT_KHR) == 0) + subgroupSupport = SubgroupSupport::NoBallot; + } +} + +rdcpair GetAlignAndOutputSize(const ShaderReflection *refl, + const SPIRVPatchData &patchData) +{ + uint32_t paramAlign = 16; + + for(const SigParameter &sig : refl->inputSignature) + { + if(VarTypeByteSize(sig.varType) * sig.compCount > paramAlign) + paramAlign = 32; + } + + // conservatively calculate structure stride with full amount for every input element + uint32_t structStride = (uint32_t)refl->inputSignature.size() * paramAlign; + + // GLSL doesn't allow empty structs, so pad with 16 minimum + structStride = RDCMAX(paramAlign, structStride); + + if(refl->stage == ShaderStage::Vertex) + structStride += sizeof(rdcspv::VertexLaneData); + else if(refl->stage == ShaderStage::Pixel) + structStride += sizeof(rdcspv::PixelLaneData); + else if(refl->stage == ShaderStage::Compute || refl->stage == ShaderStage::Task || + refl->stage == ShaderStage::Mesh) + structStride += sizeof(rdcspv::ComputeLaneData); + + if(patchData.threadScope & rdcspv::ThreadScope::Subgroup) + { + structStride += sizeof(rdcspv::SubgroupLaneData); + } + + return {paramAlign, structStride}; +} + +uint32_t GetStorageBufferBinding(WrappedOpenGL *driver, + const ResourceId stagePrograms[NumShaderStages], + const ResourceId stageShaders[NumShaderStages], ShaderStage stage) +{ + rdcarray avail; + + GLint ssboCount = 0; + GL.glGetIntegerv(eGL_MAX_SHADER_STORAGE_BUFFER_BINDINGS, &ssboCount); + + avail.resize(ssboCount); + for(GLint i = 0; i < ssboCount; i++) + avail[i] = i; + + for(size_t i = 0; i < NumShaderStages; i++) + { + if(stageShaders[i] == ResourceId()) + continue; + + // we can use SSBOs from the stage we're replacing + if(ShaderStage(i) == stage) + continue; + + const ShaderReflection *refl = driver->GetShader(stageShaders[i]).GetReflection(); + + GLuint prog = driver->GetResourceManager()->GetCurrentResource(stagePrograms[i]).name; + + for(const ShaderResource &res : refl->readWriteResources) + { + // storage images use separate bindings + if(res.isTexture) + continue; + + uint32_t slot = 0; + bool used = false; + GetCurrentBinding(prog, refl, res, slot, used); + + if(used) + avail.removeOne(slot); + } + } + + if(avail.empty()) + return ~0U; + return avail.front(); +} + ShaderDebugTrace *GLReplay::DebugVertex(uint32_t eventId, uint32_t vertid, uint32_t instid, uint32_t idx, uint32_t view) { @@ -1698,8 +1820,549 @@ ShaderDebugTrace *GLReplay::DebugVertex(uint32_t eventId, uint32_t vertid, uint3 ShaderDebugTrace *GLReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t y, const DebugPixelInputs &inputs) { - GLNOTIMP("DebugPixel"); - return new ShaderDebugTrace(); + uint32_t sample = inputs.sample; + uint32_t primitive = inputs.primitive; + uint32_t view = inputs.view; + + MakeCurrentReplayContext(&m_ReplayCtx); + + GLRenderState rs; + rs.FetchState(m_pDriver); + + // When RenderDoc passed y, the value being passed in is with the Y axis starting from the top + // However, we need to have it starting from the bottom, so flip it by subtracting y from the + // height. + { + ContextPair &ctx = m_pDriver->GetCtx(); + uint32_t height = 0; + + GLint numCols = 8; + GL.glGetIntegerv(eGL_MAX_COLOR_ATTACHMENTS, &numCols); + + GLuint obj = 0; + GLenum type = eGL_TEXTURE; + for(GLint i = 0; i < numCols; i++) + { + GL.glGetFramebufferAttachmentParameteriv( + eGL_DRAW_FRAMEBUFFER, GLenum(eGL_COLOR_ATTACHMENT0 + i), + eGL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME, (GLint *)&obj); + + if(obj) + { + GL.glGetFramebufferAttachmentParameteriv( + eGL_DRAW_FRAMEBUFFER, GLenum(eGL_COLOR_ATTACHMENT0 + i), + eGL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, (GLint *)&type); + + ResourceId id = m_pDriver->GetResourceManager()->GetResID( + type == eGL_RENDERBUFFER ? RenderbufferRes(ctx, obj) : TextureRes(ctx, obj)); + + GLint firstMip = 0, firstSlice = 0; + GetFramebufferMipAndLayer(rs.DrawFBO.name, GLenum(eGL_COLOR_ATTACHMENT0 + i), + (GLint *)&firstMip, (GLint *)&firstSlice); + + height = (uint32_t)RDCMAX(1, m_pDriver->m_Textures[id].height >> firstMip); + break; + } + } + + if(height == 0) + { + GL.glGetFramebufferAttachmentParameteriv(eGL_DRAW_FRAMEBUFFER, eGL_DEPTH_ATTACHMENT, + eGL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME, (GLint *)&obj); + + if(obj) + { + GL.glGetFramebufferAttachmentParameteriv(eGL_DRAW_FRAMEBUFFER, eGL_DEPTH_ATTACHMENT, + eGL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE, + (GLint *)&type); + + ResourceId id = m_pDriver->GetResourceManager()->GetResID( + type == eGL_RENDERBUFFER ? RenderbufferRes(ctx, obj) : TextureRes(ctx, obj)); + + GLint firstMip = 0, firstSlice = 0; + GetFramebufferMipAndLayer(rs.DrawFBO.name, eGL_DEPTH_ATTACHMENT, (GLint *)&firstMip, + (GLint *)&firstSlice); + + height = (uint32_t)RDCMAX(1, m_pDriver->m_Textures[id].height >> firstMip); + } + } + + if(height == 0) + { + GL.glGetFramebufferParameteriv(eGL_DRAW_FRAMEBUFFER, eGL_FRAMEBUFFER_DEFAULT_HEIGHT, + (GLint *)&height); + } + + y = height - y - 1; + } + + rdcstr regionName = StringFormat::Fmt("DebugPixel @ %u of (%u,%u) sample %u primitive %u view %u", + eventId, x, y, sample, primitive, view); + + GLMarkerRegion region(regionName); + + if(OpenGL_Debug_ShaderDebugLogging()) + RDCLOG("%s", regionName.c_str()); + + const ActionDescription *action = m_pDriver->GetAction(eventId); + + if(!(action->flags & (ActionFlags::MeshDispatch | ActionFlags::Drawcall))) + { + RDCLOG("No drawcall selected"); + return new ShaderDebugTrace(); + } + + uint32_t storageBufferBinding = ~0U; + + ResourceId geom; + ResourceId pixel; + if(rs.Program.name) + { + ResourceId id = m_pDriver->GetResourceManager()->GetResID(rs.Program); + const WrappedOpenGL::ProgramData &progDetails = m_pDriver->GetProgram(id); + + geom = progDetails.stageShaders[(uint32_t)ShaderStage::Geometry]; + pixel = progDetails.stageShaders[(uint32_t)ShaderStage::Pixel]; + + ResourceId stagePrograms[NumShaderStages]; + for(size_t i = 0; i < NumShaderStages; i++) + stagePrograms[i] = id; + storageBufferBinding = GetStorageBufferBinding(m_pDriver, stagePrograms, + progDetails.stageShaders, ShaderStage::Pixel); + } + else if(rs.Pipeline.name) + { + ResourceId id = m_pDriver->GetResourceManager()->GetResID(rs.Pipeline); + const WrappedOpenGL::PipelineData &pipeDetails = m_pDriver->GetPipeline(id); + + geom = pipeDetails.stageShaders[(uint32_t)ShaderStage::Geometry]; + pixel = pipeDetails.stageShaders[(uint32_t)ShaderStage::Pixel]; + + storageBufferBinding = GetStorageBufferBinding(m_pDriver, pipeDetails.stagePrograms, + pipeDetails.stageShaders, ShaderStage::Pixel); + } + + if(pixel == ResourceId()) + { + RDCLOG("No pixel shader bound at draw"); + return new ShaderDebugTrace(); + } + + if(storageBufferBinding == ~0U) + { + RDCLOG("No spare SSBO available in program"); + return new ShaderDebugTrace(); + } + + // get ourselves in pristine state before this action (without any side effects it may have had) + m_pDriver->ReplayLog(0, eventId, eReplay_WithoutDraw); + + WrappedOpenGL::ShaderData &shadDetails = m_pDriver->GetWriteableShader(pixel); + + rdcstr entryPoint = shadDetails.entryPoint; + rdcarray spec; + for(size_t i = 0; i < shadDetails.specIDs.size() && i < shadDetails.specValues.size(); i++) + spec.push_back(SpecConstant(shadDetails.specIDs[i], shadDetails.specValues[i], 4)); + + const ShaderReflection *refl = shadDetails.GetReflection(); + + if(!refl->debugInfo.debuggable) + { + RDCLOG("Shader is not debuggable: %s", refl->debugInfo.debugStatus.c_str()); + return new ShaderDebugTrace(); + } + + shadDetails.Disassemble(entryPoint); + + GLAPIWrapper *apiWrapper = new GLAPIWrapper(m_pDriver, ShaderStage::Pixel, eventId, pixel); + + SubgroupSupport subgroupSupport = SubgroupSupport::None; + uint32_t maxSubgroupSize = 1; + CalculateSubgroupProperties(maxSubgroupSize, subgroupSupport); + + uint32_t numThreads = 4; + + if(shadDetails.patchData.threadScope & rdcspv::ThreadScope::Subgroup) + numThreads = RDCMAX(numThreads, maxSubgroupSize); + + std::unordered_map &global_builtins = apiWrapper->global_builtins; + global_builtins[ShaderBuiltin::DeviceIndex] = ShaderVariable(rdcstr(), 0U, 0U, 0U, 0U); + global_builtins[ShaderBuiltin::DrawIndex] = ShaderVariable(rdcstr(), action->drawIndex, 0U, 0U, 0U); + + // If the pipe contains a geometry shader, then Primitive ID cannot be used in the pixel + // shader without being emitted from the geometry shader. For now, check if this semantic + // will succeed in a new pixel shader with the rest of the pipe unchanged + bool usePrimitiveID = false; + if(geom != ResourceId()) + { + const WrappedOpenGL::ShaderData &gsDetails = m_pDriver->GetShader(geom); + + const ShaderReflection *gsRefl = gsDetails.GetReflection(); + + // check to see if the shader outputs a primitive ID + for(const SigParameter &e : gsRefl->outputSignature) + { + if(e.systemValue == ShaderBuiltin::PrimitiveIndex) + { + if(OpenGL_Debug_ShaderDebugLogging()) + { + RDCLOG("Geometry shader exports primitive ID, can use"); + } + + usePrimitiveID = true; + break; + } + } + + if(OpenGL_Debug_ShaderDebugLogging()) + { + if(!usePrimitiveID) + RDCLOG("Geometry shader doesn't export primitive ID, can't use"); + } + } + else + { + // no geometry shader - safe to use + usePrimitiveID = true; + } + + bool useSampleID = HasExt[ARB_sample_shading]; + + if(OpenGL_Debug_ShaderDebugLogging()) + { + RDCLOG("useSampleID is %u because of bare capability", useSampleID); + } + + uint32_t paramAlign, structStride; + rdctie(paramAlign, structStride) = GetAlignAndOutputSize(refl, shadDetails.patchData); + + uint32_t overdrawLevels = 100; // maximum number of overdraw levels + + // struct size is ResultDataBase header plus Nx structStride for the number of threads + uint32_t structSize = sizeof(rdcspv::ResultDataBase) + structStride * numThreads; + + GLuint feedbackStorageSize = overdrawLevels * structSize + sizeof(Vec4f) * 3 + 1024; + + if(OpenGL_Debug_ShaderDebugLogging()) + { + RDCLOG("Output structure is %u sized, output buffer is %llu bytes", structStride, + feedbackStorageSize); + } + + bytebuf data; + { + GLRenderState push; + push.FetchState(m_pDriver); + + GLuint shaderFeedback; + GL.glGenBuffers(1, &shaderFeedback); + GL.glBindBuffer(eGL_SHADER_STORAGE_BUFFER, shaderFeedback); + GL.glNamedBufferDataEXT(shaderFeedback, feedbackStorageSize, NULL, eGL_DYNAMIC_DRAW); + byte *clear = (byte *)GL.glMapBufferRange(eGL_SHADER_STORAGE_BUFFER, 0, feedbackStorageSize, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + memset(clear, 0, feedbackStorageSize); + GL.glUnmapBuffer(eGL_SHADER_STORAGE_BUFFER); + + Vec2f destXY(float(x) + 0.5f, float(y) + 0.5f); + + GL.glBufferSubData(eGL_SHADER_STORAGE_BUFFER, 0, sizeof(uint32_t), &overdrawLevels); + GL.glBufferSubData(eGL_SHADER_STORAGE_BUFFER, sizeof(uint32_t), sizeof(Vec2f), &destXY.x); + + GLuint inputFetcher = CreateInputFetcher(shadDetails, storageBufferBinding, usePrimitiveID, + useSampleID, subgroupSupport, maxSubgroupSize); + + GLuint inputShader = 0; + GLuint inputShaderSPIRV = 0; + + if(shadDetails.spirvWords.empty()) + inputShader = inputFetcher; + else + inputShaderSPIRV = inputFetcher; + + GLuint replacementProgram = GL.glCreateProgram(); + + CreateShaderReplacementProgram(rs.Program.name, rs.Pipeline.name, replacementProgram, + ShaderStage::Pixel, inputShader, inputShaderSPIRV); + + GL.glUseProgram(replacementProgram); + + if(inputShader) + { + GLuint ssboIdx = + GL.glGetProgramResourceIndex(replacementProgram, eGL_SHADER_STORAGE_BLOCK, "Output"); + GL.glShaderStorageBlockBinding(replacementProgram, ssboIdx, storageBufferBinding); + } + GL.glBindBufferBase(eGL_SHADER_STORAGE_BUFFER, storageBufferBinding, shaderFeedback); + + m_pDriver->ReplayLog(eventId, eventId, eReplay_OnlyDraw); + + GL.glDeleteProgram(replacementProgram); + GL.glDeleteShader(inputFetcher); + + data.resize(feedbackStorageSize); + GL.glGetBufferSubData(eGL_SHADER_STORAGE_BUFFER, 0, feedbackStorageSize, data.data()); + + push.ApplyState(m_pDriver); + } + + byte *base = data.data(); + base += sizeof(Vec4f); + base += sizeof(Vec4f); + + uint32_t hit_count = ((uint32_t *)base)[0]; + uint32_t total_count = ((uint32_t *)base)[1]; + + if(hit_count > overdrawLevels) + { + RDCERR("%u hits, more than max overdraw levels allowed %u. Clamping", hit_count, overdrawLevels); + hit_count = overdrawLevels; + } + + base += sizeof(Vec4f); + + rdcspv::ResultDataBase *winner = NULL; + + RDCLOG("Got %u hit candidates out of %u total instances", hit_count, total_count); + + // if we encounter multiple hits at our destination pixel co-ord (or any other) we + // check to see if a specific primitive was requested (via primitive parameter not + // being set to ~0U). If it was, debug that pixel, otherwise do a best-estimate + // of which fragment was the last to successfully depth test and debug that, just by + // checking if the depth test is ordered and picking the final fragment in the series + + GLenum depthOp = rs.DepthFunc; + + // depth tests disabled acts the same as always compare mode + if(!rs.Enabled[GLRenderState::eEnabled_DepthTest]) + depthOp = eGL_ALWAYS; + + for(uint32_t i = 0; i < hit_count; i++) + { + rdcspv::ResultDataBase *hit = (rdcspv::ResultDataBase *)(base + structSize * i); + + if(hit->valid != validMagicNumber) + { + RDCWARN("Hit %u doesn't have valid magic number", i); + continue; + } + + if(hit->ddxDerivCheck != 1.0f) + { + RDCWARN("Hit %u doesn't have valid derivatives", i); + continue; + } + + // see if this hit is a closer match than the previous winner. + + // if there's no previous winner it's clearly better + if(winner == NULL) + { + winner = hit; + continue; + } + + // if we're looking for a specific primitive + if(primitive != ~0U) + { + // and this hit is a match and the winner isn't, it's better + if(winner->prim != primitive && hit->prim == primitive) + { + winner = hit; + continue; + } + + // if the winner is a match and we're not, we can't be better so stop now + if(winner->prim == primitive && hit->prim != primitive) + { + continue; + } + } + + // if we're looking for a particular sample, check that + if(sample != ~0U) + { + if(winner->sample != sample && hit->sample == sample) + { + winner = hit; + continue; + } + + if(winner->sample == sample && hit->sample != sample) + { + continue; + } + } + + // otherwise apply depth test + switch(depthOp) + { + case eGL_NEVER: + case eGL_EQUAL: + case eGL_NOTEQUAL: + case eGL_ALWAYS: + default: + // don't emulate equal or not equal since we don't know the reference value. Take any hit + // (thus meaning the last hit) + winner = hit; + break; + case eGL_LESS: + if(hit->pos.z < winner->pos.z) + winner = hit; + break; + case eGL_LEQUAL: + if(hit->pos.z <= winner->pos.z) + winner = hit; + break; + case eGL_GREATER: + if(hit->pos.z > winner->pos.z) + winner = hit; + break; + case eGL_GEQUAL: + if(hit->pos.z >= winner->pos.z) + winner = hit; + break; + } + } + + ShaderDebugTrace *ret = NULL; + + if(winner) + { + rdcspv::Debugger *debugger = new rdcspv::Debugger; + debugger->Parse(shadDetails.spirvWords); + + // the per-thread data immediately follows the rdcspv::ResultDataBase header. Every piece of + // data is uniformly aligned, either 16-byte by default or 32-byte if larger components exist. + // The output is in input signature order. + byte *LaneData = (byte *)(winner + 1); + + numThreads = 4; + + if(shadDetails.patchData.threadScope & rdcspv::ThreadScope::Subgroup) + { + RDCASSERTNOTEQUAL(winner->subgroupSize, 0); + numThreads = RDCMAX(numThreads, winner->subgroupSize); + } + + apiWrapper->location_inputs.resize(numThreads); + apiWrapper->thread_builtins.resize(numThreads); + apiWrapper->thread_props.resize(numThreads); + + for(uint32_t t = 0; t < numThreads; t++) + { + byte *value = LaneData + t * structStride; + + if(shadDetails.patchData.threadScope & rdcspv::ThreadScope::Subgroup) + { + rdcspv::SubgroupLaneData *subgroupData = (rdcspv::SubgroupLaneData *)value; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = subgroupData->isActive; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Elected] = subgroupData->elect; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::SubgroupId] = t; + + value += sizeof(rdcspv::SubgroupLaneData); + } + + // read PixelLaneData + { + rdcspv::PixelLaneData *pixelData = (rdcspv::PixelLaneData *)value; + + { + ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::Position]; + + var.rows = 1; + var.columns = 4; + var.type = VarType::Float; + + memcpy(var.value.u8v.data(), &pixelData->fragCoord, sizeof(Vec4f)); + } + + { + ShaderVariable &var = apiWrapper->thread_builtins[t][ShaderBuiltin::IsHelper]; + + var.rows = 1; + var.columns = 1; + var.type = VarType::Bool; + + memcpy(var.value.u8v.data(), &pixelData->isHelper, sizeof(uint32_t)); + } + + if(numThreads == 4) + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Active] = 1; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::Helper] = pixelData->isHelper; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadId] = pixelData->quadId; + apiWrapper->thread_props[t][(size_t)rdcspv::ThreadProperty::QuadLane] = + pixelData->quadLaneIndex; + } + value += sizeof(rdcspv::PixelLaneData); + + for(size_t i = 0; i < refl->inputSignature.size(); i++) + { + const SigParameter ¶m = refl->inputSignature[i]; + + bool builtin = true; + if(param.systemValue == ShaderBuiltin::Undefined) + { + builtin = false; + apiWrapper->location_inputs[t].resize( + RDCMAX((uint32_t)apiWrapper->location_inputs.size(), param.regIndex + 1)); + } + + ShaderVariable &var = builtin ? apiWrapper->thread_builtins[t][param.systemValue] + : apiWrapper->location_inputs[t][param.regIndex]; + + var.rows = 1; + var.columns = param.compCount & 0xff; + var.type = param.varType; + + const uint32_t firstComp = Bits::CountTrailingZeroes(uint32_t(param.regChannelMask)); + const uint32_t elemSize = VarTypeByteSize(param.varType); + + // we always store in 32-bit types + const size_t sz = RDCMAX(4U, elemSize) * param.compCount; + + memcpy((var.value.u8v.data()) + elemSize * firstComp, value + i * paramAlign, sz); + + // convert down from stored 32-bit types if they were smaller + if(elemSize == 1) + { + ShaderVariable tmp = var; + + for(uint32_t comp = 0; comp < param.compCount; comp++) + var.value.u8v[comp] = tmp.value.u32v[comp] & 0xff; + } + else if(elemSize == 2) + { + ShaderVariable tmp = var; + + for(uint32_t comp = 0; comp < param.compCount; comp++) + { + if(VarTypeCompType(param.varType) == CompType::Float) + var.value.f16v[comp] = rdhalf::make(tmp.value.f32v[comp]); + else + var.value.u16v[comp] = tmp.value.u32v[comp] & 0xffff; + } + } + } + } + + apiWrapper->global_builtins[ShaderBuiltin::SubgroupSize] = + ShaderVariable(rdcstr(), numThreads, 0U, 0U, 0U); + + ret = debugger->BeginDebug(apiWrapper, ShaderStage::Pixel, entryPoint, spec, + shadDetails.spirvInstructionLines, shadDetails.patchData, + winner->laneIndex, numThreads, numThreads); + apiWrapper->ResetReplay(); + } + else + { + RDCLOG("Didn't get any valid hit to debug"); + delete apiWrapper; + + ret = new ShaderDebugTrace; + ret->stage = ShaderStage::Pixel; + } + + return ret; } ShaderDebugTrace *GLReplay::DebugThread(uint32_t eventId, const rdcfixedarray &groupid, @@ -1719,7 +2382,19 @@ ShaderDebugTrace *GLReplay::DebugMeshThread(uint32_t eventId, rdcarray GLReplay::ContinueDebug(ShaderDebugger *debugger) { - return {}; + rdcspv::Debugger *spvDebugger = (rdcspv::Debugger *)debugger; + + if(!spvDebugger) + return {}; + + GLMarkerRegion region("ContinueDebug Simulation Loop"); + + rdcarray ret = spvDebugger->ContinueDebug(); + + GLAPIWrapper *api = (GLAPIWrapper *)spvDebugger->GetAPIWrapper(); + api->ResetReplay(); + + return ret; } void GLReplay::FreeDebugger(ShaderDebugger *debugger)