mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-12 21:10:42 +00:00
Add CPU side handling of math/sampling operations
This commit is contained in:
@@ -93,10 +93,6 @@ uniform sampler2DRectShadow tex2DRectShadow;
|
||||
|
||||
#endif
|
||||
|
||||
#define CONCAT2(a, b, c) a##b##c
|
||||
#define CONCAT(a, b) CONCAT2(Do, a, b)
|
||||
#define FUNC(a, b) CONCAT(a, b)
|
||||
|
||||
in vec4 input_uvwa;
|
||||
out RESULT Output;
|
||||
|
||||
@@ -716,5 +712,5 @@ RESULT DoGatherDref2DRect()
|
||||
|
||||
// Fragment shader entry point: invokes the selected sampling helper and writes its result to the
// `Output` variable.
// NOTE(review): this hunk appears to show both the pre-change and post-change assignment; only one
// of the two lines below is expected to exist in any real revision of the shader - confirm against
// the repository.
void main()
|
||||
{
|
||||
// Macro-pasted form: FUNC(OPERATION, DIM) expands through CONCAT/CONCAT2 to Do##OPERATION##DIM.
Output = FUNC(OPERATION, DIM)();
|
||||
// Direct form: OPERATION itself names the function to call.
Output = OPERATION();
|
||||
}
|
||||
|
||||
@@ -50,8 +50,13 @@ uniform vec4 in_ddy;
|
||||
|
||||
void main(void)
|
||||
{
|
||||
#ifdef VULKAN
|
||||
const vec4 verts[3] = vec4[3](vec4(-0.75, -0.75, 0.5, 1.0), vec4(1.25, -0.75, 0.5, 1.0),
|
||||
vec4(-0.75, 1.25, 0.5, 1.0));
|
||||
#else
|
||||
const vec4 verts[3] =
|
||||
vec4[3](vec4(-0.75, 0.75, 0.5, 1.0), vec4(1.25, 0.75, 0.5, 1.0), vec4(-0.75, -1.25, 0.5, 1.0));
|
||||
#endif
|
||||
|
||||
gl_Position = verts[VERTEX_ID];
|
||||
uvwa = in_uvwa;
|
||||
|
||||
@@ -737,6 +737,7 @@ extern bool IsGLES;
|
||||
EXT_TO_CHECK(33, 30, ARB_shader_bit_encoding) \
|
||||
EXT_TO_CHECK(40, 32, ARB_draw_buffers_blend) \
|
||||
EXT_TO_CHECK(40, 31, ARB_draw_indirect) \
|
||||
EXT_TO_CHECK(40, 32, ARB_texture_gather) \
|
||||
EXT_TO_CHECK(40, 32, ARB_gpu_shader5) \
|
||||
EXT_TO_CHECK(40, 32, ARB_sample_shading) \
|
||||
EXT_TO_CHECK(40, 99, ARB_shader_subroutine) \
|
||||
|
||||
@@ -425,6 +425,8 @@ void GLReplay::InitDebugData()
|
||||
"#extension GL_ARB_shader_bit_encoding : require\n";
|
||||
}
|
||||
|
||||
DebugData.texSampleDefines = texSampleDefines;
|
||||
|
||||
rdcstr vsDefines = "#define FORCE_IO_LOCATION 1";
|
||||
|
||||
if(!IsGLES)
|
||||
@@ -779,6 +781,9 @@ void GLReplay::InitDebugData()
|
||||
GL.glDeleteShader(fsShad);
|
||||
}
|
||||
|
||||
cs = GenerateGLSLShader(GetEmbeddedResource(glsl_debug_math_comp), shaderType, glslCSVer);
|
||||
DebugData.shaderDebugMathProg = CreateCShaderProgram(cs);
|
||||
|
||||
RenderDoc::Inst().SetProgress(LoadProgress::DebugManagerInit, 0.4f);
|
||||
|
||||
drv.glGenBuffers(ARRAY_COUNT(DebugData.UBOs), DebugData.UBOs);
|
||||
@@ -1274,6 +1279,14 @@ void GLReplay::DeleteDebugData()
|
||||
if(DebugData.trisizeProg)
|
||||
drv.glDeleteProgram(DebugData.trisizeProg);
|
||||
|
||||
if(DebugData.shaderDebugMathProg)
|
||||
drv.glDeleteProgram(DebugData.shaderDebugMathProg);
|
||||
|
||||
for(auto it = m_ShaderDebugSampleProg.begin(); it != m_ShaderDebugSampleProg.end(); ++it)
|
||||
{
|
||||
drv.glDeleteProgram(it->second);
|
||||
}
|
||||
|
||||
drv.glDeleteBuffers(ARRAY_COUNT(DebugData.UBOs), DebugData.UBOs);
|
||||
drv.glDeleteFramebuffers(1, &DebugData.pickPixelFBO);
|
||||
drv.glDeleteTextures(1, &DebugData.pickPixelTex);
|
||||
@@ -1329,6 +1342,117 @@ void GLReplay::DeleteDebugData()
|
||||
drv.glDeleteBuffers(1, &DebugData.triHighlightBuffer);
|
||||
}
|
||||
|
||||
// Accessor for the program stored in DebugData.shaderDebugMathProg (created in InitDebugData from
// the glsl_debug_math_comp resource). Returns the stored GL program name unchanged.
GLuint GLReplay::GetShaderDebugMathProg()
{
  const GLuint mathProg = DebugData.shaderDebugMathProg;
  return mathProg;
}
|
||||
|
||||
// Builds, or fetches from m_ShaderDebugSampleProg, the GLSL program used to run one texture
// fetch/sample/gather operation for shader debugging.
//
// config: selects the operation, texture dimension and result base type, plus the offsets that are
//         baked into the shader as preprocessor defines. config.hashKey() is the cache key.
//
// Returns the GL program name for this configuration. A cached value of 0 is not treated as a hit,
// so the program is rebuilt on the next request. Whatever CreateShaderProgram returns is stored in
// the cache and returned as-is.
GLuint GLReplay::MakeShaderDebugSampleProg(const SamplingProgramConfig &config)
{
  const uint32_t hash = config.hashKey();

  // single lookup that does not default-insert an entry when the key is missing
  auto cached = m_ShaderDebugSampleProg.find(hash);
  if(cached != m_ShaderDebugSampleProg.end() && cached->second != 0)
    return cached->second;

  rdcstr defines;

  defines += rdcstr("#define SHADER_BASETYPE ") + ToStr((uint32_t)config.resType) + "\n";

  // the offsets are signed, so they must be printed with %d. With %u a negative offset would be
  // emitted as a huge unsigned literal inside the ivec constructor.
  defines += StringFormat::Fmt("#define FETCH_OFFSET ivec3(%d, %d, %d)\n", config.fetchOffset.x,
                               config.fetchOffset.y, config.fetchOffset.z);

  defines += StringFormat::Fmt(
      "#define GATHER_OFFSETS ivec2[4](ivec2(%d, %d), ivec2(%d, %d), ivec2(%d, %d), ivec2(%d, "
      "%d))\n",
      config.gatherOffsets[0], config.gatherOffsets[1], config.gatherOffsets[2],
      config.gatherOffsets[3], config.gatherOffsets[4], config.gatherOffsets[5],
      config.gatherOffsets[6], config.gatherOffsets[7]);

  // USE_GRAD is emitted exactly once here
  defines += StringFormat::Fmt("#define USE_GRAD %u\n", config.useGrad);
  defines += StringFormat::Fmt("#define USE_GATHER_OFFS %u\n", config.useGatherOffs);
  defines += StringFormat::Fmt("#define GATHER_CHANNEL %u\n", config.gatherChannel);

  // OPERATION is the full name of the shader function to call: "Do" + operation + dimension
  rdcstr operation = "Do";
  switch(config.op)
  {
    case SamplingProgramConfig::Fetch: operation += "Fetch"; break;
    case SamplingProgramConfig::QueryLod: operation += "QueryLod"; break;
    case SamplingProgramConfig::Sample: operation += "Sample"; break;
    case SamplingProgramConfig::SampleDref: operation += "SampleDref"; break;
    case SamplingProgramConfig::Gather: operation += "Gather"; break;
    case SamplingProgramConfig::GatherDref: operation += "GatherDref"; break;
  }

  rdcstr dim;
  switch(config.dim)
  {
    case SamplingProgramConfig::TexBuffer: dim = "Buffer"; break;
    case SamplingProgramConfig::Tex1D: dim = "1D"; break;
    case SamplingProgramConfig::Tex2D: dim = "2D"; break;
    case SamplingProgramConfig::Tex3D: dim = "3D"; break;
    case SamplingProgramConfig::TexCube: dim = "Cube"; break;
    case SamplingProgramConfig::Tex1DArray: dim = "1DArray"; break;
    case SamplingProgramConfig::Tex2DArray: dim = "2DArray"; break;
    case SamplingProgramConfig::Tex3DArray: dim = "3DArray"; break;
    case SamplingProgramConfig::TexCubeArray: dim = "CubeArray"; break;
    case SamplingProgramConfig::Tex2DRect: dim = "2DRect"; break;
    case SamplingProgramConfig::Tex2DMS: dim = "2DMS"; break;
    case SamplingProgramConfig::Tex2DMSArray: dim = "2DMSArray"; break;
  }
  defines += StringFormat::Fmt("#define OPERATION %s%s\n", operation.c_str(), dim.c_str());

  ShaderType shaderType;
  int glslVersion;
  int glslBaseVer;
  int glslCSVer;    // compute shader

  GetGLSLVersions(shaderType, glslVersion, glslBaseVer, glslCSVer);

  if(!IsGLES)
  {
    // NOTE(review): glslVersion is assigned here but the shaders below are generated with
    // glslCSVer, so these values are currently unused - confirm which version is intended.
    glslVersion = 330;
    if(GLCoreVersion >= 45)
    {
      glslVersion = 450;
      defines += "#extension GL_ARB_sparse_texture_clamp : require\n";
      defines += "#define USE_MINLOD 1\n";
    }

    // GATHER_SUPPORT: 2 when both gather extensions are present, 1 with ARB_texture_gather only,
    // 0 otherwise
    if(HasExt[ARB_gpu_shader5] && HasExt[ARB_texture_gather])
    {
      defines += "#extension GL_ARB_texture_gather : require\n";
      defines += "#extension GL_ARB_gpu_shader5 : require\n";
      defines += "#define GATHER_SUPPORT 2\n";
    }
    else if(HasExt[ARB_texture_gather])
    {
      defines += "#extension GL_ARB_texture_gather : require\n";
      defines += "#define GATHER_SUPPORT 1\n";
    }
    else
    {
      defines += "#define GATHER_SUPPORT 0\n";
    }
  }

  rdcstr vs =
      GenerateGLSLShader(GetEmbeddedResource(glsl_shaderdebug_sample_vert), shaderType, glslCSVer);
  rdcstr ps = GenerateGLSLShader(GetEmbeddedResource(glsl_debug_sample_frag), shaderType, glslCSVer,
                                 defines + DebugData.texSampleDefines);
  GLuint prog = CreateShaderProgram(vs, ps, "");

  // the sampler uniform is named "tex" + dimension, with a "Shadow" suffix for depth-compare ops
  if(config.op == SamplingProgramConfig::SampleDref || config.op == SamplingProgramConfig::GatherDref)
    dim += "Shadow";

  dim = "tex" + dim;

  BindUBO(prog, "debugsample", 0);
  // NOTE(review): glUniform1i writes to the currently bound program and nothing in this function
  // binds prog - confirm that CreateShaderProgram/BindUBO leave it current.
  GL.glUniform1i(GL.glGetUniformLocation(prog, dim.c_str()), 0);

  m_ShaderDebugSampleProg[hash] = prog;
  return prog;
}
|
||||
|
||||
GLReplay::TextureSamplerState GLReplay::SetSamplerParams(GLenum target, GLuint texname,
|
||||
TextureSamplerMode mode)
|
||||
{
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "api/replay/renderdoc_replay.h"
|
||||
#include "core/core.h"
|
||||
#include "replay/replay_driver.h"
|
||||
@@ -117,6 +118,65 @@ enum TexDisplayFlags
|
||||
eTexDisplay_RemapSInt = 0x10,
|
||||
};
|
||||
|
||||
struct SamplingProgramConfig
|
||||
{
|
||||
enum
|
||||
{
|
||||
TexBuffer,
|
||||
Tex1D,
|
||||
Tex2D,
|
||||
Tex3D,
|
||||
TexCube,
|
||||
Tex1DArray,
|
||||
Tex2DArray,
|
||||
Tex3DArray,
|
||||
TexCubeArray,
|
||||
Tex2DRect,
|
||||
Tex2DMS,
|
||||
Tex2DMSArray,
|
||||
} dim = Tex2D;
|
||||
|
||||
enum
|
||||
{
|
||||
Fetch,
|
||||
QueryLod,
|
||||
Sample,
|
||||
SampleDref,
|
||||
Gather,
|
||||
GatherDref,
|
||||
} op = Fetch;
|
||||
|
||||
enum
|
||||
{
|
||||
Float,
|
||||
UInt,
|
||||
SInt,
|
||||
} resType = Float;
|
||||
|
||||
uint32_t gatherChannel = 0;
|
||||
bool useGatherOffs = false;
|
||||
bool useGrad = false;
|
||||
rdcfixedarray<int32_t, 8> gatherOffsets = {};
|
||||
Vec3i fetchOffset;
|
||||
|
||||
uint32_t hashKey() const
|
||||
{
|
||||
uint32_t hash = 5381;
|
||||
hash = ((hash << 5) + hash) + (uint32_t)dim;
|
||||
hash = ((hash << 5) + hash) + (uint32_t)op;
|
||||
hash = ((hash << 5) + hash) + (uint32_t)resType;
|
||||
hash = ((hash << 5) + hash) + gatherChannel;
|
||||
hash = ((hash << 5) + hash) + useGatherOffs;
|
||||
hash = ((hash << 5) + hash) + useGrad;
|
||||
hash = ((hash << 5) + hash) + fetchOffset.x;
|
||||
hash = ((hash << 5) + hash) + fetchOffset.y;
|
||||
hash = ((hash << 5) + hash) + fetchOffset.z;
|
||||
for(size_t i = 0; i < gatherOffsets.size(); i++)
|
||||
hash = ((hash << 5) + hash) + gatherOffsets[i];
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
class GLReplay : public IReplayDriver
|
||||
{
|
||||
public:
|
||||
@@ -287,6 +347,7 @@ public:
|
||||
void FileChanged() {}
|
||||
void SetReplayData(GLWindowingData data);
|
||||
|
||||
void UseReplayContext() { MakeCurrentReplayContext(&m_ReplayCtx); }
|
||||
bool IsReplayContext(void *ctx) { return m_ReplayCtx.ctx == NULL || ctx == m_ReplayCtx.ctx; }
|
||||
bool HasDebugContext() { return m_DebugCtx != NULL; }
|
||||
void FillWithDiscardPattern(DiscardType type, GLuint framebuffer, GLsizei numAttachments,
|
||||
@@ -299,6 +360,9 @@ public:
|
||||
bool CreateFragmentShaderReplacementProgram(GLuint program, GLuint replacedProgram, GLuint pipeline,
|
||||
GLuint fragShader, GLuint fragShaderSPIRV);
|
||||
|
||||
GLuint GetShaderDebugMathProg();
|
||||
GLuint MakeShaderDebugSampleProg(const SamplingProgramConfig &config);
|
||||
|
||||
private:
|
||||
void OpenGLFillCBufferVariables(ResourceId shader, GLuint prog, bool bufferBacked, rdcstr prefix,
|
||||
const rdcarray<ShaderConstant> &variables,
|
||||
@@ -373,6 +437,8 @@ private:
|
||||
|
||||
int glslVersion;
|
||||
|
||||
rdcstr texSampleDefines;
|
||||
|
||||
// min/max data
|
||||
GLuint minmaxTileResult; // tile result buffer
|
||||
GLuint minmaxResult; // Vec4f[2] final result buffer
|
||||
@@ -438,6 +504,8 @@ private:
|
||||
GLuint discardProg[3][4];
|
||||
GLuint discardPatternBuffer;
|
||||
|
||||
GLuint shaderDebugMathProg;
|
||||
|
||||
ResourceId overlayTexId;
|
||||
GLuint overlayTex;
|
||||
GLuint overlayFBO;
|
||||
@@ -454,6 +522,8 @@ private:
|
||||
|
||||
HighlightCache m_HighlightCache;
|
||||
|
||||
std::unordered_map<uint32_t, GLuint> m_ShaderDebugSampleProg;
|
||||
|
||||
std::map<GLenum, bytebuf> m_DiscardPatterns;
|
||||
|
||||
// eventId -> data
|
||||
|
||||
@@ -31,6 +31,9 @@
|
||||
#include "gl_driver.h"
|
||||
#include "gl_replay.h"
|
||||
|
||||
#define OPENGL 1
|
||||
#include "data/glsl/glsl_ubos_cpp.h"
|
||||
|
||||
#if ENABLED(RDOC_DEVEL)
|
||||
#define CHECK_DEVICE_THREAD() \
|
||||
RDCASSERTMSG("API Wrapper function called from non-device thread!", IsDeviceThread());
|
||||
@@ -103,11 +106,13 @@ public:
|
||||
// Releases the GL objects owned by this wrapper: the bias samplers, the UBO, the math and sample
// buffers, and the readback texture and framebuffer.
~GLAPIWrapper()
{
  CHECK_DEVICE_THREAD();

  // flush and finish before deleting anything
  m_pDriver->glFlush();
  m_pDriver->glFinish();

  for(auto &biasSampler : m_BiasSamplers)
    m_pDriver->glDeleteSamplers(1, &biasSampler.second);

  GL.glDeleteBuffers(1, &m_UBO);
  GL.glDeleteBuffers(1, &m_MathBuffer);
  GL.glDeleteBuffers(1, &m_SampleBuffer);

  GL.glDeleteTextures(1, &m_ReadbackTex);
  GL.glDeleteFramebuffers(1, &m_ReadbackFBO);
}
|
||||
|
||||
void ResetReplay()
|
||||
@@ -118,7 +123,7 @@ public:
|
||||
GLMarkerRegion region("ResetReplay");
|
||||
// replay the action to get back to 'normal' state for this event, and mark that we need to
|
||||
// replay back to pristine state next time we need to fetch data.
|
||||
m_pDriver->ReplayLog(0, m_EventID, eReplay_OnlyDraw);
|
||||
m_pDriver->GetReplay()->ReplayLog(m_EventID, eReplay_OnlyDraw);
|
||||
}
|
||||
m_ResourcesDirty = true;
|
||||
}
|
||||
@@ -550,6 +555,614 @@ public:
|
||||
bool &hasResult) override
|
||||
{
|
||||
CHECK_DEVICE_THREAD();
|
||||
|
||||
DebugSampleUBO uniformParams = {};
|
||||
|
||||
const bool buffer = (texType & DebugAPIWrapper::Buffer_Texture) != 0;
|
||||
const bool uintTex = (texType & DebugAPIWrapper::UInt_Texture) != 0;
|
||||
const bool sintTex = (texType & DebugAPIWrapper::SInt_Texture) != 0;
|
||||
|
||||
// fetch the right type of descriptor depending on if we're buffer or not
|
||||
bool valid = true;
|
||||
rdcstr access = StringFormat::Fmt("performing %s operation", ToStr(opcode).c_str());
|
||||
const Descriptor &imageDescriptor = buffer ? GetDescriptor(access, ShaderBindIndex(), valid)
|
||||
: GetDescriptor(access, imageBind, valid);
|
||||
const Descriptor &bufferViewDescriptor = buffer
|
||||
? GetDescriptor(access, imageBind, valid)
|
||||
: GetDescriptor(access, ShaderBindIndex(), valid);
|
||||
|
||||
// fetch the sampler (if there's no sampler, this will silently return dummy data without
|
||||
// marking invalid
|
||||
const SamplerDescriptor &samplerDescriptor = GetSamplerDescriptor(access, samplerBind, valid);
|
||||
|
||||
// if any descriptor lookup failed, return now
|
||||
if(!valid)
|
||||
{
|
||||
hasResult = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
GLMarkerRegion markerRegion("QueueSampleGather");
|
||||
|
||||
GLResource texture = m_pDriver->GetResourceManager()->GetLiveResource(imageDescriptor.resource);
|
||||
GLResource bufTexture =
|
||||
m_pDriver->GetResourceManager()->GetLiveResource(bufferViewDescriptor.resource);
|
||||
GLResource sampler = m_pDriver->GetResourceManager()->GetLiveResource(samplerDescriptor.object);
|
||||
|
||||
// NULL texture : return 0,0,0,0
|
||||
if(!buffer && (texture.name == 0))
|
||||
{
|
||||
memset(&output.value, 0, sizeof(output.value));
|
||||
hasResult = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
WrappedOpenGL::TextureData &texDetails =
|
||||
m_pDriver->m_Textures[m_pDriver->GetResourceManager()->GetLiveID(imageDescriptor.resource)];
|
||||
|
||||
SamplingProgramConfig config;
|
||||
|
||||
config.resType = SamplingProgramConfig::Float;
|
||||
if(uintTex)
|
||||
config.resType = SamplingProgramConfig::UInt;
|
||||
else if(sintTex)
|
||||
config.resType = SamplingProgramConfig::SInt;
|
||||
|
||||
// how many co-ordinates should there be
|
||||
int coords = 0, gradCoords = 0;
|
||||
if(buffer)
|
||||
{
|
||||
config.dim = SamplingProgramConfig::TexBuffer;
|
||||
coords = gradCoords = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(texDetails.curType)
|
||||
{
|
||||
case eGL_TEXTURE_1D:
|
||||
coords = 1;
|
||||
gradCoords = 1;
|
||||
config.dim = SamplingProgramConfig::Tex1D;
|
||||
break;
|
||||
case eGL_TEXTURE_2D:
|
||||
coords = 2;
|
||||
gradCoords = 2;
|
||||
config.dim = SamplingProgramConfig::Tex2D;
|
||||
break;
|
||||
case eGL_TEXTURE_3D:
|
||||
coords = 3;
|
||||
gradCoords = 3;
|
||||
config.dim = SamplingProgramConfig::Tex3D;
|
||||
break;
|
||||
case eGL_TEXTURE_CUBE_MAP:
|
||||
coords = 3;
|
||||
gradCoords = 3;
|
||||
config.dim = SamplingProgramConfig::TexCube;
|
||||
break;
|
||||
case eGL_TEXTURE_1D_ARRAY:
|
||||
coords = 2;
|
||||
gradCoords = 1;
|
||||
config.dim = SamplingProgramConfig::Tex1DArray;
|
||||
break;
|
||||
case eGL_TEXTURE_2D_ARRAY:
|
||||
coords = 3;
|
||||
gradCoords = 2;
|
||||
config.dim = SamplingProgramConfig::Tex2DArray;
|
||||
break;
|
||||
case eGL_TEXTURE_CUBE_MAP_ARRAY:
|
||||
coords = 4;
|
||||
gradCoords = 3;
|
||||
config.dim = SamplingProgramConfig::TexCubeArray;
|
||||
break;
|
||||
case eGL_TEXTURE_RECTANGLE:
|
||||
coords = 2;
|
||||
gradCoords = 2;
|
||||
config.dim = SamplingProgramConfig::Tex2DRect;
|
||||
break;
|
||||
case eGL_TEXTURE_BUFFER:
|
||||
coords = 1;
|
||||
gradCoords = 1;
|
||||
config.dim = SamplingProgramConfig::TexBuffer;
|
||||
break;
|
||||
case eGL_TEXTURE_2D_MULTISAMPLE:
|
||||
coords = 2;
|
||||
gradCoords = 2;
|
||||
config.dim = SamplingProgramConfig::Tex2DMS;
|
||||
break;
|
||||
case eGL_TEXTURE_2D_MULTISAMPLE_ARRAY:
|
||||
coords = 3;
|
||||
gradCoords = 2;
|
||||
config.dim = SamplingProgramConfig::Tex2DMSArray;
|
||||
break;
|
||||
default: RDCERR("Invalid texture type %s", ToStr(texDetails.curType).c_str()); return false;
|
||||
}
|
||||
}
|
||||
|
||||
GLint firstMip = 0, numMips = 1;
|
||||
if(texture.name)
|
||||
{
|
||||
GL.glGetTextureParameterivEXT(texture.name, texDetails.curType, eGL_TEXTURE_BASE_LEVEL,
|
||||
&firstMip);
|
||||
GL.glGetTextureParameterivEXT(texture.name, texDetails.curType, eGL_TEXTURE_MAX_LEVEL,
|
||||
&numMips);
|
||||
}
|
||||
|
||||
// handle query opcodes now
|
||||
switch(opcode)
|
||||
{
|
||||
case rdcspv::Op::ImageQueryLevels:
|
||||
{
|
||||
output.value.u32v[0] = numMips;
|
||||
hasResult = true;
|
||||
return true;
|
||||
}
|
||||
case rdcspv::Op::ImageQuerySamples:
|
||||
{
|
||||
output.value.u32v[0] = (uint32_t)RDCMAX(1, texDetails.samples);
|
||||
hasResult = true;
|
||||
return true;
|
||||
}
|
||||
case rdcspv::Op::ImageQuerySize:
|
||||
case rdcspv::Op::ImageQuerySizeLod:
|
||||
{
|
||||
uint32_t mip = firstMip;
|
||||
|
||||
if(opcode == rdcspv::Op::ImageQuerySizeLod)
|
||||
mip += uintComp(lane.GetSrc(operands.lod), 0);
|
||||
|
||||
RDCEraseEl(output.value);
|
||||
|
||||
int i = 0;
|
||||
setUintComp(output, i++, RDCMAX(1, texDetails.width >> mip));
|
||||
if(coords >= 2)
|
||||
setUintComp(output, i++, RDCMAX(1, texDetails.height >> mip));
|
||||
if(texDetails.curType == eGL_TEXTURE_3D)
|
||||
setUintComp(output, i++, RDCMAX(1, texDetails.depth >> mip));
|
||||
|
||||
if(texDetails.curType == eGL_TEXTURE_1D_ARRAY)
|
||||
setUintComp(output, i++, texDetails.height);
|
||||
else if(texDetails.curType == eGL_TEXTURE_2D_ARRAY)
|
||||
setUintComp(output, i++, texDetails.depth);
|
||||
else if(texDetails.curType == eGL_TEXTURE_CUBE_MAP ||
|
||||
texDetails.curType == eGL_TEXTURE_CUBE_MAP_ARRAY)
|
||||
setUintComp(output, i++, texDetails.depth / 6);
|
||||
|
||||
if(buffer)
|
||||
{
|
||||
uint64_t size = bufferViewDescriptor.byteSize;
|
||||
GLenum format = MakeGLFormat(bufferViewDescriptor.format);
|
||||
|
||||
setUintComp(
|
||||
output, 0,
|
||||
uint32_t(size / GetByteSize(1, 1, 1, GetBaseFormat(format), GetDataType(format))));
|
||||
}
|
||||
|
||||
hasResult = true;
|
||||
return true;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
bool lodBiasRestore = false;
|
||||
float lodBiasRestoreValue = 0.0f;
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::Bias)
|
||||
{
|
||||
const ShaderVariable &biasVar = lane.GetSrc(operands.bias);
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
float bias = floatComp(biasVar, 0);
|
||||
|
||||
if(bias != 0.0f)
|
||||
{
|
||||
// bias can only be used with implicit lod operations, but we want to do everything with
|
||||
// explicit lod operations. So we instead push the bias into the sampler itself, which is
|
||||
// entirely equivalent.
|
||||
|
||||
lodBiasRestore = true;
|
||||
if(sampler.name)
|
||||
{
|
||||
GL.glGetSamplerParameterfv(sampler.name, eGL_TEXTURE_LOD_BIAS, &lodBiasRestoreValue);
|
||||
GL.glSamplerParameterf(sampler.name, eGL_TEXTURE_LOD_BIAS, lodBiasRestoreValue + bias);
|
||||
}
|
||||
else
|
||||
{
|
||||
GL.glGetTextureParameterfvEXT(texture.name, texDetails.curType, eGL_TEXTURE_LOD_BIAS,
|
||||
&lodBiasRestoreValue);
|
||||
float val = lodBiasRestoreValue + bias;
|
||||
GL.glTextureParameterfvEXT(texture.name, texDetails.curType, eGL_TEXTURE_LOD_BIAS, &val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
switch(opcode)
|
||||
{
|
||||
case rdcspv::Op::ImageFetch: config.op = SamplingProgramConfig::Fetch; break;
|
||||
case rdcspv::Op::ImageQueryLod: config.op = SamplingProgramConfig::QueryLod; break;
|
||||
case rdcspv::Op::ImageSampleExplicitLod:
|
||||
case rdcspv::Op::ImageSampleImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjImplicitLod: config.op = SamplingProgramConfig::Sample; break;
|
||||
case rdcspv::Op::ImageSampleDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleDrefImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefImplicitLod:
|
||||
config.op = SamplingProgramConfig::SampleDref;
|
||||
break;
|
||||
case rdcspv::Op::ImageGather: config.op = SamplingProgramConfig::Gather; break;
|
||||
case rdcspv::Op::ImageDrefGather: config.op = SamplingProgramConfig::GatherDref; break;
|
||||
default:
|
||||
{
|
||||
RDCERR("Unsupported opcode %s", ToStr(opcode).c_str());
|
||||
hasResult = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// proj opcodes have an extra q parameter, but we do the divide ourselves and 'demote' these to
|
||||
// non-proj variants
|
||||
bool proj = false;
|
||||
switch(opcode)
|
||||
{
|
||||
case rdcspv::Op::ImageSampleProjExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefImplicitLod:
|
||||
{
|
||||
proj = true;
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
bool useCompare = false;
|
||||
switch(opcode)
|
||||
{
|
||||
case rdcspv::Op::ImageDrefGather:
|
||||
case rdcspv::Op::ImageSampleDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleDrefImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefImplicitLod:
|
||||
{
|
||||
useCompare = true;
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
bool gatherOp = false;
|
||||
|
||||
switch(opcode)
|
||||
{
|
||||
case rdcspv::Op::ImageFetch:
|
||||
{
|
||||
// co-ordinates after the used ones are read as 0s. This allows us to then read an implicit
|
||||
// 0 for array layer when we promote accesses to arrays.
|
||||
uniformParams.texel_uvw.x = uintComp(uv, 0);
|
||||
if(coords >= 2)
|
||||
uniformParams.texel_uvw.y = uintComp(uv, 1);
|
||||
if(coords >= 3)
|
||||
uniformParams.texel_uvw.z = uintComp(uv, 2);
|
||||
|
||||
if(!buffer && operands.flags & rdcspv::ImageOperands::Lod)
|
||||
uniformParams.texel_lod = uintComp(lane.GetSrc(operands.lod), 0);
|
||||
else
|
||||
uniformParams.texel_lod = 0;
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::Sample)
|
||||
uniformParams.sampleIdx = uintComp(lane.GetSrc(operands.sample), 0);
|
||||
|
||||
break;
|
||||
}
|
||||
case rdcspv::Op::ImageGather:
|
||||
case rdcspv::Op::ImageDrefGather:
|
||||
{
|
||||
gatherOp = true;
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
for(int i = 0; i < coords; i++)
|
||||
uniformParams.uvwa.fv[i] = floatComp(uv, i);
|
||||
|
||||
if(useCompare)
|
||||
uniformParams.compare = floatComp(compare, 0);
|
||||
|
||||
config.gatherChannel = (uint32_t)gatherChannel;
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::ConstOffsets)
|
||||
{
|
||||
ShaderVariable constOffsets = lane.GetSrc(operands.constOffsets);
|
||||
|
||||
config.useGatherOffs = true;
|
||||
|
||||
// should be an array of ivec2
|
||||
RDCASSERT(constOffsets.members.size() == 4);
|
||||
|
||||
// sign extend variables lower than 32-bits
|
||||
for(int i = 0; i < 4; i++)
|
||||
{
|
||||
if(constOffsets.members[i].type == VarType::SByte)
|
||||
{
|
||||
constOffsets.members[i].value.s32v[0] = constOffsets.members[i].value.s8v[0];
|
||||
constOffsets.members[i].value.s32v[1] = constOffsets.members[i].value.s8v[1];
|
||||
}
|
||||
else if(constOffsets.members[i].type == VarType::SShort)
|
||||
{
|
||||
constOffsets.members[i].value.s32v[0] = constOffsets.members[i].value.s16v[0];
|
||||
constOffsets.members[i].value.s32v[1] = constOffsets.members[i].value.s16v[1];
|
||||
}
|
||||
}
|
||||
|
||||
config.gatherOffsets[0] = constOffsets.members[0].value.s32v[0];
|
||||
config.gatherOffsets[1] = constOffsets.members[0].value.s32v[1];
|
||||
config.gatherOffsets[2] = constOffsets.members[1].value.s32v[0];
|
||||
config.gatherOffsets[3] = constOffsets.members[1].value.s32v[1];
|
||||
config.gatherOffsets[4] = constOffsets.members[2].value.s32v[0];
|
||||
config.gatherOffsets[5] = constOffsets.members[2].value.s32v[1];
|
||||
config.gatherOffsets[6] = constOffsets.members[3].value.s32v[0];
|
||||
config.gatherOffsets[7] = constOffsets.members[3].value.s32v[1];
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
case rdcspv::Op::ImageQueryLod:
|
||||
case rdcspv::Op::ImageSampleExplicitLod:
|
||||
case rdcspv::Op::ImageSampleImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjImplicitLod:
|
||||
case rdcspv::Op::ImageSampleDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleDrefImplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefExplicitLod:
|
||||
case rdcspv::Op::ImageSampleProjDrefImplicitLod:
|
||||
{
|
||||
// silently cast parameters to 32-bit floats
|
||||
for(int i = 0; i < coords; i++)
|
||||
uniformParams.uvwa.fv[i] = floatComp(uv, i);
|
||||
|
||||
if(proj)
|
||||
{
|
||||
// coords shouldn't be 4 because that's only valid for cube arrays which can't be
|
||||
// projected
|
||||
RDCASSERT(coords < 4);
|
||||
|
||||
// do the divide ourselves rather than severely complicating the sample shader (as proj
|
||||
// variants need non-arrayed textures)
|
||||
float q = floatComp(uv, coords);
|
||||
|
||||
uniformParams.uvwa.fv[0] /= q;
|
||||
uniformParams.uvwa.fv[1] /= q;
|
||||
uniformParams.uvwa.fv[2] /= q;
|
||||
}
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::MinLod)
|
||||
{
|
||||
const ShaderVariable &minLodVar = lane.GetSrc(operands.minLod);
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
uniformParams.minlod = floatComp(minLodVar, 0);
|
||||
}
|
||||
|
||||
if(useCompare)
|
||||
{
|
||||
// silently cast parameters to 32-bit floats
|
||||
uniformParams.compare = floatComp(compare, 0);
|
||||
}
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::Lod)
|
||||
{
|
||||
const ShaderVariable &lodVar = lane.GetSrc(operands.lod);
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
uniformParams.lod = floatComp(lodVar, 0);
|
||||
config.useGrad = false;
|
||||
}
|
||||
else if(operands.flags & rdcspv::ImageOperands::Grad)
|
||||
{
|
||||
ShaderVariable ddx = lane.GetSrc(operands.grad.first);
|
||||
ShaderVariable ddy = lane.GetSrc(operands.grad.second);
|
||||
|
||||
config.useGrad = true;
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
RDCASSERTEQUAL(ddx.type, ddy.type);
|
||||
for(int i = 0; i < gradCoords; i++)
|
||||
{
|
||||
uniformParams.ddx_uvw.fv[i] = floatComp(ddx, i);
|
||||
uniformParams.ddy_uvw.fv[i] = floatComp(ddy, i);
|
||||
}
|
||||
}
|
||||
|
||||
if(opcode == rdcspv::Op::ImageSampleImplicitLod ||
|
||||
opcode == rdcspv::Op::ImageSampleProjImplicitLod || opcode == rdcspv::Op::ImageQueryLod)
|
||||
{
|
||||
// use grad to sub in for the implicit lod
|
||||
config.useGrad = true;
|
||||
|
||||
// silently cast parameters to 32-bit floats
|
||||
RDCASSERTEQUAL(ddxCalc.type, ddyCalc.type);
|
||||
for(int i = 0; i < gradCoords; i++)
|
||||
{
|
||||
uniformParams.ddx_uvw.fv[i] = floatComp(ddxCalc, i);
|
||||
uniformParams.ddy_uvw.fv[i] = floatComp(ddyCalc, i);
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
|
||||
if(operands.flags & rdcspv::ImageOperands::ConstOffset)
|
||||
{
|
||||
ShaderVariable constOffset = lane.GetSrc(operands.constOffset);
|
||||
|
||||
// sign extend variables lower than 32-bits
|
||||
for(uint8_t c = 0; c < constOffset.columns; c++)
|
||||
{
|
||||
if(constOffset.type == VarType::SByte)
|
||||
constOffset.value.s32v[c] = constOffset.value.s8v[c];
|
||||
else if(constOffset.type == VarType::SShort)
|
||||
constOffset.value.s32v[c] = constOffset.value.s16v[c];
|
||||
}
|
||||
|
||||
// pass offsets as uniform where possible - when the feature (widely available) on gather
|
||||
// operations. On non-gather operations we are forced to use const offsets and must specialise
|
||||
// the pipeline.
|
||||
if(gatherOp)
|
||||
{
|
||||
uniformParams.dynoffset.x = constOffset.value.s32v[0];
|
||||
if(gradCoords >= 2)
|
||||
uniformParams.dynoffset.y = constOffset.value.s32v[1];
|
||||
if(gradCoords >= 3)
|
||||
uniformParams.dynoffset.z = constOffset.value.s32v[2];
|
||||
}
|
||||
else
|
||||
{
|
||||
config.fetchOffset.x = constOffset.value.s32v[0];
|
||||
if(gradCoords >= 2)
|
||||
config.fetchOffset.y = constOffset.value.s32v[1];
|
||||
if(gradCoords >= 3)
|
||||
config.fetchOffset.z = constOffset.value.s32v[2];
|
||||
}
|
||||
}
|
||||
else if(operands.flags & rdcspv::ImageOperands::Offset)
|
||||
{
|
||||
ShaderVariable offset = lane.GetSrc(operands.offset);
|
||||
|
||||
// sign extend variables lower than 32-bits
|
||||
for(uint8_t c = 0; c < offset.columns; c++)
|
||||
{
|
||||
if(offset.type == VarType::SByte)
|
||||
offset.value.s32v[c] = offset.value.s8v[c];
|
||||
else if(offset.type == VarType::SShort)
|
||||
offset.value.s32v[c] = offset.value.s16v[c];
|
||||
}
|
||||
|
||||
// if the app's shader used a dynamic offset, we can too!
|
||||
uniformParams.dynoffset.x = offset.value.s32v[0];
|
||||
if(gradCoords >= 2)
|
||||
uniformParams.dynoffset.y = offset.value.s32v[1];
|
||||
if(gradCoords >= 3)
|
||||
uniformParams.dynoffset.z = offset.value.s32v[2];
|
||||
}
|
||||
|
||||
GLuint prog = m_pDriver->GetReplay()->MakeShaderDebugSampleProg(config);
|
||||
|
||||
if(prog == 0)
|
||||
{
|
||||
m_pDriver->AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
|
||||
MessageSource::RuntimeWarning,
|
||||
"Failed to compile graphics program for sampling operation");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_pDriver->GetReplay()->UseReplayContext();
|
||||
|
||||
GLRenderState rs;
|
||||
rs.FetchState(m_pDriver);
|
||||
|
||||
// do this 'lazily' so we are already inside the state push and pop
|
||||
if(m_UBO == 0)
|
||||
{
|
||||
GL.glGenBuffers(1, &m_UBO);
|
||||
GL.glBindBuffer(eGL_UNIFORM_BUFFER, m_UBO);
|
||||
GL.glNamedBufferDataEXT(m_UBO, 2048, NULL, eGL_DYNAMIC_DRAW);
|
||||
|
||||
GL.glGenFramebuffers(1, &m_ReadbackFBO);
|
||||
GL.glBindFramebuffer(eGL_FRAMEBUFFER, m_ReadbackFBO);
|
||||
|
||||
GL.glGenTextures(1, &m_ReadbackTex);
|
||||
GL.glBindTexture(eGL_TEXTURE_2D, m_ReadbackTex);
|
||||
|
||||
GL.glBindBuffer(eGL_PIXEL_UNPACK_BUFFER, 0);
|
||||
GL.glTextureImage2DEXT(m_ReadbackTex, eGL_TEXTURE_2D, 0, eGL_RGBA32F, 1, 1, 0, eGL_RGBA,
|
||||
eGL_FLOAT, NULL);
|
||||
GL.glTextureParameteriEXT(m_ReadbackTex, eGL_TEXTURE_2D, eGL_TEXTURE_MAX_LEVEL, 0);
|
||||
GL.glTextureParameteriEXT(m_ReadbackTex, eGL_TEXTURE_2D, eGL_TEXTURE_MIN_FILTER, eGL_NEAREST);
|
||||
GL.glTextureParameteriEXT(m_ReadbackTex, eGL_TEXTURE_2D, eGL_TEXTURE_MAG_FILTER, eGL_NEAREST);
|
||||
GL.glTextureParameteriEXT(m_ReadbackTex, eGL_TEXTURE_2D, eGL_TEXTURE_WRAP_S, eGL_CLAMP_TO_EDGE);
|
||||
GL.glTextureParameteriEXT(m_ReadbackTex, eGL_TEXTURE_2D, eGL_TEXTURE_WRAP_T, eGL_CLAMP_TO_EDGE);
|
||||
GL.glFramebufferTexture2D(eGL_FRAMEBUFFER, eGL_COLOR_ATTACHMENT0, eGL_TEXTURE_2D,
|
||||
m_ReadbackTex, 0);
|
||||
}
|
||||
|
||||
if(m_SampleOffset >= m_SampleBufferSize || m_SampleBuffer == 0)
|
||||
{
|
||||
m_SampleBufferSize = m_SampleBufferSize * 2 + 1024 * mathOpResultByteSize;
|
||||
|
||||
GLuint oldBuf = m_SampleBuffer;
|
||||
GLsizeiptr oldSize = m_SampleBufferSize;
|
||||
|
||||
// resize the buffer up
|
||||
GL.glGenBuffers(1, &m_SampleBuffer);
|
||||
GL.glBindBuffer(eGL_PIXEL_PACK_BUFFER, m_SampleBuffer);
|
||||
GL.glNamedBufferDataEXT(m_SampleBuffer, m_SampleBufferSize, NULL, eGL_DYNAMIC_DRAW);
|
||||
|
||||
if(oldBuf)
|
||||
GL.glNamedCopyBufferSubDataEXT(oldBuf, m_SampleBuffer, 0, 0, oldSize);
|
||||
GL.glBindBuffer(eGL_PIXEL_PACK_BUFFER, 0);
|
||||
}
|
||||
|
||||
GL.glUseProgram(prog);
|
||||
|
||||
GL.glActiveTexture(eGL_TEXTURE0);
|
||||
if(texture.name)
|
||||
GL.glBindTexture(texDetails.curType, texture.name);
|
||||
if(bufTexture.name)
|
||||
GL.glBindTexture(eGL_TEXTURE_BUFFER, bufTexture.name);
|
||||
if(sampler.name)
|
||||
GL.glBindSampler(0, sampler.name);
|
||||
|
||||
GL.glBindBufferBase(eGL_UNIFORM_BUFFER, 0, m_UBO);
|
||||
DebugSampleUBO *cdata =
|
||||
(DebugSampleUBO *)GL.glMapBufferRange(eGL_UNIFORM_BUFFER, 0, sizeof(DebugSampleUBO),
|
||||
GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
|
||||
|
||||
memcpy(cdata, &uniformParams, sizeof(uniformParams));
|
||||
GL.glUnmapBuffer(eGL_UNIFORM_BUFFER);
|
||||
|
||||
// set UVW/DDX/DDY for vertex shader
|
||||
GL.glUniform4fv(GL.glGetUniformLocation(prog, "in_uvwa"), 1, &uniformParams.uvwa.x);
|
||||
GL.glUniform4fv(GL.glGetUniformLocation(prog, "in_ddx"), 1, &uniformParams.ddx_uvw.x);
|
||||
GL.glUniform4fv(GL.glGetUniformLocation(prog, "in_ddy"), 1, &uniformParams.ddy_uvw.x);
|
||||
|
||||
GL.glBindFramebuffer(eGL_FRAMEBUFFER, m_ReadbackFBO);
|
||||
|
||||
float pixel[4] = {};
|
||||
GL.glClearBufferfv(eGL_COLOR, 0, pixel);
|
||||
|
||||
if(HasExt[EXT_depth_bounds_test])
|
||||
GL.glDisable(eGL_DEPTH_BOUNDS_TEST_EXT);
|
||||
GL.glDisable(eGL_DEPTH_TEST);
|
||||
GL.glDisable(eGL_STENCIL_TEST);
|
||||
GL.glDisable(eGL_CULL_FACE);
|
||||
if(HasExt[ARB_texture_multisample_no_array] || HasExt[ARB_texture_multisample])
|
||||
GL.glDisable(eGL_SAMPLE_MASK);
|
||||
GL.glDisable(eGL_SCISSOR_TEST);
|
||||
GL.glDisable(eGL_BLEND);
|
||||
GL.glViewport(0, 0, 1, 1);
|
||||
GL.glDrawArrays(eGL_TRIANGLES, 0, 3);
|
||||
|
||||
RDCASSERT(m_SampleOffset + sampleGatherOpResultByteSize <= m_SampleBufferSize, m_SampleOffset,
|
||||
sampleGatherOpResultByteSize, m_SampleBufferSize);
|
||||
|
||||
GL.glBindBuffer(eGL_PIXEL_PACK_BUFFER, m_SampleBuffer);
|
||||
GL.glReadPixels(0, 0, 1, 1, eGL_RGBA, eGL_FLOAT, (void *)m_SampleOffset);
|
||||
GL.glBindBuffer(eGL_PIXEL_PACK_BUFFER, 0);
|
||||
m_SampleOffset += sampleGatherOpResultByteSize;
|
||||
|
||||
hasResult = false;
|
||||
|
||||
if(lodBiasRestore)
|
||||
{
|
||||
if(sampler.name)
|
||||
GL.glSamplerParameterf(sampler.name, eGL_TEXTURE_LOD_BIAS, lodBiasRestoreValue);
|
||||
else
|
||||
GL.glTextureParameterfvEXT(texture.name, texDetails.curType, eGL_TEXTURE_LOD_BIAS,
|
||||
&lodBiasRestoreValue);
|
||||
}
|
||||
|
||||
rs.ApplyState(m_pDriver);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -557,6 +1170,95 @@ public:
|
||||
const rdcarray<ShaderVariable> &params) override
{
  // Queues a single GLSL.std.450 math operation on the GPU. The operands are uploaded as the
  // uniforms "a", "b" and "c", the opcode as the uniform "op", and a 1x1x1 compute dispatch writes
  // its result into the slot of m_MathBuffer starting at m_MathOffset. Returns true once the
  // dispatch has been issued, or false if the operand count cannot be handled.
  CHECK_DEVICE_THREAD();
  RDCASSERT(params.size() <= 3, params.size());

  // params[0] is inspected below and only three operand uniform names exist, so refuse operand
  // counts that would index out of bounds rather than reading past either array
  if(params.size() == 0 || params.size() > 3)
    return false;

  RDCASSERTEQUAL(params[0].type, VarType::Float);

  GLMarkerRegion markerRegion("QueueCalculateMathOp");

  m_pDriver->GetReplay()->UseReplayContext();

  // snapshot the current GL state so it can be restored once the dispatch is queued
  GLRenderState rs;
  rs.FetchState(m_pDriver);

  // the shader selects the operation by raw value, so the shader-side defines must match the enum
  RDCCOMPILE_ASSERT(SPV_OpSin == (int)rdcspv::GLSLstd450::Sin, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpCos == (int)rdcspv::GLSLstd450::Cos, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpTan == (int)rdcspv::GLSLstd450::Tan, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAsin == (int)rdcspv::GLSLstd450::Asin, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAcos == (int)rdcspv::GLSLstd450::Acos, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAtan == (int)rdcspv::GLSLstd450::Atan, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpSinh == (int)rdcspv::GLSLstd450::Sinh, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpCosh == (int)rdcspv::GLSLstd450::Cosh, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpTanh == (int)rdcspv::GLSLstd450::Tanh, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAsinh == (int)rdcspv::GLSLstd450::Asinh,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAcosh == (int)rdcspv::GLSLstd450::Acosh,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAtanh == (int)rdcspv::GLSLstd450::Atanh,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpExp == (int)rdcspv::GLSLstd450::Exp, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpLog == (int)rdcspv::GLSLstd450::Log, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpExp2 == (int)rdcspv::GLSLstd450::Exp2, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpLog2 == (int)rdcspv::GLSLstd450::Log2, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpSqrt == (int)rdcspv::GLSLstd450::Sqrt, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpInverseSqrt == (int)rdcspv::GLSLstd450::InverseSqrt,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpNormalize == (int)rdcspv::GLSLstd450::Normalize,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpAtan2 == (int)rdcspv::GLSLstd450::Atan2,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpPow == (int)rdcspv::GLSLstd450::Pow, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpFma == (int)rdcspv::GLSLstd450::Fma, "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpLength == (int)rdcspv::GLSLstd450::Length,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpDistance == (int)rdcspv::GLSLstd450::Distance,
                    "Shader defines are mismatched");
  RDCCOMPILE_ASSERT(SPV_OpRefract == (int)rdcspv::GLSLstd450::Refract,
                    "Shader defines are mismatched");

  GLuint mathProg = m_pDriver->GetReplay()->GetShaderDebugMathProg();

  if(m_MathOffset >= m_MathBufferSize || m_MathBuffer == 0)
  {
    // remember the previous buffer and its size BEFORE enlarging, so that the copy below is sized
    // to what the old buffer actually holds
    const GLuint oldBuf = m_MathBuffer;
    const size_t oldSize = m_MathBufferSize;

    m_MathBufferSize = oldSize * 2 + 1024 * mathOpResultByteSize;

    // resize the buffer up
    GL.glGenBuffers(1, &m_MathBuffer);
    GL.glBindBuffer(eGL_SHADER_STORAGE_BUFFER, m_MathBuffer);
    GL.glNamedBufferDataEXT(m_MathBuffer, (GLsizeiptr)m_MathBufferSize, NULL, eGL_DYNAMIC_DRAW);

    if(oldBuf)
    {
      if(oldSize > 0)
      {
        // earlier dispatches wrote their results from a shader, make those writes visible to the
        // buffer copy before carrying the queued results over
        GL.glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
        GL.glNamedCopyBufferSubDataEXT(oldBuf, m_MathBuffer, 0, 0, (GLsizeiptr)oldSize);
      }

      // the old storage is no longer referenced by this object, release it
      GL.glDeleteBuffers(1, &oldBuf);
    }
  }

  // expose only this operation's result slot to the shader
  GL.glBindBufferRange(eGL_SHADER_STORAGE_BUFFER, 0, m_MathBuffer, (GLintptr)m_MathOffset,
                       (GLsizeiptr)mathOpResultByteSize);

  m_MathOffset += mathOpResultByteSize;

  // glUniform* applies to the currently bound program, so bind the math program before setting
  // any of its uniforms
  GL.glUseProgram(mathProg);

  GL.glUniform1i(GL.glGetUniformLocation(mathProg, "outputs"), 0);

  const char *names[] = {"a", "b", "c"};

  // push the parameters
  for(size_t i = 0; i < params.size(); i++)
  {
    RDCASSERTEQUAL(params[i].type, params[0].type);
    GL.glUniform4fv(GL.glGetUniformLocation(mathProg, names[i]), 1, params[i].value.f32v.data());
  }

  // push the operation afterwards
  GL.glUniform1ui(GL.glGetUniformLocation(mathProg, "op"), (uint32_t)op);

  GL.glDispatchCompute(1, 1, 1);

  rs.ApplyState(m_pDriver);

  return true;
}
|
||||
|
||||
@@ -564,9 +1266,78 @@ public:
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults) override
{
  // Reads back every queued result in one go and distributes it to the caller's variables.
  // The CPU-side copy holds the whole math buffer first, followed by the whole sample buffer.
  // Math results are consumed at a stride of mathOpResultByteSize and sample/gather results at a
  // stride of sampleGatherOpResultByteSize, in the order of the respective arrays. Both queue
  // offsets are reset to zero. Returns true.
  //
  // NOTE: the stale unconditional "return false;" left over from the previous stub has been
  // removed - it made the entire readback below unreachable.
  CHECK_DEVICE_THREAD();

  bytebuf gpuResults;
  gpuResults.resize(m_MathBufferSize + m_SampleBufferSize);
  if(m_MathBuffer)
  {
    // the math results were written by compute shader stores, which are only guaranteed to be
    // visible to glGetBufferSubData after a buffer-update barrier
    GL.glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);

    GL.glBindBuffer(eGL_COPY_READ_BUFFER, m_MathBuffer);
    GL.glGetBufferSubData(eGL_COPY_READ_BUFFER, 0, m_MathBufferSize, gpuResults.data());
  }
  if(m_SampleBuffer)
  {
    GL.glBindBuffer(eGL_COPY_READ_BUFFER, m_SampleBuffer);
    GL.glGetBufferSubData(eGL_COPY_READ_BUFFER, 0, m_SampleBufferSize,
                          gpuResults.data() + m_MathBufferSize);
  }

  // everything queued so far has been fetched, start filling both buffers from the beginning
  m_MathOffset = m_SampleOffset = 0;

  uintptr_t bufferEnd = (uintptr_t)gpuResults.end();

  byte *gpuMathOpResults = gpuResults.data();
  for(ShaderVariable *result : mathOpResults)
  {
    size_t countBytes = VarTypeByteSize(result->type) * result->columns;
    RDCASSERT((uintptr_t)gpuMathOpResults + countBytes <= bufferEnd, (uintptr_t)gpuMathOpResults,
              countBytes, bufferEnd);
    RDCASSERT(countBytes <= mathOpResultByteSize, countBytes, mathOpResultByteSize);
    memcpy(result->value.u32v.data(), gpuMathOpResults, countBytes);
    gpuMathOpResults += mathOpResultByteSize;
  }

  byte *gpuSampleGatherOpResults = gpuResults.data() + m_MathBufferSize;
  for(ShaderVariable *result : sampleGatherResults)
  {
    // three typed views of the same four 32-bit components
    float *retf = (float *)gpuSampleGatherOpResults;
    uint32_t *retu = (uint32_t *)gpuSampleGatherOpResults;
    int32_t *reti = (int32_t *)gpuSampleGatherOpResults;

    size_t countBytes = 16;
    RDCASSERT((uintptr_t)gpuSampleGatherOpResults + countBytes <= bufferEnd,
              (uintptr_t)gpuSampleGatherOpResults, countBytes, bufferEnd);
    RDCASSERT(countBytes <= sampleGatherOpResultByteSize, countBytes, sampleGatherOpResultByteSize);
    // convert full precision results, we did all sampling at 32-bit precision
    ShaderVariable &output = *result;
    for(uint8_t c = 0; c < 4; c++)
    {
      if(VarTypeCompType(output.type) == CompType::Float)
        setFloatComp(output, c, retf[c]);
      else if(VarTypeCompType(output.type) == CompType::SInt)
        setIntComp(output, c, reti[c]);
      else
        setUintComp(output, c, retu[c]);
    }
    gpuSampleGatherOpResults += sampleGatherOpResultByteSize;
  }

  return true;
}
|
||||
|
||||
// GL object names for the sampling path. All start at 0 and the sampling code creates the three
// of them together the first time it finds m_UBO == 0.
// uniform buffer, allocated with 2048 bytes, that receives the DebugSampleUBO parameters
GLuint m_UBO = 0;
|
||||
// 1x1 RGBA32F texture that the sampling draw renders into
GLuint m_ReadbackTex = 0;
|
||||
// framebuffer with m_ReadbackTex as its colour attachment 0
GLuint m_ReadbackFBO = 0;
|
||||
|
||||
// buffer bound as shader storage for the math compute dispatches, and its allocated size in bytes
GLuint m_MathBuffer = 0;
|
||||
size_t m_MathBufferSize = 0;
|
||||
// buffer bound as the pixel pack target for glReadPixels of sample results, and its allocated
// size in bytes
GLuint m_SampleBuffer = 0;
|
||||
size_t m_SampleBufferSize = 0;
|
||||
|
||||
// byte offsets at which the next math / sample result is written in the respective buffer; both
// are set back to 0 by the results readback code
size_t m_MathOffset = 0, m_SampleOffset = 0;
|
||||
|
||||
// stride in bytes between consecutive math results (two Vec4f per result)
const size_t mathOpResultByteSize = sizeof(Vec4f) * 2;
|
||||
// stride in bytes between consecutive sample/gather results (one Vec4f per result)
const size_t sampleGatherOpResultByteSize = sizeof(Vec4f);
|
||||
|
||||
virtual bool QueuedOpsHasSpace() override
{
  // this implementation unconditionally reports that more operations can be queued
  return true;
}
|
||||
|
||||
// global over all threads
|
||||
@@ -594,9 +1365,6 @@ private:
|
||||
// descriptor and sampler-descriptor arrays held by this object
// NOTE(review): how these are indexed is not visible in this part of the file - confirm
rdcarray<Descriptor> m_Descriptors;
|
||||
rdcarray<SamplerDescriptor> m_SamplerDescriptors;
|
||||
|
||||
// map from a (ResourceId, float) pair to a GL object name
// NOTE(review): presumably one sampler object per sampler/LOD-bias combination - verify; the
// surrounding hunk header says three lines were removed here, so this pair may be stale
typedef rdcpair<ResourceId, float> SamplerBiasKey;
|
||||
std::map<SamplerBiasKey, GLuint> m_BiasSamplers;
|
||||
|
||||
// cached buffer contents keyed by shader bind index
// NOTE(review): bufferCacheLock presumably guards bufferCache - confirm against the users
Threading::RWLock bufferCacheLock;
|
||||
std::map<ShaderBindIndex, bytebuf> bufferCache;
|
||||
|
||||
@@ -763,7 +1531,7 @@ private:
|
||||
if(m_ResourcesDirty)
|
||||
{
|
||||
GLMarkerRegion region("un-dirtying resources");
|
||||
m_pDriver->ReplayLog(0, m_EventID, eReplay_WithoutDraw);
|
||||
m_pDriver->GetReplay()->ReplayLog(m_EventID, eReplay_WithoutDraw);
|
||||
m_ResourcesDirty = false;
|
||||
}
|
||||
|
||||
@@ -797,7 +1565,7 @@ private:
|
||||
if(m_ResourcesDirty)
|
||||
{
|
||||
GLMarkerRegion region("un-dirtying resources");
|
||||
m_pDriver->ReplayLog(0, m_EventID, eReplay_WithoutDraw);
|
||||
m_pDriver->GetReplay()->ReplayLog(m_EventID, eReplay_WithoutDraw);
|
||||
m_ResourcesDirty = false;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user