diff --git a/renderdoc/driver/shaders/spirv/spirv_debug.h b/renderdoc/driver/shaders/spirv/spirv_debug.h index 5af6d45f6..538a0e682 100644 --- a/renderdoc/driver/shaders/spirv/spirv_debug.h +++ b/renderdoc/driver/shaders/spirv/spirv_debug.h @@ -25,6 +25,7 @@ #pragma once #include "api/replay/rdcarray.h" +#include "maths/vec.h" #include "spirv_common.h" #include "spirv_processor.h" @@ -43,7 +44,17 @@ public: virtual void ReadConstantBufferValue(uint32_t set, uint32_t bind, uint32_t offset, uint32_t byteSize, void *dst) = 0; virtual void FillInputValue(ShaderVariable &var, ShaderBuiltin builtin, uint32_t location, - uint32_t offset) = 0; + uint32_t component) = 0; + + struct DerivativeDeltas + { + Vec4f ddxcoarse; + Vec4f ddycoarse; + Vec4f ddxfine; + Vec4f ddyfine; + }; + + virtual DerivativeDeltas GetDerivative(uint32_t location, uint32_t component) = 0; }; struct GlobalState @@ -172,6 +183,9 @@ private: uint32_t AllocateVariable(const Decorations &varDecorations, const Decorations &curDecorations, DebugVariableType sourceVarType, const rdcstr &sourceName, uint32_t offset, const DataType &inType, ShaderVariable &outVar); + uint32_t ApplyDerivatives(uint32_t quadIndex, const Decorations &curDecorations, + uint32_t location, const DataType &inType, ShaderVariable &outVar); + void AddSourceVars(rdcarray &sourceVars, const DataType &inType, const rdcstr &sourceName, const rdcstr &varName, uint32_t &offset); void MakeSignatureNames(const rdcarray &sigList, rdcarray &sigNames); diff --git a/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp b/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp index f1d8986e6..a4a47cc50 100644 --- a/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp +++ b/renderdoc/driver/shaders/spirv/spirv_debug_setup.cpp @@ -264,7 +264,7 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader // fill the interface variable AllocateVariable(decorations[v.id], decorations[v.id], isInput ? 
DebugVariableType::Input : DebugVariableType::Variable, sourceName, - 0, dataTypes[type.InnerType()], var); + decorations[v.id].location, dataTypes[type.InnerType()], var); for(size_t i = oldSize; i < globalSourceVars.size(); i++) globalSourceVars[i].signatureIndex = @@ -385,6 +385,33 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, const Shader workgroup[i].inputs = active.inputs; workgroup[i].outputs = active.outputs; workgroup[i].ids = active.ids; + // mark as inactive/helper lane + workgroup[i].done = true; + } + + if(stage == ShaderStage::Pixel) + { + // apply derivatives to generate the correct inputs for the quad neighbours + for(uint32_t q = 0; q < workgroupSize; q++) + { + if(q == activeLaneIndex) + continue; + + for(size_t i = 0; i < inputIDs.size(); i++) + { + Id id = inputIDs[i]; + + const DataType &type = dataTypes[idTypes[id]]; + + // global variables should all be pointers into opaque storage + RDCASSERT(type.type == DataType::PointerType); + + const DataType &innertype = dataTypes[type.InnerType()]; + + ApplyDerivatives(q, decorations[id], decorations[id].location, innertype, + workgroup[q].inputs[i]); + } + } } return ret; @@ -980,10 +1007,21 @@ uint32_t Debugger::AllocateVariable(const Decorations &varDecorations, if(sourceVarType == DebugVariableType::Input) { uint32_t location = genLocations ? offset : 0; + + uint32_t component = 0; + for(const DecorationAndParamData &dec : curDecorations.others) + { + if(dec.value == Decoration::Component) + { + component = dec.component; + break; + } + } + apiWrapper->FillInputValue( outVar, builtin, (curDecorations.flags & Decorations::HasLocation) ? curDecorations.location : location, - (curDecorations.flags & Decorations::HasOffset) ? 
curDecorations.offset : 0); + component); } else if(sourceVarType == DebugVariableType::Constant) { @@ -1044,6 +1082,226 @@ uint32_t Debugger::AllocateVariable(const Decorations &varDecorations, return outVar.rows; } +uint32_t Debugger::ApplyDerivatives(uint32_t quadIndex, const Decorations &curDecorations, + uint32_t location, const DataType &inType, ShaderVariable &outVar) +{ + switch(inType.type) + { + case DataType::PointerType: + { + RDCERR("Pointers not supported in interface variables"); + return 0; + } + case DataType::ScalarType: + case DataType::VectorType: + case DataType::MatrixType: break; + case DataType::StructType: + { + uint32_t childLocation = 0; + for(int32_t i = 0; i < inType.children.count(); i++) + { + const Decorations &childDecorations = inType.children[i].decorations; + + uint32_t locations = ApplyDerivatives(quadIndex, childDecorations, location + childLocation, + dataTypes[inType.children[i].type], outVar.members[i]); + + childLocation += locations; + } + return childLocation; + } + case DataType::ArrayType: + { + uint32_t childLocation = 0; + + ShaderVariable len = GetActiveLane().ids[inType.length]; + for(uint32_t i = 0; i < len.value.u.x; i++) + { + uint32_t locations = ApplyDerivatives(quadIndex, curDecorations, location + childLocation, + dataTypes[inType.InnerType()], outVar.members[i]); + + childLocation += locations; + } + return childLocation; + } + case DataType::ImageType: + case DataType::SamplerType: + case DataType::SampledImageType: + case DataType::UnknownType: + { + RDCERR("Unexpected variable type %d", inType.type); + return 0; + } + } + + // only floats have derivatives + if(outVar.type == VarType::Float) + { + uint32_t component = 0; + for(const DecorationAndParamData &dec : curDecorations.others) + { + if(dec.value == Decoration::Component) + { + component = dec.component; + break; + } + } + + // We make the assumption that the coarse derivatives are generated from (0,0) in the quad, and + // fine derivatives are 
generated from the destination index and its neighbours in X and Y. + // This isn't spec'd but we must assume something and this will hopefully get us closest to + // reproducing actual results. + // + // For debugging, we need members of the quad to be able to generate coarse and fine + // derivatives. + // + // For (0,0) we only need the coarse derivatives to get our neighbours (1,0) and (0,1) which + // will give us coarse and fine derivatives being identical. + // + // For the others we will need to use a combination of coarse and fine derivatives to get the + // diagonal element in the quad. In the examples below, remember that the quad indices are: + // + // +---+---+ + // | 0 | 1 | + // +---+---+ + // | 2 | 3 | + // +---+---+ + // + // And that we have definitions of the derivatives: + // + // ddx_coarse = (1,0) - (0,0) + // ddy_coarse = (0,1) - (0,0) + // + // i.e. the same for all members of the quad + // + // ddx_fine = (x,y) - (1-x,y) + // ddy_fine = (x,y) - (x,1-y) + // + // i.e. the difference to the neighbour of our desired invocation (the one we have the actual + // inputs for, from gathering above). + // + // So e.g. if our thread is at (1,1) destIdx = 3 + // + // (1,0) = (1,1) - ddx_fine + // (0,1) = (1,1) - ddy_fine + // (0,0) = (1,1) - ddy_fine - ddx_coarse + // + // and ddy_coarse is unused. For (1,0) destIdx = 1: + // + // (1,1) = (1,0) + ddy_fine + // (0,1) = (1,0) - ddx_coarse + ddy_coarse + // (0,0) = (1,0) - ddx_coarse + // + // and ddx_fine is unused (it's identical to ddx_coarse anyway) + + if(curDecorations.flags & Decorations::HasLocation) + location = curDecorations.location; + + DebugAPIWrapper::DerivativeDeltas derivs = apiWrapper->GetDerivative(location, component); + + Vec4f &dst = *(Vec4f *)outVar.value.fv; + + // in the diagrams below * marks the active lane index. 
+ // + // V and ^ == coarse ddy + // , and ` == fine ddy + // < and > == coarse ddx + // { and } == fine ddx + // + // We are basically making one or two cardinal direction moves from the starting point + // (activeLaneIndex) to the end point (quadIndex). + RDCASSERTNOTEQUAL(activeLaneIndex, quadIndex); + + switch(activeLaneIndex) + { + case 0: + { + // +---+---+ + // |*0 > 1 | + // +-V-+-V-+ + // | 2 | 3 | + // +---+---+ + switch(quadIndex) + { + case 0: break; + case 1: dst += derivs.ddxcoarse; break; + case 2: dst += derivs.ddycoarse; break; + case 3: + dst += derivs.ddxcoarse; + dst += derivs.ddycoarse; + break; + default: break; + } + break; + } + case 1: + { + // we need to use fine to get from 1 to 3 as coarse only ever involves 0->1 and 0->2 + // +---+---+ + // | 0 < 1*| + // +-V-+-,-+ + // | 2 | 3 | + // +---+---+ + switch(quadIndex) + { + case 0: dst -= derivs.ddxcoarse; break; + case 1: break; + case 2: + dst -= derivs.ddxcoarse; + dst += derivs.ddycoarse; + break; + case 3: dst += derivs.ddyfine; break; + default: break; + } + break; + } + case 2: + { + // +---+---+ + // | 0 > 1 | + // +-^-+---+ + // |*2 } 3 | + // +---+---+ + switch(quadIndex) + { + case 0: dst -= derivs.ddycoarse; break; + case 1: + dst -= derivs.ddycoarse; + dst += derivs.ddxcoarse; + break; + case 2: break; + case 3: dst += derivs.ddxfine; break; + default: break; + } + break; + } + case 3: + { + // +---+---+ + // | 0 < 1 | + // +---+-`-+ + // | 2 { 3*| + // +---+---+ + switch(quadIndex) + { + case 0: + dst -= derivs.ddyfine; + dst -= derivs.ddxcoarse; + break; + case 1: dst -= derivs.ddyfine; break; + case 2: dst -= derivs.ddxfine; break; + case 3: break; + default: break; + } + break; + } + default: break; + } + } + + // each row consumes a new location + return outVar.rows; +} + void Debugger::PreParse(uint32_t maxId) { Processor::PreParse(maxId); diff --git a/renderdoc/driver/vulkan/vk_shaderdebug.cpp b/renderdoc/driver/vulkan/vk_shaderdebug.cpp index e6291f584..a32bc3c54 100644 
--- a/renderdoc/driver/vulkan/vk_shaderdebug.cpp +++ b/renderdoc/driver/vulkan/vk_shaderdebug.cpp @@ -61,7 +61,7 @@ public: } virtual void FillInputValue(ShaderVariable &var, ShaderBuiltin builtin, uint32_t location, - uint32_t offset) override + uint32_t component) override { if(builtin != ShaderBuiltin::Undefined) { @@ -76,7 +76,8 @@ public: return; } - RDCASSERT(offset == 0); + // TODO handle components + RDCASSERT(component == 0); if(location < location_inputs.size()) { @@ -84,13 +85,28 @@ public: return; } - RDCERR("Couldn't get input for location=%u, offset=%u", location, offset); + RDCERR("Couldn't get input for %s at location=%u, component=%u", var.name.c_str(), location, + component); + } + + virtual DerivativeDeltas GetDerivative(uint32_t location, uint32_t component) override + { + // TODO handle components + RDCASSERT(component == 0); + + if(location < derivatives.size()) + return derivatives[location]; + + RDCERR("Couldn't get derivative for location=%u, component=%u", location, component); + return DerivativeDeltas(); } std::map, bytebuf> cbuffers; std::map builtin_inputs; rdcarray location_inputs; + rdcarray derivatives; + private: WrappedVulkan *m_pDriver = NULL; }; @@ -113,6 +129,16 @@ enum class InputSpecConstant static const uint32_t validMagicNumber = 12345; +struct PSHit +{ + Vec4f pos; + uint32_t prim; + uint32_t sample; + uint32_t valid; + uint32_t padding; + // PSInput base, ddx, .... 
+}; + static void CreatePSInputFetcher(rdcarray &fragspv, uint32_t &structStride, VulkanCreationInfo::ShaderModuleReflection &shadRefl, StorageMode storageMode, bool usePrimitiveID, bool useSampleID) @@ -487,9 +513,6 @@ static void CreatePSInputFetcher(rdcarray &fragspv, uint32_t &structSt member++; } - // we have 5 input structs, and two vectors for our data - structStride = sizeof(Vec4f) + sizeof(Vec4f) + structStride * 5; - rdcspv::Id PSHitRTArray = editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), PSHit)); editor.AddDecoration(rdcspv::OpDecorate( @@ -1145,7 +1168,10 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_ m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(graphicsInfo, state.graphics.pipeline); - VkDeviceSize feedbackStorageSize = overdrawLevels * structStride + sizeof(Vec4f) + 1024; + // struct size is PSHit header plus 5x structStride = base, ddxcoarse, ddycoarse, ddxfine, ddyfine + uint32_t structSize = sizeof(PSHit) + structStride * 5; + + VkDeviceSize feedbackStorageSize = overdrawLevels * structSize + sizeof(Vec4f) + 1024; if(feedbackStorageSize > m_BindlessFeedback.FeedbackBuffer.sz) { @@ -1387,27 +1413,150 @@ ShaderDebugTrace *VulkanReplay::DebugPixel(uint32_t eventId, uint32_t x, uint32_ base += sizeof(Vec4f); - struct PSHit - { - Vec4f pos; - uint32_t prim; - uint32_t sample; - uint32_t valid; - uint32_t padding; - // PSInput base, ddx, .... - }; + PSHit *winner = NULL; + + RDCLOG("Got %u hits", numHits); + + // if we encounter multiple hits at our destination pixel co-ord (or any other) we + // check to see if a specific primitive was requested (via primitive parameter not + // being set to ~0U). If it was, debug that pixel, otherwise do a best-estimate + // of which fragment was the last to successfully depth test and debug that, just by + // checking if the depth test is ordered and picking the final fragment in the series + + // figure out the TL pixel's coords. 
Assume even top left (towards 0,0) + // this isn't spec'd but is a reasonable assumption. + int xTL = x & (~1); + int yTL = y & (~1); + + // get the index of our desired pixel + int destIdx = (x - xTL) + 2 * (y - yTL); + + VkCompareOp depthOp = pipe.depthCompareOp; + + // depth tests disabled acts the same as always compare mode + if(!pipe.depthTestEnable) + depthOp = VK_COMPARE_OP_ALWAYS; for(uint32_t i = 0; i < numHits; i++) { PSHit *hit = (PSHit *)(base + structStride * i); - RDCLOG("Hit %u at %f, %f, %f, %f", i, hit->pos.x, hit->pos.y, hit->pos.z, hit->pos.w); + if(hit->valid != validMagicNumber) + { + RDCWARN("Hit %u doesn't have valid magic number", i); + continue; + } + + // see if this hit is a closer match than the previous winner. + + // if there's no previous winner it's clearly better + if(winner == NULL) + { + winner = hit; + continue; + } + + // if we're looking for a specific primitive + if(primitive != ~0U) + { + // and this hit is a match and the winner isn't, it's better + if(winner->prim != primitive && hit->prim == primitive) + { + winner = hit; + continue; + } + + // if the winner is a match and we're not, we can't be better so stop now + if(winner->prim == primitive && hit->prim != primitive) + { + continue; + } + } + + // if we're looking for a particular sample, check that + if(sample != ~0U) + { + if(winner->sample != sample && hit->sample == sample) + { + winner = hit; + continue; + } + + if(winner->sample == sample && hit->sample != sample) + { + continue; + } + } + + // otherwise apply depth test + switch(depthOp) + { + case VK_COMPARE_OP_NEVER: + case VK_COMPARE_OP_EQUAL: + case VK_COMPARE_OP_NOT_EQUAL: + case VK_COMPARE_OP_ALWAYS: + default: + // don't emulate equal or not equal since we don't know the reference value. 
Take any hit + // (thus meaning the last hit) + winner = hit; + break; + case VK_COMPARE_OP_LESS: + if(hit->pos.z < winner->pos.z) + winner = hit; + break; + case VK_COMPARE_OP_LESS_OR_EQUAL: + if(hit->pos.z <= winner->pos.z) + winner = hit; + break; + case VK_COMPARE_OP_GREATER: + if(hit->pos.z > winner->pos.z) + winner = hit; + break; + case VK_COMPARE_OP_GREATER_OR_EQUAL: + if(hit->pos.z >= winner->pos.z) + winner = hit; + break; + } } - rdcspv::Debugger *debugger = new rdcspv::Debugger; - debugger->Parse(shader.spirv.GetSPIRV()); - ShaderDebugTrace *ret = debugger->BeginDebug(apiWrapper, ShaderStage::Pixel, entryPoint, spec, - shadRefl.instructionLines, shadRefl.patchData, 0); + ShaderDebugTrace *ret = NULL; + + if(winner) + { + rdcspv::Debugger *debugger = new rdcspv::Debugger; + debugger->Parse(shader.spirv.GetSPIRV()); + + // the data immediately follows the PSHit header. Every piece of data is vec4 aligned, and the + // output is in input signature order. + byte *PSInputs = (byte *)(winner + 1); + Vec4f *value = (Vec4f *)(PSInputs + 0 * structStride); + Vec4f *ddxcoarse = (Vec4f *)(PSInputs + 1 * structStride); + Vec4f *ddycoarse = (Vec4f *)(PSInputs + 2 * structStride); + Vec4f *ddxfine = (Vec4f *)(PSInputs + 3 * structStride); + Vec4f *ddyfine = (Vec4f *)(PSInputs + 4 * structStride); + + rdcarray &locations = apiWrapper->location_inputs; + for(size_t i = 0; i < shadRefl.refl.inputSignature.size(); i++) + { + const SigParameter ¶m = shadRefl.refl.inputSignature[i]; + locations.resize(RDCMAX((uint32_t)locations.size(), param.regIndex + 1)); + apiWrapper->derivatives.resize(RDCMAX((uint32_t)locations.size(), param.regIndex + 1)); + + memcpy(&locations[param.regIndex].value.uv, &value[i], sizeof(Vec4f)); + memcpy(&apiWrapper->derivatives[param.regIndex].ddxcoarse, &ddxcoarse[i], sizeof(Vec4f)); + memcpy(&apiWrapper->derivatives[param.regIndex].ddycoarse, &ddycoarse[i], sizeof(Vec4f)); + memcpy(&apiWrapper->derivatives[param.regIndex].ddxfine, &ddxfine[i], 
sizeof(Vec4f)); + memcpy(&apiWrapper->derivatives[param.regIndex].ddyfine, &ddyfine[i], sizeof(Vec4f)); + } + + ret = debugger->BeginDebug(apiWrapper, ShaderStage::Pixel, entryPoint, spec, + shadRefl.instructionLines, shadRefl.patchData, destIdx); + } + else + { + RDCLOG("Didn't get any valid hit to debug"); + delete apiWrapper; + } if(descpool != VK_NULL_HANDLE) { diff --git a/renderdoc/maths/vec.h b/renderdoc/maths/vec.h index c98c15acb..cdd36db81 100644 --- a/renderdoc/maths/vec.h +++ b/renderdoc/maths/vec.h @@ -109,6 +109,38 @@ inline Vec3f operator+=(Vec3f &a, const Vec3f &b) return a; } +inline Vec4f operator*(const Vec4f &a, const float b) +{ + return Vec4f(a.x * b, a.y * b, a.z * b, a.w * b); +} + +inline Vec4f operator+(const Vec4f &a, const Vec4f &b) +{ + return Vec4f(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); +} + +inline Vec4f operator-(const Vec4f &a) +{ + return Vec4f(-a.x, -a.y, -a.z, -a.w); +} + +inline Vec4f operator-(const Vec4f &a, const Vec4f &b) +{ + return a + (-b); +} + +inline Vec4f operator-=(Vec4f &a, const Vec4f &b) +{ + a = a - b; + return a; +} + +inline Vec4f operator+=(Vec4f &a, const Vec4f &b) +{ + a = a + b; + return a; +} + struct Vec4u { Vec4u(uint32_t X = 0, uint32_t Y = 0, uint32_t Z = 0, uint32_t W = 0)