Files
renderdoc/renderdoc/driver/vulkan/vk_shader_feedback.cpp
T

1934 lines
66 KiB
C++

/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2019-2021 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#include <ctype.h>
#include <float.h>
#include <string.h>
#include "common/formatting.h"
#include "core/settings.h"
#include "driver/shaders/spirv/spirv_editor.h"
#include "driver/shaders/spirv/spirv_op_helpers.h"
#include "vk_core.h"
#include "vk_debug.h"
#include "vk_replay.h"
#include "vk_shader_cache.h"
// Configuration values consulted by the feedback / printf instrumentation in this file. Each
// RDOC_CONFIG entry gives the value type, the setting name, its default, and a description.
RDOC_CONFIG(rdcstr, Vulkan_Debug_FeedbackDumpDirPath, "",
"Path to dump bindless feedback annotation generated SPIR-V files.");
RDOC_CONFIG(
bool, Vulkan_BindlessFeedback, true,
"Enable fetching from GPU which descriptors were dynamically used in descriptor arrays.");
// AnnotateShader only builds the printf location gathering and patches printf instructions when
// this is enabled
RDOC_CONFIG(bool, Vulkan_PrintfFetch, true, "Enable fetching printf messages from GPU.");
// the value is in bytes; AnnotateShader divides it by sizeof(uint32_t) to get the word offset that
// printf writes are clamped to
RDOC_CONFIG(uint32_t, Vulkan_Debug_PrintfBufferSize, 64 * 1024,
"How many bytes to reserve for a printf output buffer.");
// NOTE(review): presumably this setting is defined in another source file and only referenced
// from here - confirm where the matching RDOC_CONFIG lives
RDOC_EXTERN_CONFIG(bool, Vulkan_Debug_DisableBufferDeviceAddress);
// the shader stage is shifted up by this many bits and ORed with the printf identifier to form the
// header word of each printf packet. AnnotateShader asserts that the SPIR-V module is smaller than
// 0xfffffff words so the identifier stays within the low 28 bits.
static const uint32_t ShaderStageHeaderBitShift = 28U;
// Per-binding entry stored in the offset map that is passed to AnnotateShader, keyed by
// rdcspv::Binding.
struct feedbackData
{
// byte offset for this binding. AnnotateShader either uses it directly as a 64-bit constant when
// buffer addresses are in use, or divides it by 4 to get a uint32 index otherwise.
uint64_t offset;
// NOTE(review): presumably the number of descriptors in the binding's array - it is not read in
// this part of the file, confirm against the code that fills the offset map
uint32_t numEntries;
};
// Describes one printf instruction found while annotating a shader. AnnotateShader stores these in
// a map keyed by the identifier it assigns to the printf instruction.
struct PrintfData
{
// the format string after it has been passed through PatchFormatString
rdcstr format;
// vectors are expanded so there's one for each component (as printf will expect)
rdcarray<rdcspv::Scalar> argTypes;
// number of uint32 words the packed arguments take up. This is recorded before the header and
// location words are added to the packet, so it counts only the argument words.
size_t payloadWords;
};
struct ShaderPrintfArgs : public StringFormat::Args
{
public:
ShaderPrintfArgs(const uint32_t *payload, const PrintfData &formats)
: m_Start(payload), m_Cur(payload), m_Idx(0), m_Formats(formats)
{
}
void reset() override
{
m_Cur = m_Start;
m_Idx = 0;
}
int get_int() override
{
int32_t ret = *(int32_t *)m_Cur;
m_Idx++;
m_Cur++;
return ret;
}
unsigned int get_uint() override
{
uint32_t ret = *(uint32_t *)m_Cur;
m_Idx++;
m_Cur++;
return ret;
}
double get_double() override
{
// here we need to know if a real double was stored or not. It probably isn't but we handle it
if(m_Idx < m_Formats.argTypes.size())
{
if(m_Formats.argTypes[m_Idx].width == 64)
{
double ret = *(double *)m_Cur;
m_Idx++;
m_Cur += 2;
return ret;
}
else
{
float ret = *(float *)m_Cur;
m_Idx++;
m_Cur++;
return ret;
}
}
else
{
return 0.0;
}
}
void *get_ptr() override
{
m_Idx++;
return NULL;
}
uint64_t get_uint64() override
{
uint64_t ret = *(uint64_t *)m_Cur;
m_Idx++;
m_Cur += 2;
return ret;
}
size_t get_size() override { return sizeof(size_t) == 8 ? (size_t)get_uint64() : get_uint(); }
private:
const uint32_t *m_Cur;
const uint32_t *m_Start;
size_t m_Idx;
const PrintfData &m_Formats;
};
// Rewrites a shader printf format string into one the host-side formatter can consume, and returns
// the rewritten string.
//
// Two transformations are applied to each '%' specifier:
//  - vector specifiers such as %XX.YYv2f are expanded into one scalar specifier per component,
//    separated by ", " (%XX.YYf, %XX.YYf)
//  - the 64-bit unsigned specifier, which shader printf writes as %ul, is converted to %llu
//
// "%%" is left alone. If a specifier is malformed (no conversion letter, or a vector size other
// than 2-4) an error is logged and the string is returned with only the specifiers before it
// patched.
rdcstr PatchFormatString(rdcstr format)
{
  // the <ctype.h> functions have undefined behaviour for negative values, which a plain char holds
  // for any non-ASCII byte, so classify via unsigned char
  auto isLetter = [](char c) { return isalpha((unsigned char)c) != 0; };

  for(size_t i = 0; i < format.size(); i++)
  {
    if(format[i] != '%')
      continue;

    size_t start = i;
    i++;

    // a '%' as the very last character has no specifier following it
    if(i >= format.size())
    {
      RDCERR("Malformed format string '%s'", format.c_str());
      break;
    }

    // literal percent, nothing to patch
    if(format[i] == '%')
      continue;

    // skip flags, width and precision to reach the first letter
    while(i < format.size() && !isLetter(format[i]))
      i++;

    // malformed string, abort
    if(i >= format.size())
    {
      RDCERR("Malformed format string '%s'", format.c_str());
      break;
    }

    // if the first letter is v, this is a vector format
    if(format[i] == 'v' || format[i] == 'V')
    {
      size_t vecStart = i;

      // the component count must follow the v. If the string ends here leave the count at 0 so it
      // is rejected below
      int vecsize = 0;
      if(i + 1 < format.size())
        vecsize = int(format[i + 1]) - int('0');

      if(vecsize < 2 || vecsize > 4)
      {
        RDCERR("Malformed format string '%s'", format.c_str());
        break;
      }

      // skip the v and the [234]
      i += 2;

      if(i >= format.size())
      {
        RDCERR("Malformed format string '%s'", format.c_str());
        break;
      }

      bool int64 = false;
      // if the final letter is u, we need to peek ahead to see if there's a l following
      if(format[i] == 'u' && i + 1 < format.size() && format[i + 1] == 'l')
      {
        i++;
        int64 = true;
      }

      // everything from the % up to and including the final letter
      rdcstr componentFormat = format.substr(start, i - start + 1);

      // remove the vX from the component format
      componentFormat.erase(vecStart - start, 2);

      // if it's a 64-bit ul, transform to llu
      if(int64)
      {
        componentFormat.pop_back();
        componentFormat.pop_back();
        componentFormat += "llu";
      }

      rdcstr vectorExpandedFormat;
      for(int v = 0; v < vecsize; v++)
      {
        vectorExpandedFormat += componentFormat;
        if(v + 1 < vecsize)
          vectorExpandedFormat += ", ";
      }

      // replace the vector formatter with the expanded per-component formatters. i is left where
      // it was, so the scan resumes part-way through the text that was just inserted.
      format.erase(start, i - start + 1);
      format.insert(start, vectorExpandedFormat);
      continue;
    }

    // if the letter is u, see if the next is l. If so we translate ul to llu
    if(format[i] == 'u' && i + 1 < format.size() && format[i + 1] == 'l')
    {
      format[i] = 'l';
      format[i + 1] = 'u';
      format.insert(i, 'l');
    }
  }

  return format;
}
void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchData,
ShaderStage stage, const char *entryName,
const std::map<rdcspv::Binding, feedbackData> &offsetMap, uint32_t maxSlot,
bool usePrimitiveID, VkDeviceAddress addr, bool bufferAddressKHR,
rdcarray<uint32_t> &modSpirv, std::map<uint32_t, PrintfData> &printfData)
{
// calculate offsets for IDs on the original unmodified SPIR-V. The editor may insert some nops,
// so we do it manually here
std::map<rdcspv::Id, uint32_t> idToOffset;
for(rdcspv::Iter it(modSpirv, rdcspv::FirstRealWord); it; it++)
idToOffset[rdcspv::OpDecoder(it).result] = (uint32_t)it.offs();
rdcspv::Editor editor(modSpirv);
editor.Prepare();
RDCASSERTMSG("SPIR-V module is too large to encode instruction ID!", modSpirv.size() < 0xfffffffU);
const bool useBufferAddress = (addr != 0);
const uint32_t targetIndexWidth = useBufferAddress ? 64 : 32;
// store the maximum slot we can use, for clamping outputs to avoid writing out of bounds
rdcspv::Id maxSlotID = useBufferAddress ? editor.AddConstantImmediate<uint64_t>(maxSlot)
: editor.AddConstantImmediate<uint32_t>(maxSlot);
rdcspv::Id maxPrintfWordOffset =
editor.AddConstantImmediate<uint32_t>(Vulkan_Debug_PrintfBufferSize() / sizeof(uint32_t));
rdcspv::Id uint32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
rdcspv::Id int32Type = editor.DeclareType(rdcspv::scalar<int32_t>());
rdcspv::Id f32Type = editor.DeclareType(rdcspv::scalar<float>());
rdcspv::Id uint64Type, int64Type;
rdcspv::Id uint32StructID;
rdcspv::Id funcParamType;
if(useBufferAddress)
{
// declare the int64 types we'll need
uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
int64Type = editor.DeclareType(rdcspv::scalar<int64_t>());
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {uint32Type}));
// any function parameters we add are uint64 byte offsets
funcParamType = uint64Type;
}
else
{
rdcspv::Id runtimeArrayID =
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), uint32Type));
editor.AddDecoration(rdcspv::OpDecorate(
runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
// any function parameters we add are uint32 indices
funcParamType = uint32Type;
// if the module declares int64 capability, ensure uint64/int64 are declared in case we need to
// transform them for printf arguments
if(editor.HasCapability(rdcspv::Capability::Int64))
{
uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
int64Type = editor.DeclareType(rdcspv::scalar<int64_t>());
}
}
editor.SetName(uint32StructID, "__rd_feedbackStruct");
editor.AddDecoration(rdcspv::OpMemberDecorate(
uint32StructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
// map from variable ID to watch, to variable ID to get offset from (as a SPIR-V constant,
// or as either uint64 byte offset for buffer addressing or uint32 ssbo index otherwise)
std::map<rdcspv::Id, rdcspv::Id> varLookup;
// iterate over all variables. We do this here because in the absence of the buffer address
// extension we might declare our own below and patch bindings - so we need to look these up now
for(const rdcspv::Variable &var : editor.GetGlobals())
{
// skip variables without one of these storage classes, as they are not descriptors
if(var.storage != rdcspv::StorageClass::UniformConstant &&
var.storage != rdcspv::StorageClass::Uniform &&
var.storage != rdcspv::StorageClass::StorageBuffer)
continue;
// get this variable's binding info
rdcspv::Binding bind = editor.GetBinding(var.id);
// if this is one of the bindings we care about
auto it = offsetMap.find(bind);
if(it != offsetMap.end())
{
// store the offset for this variable so we watch for access chains and know where to store to
if(useBufferAddress)
{
rdcspv::Id id = varLookup[var.id] = editor.AddConstantImmediate<uint64_t>(it->second.offset);
editor.SetName(id, StringFormat::Fmt("__feedbackOffset_set%u_bind%u", it->first.set,
it->first.binding));
}
else
{
// check that the offset fits in 32-bit word, convert byte offset to uint32 index
uint64_t index = it->second.offset / 4;
RDCASSERT(index < 0xFFFFFFFFULL, bind.set, bind.binding, it->second.offset);
rdcspv::Id id = varLookup[var.id] = editor.AddConstantImmediate<uint32_t>(uint32_t(index));
editor.SetName(
id, StringFormat::Fmt("__feedbackIndex_set%u_bind%u", it->first.set, it->first.binding));
}
}
}
rdcspv::Id bufferAddressConst, ssboVar, uint32ptrtype;
if(usePrimitiveID && stage == ShaderStage::Fragment && Vulkan_PrintfFetch())
{
editor.AddCapability(rdcspv::Capability::Geometry);
}
rdcarray<rdcspv::Id> newGlobals;
if(useBufferAddress)
{
// add the extension
editor.AddExtension(bufferAddressKHR ? "SPV_KHR_physical_storage_buffer"
: "SPV_EXT_physical_storage_buffer");
// change the memory model to physical storage buffer 64
rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
rdcspv::OpMemoryModel model(it);
model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
it = model;
// add capabilities
editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
editor.AddCapability(rdcspv::Capability::Int64);
// declare the address constants and make our pointers physical storage buffer pointers
bufferAddressConst = editor.AddConstantImmediate<uint64_t>(addr);
uint32ptrtype =
editor.DeclareType(rdcspv::Pointer(uint32Type, rdcspv::StorageClass::PhysicalStorageBuffer));
editor.SetName(bufferAddressConst, "__rd_feedbackAddress");
// struct is block decorated
editor.AddDecoration(rdcspv::OpDecorate(uint32StructID, rdcspv::Decoration::Block));
}
else
{
rdcspv::StorageClass ssboClass = editor.StorageBufferClass();
// the pointers are SSBO pointers
rdcspv::Id bufptrtype = editor.DeclareType(rdcspv::Pointer(uint32StructID, ssboClass));
uint32ptrtype = editor.DeclareType(rdcspv::Pointer(uint32Type, ssboClass));
// patch all bindings up by 1
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
// we will use descriptor set 0 for our own purposes if we don't have a buffer address.
//
// Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
// redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
// where all descriptor sets are already used. In theory we only have to do this with set 0,
// but that requires knowing which variables are in set 0 and it's simpler to increase all
// bindings.
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate dec(it);
if(dec.decoration == rdcspv::Decoration::Binding)
{
RDCASSERT(dec.decoration.binding != 0xffffffff);
dec.decoration.binding += 1;
it = dec;
}
}
}
// add our SSBO variable, at set 0 binding 0
ssboVar = editor.MakeId();
editor.AddVariable(rdcspv::OpVariable(bufptrtype, ssboVar, ssboClass));
editor.AddDecoration(
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
editor.AddDecoration(
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));
if(editor.EntryPointAllGlobals())
newGlobals.push_back(ssboVar);
editor.SetName(ssboVar, "__rd_feedbackBuffer");
editor.DecorateStorageBufferStruct(uint32StructID);
}
rdcspv::Id rtarrayOffset = editor.AddConstantImmediate<uint32_t>(0U);
rdcspv::Id printfArrayOffset = rtarrayOffset;
rdcspv::Id zero = rtarrayOffset;
rdcspv::Id usedValue = editor.AddConstantImmediate<uint32_t>(0xFFFFFFFFU);
rdcspv::Id scope = editor.AddConstantImmediate<uint32_t>((uint32_t)rdcspv::Scope::Invocation);
rdcspv::Id semantics = editor.AddConstantImmediate<uint32_t>(0U);
rdcspv::Id uint32shift = editor.AddConstantImmediate<uint32_t>(2U);
rdcspv::MemoryAccessAndParamDatas memoryAccess;
memoryAccess.setAligned(sizeof(uint32_t));
rdcspv::Id printfIncrement;
if(useBufferAddress)
{
printfIncrement = editor.AddConstantImmediate<uint64_t>(sizeof(uint32_t));
}
else
{
printfIncrement = editor.AddConstantImmediate<uint32_t>(1U);
}
rdcspv::Id glsl450 = editor.ImportExtInst("GLSL.std.450");
std::map<rdcspv::Id, rdcspv::Scalar> intTypeLookup;
for(auto scalarType : editor.GetTypeInfo<rdcspv::Scalar>())
if(scalarType.first.type == rdcspv::Op::TypeInt)
intTypeLookup[scalarType.second] = scalarType.first;
rdcspv::Id entryID;
for(const rdcspv::EntryPoint &entry : editor.GetEntries())
{
if(entry.name == entryName && MakeShaderStage(entry.executionModel) == stage)
{
entryID = entry.id;
break;
}
}
rdcspv::Id uvec2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
rdcspv::Id uvec3Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 3));
rdcspv::Id uvec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
// we'll initialise this at the start of the entry point, and use it globally to get the location
// for printf statements
rdcspv::Id printfLocationVar = editor.MakeId();
if(Vulkan_PrintfFetch())
{
editor.AddVariable(rdcspv::OpVariable(
editor.DeclareType(rdcspv::Pointer(uvec3Type, rdcspv::StorageClass::Private)),
printfLocationVar, rdcspv::StorageClass::Private));
if(editor.EntryPointAllGlobals())
newGlobals.push_back(printfLocationVar);
}
rdcspv::Id shaderStageConstant =
editor.AddConstantImmediate<uint32_t>(uint32_t(stage) << ShaderStageHeaderBitShift);
rdcspv::Id int64wordshift = editor.AddConstantImmediate<uint32_t>(32U);
// build up operations to pull in the location from globals - either existing or ones we add
rdcspv::OperationList locationGather;
if(Vulkan_PrintfFetch())
{
rdcarray<rdcspv::Id> idxs;
auto fetchOrAddGlobalInput = [&editor, &idxs, &refl, &patchData, &locationGather, &newGlobals](
const char *name, ShaderBuiltin builtin, rdcspv::BuiltIn spvBuiltin, rdcspv::Id varType) {
rdcspv::Id ret;
rdcspv::Id ptrType = editor.DeclareType(rdcspv::Pointer(varType, rdcspv::StorageClass::Input));
for(size_t i = 0; i < refl.inputSignature.size(); i++)
{
if(refl.inputSignature[i].systemValue == builtin)
{
rdcspv::Id loadType = varType;
if(refl.inputSignature[i].varType == VarType::SInt)
{
if(refl.inputSignature[i].compCount == 1)
loadType = editor.DeclareType(rdcspv::scalar<int32_t>());
else
loadType = editor.DeclareType(
rdcspv::Vector(rdcspv::scalar<int32_t>(), refl.inputSignature[i].compCount));
}
if(patchData.inputs[i].accessChain.empty())
{
ret =
locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), patchData.inputs[i].ID));
}
else
{
rdcarray<rdcspv::Id> chain;
for(uint32_t accessIdx : patchData.inputs[i].accessChain)
{
idxs.resize_for_index(accessIdx);
if(idxs[accessIdx] == 0)
idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);
chain.push_back(idxs[accessIdx]);
}
rdcspv::Id subElement = locationGather.add(
rdcspv::OpAccessChain(ptrType, editor.MakeId(), patchData.inputs[i].ID, chain));
ret =
locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), patchData.inputs[i].ID));
}
if(loadType != varType)
ret = locationGather.add(rdcspv::OpBitcast(varType, editor.MakeId(), ret));
}
}
if(ret == rdcspv::Id())
{
rdcspv::Id rdocGlobalVar = editor.AddVariable(
rdcspv::OpVariable(ptrType, editor.MakeId(), rdcspv::StorageClass::Input));
editor.AddDecoration(rdcspv::OpDecorate(
rdocGlobalVar, rdcspv::DecorationParam<rdcspv::Decoration::BuiltIn>(spvBuiltin)));
if(editor.EntryPointAllGlobals())
newGlobals.push_back(rdocGlobalVar);
editor.SetName(rdocGlobalVar, name);
ret = locationGather.add(rdcspv::OpLoad(varType, editor.MakeId(), rdocGlobalVar));
}
return ret;
};
rdcspv::Id location;
// the location encoding varies by stage
if(stage == ShaderStage::Compute)
{
// the location for compute is easy, it's just the global invocation
location = fetchOrAddGlobalInput("rdoc_invocation", ShaderBuiltin::DispatchThreadIndex,
rdcspv::BuiltIn::GlobalInvocationId, uvec3Type);
}
else if(stage == ShaderStage::Vertex)
{
rdcspv::Id vtx = fetchOrAddGlobalInput("rdoc_vertexIndex", ShaderBuiltin::VertexIndex,
rdcspv::BuiltIn::VertexIndex, uint32Type);
rdcspv::Id inst = fetchOrAddGlobalInput("rdoc_instanceIndex", ShaderBuiltin::InstanceIndex,
rdcspv::BuiltIn::InstanceIndex, uint32Type);
rdcspv::Id view;
// only search for the view index is the multiview capability is declared, otherwise it's
// invalid and we just set 0
if(editor.HasCapability(rdcspv::Capability::MultiView))
{
view = fetchOrAddGlobalInput("rdoc_viewIndex", ShaderBuiltin::ViewportIndex,
rdcspv::BuiltIn::ViewIndex, uint32Type);
}
else
{
view = editor.AddConstantImmediate<uint32_t>(0U);
}
location = locationGather.add(
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {vtx, inst, view}));
}
else if(stage == ShaderStage::Pixel)
{
rdcspv::Id float2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 2));
rdcspv::Id float4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 4));
rdcspv::Id coord = fetchOrAddGlobalInput("rdoc_fragCoord", ShaderBuiltin::Position,
rdcspv::BuiltIn::FragCoord, float4Type);
// grab just the xy
coord = locationGather.add(
rdcspv::OpVectorShuffle(float2Type, editor.MakeId(), coord, coord, {0, 1}));
// convert to int
coord = locationGather.add(rdcspv::OpConvertFToU(uvec2Type, editor.MakeId(), coord));
rdcspv::Id x =
locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {0}));
rdcspv::Id y =
locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {1}));
// shift x up into top 16-bits
x = locationGather.add(rdcspv::OpShiftLeftLogical(
uint32Type, editor.MakeId(), x, editor.AddConstantImmediate<uint32_t>(16U)));
// OR together
coord = locationGather.add(rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(), x, y));
rdcspv::Id samp;
// only grab the sample ID if sample shading is already enabled
for(size_t i = 0; i < refl.inputSignature.size(); i++)
{
if(refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASampleIndex ||
refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASamplePosition)
{
samp = fetchOrAddGlobalInput("rdoc_sampleIndex", ShaderBuiltin::MSAASampleIndex,
rdcspv::BuiltIn::SampleId, uint32Type);
}
}
if(samp == rdcspv::Id())
{
samp = editor.AddConstantImmediate<uint32_t>(~0U);
}
rdcspv::Id prim;
if(usePrimitiveID)
{
prim = fetchOrAddGlobalInput("rdoc_primitiveIndex", ShaderBuiltin::PrimitiveIndex,
rdcspv::BuiltIn::PrimitiveId, uint32Type);
}
else
{
prim = editor.AddConstantImmediate<uint32_t>(~0U);
}
location = locationGather.add(
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {coord, samp, prim}));
}
else
{
RDCWARN("No identifier stored for %s stage", ToStr(stage).c_str());
location = locationGather.add(
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {zero, zero, zero}));
}
locationGather.add(rdcspv::OpStore(printfLocationVar, location));
}
if(!newGlobals.empty())
{
rdcspv::Iter it = editor.GetEntry(entryID);
RDCASSERT(it.opcode() == rdcspv::Op::EntryPoint);
rdcspv::OpEntryPoint entry(it);
editor.Remove(it);
entry.iface.append(newGlobals);
editor.AddOperation(it, entry);
}
rdcspv::Id debugPrintfSet = editor.HasExtInst("NonSemantic.DebugPrintf");
rdcspv::TypeToIds<rdcspv::FunctionType> funcTypes = editor.GetTypes<rdcspv::FunctionType>();
// functions that have been patched with annotation & extra function parameters if needed
std::set<rdcspv::Id> patchedFunctions;
// functions we need to patch, with the indices of which parameters have bindings coming along
// with
std::map<rdcspv::Id, rdcarray<size_t>> functionPatchQueue;
// start with the entry point, with no parameters to patch
functionPatchQueue[entryID] = {};
// now keep patching functions until we have no more to patch
while(!functionPatchQueue.empty())
{
rdcspv::Id funcId;
rdcarray<size_t> patchArgIndices;
{
auto it = functionPatchQueue.begin();
funcId = functionPatchQueue.begin()->first;
patchArgIndices = functionPatchQueue.begin()->second;
functionPatchQueue.erase(it);
patchedFunctions.insert(funcId);
}
rdcspv::Iter it = editor.GetID(funcId);
RDCASSERT(it.opcode() == rdcspv::Op::Function);
if(!patchArgIndices.empty())
{
rdcspv::OpFunction func(it);
// find the function's type declaration, add the necessary arguments, redeclare and patch it
for(const rdcspv::TypeToId<rdcspv::FunctionType> &funcType : funcTypes)
{
if(funcType.second == func.functionType)
{
rdcspv::FunctionType patchedFuncType = funcType.first;
for(size_t i = 0; i < patchArgIndices.size(); i++)
patchedFuncType.argumentIds.push_back(funcParamType);
rdcspv::Id newFuncTypeID = editor.DeclareType(patchedFuncType);
// re-fetch the iterator as it might have moved with the type declaration
it = editor.GetID(funcId);
// change the declared function type
func.functionType = newFuncTypeID;
editor.PreModify(it);
it = func;
editor.PostModify(it);
break;
}
}
}
++it;
// onto the OpFunctionParameters. First allocate IDs for all our new function parameters
rdcarray<rdcspv::Id> patchedParamIDs;
for(size_t i = 0; i < patchArgIndices.size(); i++)
patchedParamIDs.push_back(editor.MakeId());
size_t argIndex = 0;
size_t watchIndex = 0;
while(it.opcode() == rdcspv::Op::FunctionParameter)
{
rdcspv::OpFunctionParameter param(it);
// if this is a parameter we're patching, add it into varLookup
if(watchIndex < patchArgIndices.size() && patchArgIndices[watchIndex] == argIndex)
{
// when we see use of this parameter, patch it using the added parameter
varLookup[param.result] = patchedParamIDs[watchIndex];
// watch for the next argument
watchIndex++;
}
argIndex++;
++it;
}
// we're past the existing function parameters, now declare our new ones
for(size_t i = 0; i < patchedParamIDs.size(); i++)
{
editor.AddOperation(it, rdcspv::OpFunctionParameter(funcParamType, patchedParamIDs[i]));
++it;
}
// continue to the first label so we can insert things at the start of the entry point
for(; it; ++it)
{
if(it.opcode() == rdcspv::Op::Label)
{
++it;
break;
}
}
// skip past any local variables
while(it.opcode() == rdcspv::Op::Variable)
++it;
if(funcId == entryID)
{
for(const rdcspv::Operation &op : locationGather)
{
editor.AddOperation(it, op);
++it;
}
}
// now patch accesses in the function body
for(; it; ++it)
{
// finish when we hit the end of the function
if(it.opcode() == rdcspv::Op::FunctionEnd)
break;
// if we see an OpCopyObject, just add it to the map pointing to the same value
if(it.opcode() == rdcspv::Op::CopyObject)
{
rdcspv::OpCopyObject copy(it);
// is this a var we want to snoop?
auto varIt = varLookup.find(copy.operand);
if(varIt != varLookup.end())
{
varLookup[copy.result] = varIt->second;
}
}
if(it.opcode() == rdcspv::Op::FunctionCall)
{
rdcspv::OpFunctionCall call(it);
// check if any of the variables being passed are ones we care about. Accumulate the added
// parameters
rdcarray<uint32_t> funccall;
rdcarray<size_t> patchArgs;
// examine each argument to see if it's one we care about
for(size_t i = 0; i < call.arguments.size(); i++)
{
// if this param we're snooping then pass our offset - whether it's a constant or a
// function
// argument itself - into the function call
auto varIt = varLookup.find(call.arguments[i]);
if(varIt != varLookup.end())
{
funccall.push_back(varIt->second.value());
patchArgs.push_back(i);
}
}
// if we have parameters to patch, replace the function call
if(!funccall.empty())
{
// prepend all the existing words
for(size_t i = 1; i < it.size(); i++)
funccall.insert(i - 1, it.word(i));
rdcspv::Iter oldCall = it;
// add our patched call afterwards
it++;
editor.AddOperation(it, rdcspv::Operation(rdcspv::Op::FunctionCall, funccall));
// remove the old call
editor.Remove(oldCall);
}
// if this function isn't marked for patching yet, and isn't patched, queue it
if(functionPatchQueue[call.function].empty() &&
patchedFunctions.find(call.function) == patchedFunctions.end())
functionPatchQueue[call.function] = patchArgs;
}
if(it.opcode() == rdcspv::Op::ExtInst && Vulkan_PrintfFetch())
{
rdcspv::OpExtInst extinst(it);
// is this a printf extinst?
if(extinst.set == debugPrintfSet)
{
uint32_t printfID = idToOffset[extinst.result];
rdcspv::Id resultConstant = editor.AddConstantDeferred<uint32_t>(printfID);
PrintfData &format = printfData[printfID];
{
rdcspv::OpString str(editor.GetID(rdcspv::Id::fromWord(extinst.params[0])));
format.format = PatchFormatString(str.string);
}
rdcarray<rdcspv::Id> packetWords;
// pack all the parameters into uint32s
for(size_t i = 1; i < extinst.params.size(); i++)
{
rdcspv::Id printfparam = rdcspv::Id::fromWord(extinst.params[i]);
rdcspv::Id type = editor.GetIDType(printfparam);
rdcspv::Iter typeIt = editor.GetID(type);
// handle vectors, but no other composites
uint32_t vecDim = 0;
if(typeIt.opcode() == rdcspv::Op::TypeVector)
{
rdcspv::OpTypeVector vec(typeIt);
vecDim = vec.componentCount;
type = vec.componentType;
typeIt = editor.GetID(type);
}
rdcspv::Scalar scalarType(typeIt);
for(uint32_t comp = 0; comp < RDCMAX(1U, vecDim); comp++)
{
rdcspv::Id input = printfparam;
format.argTypes.push_back(scalarType);
// if the input is a vector, extract the component we're working on
if(vecDim > 0)
{
input = editor.AddOperation(
it, rdcspv::OpCompositeExtract(type, editor.MakeId(), input, {comp}));
it++;
}
// handle ints and floats
if(typeIt.opcode() == rdcspv::Op::TypeInt)
{
rdcspv::OpTypeInt intType(typeIt);
rdcspv::Id param = input;
if(intType.signedness)
{
// extend to 32-bit if needed then bitcast to unsigned
if(intType.width < 32)
{
param = editor.AddOperation(
it, rdcspv::OpSConvert(int32Type, editor.MakeId(), param));
it++;
}
param = editor.AddOperation(
it, rdcspv::OpBitcast(intType.width == 64 ? uint64Type : uint32Type,
editor.MakeId(), param));
it++;
}
else
{
// just extend to 32-bit if needed
if(intType.width < 32)
{
param = editor.AddOperation(
it, rdcspv::OpSConvert(uint32Type, editor.MakeId(), param));
it++;
}
}
// 64-bit integers we now need to split up the words and add them. Otherwise we have
// a 32-bit uint to add
if(intType.width == 64)
{
rdcspv::Id lo = editor.AddOperation(
it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), param));
it++;
rdcspv::Id shifted = editor.AddOperation(
it, rdcspv::OpShiftRightLogical(uint64Type, editor.MakeId(), param,
int64wordshift));
it++;
rdcspv::Id hi = editor.AddOperation(
it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), shifted));
it++;
packetWords.push_back(lo);
packetWords.push_back(hi);
}
else
{
packetWords.push_back(param);
}
}
else if(typeIt.opcode() == rdcspv::Op::TypeFloat)
{
rdcspv::OpTypeFloat floatType(typeIt);
rdcspv::Id param = input;
// if it's not at least a float, upconvert. We don't convert to doubles since that
// would require double capability
if(floatType.width < 32)
{
param =
editor.AddOperation(it, rdcspv::OpFConvert(f32Type, editor.MakeId(), param));
it++;
}
if(floatType.width == 64)
{
// for doubles we use the GLSL unpack operation
rdcspv::Id unpacked = editor.AddOperation(
it, rdcspv::OpGLSL450(uvec2Type, editor.MakeId(), glsl450,
rdcspv::GLSLstd450::UnpackDouble2x32, {param}));
// then extract the components
rdcspv::Id lo = editor.AddOperation(
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {0}));
it++;
rdcspv::Id hi = editor.AddOperation(
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {1}));
it++;
packetWords.push_back(lo);
packetWords.push_back(hi);
}
else
{
// otherwise we bitcast to uint32
param =
editor.AddOperation(it, rdcspv::OpBitcast(uint32Type, editor.MakeId(), param));
it++;
packetWords.push_back(param);
}
}
else
{
RDCERR("Unexpected type of operand to printf %s, ignoring",
ToStr(typeIt.opcode()).c_str());
}
}
}
format.payloadWords = packetWords.size();
// pack header uint32
rdcspv::Id header =
editor.AddOperation(it, rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(),
shaderStageConstant, resultConstant));
it++;
packetWords.insert(0, header);
// load the location out of the global where we put it
rdcspv::Id location =
editor.AddOperation(it, rdcspv::OpLoad(uvec3Type, editor.MakeId(), printfLocationVar));
it++;
// extract each component and add it as a new word after the header
packetWords.insert(
1, editor.AddOperation(
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {0})));
it++;
packetWords.insert(
2, editor.AddOperation(
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {1})));
it++;
packetWords.insert(
3, editor.AddOperation(
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {2})));
it++;
rdcspv::Id counterptr;
if(useBufferAddress)
{
// make a pointer out of the buffer address
// uint32_t *bufptr = (uint32_t *)offsetaddr
counterptr = editor.AddOperation(
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), bufferAddressConst));
it++;
}
else
{
// accesschain to get the pointer we'll atomic into.
// accesschain is 0 to access rtarray (first member) then zero for the first array index
// uint32_t *bufptr = (uint32_t *)&buf.printfWords[ssboindex];
counterptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {printfArrayOffset, zero}));
it++;
}
rdcspv::Id packetSize = editor.AddConstantDeferred<uint32_t>((uint32_t)packetWords.size());
// atomically reserve enough space
rdcspv::Id idx =
editor.AddOperation(it, rdcspv::OpAtomicIAdd(uint32Type, editor.MakeId(), counterptr,
scope, semantics, packetSize));
it++;
// clamp to the buffer size so we don't overflow
idx = editor.AddOperation(
it, rdcspv::OpGLSL450(uint32Type, editor.MakeId(), glsl450, rdcspv::GLSLstd450::UMin,
{idx, maxPrintfWordOffset}));
it++;
if(useBufferAddress)
{
// convert to a 64-bit value
idx = editor.AddOperation(it, rdcspv::OpUConvert(uint64Type, editor.MakeId(), idx));
it++;
// the index is in words, so multiply by the increment to get a byte offset
rdcspv::Id byteOffset = editor.AddOperation(
it, rdcspv::OpIMul(uint64Type, editor.MakeId(), idx, printfIncrement));
it++;
// add the offset to the base address
rdcspv::Id bufAddr = editor.AddOperation(
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufferAddressConst, byteOffset));
it++;
for(rdcspv::Id word : packetWords)
{
// we pre-increment idx because it starts from 0 but we want to write into words
// starting from [1] to leave the counter itself alone.
bufAddr = editor.AddOperation(
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufAddr, printfIncrement));
it++;
rdcspv::Id ptr = editor.AddOperation(
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), bufAddr));
it++;
editor.AddOperation(it, rdcspv::OpStore(ptr, word, memoryAccess));
it++;
}
}
else
{
for(rdcspv::Id word : packetWords)
{
// we pre-increment idx because it starts from 0 but we want to write into words
// starting from [1] to leave the counter itself alone.
idx = editor.AddOperation(
it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), idx, printfIncrement));
it++;
rdcspv::Id ptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {printfArrayOffset, idx}));
it++;
editor.AddOperation(it, rdcspv::OpStore(ptr, word));
it++;
}
}
// no it++ here, it will happen implicitly on loop continue
}
}
// if we see an access chain of a variable we're snooping, save out the result
if(it.opcode() == rdcspv::Op::AccessChain || it.opcode() == rdcspv::Op::InBoundsAccessChain)
{
rdcspv::OpAccessChain chain(it);
chain.op = it.opcode();
// is this a var we want to snoop?
auto varIt = varLookup.find(chain.base);
if(varIt != varLookup.end())
{
// multi-dimensional arrays of descriptors is not allowed - however an access chain could
// be longer than 5 words (1 index). Think of the case of a uniform buffer where the first
// index goes into the descriptor array, and further indices go inside the uniform buffer
// members.
RDCASSERT(chain.indexes.size() >= 1, chain.indexes.size());
rdcspv::Id index = chain.indexes[0];
// patch after the access chain
it++;
// upcast the index to uint32 or uint64 depending on which path we're taking
{
rdcspv::Id indexType = editor.GetIDType(index);
if(indexType == rdcspv::Id())
{
RDCERR("Unknown type for ID %u, defaulting to uint32_t", index.value());
indexType = uint32Type;
}
rdcspv::Scalar indexTypeData = rdcspv::scalar<uint32_t>();
auto indexTypeIt = intTypeLookup.find(indexType);
if(indexTypeIt != intTypeLookup.end())
{
indexTypeData = indexTypeIt->second;
}
else
{
RDCERR("Unknown index type ID %u, defaulting to uint32_t", indexType.value());
}
// if it's signed, bitcast it to unsigned
if(indexTypeData.signedness)
{
indexTypeData.signedness = false;
index = editor.AddOperation(
it, rdcspv::OpBitcast(editor.DeclareType(indexTypeData), editor.MakeId(), index));
it++;
}
// if it's not wide enough, uconvert expand it
if(indexTypeData.width != targetIndexWidth)
{
rdcspv::Id extendedtype =
editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
index =
editor.AddOperation(it, rdcspv::OpUConvert(extendedtype, editor.MakeId(), index));
it++;
}
}
// clamp the index to the maximum slot. If the user is reading out of bounds, don't write
// out of bounds.
{
rdcspv::Id clampedtype =
editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
index = editor.AddOperation(
it, rdcspv::OpGLSL450(clampedtype, editor.MakeId(), glsl450,
rdcspv::GLSLstd450::UMin, {index, maxSlotID}));
it++;
}
rdcspv::Id bufptr;
if(useBufferAddress)
{
// convert the constant embedded device address to a pointer
// get our output slot address by adding an offset to the base pointer
// baseaddr = bufferAddressConst + bindingOffset
rdcspv::Id baseaddr = editor.AddOperation(
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufferAddressConst, varIt->second));
it++;
// shift the index since this is a byte offset
// shiftedindex = index << uint32shift
rdcspv::Id shiftedindex = editor.AddOperation(
it, rdcspv::OpShiftLeftLogical(uint64Type, editor.MakeId(), index, uint32shift));
it++;
// add the index on top of that
// offsetaddr = baseaddr + shiftedindex
rdcspv::Id offsetaddr = editor.AddOperation(
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), baseaddr, shiftedindex));
it++;
// make a pointer out of it
// uint32_t *bufptr = (uint32_t *)offsetaddr
bufptr = editor.AddOperation(
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), offsetaddr));
it++;
}
else
{
// accesschain into the SSBO, by adding the base offset for this var onto the index
// add the index to this binding's base index
// ssboindex = bindingOffset + index
rdcspv::Id ssboindex = editor.AddOperation(
it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), index, varIt->second));
it++;
// accesschain to get the pointer we'll atomic into.
// accesschain is 0 to access rtarray (first member) then ssboindex for array index
// uint32_t *bufptr = (uint32_t *)&buf.rtarray[ssboindex];
bufptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {rtarrayOffset, ssboindex}));
it++;
}
// atomically set the uint32 that's pointed to
editor.AddOperation(it, rdcspv::OpAtomicUMax(uint32Type, editor.MakeId(), bufptr, scope,
semantics, usedValue));
// no it++ here, it will happen implicitly on loop continue
}
}
}
}
}
// Drops every cached per-event shader feedback result, so that the next FetchShaderFeedback() call
// for any event finds no existing entry and runs again.
void VulkanReplay::ClearFeedbackCache()
{
  auto &cachedUsage = m_BindlessFeedback.Usage;
  cachedUsage.clear();
}
void VulkanReplay::FetchShaderFeedback(uint32_t eventId)
{
if(m_BindlessFeedback.Usage.find(eventId) != m_BindlessFeedback.Usage.end())
return;
if(!Vulkan_BindlessFeedback())
return;
// create it here so we won't re-run any code if the event is re-selected. We'll mark it as valid
// if it actually has any data in it later.
DynamicShaderFeedback &result = m_BindlessFeedback.Usage[eventId];
bool useBufferAddress = (m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address ||
m_pDriver->GetExtensions(NULL).ext_EXT_buffer_device_address) &&
m_pDriver->GetDeviceEnabledFeatures().shaderInt64;
if(Vulkan_Debug_DisableBufferDeviceAddress() ||
m_pDriver->GetDriverInfo().AMDBufferDeviceAddressBrokenDriver())
useBufferAddress = false;
bool useBufferAddressKHR = m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address;
const VulkanRenderState &state = m_pDriver->m_RenderState;
VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo;
const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId);
if(drawcall == NULL || !(drawcall->flags & (DrawFlags::Dispatch | DrawFlags::Drawcall)))
return;
result.compute = bool(drawcall->flags & DrawFlags::Dispatch);
const VulkanStatePipeline &pipe = result.compute ? state.compute : state.graphics;
if(pipe.pipeline == ResourceId())
return;
const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[pipe.pipeline];
VkDeviceSize feedbackStorageSize = 0;
std::map<rdcspv::Binding, feedbackData> offsetMap;
bool usesPrintf = false;
VkGraphicsPipelineCreateInfo graphicsInfo = {};
VkComputePipelineCreateInfo computeInfo = {};
// get pipeline create info
if(result.compute)
{
m_pDriver->GetShaderCache()->MakeComputePipelineInfo(computeInfo, state.compute.pipeline);
}
else
{
m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(graphicsInfo, state.graphics.pipeline);
graphicsInfo.renderPass =
creationInfo.m_RenderPass[GetResID(graphicsInfo.renderPass)].loadRPs[graphicsInfo.subpass];
graphicsInfo.subpass = 0;
}
if(result.compute)
{
usesPrintf = pipeInfo.shaders[5].patchData->usesPrintf;
}
else
{
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
int idx = StageIndex(stage.stage);
usesPrintf |= pipeInfo.shaders[idx].patchData->usesPrintf;
}
}
if(usesPrintf)
{
// reserve some space at the start for an atomic offset counter then the buffer size, and an
// overflow section for any clamped messages
feedbackStorageSize += 16 + Vulkan_Debug_PrintfBufferSize() + 1024;
}
{
const rdcarray<ResourceId> &descSetLayoutIds =
creationInfo.m_PipelineLayout[pipeInfo.layout].descSetLayouts;
rdcspv::Binding key;
for(size_t set = 0; set < descSetLayoutIds.size(); set++)
{
key.set = (uint32_t)set;
const DescSetLayout &layout = creationInfo.m_DescSetLayout[descSetLayoutIds[set]];
for(size_t binding = 0; binding < layout.bindings.size(); binding++)
{
const DescSetLayout::Binding &bindData = layout.bindings[binding];
// skip empty bindings
if(bindData.descriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM)
continue;
// only process array bindings
if(bindData.descriptorCount > 1 &&
bindData.descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
{
key.binding = (uint32_t)binding;
offsetMap[key] = {feedbackStorageSize, bindData.descriptorCount};
feedbackStorageSize += bindData.descriptorCount * sizeof(uint32_t);
}
}
}
}
uint32_t maxSlot = uint32_t(feedbackStorageSize / sizeof(uint32_t));
// add some extra padding just in case of out-of-bounds writes
feedbackStorageSize += 128;
// if we don't have any array descriptors or printf's to feedback then just return now
if(offsetMap.empty() && !usesPrintf)
return;
if(!result.compute)
{
// if we don't have any stores supported at all, we can't do feedback on the graphics pipeline
if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics &&
!m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
{
return;
}
}
// we go through the driver for all these creations since they need to be properly
// registered in order to be put in the partial replay state
VkResult vkr = VK_SUCCESS;
VkDevice dev = m_Device;
if(feedbackStorageSize > m_BindlessFeedback.FeedbackBuffer.sz)
{
uint32_t flags = GPUBuffer::eGPUBufferGPULocal | GPUBuffer::eGPUBufferSSBO;
if(useBufferAddress)
flags |= GPUBuffer::eGPUBufferAddressable;
m_BindlessFeedback.FeedbackBuffer.Destroy();
m_BindlessFeedback.FeedbackBuffer.Create(m_pDriver, dev, feedbackStorageSize, 1, flags);
}
VkDeviceAddress bufferAddress = 0;
VkDescriptorPool descpool = VK_NULL_HANDLE;
rdcarray<VkDescriptorSetLayout> setLayouts;
rdcarray<VkDescriptorSet> descSets;
VkPipelineLayout pipeLayout = VK_NULL_HANDLE;
if(useBufferAddress)
{
RDCCOMPILE_ASSERT(VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO ==
VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT,
"KHR and EXT buffer_device_address should be interchangeable here.");
VkBufferDeviceAddressInfo getAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
getAddressInfo.buffer = m_BindlessFeedback.FeedbackBuffer.buf;
if(useBufferAddressKHR)
bufferAddress = m_pDriver->vkGetBufferDeviceAddress(dev, &getAddressInfo);
else
bufferAddress = m_pDriver->vkGetBufferDeviceAddressEXT(dev, &getAddressInfo);
}
else
{
VkDescriptorSetLayoutBinding newBindings[] = {
// output buffer
{
0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
VkShaderStageFlags(result.compute ? VK_SHADER_STAGE_COMPUTE_BIT
: VK_SHADER_STAGE_ALL_GRAPHICS),
NULL,
},
};
RDCCOMPILE_ASSERT(ARRAY_COUNT(newBindings) == 1,
"Should only be one new descriptor for bindless feedback");
// create a duplicate set of descriptor sets, all visible to compute, with bindings shifted to
// account for new ones we need. This also copies the existing bindings into the new sets
PatchReservedDescriptors(pipe, descpool, setLayouts, descSets, VkShaderStageFlagBits(),
newBindings, ARRAY_COUNT(newBindings));
// if the pool failed due to limits, it will be NULL so bail now
if(descpool == VK_NULL_HANDLE)
return;
// create pipeline layout with new descriptor set layouts
{
const rdcarray<VkPushConstantRange> &push =
creationInfo.m_PipelineLayout[pipeInfo.layout].pushRanges;
VkPipelineLayoutCreateInfo pipeLayoutInfo = {
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
NULL,
0,
(uint32_t)setLayouts.size(),
setLayouts.data(),
(uint32_t)push.size(),
push.data(),
};
vkr = m_pDriver->vkCreatePipelineLayout(dev, &pipeLayoutInfo, NULL, &pipeLayout);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
// we'll only use one, set both structs to keep things simple
computeInfo.layout = pipeLayout;
graphicsInfo.layout = pipeLayout;
}
// vkUpdateDescriptorSet desc set to point to buffer
VkDescriptorBufferInfo desc = {0};
m_BindlessFeedback.FeedbackBuffer.FillDescriptor(desc);
VkWriteDescriptorSet write = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
NULL,
Unwrap(descSets[0]),
0,
0,
1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
NULL,
&desc,
NULL,
};
ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 1, &write, 0, NULL);
}
// create vertex shader with modified code
VkShaderModuleCreateInfo moduleCreateInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
VkShaderModule modules[6] = {};
const rdcstr filename[6] = {
"bindless_vertex.spv", "bindless_hull.spv", "bindless_domain.spv",
"bindless_geometry.spv", "bindless_pixel.spv", "bindless_compute.spv",
};
std::map<uint32_t, PrintfData> printfData[6];
if(result.compute)
{
VkPipelineShaderStageCreateInfo &stage = computeInfo.stage;
const VulkanCreationInfo::ShaderModule &moduleInfo =
creationInfo.m_ShaderModule[pipeInfo.shaders[5].module];
rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[5], modSpirv);
AnnotateShader(*pipeInfo.shaders[5].refl, *pipeInfo.shaders[5].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName, offsetMap, maxSlot, false,
bufferAddress, useBufferAddressKHR, modSpirv, printfData[5]);
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[5], modSpirv);
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);
vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[0]);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
stage.module = modules[0];
}
else
{
bool hasGeom = false;
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
if((stage.stage & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
{
hasGeom = true;
break;
}
}
bool usePrimitiveID =
!hasGeom && m_pDriver->GetDeviceEnabledFeatures().geometryShader != VK_FALSE;
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
if(stage.stage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
if(!m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
continue;
}
else
{
if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics)
continue;
}
int idx = StageIndex(stage.stage);
const VulkanCreationInfo::ShaderModule &moduleInfo =
creationInfo.m_ShaderModule[pipeInfo.shaders[idx].module];
rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[idx], modSpirv);
AnnotateShader(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName, offsetMap, maxSlot,
usePrimitiveID, bufferAddress, useBufferAddressKHR, modSpirv, printfData[idx]);
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[idx], modSpirv);
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);
vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[i]);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
stage.module = modules[i];
}
}
VkPipeline feedbackPipe;
if(result.compute)
{
vkr = m_pDriver->vkCreateComputePipelines(m_Device, VK_NULL_HANDLE, 1, &computeInfo, NULL,
&feedbackPipe);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
else
{
vkr = m_pDriver->vkCreateGraphicsPipelines(m_Device, VK_NULL_HANDLE, 1, &graphicsInfo, NULL,
&feedbackPipe);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
}
// make copy of state to draw from
VulkanRenderState modifiedstate = state;
VulkanStatePipeline &modifiedpipe = result.compute ? modifiedstate.compute : modifiedstate.graphics;
// bind created pipeline to partial replay state
modifiedpipe.pipeline = GetResID(feedbackPipe);
if(!useBufferAddress)
{
// replace descriptor set IDs with our temporary sets. The offsets we keep the same. If the
// original draw had no sets, we ensure there's room (with no offsets needed)
if(modifiedpipe.descSets.empty())
modifiedpipe.descSets.resize(1);
for(size_t i = 0; i < descSets.size(); i++)
{
modifiedpipe.descSets[i].pipeLayout = GetResID(pipeLayout);
modifiedpipe.descSets[i].descSet = GetResID(descSets[i]);
}
}
{
VkCommandBuffer cmd = m_pDriver->GetNextCmd();
VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
RDCASSERTEQUAL(vkr, VK_SUCCESS);
// fill destination buffer with 0s to ensure a baseline to then feedback against
ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(m_BindlessFeedback.FeedbackBuffer.buf), 0,
feedbackStorageSize, 0);
VkBufferMemoryBarrier feedbackbufBarrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
NULL,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_SHADER_WRITE_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
Unwrap(m_BindlessFeedback.FeedbackBuffer.buf),
0,
feedbackStorageSize,
};
// wait for the above fill to finish.
DoPipelineBarrier(cmd, 1, &feedbackbufBarrier);
if(result.compute)
{
modifiedstate.BindPipeline(m_pDriver, cmd, VulkanRenderState::BindCompute, true);
ObjDisp(cmd)->CmdDispatch(Unwrap(cmd), drawcall->dispatchDimension[0],
drawcall->dispatchDimension[1], drawcall->dispatchDimension[2]);
}
else
{
modifiedstate.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics);
m_pDriver->ReplayDraw(cmd, *drawcall);
modifiedstate.EndRenderPass(cmd);
}
vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
RDCASSERTEQUAL(vkr, VK_SUCCESS);
m_pDriver->SubmitCmds();
m_pDriver->FlushQ();
}
bytebuf data;
GetBufferData(GetResID(m_BindlessFeedback.FeedbackBuffer.buf), 0, 0, data);
for(auto it = offsetMap.begin(); it != offsetMap.end(); ++it)
{
uint32_t *feedbackData = (uint32_t *)(data.data() + it->second.offset);
BindpointIndex used;
used.bindset = it->first.set;
used.bind = it->first.binding;
for(uint32_t i = 0; i < it->second.numEntries; i++)
{
if(feedbackData[i])
{
used.arrayIndex = i;
result.used.push_back(used);
}
}
}
result.valid = true;
uint32_t *printfBuf = (uint32_t *)data.data();
uint32_t *printfBufEnd = (uint32_t *)(data.data() + Vulkan_Debug_PrintfBufferSize());
if(usesPrintf && *printfBuf > 0)
{
uint32_t wordsNeeded = *printfBuf;
if(wordsNeeded > Vulkan_Debug_PrintfBufferSize())
{
RDCLOG("printf buffer overflowed, needed %u bytes but printf buffer is only %u bytes",
wordsNeeded * 4, Vulkan_Debug_PrintfBufferSize());
}
printfBuf++;
while(*printfBuf && printfBuf < printfBufEnd)
{
ShaderStage stage = ShaderStage((*printfBuf) >> ShaderStageHeaderBitShift);
uint32_t printfID = *printfBuf & 0xfffffffU;
printfBuf++;
if(stage < ShaderStage::Count)
{
auto it = printfData[(uint32_t)stage].find(printfID);
if(it == printfData[(uint32_t)stage].end())
{
RDCERR("Error parsing DebugPrintf buffer, unexpected printf ID %x from header %x",
printfID, *printfBuf);
break;
}
uint32_t *location = printfBuf;
printfBuf += 3;
const PrintfData &fmt = it->second;
ShaderPrintfArgs args(printfBuf, fmt);
printfBuf += fmt.payloadWords;
// this message overflowed, don't process it
if(printfBuf >= printfBufEnd)
break;
ShaderMessage msg;
msg.stage = stage;
const VulkanCreationInfo::Pipeline::Shader &sh = pipeInfo.shaders[(uint32_t)stage];
{
VulkanCreationInfo::ShaderModule &mod = creationInfo.m_ShaderModule[sh.module];
VulkanCreationInfo::ShaderModuleReflection &modrefl =
mod.GetReflection(sh.entryPoint, pipe.pipeline);
modrefl.PopulateDisassembly(mod.spirv);
const std::map<size_t, uint32_t> instructionLines = modrefl.instructionLines;
auto instit = instructionLines.find(printfID);
if(instit != instructionLines.end())
msg.disassemblyLine = (int32_t)instit->second;
else
msg.disassemblyLine = -1;
}
if(stage == ShaderStage::Compute)
{
for(int x = 0; x < 3; x++)
{
uint32_t threadDimX = sh.refl->dispatchThreadsDimension[x];
msg.location.compute.workgroup[x] = location[x] / threadDimX;
msg.location.compute.thread[x] = location[x] % threadDimX;
}
}
else if(stage == ShaderStage::Vertex)
{
msg.location.vertex.vertexIndex = location[0];
if(!(drawcall->flags & DrawFlags::Indexed))
{
// for non-indexed draws get back to 0-based index
msg.location.vertex.vertexIndex -= drawcall->vertexOffset;
}
// go back to a 0-based instance index
msg.location.vertex.instance = location[1] - drawcall->instanceOffset;
msg.location.vertex.view = location[2];
}
else
{
msg.location.pixel.x = location[0] >> 16U;
msg.location.pixel.y = location[0] & 0xffff;
msg.location.pixel.sample = location[1];
msg.location.pixel.primitive = location[2];
RDCLOG("pixel %u, %u", msg.location.pixel.x, msg.location.pixel.y);
}
msg.message = StringFormat::FmtArgs(fmt.format.c_str(), args);
result.messages.push_back(msg);
}
else
{
RDCERR("Error parsing DebugPrintf buffer, unexpected stage %x from header %x", stage,
*printfBuf);
break;
}
}
}
if(descpool != VK_NULL_HANDLE)
{
// delete descriptors. Technically we don't have to free the descriptor sets, but our tracking
// on
// replay doesn't handle destroying children of pooled objects so we do it explicitly anyway.
m_pDriver->vkFreeDescriptorSets(dev, descpool, (uint32_t)descSets.size(), descSets.data());
m_pDriver->vkDestroyDescriptorPool(dev, descpool, NULL);
}
for(VkDescriptorSetLayout layout : setLayouts)
m_pDriver->vkDestroyDescriptorSetLayout(dev, layout, NULL);
// delete pipeline layout
m_pDriver->vkDestroyPipelineLayout(dev, pipeLayout, NULL);
// delete pipeline
m_pDriver->vkDestroyPipeline(dev, feedbackPipe, NULL);
// delete shader/shader module
for(size_t i = 0; i < ARRAY_COUNT(modules); i++)
if(modules[i] != VK_NULL_HANDLE)
m_pDriver->vkDestroyShaderModule(dev, modules[i], NULL);
// replay from the start as we may have corrupted state while fetching the above feedback.
m_pDriver->ReplayLog(0, eventId, eReplay_Full);
}
#if ENABLED(ENABLE_UNIT_TESTS)
#undef Always
#undef None
#include "catch/catch.hpp"
// Unit tests for PatchFormatString(): each CHECK feeds in a printf-style format string and
// compares the patched result against the exact expected string.
TEST_CASE("Test printf format string mangling", "[vulkan]")
{
// Vector specifiers of the form %vN<type> are expected to expand into N copies of the scalar
// specifier separated by ", ", with any flags/width/precision repeated on every copy.
SECTION("Vector format expansion")
{
// strings with no %vN specifier are expected to come back unchanged. That includes a 'v' that
// only trails a specifier (%fv, %iv) rather than introducing a vector.
CHECK(PatchFormatString("hello %f normal %i string") == "hello %f normal %i string");
CHECK(PatchFormatString("hello %% normal %2i string") == "hello %% normal %2i string");
CHECK(PatchFormatString("hello %fv normal %iv string") == "hello %fv normal %iv string");
CHECK(PatchFormatString("hello %02.3fv normal % 2.fiv string") ==
"hello %02.3fv normal % 2.fiv string");
// vectors in the middle of a string, with and without width/precision
CHECK(PatchFormatString("vector string: %v2f | %v3i") == "vector string: %f, %f | %i, %i, %i");
CHECK(PatchFormatString("vector with precision: %04.3v4f !") ==
"vector with precision: %04.3f, %04.3f, %04.3f, %04.3f !");
// vectors at the very end, the very start, and as the whole string
CHECK(PatchFormatString("vector at end %v2f") == "vector at end %f, %f");
CHECK(PatchFormatString("%v3f vector at start") == "%f, %f, %f vector at start");
CHECK(PatchFormatString("%v2f") == "%f, %f");
CHECK(PatchFormatString("%v2u") == "%u, %u");
};
// The 64-bit unsigned specifier written as %ul is expected to be rewritten to %llu.
SECTION("64-bit format twiddling")
{
// %ul at the end, at the start, and as the whole string
CHECK(PatchFormatString("hello %ul") == "hello %llu");
CHECK(PatchFormatString("%ul hello") == "%llu hello");
CHECK(PatchFormatString("%ul") == "%llu");
// width/flags are kept, and a vector of 64-bit values expands to repeated %llu
CHECK(PatchFormatString("hello %04ul there") == "hello %04llu there");
CHECK(PatchFormatString("hello %v2ul there") == "hello %llu, %llu there");
// an 'l' separated from %u by a space is ordinary text and must be left alone
CHECK(PatchFormatString("hello %u l there") == "hello %u l there");
// a 32-bit vector stays 32-bit, only the 'ul' form becomes %llu
CHECK(PatchFormatString("%v2u") == "%u, %u");
CHECK(PatchFormatString("%v2ul") == "%llu, %llu");
};
};
#endif // ENABLED(ENABLE_UNIT_TESTS)