mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-04 17:10:47 +00:00
1934 lines
66 KiB
C++
1934 lines
66 KiB
C++
/******************************************************************************
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2019-2021 Baldur Karlsson
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
******************************************************************************/
|
|
|
|
#include <ctype.h>
#include <float.h>
#include <string.h>
#include "common/formatting.h"
#include "core/settings.h"
#include "driver/shaders/spirv/spirv_editor.h"
#include "driver/shaders/spirv/spirv_op_helpers.h"
#include "vk_core.h"
#include "vk_debug.h"
#include "vk_replay.h"
#include "vk_shader_cache.h"
|
|
|
|
RDOC_CONFIG(rdcstr, Vulkan_Debug_FeedbackDumpDirPath, "",
|
|
"Path to dump bindless feedback annotation generated SPIR-V files.");
|
|
RDOC_CONFIG(
|
|
bool, Vulkan_BindlessFeedback, true,
|
|
"Enable fetching from GPU which descriptors were dynamically used in descriptor arrays.");
|
|
RDOC_CONFIG(bool, Vulkan_PrintfFetch, true, "Enable fetching printf messages from GPU.");
|
|
RDOC_CONFIG(uint32_t, Vulkan_Debug_PrintfBufferSize, 64 * 1024,
|
|
"How many bytes to reserve for a printf output buffer.");
|
|
RDOC_EXTERN_CONFIG(bool, Vulkan_Debug_DisableBufferDeviceAddress);
|
|
|
|
static const uint32_t ShaderStageHeaderBitShift = 28U;
|
|
|
|
// Per-binding feedback information. AnnotateShader receives these keyed by rdcspv::Binding.
struct feedbackData
{
  // byte offset for this binding. AnnotateShader emits it unchanged as a 64-bit constant when
  // buffer addressing is in use, and divides it by 4 to form a uint32 index otherwise
  uint64_t offset;
  // NOTE(review): not read by the code in this part of the file - presumably the number of
  // array entries tracked for the binding, confirm against where it is filled in
  uint32_t numEntries;
};
|
|
|
|
struct PrintfData
|
|
{
|
|
rdcstr format;
|
|
// vectors are expanded so there's one for each component (as printf will expect)
|
|
rdcarray<rdcspv::Scalar> argTypes;
|
|
size_t payloadWords;
|
|
};
|
|
|
|
struct ShaderPrintfArgs : public StringFormat::Args
|
|
{
|
|
public:
|
|
ShaderPrintfArgs(const uint32_t *payload, const PrintfData &formats)
|
|
: m_Start(payload), m_Cur(payload), m_Idx(0), m_Formats(formats)
|
|
{
|
|
}
|
|
|
|
void reset() override
|
|
{
|
|
m_Cur = m_Start;
|
|
m_Idx = 0;
|
|
}
|
|
int get_int() override
|
|
{
|
|
int32_t ret = *(int32_t *)m_Cur;
|
|
m_Idx++;
|
|
m_Cur++;
|
|
return ret;
|
|
}
|
|
unsigned int get_uint() override
|
|
{
|
|
uint32_t ret = *(uint32_t *)m_Cur;
|
|
m_Idx++;
|
|
m_Cur++;
|
|
return ret;
|
|
}
|
|
double get_double() override
|
|
{
|
|
// here we need to know if a real double was stored or not. It probably isn't but we handle it
|
|
if(m_Idx < m_Formats.argTypes.size())
|
|
{
|
|
if(m_Formats.argTypes[m_Idx].width == 64)
|
|
{
|
|
double ret = *(double *)m_Cur;
|
|
m_Idx++;
|
|
m_Cur += 2;
|
|
return ret;
|
|
}
|
|
else
|
|
{
|
|
float ret = *(float *)m_Cur;
|
|
m_Idx++;
|
|
m_Cur++;
|
|
return ret;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return 0.0;
|
|
}
|
|
}
|
|
void *get_ptr() override
|
|
{
|
|
m_Idx++;
|
|
return NULL;
|
|
}
|
|
uint64_t get_uint64() override
|
|
{
|
|
uint64_t ret = *(uint64_t *)m_Cur;
|
|
m_Idx++;
|
|
m_Cur += 2;
|
|
return ret;
|
|
}
|
|
|
|
size_t get_size() override { return sizeof(size_t) == 8 ? (size_t)get_uint64() : get_uint(); }
|
|
private:
|
|
const uint32_t *m_Cur;
|
|
const uint32_t *m_Start;
|
|
size_t m_Idx;
|
|
const PrintfData &m_Formats;
|
|
};
|
|
|
|
// Rewrites a shader printf format string into one our own formatter can process, and returns
// the patched copy.
//
// Two things are patched:
//  - we don't support things like %XX.YYv2f so vector formatters are expanded to one scalar
//    formatter per component separated by ", ", e.g. %XX.YYf, %XX.YYf
//  - annoyingly the printf specification for 64-bit integers is printed as %ul instead of
//    %llu, so that is translated too (both for scalars and inside vector formatters)
//
// "%%" is left untouched. If a malformed formatter is encountered an error is logged and
// patching stops, returning the string as patched so far.
rdcstr PatchFormatString(rdcstr format)
{
  for(size_t i = 0; i < format.size(); i++)
  {
    if(format[i] != '%')
      continue;

    const size_t start = i;

    i++;

    // a '%' at the very end of the string has nothing following it to format
    if(i >= format.size())
    {
      RDCERR("Malformed format string '%s'", format.c_str());
      break;
    }

    // escaped percent sign, nothing to patch
    if(format[i] == '%')
      continue;

    // skip to first letter. The cast is needed as passing a negative char to isalpha (e.g. a
    // byte of a UTF-8 sequence) is undefined
    while(i < format.size() && !isalpha((unsigned char)format[i]))
      i++;

    // malformed string, abort
    if(i >= format.size())
    {
      RDCERR("Malformed format string '%s'", format.c_str());
      break;
    }

    // if the first letter is v, this is a vector format
    if(format[i] == 'v' || format[i] == 'V')
    {
      const size_t vecStart = i;

      // we need both the component count and a conversion character after the v
      if(i + 2 >= format.size())
      {
        RDCERR("Malformed format string '%s'", format.c_str());
        break;
      }

      const int vecsize = int(format[i + 1]) - int('0');

      if(vecsize < 2 || vecsize > 4)
      {
        RDCERR("Malformed format string '%s'", format.c_str());
        break;
      }

      // skip the v and the [234]
      i += 2;

      bool int64 = false;
      // if the final letter is u, we need to peek ahead to see if there's a l following
      if(format[i] == 'u' && i + 1 < format.size() && format[i + 1] == 'l')
      {
        i++;
        int64 = true;
      }

      rdcstr componentFormat = format.substr(start, i - start + 1);

      // remove the vX from the component format
      componentFormat.erase(vecStart - start, 2);

      // if it's a 64-bit ul, transform to llu
      if(int64)
      {
        componentFormat.pop_back();
        componentFormat.pop_back();
        componentFormat += "llu";
      }

      rdcstr vectorExpandedFormat;
      for(int v = 0; v < vecsize; v++)
      {
        vectorExpandedFormat += componentFormat;
        if(v + 1 < vecsize)
          vectorExpandedFormat += ", ";
      }

      // replace the vector formatter with its expansion
      format.erase(start, i - start + 1);
      format.insert(start, vectorExpandedFormat);

      // carry on from the last character we inserted, the expansion is already fully patched
      // so there is no reason to scan through it again
      i = start + vectorExpandedFormat.size() - 1;

      continue;
    }

    // if the letter is u, see if the next is l. If so we translate ul to llu
    if(format[i] == 'u' && i + 1 < format.size() && format[i + 1] == 'l')
    {
      format[i] = 'l';
      format[i + 1] = 'u';
      format.insert(i, 'l');
    }
  }

  return format;
}
|
|
|
|
void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchData,
|
|
ShaderStage stage, const char *entryName,
|
|
const std::map<rdcspv::Binding, feedbackData> &offsetMap, uint32_t maxSlot,
|
|
bool usePrimitiveID, VkDeviceAddress addr, bool bufferAddressKHR,
|
|
rdcarray<uint32_t> &modSpirv, std::map<uint32_t, PrintfData> &printfData)
|
|
{
|
|
// calculate offsets for IDs on the original unmodified SPIR-V. The editor may insert some nops,
|
|
// so we do it manually here
|
|
std::map<rdcspv::Id, uint32_t> idToOffset;
|
|
|
|
for(rdcspv::Iter it(modSpirv, rdcspv::FirstRealWord); it; it++)
|
|
idToOffset[rdcspv::OpDecoder(it).result] = (uint32_t)it.offs();
|
|
|
|
rdcspv::Editor editor(modSpirv);
|
|
|
|
editor.Prepare();
|
|
|
|
RDCASSERTMSG("SPIR-V module is too large to encode instruction ID!", modSpirv.size() < 0xfffffffU);
|
|
|
|
const bool useBufferAddress = (addr != 0);
|
|
|
|
const uint32_t targetIndexWidth = useBufferAddress ? 64 : 32;
|
|
|
|
// store the maximum slot we can use, for clamping outputs to avoid writing out of bounds
|
|
rdcspv::Id maxSlotID = useBufferAddress ? editor.AddConstantImmediate<uint64_t>(maxSlot)
|
|
: editor.AddConstantImmediate<uint32_t>(maxSlot);
|
|
|
|
rdcspv::Id maxPrintfWordOffset =
|
|
editor.AddConstantImmediate<uint32_t>(Vulkan_Debug_PrintfBufferSize() / sizeof(uint32_t));
|
|
|
|
rdcspv::Id uint32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
|
|
rdcspv::Id int32Type = editor.DeclareType(rdcspv::scalar<int32_t>());
|
|
rdcspv::Id f32Type = editor.DeclareType(rdcspv::scalar<float>());
|
|
rdcspv::Id uint64Type, int64Type;
|
|
rdcspv::Id uint32StructID;
|
|
rdcspv::Id funcParamType;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
// declare the int64 types we'll need
|
|
uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
|
|
int64Type = editor.DeclareType(rdcspv::scalar<int64_t>());
|
|
|
|
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {uint32Type}));
|
|
|
|
// any function parameters we add are uint64 byte offsets
|
|
funcParamType = uint64Type;
|
|
}
|
|
else
|
|
{
|
|
rdcspv::Id runtimeArrayID =
|
|
editor.AddType(rdcspv::OpTypeRuntimeArray(editor.MakeId(), uint32Type));
|
|
|
|
editor.AddDecoration(rdcspv::OpDecorate(
|
|
runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));
|
|
|
|
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
|
|
|
|
// any function parameters we add are uint32 indices
|
|
funcParamType = uint32Type;
|
|
|
|
// if the module declares int64 capability, ensure uint64/int64 are declared in case we need to
|
|
// transform them for printf arguments
|
|
if(editor.HasCapability(rdcspv::Capability::Int64))
|
|
{
|
|
uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
|
|
int64Type = editor.DeclareType(rdcspv::scalar<int64_t>());
|
|
}
|
|
}
|
|
|
|
editor.SetName(uint32StructID, "__rd_feedbackStruct");
|
|
|
|
editor.AddDecoration(rdcspv::OpMemberDecorate(
|
|
uint32StructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
|
|
|
|
// map from variable ID to watch, to variable ID to get offset from (as a SPIR-V constant,
|
|
// or as either uint64 byte offset for buffer addressing or uint32 ssbo index otherwise)
|
|
std::map<rdcspv::Id, rdcspv::Id> varLookup;
|
|
|
|
// iterate over all variables. We do this here because in the absence of the buffer address
|
|
// extension we might declare our own below and patch bindings - so we need to look these up now
|
|
for(const rdcspv::Variable &var : editor.GetGlobals())
|
|
{
|
|
// skip variables without one of these storage classes, as they are not descriptors
|
|
if(var.storage != rdcspv::StorageClass::UniformConstant &&
|
|
var.storage != rdcspv::StorageClass::Uniform &&
|
|
var.storage != rdcspv::StorageClass::StorageBuffer)
|
|
continue;
|
|
|
|
// get this variable's binding info
|
|
rdcspv::Binding bind = editor.GetBinding(var.id);
|
|
|
|
// if this is one of the bindings we care about
|
|
auto it = offsetMap.find(bind);
|
|
if(it != offsetMap.end())
|
|
{
|
|
// store the offset for this variable so we watch for access chains and know where to store to
|
|
if(useBufferAddress)
|
|
{
|
|
rdcspv::Id id = varLookup[var.id] = editor.AddConstantImmediate<uint64_t>(it->second.offset);
|
|
|
|
editor.SetName(id, StringFormat::Fmt("__feedbackOffset_set%u_bind%u", it->first.set,
|
|
it->first.binding));
|
|
}
|
|
else
|
|
{
|
|
// check that the offset fits in 32-bit word, convert byte offset to uint32 index
|
|
uint64_t index = it->second.offset / 4;
|
|
RDCASSERT(index < 0xFFFFFFFFULL, bind.set, bind.binding, it->second.offset);
|
|
rdcspv::Id id = varLookup[var.id] = editor.AddConstantImmediate<uint32_t>(uint32_t(index));
|
|
|
|
editor.SetName(
|
|
id, StringFormat::Fmt("__feedbackIndex_set%u_bind%u", it->first.set, it->first.binding));
|
|
}
|
|
}
|
|
}
|
|
|
|
rdcspv::Id bufferAddressConst, ssboVar, uint32ptrtype;
|
|
|
|
if(usePrimitiveID && stage == ShaderStage::Fragment && Vulkan_PrintfFetch())
|
|
{
|
|
editor.AddCapability(rdcspv::Capability::Geometry);
|
|
}
|
|
|
|
rdcarray<rdcspv::Id> newGlobals;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
// add the extension
|
|
editor.AddExtension(bufferAddressKHR ? "SPV_KHR_physical_storage_buffer"
|
|
: "SPV_EXT_physical_storage_buffer");
|
|
|
|
// change the memory model to physical storage buffer 64
|
|
rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
|
|
rdcspv::OpMemoryModel model(it);
|
|
model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
|
|
it = model;
|
|
|
|
// add capabilities
|
|
editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
|
|
editor.AddCapability(rdcspv::Capability::Int64);
|
|
|
|
// declare the address constants and make our pointers physical storage buffer pointers
|
|
bufferAddressConst = editor.AddConstantImmediate<uint64_t>(addr);
|
|
uint32ptrtype =
|
|
editor.DeclareType(rdcspv::Pointer(uint32Type, rdcspv::StorageClass::PhysicalStorageBuffer));
|
|
|
|
editor.SetName(bufferAddressConst, "__rd_feedbackAddress");
|
|
|
|
// struct is block decorated
|
|
editor.AddDecoration(rdcspv::OpDecorate(uint32StructID, rdcspv::Decoration::Block));
|
|
}
|
|
else
|
|
{
|
|
rdcspv::StorageClass ssboClass = editor.StorageBufferClass();
|
|
|
|
// the pointers are SSBO pointers
|
|
rdcspv::Id bufptrtype = editor.DeclareType(rdcspv::Pointer(uint32StructID, ssboClass));
|
|
uint32ptrtype = editor.DeclareType(rdcspv::Pointer(uint32Type, ssboClass));
|
|
|
|
// patch all bindings up by 1
|
|
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
|
|
end = editor.End(rdcspv::Section::Annotations);
|
|
it < end; ++it)
|
|
{
|
|
// we will use descriptor set 0 for our own purposes if we don't have a buffer address.
|
|
//
|
|
// Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
|
|
// redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
|
|
// where all descriptor sets are already used. In theory we only have to do this with set 0,
|
|
// but that requires knowing which variables are in set 0 and it's simpler to increase all
|
|
// bindings.
|
|
if(it.opcode() == rdcspv::Op::Decorate)
|
|
{
|
|
rdcspv::OpDecorate dec(it);
|
|
if(dec.decoration == rdcspv::Decoration::Binding)
|
|
{
|
|
RDCASSERT(dec.decoration.binding != 0xffffffff);
|
|
dec.decoration.binding += 1;
|
|
it = dec;
|
|
}
|
|
}
|
|
}
|
|
|
|
// add our SSBO variable, at set 0 binding 0
|
|
ssboVar = editor.MakeId();
|
|
editor.AddVariable(rdcspv::OpVariable(bufptrtype, ssboVar, ssboClass));
|
|
editor.AddDecoration(
|
|
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
|
|
editor.AddDecoration(
|
|
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));
|
|
|
|
if(editor.EntryPointAllGlobals())
|
|
newGlobals.push_back(ssboVar);
|
|
|
|
editor.SetName(ssboVar, "__rd_feedbackBuffer");
|
|
|
|
editor.DecorateStorageBufferStruct(uint32StructID);
|
|
}
|
|
|
|
rdcspv::Id rtarrayOffset = editor.AddConstantImmediate<uint32_t>(0U);
|
|
rdcspv::Id printfArrayOffset = rtarrayOffset;
|
|
rdcspv::Id zero = rtarrayOffset;
|
|
rdcspv::Id usedValue = editor.AddConstantImmediate<uint32_t>(0xFFFFFFFFU);
|
|
rdcspv::Id scope = editor.AddConstantImmediate<uint32_t>((uint32_t)rdcspv::Scope::Invocation);
|
|
rdcspv::Id semantics = editor.AddConstantImmediate<uint32_t>(0U);
|
|
rdcspv::Id uint32shift = editor.AddConstantImmediate<uint32_t>(2U);
|
|
|
|
rdcspv::MemoryAccessAndParamDatas memoryAccess;
|
|
memoryAccess.setAligned(sizeof(uint32_t));
|
|
|
|
rdcspv::Id printfIncrement;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
printfIncrement = editor.AddConstantImmediate<uint64_t>(sizeof(uint32_t));
|
|
}
|
|
else
|
|
{
|
|
printfIncrement = editor.AddConstantImmediate<uint32_t>(1U);
|
|
}
|
|
|
|
rdcspv::Id glsl450 = editor.ImportExtInst("GLSL.std.450");
|
|
|
|
std::map<rdcspv::Id, rdcspv::Scalar> intTypeLookup;
|
|
|
|
for(auto scalarType : editor.GetTypeInfo<rdcspv::Scalar>())
|
|
if(scalarType.first.type == rdcspv::Op::TypeInt)
|
|
intTypeLookup[scalarType.second] = scalarType.first;
|
|
|
|
rdcspv::Id entryID;
|
|
for(const rdcspv::EntryPoint &entry : editor.GetEntries())
|
|
{
|
|
if(entry.name == entryName && MakeShaderStage(entry.executionModel) == stage)
|
|
{
|
|
entryID = entry.id;
|
|
break;
|
|
}
|
|
}
|
|
|
|
rdcspv::Id uvec2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
|
|
rdcspv::Id uvec3Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 3));
|
|
rdcspv::Id uvec4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 4));
|
|
|
|
// we'll initialise this at the start of the entry point, and use it globally to get the location
|
|
// for printf statements
|
|
rdcspv::Id printfLocationVar = editor.MakeId();
|
|
|
|
if(Vulkan_PrintfFetch())
|
|
{
|
|
editor.AddVariable(rdcspv::OpVariable(
|
|
editor.DeclareType(rdcspv::Pointer(uvec3Type, rdcspv::StorageClass::Private)),
|
|
printfLocationVar, rdcspv::StorageClass::Private));
|
|
|
|
if(editor.EntryPointAllGlobals())
|
|
newGlobals.push_back(printfLocationVar);
|
|
}
|
|
|
|
rdcspv::Id shaderStageConstant =
|
|
editor.AddConstantImmediate<uint32_t>(uint32_t(stage) << ShaderStageHeaderBitShift);
|
|
rdcspv::Id int64wordshift = editor.AddConstantImmediate<uint32_t>(32U);
|
|
|
|
// build up operations to pull in the location from globals - either existing or ones we add
|
|
rdcspv::OperationList locationGather;
|
|
|
|
if(Vulkan_PrintfFetch())
|
|
{
|
|
rdcarray<rdcspv::Id> idxs;
|
|
|
|
auto fetchOrAddGlobalInput = [&editor, &idxs, &refl, &patchData, &locationGather, &newGlobals](
|
|
const char *name, ShaderBuiltin builtin, rdcspv::BuiltIn spvBuiltin, rdcspv::Id varType) {
|
|
rdcspv::Id ret;
|
|
|
|
rdcspv::Id ptrType = editor.DeclareType(rdcspv::Pointer(varType, rdcspv::StorageClass::Input));
|
|
|
|
for(size_t i = 0; i < refl.inputSignature.size(); i++)
|
|
{
|
|
if(refl.inputSignature[i].systemValue == builtin)
|
|
{
|
|
rdcspv::Id loadType = varType;
|
|
if(refl.inputSignature[i].varType == VarType::SInt)
|
|
{
|
|
if(refl.inputSignature[i].compCount == 1)
|
|
loadType = editor.DeclareType(rdcspv::scalar<int32_t>());
|
|
else
|
|
loadType = editor.DeclareType(
|
|
rdcspv::Vector(rdcspv::scalar<int32_t>(), refl.inputSignature[i].compCount));
|
|
}
|
|
|
|
if(patchData.inputs[i].accessChain.empty())
|
|
{
|
|
ret =
|
|
locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), patchData.inputs[i].ID));
|
|
}
|
|
else
|
|
{
|
|
rdcarray<rdcspv::Id> chain;
|
|
|
|
for(uint32_t accessIdx : patchData.inputs[i].accessChain)
|
|
{
|
|
idxs.resize_for_index(accessIdx);
|
|
if(idxs[accessIdx] == 0)
|
|
idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>(accessIdx);
|
|
|
|
chain.push_back(idxs[accessIdx]);
|
|
}
|
|
|
|
rdcspv::Id subElement = locationGather.add(
|
|
rdcspv::OpAccessChain(ptrType, editor.MakeId(), patchData.inputs[i].ID, chain));
|
|
|
|
ret =
|
|
locationGather.add(rdcspv::OpLoad(loadType, editor.MakeId(), patchData.inputs[i].ID));
|
|
}
|
|
|
|
if(loadType != varType)
|
|
ret = locationGather.add(rdcspv::OpBitcast(varType, editor.MakeId(), ret));
|
|
}
|
|
}
|
|
|
|
if(ret == rdcspv::Id())
|
|
{
|
|
rdcspv::Id rdocGlobalVar = editor.AddVariable(
|
|
rdcspv::OpVariable(ptrType, editor.MakeId(), rdcspv::StorageClass::Input));
|
|
editor.AddDecoration(rdcspv::OpDecorate(
|
|
rdocGlobalVar, rdcspv::DecorationParam<rdcspv::Decoration::BuiltIn>(spvBuiltin)));
|
|
|
|
if(editor.EntryPointAllGlobals())
|
|
newGlobals.push_back(rdocGlobalVar);
|
|
|
|
editor.SetName(rdocGlobalVar, name);
|
|
|
|
ret = locationGather.add(rdcspv::OpLoad(varType, editor.MakeId(), rdocGlobalVar));
|
|
}
|
|
|
|
return ret;
|
|
};
|
|
|
|
rdcspv::Id location;
|
|
|
|
// the location encoding varies by stage
|
|
if(stage == ShaderStage::Compute)
|
|
{
|
|
// the location for compute is easy, it's just the global invocation
|
|
location = fetchOrAddGlobalInput("rdoc_invocation", ShaderBuiltin::DispatchThreadIndex,
|
|
rdcspv::BuiltIn::GlobalInvocationId, uvec3Type);
|
|
}
|
|
else if(stage == ShaderStage::Vertex)
|
|
{
|
|
rdcspv::Id vtx = fetchOrAddGlobalInput("rdoc_vertexIndex", ShaderBuiltin::VertexIndex,
|
|
rdcspv::BuiltIn::VertexIndex, uint32Type);
|
|
rdcspv::Id inst = fetchOrAddGlobalInput("rdoc_instanceIndex", ShaderBuiltin::InstanceIndex,
|
|
rdcspv::BuiltIn::InstanceIndex, uint32Type);
|
|
|
|
rdcspv::Id view;
|
|
|
|
// only search for the view index if the multiview capability is declared, otherwise it's
|
|
// invalid and we just set 0
|
|
if(editor.HasCapability(rdcspv::Capability::MultiView))
|
|
{
|
|
view = fetchOrAddGlobalInput("rdoc_viewIndex", ShaderBuiltin::ViewportIndex,
|
|
rdcspv::BuiltIn::ViewIndex, uint32Type);
|
|
}
|
|
else
|
|
{
|
|
view = editor.AddConstantImmediate<uint32_t>(0U);
|
|
}
|
|
|
|
location = locationGather.add(
|
|
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {vtx, inst, view}));
|
|
}
|
|
else if(stage == ShaderStage::Pixel)
|
|
{
|
|
rdcspv::Id float2Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 2));
|
|
rdcspv::Id float4Type = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<float>(), 4));
|
|
|
|
rdcspv::Id coord = fetchOrAddGlobalInput("rdoc_fragCoord", ShaderBuiltin::Position,
|
|
rdcspv::BuiltIn::FragCoord, float4Type);
|
|
|
|
// grab just the xy
|
|
coord = locationGather.add(
|
|
rdcspv::OpVectorShuffle(float2Type, editor.MakeId(), coord, coord, {0, 1}));
|
|
|
|
// convert to int
|
|
coord = locationGather.add(rdcspv::OpConvertFToU(uvec2Type, editor.MakeId(), coord));
|
|
|
|
rdcspv::Id x =
|
|
locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {0}));
|
|
rdcspv::Id y =
|
|
locationGather.add(rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), coord, {1}));
|
|
|
|
// shift x up into top 16-bits
|
|
x = locationGather.add(rdcspv::OpShiftLeftLogical(
|
|
uint32Type, editor.MakeId(), x, editor.AddConstantImmediate<uint32_t>(16U)));
|
|
|
|
// OR together
|
|
coord = locationGather.add(rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(), x, y));
|
|
|
|
rdcspv::Id samp;
|
|
|
|
// only grab the sample ID if sample shading is already enabled
|
|
for(size_t i = 0; i < refl.inputSignature.size(); i++)
|
|
{
|
|
if(refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASampleIndex ||
|
|
refl.inputSignature[i].systemValue == ShaderBuiltin::MSAASamplePosition)
|
|
{
|
|
samp = fetchOrAddGlobalInput("rdoc_sampleIndex", ShaderBuiltin::MSAASampleIndex,
|
|
rdcspv::BuiltIn::SampleId, uint32Type);
|
|
}
|
|
}
|
|
|
|
if(samp == rdcspv::Id())
|
|
{
|
|
samp = editor.AddConstantImmediate<uint32_t>(~0U);
|
|
}
|
|
|
|
rdcspv::Id prim;
|
|
|
|
if(usePrimitiveID)
|
|
{
|
|
prim = fetchOrAddGlobalInput("rdoc_primitiveIndex", ShaderBuiltin::PrimitiveIndex,
|
|
rdcspv::BuiltIn::PrimitiveId, uint32Type);
|
|
}
|
|
else
|
|
{
|
|
prim = editor.AddConstantImmediate<uint32_t>(~0U);
|
|
}
|
|
|
|
location = locationGather.add(
|
|
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {coord, samp, prim}));
|
|
}
|
|
else
|
|
{
|
|
RDCWARN("No identifier stored for %s stage", ToStr(stage).c_str());
|
|
location = locationGather.add(
|
|
rdcspv::OpCompositeConstruct(uvec3Type, editor.MakeId(), {zero, zero, zero}));
|
|
}
|
|
|
|
locationGather.add(rdcspv::OpStore(printfLocationVar, location));
|
|
}
|
|
|
|
if(!newGlobals.empty())
|
|
{
|
|
rdcspv::Iter it = editor.GetEntry(entryID);
|
|
|
|
RDCASSERT(it.opcode() == rdcspv::Op::EntryPoint);
|
|
|
|
rdcspv::OpEntryPoint entry(it);
|
|
|
|
editor.Remove(it);
|
|
|
|
entry.iface.append(newGlobals);
|
|
|
|
editor.AddOperation(it, entry);
|
|
}
|
|
|
|
rdcspv::Id debugPrintfSet = editor.HasExtInst("NonSemantic.DebugPrintf");
|
|
|
|
rdcspv::TypeToIds<rdcspv::FunctionType> funcTypes = editor.GetTypes<rdcspv::FunctionType>();
|
|
|
|
// functions that have been patched with annotation & extra function parameters if needed
|
|
std::set<rdcspv::Id> patchedFunctions;
|
|
|
|
// functions we need to patch, with the indices of which parameters have bindings coming along
|
|
// with
|
|
std::map<rdcspv::Id, rdcarray<size_t>> functionPatchQueue;
|
|
|
|
// start with the entry point, with no parameters to patch
|
|
functionPatchQueue[entryID] = {};
|
|
|
|
// now keep patching functions until we have no more to patch
|
|
while(!functionPatchQueue.empty())
|
|
{
|
|
rdcspv::Id funcId;
|
|
rdcarray<size_t> patchArgIndices;
|
|
|
|
{
|
|
auto it = functionPatchQueue.begin();
|
|
funcId = functionPatchQueue.begin()->first;
|
|
patchArgIndices = functionPatchQueue.begin()->second;
|
|
functionPatchQueue.erase(it);
|
|
|
|
patchedFunctions.insert(funcId);
|
|
}
|
|
|
|
rdcspv::Iter it = editor.GetID(funcId);
|
|
|
|
RDCASSERT(it.opcode() == rdcspv::Op::Function);
|
|
|
|
if(!patchArgIndices.empty())
|
|
{
|
|
rdcspv::OpFunction func(it);
|
|
|
|
// find the function's type declaration, add the necessary arguments, redeclare and patch it
|
|
for(const rdcspv::TypeToId<rdcspv::FunctionType> &funcType : funcTypes)
|
|
{
|
|
if(funcType.second == func.functionType)
|
|
{
|
|
rdcspv::FunctionType patchedFuncType = funcType.first;
|
|
for(size_t i = 0; i < patchArgIndices.size(); i++)
|
|
patchedFuncType.argumentIds.push_back(funcParamType);
|
|
|
|
rdcspv::Id newFuncTypeID = editor.DeclareType(patchedFuncType);
|
|
|
|
// re-fetch the iterator as it might have moved with the type declaration
|
|
it = editor.GetID(funcId);
|
|
|
|
// change the declared function type
|
|
func.functionType = newFuncTypeID;
|
|
|
|
editor.PreModify(it);
|
|
|
|
it = func;
|
|
|
|
editor.PostModify(it);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
++it;
|
|
|
|
// onto the OpFunctionParameters. First allocate IDs for all our new function parameters
|
|
rdcarray<rdcspv::Id> patchedParamIDs;
|
|
for(size_t i = 0; i < patchArgIndices.size(); i++)
|
|
patchedParamIDs.push_back(editor.MakeId());
|
|
|
|
size_t argIndex = 0;
|
|
size_t watchIndex = 0;
|
|
while(it.opcode() == rdcspv::Op::FunctionParameter)
|
|
{
|
|
rdcspv::OpFunctionParameter param(it);
|
|
|
|
// if this is a parameter we're patching, add it into varLookup
|
|
if(watchIndex < patchArgIndices.size() && patchArgIndices[watchIndex] == argIndex)
|
|
{
|
|
// when we see use of this parameter, patch it using the added parameter
|
|
varLookup[param.result] = patchedParamIDs[watchIndex];
|
|
// watch for the next argument
|
|
watchIndex++;
|
|
}
|
|
|
|
argIndex++;
|
|
++it;
|
|
}
|
|
|
|
// we're past the existing function parameters, now declare our new ones
|
|
for(size_t i = 0; i < patchedParamIDs.size(); i++)
|
|
{
|
|
editor.AddOperation(it, rdcspv::OpFunctionParameter(funcParamType, patchedParamIDs[i]));
|
|
++it;
|
|
}
|
|
|
|
// continue to the first label so we can insert things at the start of the entry point
|
|
for(; it; ++it)
|
|
{
|
|
if(it.opcode() == rdcspv::Op::Label)
|
|
{
|
|
++it;
|
|
break;
|
|
}
|
|
}
|
|
|
|
// skip past any local variables
|
|
while(it.opcode() == rdcspv::Op::Variable)
|
|
++it;
|
|
|
|
if(funcId == entryID)
|
|
{
|
|
for(const rdcspv::Operation &op : locationGather)
|
|
{
|
|
editor.AddOperation(it, op);
|
|
++it;
|
|
}
|
|
}
|
|
|
|
// now patch accesses in the function body
|
|
for(; it; ++it)
|
|
{
|
|
// finish when we hit the end of the function
|
|
if(it.opcode() == rdcspv::Op::FunctionEnd)
|
|
break;
|
|
|
|
// if we see an OpCopyObject, just add it to the map pointing to the same value
|
|
if(it.opcode() == rdcspv::Op::CopyObject)
|
|
{
|
|
rdcspv::OpCopyObject copy(it);
|
|
|
|
// is this a var we want to snoop?
|
|
auto varIt = varLookup.find(copy.operand);
|
|
if(varIt != varLookup.end())
|
|
{
|
|
varLookup[copy.result] = varIt->second;
|
|
}
|
|
}
|
|
|
|
if(it.opcode() == rdcspv::Op::FunctionCall)
|
|
{
|
|
rdcspv::OpFunctionCall call(it);
|
|
|
|
// check if any of the variables being passed are ones we care about. Accumulate the added
|
|
// parameters
|
|
rdcarray<uint32_t> funccall;
|
|
rdcarray<size_t> patchArgs;
|
|
|
|
// examine each argument to see if it's one we care about
|
|
for(size_t i = 0; i < call.arguments.size(); i++)
|
|
{
|
|
// if this param we're snooping then pass our offset - whether it's a constant or a
|
|
// function
|
|
// argument itself - into the function call
|
|
auto varIt = varLookup.find(call.arguments[i]);
|
|
if(varIt != varLookup.end())
|
|
{
|
|
funccall.push_back(varIt->second.value());
|
|
patchArgs.push_back(i);
|
|
}
|
|
}
|
|
|
|
// if we have parameters to patch, replace the function call
|
|
if(!funccall.empty())
|
|
{
|
|
// prepend all the existing words
|
|
for(size_t i = 1; i < it.size(); i++)
|
|
funccall.insert(i - 1, it.word(i));
|
|
|
|
rdcspv::Iter oldCall = it;
|
|
|
|
// add our patched call afterwards
|
|
it++;
|
|
editor.AddOperation(it, rdcspv::Operation(rdcspv::Op::FunctionCall, funccall));
|
|
|
|
// remove the old call
|
|
editor.Remove(oldCall);
|
|
}
|
|
|
|
// if this function isn't marked for patching yet, and isn't patched, queue it
|
|
if(functionPatchQueue[call.function].empty() &&
|
|
patchedFunctions.find(call.function) == patchedFunctions.end())
|
|
functionPatchQueue[call.function] = patchArgs;
|
|
}
|
|
|
|
if(it.opcode() == rdcspv::Op::ExtInst && Vulkan_PrintfFetch())
|
|
{
|
|
rdcspv::OpExtInst extinst(it);
|
|
// is this a printf extinst?
|
|
if(extinst.set == debugPrintfSet)
|
|
{
|
|
uint32_t printfID = idToOffset[extinst.result];
|
|
|
|
rdcspv::Id resultConstant = editor.AddConstantDeferred<uint32_t>(printfID);
|
|
|
|
PrintfData &format = printfData[printfID];
|
|
|
|
{
|
|
rdcspv::OpString str(editor.GetID(rdcspv::Id::fromWord(extinst.params[0])));
|
|
format.format = PatchFormatString(str.string);
|
|
}
|
|
|
|
rdcarray<rdcspv::Id> packetWords;
|
|
|
|
// pack all the parameters into uint32s
|
|
for(size_t i = 1; i < extinst.params.size(); i++)
|
|
{
|
|
rdcspv::Id printfparam = rdcspv::Id::fromWord(extinst.params[i]);
|
|
rdcspv::Id type = editor.GetIDType(printfparam);
|
|
|
|
rdcspv::Iter typeIt = editor.GetID(type);
|
|
|
|
// handle vectors, but no other composites
|
|
uint32_t vecDim = 0;
|
|
if(typeIt.opcode() == rdcspv::Op::TypeVector)
|
|
{
|
|
rdcspv::OpTypeVector vec(typeIt);
|
|
vecDim = vec.componentCount;
|
|
type = vec.componentType;
|
|
typeIt = editor.GetID(type);
|
|
}
|
|
|
|
rdcspv::Scalar scalarType(typeIt);
|
|
|
|
for(uint32_t comp = 0; comp < RDCMAX(1U, vecDim); comp++)
|
|
{
|
|
rdcspv::Id input = printfparam;
|
|
|
|
format.argTypes.push_back(scalarType);
|
|
|
|
// if the input is a vector, extract the component we're working on
|
|
if(vecDim > 0)
|
|
{
|
|
input = editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(type, editor.MakeId(), input, {comp}));
|
|
it++;
|
|
}
|
|
|
|
// handle ints and floats
|
|
if(typeIt.opcode() == rdcspv::Op::TypeInt)
|
|
{
|
|
rdcspv::OpTypeInt intType(typeIt);
|
|
|
|
rdcspv::Id param = input;
|
|
|
|
if(intType.signedness)
|
|
{
|
|
// extend to 32-bit if needed then bitcast to unsigned
|
|
if(intType.width < 32)
|
|
{
|
|
param = editor.AddOperation(
|
|
it, rdcspv::OpSConvert(int32Type, editor.MakeId(), param));
|
|
it++;
|
|
}
|
|
|
|
param = editor.AddOperation(
|
|
it, rdcspv::OpBitcast(intType.width == 64 ? uint64Type : uint32Type,
|
|
editor.MakeId(), param));
|
|
it++;
|
|
}
|
|
else
|
|
{
|
|
// just extend to 32-bit if needed
|
|
if(intType.width < 32)
|
|
{
|
|
param = editor.AddOperation(
|
|
it, rdcspv::OpSConvert(uint32Type, editor.MakeId(), param));
|
|
it++;
|
|
}
|
|
}
|
|
|
|
// 64-bit integers we now need to split up the words and add them. Otherwise we have
|
|
// a 32-bit uint to add
|
|
if(intType.width == 64)
|
|
{
|
|
rdcspv::Id lo = editor.AddOperation(
|
|
it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), param));
|
|
it++;
|
|
|
|
rdcspv::Id shifted = editor.AddOperation(
|
|
it, rdcspv::OpShiftRightLogical(uint64Type, editor.MakeId(), param,
|
|
int64wordshift));
|
|
it++;
|
|
|
|
rdcspv::Id hi = editor.AddOperation(
|
|
it, rdcspv::OpUConvert(uint32Type, editor.MakeId(), shifted));
|
|
it++;
|
|
|
|
packetWords.push_back(lo);
|
|
packetWords.push_back(hi);
|
|
}
|
|
else
|
|
{
|
|
packetWords.push_back(param);
|
|
}
|
|
}
|
|
else if(typeIt.opcode() == rdcspv::Op::TypeFloat)
|
|
{
|
|
rdcspv::OpTypeFloat floatType(typeIt);
|
|
|
|
rdcspv::Id param = input;
|
|
|
|
// if it's not at least a float, upconvert. We don't convert to doubles since that
|
|
// would require double capability
|
|
if(floatType.width < 32)
|
|
{
|
|
param =
|
|
editor.AddOperation(it, rdcspv::OpFConvert(f32Type, editor.MakeId(), param));
|
|
it++;
|
|
}
|
|
|
|
if(floatType.width == 64)
|
|
{
|
|
// for doubles we use the GLSL unpack operation
|
|
rdcspv::Id unpacked = editor.AddOperation(
|
|
it, rdcspv::OpGLSL450(uvec2Type, editor.MakeId(), glsl450,
|
|
rdcspv::GLSLstd450::UnpackDouble2x32, {param}));
|
|
|
|
// then extract the components
|
|
rdcspv::Id lo = editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {0}));
|
|
it++;
|
|
|
|
rdcspv::Id hi = editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), unpacked, {1}));
|
|
it++;
|
|
|
|
packetWords.push_back(lo);
|
|
packetWords.push_back(hi);
|
|
}
|
|
else
|
|
{
|
|
// otherwise we bitcast to uint32
|
|
param =
|
|
editor.AddOperation(it, rdcspv::OpBitcast(uint32Type, editor.MakeId(), param));
|
|
it++;
|
|
|
|
packetWords.push_back(param);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
RDCERR("Unexpected type of operand to printf %s, ignoring",
|
|
ToStr(typeIt.opcode()).c_str());
|
|
}
|
|
}
|
|
}
|
|
|
|
format.payloadWords = packetWords.size();
|
|
|
|
// pack header uint32
|
|
rdcspv::Id header =
|
|
editor.AddOperation(it, rdcspv::OpBitwiseOr(uint32Type, editor.MakeId(),
|
|
shaderStageConstant, resultConstant));
|
|
it++;
|
|
|
|
packetWords.insert(0, header);
|
|
|
|
// load the location out of the global where we put it
|
|
rdcspv::Id location =
|
|
editor.AddOperation(it, rdcspv::OpLoad(uvec3Type, editor.MakeId(), printfLocationVar));
|
|
it++;
|
|
|
|
// extract each component and add it as a new word after the header
|
|
packetWords.insert(
|
|
1, editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {0})));
|
|
it++;
|
|
packetWords.insert(
|
|
2, editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {1})));
|
|
it++;
|
|
packetWords.insert(
|
|
3, editor.AddOperation(
|
|
it, rdcspv::OpCompositeExtract(uint32Type, editor.MakeId(), location, {2})));
|
|
it++;
|
|
|
|
rdcspv::Id counterptr;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
// make a pointer out of the buffer address
|
|
// uint32_t *bufptr = (uint32_t *)offsetaddr
|
|
counterptr = editor.AddOperation(
|
|
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), bufferAddressConst));
|
|
it++;
|
|
}
|
|
else
|
|
{
|
|
// accesschain to get the pointer we'll atomic into.
|
|
// accesschain is 0 to access rtarray (first member) then zero for the first array index
|
|
// uint32_t *bufptr = (uint32_t *)&buf.printfWords[ssboindex];
|
|
counterptr =
|
|
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
|
|
ssboVar, {printfArrayOffset, zero}));
|
|
it++;
|
|
}
|
|
|
|
rdcspv::Id packetSize = editor.AddConstantDeferred<uint32_t>((uint32_t)packetWords.size());
|
|
|
|
// atomically reserve enough space
|
|
rdcspv::Id idx =
|
|
editor.AddOperation(it, rdcspv::OpAtomicIAdd(uint32Type, editor.MakeId(), counterptr,
|
|
scope, semantics, packetSize));
|
|
it++;
|
|
|
|
// clamp to the buffer size so we don't overflow
|
|
idx = editor.AddOperation(
|
|
it, rdcspv::OpGLSL450(uint32Type, editor.MakeId(), glsl450, rdcspv::GLSLstd450::UMin,
|
|
{idx, maxPrintfWordOffset}));
|
|
it++;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
// convert to a 64-bit value
|
|
idx = editor.AddOperation(it, rdcspv::OpUConvert(uint64Type, editor.MakeId(), idx));
|
|
it++;
|
|
|
|
// the index is in words, so multiply by the increment to get a byte offset
|
|
rdcspv::Id byteOffset = editor.AddOperation(
|
|
it, rdcspv::OpIMul(uint64Type, editor.MakeId(), idx, printfIncrement));
|
|
it++;
|
|
|
|
// add the offset to the base address
|
|
rdcspv::Id bufAddr = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufferAddressConst, byteOffset));
|
|
it++;
|
|
|
|
for(rdcspv::Id word : packetWords)
|
|
{
|
|
// we pre-increment idx because it starts from 0 but we want to write into words
|
|
// starting from [1] to leave the counter itself alone.
|
|
bufAddr = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufAddr, printfIncrement));
|
|
it++;
|
|
|
|
rdcspv::Id ptr = editor.AddOperation(
|
|
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), bufAddr));
|
|
it++;
|
|
|
|
editor.AddOperation(it, rdcspv::OpStore(ptr, word, memoryAccess));
|
|
it++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(rdcspv::Id word : packetWords)
|
|
{
|
|
// we pre-increment idx because it starts from 0 but we want to write into words
|
|
// starting from [1] to leave the counter itself alone.
|
|
idx = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), idx, printfIncrement));
|
|
it++;
|
|
|
|
rdcspv::Id ptr =
|
|
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
|
|
ssboVar, {printfArrayOffset, idx}));
|
|
it++;
|
|
|
|
editor.AddOperation(it, rdcspv::OpStore(ptr, word));
|
|
it++;
|
|
}
|
|
}
|
|
|
|
// no it++ here, it will happen implicitly on loop continue
|
|
}
|
|
}
|
|
|
|
// if we see an access chain of a variable we're snooping, save out the result
|
|
if(it.opcode() == rdcspv::Op::AccessChain || it.opcode() == rdcspv::Op::InBoundsAccessChain)
|
|
{
|
|
rdcspv::OpAccessChain chain(it);
|
|
chain.op = it.opcode();
|
|
|
|
// is this a var we want to snoop?
|
|
auto varIt = varLookup.find(chain.base);
|
|
if(varIt != varLookup.end())
|
|
{
|
|
// multi-dimensional arrays of descriptors is not allowed - however an access chain could
|
|
// be longer than 5 words (1 index). Think of the case of a uniform buffer where the first
|
|
// index goes into the descriptor array, and further indices go inside the uniform buffer
|
|
// members.
|
|
RDCASSERT(chain.indexes.size() >= 1, chain.indexes.size());
|
|
|
|
rdcspv::Id index = chain.indexes[0];
|
|
|
|
// patch after the access chain
|
|
it++;
|
|
|
|
// upcast the index to uint32 or uint64 depending on which path we're taking
|
|
{
|
|
rdcspv::Id indexType = editor.GetIDType(index);
|
|
|
|
if(indexType == rdcspv::Id())
|
|
{
|
|
RDCERR("Unknown type for ID %u, defaulting to uint32_t", index.value());
|
|
indexType = uint32Type;
|
|
}
|
|
|
|
rdcspv::Scalar indexTypeData = rdcspv::scalar<uint32_t>();
|
|
auto indexTypeIt = intTypeLookup.find(indexType);
|
|
|
|
if(indexTypeIt != intTypeLookup.end())
|
|
{
|
|
indexTypeData = indexTypeIt->second;
|
|
}
|
|
else
|
|
{
|
|
RDCERR("Unknown index type ID %u, defaulting to uint32_t", indexType.value());
|
|
}
|
|
|
|
// if it's signed, bitcast it to unsigned
|
|
if(indexTypeData.signedness)
|
|
{
|
|
indexTypeData.signedness = false;
|
|
|
|
index = editor.AddOperation(
|
|
it, rdcspv::OpBitcast(editor.DeclareType(indexTypeData), editor.MakeId(), index));
|
|
it++;
|
|
}
|
|
|
|
// if it's not wide enough, uconvert expand it
|
|
if(indexTypeData.width != targetIndexWidth)
|
|
{
|
|
rdcspv::Id extendedtype =
|
|
editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
|
|
index =
|
|
editor.AddOperation(it, rdcspv::OpUConvert(extendedtype, editor.MakeId(), index));
|
|
it++;
|
|
}
|
|
}
|
|
|
|
// clamp the index to the maximum slot. If the user is reading out of bounds, don't write
|
|
// out of bounds.
|
|
{
|
|
rdcspv::Id clampedtype =
|
|
editor.DeclareType(rdcspv::Scalar(rdcspv::Op::TypeInt, targetIndexWidth, false));
|
|
index = editor.AddOperation(
|
|
it, rdcspv::OpGLSL450(clampedtype, editor.MakeId(), glsl450,
|
|
rdcspv::GLSLstd450::UMin, {index, maxSlotID}));
|
|
it++;
|
|
}
|
|
|
|
rdcspv::Id bufptr;
|
|
|
|
if(useBufferAddress)
|
|
{
|
|
// convert the constant embedded device address to a pointer
|
|
|
|
// get our output slot address by adding an offset to the base pointer
|
|
// baseaddr = bufferAddressConst + bindingOffset
|
|
rdcspv::Id baseaddr = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), bufferAddressConst, varIt->second));
|
|
it++;
|
|
|
|
// shift the index since this is a byte offset
|
|
// shiftedindex = index << uint32shift
|
|
rdcspv::Id shiftedindex = editor.AddOperation(
|
|
it, rdcspv::OpShiftLeftLogical(uint64Type, editor.MakeId(), index, uint32shift));
|
|
it++;
|
|
|
|
// add the index on top of that
|
|
// offsetaddr = baseaddr + shiftedindex
|
|
rdcspv::Id offsetaddr = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint64Type, editor.MakeId(), baseaddr, shiftedindex));
|
|
it++;
|
|
|
|
// make a pointer out of it
|
|
// uint32_t *bufptr = (uint32_t *)offsetaddr
|
|
bufptr = editor.AddOperation(
|
|
it, rdcspv::OpConvertUToPtr(uint32ptrtype, editor.MakeId(), offsetaddr));
|
|
it++;
|
|
}
|
|
else
|
|
{
|
|
// accesschain into the SSBO, by adding the base offset for this var onto the index
|
|
|
|
// add the index to this binding's base index
|
|
// ssboindex = bindingOffset + index
|
|
rdcspv::Id ssboindex = editor.AddOperation(
|
|
it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), index, varIt->second));
|
|
it++;
|
|
|
|
// accesschain to get the pointer we'll atomic into.
|
|
// accesschain is 0 to access rtarray (first member) then ssboindex for array index
|
|
// uint32_t *bufptr = (uint32_t *)&buf.rtarray[ssboindex];
|
|
bufptr =
|
|
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
|
|
ssboVar, {rtarrayOffset, ssboindex}));
|
|
it++;
|
|
}
|
|
|
|
// atomically set the uint32 that's pointed to
|
|
editor.AddOperation(it, rdcspv::OpAtomicUMax(uint32Type, editor.MakeId(), bufptr, scope,
|
|
semantics, usedValue));
|
|
|
|
// no it++ here, it will happen implicitly on loop continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void VulkanReplay::ClearFeedbackCache()
|
|
{
|
|
m_BindlessFeedback.Usage.clear();
|
|
}
|
|
|
|
// Gathers dynamic shader feedback for the draw or dispatch at eventId and caches it in
// m_BindlessFeedback.Usage[eventId].
//
// The steps are:
//  - bail out early if the event already has a cache entry, if the feature is disabled, if the
//    event is not a draw/dispatch, or if there is nothing to instrument (no arrayed descriptor
//    bindings and no printf use).
//  - lay out a feedback buffer: an optional printf region at the start, then one uint32 slot per
//    array element of every arrayed binding in the pipeline layout.
//  - patch each shader stage with AnnotateShader(), build a temporary pipeline from the patched
//    modules, and replay the action once with it.
//  - read the buffer back, record every non-zero slot as a used array element, and decode any
//    printf packets into ShaderMessage entries.
//  - destroy all temporary objects and replay the log up to eventId again, since the feedback
//    pass may have modified resource contents.
//
// The result entry is created before any of the early-outs so that re-selecting the same event
// never repeats the work; result.valid is only set once the buffer has been read back.
void VulkanReplay::FetchShaderFeedback(uint32_t eventId)
{
  if(m_BindlessFeedback.Usage.find(eventId) != m_BindlessFeedback.Usage.end())
    return;

  if(!Vulkan_BindlessFeedback())
    return;

  // create it here so we won't re-run any code if the event is re-selected. We'll mark it as valid
  // if it actually has any data in it later.
  DynamicShaderFeedback &result = m_BindlessFeedback.Usage[eventId];

  // writing through a raw buffer address needs one of the buffer_device_address extensions plus
  // 64-bit integer support in shaders. Otherwise we fall back to binding the buffer as an SSBO.
  bool useBufferAddress = (m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address ||
                           m_pDriver->GetExtensions(NULL).ext_EXT_buffer_device_address) &&
                          m_pDriver->GetDeviceEnabledFeatures().shaderInt64;

  if(Vulkan_Debug_DisableBufferDeviceAddress() ||
     m_pDriver->GetDriverInfo().AMDBufferDeviceAddressBrokenDriver())
    useBufferAddress = false;

  bool useBufferAddressKHR = m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address;

  const VulkanRenderState &state = m_pDriver->m_RenderState;
  VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo;

  const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId);

  if(drawcall == NULL || !(drawcall->flags & (DrawFlags::Dispatch | DrawFlags::Drawcall)))
    return;

  result.compute = bool(drawcall->flags & DrawFlags::Dispatch);

  const VulkanStatePipeline &pipe = result.compute ? state.compute : state.graphics;

  if(pipe.pipeline == ResourceId())
    return;

  const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[pipe.pipeline];

  VkDeviceSize feedbackStorageSize = 0;

  std::map<rdcspv::Binding, feedbackData> offsetMap;

  bool usesPrintf = false;

  VkGraphicsPipelineCreateInfo graphicsInfo = {};
  VkComputePipelineCreateInfo computeInfo = {};

  // get pipeline create info
  if(result.compute)
  {
    m_pDriver->GetShaderCache()->MakeComputePipelineInfo(computeInfo, state.compute.pipeline);
  }
  else
  {
    m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(graphicsInfo, state.graphics.pipeline);

    // swap in the per-subpass "load" renderpass for the subpass this pipeline was created against,
    // which is why the subpass index is then reset to 0
    graphicsInfo.renderPass =
        creationInfo.m_RenderPass[GetResID(graphicsInfo.renderPass)].loadRPs[graphicsInfo.subpass];
    graphicsInfo.subpass = 0;
  }

  // index 5 is the compute slot in the per-stage arrays (see the filename table below)
  if(result.compute)
  {
    usesPrintf = pipeInfo.shaders[5].patchData->usesPrintf;
  }
  else
  {
    for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
    {
      VkPipelineShaderStageCreateInfo &stage =
          (VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];

      int idx = StageIndex(stage.stage);

      usesPrintf |= pipeInfo.shaders[idx].patchData->usesPrintf;
    }
  }

  if(usesPrintf)
  {
    // reserve some space at the start for an atomic offset counter then the buffer size, and an
    // overflow section for any clamped messages
    feedbackStorageSize += 16 + Vulkan_Debug_PrintfBufferSize() + 1024;
  }

  // assign every arrayed binding a contiguous run of uint32 slots after the printf region
  {
    const rdcarray<ResourceId> &descSetLayoutIds =
        creationInfo.m_PipelineLayout[pipeInfo.layout].descSetLayouts;

    rdcspv::Binding key;

    for(size_t set = 0; set < descSetLayoutIds.size(); set++)
    {
      key.set = (uint32_t)set;

      const DescSetLayout &layout = creationInfo.m_DescSetLayout[descSetLayoutIds[set]];

      for(size_t binding = 0; binding < layout.bindings.size(); binding++)
      {
        const DescSetLayout::Binding &bindData = layout.bindings[binding];

        // skip empty bindings
        if(bindData.descriptorType == VK_DESCRIPTOR_TYPE_MAX_ENUM)
          continue;

        // only process array bindings
        if(bindData.descriptorCount > 1 &&
           bindData.descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
        {
          key.binding = (uint32_t)binding;

          offsetMap[key] = {feedbackStorageSize, bindData.descriptorCount};

          feedbackStorageSize += bindData.descriptorCount * sizeof(uint32_t);
        }
      }
    }
  }

  // everything we read back on the CPU lives below this size; the padding added next is never
  // parsed
  const VkDeviceSize requiredReadbackSize = feedbackStorageSize;

  uint32_t maxSlot = uint32_t(feedbackStorageSize / sizeof(uint32_t));

  // add some extra padding just in case of out-of-bounds writes
  feedbackStorageSize += 128;

  // if we don't have any array descriptors or printf's to feedback then just return now
  if(offsetMap.empty() && !usesPrintf)
    return;

  if(!result.compute)
  {
    // if we don't have any stores supported at all, we can't do feedback on the graphics pipeline
    if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics &&
       !m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
    {
      return;
    }
  }

  // we go through the driver for all these creations since they need to be properly
  // registered in order to be put in the partial replay state
  VkResult vkr = VK_SUCCESS;
  VkDevice dev = m_Device;

  // the feedback buffer is kept between calls and only re-created when it needs to grow
  if(feedbackStorageSize > m_BindlessFeedback.FeedbackBuffer.sz)
  {
    uint32_t flags = GPUBuffer::eGPUBufferGPULocal | GPUBuffer::eGPUBufferSSBO;

    if(useBufferAddress)
      flags |= GPUBuffer::eGPUBufferAddressable;

    m_BindlessFeedback.FeedbackBuffer.Destroy();
    m_BindlessFeedback.FeedbackBuffer.Create(m_pDriver, dev, feedbackStorageSize, 1, flags);
  }

  VkDeviceAddress bufferAddress = 0;

  VkDescriptorPool descpool = VK_NULL_HANDLE;
  rdcarray<VkDescriptorSetLayout> setLayouts;
  rdcarray<VkDescriptorSet> descSets;

  VkPipelineLayout pipeLayout = VK_NULL_HANDLE;

  if(useBufferAddress)
  {
    RDCCOMPILE_ASSERT(VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO ==
                          VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT,
                      "KHR and EXT buffer_device_address should be interchangeable here.");
    VkBufferDeviceAddressInfo getAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
    getAddressInfo.buffer = m_BindlessFeedback.FeedbackBuffer.buf;

    if(useBufferAddressKHR)
      bufferAddress = m_pDriver->vkGetBufferDeviceAddress(dev, &getAddressInfo);
    else
      bufferAddress = m_pDriver->vkGetBufferDeviceAddressEXT(dev, &getAddressInfo);
  }
  else
  {
    VkDescriptorSetLayoutBinding newBindings[] = {
        // output buffer
        {
            0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
            VkShaderStageFlags(result.compute ? VK_SHADER_STAGE_COMPUTE_BIT
                                              : VK_SHADER_STAGE_ALL_GRAPHICS),
            NULL,
        },
    };
    RDCCOMPILE_ASSERT(ARRAY_COUNT(newBindings) == 1,
                      "Should only be one new descriptor for bindless feedback");

    // create a duplicate set of descriptor sets, all visible to compute, with bindings shifted to
    // account for new ones we need. This also copies the existing bindings into the new sets
    PatchReservedDescriptors(pipe, descpool, setLayouts, descSets, VkShaderStageFlagBits(),
                             newBindings, ARRAY_COUNT(newBindings));

    // if the pool failed due to limits, it will be NULL so bail now
    if(descpool == VK_NULL_HANDLE)
      return;

    // create pipeline layout with new descriptor set layouts
    {
      const rdcarray<VkPushConstantRange> &push =
          creationInfo.m_PipelineLayout[pipeInfo.layout].pushRanges;

      VkPipelineLayoutCreateInfo pipeLayoutInfo = {
          VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
          NULL,
          0,
          (uint32_t)setLayouts.size(),
          setLayouts.data(),
          (uint32_t)push.size(),
          push.data(),
      };

      vkr = m_pDriver->vkCreatePipelineLayout(dev, &pipeLayoutInfo, NULL, &pipeLayout);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      // we'll only use one, set both structs to keep things simple
      computeInfo.layout = pipeLayout;
      graphicsInfo.layout = pipeLayout;
    }

    // vkUpdateDescriptorSet desc set to point to buffer
    VkDescriptorBufferInfo desc = {0};

    m_BindlessFeedback.FeedbackBuffer.FillDescriptor(desc);

    VkWriteDescriptorSet write = {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        NULL,
        Unwrap(descSets[0]),
        0,
        0,
        1,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        NULL,
        &desc,
        NULL,
    };

    ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 1, &write, 0, NULL);
  }

  // create vertex shader with modified code
  VkShaderModuleCreateInfo moduleCreateInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};

  VkShaderModule modules[6] = {};

  const rdcstr filename[6] = {
      "bindless_vertex.spv",   "bindless_hull.spv",  "bindless_domain.spv",
      "bindless_geometry.spv", "bindless_pixel.spv", "bindless_compute.spv",
  };

  // per-stage lookup from the printf ID embedded in a packet header to its format data, filled in
  // by AnnotateShader and consumed when decoding the buffer below
  std::map<uint32_t, PrintfData> printfData[6];

  if(result.compute)
  {
    VkPipelineShaderStageCreateInfo &stage = computeInfo.stage;

    const VulkanCreationInfo::ShaderModule &moduleInfo =
        creationInfo.m_ShaderModule[pipeInfo.shaders[5].module];

    rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();

    if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
      FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[5], modSpirv);

    AnnotateShader(*pipeInfo.shaders[5].refl, *pipeInfo.shaders[5].patchData,
                   ShaderStage(StageIndex(stage.stage)), stage.pName, offsetMap, maxSlot, false,
                   bufferAddress, useBufferAddressKHR, modSpirv, printfData[5]);

    if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
      FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[5], modSpirv);

    moduleCreateInfo.pCode = modSpirv.data();
    moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);

    vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[0]);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    stage.module = modules[0];
  }
  else
  {
    bool hasGeom = false;

    for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
    {
      VkPipelineShaderStageCreateInfo &stage =
          (VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];

      if((stage.stage & VK_SHADER_STAGE_GEOMETRY_BIT) != 0)
      {
        hasGeom = true;
        break;
      }
    }

    bool usePrimitiveID =
        !hasGeom && m_pDriver->GetDeviceEnabledFeatures().geometryShader != VK_FALSE;

    for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
    {
      VkPipelineShaderStageCreateInfo &stage =
          (VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];

      // stages that can't perform stores/atomics on this device are left unpatched
      if(stage.stage & VK_SHADER_STAGE_FRAGMENT_BIT)
      {
        if(!m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
          continue;
      }
      else
      {
        if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics)
          continue;
      }

      int idx = StageIndex(stage.stage);

      const VulkanCreationInfo::ShaderModule &moduleInfo =
          creationInfo.m_ShaderModule[pipeInfo.shaders[idx].module];

      rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();

      if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
        FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[idx], modSpirv);

      AnnotateShader(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
                     ShaderStage(StageIndex(stage.stage)), stage.pName, offsetMap, maxSlot,
                     usePrimitiveID, bufferAddress, useBufferAddressKHR, modSpirv, printfData[idx]);

      if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
        FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[idx], modSpirv);

      moduleCreateInfo.pCode = modSpirv.data();
      moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);

      vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[i]);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      stage.module = modules[i];
    }
  }

  // initialised so that a failed pipeline creation can't leave an indeterminate handle to be
  // looked up and destroyed below
  VkPipeline feedbackPipe = VK_NULL_HANDLE;

  if(result.compute)
  {
    vkr = m_pDriver->vkCreateComputePipelines(m_Device, VK_NULL_HANDLE, 1, &computeInfo, NULL,
                                              &feedbackPipe);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }
  else
  {
    vkr = m_pDriver->vkCreateGraphicsPipelines(m_Device, VK_NULL_HANDLE, 1, &graphicsInfo, NULL,
                                               &feedbackPipe);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);
  }

  // make copy of state to draw from
  VulkanRenderState modifiedstate = state;
  VulkanStatePipeline &modifiedpipe = result.compute ? modifiedstate.compute : modifiedstate.graphics;

  // bind created pipeline to partial replay state
  modifiedpipe.pipeline = GetResID(feedbackPipe);

  if(!useBufferAddress)
  {
    // replace descriptor set IDs with our temporary sets. The offsets we keep the same. If the
    // original draw had no sets, we ensure there's room (with no offsets needed). We also grow the
    // array if fewer sets were bound than we have patched sets, so the loop below never indexes
    // past the end.
    const size_t neededSets = RDCMAX((size_t)1, descSets.size());

    if(modifiedpipe.descSets.size() < neededSets)
      modifiedpipe.descSets.resize(neededSets);

    for(size_t i = 0; i < descSets.size(); i++)
    {
      modifiedpipe.descSets[i].pipeLayout = GetResID(pipeLayout);
      modifiedpipe.descSets[i].descSet = GetResID(descSets[i]);
    }
  }

  {
    VkCommandBuffer cmd = m_pDriver->GetNextCmd();

    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                          VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

    vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    // fill destination buffer with 0s to ensure a baseline to then feedback against
    ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(m_BindlessFeedback.FeedbackBuffer.buf), 0,
                                feedbackStorageSize, 0);

    VkBufferMemoryBarrier feedbackbufBarrier = {
        VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        NULL,
        VK_ACCESS_TRANSFER_WRITE_BIT,
        VK_ACCESS_SHADER_WRITE_BIT,
        VK_QUEUE_FAMILY_IGNORED,
        VK_QUEUE_FAMILY_IGNORED,
        Unwrap(m_BindlessFeedback.FeedbackBuffer.buf),
        0,
        feedbackStorageSize,
    };

    // wait for the above fill to finish.
    DoPipelineBarrier(cmd, 1, &feedbackbufBarrier);

    if(result.compute)
    {
      modifiedstate.BindPipeline(m_pDriver, cmd, VulkanRenderState::BindCompute, true);

      ObjDisp(cmd)->CmdDispatch(Unwrap(cmd), drawcall->dispatchDimension[0],
                                drawcall->dispatchDimension[1], drawcall->dispatchDimension[2]);
    }
    else
    {
      modifiedstate.BeginRenderPassAndApplyState(m_pDriver, cmd, VulkanRenderState::BindGraphics);

      m_pDriver->ReplayDraw(cmd, *drawcall);

      modifiedstate.EndRenderPass(cmd);
    }

    vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    m_pDriver->SubmitCmds();
    m_pDriver->FlushQ();
  }

  bytebuf data;
  GetBufferData(GetResID(m_BindlessFeedback.FeedbackBuffer.buf), 0, 0, data);

  // everything below indexes into the readback with offsets computed up front, so make sure we
  // actually got that much data before dereferencing anything. On failure we skip parsing (leaving
  // the result invalid) but still fall through to the cleanup.
  if(VkDeviceSize(data.size()) >= requiredReadbackSize)
  {
    for(auto it = offsetMap.begin(); it != offsetMap.end(); ++it)
    {
      uint32_t *feedbackData = (uint32_t *)(data.data() + it->second.offset);

      BindpointIndex used;
      used.bindset = it->first.set;
      used.bind = it->first.binding;

      // any non-zero slot means that array element was accessed
      for(uint32_t i = 0; i < it->second.numEntries; i++)
      {
        if(feedbackData[i])
        {
          used.arrayIndex = i;

          result.used.push_back(used);
        }
      }
    }

    result.valid = true;

    // word 0 of the buffer is the atomic counter of printf words requested by the shaders
    uint32_t *printfBuf = (uint32_t *)data.data();
    uint32_t *printfBufEnd = (uint32_t *)(data.data() + Vulkan_Debug_PrintfBufferSize());
    if(usesPrintf && *printfBuf > 0)
    {
      uint32_t wordsNeeded = *printfBuf;

      // the counter is in 32-bit words but the configured size is in bytes, so compare in bytes
      // (in 64-bit, to avoid overflowing the multiply)
      if(uint64_t(wordsNeeded) * sizeof(uint32_t) > uint64_t(Vulkan_Debug_PrintfBufferSize()))
      {
        RDCLOG("printf buffer overflowed, needed %u bytes but printf buffer is only %u bytes",
               wordsNeeded * 4, Vulkan_Debug_PrintfBufferSize());
      }

      printfBuf++;

      // each packet is: header word (stage + printf ID), three location words, then the payload.
      // The buffer was zero-filled so a zero header marks the end of the written packets. Check the
      // bounds before reading the header.
      while(printfBuf < printfBufEnd && *printfBuf)
      {
        // keep the header around so error messages report it and not whatever follows it
        const uint32_t header = *printfBuf;

        ShaderStage stage = ShaderStage(header >> ShaderStageHeaderBitShift);
        uint32_t printfID = header & 0xfffffffU;

        printfBuf++;

        if(stage < ShaderStage::Count)
        {
          auto it = printfData[(uint32_t)stage].find(printfID);
          if(it == printfData[(uint32_t)stage].end())
          {
            RDCERR("Error parsing DebugPrintf buffer, unexpected printf ID %x from header %x",
                   printfID, header);
            break;
          }

          uint32_t *location = printfBuf;

          printfBuf += 3;

          const PrintfData &fmt = it->second;

          ShaderPrintfArgs args(printfBuf, fmt);

          printfBuf += fmt.payloadWords;

          // this message overflowed, don't process it
          if(printfBuf >= printfBufEnd)
            break;

          ShaderMessage msg;

          msg.stage = stage;

          const VulkanCreationInfo::Pipeline::Shader &sh = pipeInfo.shaders[(uint32_t)stage];

          {
            VulkanCreationInfo::ShaderModule &mod = creationInfo.m_ShaderModule[sh.module];
            VulkanCreationInfo::ShaderModuleReflection &modrefl =
                mod.GetReflection(sh.entryPoint, pipe.pipeline);
            modrefl.PopulateDisassembly(mod.spirv);

            // bind by reference, there's no need to copy the whole map for every message
            const std::map<size_t, uint32_t> &instructionLines = modrefl.instructionLines;

            // the printf ID is used as the key into the instruction -> disassembly line map
            auto instit = instructionLines.find(printfID);
            if(instit != instructionLines.end())
              msg.disassemblyLine = (int32_t)instit->second;
            else
              msg.disassemblyLine = -1;
          }

          if(stage == ShaderStage::Compute)
          {
            // split the stored per-axis index into workgroup and in-group thread using the
            // shader's workgroup dimensions
            for(int x = 0; x < 3; x++)
            {
              uint32_t threadDimX = sh.refl->dispatchThreadsDimension[x];
              msg.location.compute.workgroup[x] = location[x] / threadDimX;
              msg.location.compute.thread[x] = location[x] % threadDimX;
            }
          }
          else if(stage == ShaderStage::Vertex)
          {
            msg.location.vertex.vertexIndex = location[0];
            if(!(drawcall->flags & DrawFlags::Indexed))
            {
              // for non-indexed draws get back to 0-based index
              msg.location.vertex.vertexIndex -= drawcall->vertexOffset;
            }
            // go back to a 0-based instance index
            msg.location.vertex.instance = location[1] - drawcall->instanceOffset;
            msg.location.vertex.view = location[2];
          }
          else
          {
            // x is packed in the top 16 bits and y in the bottom 16 bits of the first word
            msg.location.pixel.x = location[0] >> 16U;
            msg.location.pixel.y = location[0] & 0xffff;
            msg.location.pixel.sample = location[1];
            msg.location.pixel.primitive = location[2];

            RDCLOG("pixel %u, %u", msg.location.pixel.x, msg.location.pixel.y);
          }

          msg.message = StringFormat::FmtArgs(fmt.format.c_str(), args);

          result.messages.push_back(msg);
        }
        else
        {
          RDCERR("Error parsing DebugPrintf buffer, unexpected stage %x from header %x",
                 (uint32_t)stage, header);
          break;
        }
      }
    }
  }
  else
  {
    RDCERR("Feedback buffer readback returned %llu bytes, expected at least %llu",
           (uint64_t)data.size(), (uint64_t)requiredReadbackSize);
  }

  if(descpool != VK_NULL_HANDLE)
  {
    // delete descriptors. Technically we don't have to free the descriptor sets, but our tracking
    // on replay doesn't handle destroying children of pooled objects so we do it explicitly anyway.
    m_pDriver->vkFreeDescriptorSets(dev, descpool, (uint32_t)descSets.size(), descSets.data());

    m_pDriver->vkDestroyDescriptorPool(dev, descpool, NULL);
  }

  for(VkDescriptorSetLayout layout : setLayouts)
    m_pDriver->vkDestroyDescriptorSetLayout(dev, layout, NULL);

  // delete pipeline layout
  m_pDriver->vkDestroyPipelineLayout(dev, pipeLayout, NULL);

  // delete pipeline
  m_pDriver->vkDestroyPipeline(dev, feedbackPipe, NULL);

  // delete shader/shader module
  for(size_t i = 0; i < ARRAY_COUNT(modules); i++)
    if(modules[i] != VK_NULL_HANDLE)
      m_pDriver->vkDestroyShaderModule(dev, modules[i], NULL);

  // replay from the start as we may have corrupted state while fetching the above feedback.
  m_pDriver->ReplayLog(0, eventId, eReplay_Full);
}
|
|
|
|
#if ENABLED(ENABLE_UNIT_TESTS)
|
|
|
|
#undef Always
|
|
#undef None
|
|
|
|
#include "catch/catch.hpp"
|
|
|
|
TEST_CASE("Test printf format string mangling", "[vulkan]")
{
  // one row per check: the format string handed to PatchFormatString and the exact string it is
  // expected to return
  struct FormatCase
  {
    const char *input;
    const char *expected;
  };

  SECTION("Vector format expansion")
  {
    const FormatCase vectorCases[] = {
        // strings with no vector specifier come back untouched, even if a 'v' follows a specifier
        {"hello %f normal %i string", "hello %f normal %i string"},
        {"hello %% normal %2i string", "hello %% normal %2i string"},
        {"hello %fv normal %iv string", "hello %fv normal %iv string"},
        {"hello %02.3fv normal % 2.fiv string", "hello %02.3fv normal % 2.fiv string"},
        // %vN<type> expands to N comma-separated scalar specifiers, keeping width/precision
        {"vector string: %v2f | %v3i", "vector string: %f, %f | %i, %i, %i"},
        {"vector with precision: %04.3v4f !",
         "vector with precision: %04.3f, %04.3f, %04.3f, %04.3f !"},
        {"vector at end %v2f", "vector at end %f, %f"},
        {"%v3f vector at start", "%f, %f, %f vector at start"},
        {"%v2f", "%f, %f"},
        {"%v2u", "%u, %u"},
    };

    for(const FormatCase &tc : vectorCases)
      CHECK(PatchFormatString(tc.input) == tc.expected);
  }

  SECTION("64-bit format twiddling")
  {
    const FormatCase wideCases[] = {
        // a trailing 'l' directly after 'u' is rewritten to the C-style %llu
        {"hello %ul", "hello %llu"},
        {"%ul hello", "%llu hello"},
        {"%ul", "%llu"},
        {"hello %04ul there", "hello %04llu there"},
        {"hello %v2ul there", "hello %llu, %llu there"},
        // an 'l' separated from the specifier is ordinary text
        {"hello %u l there", "hello %u l there"},
        {"%v2u", "%u, %u"},
        {"%v2ul", "%llu, %llu"},
    };

    for(const FormatCase &tc : wideCases)
      CHECK(PatchFormatString(tc.input) == tc.expected);
  }
}
|
|
|
|
#endif // ENABLED(ENABLE_UNIT_TESTS)
|