Update bindless feedback code to use new helpers

This commit is contained in:
baldurk
2025-01-25 16:15:32 +00:00
parent 2d067b0e70
commit 9bcb68c2de
+125 -486
View File
@@ -41,9 +41,9 @@ RDOC_CONFIG(
RDOC_CONFIG(bool, Vulkan_PrintfFetch, true, "Enable fetching printf messages from GPU.");
RDOC_CONFIG(uint32_t, Vulkan_Debug_PrintfBufferSize, 64 * 1024,
"How many bytes to reserve for a printf output buffer.");
RDOC_EXTERN_CONFIG(bool, Vulkan_Debug_DisableBufferDeviceAddress);
static const uint32_t ShaderStageHeaderBitShift = 28U;
static const uint32_t ShaderFeedbackReservedBindings = 1;
struct BindKey
{
@@ -342,42 +342,11 @@ rdcspv::Id MakeOffsettedPointer<uint32_t>(rdcspv::Editor &editor, rdcspv::Iter &
return editor.AddOperation(it, rdcspv::OpBitcast(ptrType, editor.MakeId(), finalAddr));
}
void OffsetBindingsToMatch(rdcarray<uint32_t> &modSpirv)
{
rdcspv::Editor editor(modSpirv);
editor.Prepare();
// patch all bindings up by 1
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
// we will use descriptor set 0 for our own purposes if we don't have a buffer address.
//
// Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
// redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
// where all descriptor sets are already used. In theory we only have to do this with set 0,
// but that requires knowing which variables are in set 0 and it's simpler to increase all
// bindings.
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate dec(it);
if(dec.decoration == rdcspv::Decoration::Binding)
{
RDCASSERT(dec.decoration.binding != 0xffffffff);
dec.decoration.binding += 1;
it = dec;
}
}
}
}
template <typename uintvulkanmax_t>
void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchData, ShaderStage stage,
const char *entryName, const std::map<BindKey, BindData> &offsetMap,
uint32_t maxSlot, bool usePrimitiveID, VkDeviceAddress addr,
bool bufferAddressKHR, bool usesMultiview, rdcarray<uint32_t> &modSpirv,
BufferStorageMode storageMode, bool usesMultiview, rdcarray<uint32_t> &modSpirv,
std::map<uint32_t, PrintfData> &printfData)
{
// calculate offsets for IDs on the original unmodified SPIR-V. The editor may insert some nops,
@@ -390,12 +359,13 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
rdcspv::Editor editor(modSpirv);
editor.Prepare();
editor.SetBufferStorageMode(storageMode);
editor.OffsetBindingsToMatchReservation(ShaderFeedbackReservedBindings);
RDCASSERTMSG("SPIR-V module is too large to encode instruction ID!", modSpirv.size() < 0xfffffffU);
const bool useBufferAddress = (addr != 0);
const uint32_t targetIndexWidth = useBufferAddress ? sizeof(uintvulkanmax_t) * 8 : 32;
const uint32_t targetIndexWidth = IsBDA(storageMode) ? sizeof(uintvulkanmax_t) * 8 : 32;
// store the maximum slot we can use, for clamping outputs to avoid writing out of bounds
rdcspv::Id maxSlotID = targetIndexWidth == 64 ? editor.AddConstantImmediate<uint64_t>(maxSlot)
@@ -411,7 +381,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
rdcspv::Id int32Type = editor.DeclareType(rdcspv::scalar<int32_t>());
rdcspv::Id f32Type = editor.DeclareType(rdcspv::scalar<float>());
rdcspv::Id uint64Type;
rdcspv::Id uint32StructID;
rdcspv::Id outputBufVarType;
rdcspv::Id indexOffsetType;
// if the module declares int64 capability, or we use it, ensure uint64 is declared in case we
@@ -422,12 +392,14 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
uint64Type = editor.DeclareType(rdcspv::scalar<uint64_t>());
}
if(useBufferAddress)
// for BDA we do indexing math on the pointer then convert so we don't need a container struct,
// for descriptor-based access we need to declare our struct up front with a runtime array of ints
if(IsBDA(storageMode))
{
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {uint32Type}));
// any function parameters we add are byte offsets
indexOffsetType = editor.DeclareType(rdcspv::scalar<uintvulkanmax_t>());
outputBufVarType = uint32Type;
}
else
{
@@ -437,17 +409,20 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
editor.AddDecoration(rdcspv::OpDecorate(
runtimeArrayID, rdcspv::DecorationParam<rdcspv::Decoration::ArrayStride>(sizeof(uint32_t))));
uint32StructID = editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
rdcspv::Id uint32StructID =
editor.AddType(rdcspv::OpTypeStruct(editor.MakeId(), {runtimeArrayID}));
editor.SetName(uint32StructID, "__rd_feedbackStruct");
editor.AddDecoration(rdcspv::OpMemberDecorate(
uint32StructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
// any function parameters we add are uint32 indices
indexOffsetType = uint32Type;
outputBufVarType = uint32StructID;
}
editor.SetName(uint32StructID, "__rd_feedbackStruct");
editor.AddDecoration(rdcspv::OpMemberDecorate(
uint32StructID, 0, rdcspv::DecorationParam<rdcspv::Decoration::Offset>(0)));
// map from variable ID to watch, to variable ID to get offset from (as a SPIR-V constant,
// or as either byte offset for buffer addressing or ssbo index otherwise)
std::map<rdcspv::Id, rdcspv::Id> varLookup;
@@ -493,7 +468,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
if(it != offsetMap.end())
{
// store the offset for this variable so we watch for access chains and know where to store to
if(useBufferAddress)
if(IsBDA(storageMode))
{
rdcspv::Id id = varLookup[var.id] =
editor.AddConstantImmediate<uintvulkanmax_t>(uintvulkanmax_t(it->second.offset));
@@ -518,7 +493,6 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
}
rdcspv::Id carryStructType = editor.DeclareStructType({uint32Type, uint32Type});
rdcspv::Id bufferAddressConst, ssboVar, uint32ptrtype;
if(usesMultiview &&
(stage == ShaderStage::Pixel || stage == ShaderStage::Vertex || stage == ShaderStage::Geometry))
@@ -533,97 +507,12 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
}
rdcarray<rdcspv::Id> newGlobals;
rdcspv::StorageClass bufferClass = editor.PrepareAddedBufferAccess();
if(useBufferAddress)
{
// add the extension
editor.AddExtension(bufferAddressKHR ? "SPV_KHR_physical_storage_buffer"
: "SPV_EXT_physical_storage_buffer");
rdcpair<rdcspv::Id, rdcspv::Id> feedbackBuffer =
editor.AddBufferVariable(newGlobals, outputBufVarType, "__rd_feedbackBuffer", 0, 0, addr);
// change the memory model to physical storage buffer 64
rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
rdcspv::OpMemoryModel model(it);
model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
it = model;
// add capabilities
editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
// for simplicity on KHR we always load from uint2 so we're compatible with the case where int64
// isn't supported
if(bufferAddressKHR)
{
rdcspv::Id addressConstantLSB = editor.AddConstantImmediate<uint32_t>(addr & 0xffffffffu);
rdcspv::Id addressConstantMSB =
editor.AddConstantImmediate<uint32_t>((addr >> 32) & 0xffffffffu);
rdcspv::Id uint2 = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
bufferAddressConst = editor.AddConstant(rdcspv::OpConstantComposite(
uint2, editor.MakeId(), {addressConstantLSB, addressConstantMSB}));
}
else
{
editor.AddCapability(rdcspv::Capability::Int64);
// declare the address constants and make our pointers physical storage buffer pointers
bufferAddressConst = editor.AddConstantImmediate<uint64_t>(addr);
}
uint32ptrtype =
editor.DeclareType(rdcspv::Pointer(uint32Type, rdcspv::StorageClass::PhysicalStorageBuffer));
editor.SetName(bufferAddressConst, "__rd_feedbackAddress");
// struct is block decorated
editor.AddDecoration(rdcspv::OpDecorate(uint32StructID, rdcspv::Decoration::Block));
}
else
{
rdcspv::StorageClass ssboClass = editor.StorageBufferClass();
// the pointers are SSBO pointers
rdcspv::Id bufptrtype = editor.DeclareType(rdcspv::Pointer(uint32StructID, ssboClass));
uint32ptrtype = editor.DeclareType(rdcspv::Pointer(uint32Type, ssboClass));
// patch all bindings up by 1
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
// we will use descriptor set 0 for our own purposes if we don't have a buffer address.
//
// Since bindings are arbitrary, we just increase all user bindings to make room, and we'll
// redeclare the descriptor set layouts and pipeline layout. This is inevitable in the case
// where all descriptor sets are already used. In theory we only have to do this with set 0,
// but that requires knowing which variables are in set 0 and it's simpler to increase all
// bindings.
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate dec(it);
if(dec.decoration == rdcspv::Decoration::Binding)
{
RDCASSERT(dec.decoration.binding != 0xffffffff);
dec.decoration.binding += 1;
it = dec;
}
}
}
// add our SSBO variable, at set 0 binding 0
ssboVar = editor.MakeId();
editor.AddVariable(rdcspv::OpVariable(bufptrtype, ssboVar, ssboClass));
editor.AddDecoration(
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::DescriptorSet>(0)));
editor.AddDecoration(
rdcspv::OpDecorate(ssboVar, rdcspv::DecorationParam<rdcspv::Decoration::Binding>(0)));
if(editor.EntryPointAllGlobals())
newGlobals.push_back(ssboVar);
editor.SetName(ssboVar, "__rd_feedbackBuffer");
editor.DecorateStorageBufferStruct(uint32StructID);
}
rdcspv::Id uint32ptrtype = editor.DeclareType(rdcspv::Pointer(uint32Type, bufferClass));
rdcspv::Id rtarrayOffset = editor.AddConstantImmediate<uint32_t>(0U);
rdcspv::Id printfArrayOffset = rtarrayOffset;
@@ -638,7 +527,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
rdcspv::Id printfIncrement;
if(useBufferAddress)
if(IsBDA(storageMode))
{
printfIncrement = editor.AddConstantImmediate<uintvulkanmax_t>(sizeof(uint32_t));
}
@@ -1366,12 +1255,12 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
rdcspv::Id counterptr;
if(useBufferAddress)
if(IsBDA(storageMode))
{
// make a pointer out of the buffer address
// uint32_t *bufptr = (uint32_t *)(ptr+0)
counterptr = MakeOffsettedPointer<uintvulkanmax_t>(
editor, it, uint32ptrtype, carryStructType, bufferAddressConst, rdcspv::Id());
editor, it, uint32ptrtype, carryStructType, feedbackBuffer.second, rdcspv::Id());
it++;
}
@@ -1380,9 +1269,9 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
// accesschain to get the pointer we'll atomic into.
// accesschain is 0 to access rtarray (first member) then zero for the first array index
// uint32_t *bufptr = (uint32_t *)&buf.printfWords[ssboindex];
counterptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {printfArrayOffset, zero}));
counterptr = editor.AddOperation(
it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(), feedbackBuffer.second,
{printfArrayOffset, zero}));
it++;
}
@@ -1400,7 +1289,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
{idx, maxPrintfWordOffset}));
it++;
if(useBufferAddress)
if(IsBDA(storageMode))
{
// convert to an offset value (upconverting as needed, indexOffsetType is always the
// largest uint type)
@@ -1421,7 +1310,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
it++;
rdcspv::Id ptr = MakeOffsettedPointer<uintvulkanmax_t>(
editor, it, uint32ptrtype, carryStructType, bufferAddressConst, byteOffset);
editor, it, uint32ptrtype, carryStructType, feedbackBuffer.second, byteOffset);
it++;
editor.AddOperation(it, rdcspv::OpStore(ptr, word, memoryAccess));
@@ -1438,9 +1327,9 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
it, rdcspv::OpIAdd(uint32Type, editor.MakeId(), idx, printfIncrement));
it++;
rdcspv::Id ptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {printfArrayOffset, idx}));
rdcspv::Id ptr = editor.AddOperation(
it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(), feedbackBuffer.second,
{printfArrayOffset, idx}));
it++;
editor.AddOperation(it, rdcspv::OpStore(ptr, word));
@@ -1529,7 +1418,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
rdcspv::Id bufptr;
if(useBufferAddress)
if(IsBDA(storageMode))
{
// convert the constant embedded device address to a pointer
@@ -1548,7 +1437,7 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
// make a pointer out of it
// uint32_t *bufptr = (uint32_t *)(ptr + offsetaddr)
bufptr = MakeOffsettedPointer<uintvulkanmax_t>(
editor, it, uint32ptrtype, carryStructType, bufferAddressConst, offsetaddr);
editor, it, uint32ptrtype, carryStructType, feedbackBuffer.second, offsetaddr);
it++;
}
else
@@ -1564,9 +1453,9 @@ void AnnotateShader(const ShaderReflection &refl, const SPIRVPatchData &patchDat
// accesschain to get the pointer we'll atomic into.
// accesschain is 0 to access rtarray (first member) then ssboindex for array index
// uint32_t *bufptr = (uint32_t *)&buf.rtarray[ssboindex];
bufptr =
editor.AddOperation(it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(),
ssboVar, {rtarrayOffset, ssboindex}));
bufptr = editor.AddOperation(
it, rdcspv::OpAccessChain(uint32ptrtype, editor.MakeId(), feedbackBuffer.second,
{rtarrayOffset, ssboindex}));
it++;
}
@@ -1598,15 +1487,6 @@ bool VulkanReplay::FetchShaderFeedback(uint32_t eventId)
// if it actually has any data in it later.
VKDynamicShaderFeedback &result = m_BindlessFeedback.Usage[eventId];
bool useBufferAddress = (m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address ||
m_pDriver->GetExtensions(NULL).ext_EXT_buffer_device_address);
if(Vulkan_Debug_DisableBufferDeviceAddress() ||
m_pDriver->GetDriverInfo().BufferDeviceAddressBrokenDriver())
useBufferAddress = false;
bool useBufferAddressKHR = m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address;
const VulkanRenderState &state = m_pDriver->m_RenderState;
VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo;
@@ -1634,40 +1514,25 @@ bool VulkanReplay::FetchShaderFeedback(uint32_t eventId)
bool usesPrintf = false;
VkGraphicsPipelineCreateInfo graphicsInfo = {};
VkComputePipelineCreateInfo computeInfo = {};
// get pipeline create info
if(result.compute)
for(uint32_t i = 0; i < NumShaderStages; i++)
{
m_pDriver->GetShaderCache()->MakeComputePipelineInfo(computeInfo, state.compute.pipeline);
}
else
{
m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(graphicsInfo, state.graphics.pipeline);
if(pipeInfo.shaders[i].stage == ShaderStage::Count)
continue;
if(graphicsInfo.renderPass != VK_NULL_HANDLE)
graphicsInfo.renderPass =
creationInfo.m_RenderPass[GetResID(graphicsInfo.renderPass)].loadRPs[graphicsInfo.subpass];
graphicsInfo.subpass = 0;
usesPrintf |= pipeInfo.shaders[i].patchData->usesPrintf;
}
if(result.compute)
{
usesPrintf = pipeInfo.shaders[5].patchData->usesPrintf;
}
else
{
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
const bool hasGeomOrMesh = pipeInfo.shaders[(size_t)ShaderStage::Geometry].module != ResourceId() ||
pipeInfo.shaders[(size_t)ShaderStage::Mesh].module != ResourceId();
int idx = StageIndex(stage.stage);
const bool usePrimitiveID =
!hasGeomOrMesh && m_pDriver->GetDeviceEnabledFeatures().geometryShader != VK_FALSE;
usesPrintf |= pipeInfo.shaders[idx].patchData->usesPrintf;
}
}
const bool usesMultiview =
state.GetRenderPass() != ResourceId()
? creationInfo.m_RenderPass[state.GetRenderPass()].subpasses[state.subpass].multiviews.size() >
1
: pipeInfo.viewMask != 0;
BindlessFeedbackData feedbackData;
@@ -1821,293 +1686,89 @@ bool VulkanReplay::FetchShaderFeedback(uint32_t eventId)
VkResult vkr = VK_SUCCESS;
VkDevice dev = m_Device;
if(feedbackData.feedbackStorageSize > m_BindlessFeedback.FeedbackBuffer.TotalSize())
{
uint32_t flags = GPUBuffer::eGPUBufferGPULocal | GPUBuffer::eGPUBufferSSBO;
m_BindlessFeedback.ResizeFeedbackBuffer(m_pDriver, feedbackData.feedbackStorageSize);
if(useBufferAddress)
flags |= GPUBuffer::eGPUBufferAddressable;
m_BindlessFeedback.FeedbackBuffer.Destroy();
m_BindlessFeedback.FeedbackBuffer.Create(m_pDriver, dev, feedbackData.feedbackStorageSize, 1,
flags);
}
VkDeviceAddress bufferAddress = 0;
VkDescriptorPool descpool = VK_NULL_HANDLE;
rdcarray<VkDescriptorSetLayout> setLayouts;
rdcarray<VkDescriptorSet> descSets;
VkPipelineLayout pipeLayout = VK_NULL_HANDLE;
if(useBufferAddress)
{
RDCCOMPILE_ASSERT(VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO ==
VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT,
"KHR and EXT buffer_device_address should be interchangeable here.");
VkBufferDeviceAddressInfo getAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
getAddressInfo.buffer = m_BindlessFeedback.FeedbackBuffer.UnwrappedBuffer();
if(useBufferAddressKHR)
bufferAddress = ObjDisp(dev)->GetBufferDeviceAddress(Unwrap(dev), &getAddressInfo);
else
bufferAddress = ObjDisp(dev)->GetBufferDeviceAddressEXT(Unwrap(dev), &getAddressInfo);
}
else
{
VkDescriptorSetLayoutBinding newBindings[] = {
// output buffer
{
0,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1,
VkShaderStageFlags(result.compute ? VK_SHADER_STAGE_COMPUTE_BIT
: VK_SHADER_STAGE_ALL_GRAPHICS),
NULL,
},
};
RDCCOMPILE_ASSERT(ARRAY_COUNT(newBindings) == 1,
"Should only be one new descriptor for bindless feedback");
// create a duplicate set of descriptor sets, all visible to compute, with bindings shifted to
// account for new ones we need. This also copies the existing bindings into the new sets
PatchReservedDescriptors(pipe, descpool, setLayouts, descSets, VkShaderStageFlagBits(),
newBindings, ARRAY_COUNT(newBindings));
// if the pool failed due to limits, it will be NULL so bail now
if(descpool == VK_NULL_HANDLE)
return false;
// create pipeline layout with new descriptor set layouts
{
const rdcarray<VkPushConstantRange> &push =
creationInfo.m_PipelineLayout[result.compute ? pipeInfo.compLayout : pipeInfo.vertLayout]
.pushRanges;
VkPipelineLayoutCreateInfo pipeLayoutInfo = {
VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
NULL,
rdcarray<VkDescriptorSetLayoutBinding> newBindings = {
{
0,
(uint32_t)setLayouts.size(),
setLayouts.data(),
(uint32_t)push.size(),
push.data(),
};
vkr = m_pDriver->vkCreatePipelineLayout(dev, &pipeLayoutInfo, NULL, &pipeLayout);
CHECK_VKR(m_pDriver, vkr);
// we'll only use one, set both structs to keep things simple
computeInfo.layout = pipeLayout;
graphicsInfo.layout = pipeLayout;
}
// vkUpdateDescriptorSet desc set to point to buffer
VkDescriptorBufferInfo desc = {0};
m_BindlessFeedback.FeedbackBuffer.FillDescriptor(desc);
VkWriteDescriptorSet write = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
NULL,
Unwrap(descSets[0]),
0,
0,
1,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
NULL,
&desc,
NULL,
};
ObjDisp(dev)->UpdateDescriptorSets(Unwrap(dev), 1, &write, 0, NULL);
}
// create vertex shader with modified code
VkShaderModuleCreateInfo moduleCreateInfo = {VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO};
VkShaderModule modules[NumShaderStages] = {};
const rdcstr filename[NumShaderStages] = {
"bindless_vertex.spv", "bindless_hull.spv", "bindless_domain.spv", "bindless_geometry.spv",
"bindless_pixel.spv", "bindless_compute.spv", "bindless_task.spv", "bindless_mesh.spv",
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
1,
VkShaderStageFlags(result.compute ? VK_SHADER_STAGE_COMPUTE_BIT
: VK_SHADER_STAGE_ALL_GRAPHICS),
NULL,
},
};
std::map<uint32_t, PrintfData> printfData[NumShaderStages];
if(result.compute)
{
VkPipelineShaderStageCreateInfo &stage = computeInfo.stage;
const VulkanCreationInfo::ShaderModule &moduleInfo =
creationInfo.m_ShaderModule[pipeInfo.shaders[5].module];
rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[5], modSpirv);
if(m_pDriver->GetDeviceEnabledFeatures().shaderInt64)
{
AnnotateShader<uint64_t>(*pipeInfo.shaders[5].refl, *pipeInfo.shaders[5].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName,
feedbackData.offsetMap, maxSlot, false, bufferAddress,
useBufferAddressKHR, false, modSpirv, printfData[5]);
}
else
{
AnnotateShader<uint32_t>(*pipeInfo.shaders[5].refl, *pipeInfo.shaders[5].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName,
feedbackData.offsetMap, maxSlot, false, bufferAddress,
useBufferAddressKHR, false, modSpirv, printfData[5]);
}
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[5], modSpirv);
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);
vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[0]);
CHECK_VKR(m_pDriver, vkr);
stage.module = modules[0];
}
else
{
bool hasGeomOrMesh = false;
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
if((stage.stage & (VK_SHADER_STAGE_GEOMETRY_BIT | VK_SHADER_STAGE_MESH_BIT_EXT)) != 0)
{
hasGeomOrMesh = true;
break;
}
}
bool usePrimitiveID =
!hasGeomOrMesh && m_pDriver->GetDeviceEnabledFeatures().geometryShader != VK_FALSE;
bool usesMultiview = state.GetRenderPass() != ResourceId()
? creationInfo.m_RenderPass[state.GetRenderPass()]
.subpasses[state.subpass]
.multiviews.size() > 1
: pipeInfo.viewMask != 0;
for(uint32_t i = 0; i < graphicsInfo.stageCount; i++)
{
VkPipelineShaderStageCreateInfo &stage =
(VkPipelineShaderStageCreateInfo &)graphicsInfo.pStages[i];
bool storesUnsupported = false;
if(stage.stage & VK_SHADER_STAGE_FRAGMENT_BIT)
{
if(!m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
storesUnsupported = true;
}
else
{
if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics)
storesUnsupported = true;
}
// if we are using buffer device address, we can just skip patching this shader
if(storesUnsupported && bufferAddress != 0)
{
continue;
// if we're not using BDA, we need to be sure all stages have the bindings patched in-kind.
// Otherwise if e.g. vertex stores aren't supported the vertex bindings won't be patched and
// will mismatch our patched descriptor sets
}
int idx = StageIndex(stage.stage);
const VulkanCreationInfo::ShaderModule &moduleInfo =
creationInfo.m_ShaderModule[pipeInfo.shaders[idx].module];
rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[idx], modSpirv);
if(storesUnsupported)
{
OffsetBindingsToMatch(modSpirv);
}
else if(m_pDriver->GetDeviceEnabledFeatures().shaderInt64)
{
AnnotateShader<uint64_t>(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName,
feedbackData.offsetMap, maxSlot, usePrimitiveID, bufferAddress,
useBufferAddressKHR, usesMultiview, modSpirv, printfData[idx]);
}
else
{
AnnotateShader<uint32_t>(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
ShaderStage(StageIndex(stage.stage)), stage.pName,
feedbackData.offsetMap, maxSlot, usePrimitiveID, bufferAddress,
useBufferAddressKHR, usesMultiview, modSpirv, printfData[idx]);
}
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[idx], modSpirv);
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.size() * sizeof(uint32_t);
vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &modules[i]);
CHECK_VKR(m_pDriver, vkr);
stage.module = modules[i];
}
}
VkPipeline feedbackPipe;
if(result.compute)
{
vkr = m_pDriver->vkCreateComputePipelines(m_Device, VK_NULL_HANDLE, 1, &computeInfo, NULL,
&feedbackPipe);
CHECK_VKR(m_pDriver, vkr);
}
else
{
vkr = m_pDriver->vkCreateGraphicsPipelines(m_Device, VK_NULL_HANDLE, 1, &graphicsInfo, NULL,
&feedbackPipe);
CHECK_VKR(m_pDriver, vkr);
}
RDCASSERTMSG("ShaderFeedbackReservedBindings is wrong",
newBindings.size() == ShaderFeedbackReservedBindings, newBindings.size());
// make copy of state to draw from
VulkanRenderState modifiedstate = state;
VulkanStatePipeline &modifiedpipe = result.compute ? modifiedstate.compute : modifiedstate.graphics;
// bind created pipeline to partial replay state
modifiedpipe.pipeline = GetResID(feedbackPipe);
AddedDescriptorData patchedBufferdata =
PrepareExtraBufferDescriptor(modifiedstate, result.compute, newBindings, false);
if(!useBufferAddress)
{
// replace descriptor set IDs with our temporary sets. The offsets we keep the same. If the
// original action had no sets, we ensure there's room (with no offsets needed)
if(patchedBufferdata.empty())
return false;
if(modifiedpipe.descSets.empty())
modifiedpipe.descSets.resize(1);
if(!patchedBufferdata.descSets.empty())
m_BindlessFeedback.FeedbackBuffer.WriteDescriptor(Unwrap(patchedBufferdata.descSets[0]), 0, 0);
for(size_t i = 0; i < descSets.size(); i++)
std::map<uint32_t, PrintfData> printfData[NumShaderStages];
auto patchCallback = [this, &printfData, &feedbackData, pipeInfo, maxSlot, usePrimitiveID,
usesMultiview](
const AddedDescriptorData &patchedBufferdata, VkShaderStageFlagBits stage,
const char *entryName, const rdcarray<uint32_t> &origSpirv,
rdcarray<uint32_t> &modSpirv, const VkSpecializationInfo *&specInfo) {
// can't patch if stores aren't supported
if((stage & VK_SHADER_STAGE_FRAGMENT_BIT) != 0)
{
modifiedpipe.descSets[i].pipeLayout = GetResID(pipeLayout);
modifiedpipe.descSets[i].descSet = GetResID(descSets[i]);
if(!m_pDriver->GetDeviceEnabledFeatures().fragmentStoresAndAtomics)
return false;
}
else if((stage & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_GEOMETRY_BIT)) !=
0)
{
if(!m_pDriver->GetDeviceEnabledFeatures().vertexPipelineStoresAndAtomics)
return false;
}
}
modifiedstate.subpassContents = VK_SUBPASS_CONTENTS_INLINE;
modifiedstate.dynamicRendering.flags &= ~VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT;
int idx = StageIndex(stage);
modSpirv = origSpirv;
static const rdcstr filename[NumShaderStages] = {
"bindless_vertex.spv", "bindless_hull.spv", "bindless_domain.spv",
"bindless_geometry.spv", "bindless_pixel.spv", "bindless_compute.spv",
"bindless_task.spv", "bindless_mesh.spv",
};
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/before_" + filename[idx], modSpirv);
if(m_pDriver->GetDeviceEnabledFeatures().shaderInt64)
{
AnnotateShader<uint64_t>(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
ShaderStage(idx), entryName, feedbackData.offsetMap, maxSlot,
usePrimitiveID, m_BindlessFeedback.FeedbackBuffer.Address(),
m_StorageMode, usesMultiview, modSpirv, printfData[idx]);
}
else
{
AnnotateShader<uint32_t>(*pipeInfo.shaders[idx].refl, *pipeInfo.shaders[idx].patchData,
ShaderStage(idx), entryName, feedbackData.offsetMap, maxSlot,
usePrimitiveID, m_BindlessFeedback.FeedbackBuffer.Address(),
m_StorageMode, usesMultiview, modSpirv, printfData[idx]);
}
if(!Vulkan_Debug_FeedbackDumpDirPath().empty())
FileIO::WriteAll(Vulkan_Debug_FeedbackDumpDirPath() + "/after_" + filename[idx], modSpirv);
return true;
};
PrepareStateForPatchedShader(patchedBufferdata, modifiedstate, result.compute, patchCallback);
{
VkCommandBuffer cmd = m_pDriver->GetNextCmd();
@@ -2352,29 +2013,7 @@ bool VulkanReplay::FetchShaderFeedback(uint32_t eventId)
}
}
if(descpool != VK_NULL_HANDLE)
{
// delete descriptors. Technically we don't have to free the descriptor sets, but our tracking
// on
// replay doesn't handle destroying children of pooled objects so we do it explicitly anyway.
m_pDriver->vkFreeDescriptorSets(dev, descpool, (uint32_t)descSets.size(), descSets.data());
m_pDriver->vkDestroyDescriptorPool(dev, descpool, NULL);
}
for(VkDescriptorSetLayout layout : setLayouts)
m_pDriver->vkDestroyDescriptorSetLayout(dev, layout, NULL);
// delete pipeline layout
m_pDriver->vkDestroyPipelineLayout(dev, pipeLayout, NULL);
// delete pipeline
m_pDriver->vkDestroyPipeline(dev, feedbackPipe, NULL);
// delete shader/shader module
for(size_t i = 0; i < ARRAY_COUNT(modules); i++)
if(modules[i] != VK_NULL_HANDLE)
m_pDriver->vkDestroyShaderModule(dev, modules[i], NULL);
patchedBufferdata.Free();
return true;
}