mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-12 13:00:32 +00:00
1847 lines
65 KiB
C++
1847 lines
65 KiB
C++
/******************************************************************************
|
|
* The MIT License (MIT)
|
|
*
|
|
* Copyright (c) 2018 Baldur Karlsson
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
******************************************************************************/
|
|
|
|
#include <float.h>
|
|
#include "3rdparty/glslang/SPIRV/spirv.hpp"
|
|
#include "driver/shaders/spirv/spirv_common.h"
|
|
#include "driver/shaders/spirv/spirv_editor.h"
|
|
#include "vk_core.h"
|
|
#include "vk_debug.h"
|
|
#include "vk_shader_cache.h"
|
|
|
|
// Entry point name of the patched mesh-output compute shader. ConvertToMeshOutputCompute writes
// the same "rdc" string into the rewritten OpEntryPoint. Both the pointer and the characters are
// const so the name can't be re-pointed by accident.
static const char *const PatchedMeshOutputEntryPoint = "rdc";
// X dimension of the LocalSize execution mode declared for the patched compute shader.
static const uint32_t MeshOutputDispatchWidth = 128;
// Number of elements in each of the texel-buffer arrays declared for fetching vertex inputs.
static const uint32_t MeshOutputTBufferArraySize = 16;
|
|
|
|
static void ConvertToMeshOutputCompute(const ShaderReflection &refl, const SPIRVPatchData &patchData,
|
|
const char *entryName, std::vector<bool> isInstanced,
|
|
uint32_t &descSet, const DrawcallDescription *draw,
|
|
int32_t indexOffset, uint64_t numFetchVerts, uint32_t numVerts,
|
|
std::vector<uint32_t> &modSpirv, uint32_t &bufStride)
|
|
{
|
|
SPIRVEditor editor(modSpirv);
|
|
|
|
uint32_t numInputs = (uint32_t)refl.inputSignature.size();
|
|
|
|
uint32_t numOutputs = (uint32_t)refl.outputSignature.size();
|
|
RDCASSERT(numOutputs > 0);
|
|
|
|
descSet = 0;
|
|
|
|
for(SPIRVIterator it = editor.BeginDecorations(), end = editor.EndDecorations(); it != end; ++it)
|
|
{
|
|
// we will use the descriptor set immediately after the last set statically used by the shader.
|
|
// This means we don't have to worry about if the descriptor set layout declares more sets which
|
|
// might be invalid and un-bindable, we just trample over the next set that's unused.
|
|
// This is much easier than trying to add a new bind to an existing descriptor set (which would
|
|
// cascade into a new descriptor set layout, new pipeline layout, etc etc!). However, this might
|
|
// push us over the limit on number of descriptor sets.
|
|
if(it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationDescriptorSet)
|
|
descSet = RDCMAX(descSet, it.word(3) + 1);
|
|
}
|
|
|
|
// tbuffer types, the values are the descriptor bindings
|
|
enum tbufferType
|
|
{
|
|
tbuffer_undefined,
|
|
tbuffer_float = 2,
|
|
tbuffer_uint = 3,
|
|
tbuffer_sint = 4,
|
|
tbuffer_count,
|
|
};
|
|
|
|
struct inputOutputIDs
|
|
{
|
|
// if this is a builtin value, what builtin value is expected
|
|
ShaderBuiltin builtin = ShaderBuiltin::Undefined;
|
|
// ID of the variable
|
|
SPIRVId variableID;
|
|
// constant ID for the index of this attribute
|
|
SPIRVId constID;
|
|
// the type ID for this attribute. Must be present already by definition!
|
|
SPIRVId basetypeID;
|
|
// tbuffer type for this input
|
|
tbufferType tbuffer;
|
|
// gvec4 type for this input, used as result type when fetching from tbuffer
|
|
uint32_t vec4ID;
|
|
// Uniform Pointer ID for this output. Used only for output data, to write to output SSBO
|
|
SPIRVId uniformPtrID;
|
|
// Output Pointer ID for this attribute.
|
|
// For inputs, used to 'write' to the global at the start.
|
|
// For outputs, used to 'read' from the global at the end.
|
|
SPIRVId privatePtrID;
|
|
};
|
|
std::vector<inputOutputIDs> ins;
|
|
ins.resize(numInputs);
|
|
std::vector<inputOutputIDs> outs;
|
|
outs.resize(numOutputs);
|
|
|
|
std::set<SPIRVId> inputs;
|
|
std::set<SPIRVId> outputs;
|
|
|
|
std::map<SPIRVId, SPIRVId> typeReplacements;
|
|
|
|
// rewrite any inputs and outputs to be private storage class
|
|
for(SPIRVIterator it = editor.BeginTypes(), end = editor.EndTypes(); it != end; ++it)
|
|
{
|
|
// rewrite any input/output variables to private, and build up inputs/outputs list
|
|
if(it.opcode() == spv::OpTypePointer)
|
|
{
|
|
SPIRVId id;
|
|
|
|
if(it.word(2) == spv::StorageClassInput)
|
|
{
|
|
id = it.word(1);
|
|
inputs.insert(id);
|
|
}
|
|
else if(it.word(2) == spv::StorageClassOutput)
|
|
{
|
|
id = it.word(1);
|
|
outputs.insert(id);
|
|
|
|
SPIRVId baseId = it.word(3);
|
|
|
|
SPIRVIterator baseIt = editor.GetID(baseId);
|
|
if(baseIt && baseIt.opcode() == spv::OpTypeStruct)
|
|
outputs.insert(baseId);
|
|
}
|
|
|
|
if(id)
|
|
{
|
|
SPIRVPointer privPtr(it.word(3), spv::StorageClassPrivate);
|
|
|
|
SPIRVId origId = editor.GetType(privPtr);
|
|
|
|
if(origId)
|
|
{
|
|
// if we already had a private pointer for this type, we have to use that type - we can't
|
|
// create a new type by aliasing. Thus we need to replace any uses of 'id' with 'origId'.
|
|
typeReplacements[id] = origId;
|
|
|
|
// and remove this type declaration
|
|
editor.Remove(it);
|
|
}
|
|
else
|
|
{
|
|
editor.PreModify(it);
|
|
|
|
it.word(2) = spv::StorageClassPrivate;
|
|
|
|
// if we didn't already have this pointer, process the modified type declaration
|
|
editor.PostModify(it);
|
|
}
|
|
}
|
|
}
|
|
else if(it.opcode() == spv::OpVariable)
|
|
{
|
|
bool mod = false;
|
|
|
|
if(it.word(3) == spv::StorageClassInput)
|
|
{
|
|
mod = true;
|
|
editor.PreModify(it);
|
|
it.word(3) = spv::StorageClassPrivate;
|
|
|
|
inputs.insert(it.word(2));
|
|
}
|
|
else if(it.word(3) == spv::StorageClassOutput)
|
|
{
|
|
mod = true;
|
|
editor.PreModify(it);
|
|
it.word(3) = spv::StorageClassPrivate;
|
|
|
|
outputs.insert(it.word(2));
|
|
}
|
|
|
|
auto replIt = typeReplacements.find(it.word(1));
|
|
if(replIt != typeReplacements.end())
|
|
{
|
|
mod = true;
|
|
if(!mod)
|
|
editor.PreModify(it);
|
|
it.word(1) = typeReplacements[it.word(1)];
|
|
}
|
|
|
|
if(mod)
|
|
editor.PostModify(it);
|
|
}
|
|
else if(it.opcode() == spv::OpTypeFunction)
|
|
{
|
|
bool mod = false;
|
|
|
|
auto replIt = typeReplacements.find(it.word(1));
|
|
if(replIt != typeReplacements.end())
|
|
{
|
|
editor.PreModify(it);
|
|
mod = true;
|
|
it.word(1) = typeReplacements[it.word(1)];
|
|
}
|
|
|
|
for(size_t i = 4; i < it.size(); it++)
|
|
{
|
|
replIt = typeReplacements.find(it.word(i));
|
|
if(replIt != typeReplacements.end())
|
|
{
|
|
if(!mod)
|
|
editor.PreModify(it);
|
|
mod = true;
|
|
it.word(i) = typeReplacements[it.word(i)];
|
|
}
|
|
}
|
|
|
|
if(mod)
|
|
editor.PostModify(it);
|
|
}
|
|
else if(it.opcode() == spv::OpConstantNull)
|
|
{
|
|
auto replIt = typeReplacements.find(it.word(1));
|
|
if(replIt != typeReplacements.end())
|
|
{
|
|
editor.PreModify(it);
|
|
it.word(1) = typeReplacements[it.word(1)];
|
|
editor.PostModify(it);
|
|
}
|
|
}
|
|
}
|
|
|
|
for(SPIRVIterator it = editor.BeginFunctions(); it; ++it)
|
|
{
|
|
// identify functions with result types we might want to replace
|
|
if(it.opcode() == spv::OpFunction || it.opcode() == spv::OpFunctionParameter ||
|
|
it.opcode() == spv::OpVariable || it.opcode() == spv::OpAccessChain ||
|
|
it.opcode() == spv::OpInBoundsAccessChain || it.opcode() == spv::OpBitcast ||
|
|
it.opcode() == spv::OpUndef || it.opcode() == spv::OpExtInst ||
|
|
it.opcode() == spv::OpFunctionCall || it.opcode() == spv::OpPhi)
|
|
{
|
|
editor.PreModify(it);
|
|
|
|
uint32_t &id = it.word(1);
|
|
auto replIt = typeReplacements.find(id);
|
|
if(replIt != typeReplacements.end())
|
|
id = typeReplacements[id];
|
|
|
|
editor.PostModify(it);
|
|
}
|
|
}
|
|
|
|
// detect builtin inputs or outputs, and remove builtin decorations
|
|
for(SPIRVIterator it = editor.BeginDecorations(), end = editor.EndDecorations(); it != end; ++it)
|
|
{
|
|
// remove any builtin decorations
|
|
if(it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationBuiltIn)
|
|
{
|
|
SPIRVId id = it.word(1);
|
|
|
|
if(outputs.find(id) != outputs.end())
|
|
{
|
|
// outputs we don't have to do anything, discard the builtin information
|
|
}
|
|
else if(inputs.find(id) != inputs.end())
|
|
{
|
|
// for inputs, record the variable ID for this builtin
|
|
for(size_t i = 0; i < refl.inputSignature.size(); i++)
|
|
{
|
|
const SigParameter &sig = refl.inputSignature[i];
|
|
|
|
if(sig.systemValue ==
|
|
BuiltInToSystemAttribute(ShaderStage::Vertex, (spv::BuiltIn)it.word(3)))
|
|
{
|
|
ins[i].variableID = id;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
editor.Remove(it);
|
|
}
|
|
|
|
if(it.opcode() == spv::OpMemberDecorate && it.word(3) == spv::DecorationBuiltIn)
|
|
editor.Remove(it);
|
|
|
|
// remove block decoration from input or output structs
|
|
if(it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationBlock)
|
|
{
|
|
SPIRVId id = it.word(1);
|
|
|
|
if(outputs.find(id) != outputs.end() || inputs.find(id) != inputs.end())
|
|
editor.Remove(it);
|
|
}
|
|
|
|
// remove all invariant decoreations
|
|
if(it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationInvariant)
|
|
{
|
|
editor.Remove(it);
|
|
}
|
|
|
|
if(it.opcode() == spv::OpDecorate && it.word(2) == spv::DecorationLocation)
|
|
{
|
|
SPIRVId id = it.word(1);
|
|
|
|
if(outputs.find(id) != outputs.end())
|
|
{
|
|
// outputs we don't have to do anything, discard the location information
|
|
}
|
|
else if(inputs.find(id) != inputs.end())
|
|
{
|
|
// for inputs, record the variable ID for this location
|
|
for(size_t i = 0; i < refl.inputSignature.size(); i++)
|
|
{
|
|
const SigParameter &sig = refl.inputSignature[i];
|
|
|
|
if(sig.systemValue == ShaderBuiltin::Undefined && sig.regIndex == it.word(3))
|
|
{
|
|
ins[i].variableID = id;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
editor.Remove(it);
|
|
}
|
|
}
|
|
|
|
SPIRVId entryID = 0;
|
|
|
|
std::set<SPIRVId> entries;
|
|
|
|
for(const SPIRVEntry &entry : editor.GetEntries())
|
|
{
|
|
if(entry.name == entryName)
|
|
entryID = entry.id;
|
|
|
|
entries.insert(entry.id);
|
|
}
|
|
|
|
RDCASSERT(entryID);
|
|
|
|
for(SPIRVIterator it = editor.BeginDebug(), end2 = editor.EndDebug(); it != end2; ++it)
|
|
{
|
|
if(it.opcode() == spv::OpName &&
|
|
(inputs.find(it.word(1)) != inputs.end() || outputs.find(it.word(1)) != outputs.end()))
|
|
{
|
|
SPIRVId id = it.word(1);
|
|
std::string oldName = (const char *)&it.word(2);
|
|
editor.Remove(it);
|
|
editor.SetName(id, ("emulated_" + oldName).c_str());
|
|
}
|
|
|
|
// remove any OpName for the old entry points
|
|
if(it.opcode() == spv::OpName && entries.find(it.word(1)) != entries.end())
|
|
editor.Remove(it);
|
|
}
|
|
|
|
// declare necessary variables per-output, types and constants. We do this last so that we don't
|
|
// add a private pointer that we later try and deduplicate when collapsing output/input pointers
|
|
// to private
|
|
for(uint32_t i = 0; i < numOutputs; i++)
|
|
{
|
|
inputOutputIDs &io = outs[i];
|
|
|
|
io.builtin = refl.outputSignature[i].systemValue;
|
|
|
|
// constant for this index
|
|
io.constID = editor.AddConstantImmediate(i);
|
|
|
|
io.variableID = patchData.outputs[i].ID;
|
|
|
|
// base type - either a scalar or a vector, since matrix outputs are decayed to vectors
|
|
{
|
|
SPIRVScalar scalarType = scalar<uint32_t>();
|
|
|
|
if(refl.outputSignature[i].compType == CompType::UInt)
|
|
scalarType = scalar<uint32_t>();
|
|
else if(refl.outputSignature[i].compType == CompType::SInt)
|
|
scalarType = scalar<int32_t>();
|
|
else if(refl.outputSignature[i].compType == CompType::Float)
|
|
scalarType = scalar<float>();
|
|
else if(refl.outputSignature[i].compType == CompType::Double)
|
|
scalarType = scalar<double>();
|
|
|
|
io.vec4ID = editor.DeclareType(SPIRVVector(scalarType, 4));
|
|
|
|
if(refl.outputSignature[i].compCount > 1)
|
|
io.basetypeID =
|
|
editor.DeclareType(SPIRVVector(scalarType, refl.outputSignature[i].compCount));
|
|
else
|
|
io.basetypeID = editor.DeclareType(scalarType);
|
|
}
|
|
|
|
io.uniformPtrID = editor.DeclareType(SPIRVPointer(io.basetypeID, spv::StorageClassUniform));
|
|
io.privatePtrID = editor.DeclareType(SPIRVPointer(io.basetypeID, spv::StorageClassPrivate));
|
|
|
|
RDCASSERT(io.basetypeID && io.vec4ID && io.constID && io.privatePtrID && io.uniformPtrID,
|
|
io.basetypeID, io.vec4ID, io.constID, io.privatePtrID, io.uniformPtrID);
|
|
}
|
|
|
|
// repeat for inputs
|
|
for(uint32_t i = 0; i < numInputs; i++)
|
|
{
|
|
inputOutputIDs &io = ins[i];
|
|
|
|
io.builtin = refl.inputSignature[i].systemValue;
|
|
|
|
// constant for this index
|
|
io.constID = editor.AddConstantImmediate(i);
|
|
|
|
SPIRVScalar scalarType = scalar<uint32_t>();
|
|
|
|
// base type - either a scalar or a vector, since matrix outputs are decayed to vectors
|
|
if(refl.inputSignature[i].compType == CompType::UInt)
|
|
{
|
|
scalarType = scalar<uint32_t>();
|
|
io.tbuffer = tbuffer_uint;
|
|
}
|
|
else if(refl.inputSignature[i].compType == CompType::SInt)
|
|
{
|
|
scalarType = scalar<int32_t>();
|
|
io.tbuffer = tbuffer_sint;
|
|
}
|
|
else if(refl.inputSignature[i].compType == CompType::Float)
|
|
{
|
|
scalarType = scalar<float>();
|
|
io.tbuffer = tbuffer_float;
|
|
}
|
|
else if(refl.inputSignature[i].compType == CompType::Double)
|
|
{
|
|
RDCERR("Double inputs are not supported, will be undefined");
|
|
scalarType = scalar<double>();
|
|
}
|
|
|
|
io.vec4ID = editor.DeclareType(SPIRVVector(scalarType, 4));
|
|
|
|
if(refl.inputSignature[i].compCount > 1)
|
|
io.basetypeID = editor.DeclareType(SPIRVVector(scalarType, refl.inputSignature[i].compCount));
|
|
else
|
|
io.basetypeID = editor.DeclareType(scalarType);
|
|
|
|
io.privatePtrID = editor.DeclareType(SPIRVPointer(io.basetypeID, spv::StorageClassPrivate));
|
|
|
|
RDCASSERT(io.basetypeID && io.vec4ID && io.constID && io.privatePtrID, io.basetypeID, io.vec4ID,
|
|
io.constID, io.privatePtrID);
|
|
}
|
|
|
|
struct tbufferIDs
|
|
{
|
|
uint32_t imageTypeID;
|
|
uint32_t imageSampledTypeID;
|
|
uint32_t pointerTypeID;
|
|
uint32_t variableID;
|
|
} tbuffers[tbuffer_count];
|
|
|
|
uint32_t arraySize = editor.AddConstantImmediate<uint32_t>(MeshOutputTBufferArraySize);
|
|
|
|
for(tbufferType tb : {tbuffer_float, tbuffer_sint, tbuffer_uint})
|
|
{
|
|
SPIRVScalar scalarType = scalar<float>();
|
|
const char *name = "float_vbuffers";
|
|
|
|
if(tb == tbuffer_sint)
|
|
{
|
|
scalarType = scalar<int32_t>();
|
|
name = "int_vbuffers";
|
|
}
|
|
else if(tb == tbuffer_uint)
|
|
{
|
|
scalarType = scalar<uint32_t>();
|
|
name = "uint_vbuffers";
|
|
}
|
|
|
|
tbuffers[tb].imageTypeID = editor.DeclareType(
|
|
SPIRVImage(scalarType, spv::DimBuffer, 0, 0, 0, 1, spv::ImageFormatUnknown));
|
|
tbuffers[tb].imageSampledTypeID = editor.DeclareType(SPIRVSampledImage(tbuffers[tb].imageTypeID));
|
|
|
|
uint32_t arrayType = editor.MakeId();
|
|
editor.AddType(
|
|
SPIRVOperation(spv::OpTypeArray, {arrayType, tbuffers[tb].imageSampledTypeID, arraySize}));
|
|
|
|
uint32_t arrayPtrType =
|
|
editor.DeclareType(SPIRVPointer(arrayType, spv::StorageClassUniformConstant));
|
|
|
|
tbuffers[tb].pointerTypeID = editor.DeclareType(
|
|
SPIRVPointer(tbuffers[tb].imageSampledTypeID, spv::StorageClassUniformConstant));
|
|
|
|
tbuffers[tb].variableID = editor.MakeId();
|
|
editor.AddVariable(SPIRVOperation(
|
|
spv::OpVariable, {arrayPtrType, tbuffers[tb].variableID, spv::StorageClassUniformConstant}));
|
|
|
|
editor.SetName(tbuffers[tb].variableID, name);
|
|
|
|
editor.AddDecoration(SPIRVOperation(
|
|
spv::OpDecorate, {tbuffers[tb].variableID, (uint32_t)spv::DecorationDescriptorSet, descSet}));
|
|
editor.AddDecoration(SPIRVOperation(
|
|
spv::OpDecorate, {tbuffers[tb].variableID, (uint32_t)spv::DecorationBinding, (uint32_t)tb}));
|
|
}
|
|
|
|
SPIRVId uint32Vec4ID = 0;
|
|
SPIRVId idxImageTypeID = 0;
|
|
SPIRVId idxImagePtr = 0;
|
|
SPIRVId idxSampledTypeID = 0;
|
|
|
|
if(draw->flags & DrawFlags::UseIBuffer)
|
|
{
|
|
uint32Vec4ID = editor.DeclareType(SPIRVVector(scalar<uint32_t>(), 4));
|
|
|
|
idxImageTypeID = editor.DeclareType(
|
|
SPIRVImage(scalar<uint32_t>(), spv::DimBuffer, 0, 0, 0, 1, spv::ImageFormatUnknown));
|
|
idxSampledTypeID = editor.DeclareType(SPIRVSampledImage(idxImageTypeID));
|
|
|
|
uint32_t idxImagePtrType =
|
|
editor.DeclareType(SPIRVPointer(idxSampledTypeID, spv::StorageClassUniformConstant));
|
|
|
|
idxImagePtr = editor.MakeId();
|
|
editor.AddVariable(SPIRVOperation(
|
|
spv::OpVariable, {idxImagePtrType, idxImagePtr, spv::StorageClassUniformConstant}));
|
|
|
|
editor.SetName(idxImagePtr, "ibuffer");
|
|
|
|
editor.AddDecoration(SPIRVOperation(
|
|
spv::OpDecorate, {idxImagePtr, (uint32_t)spv::DecorationDescriptorSet, descSet}));
|
|
editor.AddDecoration(
|
|
SPIRVOperation(spv::OpDecorate, {idxImagePtr, (uint32_t)spv::DecorationBinding, 1}));
|
|
}
|
|
|
|
if(numInputs > 0)
|
|
{
|
|
editor.AddCapability(spv::CapabilitySampledBuffer);
|
|
}
|
|
|
|
SPIRVId outBufferVarID = 0;
|
|
SPIRVId numFetchVertsConstID = editor.AddConstantImmediate((int32_t)numFetchVerts);
|
|
SPIRVId numVertsConstID = editor.AddConstantImmediate((int32_t)numVerts);
|
|
SPIRVId numInstConstID = editor.AddConstantImmediate((int32_t)draw->numInstances);
|
|
|
|
editor.SetName(numFetchVertsConstID, "numFetchVerts");
|
|
editor.SetName(numVertsConstID, "numVerts");
|
|
editor.SetName(numInstConstID, "numInsts");
|
|
|
|
// declare the output buffer and its type
|
|
{
|
|
std::vector<uint32_t> words;
|
|
for(uint32_t o = 0; o < numOutputs; o++)
|
|
words.push_back(outs[o].basetypeID);
|
|
|
|
// struct vertex { ... outputs };
|
|
SPIRVId vertStructID = editor.DeclareStructType(words);
|
|
editor.SetName(vertStructID, "vertex_struct");
|
|
|
|
// vertex vertArray[];
|
|
SPIRVId runtimeArrayID =
|
|
editor.AddType(SPIRVOperation(spv::OpTypeRuntimeArray, {editor.MakeId(), vertStructID}));
|
|
editor.SetName(runtimeArrayID, "vertex_array");
|
|
|
|
// struct meshOutput { vertex vertArray[]; };
|
|
SPIRVId outputStructID = editor.DeclareStructType({runtimeArrayID});
|
|
editor.SetName(outputStructID, "meshOutput");
|
|
|
|
// meshOutput *
|
|
SPIRVId outputStructPtrID =
|
|
editor.DeclareType(SPIRVPointer(outputStructID, spv::StorageClassUniform));
|
|
editor.SetName(outputStructPtrID, "meshOutput_ptr");
|
|
|
|
// meshOutput *outputData;
|
|
outBufferVarID = editor.AddVariable(SPIRVOperation(
|
|
spv::OpVariable, {outputStructPtrID, editor.MakeId(), spv::StorageClassUniform}));
|
|
editor.SetName(outBufferVarID, "outputData");
|
|
|
|
uint32_t memberOffset = 0;
|
|
for(uint32_t o = 0; o < numOutputs; o++)
|
|
{
|
|
uint32_t elemSize = 0;
|
|
if(refl.outputSignature[o].compType == CompType::Double)
|
|
elemSize = 8;
|
|
else if(refl.outputSignature[o].compType == CompType::SInt ||
|
|
refl.outputSignature[o].compType == CompType::UInt ||
|
|
refl.outputSignature[o].compType == CompType::Float)
|
|
elemSize = 4;
|
|
else
|
|
RDCERR("Unexpected component type for output signature element");
|
|
|
|
uint32_t numComps = refl.outputSignature[o].compCount;
|
|
|
|
// ensure member is std430 packed (vec4 alignment for vec3/vec4)
|
|
if(numComps == 2)
|
|
memberOffset = AlignUp(memberOffset, 2U * elemSize);
|
|
else if(numComps > 2)
|
|
memberOffset = AlignUp(memberOffset, 4U * elemSize);
|
|
|
|
// apply decoration to each member in the struct with its offset in the struct
|
|
editor.AddDecoration(SPIRVOperation(spv::OpMemberDecorate,
|
|
{vertStructID, o, spv::DecorationOffset, memberOffset}));
|
|
|
|
memberOffset += elemSize * refl.outputSignature[o].compCount;
|
|
}
|
|
|
|
// align to 16 bytes (vec4) since we will almost certainly have
|
|
// a vec4 in the struct somewhere, and even in std430 alignment,
|
|
// the base struct alignment is still the largest base alignment
|
|
// of any member
|
|
bufStride = AlignUp16(memberOffset);
|
|
|
|
// the array is the only element in the output struct, so
|
|
// it's at offset 0
|
|
editor.AddDecoration(
|
|
SPIRVOperation(spv::OpMemberDecorate, {outputStructID, 0, spv::DecorationOffset, 0}));
|
|
|
|
// set array stride
|
|
editor.AddDecoration(
|
|
SPIRVOperation(spv::OpDecorate, {runtimeArrayID, spv::DecorationArrayStride, bufStride}));
|
|
|
|
// set object type
|
|
editor.AddDecoration(
|
|
SPIRVOperation(spv::OpDecorate, {outputStructID, spv::DecorationBufferBlock}));
|
|
|
|
// set binding
|
|
editor.AddDecoration(
|
|
SPIRVOperation(spv::OpDecorate, {outBufferVarID, spv::DecorationDescriptorSet, descSet}));
|
|
editor.AddDecoration(SPIRVOperation(spv::OpDecorate, {outBufferVarID, spv::DecorationBinding, 0}));
|
|
}
|
|
|
|
SPIRVId uint32Vec3ID = editor.DeclareType(SPIRVVector(scalar<uint32_t>(), 3));
|
|
SPIRVId invocationPtr = editor.DeclareType(SPIRVPointer(uint32Vec3ID, spv::StorageClassInput));
|
|
SPIRVId invocationId = editor.AddVariable(
|
|
SPIRVOperation(spv::OpVariable, {invocationPtr, editor.MakeId(), spv::StorageClassInput}));
|
|
editor.AddDecoration(SPIRVOperation(
|
|
spv::OpDecorate, {invocationId, spv::DecorationBuiltIn, spv::BuiltInGlobalInvocationId}));
|
|
|
|
editor.SetName(invocationId, "rdoc_invocation");
|
|
|
|
// make a new entry point that will call the old function, then when it returns extract & write
|
|
// the outputs.
|
|
SPIRVId wrapperEntry = editor.MakeId();
|
|
// don't set a debug name, as some drivers get confused when this doesn't match the entry point
|
|
// name :(.
|
|
// editor.SetName(wrapperEntry, "RenderDoc_MeshFetch_Wrapper_Entrypoint");
|
|
|
|
// we remove all entry points and just create one of our own.
|
|
SPIRVIterator it = editor.BeginEntries();
|
|
|
|
{
|
|
// there should already have been at least one entry point
|
|
RDCASSERT(it.opcode() == spv::OpEntryPoint);
|
|
// and it should have been at least 5 words (if not more) since a vertex shader cannot function
|
|
// without at least one interface ID. We only need one, so there should be plenty space.
|
|
RDCASSERT(it.size() >= 5);
|
|
|
|
editor.PreModify(it);
|
|
|
|
SPIRVOperation op(it);
|
|
|
|
op.nopRemove(5);
|
|
|
|
op[1] = spv::ExecutionModelGLCompute;
|
|
op[2] = wrapperEntry;
|
|
op[3] = MAKE_FOURCC('r', 'd', 'c', 0);
|
|
op[4] = invocationId;
|
|
|
|
editor.PostModify(it);
|
|
|
|
++it;
|
|
}
|
|
|
|
for(SPIRVIterator end = editor.EndEntries(); it != end; ++it)
|
|
editor.Remove(it);
|
|
|
|
editor.AddOperation(
|
|
it, SPIRVOperation(spv::OpExecutionMode, {wrapperEntry, spv::ExecutionModeLocalSize,
|
|
MeshOutputDispatchWidth, 1, 1}));
|
|
|
|
SPIRVId uint32ID = editor.DeclareType(scalar<uint32_t>());
|
|
SPIRVId sint32ID = editor.DeclareType(scalar<int32_t>());
|
|
|
|
// add the wrapper function
|
|
{
|
|
std::vector<SPIRVOperation> ops;
|
|
|
|
SPIRVId voidType = editor.DeclareType(scalar<void>());
|
|
SPIRVId funcType = editor.DeclareType(SPIRVFunction(voidType, {}));
|
|
|
|
ops.push_back(SPIRVOperation(spv::OpFunction,
|
|
{voidType, wrapperEntry, spv::FunctionControlMaskNone, funcType}));
|
|
|
|
ops.push_back(SPIRVOperation(spv::OpLabel, {editor.MakeId()}));
|
|
{
|
|
// uint3 invocationVec = gl_GlobalInvocationID;
|
|
uint32_t invocationVector = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpLoad, {uint32Vec3ID, invocationVector, invocationId}));
|
|
|
|
// uint invocation = invocationVec.x
|
|
uint32_t invocationID = editor.MakeId();
|
|
ops.push_back(
|
|
SPIRVOperation(spv::OpCompositeExtract, {uint32ID, invocationID, invocationVector, 0U}));
|
|
|
|
// int intInvocationID = int(invocation);
|
|
uint32_t intInvocationID = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpBitcast, {sint32ID, intInvocationID, invocationID}));
|
|
|
|
editor.SetName(intInvocationID, "invocation");
|
|
|
|
// int inst = intInvocationID / numFetchVerts
|
|
uint32_t instID = editor.MakeId();
|
|
ops.push_back(
|
|
SPIRVOperation(spv::OpSDiv, {sint32ID, instID, intInvocationID, numFetchVertsConstID}));
|
|
|
|
editor.SetName(instID, "instanceID");
|
|
|
|
// bool inBounds = inst < numInstances;
|
|
uint32_t inBounds = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(
|
|
spv::OpULessThan, {editor.DeclareType(scalar<bool>()), inBounds, instID, numInstConstID}));
|
|
|
|
// if(inBounds) goto continueLabel; else goto killLabel;
|
|
uint32_t killLabel = editor.MakeId();
|
|
uint32_t continueLabel = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpSelectionMerge, {killLabel, spv::SelectionControlMaskNone}));
|
|
ops.push_back(SPIRVOperation(spv::OpBranchConditional, {inBounds, continueLabel, killLabel}));
|
|
|
|
// continueLabel:
|
|
ops.push_back(SPIRVOperation(spv::OpLabel, {continueLabel}));
|
|
|
|
// int vtx = intInvocationID % numVerts
|
|
uint32_t vtx = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpSMod, {sint32ID, vtx, intInvocationID, numVertsConstID}));
|
|
|
|
editor.SetName(vtx, "vertexID");
|
|
|
|
uint32_t vertexIndex = vtx;
|
|
|
|
// if we're indexing, look up the index buffer. We don't have to apply vertexOffset - it was
|
|
// already applied when we read back and uniq-ified the index buffer.
|
|
if(draw->flags & DrawFlags::UseIBuffer)
|
|
{
|
|
// sampledimage idximg = *idximgPtr;
|
|
uint32_t loaded = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpLoad, {idxSampledTypeID, loaded, idxImagePtr}));
|
|
|
|
// image rawimg = imageFromSampled(idximg);
|
|
uint32_t rawimg = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpImage, {idxImageTypeID, rawimg, loaded}));
|
|
|
|
// uvec4 result = texelFetch(rawimg, vtxID);
|
|
uint32_t result = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpImageFetch, {uint32Vec4ID, result, rawimg, vertexIndex}));
|
|
|
|
// uint vtxID = result.x;
|
|
uint32_t uintIndex = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpCompositeExtract, {uint32ID, uintIndex, result, 0}));
|
|
|
|
vertexIndex = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpBitcast, {sint32ID, vertexIndex, uintIndex}));
|
|
}
|
|
|
|
// int arraySlotID = inst * numVerts;
|
|
uint32_t arraySlotTempID = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpIMul, {sint32ID, arraySlotTempID, instID, numVertsConstID}));
|
|
|
|
// arraySlotID = arraySlotID + vertexIndex;
|
|
uint32_t arraySlotTemp2ID = editor.MakeId();
|
|
ops.push_back(
|
|
SPIRVOperation(spv::OpIAdd, {sint32ID, arraySlotTemp2ID, arraySlotTempID, vertexIndex}));
|
|
|
|
// arraySlotID = arraySlotID + indexOffset;
|
|
uint32_t arraySlotID = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpIAdd, {sint32ID, arraySlotID, arraySlotTemp2ID,
|
|
editor.AddConstantImmediate(indexOffset)}));
|
|
|
|
editor.SetName(arraySlotID, "arraySlot");
|
|
|
|
// we use the current value of vertexIndex and use instID, to lookup per-vertex and
|
|
// per-instance attributes. This is because when we fetched the vertex data, we advanced by
|
|
// (in non-indexed draws) vertexOffset, and by instanceOffset. Rather than fetching data
|
|
// that's only used as padding skipped over by these offsets.
|
|
uint32_t vertexLookup = vertexIndex;
|
|
uint32_t instanceLookup = instID;
|
|
|
|
if(!(draw->flags & DrawFlags::UseIBuffer))
|
|
{
|
|
// for non-indexed draws, we manually apply the vertex offset, but here after we used the
|
|
// 0-based one to calculate the array slot
|
|
vertexIndex = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(
|
|
spv::OpIAdd, {sint32ID, vertexIndex, vtx,
|
|
editor.AddConstantImmediate(int32_t(draw->vertexOffset & 0x7fffffff))}));
|
|
}
|
|
editor.SetName(vertexIndex, "vertexIndex");
|
|
|
|
// instIndex = inst + instOffset
|
|
uint32_t instIndex = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(
|
|
spv::OpIAdd, {sint32ID, instIndex, instID,
|
|
editor.AddConstantImmediate(int32_t(draw->instanceOffset & 0x7fffffff))}));
|
|
editor.SetName(instIndex, "instanceIndex");
|
|
|
|
uint32_t idxs[64] = {};
|
|
|
|
for(size_t i = 0; i < refl.inputSignature.size(); i++)
|
|
{
|
|
ShaderBuiltin builtin = refl.inputSignature[i].systemValue;
|
|
|
|
if(builtin == ShaderBuiltin::VertexIndex)
|
|
{
|
|
ops.push_back(SPIRVOperation(spv::OpStore, {ins[i].variableID, vertexIndex}));
|
|
}
|
|
else if(builtin == ShaderBuiltin::InstanceIndex)
|
|
{
|
|
ops.push_back(SPIRVOperation(spv::OpStore, {ins[i].variableID, instIndex}));
|
|
}
|
|
else if(builtin != ShaderBuiltin::Undefined)
|
|
{
|
|
RDCERR("Unsupported/unsupported built-in input %s", ToStr(builtin).c_str());
|
|
}
|
|
else
|
|
{
|
|
if(idxs[i] == 0)
|
|
idxs[i] = editor.AddConstantImmediate<uint32_t>((uint32_t)i);
|
|
|
|
if(idxs[refl.inputSignature[i].regIndex] == 0)
|
|
idxs[refl.inputSignature[i].regIndex] =
|
|
editor.AddConstantImmediate<uint32_t>((uint32_t)refl.inputSignature[i].regIndex);
|
|
|
|
tbufferIDs tb = tbuffers[ins[i].tbuffer];
|
|
|
|
uint32_t location = refl.inputSignature[i].regIndex;
|
|
|
|
uint32_t ptrId = editor.MakeId();
|
|
// sampledimage *imgPtr = xxx_tbuffers[i];
|
|
ops.push_back(SPIRVOperation(spv::OpAccessChain, {tb.pointerTypeID, ptrId, tb.variableID,
|
|
idxs[refl.inputSignature[i].regIndex]}));
|
|
|
|
// sampledimage img = *imgPtr;
|
|
uint32_t loaded = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpLoad, {tb.imageSampledTypeID, loaded, ptrId}));
|
|
|
|
// image rawimg = imageFromSampled(img);
|
|
uint32_t rawimg = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpImage, {tb.imageTypeID, rawimg, loaded}));
|
|
|
|
// vec4 result = texelFetch(rawimg, vtxID or instID);
|
|
uint32_t idx = location < isInstanced.size() && isInstanced[location] ? instanceLookup
|
|
: vertexLookup;
|
|
uint32_t result = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(spv::OpImageFetch, {ins[i].vec4ID, result, rawimg, idx}));
|
|
|
|
// for one component, extract x, for less than 4, extract the sub-vector, otherwise
|
|
// leave
|
|
// alone (4 components)
|
|
if(refl.inputSignature[i].compCount == 1)
|
|
{
|
|
uint32_t swizzleIn = result;
|
|
result = editor.MakeId();
|
|
|
|
// baseType value = result.x;
|
|
ops.push_back(
|
|
SPIRVOperation(spv::OpCompositeExtract, {ins[i].basetypeID, result, swizzleIn, 0}));
|
|
}
|
|
else if(refl.inputSignature[i].compCount != 4)
|
|
{
|
|
uint32_t swizzleIn = result;
|
|
result = editor.MakeId();
|
|
|
|
std::vector<uint32_t> words = {ins[i].basetypeID, result, swizzleIn, swizzleIn};
|
|
|
|
for(uint32_t c = 0; c < refl.inputSignature[i].compCount; c++)
|
|
words.push_back(c);
|
|
|
|
// baseTypeN value = result.xyz;
|
|
ops.push_back(SPIRVOperation(spv::OpVectorShuffle, words));
|
|
}
|
|
|
|
// not a composite type, we can store directly
|
|
if(patchData.inputs[i].accessChain.empty())
|
|
{
|
|
// *global = value
|
|
ops.push_back(SPIRVOperation(spv::OpStore, {ins[i].variableID, result}));
|
|
}
|
|
else
|
|
{
|
|
// for composite types we need to access chain first
|
|
uint32_t subElement = editor.MakeId();
|
|
std::vector<uint32_t> words = {ins[i].privatePtrID, subElement, patchData.inputs[i].ID};
|
|
|
|
for(uint32_t accessIdx : patchData.inputs[i].accessChain)
|
|
{
|
|
if(idxs[accessIdx] == 0)
|
|
idxs[accessIdx] = editor.AddConstantImmediate<uint32_t>((uint32_t)accessIdx);
|
|
|
|
words.push_back(idxs[accessIdx]);
|
|
}
|
|
|
|
ops.push_back(SPIRVOperation(spv::OpAccessChain, words));
|
|
|
|
ops.push_back(SPIRVOperation(spv::OpStore, {subElement, result}));
|
|
}
|
|
}
|
|
}
|
|
|
|
// real_main();
|
|
ops.push_back(SPIRVOperation(spv::OpFunctionCall, {voidType, editor.MakeId(), entryID}));
|
|
|
|
SPIRVId zero = editor.AddConstantImmediate<uint32_t>(0);
|
|
|
|
for(uint32_t o = 0; o < numOutputs; o++)
|
|
{
|
|
uint32_t loaded = 0;
|
|
|
|
// not a structure member or array child, can load directly
|
|
if(patchData.outputs[o].accessChain.empty())
|
|
{
|
|
loaded = editor.MakeId();
|
|
// type loaded = *globalvar;
|
|
ops.push_back(
|
|
SPIRVOperation(spv::OpLoad, {outs[o].basetypeID, loaded, patchData.outputs[o].ID}));
|
|
}
|
|
else
|
|
{
|
|
uint32_t readPtr = editor.MakeId();
|
|
loaded = editor.MakeId();
|
|
|
|
// structure member, need to access chain first
|
|
std::vector<uint32_t> words = {outs[o].privatePtrID, readPtr, patchData.outputs[o].ID};
|
|
|
|
for(uint32_t idx : patchData.outputs[o].accessChain)
|
|
words.push_back(outs[idx].constID);
|
|
|
|
// type *readPtr = globalvar.globalsub...;
|
|
ops.push_back(SPIRVOperation(spv::OpAccessChain, words));
|
|
// type loaded = *readPtr;
|
|
ops.push_back(SPIRVOperation(spv::OpLoad, {outs[o].basetypeID, loaded, readPtr}));
|
|
}
|
|
|
|
// access chain the destination
|
|
// type *writePtr = outBuffer.verts[arraySlot].outputN
|
|
uint32_t writePtr = editor.MakeId();
|
|
ops.push_back(SPIRVOperation(
|
|
spv::OpAccessChain,
|
|
{outs[o].uniformPtrID, writePtr, outBufferVarID, zero, arraySlotID, outs[o].constID}));
|
|
|
|
// *writePtr = loaded;
|
|
ops.push_back(SPIRVOperation(spv::OpStore, {writePtr, loaded}));
|
|
}
|
|
|
|
// goto killLabel;
|
|
ops.push_back(SPIRVOperation(spv::OpBranch, {killLabel}));
|
|
|
|
// killLabel:
|
|
ops.push_back(SPIRVOperation(spv::OpLabel, {killLabel}));
|
|
}
|
|
ops.push_back(SPIRVOperation(spv::OpReturn, {}));
|
|
|
|
ops.push_back(SPIRVOperation(spv::OpFunctionEnd, {}));
|
|
|
|
editor.AddFunction(ops.data(), ops.size());
|
|
}
|
|
|
|
editor.StripNops();
|
|
}
|
|
|
|
void VulkanReplay::ClearPostVSCache()
|
|
{
|
|
VkDevice dev = m_Device;
|
|
|
|
for(auto it = m_PostVSData.begin(); it != m_PostVSData.end(); ++it)
|
|
{
|
|
m_pDriver->vkDestroyBuffer(dev, it->second.vsout.buf, NULL);
|
|
m_pDriver->vkFreeMemory(dev, it->second.vsout.bufmem, NULL);
|
|
}
|
|
|
|
m_PostVSData.clear();
|
|
}
|
|
|
|
// Generates and caches the post-vertex-shader output for the drawcall at eventId.
//
// The vertex shader bound in the current render state is patched into a compute shader (see
// ConvertToMeshOutputCompute) which fetches its inputs from texel buffers and writes its outputs
// into a storage buffer. This function:
//  - resolves event aliasing and returns early if data is already cached or there is nothing
//    to fetch (no pipeline/render pass, no vertex shader, no drawcall, empty draw),
//  - for indexed draws, builds a sorted list of unique (base-vertex-applied, clamped) indices,
//  - uploads a compacted copy of each vertex attribute as a uniform texel buffer,
//  - creates the patched compute pipeline, dispatches it, and copies the result to a readback
//    buffer,
//  - estimates near/far planes from the output positions (when output 0 is a position),
//  - stores the resulting mesh buffer and metadata in m_PostVSData[eventId].
//
// The mesh buffer and its memory stay alive in the cache (released by ClearPostVSCache); all
// other objects created here are destroyed before returning.
void VulkanReplay::InitPostVSBuffers(uint32_t eventId)
{
  // go through any aliasing
  if(m_PostVSAlias.find(eventId) != m_PostVSAlias.end())
    eventId = m_PostVSAlias[eventId];

  // already fetched for this event
  if(m_PostVSData.find(eventId) != m_PostVSData.end())
    return;

  const VulkanRenderState &state = m_pDriver->m_RenderState;
  VulkanCreationInfo &creationInfo = m_pDriver->m_CreationInfo;

  if(state.graphics.pipeline == ResourceId() || state.renderPass == ResourceId())
    return;

  const VulkanCreationInfo::Pipeline &pipeInfo = creationInfo.m_Pipeline[state.graphics.pipeline];

  // shaders[0] is the stage that gets patched below; nothing to do without it
  if(pipeInfo.shaders[0].module == ResourceId())
    return;

  const VulkanCreationInfo::ShaderModule &moduleInfo =
      creationInfo.m_ShaderModule[pipeInfo.shaders[0].module];

  ShaderReflection *refl = pipeInfo.shaders[0].refl;

  // no outputs from this shader? unexpected but theoretically possible (dummy VS before
  // tessellation maybe). Just fill out an empty data set
  if(refl->outputSignature.empty())
  {
    auto &emptyData = m_PostVSData[eventId];

    // empty vertex output signature
    emptyData.vsin.topo = pipeInfo.topology;
    emptyData.vsout.buf = VK_NULL_HANDLE;
    emptyData.vsout.instStride = 0;
    emptyData.vsout.vertStride = 0;
    emptyData.vsout.nearPlane = 0.0f;
    emptyData.vsout.farPlane = 0.0f;
    emptyData.vsout.useIndices = false;
    emptyData.vsout.hasPosOut = false;
    emptyData.vsout.idxBuf = ResourceId();

    emptyData.vsout.topo = pipeInfo.topology;

    return;
  }

  const DrawcallDescription *drawcall = m_pDriver->GetDrawcall(eventId);

  if(drawcall == NULL || drawcall->numIndices == 0 || drawcall->numInstances == 0)
    return;

  // the SPIR-V patching will determine the next descriptor set to use, after all sets statically
  // used by the shader. This gets around the problem where the shader only uses 0 and 1, but the
  // layout declares 0-4, and 2,3,4 are invalid at bind time and we are unable to bind our new set
  // 5. Instead we'll notice that only 0 and 1 are used and just use 2 ourselves (although it was
  // in the original set layout, we know it's statically unused by the shader so we can safely
  // steal it).
  uint32_t descSet = 0;

  // we go through the driver for all these creations since they need to be properly
  // registered in order to be put in the partial replay state
  VkResult vkr = VK_SUCCESS;
  VkDevice dev = m_Device;

  VkPipelineLayout pipeLayout = VK_NULL_HANDLE;

  VkGraphicsPipelineCreateInfo pipeCreateInfo;

  // get pipeline create info
  m_pDriver->GetShaderCache()->MakeGraphicsPipelineInfo(pipeCreateInfo, state.graphics.pipeline);

  VkBuffer meshBuffer = VK_NULL_HANDLE, readbackBuffer = VK_NULL_HANDLE;
  VkDeviceMemory meshMem = VK_NULL_HANDLE, readbackMem = VK_NULL_HANDLE;

  VkBuffer uniqIdxBuf = VK_NULL_HANDLE;
  VkDeviceMemory uniqIdxBufMem = VK_NULL_HANDLE;
  VkBufferView uniqIdxBufView = VK_NULL_HANDLE;

  uint32_t numVerts = drawcall->numIndices;
  uint64_t numFetchVerts = drawcall->numIndices;
  VkDeviceSize bufSize = 0;

  uint32_t idxsize = state.ibuffer.bytewidth;

  int32_t baseVertex = 0;

  // for non-indexed draws these bound the range of vertices fetched. For indexed draws they are
  // replaced below by the real range of indices referenced
  uint32_t minIndex = 0, maxIndex = RDCMAX(drawcall->baseVertex, 0) + numVerts - 1;

  uint32_t maxInstance = drawcall->instanceOffset + drawcall->numInstances - 1;

  if(drawcall->flags & DrawFlags::UseIBuffer)
  {
    bool index16 = (idxsize == 2);
    bytebuf idxdata;
    std::vector<uint32_t> indices;

    // fetch ibuffer
    GetBufferData(state.ibuffer.buf, state.ibuffer.offs + drawcall->indexOffset * idxsize,
                  uint64_t(drawcall->numIndices) * idxsize, idxdata);

    // figure out what the maximum index could be, so we can clamp our index buffer to something
    // sane
    uint32_t maxIdx = 0;

    // if there are no active bindings assume the vertex shader is generating its own data
    // and don't clamp the indices
    if(pipeCreateInfo.pVertexInputState->vertexBindingDescriptionCount == 0)
      maxIdx = ~0U;

    for(uint32_t b = 0; b < pipeCreateInfo.pVertexInputState->vertexBindingDescriptionCount; b++)
    {
      const VkVertexInputBindingDescription &input =
          pipeCreateInfo.pVertexInputState->pVertexBindingDescriptions[b];

      // only vertex inputs (not instance inputs) count
      if(input.inputRate == VK_VERTEX_INPUT_RATE_VERTEX)
      {
        if(b >= state.vbuffers.size())
          continue;

        ResourceId buf = state.vbuffers[b].buf;
        VkDeviceSize offs = state.vbuffers[b].offs;

        VkDeviceSize bufsize = creationInfo.m_Buffer[buf].size;

        // the maximum valid index on this particular input is the one that reaches
        // the end of the buffer. The maximum valid index at all is the one that reads
        // off the end of ALL buffers (so we max it with any other maxindex value
        // calculated). If the bind offset is past the end of the buffer there is no valid
        // index on this input, and we skip it rather than underflowing the subtraction.
        if(input.stride > 0 && bufsize > offs)
          maxIdx = RDCMAX(maxIdx, uint32_t((bufsize - offs) / input.stride));
      }
    }

    // in case the vertex buffers were set but had invalid stride (0), max with the number
    // of vertices too. This is fine since the max here is just a conservative limit
    maxIdx = RDCMAX(maxIdx, drawcall->numIndices);

    // do ibuffer rebasing/remapping

    // data() rather than &idxdata[0] so that an empty fetch doesn't index out of bounds. With no
    // data numIndices is 0 below and neither pointer is dereferenced.
    const uint16_t *idx16 = (const uint16_t *)idxdata.data();
    const uint32_t *idx32 = (const uint32_t *)idxdata.data();

    // only read as many indices as were available in the buffer
    uint32_t numIndices =
        RDCMIN(uint32_t(index16 ? idxdata.size() / 2 : idxdata.size() / 4), drawcall->numIndices);

    uint32_t idxclamp = 0;
    if(drawcall->baseVertex < 0)
      idxclamp = uint32_t(-drawcall->baseVertex);

    // grab all vertex indices referenced. We gather everything first and then sort and remove
    // duplicates in one pass, which gives the same sorted unique list as inserting each index in
    // order but without quadratic cost on large index buffers.
    indices.reserve(numIndices);

    for(uint32_t i = 0; i < numIndices; i++)
    {
      uint32_t i32 = index16 ? uint32_t(idx16[i]) : idx32[i];

      // apply baseVertex but clamp to 0 (don't allow index to become negative)
      if(i32 < idxclamp)
        i32 = 0;
      else if(drawcall->baseVertex < 0)
        i32 -= idxclamp;
      else if(drawcall->baseVertex > 0)
        i32 += drawcall->baseVertex;

      // we clamp to maxIdx here, to avoid any invalid indices like 0xffffffff
      // from filtering through. Worst case we index to the end of the vertex
      // buffers which is generally much more reasonable
      i32 = RDCMIN(maxIdx, i32);

      indices.push_back(i32);
    }

    std::sort(indices.begin(), indices.end());
    indices.erase(std::unique(indices.begin(), indices.end()), indices.end());

    // if we read out of bounds, we'll also have a 0 index being referenced
    // (as 0 is read). Don't insert 0 if we already have 0 though
    if(numIndices < drawcall->numIndices && (indices.empty() || indices[0] != 0))
      indices.insert(indices.begin(), 0);

    // indices is non-empty here: either every index was read (and numIndices > 0), or a 0 was
    // inserted above
    minIndex = indices[0];
    maxIndex = indices[indices.size() - 1];

    // set numVerts
    numVerts = maxIndex - minIndex + 1;
    numFetchVerts = (uint64_t)indices.size();

    // An index buffer could be something like: 500, 520, 518, 553, 554, 556
    // but in our vertex buffer that will be: 0, 20, 18, 53, 54, 56
    // so we add -minIndex as the baseVertex when rendering. The existing baseVertex was 'applied'
    // when we fetched the mesh output so it can be discarded.
    baseVertex = -(int32_t)minIndex;

    // create buffer with unique 0-based indices
    VkBufferCreateInfo bufInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        NULL,
        0,
        indices.size() * sizeof(uint32_t),
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    };

    vkr = m_pDriver->vkCreateBuffer(dev, &bufInfo, NULL, &uniqIdxBuf);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    VkMemoryRequirements mrq = {0};
    m_pDriver->vkGetBufferMemoryRequirements(dev, uniqIdxBuf, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size,
        m_pDriver->GetUploadMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = m_pDriver->vkAllocateMemory(dev, &allocInfo, NULL, &uniqIdxBufMem);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    vkr = m_pDriver->vkBindBufferMemory(dev, uniqIdxBuf, uniqIdxBufMem, 0);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    VkBufferViewCreateInfo viewInfo = {
        VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
        NULL,
        0,
        uniqIdxBuf,
        VK_FORMAT_R32_UINT,
        0,
        VK_WHOLE_SIZE,
    };

    vkr = m_pDriver->vkCreateBufferView(dev, &viewInfo, NULL, &uniqIdxBufView);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    byte *idxData = NULL;
    vkr = m_pDriver->vkMapMemory(m_Device, uniqIdxBufMem, 0, VK_WHOLE_SIZE, 0, (void **)&idxData);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    // don't write through a NULL pointer if the map failed
    if(idxData)
    {
      memcpy(idxData, indices.data(), indices.size() * sizeof(uint32_t));

      m_pDriver->vkUnmapMemory(m_Device, uniqIdxBufMem);
    }
  }

  uint32_t bufStride = 0;
  std::vector<uint32_t> modSpirv = moduleInfo.spirv.spirv;

  // one compacted, tightly packed copy of a single vertex attribute's data
  struct CompactedAttrBuffer
  {
    VkDeviceMemory mem;
    VkBuffer buf;
    VkBufferView view;
  };

  // indexed by attribute location, since that is how the patched shader looks entries up
  std::vector<bool> attrIsInstanced;
  CompactedAttrBuffer vbuffers[64];
  RDCEraseEl(vbuffers);

  {
    VkWriteDescriptorSet descWrites[64];
    uint32_t numWrites = 0;

    RDCEraseEl(descWrites);

    const VkPipelineVertexInputStateCreateInfo *vi = pipeCreateInfo.pVertexInputState;

    RDCASSERT(vi->vertexAttributeDescriptionCount <= MeshOutputTBufferArraySize);

    // we fetch the vertex buffer data up front here since there's a very high chance of either
    // overlap due to interleaved attributes, or no overlap and no wastage due to separate compact
    // attributes.
    // Sized from the binding count so that any number of bindings is handled.
    std::vector<bytebuf> origVBs(vi->vertexBindingDescriptionCount);

    for(uint32_t vb = 0; vb < vi->vertexBindingDescriptionCount; vb++)
    {
      // nothing bound at this index - leave the data empty rather than reading out of bounds of
      // the bound vertex buffer list (same guard as the max-index calculation above)
      if(vb >= state.vbuffers.size())
        continue;

      const VkVertexInputBindingDescription &vbDesc = vi->pVertexBindingDescriptions[vb];

      VkDeviceSize offs = state.vbuffers[vb].offs;
      uint64_t len = 0;

      // lengths are calculated in 64-bit so that large draws with wide strides don't wrap
      if(vbDesc.inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
      {
        len = (uint64_t(maxInstance) + 1) * vbDesc.stride;

        offs += drawcall->instanceOffset * vbDesc.stride;
      }
      else
      {
        len = (uint64_t(maxIndex) + 1) * vbDesc.stride;

        offs += drawcall->vertexOffset * vbDesc.stride;
      }

      GetBufferData(state.vbuffers[vb].buf, offs, len, origVBs[vb]);
    }

    for(uint32_t i = 0; i < vi->vertexAttributeDescriptionCount; i++)
    {
      const VkVertexInputAttributeDescription &attrDesc = vi->pVertexAttributeDescriptions[i];
      uint32_t attr = attrDesc.location;

      RDCASSERT(attr < 64);
      if(attr >= ARRAY_COUNT(vbuffers))
      {
        RDCERR("Attribute index too high! Resize array.");
        continue;
      }

      bool isInstanced = false;
      size_t stride = 1;

      const byte *origVBBegin = NULL;
      const byte *origVBEnd = NULL;

      // find the binding description this attribute reads from
      for(uint32_t vb = 0; vb < vi->vertexBindingDescriptionCount; vb++)
      {
        const VkVertexInputBindingDescription &vbDesc = vi->pVertexBindingDescriptions[vb];
        if(vbDesc.binding == attrDesc.binding)
        {
          origVBBegin = origVBs[vb].data() + attrDesc.offset;
          origVBEnd = origVBs[vb].data() + origVBs[vb].size();
          stride = vbDesc.stride;
          isInstanced = (vbDesc.inputRate == VK_VERTEX_INPUT_RATE_INSTANCE);
          break;
        }
      }

      RDCASSERT(origVBEnd);

      // in some limited cases, provided we added the UNIFORM_TEXEL_BUFFER usage bit, we could use
      // the original buffers here as-is and read out of them. However it is likely that the offset
      // is not a multiple of the minimum texel buffer offset for at least some of the buffers if
      // not all of them, so we simplify the code here by *always* reading back the vertex buffer
      // data and uploading a compacted version.
      uint32_t elemSize = GetByteSize(1, 1, 1, attrDesc.format, 0);

      {
        // one element per vertex, or per instance for instance-rate data. Calculated in 64-bit
        // so the size doesn't wrap.
        VkBufferCreateInfo bufInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            NULL,
            0,
            VkDeviceSize(elemSize) * (VkDeviceSize(maxIndex) + 1),
            VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        };

        if(isInstanced)
          bufInfo.size = VkDeviceSize(elemSize) * (VkDeviceSize(maxInstance) + 1);

        vkr = m_pDriver->vkCreateBuffer(dev, &bufInfo, NULL, &vbuffers[attr].buf);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        VkMemoryRequirements mrq = {0};
        m_pDriver->vkGetBufferMemoryRequirements(dev, vbuffers[attr].buf, &mrq);

        VkMemoryAllocateInfo allocInfo = {
            VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size,
            m_pDriver->GetUploadMemoryIndex(mrq.memoryTypeBits),
        };

        vkr = m_pDriver->vkAllocateMemory(dev, &allocInfo, NULL, &vbuffers[attr].mem);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        vkr = m_pDriver->vkBindBufferMemory(dev, vbuffers[attr].buf, vbuffers[attr].mem, 0);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        byte *compactedData = NULL;
        vkr = m_pDriver->vkMapMemory(m_Device, vbuffers[attr].mem, 0, VK_WHOLE_SIZE, 0,
                                     (void **)&compactedData);
        RDCASSERTEQUAL(vkr, VK_SUCCESS);

        if(compactedData && origVBEnd)
        {
          const byte *src = origVBBegin;
          byte *dst = compactedData;
          const byte *dstEnd = dst + bufInfo.size;

          // copy one element per stride. Stop as soon as a whole element is no longer available
          // in the source, so a truncated final element is never read past the end of the data.
          while(src < origVBEnd && size_t(origVBEnd - src) >= elemSize && dst < dstEnd)
          {
            memcpy(dst, src, elemSize);
            dst += elemSize;
            src += stride;
          }
        }

        // only unmap if the map succeeded
        if(compactedData)
          m_pDriver->vkUnmapMemory(m_Device, vbuffers[attr].mem);
      }

      VkBufferViewCreateInfo info = {
          VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
          NULL,
          0,
          vbuffers[attr].buf,
          attrDesc.format,
          0,
          VK_WHOLE_SIZE,
      };

      vkr = m_pDriver->vkCreateBufferView(dev, &info, NULL, &vbuffers[attr].view);
      RDCASSERTEQUAL(vkr, VK_SUCCESS);

      // the patched shader looks this up by input location, not by the order the attributes are
      // declared in, so store it at the location's slot
      if(attrIsInstanced.size() <= attr)
        attrIsInstanced.resize(attr + 1, false);
      attrIsInstanced[attr] = isInstanced;

      // float, uint and sint attributes each go to a separate texel buffer array binding
      descWrites[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
      descWrites[numWrites].dstSet = m_MeshFetchDescSet;
      if(IsSIntFormat(attrDesc.format))
        descWrites[numWrites].dstBinding = 4;
      else if(IsUIntFormat(attrDesc.format))
        descWrites[numWrites].dstBinding = 3;
      else
        descWrites[numWrites].dstBinding = 2;
      // NOTE(review): the array element is the description index, not the location. Confirm this
      // matches how the patched shader indexes the texel buffer arrays when the two differ.
      descWrites[numWrites].dstArrayElement = i;
      descWrites[numWrites].descriptorCount = 1;
      descWrites[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
      descWrites[numWrites].pTexelBufferView = &vbuffers[attr].view;
      numWrites++;
    }

    // add a write of the index buffer
    if(uniqIdxBufView != VK_NULL_HANDLE)
    {
      descWrites[numWrites].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
      descWrites[numWrites].dstSet = m_MeshFetchDescSet;
      descWrites[numWrites].dstBinding = 1;
      descWrites[numWrites].dstArrayElement = 0;
      descWrites[numWrites].descriptorCount = 1;
      descWrites[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
      descWrites[numWrites].pTexelBufferView = &uniqIdxBufView;
      numWrites++;
    }

    m_pDriver->vkUpdateDescriptorSets(dev, numWrites, descWrites, 0, NULL);
  }

  // patch the shader into a compute shader. descSet, modSpirv and bufStride are used below with
  // the values this call leaves in them.
  ConvertToMeshOutputCompute(*refl, *pipeInfo.shaders[0].patchData,
                             pipeInfo.shaders[0].entryPoint.c_str(), attrIsInstanced, descSet,
                             drawcall, baseVertex, numFetchVerts, numVerts, modSpirv, bufStride);

  VkComputePipelineCreateInfo compPipeInfo = {VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};

  {
    // descSet will be the index of our new descriptor set
    std::vector<VkDescriptorSetLayout> descSetLayouts(descSet + 1);

    for(uint32_t i = 0; i < descSet; i++)
      descSetLayouts[i] = m_pDriver->GetResourceManager()->GetCurrentHandle<VkDescriptorSetLayout>(
          creationInfo.m_PipelineLayout[pipeInfo.layout].descSetLayouts[i]);

    // our own set, holding the output storage buffer and the input texel buffers
    descSetLayouts[descSet] = m_MeshFetchDescSetLayout;

    std::vector<VkPushConstantRange> push = creationInfo.m_PipelineLayout[pipeInfo.layout].pushRanges;

    // ensure the push range is visible to the compute shader
    for(VkPushConstantRange &range : push)
      range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;

    VkPipelineLayoutCreateInfo pipeLayoutInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        NULL,
        0,
        descSet + 1,
        descSetLayouts.data(),
        (uint32_t)push.size(),
        push.empty() ? NULL : push.data(),
    };

    // create pipeline layout with same descriptor set layouts, plus our mesh output set
    vkr = m_pDriver->vkCreatePipelineLayout(dev, &pipeLayoutInfo, NULL, &pipeLayout);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    // repoint pipeline layout
    compPipeInfo.layout = pipeLayout;
  }

  // create shader module with modified code
  VkShaderModuleCreateInfo moduleCreateInfo = {
      VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, NULL, 0,
      modSpirv.size() * sizeof(uint32_t), modSpirv.data(),
  };

  VkShaderModule module = VK_NULL_HANDLE;
  vkr = m_pDriver->vkCreateShaderModule(dev, &moduleCreateInfo, NULL, &module);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  compPipeInfo.stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  compPipeInfo.stage.module = module;
  compPipeInfo.stage.pName = PatchedMeshOutputEntryPoint;
  compPipeInfo.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;

  // create new pipeline
  VkPipeline pipe = VK_NULL_HANDLE;
  vkr = m_pDriver->vkCreateComputePipelines(m_Device, VK_NULL_HANDLE, 1, &compPipeInfo, NULL, &pipe);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // make copy of state to draw from
  VulkanRenderState modifiedstate = state;

  // bind created pipeline to partial replay state
  modifiedstate.compute.pipeline = GetResID(pipe);

  // move graphics descriptor sets onto the compute pipe.
  modifiedstate.compute.descSets = modifiedstate.graphics.descSets;

  // push back extra descriptor set to partial replay state
  // note that we examined the used pipeline layout above and inserted our descriptor set
  // after any the application used. So there might be more bound, but we want to ensure to
  // bind to the slot we're using
  modifiedstate.compute.descSets.resize(descSet + 1);
  modifiedstate.compute.descSets[descSet].descSet = GetResID(m_MeshFetchDescSet);

  {
    // create buffer of sufficient size
    // this can't just be bufStride * num unique indices per instance, as we don't
    // have a compact 0-based index to index into the buffer. We must use
    // index-minIndex which is 0-based but potentially sparse, so this buffer may
    // be more or less wasteful.
    // Calculated in 64-bit so that large instanced meshes don't wrap a 32-bit product.
    bufSize = uint64_t(numVerts) * uint64_t(drawcall->numInstances) * uint64_t(bufStride);

    VkBufferCreateInfo bufInfo = {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, NULL, 0, bufSize, 0,
    };

    bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
    bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    bufInfo.usage |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
    bufInfo.usage |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

    vkr = m_pDriver->vkCreateBuffer(dev, &bufInfo, NULL, &meshBuffer);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    vkr = m_pDriver->vkCreateBuffer(dev, &bufInfo, NULL, &readbackBuffer);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    VkMemoryRequirements mrq = {0};
    m_pDriver->vkGetBufferMemoryRequirements(dev, meshBuffer, &mrq);

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, NULL, mrq.size,
        m_pDriver->GetGPULocalMemoryIndex(mrq.memoryTypeBits),
    };

    vkr = m_pDriver->vkAllocateMemory(dev, &allocInfo, NULL, &meshMem);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    vkr = m_pDriver->vkBindBufferMemory(dev, meshBuffer, meshMem, 0);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    m_pDriver->vkGetBufferMemoryRequirements(dev, readbackBuffer, &mrq);

    // note only the memory type is updated here; the allocation size is still the one queried
    // for the mesh buffer
    allocInfo.memoryTypeIndex = m_pDriver->GetReadbackMemoryIndex(mrq.memoryTypeBits);

    vkr = m_pDriver->vkAllocateMemory(dev, &allocInfo, NULL, &readbackMem);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    vkr = m_pDriver->vkBindBufferMemory(dev, readbackBuffer, readbackMem, 0);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    VkCommandBuffer cmd = m_pDriver->GetNextCmd();

    VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
                                          VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};

    vkr = ObjDisp(dev)->BeginCommandBuffer(Unwrap(cmd), &beginInfo);
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    // fill destination buffer with a recognisable marker value so that any vertices which are
    // never written can be identified
    ObjDisp(dev)->CmdFillBuffer(Unwrap(cmd), Unwrap(meshBuffer), 0, bufInfo.size, 0xbaadf00d);

    VkBufferMemoryBarrier meshbufbarrier = {
        VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
        NULL,
        VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT,
        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
        VK_QUEUE_FAMILY_IGNORED,
        VK_QUEUE_FAMILY_IGNORED,
    };

    meshbufbarrier.size = VK_WHOLE_SIZE;

    VkMemoryBarrier globalbarrier = {
        VK_STRUCTURE_TYPE_MEMORY_BARRIER, NULL,
        VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_HOST_WRITE_BIT,
        VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
    };

    // wait for uploads of index buffer (if used), compacted vertex buffers, and the above fill to
    // finish.
    DoPipelineBarrier(cmd, 1, &globalbarrier);

    // vkUpdateDescriptorSet desc set to point to buffer
    VkDescriptorBufferInfo fetchdesc = {0};
    fetchdesc.buffer = meshBuffer;
    fetchdesc.offset = 0;
    fetchdesc.range = bufInfo.size;

    VkWriteDescriptorSet write = {
        VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, NULL, m_MeshFetchDescSet, 0, 0, 1,
        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,      NULL, &fetchdesc,         NULL};
    m_pDriver->vkUpdateDescriptorSets(dev, 1, &write, 0, NULL);

    // do single dispatch covering every fetched vertex of every instance
    modifiedstate.BindPipeline(cmd, VulkanRenderState::BindCompute, true);
    uint64_t totalVerts = numFetchVerts * uint64_t(drawcall->numInstances);

    // the validation layers will probably complain about this dispatch saying some arrays aren't
    // fully updated. That's because they don't statically analyse that only fixed indices are
    // referred to. It's safe to leave unused array indices as invalid descriptors.
    ObjDisp(cmd)->CmdDispatch(Unwrap(cmd), uint32_t(totalVerts / MeshOutputDispatchWidth) + 1, 1, 1);

    // wait for mesh output writing to finish
    meshbufbarrier.buffer = Unwrap(meshBuffer);
    meshbufbarrier.size = bufSize;
    meshbufbarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    meshbufbarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;

    DoPipelineBarrier(cmd, 1, &meshbufbarrier);

    VkBufferCopy bufcopy = {
        0, 0, bufInfo.size,
    };

    // copy to readback buffer
    ObjDisp(dev)->CmdCopyBuffer(Unwrap(cmd), Unwrap(meshBuffer), Unwrap(readbackBuffer), 1, &bufcopy);

    meshbufbarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    meshbufbarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
    meshbufbarrier.buffer = Unwrap(readbackBuffer);

    // wait for copy to finish
    DoPipelineBarrier(cmd, 1, &meshbufbarrier);

    vkr = ObjDisp(dev)->EndCommandBuffer(Unwrap(cmd));
    RDCASSERTEQUAL(vkr, VK_SUCCESS);

    // submit & flush so that we don't have to keep pipeline around for a while
    m_pDriver->SubmitCmds();
    m_pDriver->FlushQ();
  }

  // the GPU work is complete, so the compacted attribute buffers are no longer needed. Slots for
  // locations that were never used still hold the null handles from RDCEraseEl.
  for(const CompactedAttrBuffer &attrBuf : vbuffers)
  {
    m_pDriver->vkDestroyBufferView(dev, attrBuf.view, NULL);
    m_pDriver->vkDestroyBuffer(dev, attrBuf.buf, NULL);
    m_pDriver->vkFreeMemory(dev, attrBuf.mem, NULL);
  }

  // readback mesh data
  byte *byteData = NULL;
  vkr = m_pDriver->vkMapMemory(m_Device, readbackMem, 0, VK_WHOLE_SIZE, 0, (void **)&byteData);
  RDCASSERTEQUAL(vkr, VK_SUCCESS);

  // do near/far calculations

  // defaults used if nothing can be derived from the data
  float nearp = 0.1f;
  float farp = 100.0f;

  // expect position at the start of the buffer, as system values are sorted first
  // and position is the first value
  const bool hasPosOut = refl->outputSignature[0].systemValue == ShaderBuiltin::Position;

  // only interpret the data as positions if it was mapped successfully and the first output
  // really is a position
  if(byteData != NULL && hasPosOut)
  {
    Vec4f *pos0 = (Vec4f *)byteData;

    bool found = false;

    for(uint32_t i = 1; i < numVerts; i++)
    {
      //////////////////////////////////////////////////////////////////////////////////
      // derive near/far, assuming a standard perspective matrix
      //
      // the transformation from pre-projection {Z,W} to post-projection {Z,W}
      // is linear. So we can say Zpost = Zpre*m + c . Here we assume Wpre = 1
      // and we know Wpost = Zpre from the perspective matrix.
      // we can then see from the perspective matrix that
      // m = F/(F-N)
      // c = -(F*N)/(F-N)
      //
      // with re-arranging and substitution, we then get:
      // N = -c/m
      // F = c/(1-m)
      //
      // so if we can derive m and c then we can determine N and F. We can do this with
      // two points, and we pick them reasonably distinct on z to reduce floating-point
      // error

      Vec4f *pos = (Vec4f *)(byteData + size_t(i) * bufStride);

      // skip invalid vertices (w=0)
      if(pos->w != 0.0f && fabs(pos->w - pos0->w) > 0.01f && fabs(pos->z - pos0->z) > 0.01f)
      {
        Vec2f A(pos0->w, pos0->z);
        Vec2f B(pos->w, pos->z);

        float m = (B.y - A.y) / (B.x - A.x);
        float c = B.y - B.x * m;

        // m == 1 would divide by zero when calculating the far plane
        if(m == 1.0f)
          continue;

        // reject a near plane that is negative or too close to zero
        if(-c / m <= 0.000001f)
          continue;

        nearp = -c / m;
        farp = c / (1 - m);

        found = true;

        break;
      }
    }

    // if we didn't find anything, all z's and w's were identical.
    // If the z is positive and w greater for the first element then
    // we detect this projection as reversed z with infinite far plane
    if(!found && pos0->z > 0.0f && pos0->w > pos0->z)
    {
      nearp = pos0->z;
      farp = FLT_MAX;
    }
  }

  // only unmap if the map succeeded
  if(byteData != NULL)
    m_pDriver->vkUnmapMemory(m_Device, readbackMem);

  // clean up temporary memories
  m_pDriver->vkDestroyBuffer(m_Device, readbackBuffer, NULL);
  m_pDriver->vkFreeMemory(m_Device, readbackMem, NULL);

  if(uniqIdxBuf != VK_NULL_HANDLE)
  {
    // destroy the view before the buffer it refers to
    m_pDriver->vkDestroyBufferView(m_Device, uniqIdxBufView, NULL);
    m_pDriver->vkDestroyBuffer(m_Device, uniqIdxBuf, NULL);
    m_pDriver->vkFreeMemory(m_Device, uniqIdxBufMem, NULL);
  }

  // fill out m_PostVSData
  auto &postvs = m_PostVSData[eventId];

  postvs.vsin.topo = pipeCreateInfo.pInputAssemblyState->topology;
  postvs.vsout.topo = pipeCreateInfo.pInputAssemblyState->topology;
  postvs.vsout.buf = meshBuffer;
  postvs.vsout.bufmem = meshMem;

  postvs.vsout.baseVertex = baseVertex + drawcall->baseVertex;

  postvs.vsout.vertStride = bufStride;
  postvs.vsout.nearPlane = nearp;
  postvs.vsout.farPlane = farp;

  postvs.vsout.useIndices = bool(drawcall->flags & DrawFlags::UseIBuffer);
  postvs.vsout.numVerts = drawcall->numIndices;

  // each instance's data follows the previous one, so the stride is an equal share of the buffer
  postvs.vsout.instStride = 0;
  if(drawcall->flags & DrawFlags::Instanced)
    postvs.vsout.instStride = uint32_t(bufSize / drawcall->numInstances);

  postvs.vsout.idxBuf = ResourceId();
  if(postvs.vsout.useIndices && state.ibuffer.buf != ResourceId())
  {
    postvs.vsout.idxBuf = GetResourceManager()->GetOriginalID(state.ibuffer.buf);
    postvs.vsout.idxOffset = state.ibuffer.offs + drawcall->indexOffset * idxsize;
    postvs.vsout.idxFmt = idxsize == 2 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
  }

  postvs.vsout.hasPosOut = hasPosOut;

  // delete pipeline layout
  m_pDriver->vkDestroyPipelineLayout(dev, pipeLayout, NULL);

  // delete pipeline
  m_pDriver->vkDestroyPipeline(dev, pipe, NULL);

  // delete shader/shader module
  m_pDriver->vkDestroyShaderModule(dev, module, NULL);
}
|
|
struct VulkanInitPostVSCallback : public VulkanDrawcallCallback
|
|
{
|
|
VulkanInitPostVSCallback(WrappedVulkan *vk, const vector<uint32_t> &events)
|
|
: m_pDriver(vk), m_Events(events)
|
|
{
|
|
m_pDriver->SetDrawcallCB(this);
|
|
}
|
|
~VulkanInitPostVSCallback() { m_pDriver->SetDrawcallCB(NULL); }
|
|
void PreDraw(uint32_t eid, VkCommandBuffer cmd)
|
|
{
|
|
if(std::find(m_Events.begin(), m_Events.end(), eid) != m_Events.end())
|
|
m_pDriver->GetReplay()->InitPostVSBuffers(eid);
|
|
}
|
|
|
|
bool PostDraw(uint32_t eid, VkCommandBuffer cmd) { return false; }
|
|
void PostRedraw(uint32_t eid, VkCommandBuffer cmd) {}
|
|
// Dispatches don't rasterize, so do nothing
|
|
void PreDispatch(uint32_t eid, VkCommandBuffer cmd) {}
|
|
bool PostDispatch(uint32_t eid, VkCommandBuffer cmd) { return false; }
|
|
void PostRedispatch(uint32_t eid, VkCommandBuffer cmd) {}
|
|
// Ditto copy/etc
|
|
void PreMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {}
|
|
bool PostMisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) { return false; }
|
|
void PostRemisc(uint32_t eid, DrawFlags flags, VkCommandBuffer cmd) {}
|
|
void AliasEvent(uint32_t primary, uint32_t alias)
|
|
{
|
|
if(std::find(m_Events.begin(), m_Events.end(), primary) != m_Events.end())
|
|
m_pDriver->GetReplay()->AliasPostVSBuffers(primary, alias);
|
|
}
|
|
|
|
WrappedVulkan *m_pDriver;
|
|
const std::vector<uint32_t> &m_Events;
|
|
};
|
|
|
|
void VulkanReplay::InitPostVSBuffers(const vector<uint32_t> &events)
|
|
{
|
|
// first we must replay up to the first event without replaying it. This ensures any
|
|
// non-command buffer calls like memory unmaps etc all happen correctly before this
|
|
// command buffer
|
|
m_pDriver->ReplayLog(0, events.front(), eReplay_WithoutDraw);
|
|
|
|
VulkanInitPostVSCallback cb(m_pDriver, events);
|
|
|
|
// now we replay the events, which are guaranteed (because we generated them in
|
|
// GetPassEvents above) to come from the same command buffer, so the event IDs are
|
|
// still locally continuous, even if we jump into replaying.
|
|
m_pDriver->ReplayLog(events.front(), events.back(), eReplay_Full);
|
|
}
|
|
|
|
// Builds the MeshFormat describing the cached post-VS data for an event, instance and stage.
// If nothing is cached for the event (after resolving aliases) the description is built from
// zeroed stage data.
MeshFormat VulkanReplay::GetPostVSBuffers(uint32_t eventId, uint32_t instID, MeshDataStage stage)
{
  // go through any aliasing
  auto aliasIt = m_PostVSAlias.find(eventId);
  if(aliasIt != m_PostVSAlias.end())
    eventId = aliasIt->second;

  // start from zeroed data, overwritten only if there is a cached entry
  VulkanPostVSData postvs;
  RDCEraseEl(postvs);

  auto cachedIt = m_PostVSData.find(eventId);
  if(cachedIt != m_PostVSData.end())
    postvs = cachedIt->second;

  VulkanPostVSData::StageData stageData = postvs.GetStage(stage);

  MeshFormat ret;

  // index buffer: only reported when the stage is indexed and has a buffer recorded
  const bool indexed = stageData.useIndices && stageData.idxBuf != ResourceId();

  ret.indexResourceId = indexed ? stageData.idxBuf : ResourceId();
  ret.indexByteStride = indexed ? (stageData.idxFmt == VK_INDEX_TYPE_UINT16 ? 2 : 4) : 0;
  ret.indexByteOffset = stageData.idxOffset;
  ret.baseVertex = stageData.baseVertex;

  // vertex buffer: each instance's data is offset by the instance stride
  ret.vertexResourceId = (stageData.buf != VK_NULL_HANDLE) ? GetResID(stageData.buf) : ResourceId();
  ret.vertexByteOffset = stageData.instStride * instID;
  ret.vertexByteStride = stageData.vertStride;

  // the format is reported as four 32-bit float components
  ret.format.compCount = 4;
  ret.format.compByteWidth = 4;
  ret.format.compType = CompType::Float;
  ret.format.type = ResourceFormatType::Regular;
  ret.format.bgraOrder = false;

  ret.showAlpha = false;

  ret.topology = MakePrimitiveTopology(stageData.topo, 1);
  ret.numIndices = stageData.numVerts;

  // unprojection is only requested when the stage has a position output
  ret.unproject = stageData.hasPosOut;
  ret.nearPlane = stageData.nearPlane;
  ret.farPlane = stageData.farPlane;

  return ret;
}
|