Add support for VK_KHR_draw_indirect_count

This commit is contained in:
baldurk
2018-10-11 13:55:09 +01:00
parent 4042dcab68
commit 0bbf24bede
7 changed files with 732 additions and 7 deletions
+2
View File
@@ -463,6 +463,8 @@ enum class VulkanChunk : uint32_t
vkCmdSetDeviceMask,
vkCmdDispatchBase,
vkGetDeviceQueue2,
vkCmdDrawIndirectCountKHR,
vkCmdDrawIndexedIndirectCountKHR,
Max,
};
+12
View File
@@ -691,6 +691,9 @@ static const VkExtensionProperties supportedExtensions[] = {
VK_KHR_DISPLAY_SWAPCHAIN_EXTENSION_NAME, VK_KHR_DISPLAY_SWAPCHAIN_SPEC_VERSION,
},
#endif
{
VK_KHR_DRAW_INDIRECT_COUNT_EXTENSION_NAME, VK_KHR_DRAW_INDIRECT_COUNT_SPEC_VERSION,
},
{
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DRIVER_PROPERTIES_SPEC_VERSION,
},
@@ -2593,6 +2596,15 @@ bool WrappedVulkan::ProcessChunk(ReadSerialiser &ser, VulkanChunk chunk)
return Serialise_vkGetDeviceQueue2(ser, VK_NULL_HANDLE, NULL, NULL);
break;
case VulkanChunk::vkCmdDrawIndirectCountKHR:
return Serialise_vkCmdDrawIndirectCountKHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0,
VK_NULL_HANDLE, 0, 0, 0);
break;
case VulkanChunk::vkCmdDrawIndexedIndirectCountKHR:
return Serialise_vkCmdDrawIndexedIndirectCountKHR(ser, VK_NULL_HANDLE, VK_NULL_HANDLE, 0,
VK_NULL_HANDLE, 0, 0, 0);
break;
default:
{
SystemChunk system = (SystemChunk)chunk;
+11
View File
@@ -1886,4 +1886,15 @@ public:
VkResult vkGetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physicalDevice,
const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
VkDisplayPlaneCapabilities2KHR *pCapabilities);
// VK_KHR_draw_indirect_count
IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer commandBuffer,
VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride);
IMPLEMENT_FUNCTION_SERIALISED(void, vkCmdDrawIndexedIndirectCountKHR,
VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset,
uint32_t maxDrawCount, uint32_t stride);
};
+10 -1
View File
@@ -348,7 +348,8 @@
CheckExt(EXT_sampler_filter_minmax, VKXX); \
CheckExt(KHR_sampler_ycbcr_conversion, VK11); \
CheckExt(KHR_device_group, VK11); \
CheckExt(MVK_moltenvk, VKXX);
CheckExt(MVK_moltenvk, VKXX); \
CheckExt(KHR_draw_indirect_count, VKXX);
#define HookInitVulkanInstanceExts() \
HookInitExtension(KHR_surface, DestroySurfaceKHR); \
@@ -458,6 +459,8 @@
HookInitExtension(KHR_device_group &&KHR_surface, GetDeviceGroupSurfacePresentModesKHR); \
HookInitExtension(KHR_device_group &&KHR_swapchain, AcquireNextImage2KHR); \
HookInitExtension(protected_memory, GetDeviceQueue2); \
HookInitExtension(KHR_draw_indirect_count, CmdDrawIndirectCountKHR); \
HookInitExtension(KHR_draw_indirect_count, CmdDrawIndexedIndirectCountKHR); \
HookInitDevice_PlatformSpecific()
#define DefineHooks() \
@@ -968,6 +971,12 @@
HookDefine3(VkResult, vkGetDisplayPlaneCapabilities2KHR, VkPhysicalDevice, physicalDevice, \
const VkDisplayPlaneInfo2KHR *, pDisplayPlaneInfo, VkDisplayPlaneCapabilities2KHR *, \
pCapabilities); \
HookDefine7(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer, commandBuffer, VkBuffer, buffer, \
VkDeviceSize, offset, VkBuffer, countBuffer, VkDeviceSize, countBufferOffset, \
uint32_t, maxDrawCount, uint32_t, stride); \
HookDefine7(void, vkCmdDrawIndexedIndirectCountKHR, VkCommandBuffer, commandBuffer, VkBuffer, \
buffer, VkDeviceSize, offset, VkBuffer, countBuffer, VkDeviceSize, \
countBufferOffset, uint32_t, maxDrawCount, uint32_t, stride); \
HookDefine_PlatformSpecific()
struct VkLayerInstanceDispatchTableExtended : VkLayerInstanceDispatchTable
+3 -1
View File
@@ -28,7 +28,7 @@
template <>
std::string DoStringise(const VulkanChunk &el)
{
RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1116, "Chunks changed without updating names");
RDCCOMPILE_ASSERT((uint32_t)VulkanChunk::Max == 1118, "Chunks changed without updating names");
BEGIN_ENUM_STRINGISE(VulkanChunk)
{
@@ -148,6 +148,8 @@ std::string DoStringise(const VulkanChunk &el)
STRINGISE_ENUM_CLASS(vkCmdSetDeviceMask);
STRINGISE_ENUM_CLASS(vkCmdDispatchBase);
STRINGISE_ENUM_CLASS(vkGetDeviceQueue2);
STRINGISE_ENUM_CLASS(vkCmdDrawIndirectCountKHR);
STRINGISE_ENUM_CLASS(vkCmdDrawIndexedIndirectCountKHR);
STRINGISE_ENUM_CLASS_NAMED(Max, "Max Chunk");
}
END_ENUM_STRINGISE()
@@ -2403,6 +2403,633 @@ void WrappedVulkan::vkCmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t ba
}
}
template <typename SerialiserType>
bool WrappedVulkan::Serialise_vkCmdDrawIndirectCountKHR(
SerialiserType &ser, VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
SERIALISE_ELEMENT(commandBuffer);
SERIALISE_ELEMENT(buffer);
SERIALISE_ELEMENT(offset);
SERIALISE_ELEMENT(countBuffer);
SERIALISE_ELEMENT(countBufferOffset);
SERIALISE_ELEMENT(maxDrawCount);
SERIALISE_ELEMENT(stride);
Serialise_DebugMessages(ser);
SERIALISE_CHECK_READ_ERRORS();
if(IsReplayingAndReading())
{
m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
// do execution (possibly partial)
if(IsActiveReplaying(m_State))
{
// this count is wrong if we're not re-recording and fetching the actual count below, but it's
// impossible without having a particular submission in mind because without a specific
// instance we can't know what the actual count was (it could vary between submissions).
// Fortunately when we're not in the re-recording command buffer the EID tracking isn't
// needed.
uint32_t count = maxDrawCount;
if(InRerecordRange(m_LastCmdBufferID))
{
commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
uint32_t curEID = m_RootEventID;
if(m_FirstEventID <= 1)
{
curEID = m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID;
if(m_Partial[Primary].partialParent == m_LastCmdBufferID)
curEID += m_Partial[Primary].baseEvent;
else if(m_Partial[Secondary].partialParent == m_LastCmdBufferID)
curEID += m_Partial[Secondary].baseEvent;
}
DrawcallUse use(m_CurChunkOffset, 0);
auto it = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use);
if(it == m_DrawcallUses.end() || GetDrawcall(it->eventId) == NULL)
{
RDCERR("Unexpected drawcall not found in uses vector, offset %llu", m_CurChunkOffset);
}
else
{
uint32_t baseEventID = it->eventId;
// get the number of draws by looking at how many children the parent drawcall has.
count = (uint32_t)GetDrawcall(it->eventId)->children.size();
// when we have a callback, submit every drawcall individually to the callback
if(m_DrawcallCallback && IsDrawInRenderPass())
{
for(uint32_t i = 0; i < count; i++)
{
uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1);
ObjDisp(commandBuffer)
->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
{
ObjDisp(commandBuffer)
->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
}
offset += stride;
}
}
// To add the multidraw, we made an event N that is the 'parent' marker, then
// N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected
// then we'll replay up to N but not N+1, so just do nothing - we DON'T want to draw
// the first sub-draw in that range.
else if(m_LastEventID > baseEventID)
{
uint32_t drawidx = 0;
if(m_FirstEventID <= 1)
{
// if we're replaying part-way into a multidraw, we can replay the first part
// 'easily'
// by just reducing the Count parameter to however many we want to replay. This only
// works if we're replaying from the first multidraw to the nth (n less than Count)
count = RDCMIN(count, m_LastEventID - baseEventID);
}
else
{
// otherwise we do the 'hard' case, draw only one multidraw
// note we'll never be asked to do e.g. 3rd-7th of a multidraw. Only ever 0th-nth or
// a single draw.
//
// We also need to draw the same number of draws so that DrawIndex is faithful. In
// order to preserve the draw index we write a custom indirect buffer that has zeros
// for the parameters of all previous draws.
drawidx = (curEID - baseEventID - 1);
offset += stride * drawidx;
// ensure the custom buffer is large enough
VkDeviceSize bufLength = sizeof(VkDrawIndirectCommand) * (drawidx + 1);
RDCASSERT(bufLength <= m_IndirectBufferSize, bufLength, m_IndirectBufferSize);
VkBufferMemoryBarrier bufBarrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
NULL,
VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
Unwrap(m_IndirectBuffer.buf),
0,
VK_WHOLE_SIZE,
};
VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
ObjDisp(m_IndirectCommandBuffer)
->BeginCommandBuffer(Unwrap(m_IndirectCommandBuffer), &beginInfo);
// wait for any previous indirect draws to complete before filling/transferring
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
// initialise to 0 so all other draws don't draw anything
ObjDisp(m_IndirectCommandBuffer)
->CmdFillBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(m_IndirectBuffer.buf), 0,
m_IndirectBufferSize, 0);
// wait for fill to complete before copy
bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
// copy over the actual parameter set into the right place
VkBufferCopy region = {offset, bufLength - sizeof(VkDrawIndirectCommand),
sizeof(VkDrawIndirectCommand)};
ObjDisp(m_IndirectCommandBuffer)
->CmdCopyBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(buffer),
Unwrap(m_IndirectBuffer.buf), 1, &region);
// finally wait for copy to complete before drawing from it
bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
ObjDisp(m_IndirectCommandBuffer)->EndCommandBuffer(Unwrap(m_IndirectCommandBuffer));
// draw from our custom buffer
m_IndirectDraw = true;
buffer = m_IndirectBuffer.buf;
offset = 0;
count = drawidx + 1;
stride = sizeof(VkDrawIndirectCommand);
}
if(IsDrawInRenderPass())
{
uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, drawidx + 1);
ObjDisp(commandBuffer)
->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count, stride);
if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
{
ObjDisp(commandBuffer)
->CmdDrawIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count, stride);
m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
}
}
}
}
}
// multidraws skip the event ID past the whole thing
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID += count + 1;
}
else
{
VkIndirectPatchData indirectPatch =
FetchIndirectData(VkIndirectPatchType::DrawIndirectCount, commandBuffer, buffer, offset,
maxDrawCount, stride, countBuffer, countBufferOffset);
ObjDisp(commandBuffer)
->CmdDrawIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
Unwrap(countBuffer), countBufferOffset, maxDrawCount, stride);
// add on the size we'll need for an indirect buffer in the worst case.
// Note that we'll only ever be partially replaying one draw at a time, so we only need the
// worst case.
m_IndirectBufferSize =
RDCMAX(m_IndirectBufferSize,
sizeof(VkDrawIndirectCommand) + (maxDrawCount > 0 ? maxDrawCount - 1 : 0) * stride);
string name = "vkCmdDrawIndirectCountKHR";
if(!IsDrawInRenderPass())
{
AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
MessageSource::IncorrectAPIUse,
"Drawcall in happening outside of render pass, or in secondary command "
"buffer without RENDER_PASS_CONTINUE_BIT");
}
SDChunk *baseChunk = m_StructuredFile->chunks.back();
DrawcallDescription draw;
draw.name = name;
draw.flags = DrawFlags::MultiDraw | DrawFlags::PushMarker;
AddEvent();
AddDrawcall(draw, true);
VulkanDrawcallTreeNode &drawNode = GetDrawcallStack().back()->children.back();
drawNode.indirectPatch = indirectPatch;
drawNode.resourceUsage.push_back(std::make_pair(
GetResID(buffer), EventUsage(drawNode.draw.eventId, ResourceUsage::Indirect)));
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
for(uint32_t i = 0; i < maxDrawCount; i++)
{
DrawcallDescription multi;
multi.drawIndex = i;
multi.name = name;
multi.flags |= DrawFlags::Drawcall | DrawFlags::Instanced | DrawFlags::Indirect;
// add a fake chunk for this individual indirect draw
SDChunk *fakeChunk = new SDChunk("Indirect sub-command");
fakeChunk->metadata = baseChunk->metadata;
fakeChunk->metadata.chunkID = (uint32_t)VulkanChunk::vkCmdIndirectSubCommand;
{
StructuredSerialiser structuriser(fakeChunk, ser.GetChunkLookup());
structuriser.Serialise<uint32_t>("drawIndex", 0U);
structuriser.Serialise("offset", offset);
structuriser.Serialise("command", VkDrawIndirectCommand());
}
m_StructuredFile->chunks.push_back(fakeChunk);
AddEvent();
AddDrawcall(multi, true);
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
}
draw.name = name;
draw.flags = DrawFlags::PopMarker;
AddDrawcall(draw, false);
}
}
return true;
}
// Entry point for vkCmdDrawIndirectCountKHR: forwards the call to the next layer in the dispatch
// chain and, while capturing, serialises it into the command buffer's record and marks both the
// indirect buffer and the count buffer as read by the frame.
void WrappedVulkan::vkCmdDrawIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer,
                                              VkDeviceSize offset, VkBuffer countBuffer,
                                              VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                              uint32_t stride)
{
  SCOPED_DBG_SINK();

  SERIALISE_TIME_CALL(ObjDisp(commandBuffer)
                          ->CmdDrawIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
                                                    Unwrap(countBuffer), countBufferOffset,
                                                    maxDrawCount, stride));

  // nothing more to do unless we are capturing
  if(!IsCaptureMode(m_State))
    return;

  VkResourceRecord *record = GetRecord(commandBuffer);

  CACHE_THREAD_SERIALISER();

  ser.SetDrawChunk();
  SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdDrawIndirectCountKHR);
  Serialise_vkCmdDrawIndirectCountKHR(ser, commandBuffer, buffer, offset, countBuffer,
                                      countBufferOffset, maxDrawCount, stride);

  record->AddChunk(scope.Get());

  // the argument buffer first, then the count buffer: reference the buffer itself, its base
  // resource, and any sparse info attached to its record
  const VkBuffer readBuffers[] = {buffer, countBuffer};
  for(VkBuffer readBuf : readBuffers)
  {
    record->MarkResourceFrameReferenced(GetResID(readBuf), eFrameRef_Read);

    VkResourceRecord *bufRecord = GetRecord(readBuf);
    record->MarkResourceFrameReferenced(bufRecord->baseResource, eFrameRef_Read);
    if(bufRecord->sparseInfo)
      record->cmdInfo->sparse.insert(bufRecord->sparseInfo);
  }
}
template <typename SerialiserType>
bool WrappedVulkan::Serialise_vkCmdDrawIndexedIndirectCountKHR(
SerialiserType &ser, VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride)
{
SERIALISE_ELEMENT(commandBuffer);
SERIALISE_ELEMENT(buffer);
SERIALISE_ELEMENT(offset);
SERIALISE_ELEMENT(countBuffer);
SERIALISE_ELEMENT(countBufferOffset);
SERIALISE_ELEMENT(maxDrawCount);
SERIALISE_ELEMENT(stride);
Serialise_DebugMessages(ser);
SERIALISE_CHECK_READ_ERRORS();
if(IsReplayingAndReading())
{
m_LastCmdBufferID = GetResourceManager()->GetOriginalID(GetResID(commandBuffer));
// do execution (possibly partial)
if(IsActiveReplaying(m_State))
{
// this count is wrong if we're not re-recording and fetching the actual count below, but it's
// impossible without having a particular submission in mind because without a specific
// instance we can't know what the actual count was (it could vary between submissions).
// Fortunately when we're not in the re-recording command buffer the EID tracking isn't
// needed.
uint32_t count = maxDrawCount;
if(InRerecordRange(m_LastCmdBufferID))
{
commandBuffer = RerecordCmdBuf(m_LastCmdBufferID);
uint32_t curEID = m_RootEventID;
if(m_FirstEventID <= 1)
{
curEID = m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID;
if(m_Partial[Primary].partialParent == m_LastCmdBufferID)
curEID += m_Partial[Primary].baseEvent;
else if(m_Partial[Secondary].partialParent == m_LastCmdBufferID)
curEID += m_Partial[Secondary].baseEvent;
}
DrawcallUse use(m_CurChunkOffset, 0);
auto it = std::lower_bound(m_DrawcallUses.begin(), m_DrawcallUses.end(), use);
if(it == m_DrawcallUses.end() || GetDrawcall(it->eventId) == NULL)
{
RDCERR("Unexpected drawcall not found in uses vector, offset %llu", m_CurChunkOffset);
}
else
{
uint32_t baseEventID = it->eventId;
// get the number of draws by looking at how many children the parent drawcall has.
count = (uint32_t)GetDrawcall(it->eventId)->children.size();
// when we have a callback, submit every drawcall individually to the callback
if(m_DrawcallCallback && IsDrawInRenderPass())
{
for(uint32_t i = 0; i < count; i++)
{
uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, i + 1);
ObjDisp(commandBuffer)
->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1, stride);
if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
{
ObjDisp(commandBuffer)
->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, 1,
stride);
m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
}
offset += stride;
}
}
// To add the multidraw, we made an event N that is the 'parent' marker, then
// N+1, N+2, N+3, ... for each of the sub-draws. If the first sub-draw is selected
// then we'll replay up to N but not N+1, so just do nothing - we DON'T want to draw
// the first sub-draw in that range.
else if(m_LastEventID > baseEventID)
{
uint32_t drawidx = 0;
if(m_FirstEventID <= 1)
{
// if we're replaying part-way into a multidraw, we can replay the first part
// 'easily'
// by just reducing the Count parameter to however many we want to replay. This only
// works if we're replaying from the first multidraw to the nth (n less than Count)
count = RDCMIN(count, m_LastEventID - baseEventID);
}
else
{
// otherwise we do the 'hard' case, draw only one multidraw
// note we'll never be asked to do e.g. 3rd-7th of a multidraw. Only ever 0th-nth or
// a single draw.
//
// We also need to draw the same number of draws so that DrawIndex is faithful. In
// order to preserve the draw index we write a custom indirect buffer that has zeros
// for the parameters of all previous draws.
drawidx = (curEID - baseEventID - 1);
offset += stride * drawidx;
// ensure the custom buffer is large enough
VkDeviceSize bufLength = sizeof(VkDrawIndexedIndirectCommand) * (drawidx + 1);
RDCASSERT(bufLength <= m_IndirectBufferSize, bufLength, m_IndirectBufferSize);
VkBufferMemoryBarrier bufBarrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
NULL,
VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
Unwrap(m_IndirectBuffer.buf),
0,
VK_WHOLE_SIZE,
};
VkCommandBufferBeginInfo beginInfo = {VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, NULL,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT};
ObjDisp(m_IndirectCommandBuffer)
->BeginCommandBuffer(Unwrap(m_IndirectCommandBuffer), &beginInfo);
// wait for any previous indirect draws to complete before filling/transferring
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
// initialise to 0 so all other draws don't draw anything
ObjDisp(m_IndirectCommandBuffer)
->CmdFillBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(m_IndirectBuffer.buf), 0,
m_IndirectBufferSize, 0);
// wait for fill to complete before copy
bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
// copy over the actual parameter set into the right place
VkBufferCopy region = {offset, bufLength - sizeof(VkDrawIndexedIndirectCommand),
sizeof(VkDrawIndexedIndirectCommand)};
ObjDisp(m_IndirectCommandBuffer)
->CmdCopyBuffer(Unwrap(m_IndirectCommandBuffer), Unwrap(buffer),
Unwrap(m_IndirectBuffer.buf), 1, &region);
// finally wait for copy to complete before drawing from it
bufBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
bufBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT;
DoPipelineBarrier(m_IndirectCommandBuffer, 1, &bufBarrier);
ObjDisp(m_IndirectCommandBuffer)->EndCommandBuffer(Unwrap(m_IndirectCommandBuffer));
// draw from our custom buffer
m_IndirectDraw = true;
buffer = m_IndirectBuffer.buf;
offset = 0;
count = drawidx + 1;
stride = sizeof(VkDrawIndexedIndirectCommand);
}
if(IsDrawInRenderPass())
{
uint32_t eventId = HandlePreCallback(commandBuffer, DrawFlags::Drawcall, drawidx + 1);
ObjDisp(commandBuffer)
->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count,
stride);
if(eventId && m_DrawcallCallback->PostDraw(eventId, commandBuffer))
{
ObjDisp(commandBuffer)
->CmdDrawIndexedIndirect(Unwrap(commandBuffer), Unwrap(buffer), offset, count,
stride);
m_DrawcallCallback->PostRedraw(eventId, commandBuffer);
}
}
}
}
}
// multidraws skip the event ID past the whole thing
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID += count + 1;
}
else
{
VkIndirectPatchData indirectPatch =
FetchIndirectData(VkIndirectPatchType::DrawIndexedIndirectCount, commandBuffer, buffer,
offset, maxDrawCount, stride, countBuffer, countBufferOffset);
ObjDisp(commandBuffer)
->CmdDrawIndexedIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer), offset,
Unwrap(countBuffer), countBufferOffset, maxDrawCount,
stride);
// add on the size we'll need for an indirect buffer in the worst case.
// Note that we'll only ever be partially replaying one draw at a time, so we only need the
// worst case.
m_IndirectBufferSize =
RDCMAX(m_IndirectBufferSize, sizeof(VkDrawIndexedIndirectCommand) +
(maxDrawCount > 0 ? maxDrawCount - 1 : 0) * stride);
string name = "vkCmdDrawIndexedIndirectCountKHR";
if(!IsDrawInRenderPass())
{
AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
MessageSource::IncorrectAPIUse,
"Drawcall in happening outside of render pass, or in secondary command "
"buffer without RENDER_PASS_CONTINUE_BIT");
}
SDChunk *baseChunk = m_StructuredFile->chunks.back();
DrawcallDescription draw;
draw.name = name;
draw.flags = DrawFlags::MultiDraw | DrawFlags::PushMarker;
AddEvent();
AddDrawcall(draw, true);
VulkanDrawcallTreeNode &drawNode = GetDrawcallStack().back()->children.back();
drawNode.indirectPatch = indirectPatch;
drawNode.resourceUsage.push_back(std::make_pair(
GetResID(buffer), EventUsage(drawNode.draw.eventId, ResourceUsage::Indirect)));
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
for(uint32_t i = 0; i < maxDrawCount; i++)
{
DrawcallDescription multi;
multi.drawIndex = i;
multi.name = name;
multi.flags |=
DrawFlags::Drawcall | DrawFlags::Instanced | DrawFlags::Indexed | DrawFlags::Indirect;
// add a fake chunk for this individual indirect draw
SDChunk *fakeChunk = new SDChunk("Indirect sub-command");
fakeChunk->metadata = baseChunk->metadata;
fakeChunk->metadata.chunkID = (uint32_t)VulkanChunk::vkCmdIndirectSubCommand;
{
StructuredSerialiser structuriser(fakeChunk, ser.GetChunkLookup());
structuriser.Serialise<uint32_t>("drawIndex", 0U);
structuriser.Serialise("offset", offset);
structuriser.Serialise("command", VkDrawIndexedIndirectCommand());
}
m_StructuredFile->chunks.push_back(fakeChunk);
AddEvent();
AddDrawcall(multi, true);
m_BakedCmdBufferInfo[m_LastCmdBufferID].curEventID++;
}
draw.name = name;
draw.flags = DrawFlags::PopMarker;
AddDrawcall(draw, false);
}
}
return true;
}
// Entry point for vkCmdDrawIndexedIndirectCountKHR: forwards the call to the next layer in the
// dispatch chain and, while capturing, serialises it into the command buffer's record and marks
// both the indirect buffer and the count buffer as read by the frame.
void WrappedVulkan::vkCmdDrawIndexedIndirectCountKHR(VkCommandBuffer commandBuffer, VkBuffer buffer,
                                                     VkDeviceSize offset, VkBuffer countBuffer,
                                                     VkDeviceSize countBufferOffset,
                                                     uint32_t maxDrawCount, uint32_t stride)
{
  SCOPED_DBG_SINK();

  SERIALISE_TIME_CALL(ObjDisp(commandBuffer)
                          ->CmdDrawIndexedIndirectCountKHR(Unwrap(commandBuffer), Unwrap(buffer),
                                                           offset, Unwrap(countBuffer),
                                                           countBufferOffset, maxDrawCount, stride));

  // nothing more to do unless we are capturing
  if(!IsCaptureMode(m_State))
    return;

  VkResourceRecord *record = GetRecord(commandBuffer);

  CACHE_THREAD_SERIALISER();

  ser.SetDrawChunk();
  SCOPED_SERIALISE_CHUNK(VulkanChunk::vkCmdDrawIndexedIndirectCountKHR);
  Serialise_vkCmdDrawIndexedIndirectCountKHR(ser, commandBuffer, buffer, offset, countBuffer,
                                             countBufferOffset, maxDrawCount, stride);

  record->AddChunk(scope.Get());

  // the argument buffer first, then the count buffer: reference the buffer itself, its base
  // resource, and any sparse info attached to its record
  const VkBuffer readBuffers[] = {buffer, countBuffer};
  for(VkBuffer readBuf : readBuffers)
  {
    record->MarkResourceFrameReferenced(GetResID(readBuf), eFrameRef_Read);

    VkResourceRecord *bufRecord = GetRecord(readBuf);
    record->MarkResourceFrameReferenced(bufRecord->baseResource, eFrameRef_Read);
    if(bufRecord->sparseInfo)
      record->cmdInfo->sparse.insert(bufRecord->sparseInfo);
  }
}
INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDraw, VkCommandBuffer commandBuffer, uint32_t vertexCount,
uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance);
@@ -2467,4 +3094,14 @@ INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdResolveImage, VkCommandBuffer command
INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDispatchBase, VkCommandBuffer commandBuffer,
uint32_t baseGroupX, uint32_t baseGroupY, uint32_t baseGroupZ,
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ);
// VK_KHR_draw_indirect_count
// NOTE(review): presumably these instantiate the Serialise_* templates for the two new draw
// functions for each serialiser type - confirm against the INSTANTIATE_FUNCTION_SERIALISED
// definition. The parameter lists mirror the function signatures.
INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndirectCountKHR, VkCommandBuffer commandBuffer,
VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer,
VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
uint32_t stride);
INSTANTIATE_FUNCTION_SERIALISED(void, vkCmdDrawIndexedIndirectCountKHR,
VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
VkBuffer countBuffer, VkDeviceSize countBufferOffset,
uint32_t maxDrawCount, uint32_t stride);
@@ -541,14 +541,60 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector<VulkanDrawcallTreeNode> &cmd
n.draw.dispatchDimension[2] = args->z;
}
else if(n.indirectPatch.type == VkIndirectPatchType::DrawIndirect ||
n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirect)
n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirect ||
n.indirectPatch.type == VkIndirectPatchType::DrawIndirectCount ||
n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirectCount)
{
bool hasCount = (n.indirectPatch.type == VkIndirectPatchType::DrawIndirectCount ||
n.indirectPatch.type == VkIndirectPatchType::DrawIndexedIndirectCount);
bytebuf argbuf;
GetDebugManager()->GetBufferData(GetResID(n.indirectPatch.buf), 0, 0, argbuf);
byte *ptr = argbuf.begin(), *end = argbuf.end();
if(n.indirectPatch.count == 1)
uint32_t indirectCount = n.indirectPatch.count;
if(hasCount)
{
if(argbuf.size() >= 16)
{
uint32_t *count = (uint32_t *)end;
count -= 4;
indirectCount = *count;
}
else
{
RDCERR("Couldn't get indirect draw count");
}
}
if(indirectCount > n.indirectPatch.count)
{
RDCERR("Indirect count higher than maxCount, clamping");
}
else if(indirectCount < n.indirectPatch.count)
{
// need to remove any draws we reserved that didn't actually happen, and shift any
// subsequent event and draw Ids
uint32_t shiftCount = n.indirectPatch.count - indirectCount;
// i is the pushmarker, so i + 1 is the first of the sub draws.
// i + 1 + n.indirectPatch.count is the last of the draws, we don't want to erase the next
// one (the popmarker)
cmdBufNodes.erase(cmdBufNodes.begin() + i + 1 + indirectCount,
cmdBufNodes.begin() + i + 1 + n.indirectPatch.count);
for(size_t j = i + 1 + indirectCount; j < cmdBufNodes.size(); j++)
{
cmdBufNodes[j].draw.eventId -= shiftCount;
cmdBufNodes[j].draw.drawcallId -= shiftCount;
for(APIEvent &ev : cmdBufNodes[j].draw.events)
ev.eventId -= shiftCount;
}
}
// indirect count versions always have a multidraw marker regions, but static count of 1 would
// be in-lined as a single draw, so we patch in-place
if(!hasCount && indirectCount == 1)
{
bool valid = PatchIndirectDraw(n.indirectPatch.type, n.draw, ptr, end);
@@ -563,10 +609,16 @@ void WrappedVulkan::InsertDrawsAndRefreshIDs(vector<VulkanDrawcallTreeNode> &cmd
else
{
// we should have N draws immediately following this one, check that that's the case
RDCASSERT(i + n.indirectPatch.count < cmdBufNodes.size(), i, n.indirectPatch.count,
RDCASSERT(i + indirectCount < cmdBufNodes.size(), i, indirectCount, n.indirectPatch.count,
cmdBufNodes.size());
for(size_t j = 0; j < (size_t)n.indirectPatch.count && i + j + 1 < cmdBufNodes.size(); j++)
// if there was a count, patch that onto the root drawcall name
if(hasCount)
{
n.draw.name = StringFormat::Fmt("%s(<%u>)", n.draw.name.c_str(), indirectCount);
}
for(size_t j = 0; j < (size_t)indirectCount && i + j + 1 < cmdBufNodes.size(); j++)
{
VulkanDrawcallTreeNode &n2 = cmdBufNodes[i + j + 1];