Implement workaround for broken occlusion queries on Intel. Closes #2591

* If a draw has no effects (color mask writes off, no depth attachment) then
  Intel will not return proper occlusion query results. Forcibly patching in a
  storage write works around the problem.
This commit is contained in:
baldurk
2022-06-16 15:40:59 +01:00
parent 59804c1d1e
commit 0fce39ff04
5 changed files with 186 additions and 73 deletions
+6
View File
@@ -1012,6 +1012,12 @@ VkDriverInfo::VkDriverInfo(const VkPhysicalDeviceProperties &physProps, bool act
RDCLOG("Disabling buffer_device_address on Intel - update to a newer driver for fix");
bdaBrokenDriver = true;
}
// Currently unfixed at the time of writing, Intel's drivers require manually inserted
// side-effects for occlusion queries to function properly if there are no other effects from a
// pixel shader.
// Only affects windows drivers, linux drivers are unaffected.
intelBrokenOcclusionQueries = true;
}
#endif
+4
View File
@@ -280,6 +280,9 @@ public:
// hit the case where it's necessary (doing 'whole pass' partial replay of a subsection of a
// command buffer where we need to apply dynamic state from earlier in the command buffer).
bool QualcommLineWidthDynamicStateCrash() const { return qualcommLineWidthCrash; }
// on Intel, occlusion queries are broken unless the shader has some effects. When we don't want
// it to have visible effects during pixel history we have to insert some manual side-effects
bool IntelBrokenOcclusionQueries() const { return intelBrokenOcclusionQueries; }
private:
GPUVendor m_Vendor;
@@ -293,6 +296,7 @@ private:
bool qualcommLeakingUBOOffsets = false;
bool qualcommDrefNon2DCompileCrash = false;
bool qualcommLineWidthCrash = false;
bool intelBrokenOcclusionQueries = false;
};
enum
-1
View File
@@ -90,7 +90,6 @@ public:
const MeshFormat &primary,
const MeshFormat &secondary);
void PatchOutputLocation(VkShaderModule &mod, BuiltinShader shaderType, uint32_t framebufferIndex);
void PatchFixedColShader(VkShaderModule &mod, float col[4]);
void PatchLineStripIndexBuffer(const ActionDescription *action, GPUBuffer &indexBuffer,
uint32_t &indexCount);
-49
View File
@@ -293,55 +293,6 @@ struct VulkanQuadOverdrawCallback : public VulkanActionCallback
VulkanRenderState m_PrevState;
};
void VulkanDebugManager::PatchOutputLocation(VkShaderModule &mod, BuiltinShader shaderType,
uint32_t framebufferIndex)
{
union
{
uint32_t *spirv;
float *data;
} alias;
rdcarray<uint32_t> spv = *m_pDriver->GetShaderCache()->GetBuiltinBlob(shaderType);
alias.spirv = &spv[0];
size_t spirvLength = spv.size();
bool patched = false;
size_t it = 5;
while(it < spirvLength)
{
uint16_t WordCount = alias.spirv[it] >> rdcspv::WordCountShift;
rdcspv::Op opcode = rdcspv::Op(alias.spirv[it] & rdcspv::OpCodeMask);
if(opcode == rdcspv::Op::Decorate &&
(rdcspv::Decoration(alias.spirv[it + 2]) == rdcspv::Decoration::Location))
{
alias.spirv[it + 3] = framebufferIndex;
patched = true;
break;
}
it += WordCount;
}
if(!patched)
RDCERR("Didn't patch the output location");
VkShaderModuleCreateInfo modinfo = {
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
NULL,
0,
spv.size() * sizeof(uint32_t),
alias.spirv,
};
VkResult vkr = m_pDriver->vkCreateShaderModule(m_Device, &modinfo, NULL, &mod);
CheckVkResult(vkr);
}
void VulkanDebugManager::PatchFixedColShader(VkShaderModule &mod, float col[4])
{
union
+176 -23
View File
@@ -239,9 +239,31 @@ struct PipelineReplacements
// PixelHistoryShaderCache manages temporary shaders created for pixel history.
struct PixelHistoryShaderCache
{
PixelHistoryShaderCache(WrappedVulkan *vk) : m_pDriver(vk) {}
PixelHistoryShaderCache(WrappedVulkan *vk) : m_pDriver(vk)
{
if(m_pDriver->GetDriverInfo().IntelBrokenOcclusionQueries())
{
if(m_pDriver->GetExtensions(NULL).ext_KHR_buffer_device_address)
{
dummybuf.Create(vk, vk->GetDev(), 1024, 1, GPUBuffer::eGPUBufferGPULocal |
GPUBuffer::eGPUBufferSSBO |
GPUBuffer::eGPUBufferAddressable);
}
else
{
m_pDriver->AddDebugMessage(
MessageCategory::Miscellaneous, MessageSeverity::High, MessageSource::RuntimeWarning,
"Intel drivers currently require a workaround to return proper pixel history results, "
"but KHR_buffer_device_address is not supported so the workaround cannot be "
"implemented. Results will be inaccurate.");
}
}
}
~PixelHistoryShaderCache()
{
if(dummybuf.device != VK_NULL_HANDLE)
dummybuf.Destroy();
for(auto it = m_ShaderReplacements.begin(); it != m_ShaderReplacements.end(); ++it)
{
if(it->second != VK_NULL_HANDLE)
@@ -261,8 +283,7 @@ struct PixelHistoryShaderCache
if(it != m_FixedColFS.end())
return it->second;
VkShaderModule sh;
m_pDriver->GetDebugManager()->PatchOutputLocation(sh, BuiltinShader::FixedColFS,
framebufferIndex);
PatchOutputLocation(sh, BuiltinShader::FixedColFS, framebufferIndex);
m_FixedColFS.insert(std::make_pair(framebufferIndex, sh));
return sh;
}
@@ -275,8 +296,7 @@ struct PixelHistoryShaderCache
if(it != m_PrimIDFS.end())
return it->second;
VkShaderModule sh;
m_pDriver->GetDebugManager()->PatchOutputLocation(sh, BuiltinShader::PixelHistoryPrimIDFS,
framebufferIndex);
PatchOutputLocation(sh, BuiltinShader::PixelHistoryPrimIDFS, framebufferIndex);
m_PrimIDFS.insert(std::make_pair(framebufferIndex, sh));
return sh;
}
@@ -305,35 +325,166 @@ private:
const VulkanCreationInfo::ShaderModule &moduleInfo =
m_pDriver->GetDebugManager()->GetShaderInfo(shaderId);
rdcarray<uint32_t> modSpirv = moduleInfo.spirv.GetSPIRV();
rdcspv::Editor editor(modSpirv);
editor.Prepare();
for(const rdcspv::EntryPoint &entry : editor.GetEntries())
bool modified = false;
bool found = false;
{
if(entry.name == entryName && MakeShaderStage(entry.executionModel) == stage)
rdcspv::Editor editor(modSpirv);
editor.Prepare();
for(const rdcspv::EntryPoint &entry : editor.GetEntries())
{
// In some cases a shader might just be binding a RW resource but not writing to it.
// If there are no writes (shader was not modified), no need to replace the shader,
// just insert VK_NULL_HANDLE to indicate that this shader has been processed.
VkShaderModule module = VK_NULL_HANDLE;
bool modified = StripShaderSideEffects(editor, entry.id);
if(modified)
if(entry.name == entryName && MakeShaderStage(entry.executionModel) == stage)
{
VkShaderModuleCreateInfo moduleCreateInfo = {};
moduleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.byteSize();
VkResult vkr =
m_pDriver->vkCreateShaderModule(m_pDriver->GetDev(), &moduleCreateInfo, NULL, &module);
m_pDriver->CheckVkResult(vkr);
// In some cases a shader might just be binding a RW resource but not writing to it.
// If there are no writes (shader was not modified), no need to replace the shader,
// just insert VK_NULL_HANDLE to indicate that this shader has been processed.
found = true;
modified = StripShaderSideEffects(editor, entry.id);
break;
}
return module;
}
}
if(found)
{
VkShaderModule module = VK_NULL_HANDLE;
if(modified)
{
VkShaderModuleCreateInfo moduleCreateInfo = {};
moduleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
moduleCreateInfo.pCode = modSpirv.data();
moduleCreateInfo.codeSize = modSpirv.byteSize();
VkResult vkr =
m_pDriver->vkCreateShaderModule(m_pDriver->GetDev(), &moduleCreateInfo, NULL, &module);
m_pDriver->CheckVkResult(vkr);
}
return module;
}
RDCERR("Entry point %s not found", entryName.c_str());
return VK_NULL_HANDLE;
}
void PatchOutputLocation(VkShaderModule &mod, BuiltinShader shaderType, uint32_t framebufferIndex)
{
rdcarray<uint32_t> spv = *m_pDriver->GetShaderCache()->GetBuiltinBlob(shaderType);
bool patched = false;
{
rdcspv::Editor editor(spv);
editor.Prepare();
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Annotations),
end = editor.End(rdcspv::Section::Annotations);
it < end; ++it)
{
if(it.opcode() == rdcspv::Op::Decorate)
{
rdcspv::OpDecorate dec(it);
if(dec.decoration == rdcspv::Decoration::Location)
{
dec.decoration.location = framebufferIndex;
it = dec;
patched = true;
break;
}
}
}
// implement workaround for Intel drivers to force shader to have unimportant side-effects
if(dummybuf.device != VK_NULL_HANDLE)
{
VkBufferDeviceAddressInfo getAddressInfo = {VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO};
getAddressInfo.buffer = dummybuf.buf;
VkDeviceAddress bufferAddress =
m_pDriver->vkGetBufferDeviceAddress(m_pDriver->GetDev(), &getAddressInfo);
rdcspv::Id uint32Type = editor.DeclareType(rdcspv::scalar<uint32_t>());
rdcspv::Id bufptrtype = editor.DeclareType(
rdcspv::Pointer(uint32Type, rdcspv::StorageClass::PhysicalStorageBuffer));
rdcspv::Id uint1 = editor.AddConstantImmediate<uint32_t>(uint32_t(1));
editor.AddExtension("SPV_KHR_physical_storage_buffer");
{
// change the memory model to physical storage buffer 64
rdcspv::Iter it = editor.Begin(rdcspv::Section::MemoryModel);
rdcspv::OpMemoryModel model(it);
model.addressingModel = rdcspv::AddressingModel::PhysicalStorageBuffer64;
it = model;
}
editor.AddCapability(rdcspv::Capability::PhysicalStorageBufferAddresses);
rdcspv::Id addressConstantLSB =
editor.AddConstantImmediate<uint32_t>(bufferAddress & 0xFFFFFFFF);
rdcspv::Id addressConstantMSB =
editor.AddConstantImmediate<uint32_t>((bufferAddress >> 32) & 0xFFFFFFFF);
rdcspv::Id uintPair = editor.DeclareType(rdcspv::Vector(rdcspv::scalar<uint32_t>(), 2));
rdcspv::Id addressConstant = editor.AddConstant(rdcspv::OpSpecConstantComposite(
uintPair, editor.MakeId(), {addressConstantLSB, addressConstantMSB}));
rdcspv::Id scope = editor.AddConstantImmediate<uint32_t>((uint32_t)rdcspv::Scope::Device);
rdcspv::Id semantics =
editor.AddConstantImmediate<uint32_t>((uint32_t)rdcspv::MemorySemantics::AcquireRelease);
// patch every function to include a BDA write just to be safe
for(rdcspv::Iter it = editor.Begin(rdcspv::Section::Functions),
end = editor.End(rdcspv::Section::Functions);
it < end; ++it)
{
if(it.opcode() == rdcspv::Op::Function)
{
// continue to the first label so we can insert a write at the start of the function
for(; it; ++it)
{
if(it.opcode() == rdcspv::Op::Label)
{
++it;
break;
}
}
// skip past any local variables
while(it.opcode() == rdcspv::Op::Variable)
++it;
rdcspv::Id structPtr = editor.AddOperation(
it, rdcspv::OpBitcast(bufptrtype, editor.MakeId(), addressConstant));
it++;
editor.AddOperation(it, rdcspv::OpAtomicUMax(uint32Type, editor.MakeId(), structPtr,
scope, semantics, uint1));
it++;
}
}
}
}
if(!patched)
RDCERR("Didn't patch the output location");
VkShaderModuleCreateInfo modinfo = {
VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
NULL,
0,
spv.size() * sizeof(uint32_t),
spv.data(),
};
VkResult vkr = m_pDriver->vkCreateShaderModule(m_pDriver->GetDev(), &modinfo, NULL, &mod);
m_pDriver->CheckVkResult(vkr);
}
// Removes instructions from the shader that would produce side effects (writing
// to storage buffers, or images). Returns true if the shader was modified, and
// false if there were no instructions to remove.
@@ -444,6 +595,8 @@ private:
// ShaderKey consists of original shader module ID and entry point name.
typedef rdcpair<ResourceId, rdcstr> ShaderKey;
std::map<ShaderKey, VkShaderModule> m_ShaderReplacements;
GPUBuffer dummybuf;
};
// VulkanPixelHistoryCallback is a generic VulkanActionCallback that can be used for