Work around NV driver bug miscompiling quadSwizzleHelper() in some cases

This commit is contained in:
baldurk
2025-03-19 12:55:08 +00:00
parent 20eda2c3a2
commit 1ab434d983
4 changed files with 14 additions and 1 deletions
@@ -1863,6 +1863,7 @@ ShaderDebugTrace *D3D11Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
cfg.x = x;
cfg.y = y;
cfg.waveOps = false;
cfg.maxWaveSize = 4;
ID3D11DepthStencilView *depthView = NULL;
@@ -2235,6 +2235,7 @@ ShaderDebugTrace *D3D12Replay::DebugVertex(uint32_t eventId, uint32_t vertid, ui
cfg.vert = sv_vertid;
cfg.inst = instid;
cfg.uavslot = 1;
cfg.waveOps = m_pDevice->GetOpts1().WaveOps != FALSE;
cfg.maxWaveSize = m_pDevice->GetOpts1().WaveLaneCountMax;
DXDebug::CreateInputFetcher(dxbc, NULL, cfg, fetcher);
@@ -2821,6 +2822,7 @@ ShaderDebugTrace *D3D12Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
cfg.x = x;
cfg.y = y;
cfg.uavslot = 1;
cfg.waveOps = m_pDevice->GetOpts1().WaveOps != FALSE;
cfg.maxWaveSize = 4;
cfg.outputSampleCount = RDCMAX(1U, pipeDesc.SampleDesc.Count);
@@ -3474,6 +3476,7 @@ ShaderDebugTrace *D3D12Replay::DebugThread(uint32_t eventId,
groupid[2] * threadDim[2] + threadid[2],
};
cfg.uavslot = 1;
cfg.waveOps = m_pDevice->GetOpts1().WaveOps != FALSE;
cfg.maxWaveSize = m_pDevice->GetOpts1().WaveLaneCountMax;
DXDebug::CreateInputFetcher(dxbc, NULL, cfg, fetcher);
+9 -1
View File
@@ -378,7 +378,15 @@ void CreateLegacyInputFetcher(const DXBC::DXBCContainer *dxbc, const InputFetche
(sizeof(DXDebug::PSLaneData) + fetcher.laneDataBufferStride) * cfg.maxWaveSize;
fetcher.laneDataBufferStride = 0;
fetcher.hlsl += GetEmbeddedResource(quadswizzle_hlsl);
bool dxil = dxbc->GetDXILByteCode() != NULL;
// work around an NV driver bug - it sometimes miscompiles the quad swizzle helper, so when the shader is DXIL and wave ops are available, define quadSwizzleHelper() as the QuadReadLaneAt() wave op instead
if(!dxil || !cfg.waveOps)
fetcher.hlsl += GetEmbeddedResource(quadswizzle_hlsl);
else
fetcher.hlsl +=
"#define quadSwizzleHelper(value, quadLaneIndex, readIndex) "
"QuadReadLaneAt(value, readIndex)\n";
fetcher.hlsl += R"(
struct LaneData
+1
View File
@@ -182,6 +182,7 @@ struct InputFetcherConfig
uint32_t uavslot = 0;
uint32_t uavspace = 0;
uint32_t maxWaveSize = 64;
bool waveOps = false;
uint32_t outputSampleCount = 1;
};