mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-05 01:20:42 +00:00
DXIL debugger support for GPU batching of Math and SampleGather ops
This commit is contained in:
@@ -603,6 +603,12 @@ D3D12DebugManager::~D3D12DebugManager()
|
||||
|
||||
bool D3D12DebugManager::CreateShaderDebugResources()
|
||||
{
|
||||
// Per-op result size in Vec4f units: a MathOp result occupies 2, a SampleGatherOp result occupies 6
|
||||
const uint64_t resultMaxElementSize = sizeof(Vec4f) * (2 + 6);
|
||||
const uint32_t maxQueuedResults = D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS;
|
||||
const uint64_t shaderDebugReadbackSize = resultMaxElementSize * maxQueuedResults;
|
||||
RDCCOMPILE_ASSERT(shaderDebugReadbackSize < m_ReadbackSize, "Readback buffer is not big enough");
|
||||
|
||||
rdcstr hlsl = GetEmbeddedResource(shaderdebug_hlsl);
|
||||
|
||||
D3D12RootSignature rootSig;
|
||||
|
||||
@@ -152,6 +152,11 @@ public:
|
||||
D3D12DebugManager(WrappedID3D12Device *wrapper);
|
||||
~D3D12DebugManager();
|
||||
|
||||
// Upper bound on the number of GPU shader debug operations (math and sample/gather ops
// combined) that may be queued into a single batch before the results are fetched.
enum
{
  MAX_SHADER_DEBUG_QUEUED_OPS = 128,
};
|
||||
|
||||
void GetBufferData(ID3D12Resource *buff, uint64_t offset, uint64_t length, bytebuf &retData);
|
||||
|
||||
ID3D12Resource *MakeCBuffer(UINT64 size);
|
||||
|
||||
@@ -570,7 +570,14 @@ D3D12APIWrapper::D3D12APIWrapper(WrappedID3D12Device *device, const DXIL::Progra
|
||||
m_EventId(eventId),
|
||||
m_Program(dxilProgram),
|
||||
m_Reflection(refl),
|
||||
m_DeviceThreadID(Threading::GetCurrentID())
|
||||
m_DeviceThreadID(Threading::GetCurrentID()),
|
||||
m_QueuedOpCmdList(NULL),
|
||||
m_QueuedMathOpIndex(0),
|
||||
m_QueuedSampleGatherOpIndex(0),
|
||||
m_MathOpResultOffset(0),
|
||||
m_MaxQueuedOps(D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS),
|
||||
m_SampleGatherOpResultsStart(D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS *
|
||||
m_MathOpResultByteSize)
|
||||
{
|
||||
// Create the storage layout for the constant buffers
|
||||
// The constant buffer data and details are filled in outside of this method
|
||||
@@ -1574,11 +1581,26 @@ UAVInfo D3D12APIWrapper::GetUAV(const BindingSlot &slot)
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
bool D3D12APIWrapper::CalculateMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input,
|
||||
ShaderVariable &output)
|
||||
bool D3D12APIWrapper::QueueMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input)
|
||||
{
|
||||
CHECK_DEVICE_THREAD();
|
||||
D3D12MarkerRegion region(m_Device->GetQueue()->GetReal(), "CalculateMathIntrinsic");
|
||||
ID3D12GraphicsCommandListX *cmdList = m_QueuedOpCmdList;
|
||||
if(!cmdList)
|
||||
{
|
||||
if(StartQueuedOps())
|
||||
cmdList = m_QueuedOpCmdList;
|
||||
}
|
||||
if(!cmdList)
|
||||
return false;
|
||||
|
||||
if(!QueuedOpsHasSpace())
|
||||
{
|
||||
m_Device->AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
|
||||
MessageSource::RuntimeWarning, "Too many GPU queued operations");
|
||||
return false;
|
||||
}
|
||||
|
||||
D3D12MarkerRegion region(m_Device->GetQueue()->GetReal(), "QueueMathIntrinsic");
|
||||
|
||||
int mathOp;
|
||||
switch(dxOp)
|
||||
@@ -1604,18 +1626,36 @@ bool D3D12APIWrapper::CalculateMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariab
|
||||
return false;
|
||||
}
|
||||
|
||||
ShaderVariable ignored;
|
||||
return D3D12ShaderDebug::CalculateMathIntrinsic(true, m_Device, mathOp, input, output, ignored);
|
||||
return D3D12ShaderDebug::QueueMathIntrinsic(false, m_Device, cmdList, mathOp, input,
|
||||
m_QueuedMathOpIndex++);
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
bool D3D12APIWrapper::CalculateSampleGather(
|
||||
DXIL::DXOp dxOp, SampleGatherResourceData resourceData, SampleGatherSamplerData samplerData,
|
||||
const ShaderVariable &uv, const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue, float compareValue,
|
||||
GatherChannel gatherChannel, uint32_t instructionIdx, ShaderVariable &output)
|
||||
bool D3D12APIWrapper::QueueSampleGather(DXIL::DXOp dxOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData,
|
||||
const ShaderVariable &uv, const ShaderVariable &ddxCalc,
|
||||
const ShaderVariable &ddyCalc, const int8_t texelOffsets[3],
|
||||
int multisampleIndex, float lodValue, float compareValue,
|
||||
GatherChannel gatherChannel, uint32_t instructionIdx,
|
||||
int &sampleRetType)
|
||||
{
|
||||
CHECK_DEVICE_THREAD();
|
||||
ID3D12GraphicsCommandListX *cmdList = m_QueuedOpCmdList;
|
||||
if(!cmdList)
|
||||
{
|
||||
if(StartQueuedOps())
|
||||
cmdList = m_QueuedOpCmdList;
|
||||
}
|
||||
if(!cmdList)
|
||||
return false;
|
||||
|
||||
if(!QueuedOpsHasSpace())
|
||||
{
|
||||
m_Device->AddDebugMessage(MessageCategory::Execution, MessageSeverity::High,
|
||||
MessageSource::RuntimeWarning, "Too many GPU queued operations");
|
||||
return false;
|
||||
}
|
||||
|
||||
int sampleOp;
|
||||
switch(dxOp)
|
||||
{
|
||||
@@ -1642,10 +1682,59 @@ bool D3D12APIWrapper::CalculateSampleGather(
|
||||
|
||||
const char *opString = ToStr(dxOp).c_str();
|
||||
uint8_t swizzle[4] = {0, 1, 2, 3};
|
||||
return D3D12ShaderDebug::CalculateSampleGather(
|
||||
true, m_Device, sampleOp, resourceData, samplerData, uv, ddxCalc, ddyCalc, texelOffsets,
|
||||
multisampleIndex, lodValue, compareValue, swizzle, gatherChannel, m_ShaderType,
|
||||
instructionIdx, opString, output);
|
||||
return D3D12ShaderDebug::QueueSampleGather(
|
||||
true, m_Device, m_QueuedOpCmdList, sampleOp, resourceData, samplerData, uv, ddxCalc, ddyCalc,
|
||||
texelOffsets, multisampleIndex, lodValue, compareValue, swizzle, gatherChannel, m_ShaderType,
|
||||
instructionIdx, opString, m_QueuedSampleGatherOpIndex++, sampleRetType);
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
bool D3D12APIWrapper::StartQueuedOps()
|
||||
{
|
||||
CHECK_DEVICE_THREAD();
|
||||
|
||||
RDCASSERTEQUAL(m_QueuedMathOpIndex, 0);
|
||||
RDCASSERTEQUAL(m_QueuedSampleGatherOpIndex, 0);
|
||||
RDCASSERTEQUAL(m_QueuedOpCmdList, NULL);
|
||||
RDCASSERTEQUAL(m_MathOpResultOffset, 0);
|
||||
|
||||
if(m_QueuedOpCmdList)
|
||||
return false;
|
||||
|
||||
m_QueuedOpCmdList = m_Device->GetDebugManager()->ResetDebugList();
|
||||
if(!m_QueuedOpCmdList)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
bool D3D12APIWrapper::GetQueuedResults(rdcarray<ShaderVariable *> &mathOpResults,
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults,
|
||||
const rdcarray<int> &sampleRetTypes)
|
||||
{
|
||||
const uint32_t countMathResultsPerGpuOp = 1;
|
||||
rdcarray<const uint8_t *> swizzles;
|
||||
uint8_t swizzle[4] = {0, 1, 2, 3};
|
||||
for(size_t i = 0; i < sampleGatherResults.size(); ++i)
|
||||
swizzles.push_back(swizzle);
|
||||
|
||||
bool ret = D3D12ShaderDebug::GetQueuedResults(m_Device, m_QueuedOpCmdList, mathOpResults,
|
||||
countMathResultsPerGpuOp, sampleGatherResults,
|
||||
sampleRetTypes, swizzles);
|
||||
|
||||
m_QueuedOpCmdList = NULL;
|
||||
m_QueuedMathOpIndex = 0;
|
||||
m_QueuedSampleGatherOpIndex = 0;
|
||||
m_MathOpResultOffset = 0;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
bool D3D12APIWrapper::QueuedOpsHasSpace() const
|
||||
{
|
||||
return (m_QueuedMathOpIndex + m_QueuedSampleGatherOpIndex) < m_MaxQueuedOps;
|
||||
}
|
||||
|
||||
// Called from any thread
|
||||
|
||||
@@ -54,14 +54,17 @@ public:
|
||||
UAVInfo GetUAV(const BindingSlot &slot) override;
|
||||
SRVInfo GetSRV(const BindingSlot &slot) override;
|
||||
|
||||
bool CalculateMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input,
|
||||
ShaderVariable &output) override;
|
||||
bool CalculateSampleGather(DXIL::DXOp dxOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue,
|
||||
float compareValue, GatherChannel gatherChannel,
|
||||
uint32_t instructionIdx, ShaderVariable &output) override;
|
||||
bool QueueMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input) override;
|
||||
bool QueueSampleGather(DXIL::DXOp dxOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue,
|
||||
float compareValue, GatherChannel gatherChannel, uint32_t instructionIdx,
|
||||
int &sampleRetType) override;
|
||||
bool GetQueuedResults(rdcarray<ShaderVariable *> &mathOpResults,
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults,
|
||||
const rdcarray<int> &sampleRetTypes) override;
|
||||
bool QueuedOpsHasSpace() const override;
|
||||
|
||||
ShaderVariable GetResourceInfo(DXIL::ResourceClass resClass, const DXDebug::BindingSlot &slot,
|
||||
uint32_t mipLevel) override;
|
||||
@@ -144,6 +147,7 @@ private:
|
||||
const char *opString);
|
||||
ResourceReferenceInfo FetchResourceReferenceInfo(const DXDebug::BindingSlot &slot);
|
||||
ShaderDirectAccess FetchShaderDirectAccess(DescriptorType type, const DXDebug::BindingSlot &slot);
|
||||
bool StartQueuedOps();
|
||||
|
||||
BuiltinInputs m_Builtins;
|
||||
rdcarray<DXILDebug::ThreadProperties> m_WorkgroupProperties;
|
||||
@@ -195,6 +199,16 @@ private:
|
||||
|
||||
const ShaderReflection &m_Reflection;
|
||||
WrappedID3D12Device *m_Device = NULL;
|
||||
|
||||
ID3D12GraphicsCommandListX *m_QueuedOpCmdList = NULL;
|
||||
uint32_t m_QueuedMathOpIndex = 0;
|
||||
uint32_t m_QueuedSampleGatherOpIndex = 0;
|
||||
uint64_t m_MathOpResultOffset = 0;
|
||||
const uint32_t m_MaxQueuedOps = 0;
|
||||
const uint64_t m_MathOpResultByteSize = sizeof(Vec4f) * 2;
|
||||
const uint64_t m_SampleGatherOpResultByteSize = sizeof(Vec4f);
|
||||
const uint64_t m_SampleGatherOpResultsStart;
|
||||
|
||||
const DXIL::Program *m_Program = NULL;
|
||||
const DXIL::EntryPointInterface *m_EntryPointInterface = NULL;
|
||||
const DXBC::ShaderType m_ShaderType;
|
||||
|
||||
@@ -42,6 +42,9 @@
|
||||
|
||||
using namespace DXBCBytecode;
|
||||
|
||||
// Bytes reserved in the readback buffer for each queued math op result (two Vec4f values)
const uint64_t s_MathOpResultByteSize = 2 * sizeof(Vec4f);
// Bytes reserved in the readback buffer for each queued sample/gather op result (six Vec4f values)
const uint64_t s_SampleGatherOpResultByteSize = 6 * sizeof(Vec4f);
|
||||
|
||||
static bool IsShaderParameterVisible(DXBC::ShaderType shaderType,
|
||||
D3D12_SHADER_VISIBILITY shaderVisibility)
|
||||
{
|
||||
@@ -82,11 +85,11 @@ static D3D12_DESCRIPTOR_RANGE_TYPE ConvertOperandTypeToDescriptorType(DXBCByteco
|
||||
}
|
||||
|
||||
// Helpers used by DXBC and DXIL debuggers to interact with GPU and resources
|
||||
bool D3D12ShaderDebug::CalculateMathIntrinsic(bool dxil, WrappedID3D12Device *device, int mathOp,
|
||||
const ShaderVariable &input, ShaderVariable &output1,
|
||||
ShaderVariable &output2)
|
||||
bool D3D12ShaderDebug::QueueMathIntrinsic(bool dxil, WrappedID3D12Device *device,
|
||||
ID3D12GraphicsCommandListX *cmdList, int mathOp,
|
||||
const ShaderVariable &input, const uint32_t queueIndex)
|
||||
{
|
||||
D3D12MarkerRegion region(device->GetQueue()->GetReal(), "CalculateMathIntrinsic");
|
||||
D3D12MarkerRegion region(device->GetQueue()->GetReal(), "QueueMathIntrinsic");
|
||||
|
||||
ID3D12Resource *pResultBuffer = device->GetDebugManager()->GetShaderDebugResultBuffer();
|
||||
ID3D12Resource *pReadbackBuffer = device->GetDebugManager()->GetReadbackBuffer();
|
||||
@@ -96,7 +99,6 @@ bool D3D12ShaderDebug::CalculateMathIntrinsic(bool dxil, WrappedID3D12Device *de
|
||||
cbufferData.mathOp = mathOp;
|
||||
|
||||
// Set root signature & sig params on command list, then execute the shader
|
||||
ID3D12GraphicsCommandListX *cmdList = device->GetDebugManager()->ResetDebugList();
|
||||
device->GetDebugManager()->SetDescriptorHeaps(cmdList, true, false);
|
||||
cmdList->SetPipelineState(dxil ? device->GetDebugManager()->GetDXILMathIntrinsicsPso()
|
||||
: device->GetDebugManager()->GetMathIntrinsicsPso());
|
||||
@@ -113,52 +115,25 @@ bool D3D12ShaderDebug::CalculateMathIntrinsic(bool dxil, WrappedID3D12Device *de
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
cmdList->ResourceBarrier(1, &barrier);
|
||||
|
||||
cmdList->CopyBufferRegion(pReadbackBuffer, 0, pResultBuffer, 0, sizeof(Vec4f) * 6);
|
||||
uint64_t destOffset = queueIndex * s_MathOpResultByteSize;
|
||||
cmdList->CopyBufferRegion(pReadbackBuffer, destOffset, pResultBuffer, 0, s_MathOpResultByteSize);
|
||||
|
||||
HRESULT hr = cmdList->Close();
|
||||
if(FAILED(hr))
|
||||
{
|
||||
RDCERR("Failed to close command list HRESULT: %s", ToStr(hr).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
{
|
||||
ID3D12CommandList *l = cmdList;
|
||||
device->GetQueue()->ExecuteCommandLists(1, &l);
|
||||
device->InternalQueueWaitForIdle();
|
||||
device->GetDebugManager()->ResetDebugAlloc();
|
||||
}
|
||||
|
||||
D3D12_RANGE range = {0, sizeof(Vec4f) * 6};
|
||||
|
||||
byte *results = NULL;
|
||||
hr = pReadbackBuffer->Map(0, &range, (void **)&results);
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
pReadbackBuffer->Unmap(0, &range);
|
||||
RDCERR("Failed to map readback buffer HRESULT: %s", ToStr(hr).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(output1.value.u32v.data(), results, sizeof(Vec4f));
|
||||
memcpy(output2.value.u32v.data(), results + sizeof(Vec4f), sizeof(Vec4f));
|
||||
|
||||
range.End = 0;
|
||||
pReadbackBuffer->Unmap(0, &range);
|
||||
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
cmdList->ResourceBarrier(1, &barrier);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
bool dxil, WrappedID3D12Device *device, int sampleOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uvIn,
|
||||
const ShaderVariable &ddxCalcIn, const ShaderVariable &ddyCalcIn, const int8_t texelOffsets[3],
|
||||
int multisampleIndex, float lodValue, float compareValue, const uint8_t swizzle[4],
|
||||
GatherChannel gatherChannel, const DXBC::ShaderType shaderType, uint32_t instruction,
|
||||
const char *opString, ShaderVariable &output)
|
||||
bool D3D12ShaderDebug::QueueSampleGather(
|
||||
bool dxil, WrappedID3D12Device *device, ID3D12GraphicsCommandListX *cmdList, int sampleOp,
|
||||
SampleGatherResourceData resourceData, SampleGatherSamplerData samplerData,
|
||||
const ShaderVariable &uvIn, const ShaderVariable &ddxCalcIn, const ShaderVariable &ddyCalcIn,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue, float compareValue,
|
||||
const uint8_t swizzle[4], GatherChannel gatherChannel, const DXBC::ShaderType shaderType,
|
||||
uint32_t instruction, const char *opString, const uint32_t queueIndex, int &sampleRetType)
|
||||
{
|
||||
D3D12MarkerRegion region(device->GetQueue()->GetReal(), "CalculateSampleGather");
|
||||
D3D12MarkerRegion region(device->GetQueue()->GetReal(), "QueueSampleGather");
|
||||
|
||||
ShaderVariable uv(uvIn);
|
||||
ShaderVariable ddxCalc(ddxCalcIn);
|
||||
@@ -269,6 +244,7 @@ bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
{
|
||||
RDCERR("Unsupported return type %d in sample operation", resourceData.retType);
|
||||
}
|
||||
sampleRetType = cbufferData.debugSampleRetType;
|
||||
|
||||
cbufferData.debugSampleGatherChannel = (int)gatherChannel;
|
||||
cbufferData.debugSampleSampleIndex = multisampleIndex;
|
||||
@@ -276,6 +252,7 @@ bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
cbufferData.debugSampleLod = lodValue;
|
||||
cbufferData.debugSampleCompare = compareValue;
|
||||
|
||||
// Store a copy of the event's render state to restore later
|
||||
D3D12RenderState &rs = device->GetQueue()->GetCommandData()->m_RenderState;
|
||||
D3D12RenderState prevState = rs;
|
||||
|
||||
@@ -283,7 +260,6 @@ bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
ID3D12PipelineState *pso = dxil ? device->GetDebugManager()->GetDXILTexSamplePso(texelOffsets)
|
||||
: device->GetDebugManager()->GetTexSamplePso(texelOffsets);
|
||||
|
||||
ID3D12GraphicsCommandListX *cmdList = device->GetDebugManager()->ResetDebugList();
|
||||
rs.pipe = GetResID(pso);
|
||||
rs.rts.clear();
|
||||
// Set viewport/scissor unconditionally - we need to set this all the time for sampling for a
|
||||
@@ -359,7 +335,32 @@ bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
cmdList->ResourceBarrier(1, &barrier);
|
||||
|
||||
cmdList->CopyBufferRegion(pReadbackBuffer, 0, pResultBuffer, 0, sizeof(Vec4f) * 6);
|
||||
const uint64_t sampleGatherOpResultsStart(D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS *
|
||||
s_MathOpResultByteSize);
|
||||
|
||||
uint64_t destOffset = sampleGatherOpResultsStart + queueIndex * s_SampleGatherOpResultByteSize;
|
||||
cmdList->CopyBufferRegion(pReadbackBuffer, destOffset, pResultBuffer, 0,
|
||||
s_SampleGatherOpResultByteSize);
|
||||
|
||||
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE;
|
||||
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
|
||||
cmdList->ResourceBarrier(1, &barrier);
|
||||
|
||||
// Restore D3D12 state to what the event uses
|
||||
rs = prevState;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool D3D12ShaderDebug::GetQueuedResults(WrappedID3D12Device *device,
|
||||
ID3D12GraphicsCommandListX *cmdList,
|
||||
rdcarray<ShaderVariable *> &mathOpResults,
|
||||
uint32_t countMathResultsPerGpuOp,
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults,
|
||||
const rdcarray<int> &sampleRetTypes,
|
||||
const rdcarray<const uint8_t *> &swizzles)
|
||||
{
|
||||
RDCASSERTEQUAL(sampleGatherResults.size(), sampleRetTypes.size());
|
||||
RDCASSERTEQUAL(sampleGatherResults.size(), swizzles.size());
|
||||
|
||||
HRESULT hr = cmdList->Close();
|
||||
if(FAILED(hr))
|
||||
@@ -375,46 +376,76 @@ bool D3D12ShaderDebug::CalculateSampleGather(
|
||||
device->GetDebugManager()->ResetDebugAlloc();
|
||||
}
|
||||
|
||||
rs = prevState;
|
||||
ID3D12Resource *pReadbackBuffer = device->GetDebugManager()->GetReadbackBuffer();
|
||||
|
||||
D3D12_RANGE range = {0, sizeof(Vec4f) * 6};
|
||||
|
||||
void *results = NULL;
|
||||
hr = pReadbackBuffer->Map(0, &range, &results);
|
||||
byte *gpuResults = NULL;
|
||||
hr = pReadbackBuffer->Map(0, NULL, (void **)&gpuResults);
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
pReadbackBuffer->Unmap(0, &range);
|
||||
pReadbackBuffer->Unmap(0, NULL);
|
||||
RDCERR("Failed to map readback buffer HRESULT: %s", ToStr(hr).c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
ShaderVariable lookupResult("tex", 0.0f, 0.0f, 0.0f, 0.0f);
|
||||
uintptr_t bufferEnd = (uintptr_t)(gpuResults + pReadbackBuffer->GetDesc().Width);
|
||||
|
||||
float *retFloats = (float *)results;
|
||||
uint32_t *retUInts = (uint32_t *)(retFloats + 8);
|
||||
int32_t *retSInts = (int32_t *)(retUInts + 8);
|
||||
byte *gpuMathOpResults = gpuResults;
|
||||
for(uint32_t i = 0; i < mathOpResults.size(); i += countMathResultsPerGpuOp)
|
||||
{
|
||||
const size_t countBytes = sizeof(Vec4f);
|
||||
const size_t countBytesPerGpuOp = countBytes * countMathResultsPerGpuOp;
|
||||
RDCASSERT((uintptr_t)gpuMathOpResults + countBytesPerGpuOp <= bufferEnd,
|
||||
(uintptr_t)gpuMathOpResults, countBytesPerGpuOp, bufferEnd);
|
||||
RDCASSERT(countBytesPerGpuOp <= s_MathOpResultByteSize, countBytesPerGpuOp,
|
||||
s_MathOpResultByteSize);
|
||||
|
||||
if(cbufferData.debugSampleRetType == DEBUG_SAMPLE_UINT)
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
lookupResult.value.u32v[i] = retUInts[swizzle[i]];
|
||||
}
|
||||
else if(cbufferData.debugSampleRetType == DEBUG_SAMPLE_INT)
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
lookupResult.value.s32v[i] = retSInts[swizzle[i]];
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
lookupResult.value.f32v[i] = retFloats[swizzle[i]];
|
||||
for(uint32_t r = 0; r < countMathResultsPerGpuOp; r++)
|
||||
{
|
||||
ShaderVariable *result = mathOpResults[i + r];
|
||||
memcpy(result->value.u32v.data(), gpuMathOpResults + r * countBytes, countBytes);
|
||||
}
|
||||
gpuMathOpResults += s_MathOpResultByteSize;
|
||||
}
|
||||
|
||||
range.End = 0;
|
||||
pReadbackBuffer->Unmap(0, &range);
|
||||
const uint64_t sampleGatherOpResultsStart(D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS *
|
||||
s_MathOpResultByteSize);
|
||||
byte *gpuSampleGatherOpResults = gpuResults + sampleGatherOpResultsStart;
|
||||
for(uint32_t s = 0; s < sampleGatherResults.size(); ++s)
|
||||
{
|
||||
float *retFloats = (float *)gpuSampleGatherOpResults;
|
||||
uint32_t *retUInts = (uint32_t *)(retFloats + 8);
|
||||
int32_t *retSInts = (int32_t *)(retUInts + 8);
|
||||
|
||||
output = lookupResult;
|
||||
size_t countBytes = 16;
|
||||
RDCASSERT((uintptr_t)gpuSampleGatherOpResults + countBytes <= bufferEnd,
|
||||
(uintptr_t)gpuSampleGatherOpResults, countBytes, bufferEnd);
|
||||
RDCASSERT(countBytes <= s_SampleGatherOpResultByteSize, countBytes,
|
||||
s_SampleGatherOpResultByteSize);
|
||||
|
||||
ShaderVariable &output = *sampleGatherResults[s];
|
||||
|
||||
int debugSampleRetType = sampleRetTypes[s];
|
||||
const uint8_t *swizzle = swizzles[s];
|
||||
if(debugSampleRetType == DEBUG_SAMPLE_UINT)
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
output.value.u32v[i] = retUInts[swizzle[i]];
|
||||
}
|
||||
else if(debugSampleRetType == DEBUG_SAMPLE_INT)
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
output.value.s32v[i] = retSInts[swizzle[i]];
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 4; i++)
|
||||
output.value.f32v[i] = retFloats[swizzle[i]];
|
||||
}
|
||||
gpuSampleGatherOpResults += s_SampleGatherOpResultByteSize;
|
||||
}
|
||||
|
||||
pReadbackBuffer->Unmap(0, NULL);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -533,8 +564,8 @@ D3D12Descriptor D3D12ShaderDebug::FindDescriptor(WrappedID3D12Device *device,
|
||||
srvDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvDesc.Buffer.FirstElement = 0;
|
||||
// we don't know the real length or structure stride from a root descriptor, so set
|
||||
// defaults. This behaviour seems undefined in drivers, so returning 1 as the number of
|
||||
// elements is as sensible as anything else
|
||||
// defaults. This behaviour seems undefined in drivers, so returning 1 as the number
|
||||
// of elements is as sensible as anything else
|
||||
srvDesc.Buffer.NumElements = 1;
|
||||
srvDesc.Buffer.StructureByteStride = 4;
|
||||
srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
|
||||
@@ -555,8 +586,8 @@ D3D12Descriptor D3D12ShaderDebug::FindDescriptor(WrappedID3D12Device *device,
|
||||
uavDesc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uavDesc.Buffer.FirstElement = 0;
|
||||
// we don't know the real length or structure stride from a root descriptor, so set
|
||||
// defaults. This behaviour seems undefined in drivers, so returning 1 as the number of
|
||||
// elements is as sensible as anything else
|
||||
// defaults. This behaviour seems undefined in drivers, so returning 1 as the number
|
||||
// of elements is as sensible as anything else
|
||||
uavDesc.Buffer.NumElements = 1;
|
||||
uavDesc.Buffer.StructureByteStride = 4;
|
||||
uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
|
||||
@@ -1012,6 +1043,7 @@ public:
|
||||
private:
|
||||
DXBC::ShaderType GetShaderType() { return m_dxbc ? m_dxbc->m_Type : DXBC::ShaderType::Pixel; }
|
||||
WrappedID3D12Device *m_pDevice;
|
||||
ID3D12GraphicsCommandListX *m_QueuedOpCmdList;
|
||||
const DXBC::DXBCContainer *m_dxbc;
|
||||
DXBCDebug::GlobalState &m_globalState;
|
||||
uint32_t m_instruction;
|
||||
@@ -1024,13 +1056,13 @@ D3D12DebugAPIWrapper::D3D12DebugAPIWrapper(WrappedID3D12Device *device,
|
||||
DXBCDebug::GlobalState &globalState, uint32_t eid)
|
||||
: m_pDevice(device), m_dxbc(dxbc), m_globalState(globalState), m_instruction(0), m_EventID(eid)
|
||||
{
|
||||
m_QueuedOpCmdList = NULL;
|
||||
}
|
||||
|
||||
D3D12DebugAPIWrapper::~D3D12DebugAPIWrapper()
|
||||
{
|
||||
// if we replayed to before the action for fetching some UAVs, replay back to after the action to
|
||||
// keep
|
||||
// the state consistent.
|
||||
// if we replayed to before the action for fetching some UAVs, replay back to after the action
|
||||
// to keep the state consistent.
|
||||
if(m_DidReplay)
|
||||
{
|
||||
D3D12MarkerRegion region(m_pDevice->GetQueue()->GetReal(), "ResetReplay");
|
||||
@@ -1092,9 +1124,9 @@ void D3D12DebugAPIWrapper::FetchSRV(const DXBCDebug::BindingSlot &slot)
|
||||
D3D12_RESOURCE_DESC resDesc = pResource->GetDesc();
|
||||
|
||||
// DXBC allows root buffers to have a stride of up to 16 bytes in the shader, which
|
||||
// means encoding the byte offset into the first element here is wrong without knowing
|
||||
// what the actual accessed stride is. Instead we only fetch the data from that offset
|
||||
// onwards.
|
||||
// means encoding the byte offset into the first element here is wrong without
|
||||
// knowing what the actual accessed stride is. Instead we only fetch the data from
|
||||
// that offset onwards.
|
||||
|
||||
// TODO: Root buffers can be 32-bit UINT/SINT/FLOAT. Using UINT for now, but the
|
||||
// resource desc format or the DXBC reflection info might be more correct.
|
||||
@@ -1266,9 +1298,9 @@ void D3D12DebugAPIWrapper::FetchUAV(const DXBCDebug::BindingSlot &slot)
|
||||
D3D12_RESOURCE_DESC resDesc = pResource->GetDesc();
|
||||
|
||||
// DXBC allows root buffers to have a stride of up to 16 bytes in the shader, which
|
||||
// means encoding the byte offset into the first element here is wrong without knowing
|
||||
// what the actual accessed stride is. Instead we only fetch the data from that offset
|
||||
// onwards.
|
||||
// means encoding the byte offset into the first element here is wrong without
|
||||
// knowing what the actual accessed stride is. Instead we only fetch the data from
|
||||
// that offset onwards.
|
||||
|
||||
// TODO: Root buffers can be 32-bit UINT/SINT/FLOAT. Using UINT for now, but the
|
||||
// resource desc format or the DXBC reflection info might be more correct.
|
||||
@@ -1394,6 +1426,7 @@ void D3D12DebugAPIWrapper::FetchUAV(const DXBCDebug::BindingSlot &slot)
|
||||
slot.registerSpace);
|
||||
}
|
||||
|
||||
// Used by the DXBC Debugger
|
||||
bool D3D12DebugAPIWrapper::CalculateMathIntrinsic(DXBCBytecode::OpcodeType opcode,
|
||||
const ShaderVariable &input,
|
||||
ShaderVariable &output1, ShaderVariable &output2)
|
||||
@@ -1413,7 +1446,26 @@ bool D3D12DebugAPIWrapper::CalculateMathIntrinsic(DXBCBytecode::OpcodeType opcod
|
||||
return false;
|
||||
}
|
||||
|
||||
return D3D12ShaderDebug::CalculateMathIntrinsic(false, m_pDevice, mathOp, input, output1, output2);
|
||||
RDCASSERT(!m_QueuedOpCmdList);
|
||||
m_QueuedOpCmdList = m_pDevice->GetDebugManager()->ResetDebugList();
|
||||
const uint32_t queueIndex = 0;
|
||||
if(!D3D12ShaderDebug::QueueMathIntrinsic(false, m_pDevice, m_QueuedOpCmdList, mathOp, input,
|
||||
queueIndex))
|
||||
return false;
|
||||
|
||||
rdcarray<ShaderVariable *> mathOpResults;
|
||||
mathOpResults.push_back(&output1);
|
||||
mathOpResults.push_back(&output2);
|
||||
rdcarray<ShaderVariable *> sampleGatherResults;
|
||||
rdcarray<int> sampleRetTypes;
|
||||
rdcarray<const uint8_t *> swizzles;
|
||||
|
||||
const uint32_t countMathResultsPerGpuOp = 2;
|
||||
bool ret = D3D12ShaderDebug::GetQueuedResults(m_pDevice, m_QueuedOpCmdList, mathOpResults,
|
||||
countMathResultsPerGpuOp, sampleGatherResults,
|
||||
sampleRetTypes, swizzles);
|
||||
m_QueuedOpCmdList = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
D3D12Descriptor D3D12DebugAPIWrapper::FindDescriptor(DXBCBytecode::OperandType type,
|
||||
@@ -1493,6 +1545,7 @@ ShaderVariable D3D12DebugAPIWrapper::GetResourceInfo(DXBCBytecode::OperandType t
|
||||
dim, false);
|
||||
}
|
||||
|
||||
// Used by the DXBC Debugger
|
||||
bool D3D12DebugAPIWrapper::CalculateSampleGather(
|
||||
DXBCBytecode::OpcodeType opcode, DXDebug::SampleGatherResourceData resourceData,
|
||||
DXDebug::SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
@@ -1525,10 +1578,29 @@ bool D3D12DebugAPIWrapper::CalculateSampleGather(
|
||||
return false;
|
||||
}
|
||||
|
||||
return D3D12ShaderDebug::CalculateSampleGather(
|
||||
false, m_pDevice, sampleOp, resourceData, samplerData, uv, ddxCalc, ddyCalc, texelOffsets,
|
||||
multisampleIndex, lodOrCompareValue, lodOrCompareValue, swizzle, gatherChannel,
|
||||
GetShaderType(), m_instruction, opString, output);
|
||||
RDCASSERT(!m_QueuedOpCmdList);
|
||||
m_QueuedOpCmdList = m_pDevice->GetDebugManager()->ResetDebugList();
|
||||
int sampleRetType = 0;
|
||||
const uint32_t queueIndex = 0;
|
||||
if(!D3D12ShaderDebug::QueueSampleGather(
|
||||
false, m_pDevice, m_QueuedOpCmdList, sampleOp, resourceData, samplerData, uv, ddxCalc,
|
||||
ddyCalc, texelOffsets, multisampleIndex, lodOrCompareValue, lodOrCompareValue, swizzle,
|
||||
gatherChannel, GetShaderType(), m_instruction, opString, queueIndex, sampleRetType))
|
||||
return false;
|
||||
|
||||
rdcarray<ShaderVariable *> mathOpResults;
|
||||
rdcarray<ShaderVariable *> sampleGatherResults;
|
||||
sampleGatherResults.push_back(&output);
|
||||
rdcarray<int> sampleRetTypes;
|
||||
sampleRetTypes.push_back(sampleRetType);
|
||||
rdcarray<const uint8_t *> swizzles;
|
||||
swizzles.push_back(swizzle);
|
||||
|
||||
bool ret = D3D12ShaderDebug::GetQueuedResults(m_pDevice, m_QueuedOpCmdList, mathOpResults, 0,
|
||||
sampleGatherResults, sampleRetTypes, swizzles);
|
||||
m_QueuedOpCmdList = NULL;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void GatherConstantBuffers(WrappedID3D12Device *pDevice, const DXBCBytecode::Program &program,
|
||||
@@ -2974,11 +3046,11 @@ ShaderDebugTrace *D3D12Replay::DebugPixel(uint32_t eventId, uint32_t x, uint32_t
|
||||
return new ShaderDebugTrace;
|
||||
}
|
||||
|
||||
// if we encounter multiple hits at our destination pixel co-ord (or any other) we check to see if
|
||||
// a specific primitive was requested (via primitive parameter not being set to ~0U). If it was,
|
||||
// debug that pixel, otherwise do a best-estimate of which fragment was the last to successfully
|
||||
// depth test and debug that, just by checking if the depth test is ordered and picking the final
|
||||
// fragment in the series
|
||||
// if we encounter multiple hits at our destination pixel co-ord (or any other) we check to see
|
||||
// if a specific primitive was requested (via primitive parameter not being set to ~0U). If it
|
||||
// was, debug that pixel, otherwise do a best-estimate of which fragment was the last to
|
||||
// successfully depth test and debug that, just by checking if the depth test is ordered and
|
||||
// picking the final fragment in the series
|
||||
|
||||
// Get depth func and determine "winner" pixel
|
||||
DXDebug::DebugHit *pWinnerHit = NULL;
|
||||
|
||||
@@ -39,18 +39,21 @@ typedef DXDebug::GatherChannel GatherChannel;
|
||||
typedef DXBCBytecode::SamplerMode SamplerMode;
|
||||
|
||||
// Helpers used by DXBC and DXIL debuggers to interact with GPU and resources
|
||||
bool CalculateMathIntrinsic(bool dxil, WrappedID3D12Device *device, int mathOp,
|
||||
const ShaderVariable &input, ShaderVariable &output1,
|
||||
ShaderVariable &output2);
|
||||
bool QueueMathIntrinsic(bool dxil, WrappedID3D12Device *device, ID3D12GraphicsCommandListX *cmdList,
|
||||
int mathOp, const ShaderVariable &input, const uint32_t queueIndex);
|
||||
|
||||
// Sample/gather helper: takes the resource and sampler descriptions, the coordinate (`uv`), the
// two derivative variables, texel offsets, multisample index, LOD and compare values, a 4-entry
// swizzle and the gather channel, plus a ShaderVariable passed by non-const reference (`output`).
// NOTE(review): the definition is not visible here. Presumably `output` receives the sampled
// value and a false return means the sample could not be performed; `instruction` and `opString`
// look like they are only used for diagnostics - confirm against the implementation.
bool CalculateSampleGather(bool dxil, WrappedID3D12Device *device, int sampleOp,
|
||||
SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue,
|
||||
float compareValue, const uint8_t swizzle[4],
|
||||
GatherChannel gatherChannel, const DXBC::ShaderType shaderType,
|
||||
uint32_t instruction, const char *opString, ShaderVariable &output);
|
||||
// Sample/gather helper that takes the command list to use and a `queueIndex`. It has no
// ShaderVariable output parameter; the only non-const reference is `sampleRetType`.
// NOTE(review): the definition is not visible here. Presumably the work is recorded on `cmdList`,
// the result lands in a slot derived from `queueIndex`, and `sampleRetType` is written so the
// caller can hand it back to GetQueuedResults when decoding the result - confirm against the
// implementation.
bool QueueSampleGather(bool dxil, WrappedID3D12Device *device, ID3D12GraphicsCommandListX *cmdList,
|
||||
int sampleOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue,
|
||||
float compareValue, const uint8_t swizzle[4], GatherChannel gatherChannel,
|
||||
const DXBC::ShaderType shaderType, uint32_t instruction,
|
||||
const char *opString, const uint32_t queueIndex, int &sampleRetType);
|
||||
// Takes two arrays of ShaderVariable pointers (math results and sample/gather results) together
// with per-sample return types and swizzles.
// NOTE(review): the definition is not visible here. Presumably this submits `cmdList`, reads the
// queued results back and writes them through the pointers in `mathOpResults` and
// `sampleGatherResults`, with `sampleRetTypes` and `swizzles` indexed in step with
// `sampleGatherResults`; the meaning of `countMathResultsPerGpuOp` should also be confirmed
// against the implementation.
bool GetQueuedResults(WrappedID3D12Device *device, ID3D12GraphicsCommandListX *cmdList,
|
||||
rdcarray<ShaderVariable *> &mathOpResults, uint32_t countMathResultsPerGpuOp,
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults,
|
||||
const rdcarray<int> &sampleRetTypes, const rdcarray<const uint8_t *> &swizzles);
|
||||
|
||||
// Returns a D3D12Descriptor for the given heap type and descriptor index.
// NOTE(review): the definition is not visible here - which heap is consulted and what is returned
// for an out-of-range index need confirming against the implementation.
D3D12Descriptor FindDescriptor(WrappedID3D12Device *device,
|
||||
const DXDebug::HeapDescriptorType heapType, uint32_t descriptorIndex);
|
||||
|
||||
@@ -10482,6 +10482,7 @@ void Debugger::ProcessQueuedOps()
|
||||
CHECK_DEBUGGER_THREAD();
|
||||
ProcessQueuedGpuMathOps();
|
||||
ProcessQueuedGpuSampleGatherOps();
|
||||
SyncPendingGpuOps();
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
@@ -10508,11 +10509,14 @@ void Debugger::ProcessQueuedGpuMathOps()
|
||||
{
|
||||
if(m_QueuedGpuMathOps[lane])
|
||||
{
|
||||
if(!m_ApiWrapper->QueuedOpsHasSpace())
|
||||
SyncPendingGpuOps();
|
||||
|
||||
m_QueuedGpuMathOps[lane] = false;
|
||||
const GpuMathOperation &mathOp = m_Workgroup[lane].GetQueuedGpuMathOp();
|
||||
|
||||
uint32_t workgroupIndex = mathOp.workgroupIndex;
|
||||
if(m_ApiWrapper->CalculateMathIntrinsic(mathOp.dxOp, mathOp.input, *mathOp.result))
|
||||
if(m_ApiWrapper->QueueMathIntrinsic(mathOp.dxOp, mathOp.input))
|
||||
{
|
||||
m_PendingGpuMathsOpsResults.push_back(mathOp.result);
|
||||
}
|
||||
@@ -10536,25 +10540,32 @@ void Debugger::ProcessQueuedGpuSampleGatherOps()
|
||||
{
|
||||
if(m_QueuedGpuSampleGatherOps[lane])
|
||||
{
|
||||
if(!m_ApiWrapper->QueuedOpsHasSpace())
|
||||
SyncPendingGpuOps();
|
||||
|
||||
m_QueuedGpuSampleGatherOps[lane] = false;
|
||||
const GpuSampleGatherOperation &sampleGatherOp = m_Workgroup[lane].GetQueuedGpuSampleGatherOp();
|
||||
|
||||
uint32_t workgroupIndex = sampleGatherOp.workgroupIndex;
|
||||
ShaderVariable &result = *sampleGatherOp.result;
|
||||
bool hasResult = false;
|
||||
if(!m_ApiWrapper->CalculateSampleGather(
|
||||
int sampleRetType = 0;
|
||||
if(!m_ApiWrapper->QueueSampleGather(
|
||||
sampleGatherOp.dxOp, sampleGatherOp.resourceData, sampleGatherOp.samplerData,
|
||||
sampleGatherOp.uv, sampleGatherOp.ddxCalc, sampleGatherOp.ddyCalc,
|
||||
sampleGatherOp.texelOffsets, sampleGatherOp.multisampleIndex, sampleGatherOp.lodValue,
|
||||
sampleGatherOp.compareValue, sampleGatherOp.gatherChannel,
|
||||
sampleGatherOp.instructionIdx, *sampleGatherOp.result))
|
||||
sampleGatherOp.instructionIdx, sampleRetType))
|
||||
{
|
||||
// sample failed. Pretend we got 0 columns back
|
||||
set0001(result);
|
||||
hasResult = true;
|
||||
}
|
||||
if(!hasResult)
|
||||
{
|
||||
m_PendingGpuSampleGatherOpsResults.push_back(sampleGatherOp.result);
|
||||
m_PendingGpuSampleGatherOpsSampleRetTypes.push_back(sampleRetType);
|
||||
}
|
||||
|
||||
DXIL_DEBUG_RDCASSERT(!m_PendingLanes[workgroupIndex]);
|
||||
m_PendingLanes[workgroupIndex] = true;
|
||||
@@ -10562,6 +10573,24 @@ void Debugger::ProcessQueuedGpuSampleGatherOps()
|
||||
}
|
||||
}
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
// Hands every pending GPU math and sample/gather result pointer to the API wrapper's
// GetQueuedResults() and, if that succeeds, empties the three pending lists.
// Does nothing when both result lists are empty.
void Debugger::SyncPendingGpuOps()
|
||||
{
|
||||
CHECK_DEBUGGER_THREAD();
|
||||
// nothing has been queued since the last sync, so there is nothing to fetch
if(m_PendingGpuMathsOpsResults.empty() && m_PendingGpuSampleGatherOpsResults.empty())
|
||||
return;
|
||||
|
||||
if(!(m_ApiWrapper->GetQueuedResults(m_PendingGpuMathsOpsResults, m_PendingGpuSampleGatherOpsResults,
|
||||
m_PendingGpuSampleGatherOpsSampleRetTypes)))
|
||||
{
|
||||
RDCERR("GetQueuedResults failed");
|
||||
// NOTE(review): this early return leaves all three pending lists populated, so unless they are
// cleared elsewhere the same entries are passed to GetQueuedResults again on the next sync -
// confirm that is intended.
return;
|
||||
}
|
||||
// the results have been handed over; reset the lists for the next batch
m_PendingGpuMathsOpsResults.clear();
|
||||
m_PendingGpuSampleGatherOpsResults.clear();
|
||||
m_PendingGpuSampleGatherOpsSampleRetTypes.clear();
|
||||
}
|
||||
|
||||
void Debugger::SimulationJobHelper()
|
||||
{
|
||||
while(AtomicLoad(&atomic_simulationFinished) == 0)
|
||||
|
||||
@@ -307,14 +307,17 @@ public:
|
||||
virtual UAVInfo GetUAV(const BindingSlot &slot) = 0;
|
||||
virtual SRVInfo GetSRV(const BindingSlot &slot) = 0;
|
||||
|
||||
// Evaluate the math op `dxOp` for `input`, with `output` passed by non-const reference. The
// visible call in Debugger::ProcessQueuedGpuMathOps passes the op's result variable as `output`.
// NOTE(review): presumably a false return means the op could not be evaluated - confirm against
// the implementing class.
virtual bool CalculateMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input,
|
||||
ShaderVariable &output) = 0;
|
||||
// Perform the sample/gather op `dxOp` with the given resource, sampler, coordinates, derivatives,
// offsets and LOD/compare values, with `output` passed by non-const reference. The visible call
// in Debugger::ProcessQueuedGpuSampleGatherOps passes the op's result variable as `output` and
// treats a false return as a failed sample, substituting a default value via set0001().
virtual bool CalculateSampleGather(DXIL::DXOp dxOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex,
|
||||
float lodValue, float compareValue, GatherChannel gatherChannel,
|
||||
uint32_t instructionIdx, ShaderVariable &output) = 0;
|
||||
// Queue the math op `dxOp` for `input`; there is no output parameter. When this returns true,
// Debugger::ProcessQueuedGpuMathOps appends the op's result pointer to its pending list, which is
// later passed to GetQueuedResults().
// NOTE(review): presumably the order of successful calls must match the order of the pending
// list - confirm against the implementing class.
virtual bool QueueMathIntrinsic(DXIL::DXOp dxOp, const ShaderVariable &input) = 0;
|
||||
// Queue the sample/gather op `dxOp`; `sampleRetType` is an int passed by non-const reference.
// In Debugger::ProcessQueuedGpuSampleGatherOps a false return is treated as a failed sample (the
// result is set immediately via set0001()); on a true return the op's result pointer and the
// `sampleRetType` value are appended to the pending lists that are later passed to
// GetQueuedResults().
// NOTE(review): presumably `sampleRetType` is written by the implementation on success - confirm.
virtual bool QueueSampleGather(DXIL::DXOp dxOp, SampleGatherResourceData resourceData,
|
||||
SampleGatherSamplerData samplerData, const ShaderVariable &uv,
|
||||
const ShaderVariable &ddxCalc, const ShaderVariable &ddyCalc,
|
||||
const int8_t texelOffsets[3], int multisampleIndex, float lodValue,
|
||||
float compareValue, GatherChannel gatherChannel,
|
||||
uint32_t instructionIdx, int &sampleRetType) = 0;
|
||||
// Called from Debugger::SyncPendingGpuOps with the debugger's pending math results, pending
// sample/gather results and the matching sample return types. A false return is logged there as
// an error and the pending lists are kept; a true return causes them to be cleared.
// NOTE(review): presumably the implementation writes each queued result through the
// corresponding pointer, with `sampleRetTypes` indexed in step with `sampleGatherResults` -
// confirm against the implementing class.
virtual bool GetQueuedResults(rdcarray<ShaderVariable *> &mathOpResults,
|
||||
rdcarray<ShaderVariable *> &sampleGatherResults,
|
||||
const rdcarray<int> &sampleRetTypes) = 0;
|
||||
// Queried by the Debugger before it queues each GPU math or sample/gather op; when this returns
// false the Debugger calls SyncPendingGpuOps() first.
// NOTE(review): the capacity being tested is not visible here - presumably it is bounded by
// D3D12DebugManager::MAX_SHADER_DEBUG_QUEUED_OPS in the D3D12 implementation; confirm.
virtual bool QueuedOpsHasSpace() const = 0;
|
||||
virtual ShaderVariable GetResourceInfo(DXIL::ResourceClass resClass,
|
||||
const DXDebug::BindingSlot &slot, uint32_t mipLevel) = 0;
|
||||
virtual ShaderVariable GetSampleInfo(DXIL::ResourceClass resClass,
|
||||
@@ -948,6 +951,7 @@ private:
|
||||
void ProcessQueuedOps();
|
||||
void ProcessQueuedGpuMathOps();
|
||||
void ProcessQueuedGpuSampleGatherOps();
|
||||
void SyncPendingGpuOps();
|
||||
void SyncPendingLanes();
|
||||
|
||||
void QueueGpuMathOp(uint32_t lane);
|
||||
@@ -970,6 +974,7 @@ private:
|
||||
rdcarray<bool> m_PendingLanes;
|
||||
// Bookkeeping for GPU ops that have been queued but whose results have not been fetched yet.
// Entries are appended in ProcessQueuedGpuMathOps / ProcessQueuedGpuSampleGatherOps, and all
// three lists are passed to GetQueuedResults() and then cleared in SyncPendingGpuOps().
// result pointers for queued math ops
rdcarray<ShaderVariable *> m_PendingGpuMathsOpsResults;
|
||||
// result pointers for queued sample/gather ops
rdcarray<ShaderVariable *> m_PendingGpuSampleGatherOpsResults;
|
||||
// appended together with m_PendingGpuSampleGatherOpsResults, so an entry here belongs to the
// sample/gather result pushed alongside it
rdcarray<int> m_PendingGpuSampleGatherOpsSampleRetTypes;
|
||||
|
||||
// the live mutable global variables, to initialise a stack frame's live list
|
||||
rdcarray<bool> m_LiveGlobals;
|
||||
|
||||
Reference in New Issue
Block a user