mirror of
https://github.com/baldurk/renderdoc.git
synced 2026-05-04 09:00:44 +00:00
Support for DXIL SM6.6 Derivatives in Compute Shaders
Quad grouping: linear layout (4x1x1) for a 1D workgroup (Nx1x1), otherwise quad layout (2x2x1).
Supported HLSL intrinsics: ddx(), ddx_coarse(), ddy(), ddy_coarse(), ddx_fine(), ddy_fine(), CalculateLevelOfDetail(), CalculateLevelOfDetailUnclamped(), Sample(), SampleBias(), SampleCmp().
Corresponding DXIL ops: DXOp::DerivCoarseX, DXOp::DerivCoarseY, DXOp::DerivFineX, DXOp::DerivFineY, DXOp::CalculateLOD, DXOp::Sample, DXOp::SampleBias, DXOp::SampleCmp.
This commit is contained in:
@@ -1523,12 +1523,13 @@ void MemoryTracking::ConvertGlobalAllocToLocal(Id allocId)
|
||||
|
||||
// Must be called from the replay manager thread (the debugger thread)
|
||||
ThreadState::ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId,
|
||||
uint32_t laneIndex, uint32_t numThreads)
|
||||
uint32_t laneIndex, uint32_t numThreads, ShaderFeatures shaderFeatures)
|
||||
: m_Debugger(debugger),
|
||||
m_GlobalState(globalState),
|
||||
m_Program(debugger.GetProgram()),
|
||||
m_MaxSSAId(maxSSAId),
|
||||
m_WorkgroupIndex(laneIndex)
|
||||
m_WorkgroupIndex(laneIndex),
|
||||
m_Features(shaderFeatures)
|
||||
{
|
||||
THREADSTATE_CHECK_DEBUGGER_THREAD();
|
||||
m_ShaderType = m_Program.GetShaderType();
|
||||
@@ -3099,9 +3100,12 @@ bool ThreadState::ExecuteInstruction(const rdcarray<ThreadState> &workgroup)
|
||||
case DXOp::DerivFineX:
|
||||
case DXOp::DerivFineY:
|
||||
{
|
||||
if(m_ShaderType != DXBC::ShaderType::Pixel || workgroup.size() < 4)
|
||||
if(!(m_Features & ShaderFeatures::Derivatives) || (workgroup.size() < 4) ||
|
||||
m_QuadNeighbours.contains(~0U))
|
||||
{
|
||||
RDCERR("Undefined results using derivative instruction outside of a pixel shader.");
|
||||
RDCERR(
|
||||
"Undefined results using derivative instruction in shader without support for "
|
||||
"derivatives");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -6880,12 +6884,16 @@ bool ThreadState::PerformGPUResourceOp(const rdcarray<ThreadState> &workgroup, O
|
||||
|
||||
ShaderVariable ddx;
|
||||
ShaderVariable ddy;
|
||||
// Sample, SampleBias, CalculateLOD need DDX, DDY
|
||||
if((dxOpCode == DXOp::Sample) || (dxOpCode == DXOp::SampleBias) || (dxOpCode == DXOp::CalculateLOD))
|
||||
// Sample, SampleBias, SampleCmp, CalculateLOD need DDX, DDY
|
||||
if((dxOpCode == DXOp::Sample) || (dxOpCode == DXOp::SampleBias) ||
|
||||
(dxOpCode == DXOp::SampleCmp) || (dxOpCode == DXOp::CalculateLOD))
|
||||
{
|
||||
if(m_ShaderType != DXBC::ShaderType::Pixel || m_QuadNeighbours.contains(~0U))
|
||||
if(!(m_Features & ShaderFeatures::Derivatives) || (workgroup.size() < 4) ||
|
||||
m_QuadNeighbours.contains(~0U))
|
||||
{
|
||||
RDCERR("Undefined results using derivative instruction outside of a pixel shader.");
|
||||
RDCERR(
|
||||
"Undefined results using derivative instruction in shader without support for "
|
||||
"derivatives");
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -9129,6 +9137,13 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
ShaderDebugTrace *ret = new ShaderDebugTrace;
|
||||
ret->stage = shaderStage;
|
||||
|
||||
ShaderFeatures shaderFeatures = ShaderFeatures::None;
|
||||
bool isSM66Plus = (m_Program->GetMajorVersion() > 6) ||
|
||||
((m_Program->GetMajorVersion() == 6) && (m_Program->GetMinorVersion() >= 6));
|
||||
|
||||
if((shaderStage == ShaderStage::Fragment) || ((shaderStage == ShaderStage::Compute) && isSM66Plus))
|
||||
shaderFeatures |= ShaderFeatures::Derivatives;
|
||||
|
||||
// Get the global state from the API wrapper
|
||||
m_GlobalState.builtins = apiWrapper->GetBuiltins();
|
||||
m_GlobalState.subgroupSize = apiWrapper->GetSubgroupSize();
|
||||
@@ -9137,7 +9152,8 @@ ShaderDebugTrace *Debugger::BeginDebug(DebugAPIWrapper *apiWrapper, uint32_t eve
|
||||
|
||||
for(uint32_t i = 0; i < threadsInWorkgroup; i++)
|
||||
{
|
||||
m_Workgroup.push_back(ThreadState(*this, m_GlobalState, maxSSAId, i, threadsInWorkgroup));
|
||||
m_Workgroup.push_back(
|
||||
ThreadState(*this, m_GlobalState, maxSSAId, i, threadsInWorkgroup, shaderFeatures));
|
||||
m_QueuedDeviceThreadSteps[i] = false;
|
||||
m_QueuedGpuMathOps[i] = false;
|
||||
m_QueuedGpuSampleGatherOps[i] = false;
|
||||
|
||||
@@ -435,10 +435,18 @@ struct GpuSampleGatherOperation
|
||||
ShaderVariable *result = NULL;
|
||||
};
|
||||
|
||||
// Bit flags describing optional capabilities of the shader being debugged.
// Debugger::BeginDebug computes the set once and passes it to every ThreadState,
// which stores it in m_Features and tests it with operator& before executing
// feature-dependent instructions.
enum class ShaderFeatures : uint32_t
|
||||
{
|
||||
None = 0,    // no optional features enabled
|
||||
Derivatives = 1 << 0,    // set for fragment shaders, and for compute shaders with shader model >= 6.6
|
||||
};
|
||||
|
||||
BITMASK_OPERATORS(ShaderFeatures);
|
||||
|
||||
struct ThreadState
|
||||
{
|
||||
ThreadState(Debugger &debugger, const GlobalState &globalState, uint32_t maxSSAId,
|
||||
uint32_t laneIndex, uint32_t numThreads);
|
||||
uint32_t laneIndex, uint32_t numThreads, ShaderFeatures shaderFeatures);
|
||||
~ThreadState();
|
||||
|
||||
void EnterEntryPoint(const DXIL::Function *function, bool hasDebugState);
|
||||
@@ -719,6 +727,8 @@ private:
|
||||
|
||||
rdcarray<bool> m_ActiveMask;
|
||||
|
||||
ShaderFeatures m_Features;
|
||||
|
||||
ShaderDebugState m_PendingDebugState;
|
||||
ShaderVariable m_PendingResultData;
|
||||
GpuMathOperation m_QueuedGpuMathOp;
|
||||
|
||||
@@ -1151,12 +1151,48 @@ void Program::Parse(const DXBC::Reflection *reflection)
|
||||
|
||||
for(Function *f : m_Functions)
|
||||
{
|
||||
if(f->family != FunctionFamily::DXOp)
|
||||
continue;
|
||||
if(f->name == "dx.op.barrier")
|
||||
m_Threadscope |= DXBC::ThreadScope::Workgroup;
|
||||
if(f->name.beginsWith("dx.op.quadReadLaneAt.") || f->name.beginsWith("dx.op.quadOp.") ||
|
||||
f->name.beginsWith("dx.op.quadVote."))
|
||||
m_Threadscope |= DXBC::ThreadScope::Quad;
|
||||
}
|
||||
// Compute shaders using derivatives require quad scope
|
||||
// DXOp::DerivCoarseX
|
||||
// DXOp::DerivCoarseY
|
||||
// DXOp::DerivFineX
|
||||
// DXOp::DerivFineY
|
||||
// DXOp::CalculateLOD
|
||||
// DXOp::Sample
|
||||
// DXOp::SampleBias
|
||||
// DXOp::SampleCmp
|
||||
for(Function *f : m_Functions)
|
||||
{
|
||||
if(f->external)
|
||||
continue;
|
||||
for(size_t funcIdx = 0; funcIdx < f->instructions.size(); funcIdx++)
|
||||
{
|
||||
const Instruction *inst = f->instructions[funcIdx];
|
||||
if(inst->op != Operation::Call)
|
||||
continue;
|
||||
const Function *callFunc = inst->getFuncCall();
|
||||
if(callFunc->family != FunctionFamily::DXOp)
|
||||
continue;
|
||||
|
||||
DXOp dxOpCode = DXOp::NumOpCodes;
|
||||
RDCASSERT(getival<DXOp>(inst->args[0], dxOpCode));
|
||||
RDCASSERT(dxOpCode < DXOp::NumOpCodes, dxOpCode, DXOp::NumOpCodes);
|
||||
if((dxOpCode == DXOp::DerivCoarseX) || (dxOpCode == DXOp::DerivCoarseY) ||
|
||||
(dxOpCode == DXOp::DerivFineX) || (dxOpCode == DXOp::DerivFineY) ||
|
||||
(dxOpCode == DXOp::CalculateLOD) || (dxOpCode == DXOp::Sample) ||
|
||||
(dxOpCode == DXOp::SampleBias) || (dxOpCode == DXOp::SampleCmp))
|
||||
{
|
||||
m_Threadscope |= DXBC::ThreadScope::Quad;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_Parsed = true;
|
||||
|
||||
Reference in New Issue
Block a user