From 4539ff5c43853daa1b40f063d192fc415b311a03 Mon Sep 17 00:00:00 2001 From: Jake Turner Date: Mon, 19 May 2025 08:54:20 +0100 Subject: [PATCH] DXIL Debugger GroupShared simulation changes On the active thread * GSM reads come from the local GSM cache (not the global GSM data) * GSM writes go to the local GSM cache and the global GSM data * Workgroup Memory barrier populates the local GSM cache with the data from the global GSM data On the non-active threads: * GSM reads/write always operate on the global GSM data * Workgroup Memory barrier is ignored --- renderdoc/driver/shaders/dxil/dxil_debug.cpp | 207 +++++++++++++++---- renderdoc/driver/shaders/dxil/dxil_debug.h | 5 +- 2 files changed, 172 insertions(+), 40 deletions(-) diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.cpp b/renderdoc/driver/shaders/dxil/dxil_debug.cpp index 865a7edc5..b8d4e5399 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.cpp +++ b/renderdoc/driver/shaders/dxil/dxil_debug.cpp @@ -41,6 +41,9 @@ using namespace rdcshaders; // TODO: Support UAVs with counter // TODO: Extend debug data parsing: DW_TAG_array_type for the base element type // TODO: Extend debug data parsing: N-dimensional arrays, mapping covers whole sub-array +// TODO: Load's from Pointers should re-read the memory to get accurate values +// TODO: Change Pointers to be GPUPointers : update pointers aliased to base pointers +// TODO: Unify and share the code for reading and writing thru pointers // Notes: // The phi node capture variables are not shown in the UI @@ -96,6 +99,19 @@ using namespace DXDebug; const uint32_t POINTER_MAGIC = 0xBEAFDEAF; +static bool IsEncodedPointer(const ShaderVariable &var) +{ + if(var.type != VarType::GPUPointer) + { + return false; + } + if(var.value.u32v[1] != POINTER_MAGIC) + { + return false; + } + return true; +} + static void EncodePointer(DXILDebug::Id ptrId, uint64_t offset, uint64_t size, ShaderVariable &var) { var.type = VarType::GPUPointer; @@ -108,16 +124,12 @@ static void EncodePointer(DXILDebug::Id ptrId, uint64_t offset, uint64_t size, S static bool DecodePointer(DXILDebug::Id &ptrId, uint64_t &offset, uint64_t &size, const ShaderVariable &var) { - if(var.type != VarType::GPUPointer) + if(!IsEncodedPointer(var)) { - RDCERR("Calling DecodePointer on non-pointer type %s", ToStr(var.type).c_str()); - return false; - } - if(var.value.u32v[1] != POINTER_MAGIC) - { - RDCERR("Calling DecodePointer on non encoded pointer type %u", var.value.u32v[1]); + RDCERR("Calling DecodePointer on non encoded pointer"); return false; } + ptrId = var.value.u32v[0]; offset = var.value.u64v[1]; size = var.value.u64v[2]; @@ -655,6 +667,35 @@ static uint8_t GetShaderVariableElementByteSize(const ShaderVariable &var) return GetShaderVariableElementByteSize(var.members[0]); } +static void UpdateShaderVariableFromBackingMemory(ShaderVariable &var, const void *ptr) +{ + // Memory copy from backing memory to base memory variable + size_t elementSize = GetShaderVariableElementByteSize(var); + const uint8_t *src = (const uint8_t *)ptr; + if(var.members.size() == 0) + { + RDCASSERTEQUAL(var.rows, 1); + RDCASSERTEQUAL(var.columns, 1); + if(elementSize <= sizeof(ShaderValue)) + memcpy(&var.value, src, elementSize); + else + RDCERR("Updating MemoryVariable elementSize %u too large max %u", elementSize, + sizeof(ShaderValue)); + } + else + { + for(uint32_t i = 0; i < var.members.size(); ++i) + { + if(elementSize <= sizeof(ShaderValue)) + memcpy(&var.members[i].value, src, elementSize); + else + RDCERR("Updating MemoryVariable member %u elementSize %u too large max %u", i, elementSize, + sizeof(ShaderValue)); + src += elementSize; + } + } +} + static DXBC::ResourceRetType ConvertComponentTypeToResourceRetType(const ComponentType compType) { switch(compType) @@ -1601,7 +1642,7 @@ void MemoryTracking::AllocateMemoryForType(const DXIL::Type *type, Id allocId, b size_t byteSize = ComputeDXILTypeByteSize(type->inner); void *backingMem = malloc(byteSize); memset(backingMem, 0, byteSize); - m_Allocations[allocId] = {backingMem, byteSize, global}; + m_Allocations[allocId] = {backingMem, byteSize, global, !global}; // Create a pointer to represent this allocation m_Pointers[allocId] = {allocId, backingMem, byteSize}; @@ -1622,7 +1663,7 @@ ThreadState::~ThreadState() { for(auto it : m_Memory.m_Allocations) { - if(!it.second.global) + if(it.second.localMemory) free(it.second.backingMemory); } } @@ -3140,6 +3181,8 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, // For thread barriers the threads must be converged if(barrierMode & BarrierMode::SyncThreadGroup) RDCASSERT(!WorkgroupIsDiverged(workgroup)); + if(barrierMode & BarrierMode::TGSMFence) + ExecuteMemoryBarrier(); break; } case DXOp::Discard: @@ -4811,7 +4854,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, // SV_StartInstanceLocation // StartInstanceLocation from Draw*Instanced - // Needed for debugger support of multi-threaded compute execution + // SM 6.8 case DXOp::BarrierByMemoryType: case DXOp::BarrierByMemoryHandle: @@ -5094,7 +5137,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, } const MemoryTracking::Allocation &allocation = itAlloc->second; ShaderVariable arg; - if(allocation.global && !IsVariableAssigned(ptrId)) + if(allocation.globalVarAlloc && !IsVariableAssigned(ptrId)) { RDCASSERT(IsVariableAssigned(baseMemoryId)); arg = m_Variables[baseMemoryId]; @@ -5151,6 +5194,24 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, const MemoryTracking::Allocation &allocation = itAlloc->second; UpdateMemoryVariableFromBackingMemory(baseMemoryId, allocation.backingMemory); + // active lane : writes to a GSM variable, write to local and global backing memory + if(m_State) + { + if(m_GlobalState.groupSharedMemoryIds.contains(baseMemoryId)) + { + // Compute the local pointer offset and apply it to the global base memory + ptrdiff_t offset = (uintptr_t)memory - (uintptr_t)allocation.backingMemory; + auto globalMem = m_GlobalState.memory.m_Allocations.find(baseMemoryId); + if(globalMem == m_GlobalState.memory.m_Allocations.end()) + { + RDCERR("Unknown global memory allocation Id %u", baseMemoryId); + break; + } + void *globalMemory = (void *)((uintptr_t)globalMem->second.backingMemory + offset); + allocSize = ptr.size; + UpdateBackingMemoryFromVariable(globalMemory, allocSize, val); + } + } // record the change to the base memory variable if it is not the ptrId variable if(recordBaseMemoryChange) { @@ -5241,7 +5302,7 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, // Ensure global variables use global memory // Ensure non-global variables do not use global memory - if(allocation.global) + if(allocation.globalVarAlloc) RDCASSERT(cast(inst.args[0])); else RDCASSERT(!cast(inst.args[0])); @@ -6143,7 +6204,16 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, RDCASSERTNOTEQUAL(resultId, DXILDebug::INVALID_ID); RDCASSERT(IsVariableAssigned(ptrId)); - const ShaderVariable a = m_Variables[ptrId]; + ShaderVariable a; + if(allocation.globalVarAlloc && !IsVariableAssigned(ptrId)) + { + RDCASSERT(IsVariableAssigned(baseMemoryId)); + a = m_Variables[baseMemoryId]; + } + else + { + a = m_Variables[ptrId]; + } size_t newValueArgIdx = (opCode == Operation::CompareExchange) ? 2 : 1; ShaderVariable b; @@ -6264,6 +6334,25 @@ bool ThreadState::ExecuteInstruction(DebugAPIWrapper *apiWrapper, UpdateMemoryVariableFromBackingMemory(baseMemoryId, allocMemoryBackingPtr); + // active lane : writes to a GSM variable, write to local and global backing memory + if(m_State) + { + if(m_GlobalState.groupSharedMemoryIds.contains(baseMemoryId)) + { + // Compute the local pointer offset and apply it to the global base memory + ptrdiff_t offset = (uintptr_t)memory - (uintptr_t)allocation.backingMemory; + auto globalMem = m_GlobalState.memory.m_Allocations.find(baseMemoryId); + if(globalMem == m_GlobalState.memory.m_Allocations.end()) + { + RDCERR("Unknown global memory allocation Id %u", baseMemoryId); + break; + } + void *globalMemory = (void *)((uintptr_t)globalMem->second.backingMemory + offset); + allocSize = ptr.size; + UpdateBackingMemoryFromVariable(globalMemory, allocSize, res); + } + } + // record the change to the base memory variable if(recordBaseMemoryChange) { @@ -6646,31 +6735,7 @@ void ThreadState::UpdateBackingMemoryFromVariable(void *ptr, uint64_t &allocSize void ThreadState::UpdateMemoryVariableFromBackingMemory(Id memoryId, const void *ptr) { ShaderVariable &baseMemory = m_Variables[memoryId]; - // Memory copy from backing memory to base memory variable - size_t elementSize = GetShaderVariableElementByteSize(baseMemory); - const uint8_t *src = (const uint8_t *)ptr; - if(baseMemory.members.size() == 0) - { - RDCASSERTEQUAL(baseMemory.rows, 1); - RDCASSERTEQUAL(baseMemory.columns, 1); - if(elementSize <= sizeof(ShaderValue)) - memcpy(&baseMemory.value, src, elementSize); - else - RDCERR("Updating MemoryVariable elementSize %u too large max %u", elementSize, - sizeof(ShaderValue)); - } - else - { - for(uint32_t i = 0; i < baseMemory.members.size(); ++i) - { - if(elementSize <= sizeof(ShaderValue)) - memcpy(&baseMemory.members[i].value, src, elementSize); - else - RDCERR("Updating MemoryVariable member %u elementSize %u too large max %u", i, elementSize, - sizeof(ShaderValue)); - src += elementSize; - } - } + UpdateShaderVariableFromBackingMemory(baseMemory, ptr); } void ThreadState::PerformGPUResourceOp(const rdcarray &workgroup, Operation opCode, @@ -7162,11 +7227,61 @@ ShaderValue ThreadState::DDY(bool fine, Operation opCode, DXOp dxOpCode, return ret; } +void ThreadState::ExecuteMemoryBarrier() +{ + // ignore if not the active thread + if(!m_State) + return; + + // copy the global GSM memory into the local GSM cache + for(Id id : m_GlobalState.groupSharedMemoryIds) + { + auto globalMem = m_GlobalState.memory.m_Allocations.find(id); + if(globalMem == m_GlobalState.memory.m_Allocations.end()) + { + RDCERR("Unknown global memory allocation for GSM Id %u", id); + continue; + } + RDCASSERT(globalMem->second.globalVarAlloc); + RDCASSERT(!globalMem->second.localMemory); + + auto localMem = m_Memory.m_Allocations.find(id); + if(localMem == m_Memory.m_Allocations.end()) + { + RDCERR("Unknown local memory allocation for GSM Id %u", id); + continue; + } + RDCASSERT(localMem->second.globalVarAlloc); + RDCASSERT(localMem->second.localMemory); + + auto localVar = m_Variables.find(id); + if(localVar == m_Variables.end()) + { + RDCERR("Unknown local memory allocation for GSM Id %u", id); + continue; + } + ShaderVariableChange change; + ShaderVariable &local = localVar->second; + change.before = local; + const void *globalBackingMemory = globalMem->second.backingMemory; + UpdateMemoryVariableFromBackingMemory(id, globalBackingMemory); + change.after = local; + if(!(change.after == change.before)) + m_State->changes.push_back(change); + + // Update local backing memory from the local variable + RDCASSERTEQUAL(globalMem->second.size, localMem->second.size); + void *localBackingMemory = localMem->second.backingMemory; + const size_t allocSize = (size_t)globalMem->second.size; + memcpy(localBackingMemory, globalBackingMemory, allocSize); + } +} + GlobalState::~GlobalState() { for(auto it : memory.m_Allocations) { - RDCASSERT(it.second.global); + RDCASSERT(!it.second.localMemory); free(it.second.backingMemory); } } @@ -8620,6 +8735,8 @@ ShaderDebugTrace *Debugger::BeginDebug(uint32_t eventId, const DXBC::DXBCContain } } } + if(gv->type->addrSpace == DXIL::Type::PointerAddrSpace::GroupShared) + m_GlobalState.groupSharedMemoryIds.push_back(globalVar.id); m_GlobalState.globals.push_back(globalVar); m_LiveGlobals[globalVar.id] = true; } @@ -9408,6 +9525,18 @@ rdcarray Debugger::ContinueDebug(DebugAPIWrapper *apiWrapper) } } + // active lane : needs it own local backing memory, copied from global at the start + for(Id id : m_GlobalState.groupSharedMemoryIds) + { + MemoryTracking::Allocation &globalAlloc = active.m_Memory.m_Allocations[id]; + RDCASSERT(globalAlloc.globalVarAlloc); + const size_t allocSize = (size_t)globalAlloc.size; + void *localBackingMem = malloc(allocSize); + memcpy(localBackingMem, globalAlloc.backingMemory, allocSize); + active.m_Memory.m_Allocations[id] = {localBackingMem, globalAlloc.size, true, true}; + active.m_Memory.m_Pointers[id] = {id, localBackingMem, globalAlloc.size}; + } + // globals won't be filled out by entering the entry point, ensure their change is registered. for(const GlobalVariable &gv : m_GlobalState.globals) initial.changes.push_back({ShaderVariable(), gv.var}); diff --git a/renderdoc/driver/shaders/dxil/dxil_debug.h b/renderdoc/driver/shaders/dxil/dxil_debug.h index f8dafc086..9b6d50d53 100644 --- a/renderdoc/driver/shaders/dxil/dxil_debug.h +++ b/renderdoc/driver/shaders/dxil/dxil_debug.h @@ -211,7 +211,8 @@ struct MemoryTracking // the allocated memory void *backingMemory; uint64_t size; - bool global; + bool globalVarAlloc; + bool localMemory; }; // Represents pointers within a Allocation memory allocation (heap) @@ -292,6 +293,7 @@ struct ThreadState void ProcessScopeChange(const rdcarray &oldLive, const rdcarray &newLive); + void ExecuteMemoryBarrier(); static bool WorkgroupIsDiverged(const rdcarray &workgroup); static bool QuadIsDiverged(const rdcarray &workgroup, const rdcfixedarray &quadNeighbours); @@ -444,6 +446,7 @@ struct GlobalState rdcarray constantBlocks; std::map constantBlocksDatas; + rdcarray groupSharedMemoryIds; // resources may be read-write but the variable itself doesn't change rdcarray readOnlyResources; rdcarray readWriteResources;