diff --git a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp
index 5feef721e..6b164080d 100644
--- a/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp
+++ b/renderdoc/driver/shaders/dxbc/dxbc_bytecode.cpp
@@ -416,6 +416,12 @@ rdcstr Program::GetDebugStatus()
       bool supported = false;
 
       // whitelist supported instructions here
+      switch(op.operation)
+      {
+        case OPCODE_AMD_U64_ATOMIC:
+        case OPCODE_NV_U64_ATOMIC: supported = true; break;
+        default: break;
+      }
 
       if(!supported)
         return StringFormat::Fmt("Unsupported shader extension '%s' used",
diff --git a/renderdoc/driver/shaders/dxbc/dxbc_debug.cpp b/renderdoc/driver/shaders/dxbc/dxbc_debug.cpp
index 32d269891..657192489 100644
--- a/renderdoc/driver/shaders/dxbc/dxbc_debug.cpp
+++ b/renderdoc/driver/shaders/dxbc/dxbc_debug.cpp
@@ -234,6 +234,9 @@ VarType OperationType(const DXBCBytecode::OpcodeType &op)
     case OPCODE_ITOD:
     case OPCODE_UTOD: return VarType::Double;
 
+    case OPCODE_AMD_U64_ATOMIC:
+    case OPCODE_NV_U64_ATOMIC: return VarType::UInt;
+
     default: RDCERR("Unhandled operation %d in shader debugging", op); return VarType::Float;
   }
 }
@@ -439,6 +442,9 @@ bool OperationFlushing(const DXBCBytecode::OpcodeType &op)
     case OPCODE_ITOD:
     case OPCODE_UTOD: return false;
 
+    case OPCODE_AMD_U64_ATOMIC:
+    case OPCODE_NV_U64_ATOMIC: return false;
+
     default: RDCERR("Unhandled operation %d in shader debugging", op); break;
   }
 
@@ -4280,6 +4286,141 @@ void ThreadState::StepNext(ShaderDebugState *state, DebugAPIWrapper *apiWrapper,
       }
       break;
     }
+
+    //////////////////////////////////////////////////////////////////////////
+    // Vendor extensions
+    //////////////////////////////////////////////////////////////////////////
+    case OPCODE_AMD_U64_ATOMIC:
+    case OPCODE_NV_U64_ATOMIC:
+    {
+      VendorAtomicOp atomicOp = (VendorAtomicOp)op.preciseValues;
+
+      uint32_t resIndex = (uint32_t)op.operands[2].indices[0].index;
+      ShaderVariable dstAddress, compare, value;
+
+      int param = 2;
+
+      if(op.texelOffset[0] == 1)
+      {
+        // single operand for address - simple
+        dstAddress = srcOpers[param++];
+      }
+      else if(op.texelOffset[0] == 2)
+      {
+        dstAddress = srcOpers[param++];
+        dstAddress.value.u.y = srcOpers[param++].value.u.x;
+        dstAddress.value.u.z = srcOpers[param++].value.u.x;
+      }
+      else
+      {
+        RDCERR("Unexpected parameter compression value %d ", op.texelOffset[0]);
+        break;
+      }
+
+      if(atomicOp == ATOMIC_OP_CAS)
+      {
+        if(op.texelOffset[1] == 1)
+        {
+          compare = srcOpers[param++];
+        }
+        else if(op.texelOffset[1] == 2)
+        {
+          compare = srcOpers[param++];
+          compare.value.u.y = srcOpers[param++].value.u.x;
+          // only two split operands exist (compare_value.x / compare_value.y) - don't consume a third
+        }
+        else
+        {
+          RDCERR("Unexpected parameter compression value %d ", op.texelOffset[1]);
+          break;
+        }
+      }
+
+      if(op.texelOffset[2] == 1)
+      {
+        value = srcOpers[param++];
+      }
+      else if(op.texelOffset[2] == 2)
+      {
+        value = srcOpers[param++];
+        value.value.u.y = srcOpers[param++].value.u.x;
+        // only two split operands exist (value.x / value.y) - don't consume a third
+      }
+      else
+      {
+        RDCERR("Unexpected parameter compression value %d ", op.texelOffset[2]);
+        break;
+      }
+
+      BindingSlot slot = GetBindingSlotForIdentifier(*program, TYPE_UNORDERED_ACCESS_VIEW, resIndex);
+      GlobalState::UAVIterator uav = global.uavs.find(slot);
+      if(uav == global.uavs.end())
+      {
+        if(!apiWrapper->FetchUAV(slot))
+        {
+          RDCERR("Invalid UAV reg=%u, space=%u", slot.shaderRegister, slot.registerSpace);
+          return;
+        }
+        uav = global.uavs.find(slot);
+      }
+
+      MarkResourceAccess(state, TYPE_UNORDERED_ACCESS_VIEW, slot);
+
+      const uint32_t stride = sizeof(uint64_t);
+      byte *data = &uav->second.data[0];
+
+      RDCASSERT(data);
+
+      if(data)
+      {
+        if(uav->second.tex)
+        {
+          data += dstAddress.value.u.x * stride;
+          data += dstAddress.value.u.y * uav->second.rowPitch;
+          data += dstAddress.value.u.z * uav->second.depthPitch;
+        }
+        else
+        {
+          data += uav->second.firstElement * stride + dstAddress.value.u.x;
+        }
+      }
+
+      if(data && data + stride <= uav->second.data.end() && !Finished())
+      {
+        ShaderVariable result(rdcstr(), 0U, 0U, 0U, 0U);
+
+        uint64_t *data64 = (uint64_t *)data;
+
+        result.value.u.x = uint32_t(*data64);
+        SetDst(state, op.operands[0], op, result);
+        result.value.u.x = uint32_t((*data64) >> 32U);
+        SetDst(state, op.operands[1], op, result);
+
+        uint64_t compare64 = compare.value.u64v[0];
+        uint64_t value64 = value.value.u64v[0];
+
+        switch(atomicOp)
+        {
+          case ATOMIC_OP_NONE: break;
+          case ATOMIC_OP_AND: *data64 = *data64 & value64; break;
+          case ATOMIC_OP_OR: *data64 = *data64 | value64; break;
+          case ATOMIC_OP_XOR: *data64 = *data64 ^ value64; break;
+          case ATOMIC_OP_ADD: *data64 = *data64 + value64; break;
+          case ATOMIC_OP_MAX: *data64 = RDCMAX(*data64, value64); break;
+          case ATOMIC_OP_MIN: *data64 = RDCMIN(*data64, value64); break;
+          case ATOMIC_OP_SWAP: *data64 = value64; break;
+          case ATOMIC_OP_CAS:
+            if(*data64 == compare64)
+              *data64 = value64;
+            break;
+        }
+      }
+      break;
+    }
+
+    //////////////////////////////////////////////////////////////////////////
+    //
+    //////////////////////////////////////////////////////////////////////////
     default:
     {
       RDCERR("Unsupported operation %d in assembly debugging", op.operation);
diff --git a/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp b/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp
index 27c9106eb..75dc4b530 100644
--- a/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp
+++ b/renderdoc/driver/shaders/dxbc/dxbc_disassemble.cpp
@@ -1227,15 +1227,24 @@ void Program::PostprocessVendorExtensions()
             op.operands.back().setComps(srcParam[0].comps[0], srcParam[1].comps[0],
                                         srcParam[2].comps[0], 0xff);
             op.operands.back().name = "address";
+
+            // store in texelOffset whether the parameter is combined (1) or split (2)
+            op.texelOffset[0] = 1;
           }
           else
           {
             op.operands.push_back(srcParam[0]);
             op.operands.back().name = "address.x";
+            op.operands.back().setComps(srcParam[0].comps[0], 0xff, 0xff, 0xff);
             op.operands.push_back(srcParam[1]);
             op.operands.back().name = "address.y";
+            op.operands.back().setComps(srcParam[1].comps[0], 0xff, 0xff, 0xff);
             op.operands.push_back(srcParam[2]);
             op.operands.back().name = "address.z";
+            op.operands.back().setComps(srcParam[2].comps[0], 0xff, 0xff, 0xff);
+
+            // store in texelOffset whether the parameter is combined (1) or split (2)
+            op.texelOffset[0] = 2;
           }
 
           // for CAS, the compare value next
@@ -1247,13 +1256,21 @@ void Program::PostprocessVendorExtensions()
               op.operands.back().setComps(srcParam[5].comps[0], srcParam[6].comps[0], 0xff, 0xff);
               op.operands.back().values[1] = srcParam[6].values[0];
               op.operands.back().name = "compare_value";
+
+              // store in texelOffset whether the parameter is combined (1) or split (2)
+              op.texelOffset[1] = 1;
             }
             else
             {
               op.operands.push_back(srcParam[5].swizzle(0));
               op.operands.back().name = "compare_value.x";
+              op.operands.back().setComps(srcParam[5].comps[0], 0xff, 0xff, 0xff);
               op.operands.push_back(srcParam[6].swizzle(0));
               op.operands.back().name = "compare_value.y";
+              op.operands.back().setComps(srcParam[6].comps[0], 0xff, 0xff, 0xff);
+
+              // store in texelOffset whether the parameter is combined (1) or split (2)
+              op.texelOffset[1] = 2;
             }
           }
 
@@ -1264,13 +1281,21 @@ void Program::PostprocessVendorExtensions()
               op.operands.back().setComps(srcParam[3].comps[0], srcParam[4].comps[0], 0xff, 0xff);
               op.operands.back().values[1] = srcParam[4].values[0];
               op.operands.back().name = "value";
+
+              // store in texelOffset whether the parameter is combined (1) or split (2)
+              op.texelOffset[2] = 1;
             }
             else
             {
               op.operands.push_back(srcParam[3].swizzle(0));
               op.operands.back().name = "value.x";
+              op.operands.back().setComps(srcParam[3].comps[0], 0xff, 0xff, 0xff);
               op.operands.push_back(srcParam[4].swizzle(0));
               op.operands.back().name = "value.y";
+              op.operands.back().setComps(srcParam[4].comps[0], 0xff, 0xff, 0xff);
+
+              // store in texelOffset whether the parameter is combined (1) or split (2)
+              op.texelOffset[2] = 2;
             }
           }
 
@@ -1907,12 +1932,26 @@ void Program::PostprocessVendorExtensions()
               break;
             }
 
+            // insert second dummy return value for high bits
+            op.operands.insert(0, curOp.operands[1]);
+
+            // make both of them NULL
+            op.operands[0].type = TYPE_NULL;
+            op.operands[0].setComps(0xff, 0xff, 0xff, 0xff);
+            op.operands[1].type = TYPE_NULL;
+            op.operands[1].setComps(0xff, 0xff, 0xff, 0xff);
+
             atomicop = (NvShaderAtomic)srcParam[2].values[0];
 
             op.operands.push_back(srcParam[0]);
             op.operands.back().numComponents = NUMCOMPS_1;
-            op.operands.back().setComps(srcParam[0].comps[0], 0xff, 0xff, 0xff);
-            op.operands.back().name = "byteAddress";
+            op.operands.back().name = "address";
+
+            // store in texelOffset whether the parameter is combined (1) or split (2).
+            // on nv we assume the parameters are always combined
+            op.texelOffset[0] = 1;
+            op.texelOffset[1] = 1;
+            op.texelOffset[2] = 1;
 
             if(atomicop == NvShaderAtomic::CompareAndSwap)
             {
@@ -2147,6 +2186,14 @@ void Program::PostprocessVendorExtensions()
             op.operands[0] = curOp.operands[0];
             op.str = ToStr(op.operation);
 
+            // if this is an atomic64, the low/high bits are separate operands
+            if(op.operation == OPCODE_NV_U64_ATOMIC)
+            {
+              op.operands[1] = curOp.operands[0];
+              op.operands[0].setComps(curOp.operands[0].comps[0], 0xff, 0xff, 0xff);
+              op.operands[1].setComps(curOp.operands[0].comps[1], 0xff, 0xff, 0xff);
+            }
+
             switch((VendorAtomicOp)op.preciseValues)
             {
               case ATOMIC_OP_NONE: break;