diff --git a/renderdoc/os/posix/android/android_process.cpp b/renderdoc/os/posix/android/android_process.cpp index 1d7e2f820..a85ccf63f 100644 --- a/renderdoc/os/posix/android/android_process.cpp +++ b/renderdoc/os/posix/android/android_process.cpp @@ -98,6 +98,19 @@ int GetIdentPort(pid_t childPid) return ret; } +void StopAtMainInChild() // stub, intentionally empty in this file +{ +} + +bool StopChildAtMain(pid_t childPid) +{ + return false; // stub: never stops the child, so always reports failure +} + +void ResumeProcess(pid_t childPid) // stub, intentionally empty in this file +{ +} + // because OSUtility::DebuggerPresent is called often we want it to be // cheap. Opening and parsing a file would cause high overhead on each // call, so instead we just cache it at startup. This fails in the case diff --git a/renderdoc/os/posix/apple/apple_process.cpp b/renderdoc/os/posix/apple/apple_process.cpp index 4f59d05cc..792821362 100644 --- a/renderdoc/os/posix/apple/apple_process.cpp +++ b/renderdoc/os/posix/apple/apple_process.cpp @@ -148,6 +148,19 @@ int GetIdentPort(pid_t childPid) return 0; } +void StopAtMainInChild() // stub, intentionally empty in this file +{ +} + +bool StopChildAtMain(pid_t childPid) +{ + return false; // stub: never stops the child, so always reports failure +} + +void ResumeProcess(pid_t childPid) // stub, intentionally empty in this file +{ +} + void CacheDebuggerPresent() { } diff --git a/renderdoc/os/posix/ggp/ggp_process.cpp b/renderdoc/os/posix/ggp/ggp_process.cpp index 5841cdb5c..ee78b1b1c 100644 --- a/renderdoc/os/posix/ggp/ggp_process.cpp +++ b/renderdoc/os/posix/ggp/ggp_process.cpp @@ -136,6 +136,19 @@ int GetIdentPort(pid_t childPid) return ret; } +void StopAtMainInChild() // stub, intentionally empty in this file +{ +} + +bool StopChildAtMain(pid_t childPid) +{ + return false; // stub: never stops the child, so always reports failure +} + +void ResumeProcess(pid_t childPid) // stub, intentionally empty in this file +{ +} + // because OSUtility::DebuggerPresent is called often we want it to be // cheap. Opening and parsing a file would cause high overhead on each // call, so instead we just cache it at startup. 
This fails in the case diff --git a/renderdoc/os/posix/linux/linux_hook.cpp b/renderdoc/os/posix/linux/linux_hook.cpp index 45cd757d1..8d44041ad 100644 --- a/renderdoc/os/posix/linux/linux_hook.cpp +++ b/renderdoc/os/posix/linux/linux_hook.cpp @@ -83,6 +83,10 @@ __attribute__((visibility("default"))) void *dlopen(const char *filename, int fl int GetIdentPort(pid_t childPid); +void StopAtMainInChild(); +bool StopChildAtMain(pid_t childPid); +void ResumeProcess(pid_t childPid); + __attribute__((visibility("default"))) pid_t fork() { if(!realfork) @@ -96,25 +100,59 @@ __attribute__((visibility("default"))) pid_t fork() pid_t ret = realfork(); - if(ret > 0) + if(ret == 0) { - // in parent process, kick off a thread to get the ident - Threading::ThreadHandle handle = Threading::CreateThread([ret]() { - // don't accept a return value of our own ident, that means we've checked too early and exec - // hasn't run yet - const uint32_t ownIdent = RenderDoc::Inst().GetTargetControlIdent(); - uint32_t ident = ownIdent; - for(uint32_t i = 0; i < 10 && ident == ownIdent; i++) - { - ident = (uint32_t)GetIdentPort(ret); - if(ident == ownIdent) - usleep(1000); - } + StopAtMainInChild(); // child side of the stop-at-main handshake + } + else if(ret > 0) + { + bool stopped = StopChildAtMain(ret); // parent side: true only if the child is now held stopped - RenderDoc::Inst().AddChildProcess((uint32_t)ret, (uint32_t)ident); - RenderDoc::Inst().CompleteChildThread((uint32_t)ret); - }); - RenderDoc::Inst().AddChildThread((uint32_t)ret, handle); + if(stopped) + { + int ident = GetIdentPort(ret); // queried directly here instead of polling from a thread + + ResumeProcess(ret); // let the child continue now that the ident has been queried + + if(ident) + { + RDCLOG("Identified child process %u with ident %u", ret, ident); + RenderDoc::Inst().AddChildProcess((uint32_t)ret, (uint32_t)ident); + } + else + { + RDCERR("Couldn't get ident for PID %u after stopping at main", ret); + } + } + else + { + // resume the process just in case something went wrong. This should be harmless if we're not + // actually tracing + ResumeProcess(ret); + + // ptrace_scope isn't amenable, or we hit an error. 
We'll have to spin up a thread to check + // the ident on the child process and add it as soon as it's available + Threading::ThreadHandle handle = Threading::CreateThread([ret]() { + RDCLOG("Starting thread to get ident for PID %u", ret); + + // don't accept a return value of our own ident, that means we've checked too early and exec + // hasn't run yet + const uint32_t ownIdent = RenderDoc::Inst().GetTargetControlIdent(); + uint32_t ident = ownIdent; + for(uint32_t i = 0; i < 10 && ident == ownIdent; i++) // at most 10 attempts, 1ms apart + { + ident = (uint32_t)GetIdentPort(ret); + if(ident == ownIdent) + usleep(1000); + } + + RDCLOG("PID %u has ident %u", ret, ident); + + RenderDoc::Inst().AddChildProcess((uint32_t)ret, (uint32_t)ident); + RenderDoc::Inst().CompleteChildThread((uint32_t)ret); + }); + RenderDoc::Inst().AddChildThread((uint32_t)ret, handle); + } } return ret; diff --git a/renderdoc/os/posix/linux/linux_process.cpp b/renderdoc/os/posix/linux/linux_process.cpp index 5841cdb5c..68c3879de 100644 --- a/renderdoc/os/posix/linux/linux_process.cpp +++ b/renderdoc/os/posix/linux/linux_process.cpp @@ -22,13 +22,25 @@ * THE SOFTWARE. 
******************************************************************************/ +#include +#include +#include +#include +#include +#include #include #include #include "api/replay/data_types.h" #include "common/common.h" #include "common/formatting.h" +#include "core/core.h" +#include "core/settings.h" #include "os/os_specific.h" +RDOC_CONFIG(bool, Linux_PtraceChildProcesses, true, + "Use ptrace(2) to trace child processes at startup to ensure connection is made as " + "early as possible."); + extern char **environ; // we wait 1ns, then 2ns, then 4ns, etc so our total is 0xfff etc @@ -136,6 +148,289 @@ int GetIdentPort(pid_t childPid) return ret; } +// returns true if ptrace may be used to stop child processes: the config option must be enabled +// and the yama ptrace_scope file must not contain a value greater than 1. +static bool ptrace_scope_ok() +{ + if(!Linux_PtraceChildProcesses()) + return false; + + rdcstr contents; + FileIO::ReadAll("/proc/sys/kernel/yama/ptrace_scope", contents); + contents.trim(); + if(!contents.empty()) + { + int ptrace_scope = atoi(contents.c_str()); + if(ptrace_scope > 1) + { + if(RenderDoc::Inst().IsReplayApp()) + { + static bool warned = false; + if(!warned) + { + warned = true; + RDCWARN( + "ptrace_scope value %d means ptrace can't be used to pause child processes while " + "attaching.", + ptrace_scope); + } + } + return false; + } + } + + return true; +} + +// current CLOCK_MONOTONIC time in nanoseconds +static uint64_t get_nanotime() +{ + timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + uint64_t ret = uint64_t(ts.tv_sec) * 1000000000ULL + uint32_t(ts.tv_nsec & 0xffffffff); + return ret; +} + +#if ENABLED(RDOC_X64) +#define INST_PTR_REG rip +#else +#define INST_PTR_REG eip +#endif + +// returns the instruction pointer of a traced child, or 0 if PTRACE_GETREGS fails +static uint64_t get_child_ip(pid_t childPid) +{ + user_regs_struct regs = {}; + + long ptraceRet = ptrace(PTRACE_GETREGS, childPid, NULL, &regs); + if(ptraceRet == 0) + return uint64_t(regs.INST_PTR_REG); + + return 0; +} + +// polls for up to timeoutMS milliseconds for the traced child to stop. Returns true if it stopped; +// status receives the waitpid status, or 0 if the stop was only detected by reading registers. +static bool wait_traced_child(pid_t childPid, uint32_t timeoutMS, int &status) +{ + // spin waiting for the traced child until the caller's timeout expires + status = 0; + uint64_t start_nano = get_nanotime(); + uint64_t end_nano = 0; + int ret = 0; + + const 
uint64_t timeoutNanoseconds = uint64_t(timeoutMS) * 1000 * 1000; + + while((ret = waitpid(childPid, &status, WNOHANG)) == 0) + { + status = 0; + + // if we're in a capturing process then the process itself might have done waitpid(-1) and + // swallowed the wait for our child. So as an alternative we check to see if we can query the + // instruction pointer, which is only possible if the child is stopped. + uint64_t ip = get_child_ip(childPid); + if(ip != 0) + { + // do waitpid again in case we raced and the child stopped in between the call to waitpid and + // get_child_ip. + ret = waitpid(childPid, &status, WNOHANG); + + // if it still didn't succeed, set status to 0 so we know we're earlying out and don't check + // the status codes. + if(ret == 0) + status = 0; + return true; + } + + usleep(10); + + // check the timeout + end_nano = get_nanotime(); + if(end_nano - start_nano > timeoutNanoseconds) + break; + } + + return WIFSTOPPED(status); +} + +// parent side: runs the traced child up to its ELF entry point and leaves it stopped there for the +// caller to resume. Returns false if ptrace can't be used or any wait, open or parse step fails. +bool StopChildAtMain(pid_t childPid) +{ + // don't do this unless the ptrace scope is OK. + if(!ptrace_scope_ok()) + return false; + + int childStatus = 0; + + // we have a low timeout for this stop since it should happen almost immediately (right after the + // fork). If it didn't then we want to fail relatively fast. 
+ if(!wait_traced_child(childPid, 100, childStatus)) + { + RDCERR("Didn't get initial stop from child PID %u", childPid); + return false; + } + + if(childStatus > 0 && WSTOPSIG(childStatus) != SIGSTOP) + { + RDCERR("Initial signal from child PID %u was %x, expected %x", childPid, WSTOPSIG(childStatus), + SIGSTOP); + return false; + } + + long ptraceRet = 0; + + // continue until exec + ptraceRet = ptrace(PTRACE_SETOPTIONS, childPid, NULL, PTRACE_O_TRACEEXEC); + RDCASSERTEQUAL(ptraceRet, 0); + + // continue + ptraceRet = ptrace(PTRACE_CONT, childPid, NULL, NULL); + RDCASSERTEQUAL(ptraceRet, 0); + + // we're not under control of when the application calls exec() after fork() in the case of child + // processes, so be a little more generous with the timeout + if(!wait_traced_child(childPid, 250, childStatus)) + { + RDCERR("Didn't get to execve in child PID %u", childPid); + return false; + } + + if(childStatus > 0 && (childStatus >> 8) != (SIGTRAP | (PTRACE_EVENT_EXEC << 8))) + { + RDCERR("Exec wait event from child PID %u was status %x, expected %x", childPid, + (childStatus >> 8), (SIGTRAP | (PTRACE_EVENT_EXEC << 8))); + return false; + } + + rdcstr exepath; + long basePointer = 0; + uint32_t sectionOffset = 0; + + rdcstr mapsName = StringFormat::Fmt("/proc/%u/maps", childPid); + + FILE *maps = FileIO::fopen(mapsName.c_str(), "r"); + + if(!maps) + { + RDCERR("Couldn't open %s", mapsName.c_str()); + return false; + } + + while(!feof(maps)) + { + char line[512] = {0}; + if(fgets(line, 511, maps)) + { + if(strstr(line, "r-xp")) + { + RDCCOMPILE_ASSERT(sizeof(long) == sizeof(void *), "Expected long to be pointer sized"); + int pathOffset = 0; + int num = sscanf(line, "%lx-%*x r-xp %x %*x:%*x %*u %n", &basePointer, &sectionOffset, + &pathOffset); + + if(num != 2 || pathOffset == 0) + { + RDCERR("Couldn't parse first executable mapping '%s'", rdcstr(line).trimmed().c_str()); + FileIO::fclose(maps); + return false; + } + + exepath = line + pathOffset; + exepath.trim(); + break; + } + } + } + + 
// we're done with the maps file, close it before any early-out below so the handle isn't leaked + FileIO::fclose(maps); + + if(basePointer == 0) + { + RDCERR("Couldn't find executable mapping in maps file"); + return false; + } + + FILE *elf = FileIO::fopen(exepath.c_str(), "r"); + + if(!elf) + { + RDCERR("Couldn't open %s to parse ELF header", exepath.c_str()); + return false; + } + + Elf64_Ehdr elf_header; + size_t read = FileIO::fread(&elf_header, sizeof(elf_header), 1, elf); + FileIO::fclose(elf); + + if(read != 1) + { + RDCERR("Couldn't read ELF header from %s", exepath.c_str()); + return false; + } + + void *entry = (void *)(basePointer + elf_header.e_entry - sectionOffset); + + long origEntryWord = ptrace(PTRACE_PEEKTEXT, childPid, entry, 0); + + // overwrite only the lowest byte of the word at the entry point with 0xcc (the x86 breakpoint + // opcode). The mask is long-sized so that every other byte of the word is preserved. + long breakpointWord = (origEntryWord & ~0xffL) | 0xccL; + ptraceRet = ptrace(PTRACE_POKETEXT, childPid, entry, breakpointWord); + RDCASSERTEQUAL(ptraceRet, 0); + + // continue + ptraceRet = ptrace(PTRACE_CONT, childPid, NULL, NULL); + RDCASSERTEQUAL(ptraceRet, 0); + + // it could take a long time to hit main so we have a large timeout here + if(!wait_traced_child(childPid, 2000, childStatus)) + { + RDCERR("Didn't hit breakpoint in PID %u (%x)", childPid, childStatus); + return false; + } + + // we're now at main! now just need to clean up after ourselves + + user_regs_struct regs = {}; + + ptraceRet = ptrace(PTRACE_GETREGS, childPid, NULL, &regs); + RDCASSERTEQUAL(ptraceRet, 0); + + // step back past the byte we inserted the breakpoint on + regs.INST_PTR_REG--; + ptraceRet = ptrace(PTRACE_SETREGS, childPid, NULL, &regs); + RDCASSERTEQUAL(ptraceRet, 0); + + // restore the function + ptraceRet = ptrace(PTRACE_POKETEXT, childPid, entry, origEntryWord); + RDCASSERTEQUAL(ptraceRet, 0); + + // we'll resume after reading the ident port in the calling function + return true; +} + +void StopAtMainInChild() +{ + // don't do this unless the ptrace scope is OK. 
+ if(!ptrace_scope_ok()) + return; + + // allow parent tracing, and immediately stop so the parent process can attach + ptrace(PTRACE_TRACEME, 0, 0, 0); + raise(SIGSTOP); +} + +void ResumeProcess(pid_t childPid) +{ + if(childPid != 0) + { + // try to detach and resume the process, ignoring any errors if we weren't tracing + ptrace(PTRACE_DETACH, childPid, NULL, NULL); + } +} + // because OSUtility::DebuggerPresent is called often we want it to be // cheap. Opening and parsing a file would cause high overhead on each // call, so instead we just cache it at startup. This fails in the case diff --git a/renderdoc/os/posix/posix_process.cpp b/renderdoc/os/posix/posix_process.cpp index a2a2eed43..8b2f2cef1 100644 --- a/renderdoc/os/posix/posix_process.cpp +++ b/renderdoc/os/posix/posix_process.cpp @@ -44,6 +44,12 @@ char **GetCurrentEnvironment(); int GetIdentPort(pid_t childPid); +// functions to try and let the child run just far enough to get to main() but no further. This lets +// us check the ident port and resume. 
+void StopAtMainInChild(); +bool StopChildAtMain(pid_t childPid); +void ResumeProcess(pid_t childPid); + #if ENABLED(RDOC_APPLE) #define PRELOAD_ENV_VAR "DYLD_INSERT_LIBRARIES" @@ -572,6 +578,8 @@ static pid_t RunProcess(const char *app, const char *workingDir, const char *cmd childPid = fork(); if(childPid == 0) { + StopAtMainInChild(); + FileIO::ReleaseFDAfterFork(); if(stdoutPipe) { @@ -593,22 +601,29 @@ static pid_t RunProcess(const char *app, const char *workingDir, const char *cmd fprintf(stderr, "exec failed\n"); _exit(1); } - else if(!stdoutPipe) + else { - // remember this PID so we can wait on it later - SCOPED_SPINLOCK(zombieLock); + if(!stdoutPipe) + { + // remember this PID so we can wait on it later + SCOPED_SPINLOCK(zombieLock); - PIDNode *node = NULL; + PIDNode *node = NULL; - // take a child from the free list if available, otherwise allocate a new one - if(freeChildren.head) - node = freeChildren.pop_front(); - else - node = new PIDNode(); + // take a child from the free list if available, otherwise allocate a new one + if(freeChildren.head) + node = freeChildren.pop_front(); + else + node = new PIDNode(); - node->pid = childPid; + node->pid = childPid; - children.append(node); + children.append(node); + } + + // if fork() failed childPid is -1, and waiting on that would make waitpid() reap any child + if(childPid > 0) + StopChildAtMain(childPid); } } @@ -650,8 +665,10 @@ uint32_t Process::LaunchProcess(const char *app, const char *workingDir, const c } char **currentEnvironment = GetCurrentEnvironment(); - uint32_t ret = (uint32_t)RunProcess(app, workingDir, cmdLine, currentEnvironment, - result ? stdoutPipe : NULL, result ? stderrPipe : NULL); + pid_t ret = RunProcess(app, workingDir, cmdLine, currentEnvironment, result ? stdoutPipe : NULL, + result ? 
stderrPipe : NULL); + + ResumeProcess(ret); if(result) { @@ -698,7 +715,7 @@ uint32_t Process::LaunchProcess(const char *app, const char *workingDir, const c close(stderrPipe[0]); } - return ret; + return (uint32_t)ret; } uint32_t Process::LaunchScript(const char *script, const char *workingDir, const char *argList, @@ -826,17 +843,20 @@ rdcpair Process::LaunchAndInjectIntoProcess( i++; } + RDCLOG("Running process %s for injection", app); + pid_t childPid = RunProcess(app, workingDir, cmdLine, envp); int ret = 0; if(childPid != (pid_t)0) { - // wait for child to have opened its socket - usleep(1000); - + // ideally we stopped at main so we can check the port immediately. Otherwise this will do an + // exponential wait to get it as soon as possible ret = GetIdentPort(childPid); + ResumeProcess(childPid); + if(waitForExit) { int dummy = 0;