base/process/launch_posix.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/process/launch.h"
   6
   7 #include <dirent.h>
   8 #include <errno.h>
   9 #include <fcntl.h>
  10 #include <sched.h>
  11 #include <setjmp.h>
  12 #include <signal.h>
  13 #include <stdlib.h>
  14 #include <sys/resource.h>
  15 #include <sys/syscall.h>
  16 #include <sys/time.h>
  17 #include <sys/types.h>
  18 #include <sys/wait.h>
  19 #include <unistd.h>
  20
  21 #include <iterator>
  22 #include <limits>
  23 #include <set>
  24
  25 #include "base/command_line.h"
  26 #include "base/compiler_specific.h"
  27 #include "base/debug/debugger.h"
  28 #include "base/debug/stack_trace.h"
  29 #include "base/files/dir_reader_posix.h"
  30 #include "base/files/file_util.h"
  31 #include "base/files/scoped_file.h"
  32 #include "base/logging.h"
  33 #include "base/memory/scoped_ptr.h"
  34 #include "base/posix/eintr_wrapper.h"
  35 #include "base/process/process.h"
  36 #include "base/process/process_metrics.h"
  37 #include "base/strings/stringprintf.h"
  38 #include "base/synchronization/waitable_event.h"
  39 #include "base/third_party/dynamic_annotations/dynamic_annotations.h"
  40 #include "base/third_party/valgrind/valgrind.h"
  41 #include "base/threading/platform_thread.h"
  42 #include "base/threading/thread_restrictions.h"
  43 #include "build/build_config.h"
  44
  45 #if defined(OS_LINUX)
  46 #include <sys/prctl.h>
  47 #endif
  48
  49 #if defined(OS_CHROMEOS)
  50 #include <sys/ioctl.h>
  51 #endif
  52
  53 #if defined(OS_FREEBSD)
  54 #include <sys/event.h>
  55 #include <sys/ucontext.h>
  56 #endif
  57
  58 #if defined(OS_MACOSX)
  59 #include <crt_externs.h>
  60 #include <sys/event.h>
  61 #else
  62 extern char** environ;
  63 #endif
  64
  65 namespace base {
  66
  67 #if !defined(OS_NACL_NONSFI)
  68
  69 namespace {
  70
  71 // Get the process's "environment" (i.e. the thing that setenv/getenv
  72 // work with).
  73 char** GetEnvironment() {
  74 #if defined(OS_MACOSX)
  75   return *_NSGetEnviron();
  76 #else
  77   return environ;
  78 #endif
  79 }
  80
  81 // Set the process's "environment" (i.e. the thing that setenv/getenv
  82 // work with).
  83 void SetEnvironment(char** env) {
  84 #if defined(OS_MACOSX)
  85   *_NSGetEnviron() = env;
  86 #else
  87   environ = env;
  88 #endif
  89 }
  90
// Set the calling thread's signal mask to new_sigmask and return
// the previous signal mask.
//
// The whole mask is replaced (SIG_SETMASK), not merged with the current one.
// Failure to change the mask is treated as fatal (RAW_CHECK).
sigset_t SetSignalMask(const sigset_t& new_sigmask) {
  // Filled in by the call below with the mask that was in effect on entry.
  sigset_t old_sigmask;
#if defined(OS_ANDROID)
  // POSIX says pthread_sigmask() must be used in multi-threaded processes,
  // but Android's pthread_sigmask() was broken until 4.1:
  // https://code.google.com/p/android/issues/detail?id=15337
  // http://stackoverflow.com/questions/13777109/pthread-sigmask-on-android-not-working
  RAW_CHECK(sigprocmask(SIG_SETMASK, &new_sigmask, &old_sigmask) == 0);
#else
  RAW_CHECK(pthread_sigmask(SIG_SETMASK, &new_sigmask, &old_sigmask) == 0);
#endif
  return old_sigmask;
}
 106
 107 #if !defined(OS_LINUX) || \
 108     (!defined(__i386__) && !defined(__x86_64__) && !defined(__arm__))
 109 void ResetChildSignalHandlersToDefaults() {
 110   // The previous signal handlers are likely to be meaningless in the child's
 111   // context so we reset them to the defaults for now. http://crbug.com/44953
 112   // These signal handlers are set up at least in browser_main_posix.cc:
 113   // BrowserMainPartsPosix::PreEarlyInitialization and stack_trace_posix.cc:
 114   // EnableInProcessStackDumping.
 115   signal(SIGHUP, SIG_DFL);
 116   signal(SIGINT, SIG_DFL);
 117   signal(SIGILL, SIG_DFL);
 118   signal(SIGABRT, SIG_DFL);
 119   signal(SIGFPE, SIG_DFL);
 120   signal(SIGBUS, SIG_DFL);
 121   signal(SIGSEGV, SIG_DFL);
 122   signal(SIGSYS, SIG_DFL);
 123   signal(SIGTERM, SIG_DFL);
 124 }
 125
 126 #else
 127
 128 // TODO(jln): remove the Linux special case once kernels are fixed.
 129
// Internally the kernel makes sigset_t an array of long large enough to have
// one bit per signal.
// Here it is modelled as a single 64-bit integer; its size is what
// sys_rt_sigaction() passes to the kernel as the signal-set size.
typedef uint64_t kernel_sigset_t;

// This is what struct sigaction looks like to the kernel at least on X86 and
// ARM. MIPS, for instance, is very different.
// Instances are handed to the raw rt_sigaction syscall, so the field order
// and types must not be changed.
struct kernel_sigaction {
  void* k_sa_handler;  // For this usage it only needs to be a generic pointer.
  unsigned long k_sa_flags;
  void* k_sa_restorer;  // For this usage it only needs to be a generic pointer.
  kernel_sigset_t k_sa_mask;
};
 142
 143 // glibc's sigaction() will prevent access to sa_restorer, so we need to roll
 144 // our own.
 145 int sys_rt_sigaction(int sig, const struct kernel_sigaction* act,
 146                      struct kernel_sigaction* oact) {
 147   return syscall(SYS_rt_sigaction, sig, act, oact, sizeof(kernel_sigset_t));
 148 }
 149
 150 // This function is intended to be used in between fork() and execve() and will
 151 // reset all signal handlers to the default.
 152 // The motivation for going through all of them is that sa_restorer can leak
 153 // from parents and help defeat ASLR on buggy kernels.  We reset it to NULL.
 154 // See crbug.com/177956.
 155 void ResetChildSignalHandlersToDefaults(void) {
 156   for (int signum = 1; ; ++signum) {
 157     struct kernel_sigaction act = {0};
 158     int sigaction_get_ret = sys_rt_sigaction(signum, NULL, &act);
 159     if (sigaction_get_ret && errno == EINVAL) {
 160 #if !defined(NDEBUG)
 161       // Linux supports 32 real-time signals from 33 to 64.
 162       // If the number of signals in the Linux kernel changes, someone should
 163       // look at this code.
 164       const int kNumberOfSignals = 64;
 165       RAW_CHECK(signum == kNumberOfSignals + 1);
 166 #endif  // !defined(NDEBUG)
 167       break;
 168     }
 169     // All other failures are fatal.
 170     if (sigaction_get_ret) {
 171       RAW_LOG(FATAL, "sigaction (get) failed.");
 172     }
 173
 174     // The kernel won't allow to re-set SIGKILL or SIGSTOP.
 175     if (signum != SIGSTOP && signum != SIGKILL) {
 176       act.k_sa_handler = reinterpret_cast<void*>(SIG_DFL);
 177       act.k_sa_restorer = NULL;
 178       if (sys_rt_sigaction(signum, &act, NULL)) {
 179         RAW_LOG(FATAL, "sigaction (set) failed.");
 180       }
 181     }
 182 #if !defined(NDEBUG)
 183     // Now ask the kernel again and check that no restorer will leak.
 184     if (sys_rt_sigaction(signum, NULL, &act) || act.k_sa_restorer) {
 185       RAW_LOG(FATAL, "Cound not fix sa_restorer.");
 186     }
 187 #endif  // !defined(NDEBUG)
 188   }
 189 }
 190 #endif  // !defined(OS_LINUX) ||
 191         // (!defined(__i386__) && !defined(__x86_64__) && !defined(__arm__))
 192 }  // anonymous namespace
 193
 194 // Functor for |ScopedDIR| (below).
 195 struct ScopedDIRClose {
 196   inline void operator()(DIR* x) const {
 197     if (x)
 198       closedir(x);
 199   }
 200 };
 201
 202 // Automatically closes |DIR*|s.
 203 typedef scoped_ptr<DIR, ScopedDIRClose> ScopedDIR;
 204
 205 #if defined(OS_LINUX)
 206 static const char kFDDir[] = "/proc/self/fd";
 207 #elif defined(OS_MACOSX)
 208 static const char kFDDir[] = "/dev/fd";
 209 #elif defined(OS_SOLARIS)
 210 static const char kFDDir[] = "/dev/fd";
 211 #elif defined(OS_FREEBSD)
 212 static const char kFDDir[] = "/dev/fd";
 213 #elif defined(OS_OPENBSD)
 214 static const char kFDDir[] = "/dev/fd";
 215 #elif defined(OS_ANDROID)
 216 static const char kFDDir[] = "/proc/self/fd";
 217 #endif
 218
 219 void CloseSuperfluousFds(const base::InjectiveMultimap& saved_mapping) {
 220   // DANGER: no calls to malloc or locks are allowed from now on:
 221   // http://crbug.com/36678
 222
 223   // Get the maximum number of FDs possible.
 224   size_t max_fds = GetMaxFds();
 225
 226   DirReaderPosix fd_dir(kFDDir);
 227   if (!fd_dir.IsValid()) {
 228     // Fallback case: Try every possible fd.
 229     for (size_t i = 0; i < max_fds; ++i) {
 230       const int fd = static_cast<int>(i);
 231       if (fd == STDIN_FILENO || fd == STDOUT_FILENO || fd == STDERR_FILENO)
 232         continue;
 233       // Cannot use STL iterators here, since debug iterators use locks.
 234       size_t j;
 235       for (j = 0; j < saved_mapping.size(); j++) {
 236         if (fd == saved_mapping[j].dest)
 237           break;
 238       }
 239       if (j < saved_mapping.size())
 240         continue;
 241
 242       // Since we're just trying to close anything we can find,
 243       // ignore any error return values of close().
 244       close(fd);
 245     }
 246     return;
 247   }
 248
 249   const int dir_fd = fd_dir.fd();
 250
 251   for ( ; fd_dir.Next(); ) {
 252     // Skip . and .. entries.
 253     if (fd_dir.name()[0] == '.')
 254       continue;
 255
 256     char *endptr;
 257     errno = 0;
 258     const long int fd = strtol(fd_dir.name(), &endptr, 10);
 259     if (fd_dir.name()[0] == 0 || *endptr || fd < 0 || errno)
 260       continue;
 261     if (fd == STDIN_FILENO || fd == STDOUT_FILENO || fd == STDERR_FILENO)
 262       continue;
 263     // Cannot use STL iterators here, since debug iterators use locks.
 264     size_t i;
 265     for (i = 0; i < saved_mapping.size(); i++) {
 266       if (fd == saved_mapping[i].dest)
 267         break;
 268     }
 269     if (i < saved_mapping.size())
 270       continue;
 271     if (fd == dir_fd)
 272       continue;
 273
 274     // When running under Valgrind, Valgrind opens several FDs for its
 275     // own use and will complain if we try to close them.  All of
 276     // these FDs are >= |max_fds|, so we can check against that here
 277     // before closing.  See https://bugs.kde.org/show_bug.cgi?id=191758
 278     if (fd < static_cast<int>(max_fds)) {
 279       int ret = IGNORE_EINTR(close(fd));
 280       DPCHECK(ret == 0);
 281     }
 282   }
 283 }
 284
 285 Process LaunchProcess(const CommandLine& cmdline,
 286                       const LaunchOptions& options) {
 287   return LaunchProcess(cmdline.argv(), options);
 288 }
 289
 290 Process LaunchProcess(const std::vector<std::string>& argv,
 291                       const LaunchOptions& options) {
 292   size_t fd_shuffle_size = 0;
 293   if (options.fds_to_remap) {
 294     fd_shuffle_size = options.fds_to_remap->size();
 295   }
 296
 297   InjectiveMultimap fd_shuffle1;
 298   InjectiveMultimap fd_shuffle2;
 299   fd_shuffle1.reserve(fd_shuffle_size);
 300   fd_shuffle2.reserve(fd_shuffle_size);
 301
 302   scoped_ptr<char* []> argv_cstr(new char* [argv.size() + 1]);
 303   for (size_t i = 0; i < argv.size(); i++) {
 304     argv_cstr[i] = const_cast<char*>(argv[i].c_str());
 305   }
 306   argv_cstr[argv.size()] = NULL;
 307
 308   scoped_ptr<char*[]> new_environ;
 309   char* const empty_environ = NULL;
 310   char* const* old_environ = GetEnvironment();
 311   if (options.clear_environ)
 312     old_environ = &empty_environ;
 313   if (!options.environ.empty())
 314     new_environ = AlterEnvironment(old_environ, options.environ);
 315
 316   sigset_t full_sigset;
 317   sigfillset(&full_sigset);
 318   const sigset_t orig_sigmask = SetSignalMask(full_sigset);
 319
 320   const char* current_directory = nullptr;
 321   if (!options.current_directory.empty()) {
 322     current_directory = options.current_directory.value().c_str();
 323   }
 324
 325   pid_t pid;
 326 #if defined(OS_LINUX)
 327   if (options.clone_flags) {
 328     // Signal handling in this function assumes the creation of a new
 329     // process, so we check that a thread is not being created by mistake
 330     // and that signal handling follows the process-creation rules.
 331     RAW_CHECK(
 332         !(options.clone_flags & (CLONE_SIGHAND | CLONE_THREAD | CLONE_VM)));
 333
 334     // We specify a null ptid and ctid.
 335     RAW_CHECK(
 336         !(options.clone_flags &
 337           (CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID | CLONE_PARENT_SETTID)));
 338
 339     // Since we use waitpid, we do not support custom termination signals in the
 340     // clone flags.
 341     RAW_CHECK((options.clone_flags & 0xff) == 0);
 342
 343     pid = ForkWithFlags(options.clone_flags | SIGCHLD, nullptr, nullptr);
 344   } else
 345 #endif
 346   {
 347     pid = fork();
 348   }
 349
 350   // Always restore the original signal mask in the parent.
 351   if (pid != 0) {
 352     SetSignalMask(orig_sigmask);
 353   }
 354
 355   if (pid < 0) {
 356     DPLOG(ERROR) << "fork";
 357     return Process();
 358   } else if (pid == 0) {
 359     // Child process
 360
 361     // DANGER: no calls to malloc or locks are allowed from now on:
 362     // http://crbug.com/36678
 363
 364     // DANGER: fork() rule: in the child, if you don't end up doing exec*(),
 365     // you call _exit() instead of exit(). This is because _exit() does not
 366     // call any previously-registered (in the parent) exit handlers, which
 367     // might do things like block waiting for threads that don't even exist
 368     // in the child.
 369
 370     // If a child process uses the readline library, the process block forever.
 371     // In BSD like OSes including OS X it is safe to assign /dev/null as stdin.
 372     // See http://crbug.com/56596.
 373     base::ScopedFD null_fd(HANDLE_EINTR(open("/dev/null", O_RDONLY)));
 374     if (!null_fd.is_valid()) {
 375       RAW_LOG(ERROR, "Failed to open /dev/null");
 376       _exit(127);
 377     }
 378
 379     int new_fd = HANDLE_EINTR(dup2(null_fd.get(), STDIN_FILENO));
 380     if (new_fd != STDIN_FILENO) {
 381       RAW_LOG(ERROR, "Failed to dup /dev/null for stdin");
 382       _exit(127);
 383     }
 384
 385     if (options.new_process_group) {
 386       // Instead of inheriting the process group ID of the parent, the child
 387       // starts off a new process group with pgid equal to its process ID.
 388       if (setpgid(0, 0) < 0) {
 389         RAW_LOG(ERROR, "setpgid failed");
 390         _exit(127);
 391       }
 392     }
 393
 394     if (options.maximize_rlimits) {
 395       // Some resource limits need to be maximal in this child.
 396       for (size_t i = 0; i < options.maximize_rlimits->size(); ++i) {
 397         const int resource = (*options.maximize_rlimits)[i];
 398         struct rlimit limit;
 399         if (getrlimit(resource, &limit) < 0) {
 400           RAW_LOG(WARNING, "getrlimit failed");
 401         } else if (limit.rlim_cur < limit.rlim_max) {
 402           limit.rlim_cur = limit.rlim_max;
 403           if (setrlimit(resource, &limit) < 0) {
 404             RAW_LOG(WARNING, "setrlimit failed");
 405           }
 406         }
 407       }
 408     }
 409
 410 #if defined(OS_MACOSX)
 411     RestoreDefaultExceptionHandler();
 412     if (!options.replacement_bootstrap_name.empty())
 413       ReplaceBootstrapPort(options.replacement_bootstrap_name);
 414 #endif  // defined(OS_MACOSX)
 415
 416     ResetChildSignalHandlersToDefaults();
 417     SetSignalMask(orig_sigmask);
 418
 419 #if 0
 420     // When debugging it can be helpful to check that we really aren't making
 421     // any hidden calls to malloc.
 422     void *malloc_thunk =
 423         reinterpret_cast<void*>(reinterpret_cast<intptr_t>(malloc) & ~4095);
 424     mprotect(malloc_thunk, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
 425     memset(reinterpret_cast<void*>(malloc), 0xff, 8);
 426 #endif  // 0
 427
 428 #if defined(OS_CHROMEOS)
 429     if (options.ctrl_terminal_fd >= 0) {
 430       // Set process' controlling terminal.
 431       if (HANDLE_EINTR(setsid()) != -1) {
 432         if (HANDLE_EINTR(
 433                 ioctl(options.ctrl_terminal_fd, TIOCSCTTY, NULL)) == -1) {
 434           RAW_LOG(WARNING, "ioctl(TIOCSCTTY), ctrl terminal not set");
 435         }
 436       } else {
 437         RAW_LOG(WARNING, "setsid failed, ctrl terminal not set");
 438       }
 439     }
 440 #endif  // defined(OS_CHROMEOS)
 441
 442     if (options.fds_to_remap) {
 443       // Cannot use STL iterators here, since debug iterators use locks.
 444       for (size_t i = 0; i < options.fds_to_remap->size(); ++i) {
 445         const FileHandleMappingVector::value_type& value =
 446             (*options.fds_to_remap)[i];
 447         fd_shuffle1.push_back(InjectionArc(value.first, value.second, false));
 448         fd_shuffle2.push_back(InjectionArc(value.first, value.second, false));
 449       }
 450     }
 451
 452     if (!options.environ.empty() || options.clear_environ)
 453       SetEnvironment(new_environ.get());
 454
 455     // fd_shuffle1 is mutated by this call because it cannot malloc.
 456     if (!ShuffleFileDescriptors(&fd_shuffle1))
 457       _exit(127);
 458
 459     CloseSuperfluousFds(fd_shuffle2);
 460
 461     // Set NO_NEW_PRIVS by default. Since NO_NEW_PRIVS only exists in kernel
 462     // 3.5+, do not check the return value of prctl here.
 463 #if defined(OS_LINUX)
 464 #ifndef PR_SET_NO_NEW_PRIVS
 465 #define PR_SET_NO_NEW_PRIVS 38
 466 #endif
 467     if (!options.allow_new_privs) {
 468       if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) && errno != EINVAL) {
 469         // Only log if the error is not EINVAL (i.e. not supported).
 470         RAW_LOG(FATAL, "prctl(PR_SET_NO_NEW_PRIVS) failed");
 471       }
 472     }
 473
 474     if (options.kill_on_parent_death) {
 475       if (prctl(PR_SET_PDEATHSIG, SIGKILL) != 0) {
 476         RAW_LOG(ERROR, "prctl(PR_SET_PDEATHSIG) failed");
 477         _exit(127);
 478       }
 479     }
 480 #endif
 481
 482     if (current_directory != nullptr) {
 483       RAW_CHECK(chdir(current_directory) == 0);
 484     }
 485
 486     if (options.pre_exec_delegate != nullptr) {
 487       options.pre_exec_delegate->RunAsyncSafe();
 488     }
 489
 490     execvp(argv_cstr[0], argv_cstr.get());
 491
 492     RAW_LOG(ERROR, "LaunchProcess: failed to execvp:");
 493     RAW_LOG(ERROR, argv_cstr[0]);
 494     _exit(127);
 495   } else {
 496     // Parent process
 497     if (options.wait) {
 498       // While this isn't strictly disk IO, waiting for another process to
 499       // finish is the sort of thing ThreadRestrictions is trying to prevent.
 500       base::ThreadRestrictions::AssertIOAllowed();
 501       pid_t ret = HANDLE_EINTR(waitpid(pid, 0, 0));
 502       DPCHECK(ret > 0);
 503     }
 504   }
 505
 506   return Process(pid);
 507 }
 508
// Intentionally a no-op in this POSIX implementation.
void RaiseProcessToHighPriority() {
  // On POSIX, we don't actually do anything here.  We could try to nice() or
  // setpriority() or sched_getscheduler, but these all require extra rights.
}
 513
// Return value used by GetAppOutputInternal to encapsulate the various exit
// scenarios from the function.
enum GetAppOutputInternalResult {
  // The child could not be started (pipe() or fork() failed), or waiting for
  // it to exit failed.
  EXECUTE_FAILURE,
  // The child was waited for successfully; its exit code is reported
  // separately through GetAppOutputInternal's |exit_code| argument.
  EXECUTE_SUCCESS,
  // Reading stopped because the caller's output limit was reached.
  GOT_MAX_OUTPUT,
};
 521
 522 // Executes the application specified by |argv| and wait for it to exit. Stores
 523 // the output (stdout) in |output|. If |do_search_path| is set, it searches the
 524 // path for the application; in that case, |envp| must be null, and it will use
 525 // the current environment. If |do_search_path| is false, |argv[0]| should fully
 526 // specify the path of the application, and |envp| will be used as the
 527 // environment. If |include_stderr| is true, includes stderr otherwise redirects
 528 // it to /dev/null.
 529 // If we successfully start the application and get all requested output, we
 530 // return GOT_MAX_OUTPUT, or if there is a problem starting or exiting
 531 // the application we return RUN_FAILURE. Otherwise we return EXECUTE_SUCCESS.
 532 // The GOT_MAX_OUTPUT return value exists so a caller that asks for limited
 533 // output can treat this as a success, despite having an exit code of SIG_PIPE
 534 // due to us closing the output pipe.
 535 // In the case of EXECUTE_SUCCESS, the application exit code will be returned
 536 // in |*exit_code|, which should be checked to determine if the application
 537 // ran successfully.
 538 static GetAppOutputInternalResult GetAppOutputInternal(
 539     const std::vector<std::string>& argv,
 540     char* const envp[],
 541     bool include_stderr,
 542     std::string* output,
 543     size_t max_output,
 544     bool do_search_path,
 545     int* exit_code) {
 546   // Doing a blocking wait for another command to finish counts as IO.
 547   base::ThreadRestrictions::AssertIOAllowed();
 548   // exit_code must be supplied so calling function can determine success.
 549   DCHECK(exit_code);
 550   *exit_code = EXIT_FAILURE;
 551
 552   int pipe_fd[2];
 553   pid_t pid;
 554   InjectiveMultimap fd_shuffle1, fd_shuffle2;
 555   scoped_ptr<char*[]> argv_cstr(new char*[argv.size() + 1]);
 556
 557   fd_shuffle1.reserve(3);
 558   fd_shuffle2.reserve(3);
 559
 560   // Either |do_search_path| should be false or |envp| should be null, but not
 561   // both.
 562   DCHECK(!do_search_path ^ !envp);
 563
 564   if (pipe(pipe_fd) < 0)
 565     return EXECUTE_FAILURE;
 566
 567   switch (pid = fork()) {
 568     case -1:  // error
 569       close(pipe_fd[0]);
 570       close(pipe_fd[1]);
 571       return EXECUTE_FAILURE;
 572     case 0:  // child
 573       {
 574         // DANGER: no calls to malloc or locks are allowed from now on:
 575         // http://crbug.com/36678
 576
 577 #if defined(OS_MACOSX)
 578         RestoreDefaultExceptionHandler();
 579 #endif
 580
 581         // Obscure fork() rule: in the child, if you don't end up doing exec*(),
 582         // you call _exit() instead of exit(). This is because _exit() does not
 583         // call any previously-registered (in the parent) exit handlers, which
 584         // might do things like block waiting for threads that don't even exist
 585         // in the child.
 586         int dev_null = open("/dev/null", O_WRONLY);
 587         if (dev_null < 0)
 588           _exit(127);
 589
 590         fd_shuffle1.push_back(InjectionArc(pipe_fd[1], STDOUT_FILENO, true));
 591         fd_shuffle1.push_back(InjectionArc(
 592             include_stderr ? pipe_fd[1] : dev_null,
 593             STDERR_FILENO, true));
 594         fd_shuffle1.push_back(InjectionArc(dev_null, STDIN_FILENO, true));
 595         // Adding another element here? Remeber to increase the argument to
 596         // reserve(), above.
 597
 598         for (size_t i = 0; i < fd_shuffle1.size(); ++i)
 599           fd_shuffle2.push_back(fd_shuffle1[i]);
 600
 601         if (!ShuffleFileDescriptors(&fd_shuffle1))
 602           _exit(127);
 603
 604         CloseSuperfluousFds(fd_shuffle2);
 605
 606         for (size_t i = 0; i < argv.size(); i++)
 607           argv_cstr[i] = const_cast<char*>(argv[i].c_str());
 608         argv_cstr[argv.size()] = NULL;
 609         if (do_search_path)
 610           execvp(argv_cstr[0], argv_cstr.get());
 611         else
 612           execve(argv_cstr[0], argv_cstr.get(), envp);
 613         _exit(127);
 614       }
 615     default:  // parent
 616       {
 617         // Close our writing end of pipe now. Otherwise later read would not
 618         // be able to detect end of child's output (in theory we could still
 619         // write to the pipe).
 620         close(pipe_fd[1]);
 621
 622         output->clear();
 623         char buffer[256];
 624         size_t output_buf_left = max_output;
 625         ssize_t bytes_read = 1;  // A lie to properly handle |max_output == 0|
 626                                  // case in the logic below.
 627
 628         while (output_buf_left > 0) {
 629           bytes_read = HANDLE_EINTR(read(pipe_fd[0], buffer,
 630                                     std::min(output_buf_left, sizeof(buffer))));
 631           if (bytes_read <= 0)
 632             break;
 633           output->append(buffer, bytes_read);
 634           output_buf_left -= static_cast<size_t>(bytes_read);
 635         }
 636         close(pipe_fd[0]);
 637
 638         // Always wait for exit code (even if we know we'll declare
 639         // GOT_MAX_OUTPUT).
 640         Process process(pid);
 641         bool success = process.WaitForExit(exit_code);
 642
 643         // If we stopped because we read as much as we wanted, we return
 644         // GOT_MAX_OUTPUT (because the child may exit due to |SIGPIPE|).
 645         if (!output_buf_left && bytes_read > 0)
 646           return GOT_MAX_OUTPUT;
 647         else if (success)
 648           return EXECUTE_SUCCESS;
 649         return EXECUTE_FAILURE;
 650       }
 651   }
 652 }
 653
 654 bool GetAppOutput(const CommandLine& cl, std::string* output) {
 655   return GetAppOutput(cl.argv(), output);
 656 }
 657
 658 bool GetAppOutput(const std::vector<std::string>& argv, std::string* output) {
 659   // Run |execve()| with the current environment and store "unlimited" data.
 660   int exit_code;
 661   GetAppOutputInternalResult result = GetAppOutputInternal(
 662       argv, NULL, false, output, std::numeric_limits<std::size_t>::max(), true,
 663       &exit_code);
 664   return result == EXECUTE_SUCCESS && exit_code == EXIT_SUCCESS;
 665 }
 666
 667 bool GetAppOutputAndError(const CommandLine& cl, std::string* output) {
 668   // Run |execve()| with the current environment and store "unlimited" data.
 669   int exit_code;
 670   GetAppOutputInternalResult result = GetAppOutputInternal(
 671       cl.argv(), NULL, true, output, std::numeric_limits<std::size_t>::max(),
 672       true, &exit_code);
 673   return result == EXECUTE_SUCCESS && exit_code == EXIT_SUCCESS;
 674 }
 675
 676 // TODO(viettrungluu): Conceivably, we should have a timeout as well, so we
 677 // don't hang if what we're calling hangs.
 678 bool GetAppOutputRestricted(const CommandLine& cl,
 679                             std::string* output, size_t max_output) {
 680   // Run |execve()| with the empty environment.
 681   char* const empty_environ = NULL;
 682   int exit_code;
 683   GetAppOutputInternalResult result = GetAppOutputInternal(
 684       cl.argv(), &empty_environ, false, output, max_output, false, &exit_code);
 685   return result == GOT_MAX_OUTPUT || (result == EXECUTE_SUCCESS &&
 686                                       exit_code == EXIT_SUCCESS);
 687 }
 688
 689 bool GetAppOutputWithExitCode(const CommandLine& cl,
 690                               std::string* output,
 691                               int* exit_code) {
 692   // Run |execve()| with the current environment and store "unlimited" data.
 693   GetAppOutputInternalResult result = GetAppOutputInternal(
 694       cl.argv(), NULL, false, output, std::numeric_limits<std::size_t>::max(),
 695       true, exit_code);
 696   return result == EXECUTE_SUCCESS;
 697 }
 698
 699 #endif  // !defined(OS_NACL_NONSFI)
 700
 701 #if defined(OS_LINUX) || defined(OS_NACL_NONSFI)
 702 namespace {
 703
 704 bool IsRunningOnValgrind() {
 705   return RUNNING_ON_VALGRIND;
 706 }
 707
 708 // This function runs on the stack specified on the clone call. It uses longjmp
 709 // to switch back to the original stack so the child can return from sys_clone.
 710 int CloneHelper(void* arg) {
 711   jmp_buf* env_ptr = reinterpret_cast<jmp_buf*>(arg);
 712   longjmp(*env_ptr, 1);
 713
 714   // Should not be reached.
 715   RAW_CHECK(false);
 716   return 1;
 717 }
 718
 719 // This function is noinline to ensure that stack_buf is below the stack pointer
 720 // that is saved when setjmp is called below. This is needed because when
 721 // compiled with FORTIFY_SOURCE, glibc's longjmp checks that the stack is moved
 722 // upwards. See crbug.com/442912 for more details.
 723 #if defined(ADDRESS_SANITIZER)
 724 // Disable AddressSanitizer instrumentation for this function to make sure
 725 // |stack_buf| is allocated on thread stack instead of ASan's fake stack.
 726 // Under ASan longjmp() will attempt to clean up the area between the old and
 727 // new stack pointers and print a warning that may confuse the user.
 728 __attribute__((no_sanitize_address))
 729 #endif
 730 NOINLINE pid_t CloneAndLongjmpInChild(unsigned long flags,
 731                                       pid_t* ptid,
 732                                       pid_t* ctid,
 733                                       jmp_buf* env) {
 734   // We use the libc clone wrapper instead of making the syscall
 735   // directly because making the syscall may fail to update the libc's
 736   // internal pid cache. The libc interface unfortunately requires
 737   // specifying a new stack, so we use setjmp/longjmp to emulate
 738   // fork-like behavior.
 739   char stack_buf[PTHREAD_STACK_MIN];
 740 #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
 741     defined(ARCH_CPU_MIPS64_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
 742   // The stack grows downward.
 743   void* stack = stack_buf + sizeof(stack_buf);
 744 #else
 745 #error "Unsupported architecture"
 746 #endif
 747   return clone(&CloneHelper, stack, flags, env, ptid, nullptr, ctid);
 748 }
 749
 750 }  // anonymous namespace
 751
 752 pid_t ForkWithFlags(unsigned long flags, pid_t* ptid, pid_t* ctid) {
 753   const bool clone_tls_used = flags & CLONE_SETTLS;
 754   const bool invalid_ctid =
 755       (flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) && !ctid;
 756   const bool invalid_ptid = (flags & CLONE_PARENT_SETTID) && !ptid;
 757
 758   // We do not support CLONE_VM.
 759   const bool clone_vm_used = flags & CLONE_VM;
 760
 761   if (clone_tls_used || invalid_ctid || invalid_ptid || clone_vm_used) {
 762     RAW_LOG(FATAL, "Invalid usage of ForkWithFlags");
 763   }
 764
 765   // Valgrind's clone implementation does not support specifiying a child_stack
 766   // without CLONE_VM, so we cannot use libc's clone wrapper when running under
 767   // Valgrind. As a result, the libc pid cache may be incorrect under Valgrind.
 768   // See crbug.com/442817 for more details.
 769   if (IsRunningOnValgrind()) {
 770     // See kernel/fork.c in Linux. There is different ordering of sys_clone
 771     // parameters depending on CONFIG_CLONE_BACKWARDS* configuration options.
 772 #if defined(ARCH_CPU_X86_64)
 773     return syscall(__NR_clone, flags, nullptr, ptid, ctid, nullptr);
 774 #elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \
 775     defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_MIPS64_FAMILY)
 776     // CONFIG_CLONE_BACKWARDS defined.
 777     return syscall(__NR_clone, flags, nullptr, ptid, nullptr, ctid);
 778 #else
 779 #error "Unsupported architecture"
 780 #endif
 781   }
 782
 783   jmp_buf env;
 784   if (setjmp(env) == 0) {
 785     return CloneAndLongjmpInChild(flags, ptid, ctid, &env);
 786   }
 787
 788   return 0;
 789 }
 790 #endif  // defined(OS_LINUX) || defined(OS_NACL_NONSFI)
 791
 792 }  // namespace base