1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/process/launch.h"
14 #include <sys/resource.h>
15 #include <sys/syscall.h>
17 #include <sys/types.h>
25 #include "base/command_line.h"
26 #include "base/compiler_specific.h"
27 #include "base/debug/debugger.h"
28 #include "base/debug/stack_trace.h"
29 #include "base/files/dir_reader_posix.h"
30 #include "base/files/file_util.h"
31 #include "base/files/scoped_file.h"
32 #include "base/logging.h"
33 #include "base/memory/scoped_ptr.h"
34 #include "base/posix/eintr_wrapper.h"
35 #include "base/process/process.h"
36 #include "base/process/process_metrics.h"
37 #include "base/strings/stringprintf.h"
38 #include "base/synchronization/waitable_event.h"
39 #include "base/third_party/dynamic_annotations/dynamic_annotations.h"
40 #include "base/third_party/valgrind/valgrind.h"
41 #include "base/threading/platform_thread.h"
42 #include "base/threading/thread_restrictions.h"
43 #include "build/build_config.h"
46 #include <sys/prctl.h>
49 #if defined(OS_CHROMEOS)
50 #include <sys/ioctl.h>
53 #if defined(OS_FREEBSD)
54 #include <sys/event.h>
55 #include <sys/ucontext.h>
58 #if defined(OS_MACOSX)
59 #include <crt_externs.h>
60 #include <sys/event.h>
62 extern char** environ
;
67 #if !defined(OS_NACL_NONSFI)
// Returns the process's "environment" (i.e. the thing that setenv/getenv
// work with).
char** GetEnvironment() {
#if defined(OS_MACOSX)
  // On OS X the environment pointer is obtained through _NSGetEnviron().
  return *_NSGetEnviron();
#else
  // NOTE(review): non-Mac branch restored from the |environ| declaration
  // above; confirm against upstream.
  return environ;
#endif
}
// Replaces the process's "environment" (i.e. the thing that setenv/getenv
// work with) with |env|. The pointer is stored as-is; nothing is copied.
void SetEnvironment(char** env) {
#if defined(OS_MACOSX)
  *_NSGetEnviron() = env;
#else
  // NOTE(review): non-Mac branch restored from the |environ| declaration
  // above; confirm against upstream.
  environ = env;
#endif
}
91 // Set the calling thread's signal mask to new_sigmask and return
92 // the previous signal mask.
93 sigset_t
SetSignalMask(const sigset_t
& new_sigmask
) {
95 #if defined(OS_ANDROID)
96 // POSIX says pthread_sigmask() must be used in multi-threaded processes,
97 // but Android's pthread_sigmask() was broken until 4.1:
98 // https://code.google.com/p/android/issues/detail?id=15337
99 // http://stackoverflow.com/questions/13777109/pthread-sigmask-on-android-not-working
100 RAW_CHECK(sigprocmask(SIG_SETMASK
, &new_sigmask
, &old_sigmask
) == 0);
102 RAW_CHECK(pthread_sigmask(SIG_SETMASK
, &new_sigmask
, &old_sigmask
) == 0);
107 #if !defined(OS_LINUX) || \
108 (!defined(__i386__) && !defined(__x86_64__) && !defined(__arm__))
// Restores the default disposition (SIG_DFL) for a fixed set of signals.
// The previous signal handlers are likely to be meaningless in the child's
// context so we reset them to the defaults for now. http://crbug.com/44953
// These signal handlers are set up at least in browser_main_posix.cc:
// BrowserMainPartsPosix::PreEarlyInitialization and stack_trace_posix.cc:
// EnableInProcessStackDumping.
void ResetChildSignalHandlersToDefaults() {
  // Constant table, so iterating it performs no allocation.
  static const int kSignalsToReset[] = {
      SIGHUP,  SIGINT,  SIGILL, SIGABRT, SIGFPE,
      SIGBUS,  SIGSEGV, SIGSYS, SIGTERM,
  };
  const size_t kCount = sizeof(kSignalsToReset) / sizeof(kSignalsToReset[0]);
  for (size_t i = 0; i < kCount; ++i) {
    signal(kSignalsToReset[i], SIG_DFL);
  }
}
128 // TODO(jln): remove the Linux special case once kernels are fixed.
// Internally the kernel makes sigset_t an array of long large enough to have
// one bit per signal.
typedef uint64_t kernel_sigset_t;

// This is what struct sigaction looks like to the kernel at least on X86 and
// ARM. MIPS, for instance, is very different.
struct kernel_sigaction {
  // Handler; for this usage it only needs to be a generic pointer.
  void* k_sa_handler;
  unsigned long k_sa_flags;
  // Restorer; for this usage it only needs to be a generic pointer.
  void* k_sa_restorer;
  kernel_sigset_t k_sa_mask;
};
143 // glibc's sigaction() will prevent access to sa_restorer, so we need to roll
145 int sys_rt_sigaction(int sig
, const struct kernel_sigaction
* act
,
146 struct kernel_sigaction
* oact
) {
147 return syscall(SYS_rt_sigaction
, sig
, act
, oact
, sizeof(kernel_sigset_t
));
150 // This function is intended to be used in between fork() and execve() and will
151 // reset all signal handlers to the default.
152 // The motivation for going through all of them is that sa_restorer can leak
153 // from parents and help defeat ASLR on buggy kernels. We reset it to NULL.
154 // See crbug.com/177956.
155 void ResetChildSignalHandlersToDefaults(void) {
156 for (int signum
= 1; ; ++signum
) {
157 struct kernel_sigaction act
= {0};
158 int sigaction_get_ret
= sys_rt_sigaction(signum
, NULL
, &act
);
159 if (sigaction_get_ret
&& errno
== EINVAL
) {
161 // Linux supports 32 real-time signals from 33 to 64.
162 // If the number of signals in the Linux kernel changes, someone should
163 // look at this code.
164 const int kNumberOfSignals
= 64;
165 RAW_CHECK(signum
== kNumberOfSignals
+ 1);
166 #endif // !defined(NDEBUG)
169 // All other failures are fatal.
170 if (sigaction_get_ret
) {
171 RAW_LOG(FATAL
, "sigaction (get) failed.");
174 // The kernel won't allow to re-set SIGKILL or SIGSTOP.
175 if (signum
!= SIGSTOP
&& signum
!= SIGKILL
) {
176 act
.k_sa_handler
= reinterpret_cast<void*>(SIG_DFL
);
177 act
.k_sa_restorer
= NULL
;
178 if (sys_rt_sigaction(signum
, &act
, NULL
)) {
179 RAW_LOG(FATAL
, "sigaction (set) failed.");
183 // Now ask the kernel again and check that no restorer will leak.
184 if (sys_rt_sigaction(signum
, NULL
, &act
) || act
.k_sa_restorer
) {
185 RAW_LOG(FATAL
, "Cound not fix sa_restorer.");
187 #endif // !defined(NDEBUG)
190 #endif // !defined(OS_LINUX) ||
191 // (!defined(__i386__) && !defined(__x86_64__) && !defined(__arm__))
192 } // anonymous namespace
194 // Functor for |ScopedDIR| (below).
195 struct ScopedDIRClose
{
196 inline void operator()(DIR* x
) const {
202 // Automatically closes |DIR*|s.
203 typedef scoped_ptr
<DIR, ScopedDIRClose
> ScopedDIR
;
// Path of the directory whose entries name the file descriptors open in the
// current process. Platforms not listed here get no definition.
#if defined(OS_LINUX) || defined(OS_ANDROID)
static const char kFDDir[] = "/proc/self/fd";
#elif defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) || \
    defined(OS_OPENBSD)
static const char kFDDir[] = "/dev/fd";
#endif
219 void CloseSuperfluousFds(const base::InjectiveMultimap
& saved_mapping
) {
220 // DANGER: no calls to malloc or locks are allowed from now on:
221 // http://crbug.com/36678
223 // Get the maximum number of FDs possible.
224 size_t max_fds
= GetMaxFds();
226 DirReaderPosix
fd_dir(kFDDir
);
227 if (!fd_dir
.IsValid()) {
228 // Fallback case: Try every possible fd.
229 for (size_t i
= 0; i
< max_fds
; ++i
) {
230 const int fd
= static_cast<int>(i
);
231 if (fd
== STDIN_FILENO
|| fd
== STDOUT_FILENO
|| fd
== STDERR_FILENO
)
233 // Cannot use STL iterators here, since debug iterators use locks.
235 for (j
= 0; j
< saved_mapping
.size(); j
++) {
236 if (fd
== saved_mapping
[j
].dest
)
239 if (j
< saved_mapping
.size())
242 // Since we're just trying to close anything we can find,
243 // ignore any error return values of close().
249 const int dir_fd
= fd_dir
.fd();
251 for ( ; fd_dir
.Next(); ) {
252 // Skip . and .. entries.
253 if (fd_dir
.name()[0] == '.')
258 const long int fd
= strtol(fd_dir
.name(), &endptr
, 10);
259 if (fd_dir
.name()[0] == 0 || *endptr
|| fd
< 0 || errno
)
261 if (fd
== STDIN_FILENO
|| fd
== STDOUT_FILENO
|| fd
== STDERR_FILENO
)
263 // Cannot use STL iterators here, since debug iterators use locks.
265 for (i
= 0; i
< saved_mapping
.size(); i
++) {
266 if (fd
== saved_mapping
[i
].dest
)
269 if (i
< saved_mapping
.size())
274 // When running under Valgrind, Valgrind opens several FDs for its
275 // own use and will complain if we try to close them. All of
276 // these FDs are >= |max_fds|, so we can check against that here
277 // before closing. See https://bugs.kde.org/show_bug.cgi?id=191758
278 if (fd
< static_cast<int>(max_fds
)) {
279 int ret
= IGNORE_EINTR(close(fd
));
285 Process
LaunchProcess(const CommandLine
& cmdline
,
286 const LaunchOptions
& options
) {
287 return LaunchProcess(cmdline
.argv(), options
);
290 Process
LaunchProcess(const std::vector
<std::string
>& argv
,
291 const LaunchOptions
& options
) {
292 size_t fd_shuffle_size
= 0;
293 if (options
.fds_to_remap
) {
294 fd_shuffle_size
= options
.fds_to_remap
->size();
297 InjectiveMultimap fd_shuffle1
;
298 InjectiveMultimap fd_shuffle2
;
299 fd_shuffle1
.reserve(fd_shuffle_size
);
300 fd_shuffle2
.reserve(fd_shuffle_size
);
302 scoped_ptr
<char* []> argv_cstr(new char* [argv
.size() + 1]);
303 for (size_t i
= 0; i
< argv
.size(); i
++) {
304 argv_cstr
[i
] = const_cast<char*>(argv
[i
].c_str());
306 argv_cstr
[argv
.size()] = NULL
;
308 scoped_ptr
<char*[]> new_environ
;
309 char* const empty_environ
= NULL
;
310 char* const* old_environ
= GetEnvironment();
311 if (options
.clear_environ
)
312 old_environ
= &empty_environ
;
313 if (!options
.environ
.empty())
314 new_environ
= AlterEnvironment(old_environ
, options
.environ
);
316 sigset_t full_sigset
;
317 sigfillset(&full_sigset
);
318 const sigset_t orig_sigmask
= SetSignalMask(full_sigset
);
320 const char* current_directory
= nullptr;
321 if (!options
.current_directory
.empty()) {
322 current_directory
= options
.current_directory
.value().c_str();
326 #if defined(OS_LINUX)
327 if (options
.clone_flags
) {
328 // Signal handling in this function assumes the creation of a new
329 // process, so we check that a thread is not being created by mistake
330 // and that signal handling follows the process-creation rules.
332 !(options
.clone_flags
& (CLONE_SIGHAND
| CLONE_THREAD
| CLONE_VM
)));
334 // We specify a null ptid and ctid.
336 !(options
.clone_flags
&
337 (CLONE_CHILD_CLEARTID
| CLONE_CHILD_SETTID
| CLONE_PARENT_SETTID
)));
339 // Since we use waitpid, we do not support custom termination signals in the clone flags.
341 RAW_CHECK((options
.clone_flags
& 0xff) == 0);
343 pid
= ForkWithFlags(options
.clone_flags
| SIGCHLD
, nullptr, nullptr);
350 // Always restore the original signal mask in the parent.
352 SetSignalMask(orig_sigmask
);
356 DPLOG(ERROR
) << "fork";
358 } else if (pid
== 0) {
361 // DANGER: no calls to malloc or locks are allowed from now on:
362 // http://crbug.com/36678
364 // DANGER: fork() rule: in the child, if you don't end up doing exec*(),
365 // you call _exit() instead of exit(). This is because _exit() does not
366 // call any previously-registered (in the parent) exit handlers, which
367 // might do things like block waiting for threads that don't even exist in the child.
370 // If a child process uses the readline library, the process blocks forever.
371 // In BSD like OSes including OS X it is safe to assign /dev/null as stdin.
372 // See http://crbug.com/56596.
373 base::ScopedFD
null_fd(HANDLE_EINTR(open("/dev/null", O_RDONLY
)));
374 if (!null_fd
.is_valid()) {
375 RAW_LOG(ERROR
, "Failed to open /dev/null");
379 int new_fd
= HANDLE_EINTR(dup2(null_fd
.get(), STDIN_FILENO
));
380 if (new_fd
!= STDIN_FILENO
) {
381 RAW_LOG(ERROR
, "Failed to dup /dev/null for stdin");
385 if (options
.new_process_group
) {
386 // Instead of inheriting the process group ID of the parent, the child
387 // starts off a new process group with pgid equal to its process ID.
388 if (setpgid(0, 0) < 0) {
389 RAW_LOG(ERROR
, "setpgid failed");
394 if (options
.maximize_rlimits
) {
395 // Some resource limits need to be maximal in this child.
396 for (size_t i
= 0; i
< options
.maximize_rlimits
->size(); ++i
) {
397 const int resource
= (*options
.maximize_rlimits
)[i
];
399 if (getrlimit(resource
, &limit
) < 0) {
400 RAW_LOG(WARNING
, "getrlimit failed");
401 } else if (limit
.rlim_cur
< limit
.rlim_max
) {
402 limit
.rlim_cur
= limit
.rlim_max
;
403 if (setrlimit(resource
, &limit
) < 0) {
404 RAW_LOG(WARNING
, "setrlimit failed");
410 #if defined(OS_MACOSX)
411 RestoreDefaultExceptionHandler();
412 #endif // defined(OS_MACOSX)
414 ResetChildSignalHandlersToDefaults();
415 SetSignalMask(orig_sigmask
);
418 // When debugging it can be helpful to check that we really aren't making
419 // any hidden calls to malloc.
421 reinterpret_cast<void*>(reinterpret_cast<intptr_t>(malloc
) & ~4095);
422 mprotect(malloc_thunk
, 4096, PROT_READ
| PROT_WRITE
| PROT_EXEC
);
423 memset(reinterpret_cast<void*>(malloc
), 0xff, 8);
426 #if defined(OS_CHROMEOS)
427 if (options
.ctrl_terminal_fd
>= 0) {
428 // Set process' controlling terminal.
429 if (HANDLE_EINTR(setsid()) != -1) {
431 ioctl(options
.ctrl_terminal_fd
, TIOCSCTTY
, NULL
)) == -1) {
432 RAW_LOG(WARNING
, "ioctl(TIOCSCTTY), ctrl terminal not set");
435 RAW_LOG(WARNING
, "setsid failed, ctrl terminal not set");
438 #endif // defined(OS_CHROMEOS)
440 if (options
.fds_to_remap
) {
441 // Cannot use STL iterators here, since debug iterators use locks.
442 for (size_t i
= 0; i
< options
.fds_to_remap
->size(); ++i
) {
443 const FileHandleMappingVector::value_type
& value
=
444 (*options
.fds_to_remap
)[i
];
445 fd_shuffle1
.push_back(InjectionArc(value
.first
, value
.second
, false));
446 fd_shuffle2
.push_back(InjectionArc(value
.first
, value
.second
, false));
450 if (!options
.environ
.empty() || options
.clear_environ
)
451 SetEnvironment(new_environ
.get());
453 // fd_shuffle1 is mutated by this call because it cannot malloc.
454 if (!ShuffleFileDescriptors(&fd_shuffle1
))
457 CloseSuperfluousFds(fd_shuffle2
);
459 // Set NO_NEW_PRIVS by default. Since NO_NEW_PRIVS only exists in kernel
460 // 3.5+, do not check the return value of prctl here.
461 #if defined(OS_LINUX)
462 #ifndef PR_SET_NO_NEW_PRIVS
463 #define PR_SET_NO_NEW_PRIVS 38
465 if (!options
.allow_new_privs
) {
466 if (prctl(PR_SET_NO_NEW_PRIVS
, 1, 0, 0, 0) && errno
!= EINVAL
) {
467 // Only log if the error is not EINVAL (i.e. not supported).
468 RAW_LOG(FATAL
, "prctl(PR_SET_NO_NEW_PRIVS) failed");
472 if (options
.kill_on_parent_death
) {
473 if (prctl(PR_SET_PDEATHSIG
, SIGKILL
) != 0) {
474 RAW_LOG(ERROR
, "prctl(PR_SET_PDEATHSIG) failed");
480 if (current_directory
!= nullptr) {
481 RAW_CHECK(chdir(current_directory
) == 0);
484 if (options
.pre_exec_delegate
!= nullptr) {
485 options
.pre_exec_delegate
->RunAsyncSafe();
488 execvp(argv_cstr
[0], argv_cstr
.get());
490 RAW_LOG(ERROR
, "LaunchProcess: failed to execvp:");
491 RAW_LOG(ERROR
, argv_cstr
[0]);
496 // While this isn't strictly disk IO, waiting for another process to
497 // finish is the sort of thing ThreadRestrictions is trying to prevent.
498 base::ThreadRestrictions::AssertIOAllowed();
499 pid_t ret
= HANDLE_EINTR(waitpid(pid
, 0, 0));
// Intentionally a no-op on POSIX.
void RaiseProcessToHighPriority() {
  // On POSIX, we don't actually do anything here.  We could try to nice() or
  // setpriority() or sched_getscheduler, but these all require extra rights.
}
512 // Return value used by GetAppOutputInternal to encapsulate the various exit
513 // scenarios from the function.
514 enum GetAppOutputInternalResult
{
520 // Executes the application specified by |argv| and wait for it to exit. Stores
521 // the output (stdout) in |output|. If |do_search_path| is set, it searches the
522 // path for the application; in that case, |envp| must be null, and it will use
523 // the current environment. If |do_search_path| is false, |argv[0]| should fully
524 // specify the path of the application, and |envp| will be used as the
525 // environment. If |include_stderr| is true, includes stderr otherwise redirects it to /dev/null.
527 // If we successfully start the application and get all requested output, we
528 // return GOT_MAX_OUTPUT, or if there is a problem starting or exiting
529 // the application we return EXECUTE_FAILURE. Otherwise we return EXECUTE_SUCCESS.
530 // The GOT_MAX_OUTPUT return value exists so a caller that asks for limited
531 // output can treat this as a success, despite having an exit code of SIGPIPE
532 // due to us closing the output pipe.
533 // In the case of EXECUTE_SUCCESS, the application exit code will be returned
534 // in |*exit_code|, which should be checked to determine if the application ran successfully.
536 static GetAppOutputInternalResult
GetAppOutputInternal(
537 const std::vector
<std::string
>& argv
,
544 // Doing a blocking wait for another command to finish counts as IO.
545 base::ThreadRestrictions::AssertIOAllowed();
546 // exit_code must be supplied so calling function can determine success.
548 *exit_code
= EXIT_FAILURE
;
552 InjectiveMultimap fd_shuffle1
, fd_shuffle2
;
553 scoped_ptr
<char*[]> argv_cstr(new char*[argv
.size() + 1]);
555 fd_shuffle1
.reserve(3);
556 fd_shuffle2
.reserve(3);
558 // Either |do_search_path| should be false or |envp| should be null, but not both.
560 DCHECK(!do_search_path
^ !envp
);
562 if (pipe(pipe_fd
) < 0)
563 return EXECUTE_FAILURE
;
565 switch (pid
= fork()) {
569 return EXECUTE_FAILURE
;
572 // DANGER: no calls to malloc or locks are allowed from now on:
573 // http://crbug.com/36678
575 #if defined(OS_MACOSX)
576 RestoreDefaultExceptionHandler();
579 // Obscure fork() rule: in the child, if you don't end up doing exec*(),
580 // you call _exit() instead of exit(). This is because _exit() does not
581 // call any previously-registered (in the parent) exit handlers, which
582 // might do things like block waiting for threads that don't even exist in the child.
584 int dev_null
= open("/dev/null", O_WRONLY
);
588 fd_shuffle1
.push_back(InjectionArc(pipe_fd
[1], STDOUT_FILENO
, true));
589 fd_shuffle1
.push_back(InjectionArc(
590 include_stderr
? pipe_fd
[1] : dev_null
,
591 STDERR_FILENO
, true));
592 fd_shuffle1
.push_back(InjectionArc(dev_null
, STDIN_FILENO
, true));
593 // Adding another element here? Remember to increase the argument to reserve() above.
596 for (size_t i
= 0; i
< fd_shuffle1
.size(); ++i
)
597 fd_shuffle2
.push_back(fd_shuffle1
[i
]);
599 if (!ShuffleFileDescriptors(&fd_shuffle1
))
602 CloseSuperfluousFds(fd_shuffle2
);
604 for (size_t i
= 0; i
< argv
.size(); i
++)
605 argv_cstr
[i
] = const_cast<char*>(argv
[i
].c_str());
606 argv_cstr
[argv
.size()] = NULL
;
608 execvp(argv_cstr
[0], argv_cstr
.get());
610 execve(argv_cstr
[0], argv_cstr
.get(), envp
);
615 // Close our writing end of pipe now. Otherwise later read would not
616 // be able to detect end of child's output (in theory we could still
617 // write to the pipe).
622 size_t output_buf_left
= max_output
;
623 ssize_t bytes_read
= 1; // A lie to properly handle |max_output == 0|
624 // case in the logic below.
626 while (output_buf_left
> 0) {
627 bytes_read
= HANDLE_EINTR(read(pipe_fd
[0], buffer
,
628 std::min(output_buf_left
, sizeof(buffer
))));
631 output
->append(buffer
, bytes_read
);
632 output_buf_left
-= static_cast<size_t>(bytes_read
);
636 // Always wait for exit code (even if we know we'll declare GOT_MAX_OUTPUT).
638 Process
process(pid
);
639 bool success
= process
.WaitForExit(exit_code
);
641 // If we stopped because we read as much as we wanted, we return
642 // GOT_MAX_OUTPUT (because the child may exit due to |SIGPIPE|).
643 if (!output_buf_left
&& bytes_read
> 0)
644 return GOT_MAX_OUTPUT
;
646 return EXECUTE_SUCCESS
;
647 return EXECUTE_FAILURE
;
652 bool GetAppOutput(const CommandLine
& cl
, std::string
* output
) {
653 return GetAppOutput(cl
.argv(), output
);
656 bool GetAppOutput(const std::vector
<std::string
>& argv
, std::string
* output
) {
657 // Run |execve()| with the current environment and store "unlimited" data.
659 GetAppOutputInternalResult result
= GetAppOutputInternal(
660 argv
, NULL
, false, output
, std::numeric_limits
<std::size_t>::max(), true,
662 return result
== EXECUTE_SUCCESS
&& exit_code
== EXIT_SUCCESS
;
665 bool GetAppOutputAndError(const CommandLine
& cl
, std::string
* output
) {
666 // Run |execve()| with the current environment and store "unlimited" data.
668 GetAppOutputInternalResult result
= GetAppOutputInternal(
669 cl
.argv(), NULL
, true, output
, std::numeric_limits
<std::size_t>::max(),
671 return result
== EXECUTE_SUCCESS
&& exit_code
== EXIT_SUCCESS
;
674 // TODO(viettrungluu): Conceivably, we should have a timeout as well, so we
675 // don't hang if what we're calling hangs.
676 bool GetAppOutputRestricted(const CommandLine
& cl
,
677 std::string
* output
, size_t max_output
) {
678 // Run |execve()| with the empty environment.
679 char* const empty_environ
= NULL
;
681 GetAppOutputInternalResult result
= GetAppOutputInternal(
682 cl
.argv(), &empty_environ
, false, output
, max_output
, false, &exit_code
);
683 return result
== GOT_MAX_OUTPUT
|| (result
== EXECUTE_SUCCESS
&&
684 exit_code
== EXIT_SUCCESS
);
687 bool GetAppOutputWithExitCode(const CommandLine
& cl
,
690 // Run |execve()| with the current environment and store "unlimited" data.
691 GetAppOutputInternalResult result
= GetAppOutputInternal(
692 cl
.argv(), NULL
, false, output
, std::numeric_limits
<std::size_t>::max(),
694 return result
== EXECUTE_SUCCESS
;
697 #endif // !defined(OS_NACL_NONSFI)
699 #if defined(OS_LINUX) || defined(OS_NACL_NONSFI)
702 bool IsRunningOnValgrind() {
703 return RUNNING_ON_VALGRIND
;
706 // This function runs on the stack specified on the clone call. It uses longjmp
707 // to switch back to the original stack so the child can return from sys_clone.
708 int CloneHelper(void* arg
) {
709 jmp_buf* env_ptr
= reinterpret_cast<jmp_buf*>(arg
);
710 longjmp(*env_ptr
, 1);
712 // Should not be reached.
717 // This function is noinline to ensure that stack_buf is below the stack pointer
718 // that is saved when setjmp is called below. This is needed because when
719 // compiled with FORTIFY_SOURCE, glibc's longjmp checks that the stack is moved
720 // upwards. See crbug.com/442912 for more details.
721 #if defined(ADDRESS_SANITIZER)
722 // Disable AddressSanitizer instrumentation for this function to make sure
723 // |stack_buf| is allocated on thread stack instead of ASan's fake stack.
724 // Under ASan longjmp() will attempt to clean up the area between the old and
725 // new stack pointers and print a warning that may confuse the user.
726 __attribute__((no_sanitize_address
))
728 NOINLINE pid_t
CloneAndLongjmpInChild(unsigned long flags
,
732 // We use the libc clone wrapper instead of making the syscall
733 // directly because making the syscall may fail to update the libc's
734 // internal pid cache. The libc interface unfortunately requires
735 // specifying a new stack, so we use setjmp/longjmp to emulate
736 // fork-like behavior.
737 char stack_buf
[PTHREAD_STACK_MIN
];
738 #if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
739 defined(ARCH_CPU_MIPS64_FAMILY) || defined(ARCH_CPU_MIPS_FAMILY)
740 // The stack grows downward.
741 void* stack
= stack_buf
+ sizeof(stack_buf
);
743 #error "Unsupported architecture"
745 return clone(&CloneHelper
, stack
, flags
, env
, ptid
, nullptr, ctid
);
748 } // anonymous namespace
750 pid_t
ForkWithFlags(unsigned long flags
, pid_t
* ptid
, pid_t
* ctid
) {
751 const bool clone_tls_used
= flags
& CLONE_SETTLS
;
752 const bool invalid_ctid
=
753 (flags
& (CLONE_CHILD_SETTID
| CLONE_CHILD_CLEARTID
)) && !ctid
;
754 const bool invalid_ptid
= (flags
& CLONE_PARENT_SETTID
) && !ptid
;
756 // We do not support CLONE_VM.
757 const bool clone_vm_used
= flags
& CLONE_VM
;
759 if (clone_tls_used
|| invalid_ctid
|| invalid_ptid
|| clone_vm_used
) {
760 RAW_LOG(FATAL
, "Invalid usage of ForkWithFlags");
763 // Valgrind's clone implementation does not support specifying a child_stack
764 // without CLONE_VM, so we cannot use libc's clone wrapper when running under
765 // Valgrind. As a result, the libc pid cache may be incorrect under Valgrind.
766 // See crbug.com/442817 for more details.
767 if (IsRunningOnValgrind()) {
768 // See kernel/fork.c in Linux. There is different ordering of sys_clone
769 // parameters depending on CONFIG_CLONE_BACKWARDS* configuration options.
770 #if defined(ARCH_CPU_X86_64)
771 return syscall(__NR_clone
, flags
, nullptr, ptid
, ctid
, nullptr);
772 #elif defined(ARCH_CPU_X86) || defined(ARCH_CPU_ARM_FAMILY) || \
773 defined(ARCH_CPU_MIPS_FAMILY) || defined(ARCH_CPU_MIPS64_FAMILY)
774 // CONFIG_CLONE_BACKWARDS defined.
775 return syscall(__NR_clone
, flags
, nullptr, ptid
, nullptr, ctid
);
777 #error "Unsupported architecture"
782 if (setjmp(env
) == 0) {
783 return CloneAndLongjmpInChild(flags
, ptid
, ctid
, &env
);
788 #endif // defined(OS_LINUX) || defined(OS_NACL_NONSFI)