1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/process/kill.h"
12 #include "base/files/file_util.h"
13 #include "base/files/scoped_file.h"
14 #include "base/logging.h"
15 #include "base/posix/eintr_wrapper.h"
// Timeout, in seconds, that EnsureProcessTerminated() passes to
// WaitForChildToDie() before the child is forcibly killed.
21 const int kWaitBeforeKillSeconds
= 2;
23 // Reap |child| process. This call blocks until completion.
// Issues waitpid(child, NULL, 0): no options are set, so the call blocks, and
// the NULL status pointer means the child's exit status is discarded. The call
// is wrapped in HANDLE_EINTR (base/posix/eintr_wrapper.h), which is expected
// to retry when the call is interrupted by a signal.
24 void BlockingReap(pid_t child
) {
25 const pid_t result
= HANDLE_EINTR(waitpid(child
, NULL
, 0));
// NOTE(review): the condition guarding this log is not visible here --
// presumably it fires only when waitpid() returned -1; confirm.
27 DPLOG(ERROR
) << "waitpid(" << child
<< ", NULL, 0)";
31 // Waits for |timeout| seconds for the given |child| to exit and reap it. If
32 // the child doesn't exit within the time specified, kills it.
34 // This function takes two approaches: first, it tries to use kqueue to
35 // observe when the process exits. kevent can monitor a kqueue with a
36 // timeout, so this method is preferred to wait for a specified period of
37 // time. Once the kqueue indicates the process has exited, waitpid will reap
38 // the exited child. If the kqueue doesn't provide an exit event notification,
39 // before the timeout expires, or if the kqueue fails or misbehaves, the
40 // process will be mercilessly killed and reaped.
42 // A child process passed to this function may be in one of several states:
43 // running, terminated and not yet reaped, and (apparently, and unfortunately)
44 // terminated and already reaped. Normally, a process will at least have been
45 // asked to exit before this function is called, but this is not required.
46 // If a process is terminating and unreaped, there may be a window between the
47 // time that kqueue will no longer recognize it and when it becomes an actual
48 // zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
49 // detected when kqueue indicates that the process is not running and a
50 // non-blocking waitpid fails to reap the process but indicates that it is
51 // still running. In this event, a blocking attempt to reap the process
52 // collects the known-dying child, preventing zombies from congregating.
54 // In the event that the kqueue misbehaves entirely, as it might under a
55 // EMFILE condition ("too many open files", or out of file descriptors), this
56 // function will forcibly kill and reap the child without delay. This
57 // eliminates another potential zombie vector. (If you're out of file
58 // descriptors, you're probably deep into something else, but that doesn't
59 // mean that zombies should be allowed to kick you while you're down.)
61 // The fact that this function seemingly can be called to wait on a child
62 // that's not only already terminated but already reaped is a bit of a
63 // problem: a reaped child's pid can be reclaimed and may refer to a distinct
64 // process in that case. The fact that this function can seemingly be called
65 // to wait on a process that's not even a child is also a problem: kqueue will
66 // work in that case, but waitpid won't, and killing a non-child might not be
// the best approach.
// |child| is the pid to wait for and reap; |timeout| is the wait budget in
// whole seconds (it is converted with TimeDelta::FromSeconds below).
68 void WaitForChildToDie(pid_t child
, int timeout
) {
72 // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
73 // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
74 // call fails, this function should fall back to the last resort of trying
75 // to kill and reap the process. Not observing this rule will resurrect
// zombies.
// Create the kqueue; the descriptor is held by a ScopedFD.
80 ScopedFD
kq(HANDLE_EINTR(kqueue()));
// NOTE(review): the check guarding this log is not visible here --
// presumably it fires only when kqueue() failed; confirm.
82 DPLOG(ERROR
) << "kqueue()";
// Describe the event of interest: an EVFILT_PROC filter on |child| with the
// NOTE_EXIT flag, added to the queue with EV_ADD.
84 struct kevent change
= {0};
85 EV_SET(&change
, child
, EVFILT_PROC
, EV_ADD
, NOTE_EXIT
, 0, NULL
);
// Submit the change only: no event list and no timeout are passed.
86 result
= HANDLE_EINTR(kevent(kq
.get(), &change
, 1, NULL
, 0, NULL
));
// NOTE(review): the branch structure around this log is not visible here --
// presumably this is the path where registering the event failed; confirm.
90 DPLOG(ERROR
) << "kevent (setup " << child
<< ")";
92 // At this point, one of the following has occurred:
93 // 1. The process has died but has not yet been reaped.
94 // 2. The process has died and has already been reaped.
95 // 3. The process is in the process of dying. It's no longer
96 // kqueueable, but it may not be waitable yet either. Mark calls
97 // this case the "zombie death race".
// Non-blocking reap attempt (WNOHANG) to tell the three cases apart.
99 result
= HANDLE_EINTR(waitpid(child
, NULL
, WNOHANG
));
102 // A positive result indicates case 1. waitpid succeeded and reaped
103 // the child. A result of -1 indicates case 2. The child has already
104 // been reaped. In both of these cases, no further action is
// necessary.
109 // |result| is 0, indicating case 3. The process will be waitable in
110 // short order. Fall back out of the kqueue code to kill it (for good
111 // measure) and reap it.
114 // Keep track of the elapsed time to be able to restart kevent if it's
// interrupted.
116 TimeDelta remaining_delta
= TimeDelta::FromSeconds(timeout
);
117 TimeTicks deadline
= TimeTicks::Now() + remaining_delta
;
119 struct kevent event
= {0};
// Wait for the exit event until the time budget is used up.
120 while (remaining_delta
.InMilliseconds() > 0) {
121 const struct timespec remaining_timespec
= remaining_delta
.ToTimeSpec();
// This kevent call is not wrapped in HANDLE_EINTR: EINTR is handled
// explicitly below so the remaining time can be recomputed from |deadline|
// before waiting again.
122 result
= kevent(kq
.get(), NULL
, 0, &event
, 1, &remaining_timespec
);
123 if (result
== -1 && errno
== EINTR
) {
124 remaining_delta
= deadline
- TimeTicks::Now();
// NOTE(review): the lines between the EINTR handling and this log are not
// visible here -- presumably this reports a kevent failure; confirm.
132 DPLOG(ERROR
) << "kevent (wait " << child
<< ")";
// Only one event slot was supplied, so more than one result is unexpected.
133 } else if (result
> 1) {
134 DLOG(ERROR
) << "kevent (wait " << child
<< "): unexpected result "
136 } else if (result
== 1) {
// Accept the event only if it is the exit notification for |child|.
137 if ((event
.fflags
& NOTE_EXIT
) &&
138 (event
.ident
== static_cast<uintptr_t>(child
))) {
139 // The process is dead or dying. This won't block for long, if at
// all.
144 DLOG(ERROR
) << "kevent (wait " << child
145 << "): unexpected event: fflags=" << event
.fflags
146 << ", ident=" << event
.ident
;
152 // The child is still alive, or is very freshly dead. Be sure by sending it
153 // a signal. This is safe even if it's freshly dead, because it will be a
154 // zombie (or on the way to zombiedom) and kill will return 0 even if the
155 // signal is not delivered to a live process.
156 result
= kill(child
, SIGKILL
);
// NOTE(review): the condition guarding this log is not visible here --
// presumably it fires only when kill() failed; confirm.
158 DPLOG(ERROR
) << "kill(" << child
<< ", SIGKILL)";
160 // The child is definitely on the way out now. BlockingReap won't need to
161 // wait for long, if at all.
// Hands |process| to WaitForChildToDie() with a timeout of
// kWaitBeforeKillSeconds seconds.
168 void EnsureProcessTerminated(ProcessHandle process
) {
169 WaitForChildToDie(process
, kWaitBeforeKillSeconds
);