base/process_util_mac.mm

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/process_util.h"
   6
   7 #import <Cocoa/Cocoa.h>
   8 #include <crt_externs.h>
   9 #include <dlfcn.h>
  10 #include <errno.h>
  11 #include <mach/mach.h>
  12 #include <mach/mach_init.h>
  13 #include <mach/mach_vm.h>
  14 #include <mach/shared_region.h>
  15 #include <mach/task.h>
  16 #include <mach-o/nlist.h>
  17 #include <malloc/malloc.h>
  18 #import <objc/runtime.h>
  19 #include <signal.h>
  20 #include <spawn.h>
  21 #include <sys/event.h>
  22 #include <sys/mman.h>
  23 #include <sys/sysctl.h>
  24 #include <sys/types.h>
  25 #include <sys/wait.h>
  26
  27 #include <new>
  28 #include <string>
  29
  30 #include "base/debug/debugger.h"
  31 #include "base/file_util.h"
  32 #include "base/hash_tables.h"
  33 #include "base/lazy_instance.h"
  34 #include "base/logging.h"
  35 #include "base/mac/mac_util.h"
  36 #include "base/mac/scoped_mach_port.h"
  37 #include "base/posix/eintr_wrapper.h"
  38 #include "base/string_util.h"
  39 #include "base/sys_info.h"
  40 #include "base/threading/thread_local.h"
  41 #include "third_party/apple_apsl/CFBase.h"
  42 #include "third_party/apple_apsl/malloc.h"
  43 #include "third_party/mach_override/mach_override.h"
  44
  45 namespace base {
  46
  47 void RestoreDefaultExceptionHandler() {
  48   // This function is tailored to remove the Breakpad exception handler.
  49   // exception_mask matches s_exception_mask in
  50   // breakpad/src/client/mac/handler/exception_handler.cc
  51   const exception_mask_t exception_mask = EXC_MASK_BAD_ACCESS |
  52                                           EXC_MASK_BAD_INSTRUCTION |
  53                                           EXC_MASK_ARITHMETIC |
  54                                           EXC_MASK_BREAKPOINT;
  55
  56   // Setting the exception port to MACH_PORT_NULL may not be entirely
  57   // kosher to restore the default exception handler, but in practice,
  58   // it results in the exception port being set to Apple Crash Reporter,
  59   // the desired behavior.
  60   task_set_exception_ports(mach_task_self(), exception_mask, MACH_PORT_NULL,
  61                            EXCEPTION_DEFAULT, THREAD_STATE_NONE);
  62 }
  63
  64 ProcessIterator::ProcessIterator(const ProcessFilter* filter)
  65     : index_of_kinfo_proc_(0),
  66       filter_(filter) {
  67   // Get a snapshot of all of my processes (yes, as we loop it can go stale, but
  68   // but trying to find where we were in a constantly changing list is basically
  69   // impossible.
  70
  71   int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_UID, geteuid() };
  72
  73   // Since more processes could start between when we get the size and when
  74   // we get the list, we do a loop to keep trying until we get it.
  75   bool done = false;
  76   int try_num = 1;
  77   const int max_tries = 10;
  78   do {
  79     // Get the size of the buffer
  80     size_t len = 0;
  81     if (sysctl(mib, arraysize(mib), NULL, &len, NULL, 0) < 0) {
  82       DLOG(ERROR) << "failed to get the size needed for the process list";
  83       kinfo_procs_.resize(0);
  84       done = true;
  85     } else {
  86       size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
  87       // Leave some spare room for process table growth (more could show up
  88       // between when we check and now)
  89       num_of_kinfo_proc += 16;
  90       kinfo_procs_.resize(num_of_kinfo_proc);
  91       len = num_of_kinfo_proc * sizeof(struct kinfo_proc);
  92       // Load the list of processes
  93       if (sysctl(mib, arraysize(mib), &kinfo_procs_[0], &len, NULL, 0) < 0) {
  94         // If we get a mem error, it just means we need a bigger buffer, so
  95         // loop around again.  Anything else is a real error and give up.
  96         if (errno != ENOMEM) {
  97           DLOG(ERROR) << "failed to get the process list";
  98           kinfo_procs_.resize(0);
  99           done = true;
 100         }
 101       } else {
 102         // Got the list, just make sure we're sized exactly right
 103         size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
 104         kinfo_procs_.resize(num_of_kinfo_proc);
 105         done = true;
 106       }
 107     }
 108   } while (!done && (try_num++ < max_tries));
 109
 110   if (!done) {
 111     DLOG(ERROR) << "failed to collect the process list in a few tries";
 112     kinfo_procs_.resize(0);
 113   }
 114 }
 115
 116 ProcessIterator::~ProcessIterator() {
 117 }
 118
 119 bool ProcessIterator::CheckForNextProcess() {
 120   std::string data;
 121   for (; index_of_kinfo_proc_ < kinfo_procs_.size(); ++index_of_kinfo_proc_) {
 122     kinfo_proc& kinfo = kinfo_procs_[index_of_kinfo_proc_];
 123
 124     // Skip processes just awaiting collection
 125     if ((kinfo.kp_proc.p_pid > 0) && (kinfo.kp_proc.p_stat == SZOMB))
 126       continue;
 127
 128     int mib[] = { CTL_KERN, KERN_PROCARGS, kinfo.kp_proc.p_pid };
 129
 130     // Find out what size buffer we need.
 131     size_t data_len = 0;
 132     if (sysctl(mib, arraysize(mib), NULL, &data_len, NULL, 0) < 0) {
 133       DVPLOG(1) << "failed to figure out the buffer size for a commandline";
 134       continue;
 135     }
 136
 137     data.resize(data_len);
 138     if (sysctl(mib, arraysize(mib), &data[0], &data_len, NULL, 0) < 0) {
 139       DVPLOG(1) << "failed to fetch a commandline";
 140       continue;
 141     }
 142
 143     // |data| contains all the command line parameters of the process, separated
 144     // by blocks of one or more null characters. We tokenize |data| into a
 145     // vector of strings using '\0' as a delimiter and populate
 146     // |entry_.cmd_line_args_|.
 147     std::string delimiters;
 148     delimiters.push_back('\0');
 149     Tokenize(data, delimiters, &entry_.cmd_line_args_);
 150
 151     // |data| starts with the full executable path followed by a null character.
 152     // We search for the first instance of '\0' and extract everything before it
 153     // to populate |entry_.exe_file_|.
 154     size_t exec_name_end = data.find('\0');
 155     if (exec_name_end == std::string::npos) {
 156       DLOG(ERROR) << "command line data didn't match expected format";
 157       continue;
 158     }
 159
 160     entry_.pid_ = kinfo.kp_proc.p_pid;
 161     entry_.ppid_ = kinfo.kp_eproc.e_ppid;
 162     entry_.gid_ = kinfo.kp_eproc.e_pgid;
 163     size_t last_slash = data.rfind('/', exec_name_end);
 164     if (last_slash == std::string::npos)
 165       entry_.exe_file_.assign(data, 0, exec_name_end);
 166     else
 167       entry_.exe_file_.assign(data, last_slash + 1,
 168                               exec_name_end - last_slash - 1);
 169     // Start w/ the next entry next time through
 170     ++index_of_kinfo_proc_;
 171     // Done
 172     return true;
 173   }
 174   return false;
 175 }
 176
 177 bool NamedProcessIterator::IncludeEntry() {
 178   return (executable_name_ == entry().exe_file() &&
 179           ProcessIterator::IncludeEntry());
 180 }
 181
 182
 183 // ------------------------------------------------------------------------
 184 // NOTE: about ProcessMetrics
 185 //
// Getting a Mach task from a pid for another process generally requires
// elevated permissions, so there doesn't really seem to be a way to collect
// these metrics directly (and spinning up ps to fetch each stat seems
// dangerous to put in a base API for anyone to call). Child processes send
// their task port over IPC, so return something if it is available, otherwise
// return 0.
 191 //
 192
 193 ProcessMetrics::ProcessMetrics(ProcessHandle process,
 194                                ProcessMetrics::PortProvider* port_provider)
 195     : process_(process),
 196       last_time_(0),
 197       last_system_time_(0),
 198       port_provider_(port_provider) {
 199   processor_count_ = SysInfo::NumberOfProcessors();
 200 }
 201
 202 // static
 203 ProcessMetrics* ProcessMetrics::CreateProcessMetrics(
 204     ProcessHandle process,
 205     ProcessMetrics::PortProvider* port_provider) {
 206   return new ProcessMetrics(process, port_provider);
 207 }
 208
 209 bool ProcessMetrics::GetIOCounters(IoCounters* io_counters) const {
 210   return false;
 211 }
 212
 213 static bool GetTaskInfo(mach_port_t task, task_basic_info_64* task_info_data) {
 214   if (task == MACH_PORT_NULL)
 215     return false;
 216   mach_msg_type_number_t count = TASK_BASIC_INFO_64_COUNT;
 217   kern_return_t kr = task_info(task,
 218                                TASK_BASIC_INFO_64,
 219                                reinterpret_cast<task_info_t>(task_info_data),
 220                                &count);
 221   // Most likely cause for failure: |task| is a zombie.
 222   return kr == KERN_SUCCESS;
 223 }
 224
 225 size_t ProcessMetrics::GetPagefileUsage() const {
 226   task_basic_info_64 task_info_data;
 227   if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
 228     return 0;
 229   return task_info_data.virtual_size;
 230 }
 231
 232 size_t ProcessMetrics::GetPeakPagefileUsage() const {
 233   return 0;
 234 }
 235
 236 size_t ProcessMetrics::GetWorkingSetSize() const {
 237   task_basic_info_64 task_info_data;
 238   if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
 239     return 0;
 240   return task_info_data.resident_size;
 241 }
 242
 243 size_t ProcessMetrics::GetPeakWorkingSetSize() const {
 244   return 0;
 245 }
 246
 247 static bool GetCPUTypeForProcess(pid_t pid, cpu_type_t* cpu_type) {
 248   size_t len = sizeof(*cpu_type);
 249   int result = sysctlbyname("sysctl.proc_cputype",
 250                             cpu_type,
 251                             &len,
 252                             NULL,
 253                             0);
 254   if (result != 0) {
 255     DPLOG(ERROR) << "sysctlbyname(""sysctl.proc_cputype"")";
 256     return false;
 257   }
 258
 259   return true;
 260 }
 261
 262 static bool IsAddressInSharedRegion(mach_vm_address_t addr, cpu_type_t type) {
 263   if (type == CPU_TYPE_I386)
 264     return addr >= SHARED_REGION_BASE_I386 &&
 265            addr < (SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386);
 266   else if (type == CPU_TYPE_X86_64)
 267     return addr >= SHARED_REGION_BASE_X86_64 &&
 268            addr < (SHARED_REGION_BASE_X86_64 + SHARED_REGION_SIZE_X86_64);
 269   else
 270     return false;
 271 }
 272
 273 // This is a rough approximation of the algorithm that libtop uses.
 274 // private_bytes is the size of private resident memory.
 275 // shared_bytes is the size of shared resident memory.
 276 bool ProcessMetrics::GetMemoryBytes(size_t* private_bytes,
 277                                     size_t* shared_bytes) {
 278   kern_return_t kr;
 279   size_t private_pages_count = 0;
 280   size_t shared_pages_count = 0;
 281
 282   if (!private_bytes && !shared_bytes)
 283     return true;
 284
 285   mach_port_t task = TaskForPid(process_);
 286   if (task == MACH_PORT_NULL) {
 287     DLOG(ERROR) << "Invalid process";
 288     return false;
 289   }
 290
 291   cpu_type_t cpu_type;
 292   if (!GetCPUTypeForProcess(process_, &cpu_type))
 293     return false;
 294
 295   // The same region can be referenced multiple times. To avoid double counting
 296   // we need to keep track of which regions we've already counted.
 297   base::hash_set<int> seen_objects;
 298
 299   // We iterate through each VM region in the task's address map. For shared
 300   // memory we add up all the pages that are marked as shared. Like libtop we
 301   // try to avoid counting pages that are also referenced by other tasks. Since
 302   // we don't have access to the VM regions of other tasks the only hint we have
 303   // is if the address is in the shared region area.
 304   //
 305   // Private memory is much simpler. We simply count the pages that are marked
 306   // as private or copy on write (COW).
 307   //
 308   // See libtop_update_vm_regions in
 309   // http://www.opensource.apple.com/source/top/top-67/libtop.c
 310   mach_vm_size_t size = 0;
 311   for (mach_vm_address_t address = MACH_VM_MIN_ADDRESS;; address += size) {
 312     vm_region_top_info_data_t info;
 313     mach_msg_type_number_t info_count = VM_REGION_TOP_INFO_COUNT;
 314     mach_port_t object_name;
 315     kr = mach_vm_region(task,
 316                         &address,
 317                         &size,
 318                         VM_REGION_TOP_INFO,
 319                         (vm_region_info_t)&info,
 320                         &info_count,
 321                         &object_name);
 322     if (kr == KERN_INVALID_ADDRESS) {
 323       // We're at the end of the address space.
 324       break;
 325     } else if (kr != KERN_SUCCESS) {
 326       DLOG(ERROR) << "Calling mach_vm_region failed with error: "
 327                  << mach_error_string(kr);
 328       return false;
 329     }
 330
 331     if (IsAddressInSharedRegion(address, cpu_type) &&
 332         info.share_mode != SM_PRIVATE)
 333       continue;
 334
 335     if (info.share_mode == SM_COW && info.ref_count == 1)
 336       info.share_mode = SM_PRIVATE;
 337
 338     switch (info.share_mode) {
 339       case SM_PRIVATE:
 340         private_pages_count += info.private_pages_resident;
 341         private_pages_count += info.shared_pages_resident;
 342         break;
 343       case SM_COW:
 344         private_pages_count += info.private_pages_resident;
 345         // Fall through
 346       case SM_SHARED:
 347         if (seen_objects.count(info.obj_id) == 0) {
 348           // Only count the first reference to this region.
 349           seen_objects.insert(info.obj_id);
 350           shared_pages_count += info.shared_pages_resident;
 351         }
 352         break;
 353       default:
 354         break;
 355     }
 356   }
 357
 358   vm_size_t page_size;
 359   kr = host_page_size(task, &page_size);
 360   if (kr != KERN_SUCCESS) {
 361     DLOG(ERROR) << "Failed to fetch host page size, error: "
 362                << mach_error_string(kr);
 363     return false;
 364   }
 365
 366   if (private_bytes)
 367     *private_bytes = private_pages_count * page_size;
 368   if (shared_bytes)
 369     *shared_bytes = shared_pages_count * page_size;
 370
 371   return true;
 372 }
 373
 374 void ProcessMetrics::GetCommittedKBytes(CommittedKBytes* usage) const {
 375 }
 376
 377 bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
 378   size_t priv = GetWorkingSetSize();
 379   if (!priv)
 380     return false;
 381   ws_usage->priv = priv / 1024;
 382   ws_usage->shareable = 0;
 383   ws_usage->shared = 0;
 384   return true;
 385 }
 386
 387 #define TIME_VALUE_TO_TIMEVAL(a, r) do {  \
 388   (r)->tv_sec = (a)->seconds;             \
 389   (r)->tv_usec = (a)->microseconds;       \
 390 } while (0)
 391
 392 double ProcessMetrics::GetCPUUsage() {
 393   mach_port_t task = TaskForPid(process_);
 394   if (task == MACH_PORT_NULL)
 395     return 0;
 396
 397   kern_return_t kr;
 398
 399   // Libtop explicitly loops over the threads (libtop_pinfo_update_cpu_usage()
 400   // in libtop.c), but this is more concise and gives the same results:
 401   task_thread_times_info thread_info_data;
 402   mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
 403   kr = task_info(task,
 404                  TASK_THREAD_TIMES_INFO,
 405                  reinterpret_cast<task_info_t>(&thread_info_data),
 406                  &thread_info_count);
 407   if (kr != KERN_SUCCESS) {
 408     // Most likely cause: |task| is a zombie.
 409     return 0;
 410   }
 411
 412   task_basic_info_64 task_info_data;
 413   if (!GetTaskInfo(task, &task_info_data))
 414     return 0;
 415
 416   /* Set total_time. */
 417   // thread info contains live time...
 418   struct timeval user_timeval, system_timeval, task_timeval;
 419   TIME_VALUE_TO_TIMEVAL(&thread_info_data.user_time, &user_timeval);
 420   TIME_VALUE_TO_TIMEVAL(&thread_info_data.system_time, &system_timeval);
 421   timeradd(&user_timeval, &system_timeval, &task_timeval);
 422
 423   // ... task info contains terminated time.
 424   TIME_VALUE_TO_TIMEVAL(&task_info_data.user_time, &user_timeval);
 425   TIME_VALUE_TO_TIMEVAL(&task_info_data.system_time, &system_timeval);
 426   timeradd(&user_timeval, &task_timeval, &task_timeval);
 427   timeradd(&system_timeval, &task_timeval, &task_timeval);
 428
 429   struct timeval now;
 430   int retval = gettimeofday(&now, NULL);
 431   if (retval)
 432     return 0;
 433
 434   int64 time = TimeValToMicroseconds(now);
 435   int64 task_time = TimeValToMicroseconds(task_timeval);
 436
 437   if ((last_system_time_ == 0) || (last_time_ == 0)) {
 438     // First call, just set the last values.
 439     last_system_time_ = task_time;
 440     last_time_ = time;
 441     return 0;
 442   }
 443
 444   int64 system_time_delta = task_time - last_system_time_;
 445   int64 time_delta = time - last_time_;
 446   DCHECK_NE(0U, time_delta);
 447   if (time_delta == 0)
 448     return 0;
 449
 450   // We add time_delta / 2 so the result is rounded.
 451   double cpu = static_cast<double>((system_time_delta * 100.0) / time_delta);
 452
 453   last_system_time_ = task_time;
 454   last_time_ = time;
 455
 456   return cpu;
 457 }
 458
 459 mach_port_t ProcessMetrics::TaskForPid(ProcessHandle process) const {
 460   mach_port_t task = MACH_PORT_NULL;
 461   if (port_provider_)
 462     task = port_provider_->TaskForPid(process_);
 463   if (task == MACH_PORT_NULL && process_ == getpid())
 464     task = mach_task_self();
 465   return task;
 466 }
 467
 468 // ------------------------------------------------------------------------
 469
 470 // Bytes committed by the system.
 471 size_t GetSystemCommitCharge() {
 472   base::mac::ScopedMachPort host(mach_host_self());
 473   mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
 474   vm_statistics_data_t data;
 475   kern_return_t kr = host_statistics(host, HOST_VM_INFO,
 476                                      reinterpret_cast<host_info_t>(&data),
 477                                      &count);
 478   if (kr) {
 479     DLOG(WARNING) << "Failed to fetch host statistics.";
 480     return 0;
 481   }
 482
 483   vm_size_t page_size;
 484   kr = host_page_size(host, &page_size);
 485   if (kr) {
 486     DLOG(ERROR) << "Failed to fetch host page size.";
 487     return 0;
 488   }
 489
 490   return (data.active_count * page_size) / 1024;
 491 }
 492
 493 namespace {
 494
 495 // Finds the library path for malloc() and thus the libC part of libSystem,
 496 // which in Lion is in a separate image.
 497 const char* LookUpLibCPath() {
 498   const void* addr = reinterpret_cast<void*>(&malloc);
 499
 500   Dl_info info;
 501   if (dladdr(addr, &info))
 502     return info.dli_fname;
 503
 504   DLOG(WARNING) << "Could not find image path for malloc()";
 505   return NULL;
 506 }
 507
 508 typedef void(*malloc_error_break_t)(void);
 509 malloc_error_break_t g_original_malloc_error_break = NULL;
 510
 511 // Returns the function pointer for malloc_error_break. This symbol is declared
 512 // as __private_extern__ and cannot be dlsym()ed. Instead, use nlist() to
 513 // get it.
 514 malloc_error_break_t LookUpMallocErrorBreak() {
 515 #if ARCH_CPU_32_BITS
 516   const char* lib_c_path = LookUpLibCPath();
 517   if (!lib_c_path)
 518     return NULL;
 519
 520   // Only need to look up two symbols, but nlist() requires a NULL-terminated
 521   // array and takes no count.
 522   struct nlist nl[3];
 523   bzero(&nl, sizeof(nl));
 524
 525   // The symbol to find.
 526   nl[0].n_un.n_name = const_cast<char*>("_malloc_error_break");
 527
 528   // A reference symbol by which the address of the desired symbol will be
 529   // calculated.
 530   nl[1].n_un.n_name = const_cast<char*>("_malloc");
 531
 532   int rv = nlist(lib_c_path, nl);
 533   if (rv != 0 || nl[0].n_type == N_UNDF || nl[1].n_type == N_UNDF) {
 534     return NULL;
 535   }
 536
 537   // nlist() returns addresses as offsets in the image, not the instruction
 538   // pointer in memory. Use the known in-memory address of malloc()
 539   // to compute the offset for malloc_error_break().
 540   uintptr_t reference_addr = reinterpret_cast<uintptr_t>(&malloc);
 541   reference_addr -= nl[1].n_value;
 542   reference_addr += nl[0].n_value;
 543
 544   return reinterpret_cast<malloc_error_break_t>(reference_addr);
 545 #endif  // ARCH_CPU_32_BITS
 546
 547   return NULL;
 548 }
 549
 550 // Simple scoper that saves the current value of errno, resets it to 0, and on
 551 // destruction puts the old value back. This is so that CrMallocErrorBreak can
 552 // safely test errno free from the effects of other routines.
 553 class ScopedClearErrno {
 554  public:
 555   ScopedClearErrno() : old_errno_(errno) {
 556     errno = 0;
 557   }
 558   ~ScopedClearErrno() {
 559     if (errno == 0)
 560       errno = old_errno_;
 561   }
 562
 563  private:
 564   int old_errno_;
 565
 566   DISALLOW_COPY_AND_ASSIGN(ScopedClearErrno);
 567 };
 568
 569 // Combines ThreadLocalBoolean with AutoReset.  It would be convenient
 570 // to compose ThreadLocalPointer<bool> with base::AutoReset<bool>, but that
 571 // would require allocating some storage for the bool.
 572 class ThreadLocalBooleanAutoReset {
 573  public:
 574   ThreadLocalBooleanAutoReset(ThreadLocalBoolean* tlb, bool new_value)
 575       : scoped_tlb_(tlb),
 576         original_value_(tlb->Get()) {
 577     scoped_tlb_->Set(new_value);
 578   }
 579   ~ThreadLocalBooleanAutoReset() {
 580     scoped_tlb_->Set(original_value_);
 581   }
 582
 583  private:
 584   ThreadLocalBoolean* scoped_tlb_;
 585   bool original_value_;
 586
 587   DISALLOW_COPY_AND_ASSIGN(ThreadLocalBooleanAutoReset);
 588 };
 589
 590 base::LazyInstance<ThreadLocalBoolean>::Leaky
 591     g_unchecked_malloc = LAZY_INSTANCE_INITIALIZER;
 592
 593 // NOTE(shess): This is called when the malloc library noticed that the heap
 594 // is fubar.  Avoid calls which will re-enter the malloc library.
 595 void CrMallocErrorBreak() {
 596   g_original_malloc_error_break();
 597
 598   // Out of memory is certainly not heap corruption, and not necessarily
 599   // something for which the process should be terminated. Leave that decision
 600   // to the OOM killer.  The EBADF case comes up because the malloc library
 601   // attempts to log to ASL (syslog) before calling this code, which fails
 602   // accessing a Unix-domain socket because of sandboxing.
 603   if (errno == ENOMEM || (errno == EBADF && g_unchecked_malloc.Get().Get()))
 604     return;
 605
 606   // A unit test checks this error message, so it needs to be in release builds.
 607   char buf[1024] =
 608       "Terminating process due to a potential for future heap corruption: "
 609       "errno=";
 610   char errnobuf[] = {
 611     '0' + ((errno / 100) % 10),
 612     '0' + ((errno / 10) % 10),
 613     '0' + (errno % 10),
 614     '\000'
 615   };
 616   COMPILE_ASSERT(ELAST <= 999, errno_too_large_to_encode);
 617   strlcat(buf, errnobuf, sizeof(buf));
 618   RAW_LOG(ERROR, buf);
 619
 620   // Crash by writing to NULL+errno to allow analyzing errno from
 621   // crash dump info (setting a breakpad key would re-enter the malloc
 622   // library).  Max documented errno in intro(2) is actually 102, but
 623   // it really just needs to be "small" to stay on the right vm page.
 624   const int kMaxErrno = 256;
 625   char* volatile death_ptr = NULL;
 626   death_ptr += std::min(errno, kMaxErrno);
 627   *death_ptr = '!';
 628 }
 629
 630 }  // namespace
 631
 632 void EnableTerminationOnHeapCorruption() {
 633 #ifdef ADDRESS_SANITIZER
 634   // Don't do anything special on heap corruption, because it should be handled
 635   // by AddressSanitizer.
 636   return;
 637 #endif
 638
 639   // Only override once, otherwise CrMallocErrorBreak() will recurse
 640   // to itself.
 641   if (g_original_malloc_error_break)
 642     return;
 643
 644   malloc_error_break_t malloc_error_break = LookUpMallocErrorBreak();
 645   if (!malloc_error_break) {
 646     DLOG(WARNING) << "Could not find malloc_error_break";
 647     return;
 648   }
 649
 650   mach_error_t err = mach_override_ptr(
 651      (void*)malloc_error_break,
 652      (void*)&CrMallocErrorBreak,
 653      (void**)&g_original_malloc_error_break);
 654
 655   if (err != err_none)
 656     DLOG(WARNING) << "Could not override malloc_error_break; error = " << err;
 657 }
 658
 659 // ------------------------------------------------------------------------
 660
 661 namespace {
 662
 663 bool g_oom_killer_enabled;
 664
 665 // === C malloc/calloc/valloc/realloc/posix_memalign ===
 666
 667 typedef void* (*malloc_type)(struct _malloc_zone_t* zone,
 668                              size_t size);
 669 typedef void* (*calloc_type)(struct _malloc_zone_t* zone,
 670                              size_t num_items,
 671                              size_t size);
 672 typedef void* (*valloc_type)(struct _malloc_zone_t* zone,
 673                              size_t size);
 674 typedef void (*free_type)(struct _malloc_zone_t* zone,
 675                           void* ptr);
 676 typedef void* (*realloc_type)(struct _malloc_zone_t* zone,
 677                               void* ptr,
 678                               size_t size);
 679 typedef void* (*memalign_type)(struct _malloc_zone_t* zone,
 680                                size_t alignment,
 681                                size_t size);
 682
 683 malloc_type g_old_malloc;
 684 calloc_type g_old_calloc;
 685 valloc_type g_old_valloc;
 686 free_type g_old_free;
 687 realloc_type g_old_realloc;
 688 memalign_type g_old_memalign;
 689
 690 malloc_type g_old_malloc_purgeable;
 691 calloc_type g_old_calloc_purgeable;
 692 valloc_type g_old_valloc_purgeable;
 693 free_type g_old_free_purgeable;
 694 realloc_type g_old_realloc_purgeable;
 695 memalign_type g_old_memalign_purgeable;
 696
 697 void* oom_killer_malloc(struct _malloc_zone_t* zone,
 698                         size_t size) {
 699   ScopedClearErrno clear_errno;
 700   void* result = g_old_malloc(zone, size);
 701   if (!result && size)
 702     debug::BreakDebugger();
 703   return result;
 704 }
 705
 706 void* oom_killer_calloc(struct _malloc_zone_t* zone,
 707                         size_t num_items,
 708                         size_t size) {
 709   ScopedClearErrno clear_errno;
 710   void* result = g_old_calloc(zone, num_items, size);
 711   if (!result && num_items && size)
 712     debug::BreakDebugger();
 713   return result;
 714 }
 715
 716 void* oom_killer_valloc(struct _malloc_zone_t* zone,
 717                         size_t size) {
 718   ScopedClearErrno clear_errno;
 719   void* result = g_old_valloc(zone, size);
 720   if (!result && size)
 721     debug::BreakDebugger();
 722   return result;
 723 }
 724
 725 void oom_killer_free(struct _malloc_zone_t* zone,
 726                      void* ptr) {
 727   ScopedClearErrno clear_errno;
 728   g_old_free(zone, ptr);
 729 }
 730
 731 void* oom_killer_realloc(struct _malloc_zone_t* zone,
 732                          void* ptr,
 733                          size_t size) {
 734   ScopedClearErrno clear_errno;
 735   void* result = g_old_realloc(zone, ptr, size);
 736   if (!result && size)
 737     debug::BreakDebugger();
 738   return result;
 739 }
 740
 741 void* oom_killer_memalign(struct _malloc_zone_t* zone,
 742                           size_t alignment,
 743                           size_t size) {
 744   ScopedClearErrno clear_errno;
 745   void* result = g_old_memalign(zone, alignment, size);
 746   // Only die if posix_memalign would have returned ENOMEM, since there are
 747   // other reasons why NULL might be returned (see
 748   // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
 749   if (!result && size && alignment >= sizeof(void*)
 750       && (alignment & (alignment - 1)) == 0) {
 751     debug::BreakDebugger();
 752   }
 753   return result;
 754 }
 755
 756 void* oom_killer_malloc_purgeable(struct _malloc_zone_t* zone,
 757                                   size_t size) {
 758   ScopedClearErrno clear_errno;
 759   void* result = g_old_malloc_purgeable(zone, size);
 760   if (!result && size)
 761     debug::BreakDebugger();
 762   return result;
 763 }
 764
 765 void* oom_killer_calloc_purgeable(struct _malloc_zone_t* zone,
 766                                   size_t num_items,
 767                                   size_t size) {
 768   ScopedClearErrno clear_errno;
 769   void* result = g_old_calloc_purgeable(zone, num_items, size);
 770   if (!result && num_items && size)
 771     debug::BreakDebugger();
 772   return result;
 773 }
 774
 775 void* oom_killer_valloc_purgeable(struct _malloc_zone_t* zone,
 776                                   size_t size) {
 777   ScopedClearErrno clear_errno;
 778   void* result = g_old_valloc_purgeable(zone, size);
 779   if (!result && size)
 780     debug::BreakDebugger();
 781   return result;
 782 }
 783
 784 void oom_killer_free_purgeable(struct _malloc_zone_t* zone,
 785                                void* ptr) {
 786   ScopedClearErrno clear_errno;
 787   g_old_free_purgeable(zone, ptr);
 788 }
 789
 790 void* oom_killer_realloc_purgeable(struct _malloc_zone_t* zone,
 791                                    void* ptr,
 792                                    size_t size) {
 793   ScopedClearErrno clear_errno;
 794   void* result = g_old_realloc_purgeable(zone, ptr, size);
 795   if (!result && size)
 796     debug::BreakDebugger();
 797   return result;
 798 }
 799
 800 void* oom_killer_memalign_purgeable(struct _malloc_zone_t* zone,
 801                                     size_t alignment,
 802                                     size_t size) {
 803   ScopedClearErrno clear_errno;
 804   void* result = g_old_memalign_purgeable(zone, alignment, size);
 805   // Only die if posix_memalign would have returned ENOMEM, since there are
 806   // other reasons why NULL might be returned (see
 807   // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
 808   if (!result && size && alignment >= sizeof(void*)
 809       && (alignment & (alignment - 1)) == 0) {
 810     debug::BreakDebugger();
 811   }
 812   return result;
 813 }
 814
 815 // === C++ operator new ===
 816
 817 void oom_killer_new() {
 818   debug::BreakDebugger();
 819 }
 820
 821 // === Core Foundation CFAllocators ===
 822
 823 bool CanGetContextForCFAllocator() {
 824   return !base::mac::IsOSLaterThanMountainLion_DontCallThis();
 825 }
 826
 827 CFAllocatorContext* ContextForCFAllocator(CFAllocatorRef allocator) {
 828   if (base::mac::IsOSSnowLeopard()) {
 829     ChromeCFAllocatorLeopards* our_allocator =
 830         const_cast<ChromeCFAllocatorLeopards*>(
 831             reinterpret_cast<const ChromeCFAllocatorLeopards*>(allocator));
 832     return &our_allocator->_context;
 833   } else if (base::mac::IsOSLion() || base::mac::IsOSMountainLion()) {
 834     ChromeCFAllocatorLions* our_allocator =
 835         const_cast<ChromeCFAllocatorLions*>(
 836             reinterpret_cast<const ChromeCFAllocatorLions*>(allocator));
 837     return &our_allocator->_context;
 838   } else {
 839     return NULL;
 840   }
 841 }
 842
 843 CFAllocatorAllocateCallBack g_old_cfallocator_system_default;
 844 CFAllocatorAllocateCallBack g_old_cfallocator_malloc;
 845 CFAllocatorAllocateCallBack g_old_cfallocator_malloc_zone;
 846
 847 void* oom_killer_cfallocator_system_default(CFIndex alloc_size,
 848                                             CFOptionFlags hint,
 849                                             void* info) {
 850   void* result = g_old_cfallocator_system_default(alloc_size, hint, info);
 851   if (!result)
 852     debug::BreakDebugger();
 853   return result;
 854 }
 855
 856 void* oom_killer_cfallocator_malloc(CFIndex alloc_size,
 857                                     CFOptionFlags hint,
 858                                     void* info) {
 859   void* result = g_old_cfallocator_malloc(alloc_size, hint, info);
 860   if (!result)
 861     debug::BreakDebugger();
 862   return result;
 863 }
 864
 865 void* oom_killer_cfallocator_malloc_zone(CFIndex alloc_size,
 866                                          CFOptionFlags hint,
 867                                          void* info) {
 868   void* result = g_old_cfallocator_malloc_zone(alloc_size, hint, info);
 869   if (!result)
 870     debug::BreakDebugger();
 871   return result;
 872 }
 873
 874 // === Cocoa NSObject allocation ===
 875
 876 typedef id (*allocWithZone_t)(id, SEL, NSZone*);
 877 allocWithZone_t g_old_allocWithZone;
 878
 879 id oom_killer_allocWithZone(id self, SEL _cmd, NSZone* zone)
 880 {
 881   id result = g_old_allocWithZone(self, _cmd, zone);
 882   if (!result)
 883     debug::BreakDebugger();
 884   return result;
 885 }
 886
 887 }  // namespace
 888
 889 void* UncheckedMalloc(size_t size) {
 890   if (g_old_malloc) {
 891     ScopedClearErrno clear_errno;
 892     ThreadLocalBooleanAutoReset flag(g_unchecked_malloc.Pointer(), true);
 893     return g_old_malloc(malloc_default_zone(), size);
 894   }
 895   return malloc(size);
 896 }
 897
// Installs allocation hooks that call debug::BreakDebugger() when an
// allocation fails. The first call does the work; later calls return
// immediately. Four families of allocators are covered, in this order:
//   1. The default and purgeable malloc zones (skipped under
//      ADDRESS_SANITIZER): the zone's function pointers are saved in the
//      g_old_* globals and replaced with the oom_killer_* wrappers.
//   2. C++ operator new, via std::set_new_handler().
//   3. The system CFAllocators (skipped under ADDRESS_SANITIZER, and only
//      when CanGetContextForCFAllocator() is true).
//   4. +[NSObject allocWithZone:], by replacing its method implementation.
void EnableTerminationOnOutOfMemory() {
  // Hooks are installed at most once per process.
  if (g_oom_killer_enabled)
    return;

  g_oom_killer_enabled = true;

  // === C malloc/calloc/valloc/realloc/posix_memalign ===

  // This approach is not perfect, as requests for amounts of memory larger than
  // MALLOC_ABSOLUTE_MAX_SIZE (currently SIZE_T_MAX - (2 * PAGE_SIZE)) will
  // still fail with a NULL rather than dying (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c for details).
  // Unfortunately, it's the best we can do. Also note that this does not affect
  // allocations from non-default zones.

  CHECK(!g_old_malloc && !g_old_calloc && !g_old_valloc && !g_old_realloc &&
        !g_old_memalign) << "Old allocators unexpectedly non-null";

  CHECK(!g_old_malloc_purgeable && !g_old_calloc_purgeable &&
        !g_old_valloc_purgeable && !g_old_realloc_purgeable &&
        !g_old_memalign_purgeable) << "Old allocators unexpectedly non-null";

#if !defined(ADDRESS_SANITIZER)
  // Don't do anything special on OOM for the malloc zones replaced by
  // AddressSanitizer, as modifying or protecting them may not work correctly.

  // See http://trac.webkit.org/changeset/53362/trunk/Tools/DumpRenderTree/mac
  bool zone_allocators_protected = base::mac::IsOSLionOrLater();

  ChromeMallocZone* default_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_zone());
  ChromeMallocZone* purgeable_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_purgeable_zone());

  vm_address_t page_start_default = 0;
  vm_address_t page_start_purgeable = 0;
  vm_size_t len_default = 0;
  vm_size_t len_purgeable = 0;
  if (zone_allocators_protected) {
    // Make the zone structures writable before patching them. Each range
    // starts at the page containing the zone pointer (address rounded down to
    // a page boundary) and extends through the end of the ChromeMallocZone.
    // NOTE(review): the mprotect() return values are not checked here or
    // below; a failure would surface as a fault when the pointers are written.
    page_start_default = reinterpret_cast<vm_address_t>(default_zone) &
        static_cast<vm_size_t>(~(getpagesize() - 1));
    len_default = reinterpret_cast<vm_address_t>(default_zone) -
        page_start_default + sizeof(ChromeMallocZone);
    mprotect(reinterpret_cast<void*>(page_start_default), len_default,
             PROT_READ | PROT_WRITE);

    if (purgeable_zone) {
      page_start_purgeable = reinterpret_cast<vm_address_t>(purgeable_zone) &
          static_cast<vm_size_t>(~(getpagesize() - 1));
      len_purgeable = reinterpret_cast<vm_address_t>(purgeable_zone) -
          page_start_purgeable + sizeof(ChromeMallocZone);
      mprotect(reinterpret_cast<void*>(page_start_purgeable), len_purgeable,
               PROT_READ | PROT_WRITE);
    }
  }

  // Default zone

  // Save the original entry points first; the wrappers forward to them.
  g_old_malloc = default_zone->malloc;
  g_old_calloc = default_zone->calloc;
  g_old_valloc = default_zone->valloc;
  g_old_free = default_zone->free;
  g_old_realloc = default_zone->realloc;
  CHECK(g_old_malloc && g_old_calloc && g_old_valloc && g_old_free &&
        g_old_realloc)
      << "Failed to get system allocation functions.";

  default_zone->malloc = oom_killer_malloc;
  default_zone->calloc = oom_killer_calloc;
  default_zone->valloc = oom_killer_valloc;
  default_zone->free = oom_killer_free;
  default_zone->realloc = oom_killer_realloc;

  // memalign is only read for zones reporting version >= 5, and only hooked
  // when the zone actually provides it.
  if (default_zone->version >= 5) {
    g_old_memalign = default_zone->memalign;
    if (g_old_memalign)
      default_zone->memalign = oom_killer_memalign;
  }

  // Purgeable zone (if it exists)

  if (purgeable_zone) {
    g_old_malloc_purgeable = purgeable_zone->malloc;
    g_old_calloc_purgeable = purgeable_zone->calloc;
    g_old_valloc_purgeable = purgeable_zone->valloc;
    g_old_free_purgeable = purgeable_zone->free;
    g_old_realloc_purgeable = purgeable_zone->realloc;
    CHECK(g_old_malloc_purgeable && g_old_calloc_purgeable &&
          g_old_valloc_purgeable && g_old_free_purgeable &&
          g_old_realloc_purgeable)
        << "Failed to get system allocation functions.";

    purgeable_zone->malloc = oom_killer_malloc_purgeable;
    purgeable_zone->calloc = oom_killer_calloc_purgeable;
    purgeable_zone->valloc = oom_killer_valloc_purgeable;
    purgeable_zone->free = oom_killer_free_purgeable;
    purgeable_zone->realloc = oom_killer_realloc_purgeable;

    if (purgeable_zone->version >= 5) {
      g_old_memalign_purgeable = purgeable_zone->memalign;
      if (g_old_memalign_purgeable)
        purgeable_zone->memalign = oom_killer_memalign_purgeable;
    }
  }

  // With the hooks in place, set the same ranges back to read-only.
  if (zone_allocators_protected) {
    mprotect(reinterpret_cast<void*>(page_start_default), len_default,
             PROT_READ);
    if (purgeable_zone) {
      mprotect(reinterpret_cast<void*>(page_start_purgeable), len_purgeable,
               PROT_READ);
    }
  }
#endif

  // === C malloc_zone_batch_malloc ===

  // batch_malloc is omitted because the default malloc zone's implementation
  // only supports batch_malloc for "tiny" allocations from the free list. It
  // will fail for allocations larger than "tiny", and will only allocate as
  // many blocks as it's able to from the free list. These factors mean that it
  // can return less than the requested memory even in a non-out-of-memory
  // situation. There's no good way to detect whether a batch_malloc failure is
  // due to these other factors, or due to genuine memory or address space
  // exhaustion. The fact that it only allocates space from the "tiny" free list
  // means that it's likely that a failure will not be due to memory exhaustion.
  // Similarly, these constraints on batch_malloc mean that callers must always
  // be expecting to receive less memory than was requested, even in situations
  // where memory pressure is not a concern. Finally, the only public interface
  // to batch_malloc is malloc_zone_batch_malloc, which is specific to the
  // system's malloc implementation. It's unlikely that anyone's even heard of
  // it.

  // === C++ operator new ===

  // Yes, operator new does call through to malloc, but this will catch failures
  // that our imperfect handling of malloc cannot.

  std::set_new_handler(oom_killer_new);

#ifndef ADDRESS_SANITIZER
  // === Core Foundation CFAllocators ===

  // This will not catch allocation done by custom allocators, but will catch
  // all allocation done by system-provided ones.

  CHECK(!g_old_cfallocator_system_default && !g_old_cfallocator_malloc &&
        !g_old_cfallocator_malloc_zone)
      << "Old allocators unexpectedly non-null";

  bool cf_allocator_internals_known = CanGetContextForCFAllocator();

  if (cf_allocator_internals_known) {
    // For each system allocator: fetch its context, save the original
    // |allocate| callback, then point |allocate| at the checking wrapper.
    CFAllocatorContext* context =
        ContextForCFAllocator(kCFAllocatorSystemDefault);
    CHECK(context) << "Failed to get context for kCFAllocatorSystemDefault.";
    g_old_cfallocator_system_default = context->allocate;
    CHECK(g_old_cfallocator_system_default)
        << "Failed to get kCFAllocatorSystemDefault allocation function.";
    context->allocate = oom_killer_cfallocator_system_default;

    context = ContextForCFAllocator(kCFAllocatorMalloc);
    CHECK(context) << "Failed to get context for kCFAllocatorMalloc.";
    g_old_cfallocator_malloc = context->allocate;
    CHECK(g_old_cfallocator_malloc)
        << "Failed to get kCFAllocatorMalloc allocation function.";
    context->allocate = oom_killer_cfallocator_malloc;

    context = ContextForCFAllocator(kCFAllocatorMallocZone);
    CHECK(context) << "Failed to get context for kCFAllocatorMallocZone.";
    g_old_cfallocator_malloc_zone = context->allocate;
    CHECK(g_old_cfallocator_malloc_zone)
        << "Failed to get kCFAllocatorMallocZone allocation function.";
    context->allocate = oom_killer_cfallocator_malloc_zone;
  } else {
    NSLog(@"Internals of CFAllocator not known; out-of-memory failures via "
        "CFAllocator will not result in termination. http://crbug.com/45650");
  }
#endif

  // === Cocoa NSObject allocation ===

  // Note that both +[NSObject new] and +[NSObject alloc] call through to
  // +[NSObject allocWithZone:].

  CHECK(!g_old_allocWithZone)
      << "Old allocator unexpectedly non-null";

  // Save the current implementation of +[NSObject allocWithZone:] so the
  // wrapper can forward to it, then install the wrapper in its place.
  Class nsobject_class = [NSObject class];
  Method orig_method = class_getClassMethod(nsobject_class,
                                            @selector(allocWithZone:));
  g_old_allocWithZone = reinterpret_cast<allocWithZone_t>(
      method_getImplementation(orig_method));
  CHECK(g_old_allocWithZone)
      << "Failed to get allocWithZone allocation function.";
  method_setImplementation(orig_method,
                           reinterpret_cast<IMP>(oom_killer_allocWithZone));
}
1096
// Returns the parent pid of |process| as reported by the
// CTL_KERN / KERN_PROC / KERN_PROC_PID sysctl, or -1 if the sysctl call fails
// (the failure is logged with DPLOG) or reports zero bytes of data.
ProcessId GetParentProcessId(ProcessHandle process) {
  int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, process };
  struct kinfo_proc proc_info;
  size_t proc_info_size = sizeof(proc_info);

  const int rc = sysctl(mib, 4, &proc_info, &proc_info_size, NULL, 0);
  if (rc < 0) {
    DPLOG(ERROR) << "sysctl";
    return -1;
  }

  // A successful call that wrote nothing means there is no entry to read.
  return proc_info_size == 0 ? -1 : proc_info.kp_eproc.e_ppid;
}
1109
1110 namespace {
1111
// Timeout, in seconds, that EnsureProcessTerminated() passes to
// WaitForChildToDie(): how long a child may take to exit before it is killed.
const int kWaitBeforeKillSeconds = 2;
1113
// Reaps |child| with a blocking waitpid(), retrying on EINTR. The exit status
// is discarded; a waitpid() failure is only logged (DPLOG).
void BlockingReap(pid_t child) {
  const pid_t reaped = HANDLE_EINTR(waitpid(child, NULL, 0));
  if (reaped != -1)
    return;

  DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
}
1121
1122 // Waits for |timeout| seconds for the given |child| to exit and reap it. If
1123 // the child doesn't exit within the time specified, kills it.
1124 //
1125 // This function takes two approaches: first, it tries to use kqueue to
1126 // observe when the process exits. kevent can monitor a kqueue with a
1127 // timeout, so this method is preferred to wait for a specified period of
1128 // time. Once the kqueue indicates the process has exited, waitpid will reap
// the exited child. If the kqueue doesn't provide an exit event notification
// before the timeout expires, or if the kqueue fails or misbehaves, the
// process will be mercilessly killed and reaped.
1132 //
1133 // A child process passed to this function may be in one of several states:
1134 // running, terminated and not yet reaped, and (apparently, and unfortunately)
1135 // terminated and already reaped. Normally, a process will at least have been
1136 // asked to exit before this function is called, but this is not required.
1137 // If a process is terminating and unreaped, there may be a window between the
1138 // time that kqueue will no longer recognize it and when it becomes an actual
1139 // zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
1140 // detected when kqueue indicates that the process is not running and a
1141 // non-blocking waitpid fails to reap the process but indicates that it is
1142 // still running. In this event, a blocking attempt to reap the process
1143 // collects the known-dying child, preventing zombies from congregating.
1144 //
1145 // In the event that the kqueue misbehaves entirely, as it might under a
1146 // EMFILE condition ("too many open files", or out of file descriptors), this
1147 // function will forcibly kill and reap the child without delay. This
1148 // eliminates another potential zombie vector. (If you're out of file
1149 // descriptors, you're probably deep into something else, but that doesn't
// mean that zombies should be allowed to kick you while you're down.)
1151 //
1152 // The fact that this function seemingly can be called to wait on a child
1153 // that's not only already terminated but already reaped is a bit of a
1154 // problem: a reaped child's pid can be reclaimed and may refer to a distinct
1155 // process in that case. The fact that this function can seemingly be called
1156 // to wait on a process that's not even a child is also a problem: kqueue will
1157 // work in that case, but waitpid won't, and killing a non-child might not be
1158 // the best approach.
//
// |child| must be a positive pid and |timeout| a positive number of whole
// seconds (both are DCHECKed). Nothing is returned; every failure is logged.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
  // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
  // call fails, this function should fall back to the last resort of trying
  // to kill and reap the process. Not observing this rule will resurrect
  // zombies.

  int result;

  int kq = HANDLE_EINTR(kqueue());
  if (kq == -1) {
    // No kqueue available: fall through to the kill-and-reap path below.
    DPLOG(ERROR) << "kqueue()";
  } else {
    // NOTE(review): presumably closes |kq| when this scope ends -- confirm
    // against file_util::ScopedFD.
    file_util::ScopedFD auto_close_kq(&kq);

    // Register interest in |child|'s exit (EVFILT_PROC / NOTE_EXIT). This
    // first kevent call only submits the change; it collects no events.
    struct kevent change = {0};
    EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));

    if (result == -1) {
      if (errno != ESRCH) {
        DPLOG(ERROR) << "kevent (setup " << child << ")";
      } else {
        // At this point, one of the following has occurred:
        // 1. The process has died but has not yet been reaped.
        // 2. The process has died and has already been reaped.
        // 3. The process is in the process of dying. It's no longer
        //    kqueueable, but it may not be waitable yet either. Mark calls
        //    this case the "zombie death race".

        result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

        if (result != 0) {
          // A positive result indicates case 1. waitpid succeeded and reaped
          // the child. A result of -1 indicates case 2. The child has already
          // been reaped. In both of these cases, no further action is
          // necessary.
          return;
        }

        // |result| is 0, indicating case 3. The process will be waitable in
        // short order. Fall back out of the kqueue code to kill it (for good
        // measure) and reap it.
      }
    } else {
      // Keep track of the elapsed time to be able to restart kevent if it's
      // interrupted.
      TimeDelta remaining_delta = TimeDelta::FromSeconds(timeout);
      Time deadline = Time::Now() + remaining_delta;
      result = -1;
      struct kevent event = {0};
      while (remaining_delta.InMilliseconds() > 0) {
        const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
        // kevent is called directly (not through HANDLE_EINTR) so that the
        // timeout can be shortened to the time left after an interruption.
        result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);
        if (result == -1 && errno == EINTR) {
          remaining_delta = deadline - Time::Now();
          // Record "no event" so that running out of time after an
          // interruption is handled like a timeout, not like an error.
          result = 0;
        } else {
          break;
        }
      }

      // |result| is now -1 (error), 0 (timed out), 1 (one event delivered) or,
      // unexpectedly, more than 1. Only a matching exit event returns here;
      // every other outcome falls through to the kill below.
      if (result == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (result > 1) {
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << result;
      } else if (result == 1) {
        if ((event.fflags & NOTE_EXIT) &&
            (event.ident == static_cast<uintptr_t>(child))) {
          // The process is dead or dying. This won't block for long, if at
          // all.
          BlockingReap(child);
          return;
        } else {
          DLOG(ERROR) << "kevent (wait " << child
                      << "): unexpected event: fflags=" << event.fflags
                      << ", ident=" << event.ident;
        }
      }
    }
  }

  // The child is still alive, or is very freshly dead. Be sure by sending it
  // a signal. This is safe even if it's freshly dead, because it will be a
  // zombie (or on the way to zombiedom) and kill will return 0 even if the
  // signal is not delivered to a live process.
  result = kill(child, SIGKILL);
  if (result == -1) {
    // The signal could not be sent, so no reap is attempted.
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  } else {
    // The child is definitely on the way out now. BlockingReap won't need to
    // wait for long, if at all.
    BlockingReap(child);
  }
}
1258
1259 }  // namespace
1260
// Waits up to kWaitBeforeKillSeconds for |process| to exit and reaps it,
// killing it if it has not exited by then (see WaitForChildToDie). Blocks the
// calling thread while waiting.
void EnsureProcessTerminated(ProcessHandle process) {
  const int grace_period_seconds = kWaitBeforeKillSeconds;
  WaitForChildToDie(process, grace_period_seconds);
}
1264
1265 }  // namespace base