// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "base/process_util.h"

#import <Cocoa/Cocoa.h>
#include <crt_externs.h>
#include <dlfcn.h>
#include <errno.h>
#include <mach/mach.h>
#include <mach/mach_init.h>
#include <mach/mach_vm.h>
#include <mach/shared_region.h>
#include <mach/task.h>
#include <malloc/malloc.h>
#import <objc/runtime.h>
#include <signal.h>
#include <string.h>
#include <sys/event.h>
#include <sys/sysctl.h>
#include <sys/types.h>
#include <sys/wait.h>

#include <algorithm>
#include <new>
#include <string>

#include "base/debug/debugger.h"
#include "base/file_util.h"
#include "base/hash_tables.h"
#include "base/lazy_instance.h"
#include "base/logging.h"
#include "base/mac/mac_util.h"
#include "base/mac/scoped_mach_port.h"
#include "base/posix/eintr_wrapper.h"
#include "base/string_util.h"
#include "base/sys_info.h"
#include "base/time.h"
#include "third_party/apple_apsl/CFBase.h"
#include "third_party/apple_apsl/malloc.h"

#if ARCH_CPU_32_BITS
#include <mach-o/nlist.h>

#include "base/threading/thread_local.h"
#include "third_party/mach_override/mach_override.h"
#endif  // ARCH_CPU_32_BITS

namespace base {

void RestoreDefaultExceptionHandler() {
  // This function is tailored to remove the Breakpad exception handler.
  // exception_mask matches s_exception_mask in
  // breakpad/src/client/mac/handler/exception_handler.cc
  const exception_mask_t exception_mask = EXC_MASK_BAD_ACCESS |
                                          EXC_MASK_BAD_INSTRUCTION |
                                          EXC_MASK_ARITHMETIC |
                                          EXC_MASK_BREAKPOINT;

  // Setting the exception port to MACH_PORT_NULL may not be entirely
  // kosher to restore the default exception handler, but in practice,
  // it results in the exception port being set to Apple Crash Reporter,
  // the desired behavior.
  task_set_exception_ports(mach_task_self(), exception_mask, MACH_PORT_NULL,
                           EXCEPTION_DEFAULT, THREAD_STATE_NONE);
}
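
// Example (hypothetical caller): a process that installed Breakpad but wants
// subsequent crashes routed to Apple's Crash Reporter again would simply
// call:
//
//   base::RestoreDefaultExceptionHandler();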

ProcessIterator::ProcessIterator(const ProcessFilter* filter)
    : index_of_kinfo_proc_(0),
      filter_(filter) {
  // Get a snapshot of all of my processes (yes, as we loop it can go stale,
  // but trying to find where we were in a constantly changing list is
  // basically impossible).

  int mib[] = { CTL_KERN, KERN_PROC, KERN_PROC_UID, geteuid() };

  // Since more processes could start between when we get the size and when
  // we get the list, we do a loop to keep trying until we get it.
  bool done = false;
  int try_num = 1;
  const int max_tries = 10;
  do {
    // Get the size of the buffer.
    size_t len = 0;
    if (sysctl(mib, arraysize(mib), NULL, &len, NULL, 0) < 0) {
      DLOG(ERROR) << "failed to get the size needed for the process list";
      kinfo_procs_.resize(0);
      done = true;
    } else {
      size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
      // Leave some spare room for process table growth (more could show up
      // between when we check and now).
      num_of_kinfo_proc += 16;
      kinfo_procs_.resize(num_of_kinfo_proc);
      len = num_of_kinfo_proc * sizeof(struct kinfo_proc);
      // Load the list of processes.
      if (sysctl(mib, arraysize(mib), &kinfo_procs_[0], &len, NULL, 0) < 0) {
        // If we get a mem error, it just means we need a bigger buffer, so
        // loop around again. Anything else is a real error, so give up.
        if (errno != ENOMEM) {
          DLOG(ERROR) << "failed to get the process list";
          kinfo_procs_.resize(0);
          done = true;
        }
      } else {
        // Got the list; just make sure we're sized exactly right.
        size_t num_of_kinfo_proc = len / sizeof(struct kinfo_proc);
        kinfo_procs_.resize(num_of_kinfo_proc);
        done = true;
      }
    }
  } while (!done && (try_num++ < max_tries));

  if (!done) {
    DLOG(ERROR) << "failed to collect the process list in a few tries";
    kinfo_procs_.resize(0);
  }
}

ProcessIterator::~ProcessIterator() {
}

bool ProcessIterator::CheckForNextProcess() {
  std::string data;
  for (; index_of_kinfo_proc_ < kinfo_procs_.size(); ++index_of_kinfo_proc_) {
    kinfo_proc& kinfo = kinfo_procs_[index_of_kinfo_proc_];

    // Skip processes just awaiting collection.
    if ((kinfo.kp_proc.p_pid > 0) && (kinfo.kp_proc.p_stat == SZOMB))
      continue;

    int mib[] = { CTL_KERN, KERN_PROCARGS, kinfo.kp_proc.p_pid };

    // Find out what size buffer we need.
    size_t data_len = 0;
    if (sysctl(mib, arraysize(mib), NULL, &data_len, NULL, 0) < 0) {
      DVPLOG(1) << "failed to figure out the buffer size for a commandline";
      continue;
    }

    data.resize(data_len);
    if (sysctl(mib, arraysize(mib), &data[0], &data_len, NULL, 0) < 0) {
      DVPLOG(1) << "failed to fetch a commandline";
      continue;
    }

    // |data| contains all the command line parameters of the process,
    // separated by blocks of one or more null characters. We tokenize |data|
    // into a vector of strings using '\0' as a delimiter and populate
    // |entry_.cmd_line_args_|.
    std::string delimiters;
    delimiters.push_back('\0');
    Tokenize(data, delimiters, &entry_.cmd_line_args_);

    // |data| starts with the full executable path followed by a null
    // character. We search for the first instance of '\0' and extract
    // everything before it to populate |entry_.exe_file_|.
    size_t exec_name_end = data.find('\0');
    if (exec_name_end == std::string::npos) {
      DLOG(ERROR) << "command line data didn't match expected format";
      continue;
    }

    entry_.pid_ = kinfo.kp_proc.p_pid;
    entry_.ppid_ = kinfo.kp_eproc.e_ppid;
    entry_.gid_ = kinfo.kp_eproc.e_pgid;
    size_t last_slash = data.rfind('/', exec_name_end);
    if (last_slash == std::string::npos) {
      entry_.exe_file_.assign(data, 0, exec_name_end);
    } else {
      entry_.exe_file_.assign(data, last_slash + 1,
                              exec_name_end - last_slash - 1);
    }
    // Start with the next entry next time through.
    ++index_of_kinfo_proc_;
    return true;
  }
  return false;
}

bool NamedProcessIterator::IncludeEntry() {
  return (executable_name_ == entry().exe_file() &&
          ProcessIterator::IncludeEntry());
}

// ------------------------------------------------------------------------
// NOTE: about ProcessMetrics
//
// Getting a mach task from a pid for another process requires permissions in
// general, so there doesn't really seem to be a way to do these (and spinning
// up ps to fetch stats for each process seems too dangerous to put in a base
// API for anyone to call). Child processes IPC their task port over, so
// return something if available, otherwise return 0.
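//
// A minimal sketch of a PortProvider a caller might supply (hypothetical;
// real providers live in the embedder, which receives each child's task port
// over Mach IPC):
//
//   class MyPortProvider : public base::ProcessMetrics::PortProvider {
//    public:
//     virtual mach_port_t TaskForPid(base::ProcessHandle process) const {
//       std::map<base::ProcessHandle, mach_port_t>::const_iterator it =
//           ports_.find(process);
//       return it == ports_.end() ? MACH_PORT_NULL : it->second;
//     }
//    private:
//     std::map<base::ProcessHandle, mach_port_t> ports_;
//   };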

ProcessMetrics::ProcessMetrics(ProcessHandle process,
                               ProcessMetrics::PortProvider* port_provider)
    : process_(process),
      last_time_(0),
      last_system_time_(0),
      port_provider_(port_provider) {
  processor_count_ = SysInfo::NumberOfProcessors();
}

// static
ProcessMetrics* ProcessMetrics::CreateProcessMetrics(
    ProcessHandle process,
    ProcessMetrics::PortProvider* port_provider) {
  return new ProcessMetrics(process, port_provider);
}

bool ProcessMetrics::GetIOCounters(IoCounters* io_counters) const {
  return false;
}

static bool GetTaskInfo(mach_port_t task, task_basic_info_64* task_info_data) {
  if (task == MACH_PORT_NULL)
    return false;
  mach_msg_type_number_t count = TASK_BASIC_INFO_64_COUNT;
  kern_return_t kr = task_info(task,
                               TASK_BASIC_INFO_64,
                               reinterpret_cast<task_info_t>(task_info_data),
                               &count);
  // Most likely cause for failure: |task| is a zombie.
  return kr == KERN_SUCCESS;
}

size_t ProcessMetrics::GetPagefileUsage() const {
  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
    return 0;
  return task_info_data.virtual_size;
}

size_t ProcessMetrics::GetPeakPagefileUsage() const {
  return 0;
}

size_t ProcessMetrics::GetWorkingSetSize() const {
  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(TaskForPid(process_), &task_info_data))
    return 0;
  return task_info_data.resident_size;
}

size_t ProcessMetrics::GetPeakWorkingSetSize() const {
  return 0;
}

static bool GetCPUTypeForProcess(pid_t pid, cpu_type_t* cpu_type) {
  size_t len = sizeof(*cpu_type);
  int result = sysctlbyname("sysctl.proc_cputype",
                            cpu_type,
                            &len,
                            NULL,
                            0);
  if (result != 0) {
    DPLOG(ERROR) << "sysctlbyname(\"sysctl.proc_cputype\")";
    return false;
  }

  return true;
}

static bool IsAddressInSharedRegion(mach_vm_address_t addr, cpu_type_t type) {
  if (type == CPU_TYPE_I386) {
    return addr >= SHARED_REGION_BASE_I386 &&
           addr < (SHARED_REGION_BASE_I386 + SHARED_REGION_SIZE_I386);
  } else if (type == CPU_TYPE_X86_64) {
    return addr >= SHARED_REGION_BASE_X86_64 &&
           addr < (SHARED_REGION_BASE_X86_64 + SHARED_REGION_SIZE_X86_64);
  } else {
    return false;
  }
}

// This is a rough approximation of the algorithm that libtop uses.
// private_bytes is the size of private resident memory.
// shared_bytes is the size of shared resident memory.
bool ProcessMetrics::GetMemoryBytes(size_t* private_bytes,
                                    size_t* shared_bytes) {
  kern_return_t kr;
  size_t private_pages_count = 0;
  size_t shared_pages_count = 0;

  if (!private_bytes && !shared_bytes)
    return true;

  mach_port_t task = TaskForPid(process_);
  if (task == MACH_PORT_NULL) {
    DLOG(ERROR) << "Invalid process";
    return false;
  }

  cpu_type_t cpu_type;
  if (!GetCPUTypeForProcess(process_, &cpu_type))
    return false;

  // The same region can be referenced multiple times. To avoid double counting
  // we need to keep track of which regions we've already counted.
  base::hash_set<int> seen_objects;

  // We iterate through each VM region in the task's address map. For shared
  // memory we add up all the pages that are marked as shared. Like libtop we
  // try to avoid counting pages that are also referenced by other tasks. Since
  // we don't have access to the VM regions of other tasks, the only hint we
  // have is whether the address is in the shared region area.
  //
  // Private memory is much simpler. We simply count the pages that are marked
  // as private or copy on write (COW).
  //
  // See libtop_update_vm_regions in
  // http://www.opensource.apple.com/source/top/top-67/libtop.c
  mach_vm_size_t size = 0;
  for (mach_vm_address_t address = MACH_VM_MIN_ADDRESS;; address += size) {
    vm_region_top_info_data_t info;
    mach_msg_type_number_t info_count = VM_REGION_TOP_INFO_COUNT;
    mach_port_t object_name;
    kr = mach_vm_region(task,
                        &address,
                        &size,
                        VM_REGION_TOP_INFO,
                        (vm_region_info_t)&info,
                        &info_count,
                        &object_name);
    if (kr == KERN_INVALID_ADDRESS) {
      // We're at the end of the address space.
      break;
    } else if (kr != KERN_SUCCESS) {
      DLOG(ERROR) << "Calling mach_vm_region failed with error: "
                  << mach_error_string(kr);
      return false;
    }

    if (IsAddressInSharedRegion(address, cpu_type) &&
        info.share_mode != SM_PRIVATE)
      continue;

    if (info.share_mode == SM_COW && info.ref_count == 1)
      info.share_mode = SM_PRIVATE;

    switch (info.share_mode) {
      case SM_PRIVATE:
        private_pages_count += info.private_pages_resident;
        private_pages_count += info.shared_pages_resident;
        break;
      case SM_COW:
        private_pages_count += info.private_pages_resident;
        // Fall through to count the shared pages as well.
      case SM_SHARED:
        if (seen_objects.count(info.obj_id) == 0) {
          // Only count the first reference to this region.
          seen_objects.insert(info.obj_id);
          shared_pages_count += info.shared_pages_resident;
        }
        break;
      default:
        break;
    }
  }

  vm_size_t page_size;
  kr = host_page_size(task, &page_size);
  if (kr != KERN_SUCCESS) {
    DLOG(ERROR) << "Failed to fetch host page size, error: "
                << mach_error_string(kr);
    return false;
  }

  if (private_bytes)
    *private_bytes = private_pages_count * page_size;
  if (shared_bytes)
    *shared_bytes = shared_pages_count * page_size;

  return true;
}
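
// Example usage (hypothetical caller; the current process needs no port
// provider, so NULL suffices):
//
//   scoped_ptr<base::ProcessMetrics> metrics(
//       base::ProcessMetrics::CreateProcessMetrics(getpid(), NULL));
//   size_t private_bytes = 0;
//   size_t shared_bytes = 0;
//   if (metrics->GetMemoryBytes(&private_bytes, &shared_bytes))
//     VLOG(1) << "private: " << private_bytes << " shared: " << shared_bytes;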

void ProcessMetrics::GetCommittedKBytes(CommittedKBytes* usage) const {
}

bool ProcessMetrics::GetWorkingSetKBytes(WorkingSetKBytes* ws_usage) const {
  size_t priv = GetWorkingSetSize();
  if (!priv)
    return false;
  ws_usage->priv = priv / 1024;
  ws_usage->shareable = 0;
  ws_usage->shared = 0;
  return true;
}

#define TIME_VALUE_TO_TIMEVAL(a, r) do {  \
  (r)->tv_sec = (a)->seconds;             \
  (r)->tv_usec = (a)->microseconds;       \
} while (0)

double ProcessMetrics::GetCPUUsage() {
  mach_port_t task = TaskForPid(process_);
  if (task == MACH_PORT_NULL)
    return 0;

  kern_return_t kr;

  // Libtop explicitly loops over the threads (libtop_pinfo_update_cpu_usage()
  // in libtop.c), but this is more concise and gives the same results:
  task_thread_times_info thread_info_data;
  mach_msg_type_number_t thread_info_count = TASK_THREAD_TIMES_INFO_COUNT;
  kr = task_info(task,
                 TASK_THREAD_TIMES_INFO,
                 reinterpret_cast<task_info_t>(&thread_info_data),
                 &thread_info_count);
  if (kr != KERN_SUCCESS) {
    // Most likely cause: |task| is a zombie.
    return 0;
  }

  task_basic_info_64 task_info_data;
  if (!GetTaskInfo(task, &task_info_data))
    return 0;

  // Set total_time: thread info contains live time...
  struct timeval user_timeval, system_timeval, task_timeval;
  TIME_VALUE_TO_TIMEVAL(&thread_info_data.user_time, &user_timeval);
  TIME_VALUE_TO_TIMEVAL(&thread_info_data.system_time, &system_timeval);
  timeradd(&user_timeval, &system_timeval, &task_timeval);

  // ... while task info contains the time of terminated threads.
  TIME_VALUE_TO_TIMEVAL(&task_info_data.user_time, &user_timeval);
  TIME_VALUE_TO_TIMEVAL(&task_info_data.system_time, &system_timeval);
  timeradd(&user_timeval, &task_timeval, &task_timeval);
  timeradd(&system_timeval, &task_timeval, &task_timeval);

  struct timeval now;
  int retval = gettimeofday(&now, NULL);
  if (retval)
    return 0;

  int64 time = TimeValToMicroseconds(now);
  int64 task_time = TimeValToMicroseconds(task_timeval);

  if ((last_system_time_ == 0) || (last_time_ == 0)) {
    // First call: just record the current values and report 0.
    last_system_time_ = task_time;
    last_time_ = time;
    return 0;
  }

  int64 system_time_delta = task_time - last_system_time_;
  int64 time_delta = time - last_time_;
  DCHECK_NE(0U, time_delta);
  if (time_delta == 0)
    return 0;

  last_system_time_ = task_time;
  last_time_ = time;

  return static_cast<double>(system_time_delta * 100.0) / time_delta;
}
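
// GetCPUUsage() reports the average CPU use since the previous call, so a
// caller needs two samples separated by an interval (hypothetical sketch):
//
//   metrics->GetCPUUsage();  // Establishes the baseline; returns 0.
//   base::PlatformThread::Sleep(base::TimeDelta::FromSeconds(1));
//   double cpu_percent = metrics->GetCPUUsage();  // % of one CPU over 1s.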

mach_port_t ProcessMetrics::TaskForPid(ProcessHandle process) const {
  mach_port_t task = MACH_PORT_NULL;
  if (port_provider_)
    task = port_provider_->TaskForPid(process_);
  if (task == MACH_PORT_NULL && process_ == getpid())
    task = mach_task_self();
  return task;
}

// ------------------------------------------------------------------------

// Returns the amount of memory committed by the system, in KB.
size_t GetSystemCommitCharge() {
  base::mac::ScopedMachPort host(mach_host_self());
  mach_msg_type_number_t count = HOST_VM_INFO_COUNT;
  vm_statistics_data_t data;
  kern_return_t kr = host_statistics(host, HOST_VM_INFO,
                                     reinterpret_cast<host_info_t>(&data),
                                     &count);
  if (kr) {
    DLOG(WARNING) << "Failed to fetch host statistics.";
    return 0;
  }

  vm_size_t page_size;
  kr = host_page_size(host, &page_size);
  if (kr) {
    DLOG(ERROR) << "Failed to fetch host page size.";
    return 0;
  }

  return (data.active_count * page_size) / 1024;
}

// These are helpers for EnableTerminationOnHeapCorruption, which is a no-op
// on 64-bit Macs.
#if ARCH_CPU_32_BITS
namespace {

// Finds the library path for malloc() and thus the libC part of libSystem,
// which in Lion is in a separate image.
const char* LookUpLibCPath() {
  const void* addr = reinterpret_cast<void*>(&malloc);

  Dl_info info;
  if (dladdr(addr, &info))
    return info.dli_fname;

  DLOG(WARNING) << "Could not find image path for malloc()";
  return NULL;
}

typedef void(*malloc_error_break_t)(void);
malloc_error_break_t g_original_malloc_error_break = NULL;

// Returns the function pointer for malloc_error_break. This symbol is declared
// as __private_extern__ and cannot be dlsym()ed. Instead, use nlist() to
// get it.
malloc_error_break_t LookUpMallocErrorBreak() {
  const char* lib_c_path = LookUpLibCPath();
  if (!lib_c_path)
    return NULL;

  // Only need to look up two symbols, but nlist() requires a NULL-terminated
  // array and takes no count.
  struct nlist nl[3];
  bzero(&nl, sizeof(nl));

  // The symbol to find.
  nl[0].n_un.n_name = const_cast<char*>("_malloc_error_break");

  // A reference symbol by which the address of the desired symbol will be
  // calculated.
  nl[1].n_un.n_name = const_cast<char*>("_malloc");

  int rv = nlist(lib_c_path, nl);
  if (rv != 0 || nl[0].n_type == N_UNDF || nl[1].n_type == N_UNDF) {
    return NULL;
  }

  // nlist() returns addresses as offsets in the image, not the instruction
  // pointer in memory. Use the known in-memory address of malloc()
  // to compute the offset for malloc_error_break().
  uintptr_t reference_addr = reinterpret_cast<uintptr_t>(&malloc);
  reference_addr -= nl[1].n_value;
  reference_addr += nl[0].n_value;

  return reinterpret_cast<malloc_error_break_t>(reference_addr);
}
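
// Worked example with hypothetical numbers: if &malloc is 0x91234000 at
// runtime, and nlist() reports n_value 0x4000 for _malloc and 0x6200 for
// _malloc_error_break, then the image's load address is
// 0x91234000 - 0x4000 = 0x91230000, and malloc_error_break resolves to
// 0x91230000 + 0x6200 = 0x91236200.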

// Simple scoper that saves the current value of errno, resets it to 0, and on
// destruction puts the old value back. This is so that CrMallocErrorBreak can
// safely test errno free from the effects of other routines.
class ScopedClearErrno {
 public:
  ScopedClearErrno() : old_errno_(errno) {
    errno = 0;
  }
  ~ScopedClearErrno() {
    if (errno == 0)
      errno = old_errno_;
  }

 private:
  const int old_errno_;

  DISALLOW_COPY_AND_ASSIGN(ScopedClearErrno);
};

// Combines ThreadLocalBoolean with AutoReset. It would be convenient
// to compose ThreadLocalPointer<bool> with base::AutoReset<bool>, but that
// would require allocating some storage for the bool.
class ThreadLocalBooleanAutoReset {
 public:
  ThreadLocalBooleanAutoReset(ThreadLocalBoolean* tlb, bool new_value)
      : scoped_tlb_(tlb),
        original_value_(tlb->Get()) {
    scoped_tlb_->Set(new_value);
  }
  ~ThreadLocalBooleanAutoReset() {
    scoped_tlb_->Set(original_value_);
  }

 private:
  ThreadLocalBoolean* scoped_tlb_;
  bool original_value_;

  DISALLOW_COPY_AND_ASSIGN(ThreadLocalBooleanAutoReset);
};
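
// Example usage (mirrors UncheckedMalloc() below): suppress the EBADF check
// in CrMallocErrorBreak() for the duration of one scope only:
//
//   {
//     ThreadLocalBooleanAutoReset flag(g_unchecked_malloc.Pointer(), true);
//     // Allocations on this thread see the flag as true here.
//   }  // Destructor restores the previous value.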

base::LazyInstance<ThreadLocalBoolean>::Leaky
    g_unchecked_malloc = LAZY_INSTANCE_INITIALIZER;

// NOTE(shess): This is called when the malloc library noticed that the heap
// is fubar. Avoid calls which will re-enter the malloc library.
void CrMallocErrorBreak() {
  g_original_malloc_error_break();

  // Out of memory is certainly not heap corruption, and not necessarily
  // something for which the process should be terminated. Leave that decision
  // to the OOM killer. The EBADF case comes up because the malloc library
  // attempts to log to ASL (syslog) before calling this code, which fails
  // accessing a Unix-domain socket because of sandboxing.
  if (errno == ENOMEM || (errno == EBADF && g_unchecked_malloc.Get().Get()))
    return;

  // A unit test checks this error message, so it needs to be in release
  // builds.
  char buf[1024] =
      "Terminating process due to a potential for future heap corruption: "
      "errno=";
  char errnobuf[] = {
    '0' + ((errno / 100) % 10),
    '0' + ((errno / 10) % 10),
    '0' + (errno % 10),
    '\0'
  };
  COMPILE_ASSERT(ELAST <= 999, errno_too_large_to_encode);
  strlcat(buf, errnobuf, sizeof(buf));
  RAW_LOG(ERROR, buf);

  // Crash by writing to NULL+errno to allow analyzing errno from
  // crash dump info (setting a breakpad key would re-enter the malloc
  // library). Max documented errno in intro(2) is actually 102, but
  // it really just needs to be "small" to stay on the right vm page.
  const int kMaxErrno = 256;
  char* volatile death_ptr = NULL;
  death_ptr += std::min(errno, kMaxErrno);
  *death_ptr = '!';
}
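
// For example (hypothetical dump): a crash dump showing the write above
// faulting at address 0x9 means errno was 9 (EBADF) when the heap-corruption
// report fired. ENOMEM never reaches this write because of the early return
// above.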

}  // namespace
#endif  // ARCH_CPU_32_BITS

void EnableTerminationOnHeapCorruption() {
#if defined(ADDRESS_SANITIZER) || ARCH_CPU_64_BITS
  // AddressSanitizer handles heap corruption, and on 64 bit Macs, the malloc
  // system automatically abort()s on heap corruption.
  return;
#else
  // Only override once, otherwise CrMallocErrorBreak() will recurse
  // into itself.
  if (g_original_malloc_error_break)
    return;

  malloc_error_break_t malloc_error_break = LookUpMallocErrorBreak();
  if (!malloc_error_break) {
    DLOG(WARNING) << "Could not find malloc_error_break";
    return;
  }

  mach_error_t err = mach_override_ptr(
      (void*)malloc_error_break,
      (void*)&CrMallocErrorBreak,
      (void**)&g_original_malloc_error_break);

  if (err != err_none)
    DLOG(WARNING) << "Could not override malloc_error_break; error = " << err;
#endif  // defined(ADDRESS_SANITIZER) || ARCH_CPU_64_BITS
}

// ------------------------------------------------------------------------

namespace {

bool g_oom_killer_enabled;

// Starting with Mac OS X 10.7, the zone allocators set up by the system are
// read-only, to prevent them from being overwritten in an attack. However,
// blindly unprotecting and reprotecting the zone allocators fails with
// GuardMalloc because GuardMalloc sets up its zone allocator using a block of
// memory in its bss. Explicit saving/restoring of the protection is required.
//
// This function takes a pointer to a malloc zone, de-protects it if necessary,
// and returns (in the out parameters) a region of memory (if any) to be
// re-protected when modifications are complete. This approach assumes that
// there is no contention for the protection of this memory.
void DeprotectMallocZone(ChromeMallocZone* default_zone,
                         mach_vm_address_t* reprotection_start,
                         mach_vm_size_t* reprotection_length,
                         vm_prot_t* reprotection_value) {
  mach_port_t unused;
  *reprotection_start = reinterpret_cast<mach_vm_address_t>(default_zone);
  struct vm_region_basic_info_64 info;
  mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
  kern_return_t result =
      mach_vm_region(mach_task_self(),
                     reprotection_start,
                     reprotection_length,
                     VM_REGION_BASIC_INFO_64,
                     reinterpret_cast<vm_region_info_t>(&info),
                     &count,
                     &unused);
  CHECK(result == KERN_SUCCESS);

  result = mach_port_deallocate(mach_task_self(), unused);
  CHECK(result == KERN_SUCCESS);

  // Does the region fully enclose the zone pointers? Possibly unwarranted
  // simplification used: using the size of a full version 8 malloc zone rather
  // than the actual smaller size if the passed-in zone is not version 8.
  CHECK(*reprotection_start <=
        reinterpret_cast<mach_vm_address_t>(default_zone));
  mach_vm_size_t zone_offset = reinterpret_cast<mach_vm_size_t>(default_zone) -
      reinterpret_cast<mach_vm_size_t>(*reprotection_start);
  CHECK(zone_offset + sizeof(ChromeMallocZone) <= *reprotection_length);

  if (info.protection & VM_PROT_WRITE) {
    // No change needed; the zone is already writable.
    *reprotection_start = 0;
    *reprotection_length = 0;
    *reprotection_value = VM_PROT_NONE;
  } else {
    *reprotection_value = info.protection;
    result = mach_vm_protect(mach_task_self(),
                             *reprotection_start,
                             *reprotection_length,
                             false,
                             info.protection | VM_PROT_WRITE);
    CHECK(result == KERN_SUCCESS);
  }
}

// === C malloc/calloc/valloc/realloc/posix_memalign ===

typedef void* (*malloc_type)(struct _malloc_zone_t* zone,
                             size_t size);
typedef void* (*calloc_type)(struct _malloc_zone_t* zone,
                             size_t num_items,
                             size_t size);
typedef void* (*valloc_type)(struct _malloc_zone_t* zone,
                             size_t size);
typedef void (*free_type)(struct _malloc_zone_t* zone,
                          void* ptr);
typedef void* (*realloc_type)(struct _malloc_zone_t* zone,
                              void* ptr,
                              size_t size);
typedef void* (*memalign_type)(struct _malloc_zone_t* zone,
                               size_t alignment,
                               size_t size);

malloc_type g_old_malloc;
calloc_type g_old_calloc;
valloc_type g_old_valloc;
free_type g_old_free;
realloc_type g_old_realloc;
memalign_type g_old_memalign;

malloc_type g_old_malloc_purgeable;
calloc_type g_old_calloc_purgeable;
valloc_type g_old_valloc_purgeable;
free_type g_old_free_purgeable;
realloc_type g_old_realloc_purgeable;
memalign_type g_old_memalign_purgeable;

void* oom_killer_malloc(struct _malloc_zone_t* zone,
                        size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_malloc(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_calloc(struct _malloc_zone_t* zone,
                        size_t num_items,
                        size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_calloc(zone, num_items, size);
  if (!result && num_items && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_valloc(struct _malloc_zone_t* zone,
                        size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_valloc(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void oom_killer_free(struct _malloc_zone_t* zone,
                     void* ptr) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  g_old_free(zone, ptr);
}

void* oom_killer_realloc(struct _malloc_zone_t* zone,
                         void* ptr,
                         size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_realloc(zone, ptr, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_memalign(struct _malloc_zone_t* zone,
                          size_t alignment,
                          size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_memalign(zone, alignment, size);
  // Only die if posix_memalign would have returned ENOMEM, since there are
  // other reasons why NULL might be returned (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
  if (!result && size && alignment >= sizeof(void*)
      && (alignment & (alignment - 1)) == 0) {
    debug::BreakDebugger();
  }
  return result;
}

void* oom_killer_malloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_malloc_purgeable(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_calloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t num_items,
                                  size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_calloc_purgeable(zone, num_items, size);
  if (!result && num_items && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_valloc_purgeable(struct _malloc_zone_t* zone,
                                  size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_valloc_purgeable(zone, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void oom_killer_free_purgeable(struct _malloc_zone_t* zone,
                               void* ptr) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  g_old_free_purgeable(zone, ptr);
}

void* oom_killer_realloc_purgeable(struct _malloc_zone_t* zone,
                                   void* ptr,
                                   size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_realloc_purgeable(zone, ptr, size);
  if (!result && size)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_memalign_purgeable(struct _malloc_zone_t* zone,
                                    size_t alignment,
                                    size_t size) {
#if ARCH_CPU_32_BITS
  ScopedClearErrno clear_errno;
#endif  // ARCH_CPU_32_BITS
  void* result = g_old_memalign_purgeable(zone, alignment, size);
  // Only die if posix_memalign would have returned ENOMEM, since there are
  // other reasons why NULL might be returned (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
  if (!result && size && alignment >= sizeof(void*)
      && (alignment & (alignment - 1)) == 0) {
    debug::BreakDebugger();
  }
  return result;
}

// === C++ operator new ===

void oom_killer_new() {
  debug::BreakDebugger();
}

// === Core Foundation CFAllocators ===

bool CanGetContextForCFAllocator() {
  return !base::mac::IsOSLaterThanMountainLion_DontCallThis();
}

CFAllocatorContext* ContextForCFAllocator(CFAllocatorRef allocator) {
  if (base::mac::IsOSSnowLeopard()) {
    ChromeCFAllocatorLeopards* our_allocator =
        const_cast<ChromeCFAllocatorLeopards*>(
            reinterpret_cast<const ChromeCFAllocatorLeopards*>(allocator));
    return &our_allocator->_context;
  } else if (base::mac::IsOSLion() || base::mac::IsOSMountainLion()) {
    ChromeCFAllocatorLions* our_allocator =
        const_cast<ChromeCFAllocatorLions*>(
            reinterpret_cast<const ChromeCFAllocatorLions*>(allocator));
    return &our_allocator->_context;
  } else {
    return NULL;
  }
}

CFAllocatorAllocateCallBack g_old_cfallocator_system_default;
CFAllocatorAllocateCallBack g_old_cfallocator_malloc;
CFAllocatorAllocateCallBack g_old_cfallocator_malloc_zone;

void* oom_killer_cfallocator_system_default(CFIndex alloc_size,
                                            CFOptionFlags hint,
                                            void* info) {
  void* result = g_old_cfallocator_system_default(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_cfallocator_malloc(CFIndex alloc_size,
                                    CFOptionFlags hint,
                                    void* info) {
  void* result = g_old_cfallocator_malloc(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

void* oom_killer_cfallocator_malloc_zone(CFIndex alloc_size,
                                         CFOptionFlags hint,
                                         void* info) {
  void* result = g_old_cfallocator_malloc_zone(alloc_size, hint, info);
  if (!result)
    debug::BreakDebugger();
  return result;
}

// === Cocoa NSObject allocation ===

typedef id (*allocWithZone_t)(id, SEL, NSZone*);
allocWithZone_t g_old_allocWithZone;

id oom_killer_allocWithZone(id self, SEL _cmd, NSZone* zone)
{
  id result = g_old_allocWithZone(self, _cmd, zone);
  if (!result)
    debug::BreakDebugger();
  return result;
}

}  // namespace

void* UncheckedMalloc(size_t size) {
  if (g_old_malloc) {
#if ARCH_CPU_32_BITS
    ScopedClearErrno clear_errno;
    ThreadLocalBooleanAutoReset flag(g_unchecked_malloc.Pointer(), true);
#endif  // ARCH_CPU_32_BITS
    return g_old_malloc(malloc_default_zone(), size);
  }
  return malloc(size);
}
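
// Example usage (hypothetical caller): code on an allocation path that can
// recover from failure, such as a decoder sizing a very large buffer, can use
// UncheckedMalloc() so that NULL is returned to the caller instead of
// terminating the process:
//
//   void* buffer = base::UncheckedMalloc(bytes);
//   if (!buffer)
//     return false;  // Degrade gracefully; the OOM killer is not triggered.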

void EnableTerminationOnOutOfMemory() {
  if (g_oom_killer_enabled)
    return;

  g_oom_killer_enabled = true;

  // === C malloc/calloc/valloc/realloc/posix_memalign ===

  // This approach is not perfect, as requests for amounts of memory larger
  // than MALLOC_ABSOLUTE_MAX_SIZE (currently SIZE_T_MAX - (2 * PAGE_SIZE))
  // will still fail with a NULL rather than dying (see
  // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c for
  // details). Unfortunately, it's the best we can do. Also note that this
  // does not affect allocations from non-default zones.

  CHECK(!g_old_malloc && !g_old_calloc && !g_old_valloc && !g_old_realloc &&
        !g_old_memalign) << "Old allocators unexpectedly non-null";

  CHECK(!g_old_malloc_purgeable && !g_old_calloc_purgeable &&
        !g_old_valloc_purgeable && !g_old_realloc_purgeable &&
        !g_old_memalign_purgeable) << "Old allocators unexpectedly non-null";

#if !defined(ADDRESS_SANITIZER)
  // Don't do anything special on OOM for the malloc zones replaced by
  // AddressSanitizer, as modifying or protecting them may not work correctly.

  ChromeMallocZone* default_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_zone());
  ChromeMallocZone* purgeable_zone =
      reinterpret_cast<ChromeMallocZone*>(malloc_default_purgeable_zone());

  mach_vm_address_t default_reprotection_start = 0;
  mach_vm_size_t default_reprotection_length = 0;
  vm_prot_t default_reprotection_value = VM_PROT_NONE;
  DeprotectMallocZone(default_zone,
                      &default_reprotection_start,
                      &default_reprotection_length,
                      &default_reprotection_value);

  mach_vm_address_t purgeable_reprotection_start = 0;
  mach_vm_size_t purgeable_reprotection_length = 0;
  vm_prot_t purgeable_reprotection_value = VM_PROT_NONE;
  if (purgeable_zone) {
    DeprotectMallocZone(purgeable_zone,
                        &purgeable_reprotection_start,
                        &purgeable_reprotection_length,
                        &purgeable_reprotection_value);
  }

  // Default zone

  g_old_malloc = default_zone->malloc;
  g_old_calloc = default_zone->calloc;
  g_old_valloc = default_zone->valloc;
  g_old_free = default_zone->free;
  g_old_realloc = default_zone->realloc;
  CHECK(g_old_malloc && g_old_calloc && g_old_valloc && g_old_free &&
        g_old_realloc)
      << "Failed to get system allocation functions.";

  default_zone->malloc = oom_killer_malloc;
  default_zone->calloc = oom_killer_calloc;
  default_zone->valloc = oom_killer_valloc;
  default_zone->free = oom_killer_free;
  default_zone->realloc = oom_killer_realloc;

  if (default_zone->version >= 5) {
    g_old_memalign = default_zone->memalign;
    if (g_old_memalign)
      default_zone->memalign = oom_killer_memalign;
  }

  // Purgeable zone (if it exists)

  if (purgeable_zone) {
    g_old_malloc_purgeable = purgeable_zone->malloc;
    g_old_calloc_purgeable = purgeable_zone->calloc;
    g_old_valloc_purgeable = purgeable_zone->valloc;
    g_old_free_purgeable = purgeable_zone->free;
    g_old_realloc_purgeable = purgeable_zone->realloc;
    CHECK(g_old_malloc_purgeable && g_old_calloc_purgeable &&
          g_old_valloc_purgeable && g_old_free_purgeable &&
          g_old_realloc_purgeable)
        << "Failed to get system allocation functions.";

    purgeable_zone->malloc = oom_killer_malloc_purgeable;
    purgeable_zone->calloc = oom_killer_calloc_purgeable;
    purgeable_zone->valloc = oom_killer_valloc_purgeable;
    purgeable_zone->free = oom_killer_free_purgeable;
    purgeable_zone->realloc = oom_killer_realloc_purgeable;

    if (purgeable_zone->version >= 5) {
      g_old_memalign_purgeable = purgeable_zone->memalign;
      if (g_old_memalign_purgeable)
        purgeable_zone->memalign = oom_killer_memalign_purgeable;
    }
  }

  // Restore protection if it was active.

  if (default_reprotection_start) {
    kern_return_t result = mach_vm_protect(mach_task_self(),
                                           default_reprotection_start,
                                           default_reprotection_length,
                                           false,
                                           default_reprotection_value);
    CHECK(result == KERN_SUCCESS);
  }

  if (purgeable_reprotection_start) {
    kern_return_t result = mach_vm_protect(mach_task_self(),
                                           purgeable_reprotection_start,
                                           purgeable_reprotection_length,
                                           false,
                                           purgeable_reprotection_value);
    CHECK(result == KERN_SUCCESS);
  }
#endif  // !defined(ADDRESS_SANITIZER)

  // === C malloc_zone_batch_malloc ===

  // batch_malloc is omitted because the default malloc zone's implementation
  // only supports batch_malloc for "tiny" allocations from the free list. It
  // will fail for allocations larger than "tiny", and will only allocate as
  // many blocks as it's able to from the free list. These factors mean that it
  // can return less than the requested memory even in a non-out-of-memory
  // situation. There's no good way to detect whether a batch_malloc failure is
  // due to these other factors, or due to genuine memory or address space
  // exhaustion. The fact that it only allocates space from the "tiny" free
  // list means that it's likely that a failure will not be due to memory
  // exhaustion. Similarly, these constraints on batch_malloc mean that callers
  // must always be expecting to receive less memory than was requested, even
  // in situations where memory pressure is not a concern. Finally, the only
  // public interface to batch_malloc is malloc_zone_batch_malloc, which is
  // specific to the system's malloc implementation. It's unlikely that
  // anyone's even heard of it.

  // === C++ operator new ===

  // Yes, operator new does call through to malloc, but this will catch
  // failures that our imperfect handling of malloc cannot.

  std::set_new_handler(oom_killer_new);

#ifndef ADDRESS_SANITIZER
  // === Core Foundation CFAllocators ===

  // This will not catch allocation done by custom allocators, but will catch
  // all allocation done by system-provided ones.

  CHECK(!g_old_cfallocator_system_default && !g_old_cfallocator_malloc &&
        !g_old_cfallocator_malloc_zone)
      << "Old allocators unexpectedly non-null";

  bool cf_allocator_internals_known = CanGetContextForCFAllocator();

  if (cf_allocator_internals_known) {
    CFAllocatorContext* context =
        ContextForCFAllocator(kCFAllocatorSystemDefault);
    CHECK(context) << "Failed to get context for kCFAllocatorSystemDefault.";
    g_old_cfallocator_system_default = context->allocate;
    CHECK(g_old_cfallocator_system_default)
        << "Failed to get kCFAllocatorSystemDefault allocation function.";
    context->allocate = oom_killer_cfallocator_system_default;

    context = ContextForCFAllocator(kCFAllocatorMalloc);
    CHECK(context) << "Failed to get context for kCFAllocatorMalloc.";
    g_old_cfallocator_malloc = context->allocate;
    CHECK(g_old_cfallocator_malloc)
        << "Failed to get kCFAllocatorMalloc allocation function.";
    context->allocate = oom_killer_cfallocator_malloc;

    context = ContextForCFAllocator(kCFAllocatorMallocZone);
    CHECK(context) << "Failed to get context for kCFAllocatorMallocZone.";
    g_old_cfallocator_malloc_zone = context->allocate;
    CHECK(g_old_cfallocator_malloc_zone)
        << "Failed to get kCFAllocatorMallocZone allocation function.";
    context->allocate = oom_killer_cfallocator_malloc_zone;
  } else {
    NSLog(@"Internals of CFAllocator not known; out-of-memory failures via "
          "CFAllocator will not result in termination. http://crbug.com/45650");
  }
#endif  // !defined(ADDRESS_SANITIZER)

  // === Cocoa NSObject allocation ===

  // Note that both +[NSObject new] and +[NSObject alloc] call through to
  // +[NSObject allocWithZone:].

  CHECK(!g_old_allocWithZone)
      << "Old allocator unexpectedly non-null";

  Class nsobject_class = [NSObject class];
  Method orig_method = class_getClassMethod(nsobject_class,
                                            @selector(allocWithZone:));
  g_old_allocWithZone = reinterpret_cast<allocWithZone_t>(
      method_getImplementation(orig_method));
  CHECK(g_old_allocWithZone)
      << "Failed to get allocWithZone allocation function.";
  method_setImplementation(orig_method,
                           reinterpret_cast<IMP>(oom_killer_allocWithZone));
}
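
// Example (hypothetical embedder): these hooks are typically installed once,
// early in process startup, before any threads are spawned:
//
//   int main(int argc, char** argv) {
//     base::EnableTerminationOnOutOfMemory();
//     base::EnableTerminationOnHeapCorruption();
//     // ...
//   }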

ProcessId GetParentProcessId(ProcessHandle process) {
  struct kinfo_proc info;
  size_t length = sizeof(struct kinfo_proc);
  int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, process };
  if (sysctl(mib, 4, &info, &length, NULL, 0) < 0) {
    DPLOG(ERROR) << "sysctl";
    return -1;
  }
  if (length == 0)
    return -1;
  return info.kp_eproc.e_ppid;
}

namespace {

const int kWaitBeforeKillSeconds = 2;

// Reap |child| process. This call blocks until completion.
void BlockingReap(pid_t child) {
  const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
  if (result == -1)
    DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
}

// Waits for |timeout| seconds for the given |child| to exit and reap it. If
// the child doesn't exit within the time specified, kills it.
//
// This function takes two approaches: first, it tries to use kqueue to
// observe when the process exits. kevent can monitor a kqueue with a
// timeout, so this method is preferred to wait for a specified period of
// time. Once the kqueue indicates the process has exited, waitpid will reap
// the exited child. If the kqueue doesn't provide an exit event notification
// before the timeout expires, or if the kqueue fails or misbehaves, the
// process will be mercilessly killed and reaped.
//
// A child process passed to this function may be in one of several states:
// running, terminated and not yet reaped, and (apparently, and unfortunately)
// terminated and already reaped. Normally, a process will at least have been
// asked to exit before this function is called, but this is not required.
// If a process is terminating and unreaped, there may be a window between the
// time that kqueue will no longer recognize it and when it becomes an actual
// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
// detected when kqueue indicates that the process is not running and a
// non-blocking waitpid fails to reap the process but indicates that it is
// still running. In this event, a blocking attempt to reap the process
// collects the known-dying child, preventing zombies from congregating.
//
// In the event that the kqueue misbehaves entirely, as it might under an
// EMFILE condition ("too many open files", or out of file descriptors), this
// function will forcibly kill and reap the child without delay. This
// eliminates another potential zombie vector. (If you're out of file
// descriptors, you're probably deep into something else, but that doesn't
// mean that zombies should be allowed to kick you while you're down.)
//
// The fact that this function seemingly can be called to wait on a child
// that's not only already terminated but already reaped is a bit of a
// problem: a reaped child's pid can be reclaimed and may refer to a distinct
// process in that case. The fact that this function can seemingly be called
// to wait on a process that's not even a child is also a problem: kqueue will
// work in that case, but waitpid won't, and killing a non-child might not be
// the best approach.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
  // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
  // call fails, this function should fall back to the last resort of trying
  // to kill and reap the process. Not observing this rule will resurrect
  // zombies.

  int result;

  int kq = HANDLE_EINTR(kqueue());
  if (kq == -1) {
    DPLOG(ERROR) << "kqueue()";
  } else {
    file_util::ScopedFD auto_close_kq(&kq);

    struct kevent change = {0};
    EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));

    if (result == -1) {
      if (errno != ESRCH) {
        DPLOG(ERROR) << "kevent (setup " << child << ")";
      } else {
        // At this point, one of the following has occurred:
        // 1. The process has died but has not yet been reaped.
        // 2. The process has died and has already been reaped.
        // 3. The process is in the process of dying. It's no longer
        //    kqueueable, but it may not be waitable yet either. Mark calls
        //    this case the "zombie death race".

        result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

        if (result != 0) {
          // A positive result indicates case 1: waitpid succeeded and reaped
          // the child. A result of -1 indicates case 2: the child has already
          // been reaped. In both of these cases, no further action is
          // necessary.
          return;
        }

        // |result| is 0, indicating case 3. The process will be waitable in
        // short order. Fall back out of the kqueue code to kill it (for good
        // measure) and reap it.
      }
    } else {
      // Keep track of the elapsed time to be able to restart kevent if it's
      // interrupted.
      TimeDelta remaining_delta = TimeDelta::FromSeconds(timeout);
      TimeTicks deadline = TimeTicks::Now() + remaining_delta;

      struct kevent event = {0};
      while (remaining_delta.InMilliseconds() > 0) {
        const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
        result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);
        if (result == -1 && errno == EINTR) {
          remaining_delta = deadline - TimeTicks::Now();
          result = 0;
        } else {
          break;
        }
      }

      if (result == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (result > 1) {
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << result;
      } else if (result == 1) {
        if ((event.fflags & NOTE_EXIT) &&
            (event.ident == static_cast<uintptr_t>(child))) {
          // The process is dead or dying. This won't block for long, if at
          // all.
          BlockingReap(child);
          return;
        } else {
          DLOG(ERROR) << "kevent (wait " << child
                      << "): unexpected event: fflags=" << event.fflags
                      << ", ident=" << event.ident;
        }
      }
    }
  }

  // The child is still alive, or is very freshly dead. Be sure by sending it
  // a signal. This is safe even if it's freshly dead, because it will be a
  // zombie (or on the way to zombiedom) and kill will return 0 even if the
  // signal is not delivered to a live process.
  result = kill(child, SIGKILL);
  if (result == -1) {
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  } else {
    // The child is definitely on the way out now. BlockingReap won't need to
    // wait for long, if at all.
    BlockingReap(child);
  }
}

}  // namespace

void EnsureProcessTerminated(ProcessHandle process) {
  WaitForChildToDie(process, kWaitBeforeKillSeconds);
}

}  // namespace base