base/process_util_mac.mm

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/process_util.h"
   6
   7 #import <Cocoa/Cocoa.h>
   8 #include <crt_externs.h>
   9 #include <errno.h>
  10 #include <mach/mach.h>
  11 #include <mach/mach_init.h>
  12 #include <mach/mach_vm.h>
  13 #include <mach/shared_region.h>
  14 #include <mach/task.h>
  15 #include <malloc/malloc.h>
  16 #import <objc/runtime.h>
  17 #include <signal.h>
  18 #include <spawn.h>
  19 #include <sys/event.h>
  20 #include <sys/sysctl.h>
  21 #include <sys/types.h>
  22 #include <sys/wait.h>
  23
  24 #include <new>
  25 #include <string>
  26
  27 #include "base/debug/debugger.h"
  28 #include "base/file_util.h"
  29 #include "base/hash_tables.h"
  30 #include "base/lazy_instance.h"
  31 #include "base/logging.h"
  32 #include "base/mac/mac_util.h"
  33 #include "base/mac/scoped_mach_port.h"
  34 #include "base/posix/eintr_wrapper.h"
  35 #include "base/scoped_clear_errno.h"
  36 #include "base/string_util.h"
  37 #include "base/sys_info.h"
  38 #include "third_party/apple_apsl/CFBase.h"
  39 #include "third_party/apple_apsl/malloc.h"
  40
  41 #if ARCH_CPU_32_BITS
  42 #include <dlfcn.h>
  43 #include <mach-o/nlist.h>
  44
  45 #include "base/threading/thread_local.h"
  46 #include "third_party/mach_override/mach_override.h"
  47 #endif  // ARCH_CPU_32_BITS
  48
  49 namespace base {
  50
  51 void RestoreDefaultExceptionHandler() {
  52   // This function is tailored to remove the Breakpad exception handler.
  53   // exception_mask matches s_exception_mask in
  54   // breakpad/src/client/mac/handler/exception_handler.cc
  55   const exception_mask_t exception_mask = EXC_MASK_BAD_ACCESS |
  56                                           EXC_MASK_BAD_INSTRUCTION |
  57                                           EXC_MASK_ARITHMETIC |
  58                                           EXC_MASK_BREAKPOINT;
  59
  60   // Setting the exception port to MACH_PORT_NULL may not be entirely
  61   // kosher to restore the default exception handler, but in practice,
  62   // it results in the exception port being set to Apple Crash Reporter,
  63   // the desired behavior.
  64   task_set_exception_ports(mach_task_self(), exception_mask, MACH_PORT_NULL,
  65                            EXCEPTION_DEFAULT, THREAD_STATE_NONE);
  66 }
  67
  68
  69
  70 // These are helpers for EnableTerminationOnHeapCorruption, which is a no-op
  71 // on 64 bit Macs.
  72 #if ARCH_CPU_32_BITS
  73 namespace {
  74
  75 // Finds the library path for malloc() and thus the libC part of libSystem,
  76 // which in Lion is in a separate image.
  77 const char* LookUpLibCPath() {
  78   const void* addr = reinterpret_cast<void*>(&malloc);
  79
  80   Dl_info info;
  81   if (dladdr(addr, &info))
  82     return info.dli_fname;
  83
  84   DLOG(WARNING) << "Could not find image path for malloc()";
  85   return NULL;
  86 }
  87
  88 typedef void(*malloc_error_break_t)(void);
  89 malloc_error_break_t g_original_malloc_error_break = NULL;
  90
  91 // Returns the function pointer for malloc_error_break. This symbol is declared
  92 // as __private_extern__ and cannot be dlsym()ed. Instead, use nlist() to
  93 // get it.
  94 malloc_error_break_t LookUpMallocErrorBreak() {
  95   const char* lib_c_path = LookUpLibCPath();
  96   if (!lib_c_path)
  97     return NULL;
  98
  99   // Only need to look up two symbols, but nlist() requires a NULL-terminated
 100   // array and takes no count.
 101   struct nlist nl[3];
 102   bzero(&nl, sizeof(nl));
 103
 104   // The symbol to find.
 105   nl[0].n_un.n_name = const_cast<char*>("_malloc_error_break");
 106
 107   // A reference symbol by which the address of the desired symbol will be
 108   // calculated.
 109   nl[1].n_un.n_name = const_cast<char*>("_malloc");
 110
 111   int rv = nlist(lib_c_path, nl);
 112   if (rv != 0 || nl[0].n_type == N_UNDF || nl[1].n_type == N_UNDF) {
 113     return NULL;
 114   }
 115
 116   // nlist() returns addresses as offsets in the image, not the instruction
 117   // pointer in memory. Use the known in-memory address of malloc()
 118   // to compute the offset for malloc_error_break().
 119   uintptr_t reference_addr = reinterpret_cast<uintptr_t>(&malloc);
 120   reference_addr -= nl[1].n_value;
 121   reference_addr += nl[0].n_value;
 122
 123   return reinterpret_cast<malloc_error_break_t>(reference_addr);
 124 }
 125
 126 // Combines ThreadLocalBoolean with AutoReset.  It would be convenient
 127 // to compose ThreadLocalPointer<bool> with base::AutoReset<bool>, but that
 128 // would require allocating some storage for the bool.
 129 class ThreadLocalBooleanAutoReset {
 130  public:
 131   ThreadLocalBooleanAutoReset(ThreadLocalBoolean* tlb, bool new_value)
 132       : scoped_tlb_(tlb),
 133         original_value_(tlb->Get()) {
 134     scoped_tlb_->Set(new_value);
 135   }
 136   ~ThreadLocalBooleanAutoReset() {
 137     scoped_tlb_->Set(original_value_);
 138   }
 139
 140  private:
 141   ThreadLocalBoolean* scoped_tlb_;
 142   bool original_value_;
 143
 144   DISALLOW_COPY_AND_ASSIGN(ThreadLocalBooleanAutoReset);
 145 };
 146
 147 base::LazyInstance<ThreadLocalBoolean>::Leaky
 148     g_unchecked_malloc = LAZY_INSTANCE_INITIALIZER;
 149
 150 // NOTE(shess): This is called when the malloc library noticed that the heap
 151 // is fubar.  Avoid calls which will re-enter the malloc library.
 152 void CrMallocErrorBreak() {
 153   g_original_malloc_error_break();
 154
 155   // Out of memory is certainly not heap corruption, and not necessarily
 156   // something for which the process should be terminated. Leave that decision
 157   // to the OOM killer.  The EBADF case comes up because the malloc library
 158   // attempts to log to ASL (syslog) before calling this code, which fails
 159   // accessing a Unix-domain socket because of sandboxing.
 160   if (errno == ENOMEM || (errno == EBADF && g_unchecked_malloc.Get().Get()))
 161     return;
 162
 163   // A unit test checks this error message, so it needs to be in release builds.
 164   char buf[1024] =
 165       "Terminating process due to a potential for future heap corruption: "
 166       "errno=";
 167   char errnobuf[] = {
 168     '0' + ((errno / 100) % 10),
 169     '0' + ((errno / 10) % 10),
 170     '0' + (errno % 10),
 171     '\000'
 172   };
 173   COMPILE_ASSERT(ELAST <= 999, errno_too_large_to_encode);
 174   strlcat(buf, errnobuf, sizeof(buf));
 175   RAW_LOG(ERROR, buf);
 176
 177   // Crash by writing to NULL+errno to allow analyzing errno from
 178   // crash dump info (setting a breakpad key would re-enter the malloc
 179   // library).  Max documented errno in intro(2) is actually 102, but
 180   // it really just needs to be "small" to stay on the right vm page.
 181   const int kMaxErrno = 256;
 182   char* volatile death_ptr = NULL;
 183   death_ptr += std::min(errno, kMaxErrno);
 184   *death_ptr = '!';
 185 }
 186
 187 }  // namespace
 188 #endif  // ARCH_CPU_32_BITS
 189
 190 void EnableTerminationOnHeapCorruption() {
 191 #if defined(ADDRESS_SANITIZER) || ARCH_CPU_64_BITS
 192   // AddressSanitizer handles heap corruption, and on 64 bit Macs, the malloc
 193   // system automatically abort()s on heap corruption.
 194   return;
 195 #else
 196   // Only override once, otherwise CrMallocErrorBreak() will recurse
 197   // to itself.
 198   if (g_original_malloc_error_break)
 199     return;
 200
 201   malloc_error_break_t malloc_error_break = LookUpMallocErrorBreak();
 202   if (!malloc_error_break) {
 203     DLOG(WARNING) << "Could not find malloc_error_break";
 204     return;
 205   }
 206
 207   mach_error_t err = mach_override_ptr(
 208      (void*)malloc_error_break,
 209      (void*)&CrMallocErrorBreak,
 210      (void**)&g_original_malloc_error_break);
 211
 212   if (err != err_none)
 213     DLOG(WARNING) << "Could not override malloc_error_break; error = " << err;
 214 #endif  // defined(ADDRESS_SANITIZER) || ARCH_CPU_64_BITS
 215 }
 216
 217 // ------------------------------------------------------------------------
 218
 219 namespace {
 220
 221 bool g_oom_killer_enabled;
 222
 223 // Starting with Mac OS X 10.7, the zone allocators set up by the system are
 224 // read-only, to prevent them from being overwritten in an attack. However,
 225 // blindly unprotecting and reprotecting the zone allocators fails with
 226 // GuardMalloc because GuardMalloc sets up its zone allocator using a block of
 227 // memory in its bss. Explicit saving/restoring of the protection is required.
 228 //
 229 // This function takes a pointer to a malloc zone, de-protects it if necessary,
 230 // and returns (in the out parameters) a region of memory (if any) to be
 231 // re-protected when modifications are complete. This approach assumes that
 232 // there is no contention for the protection of this memory.
 233 void DeprotectMallocZone(ChromeMallocZone* default_zone,
 234                          mach_vm_address_t* reprotection_start,
 235                          mach_vm_size_t* reprotection_length,
 236                          vm_prot_t* reprotection_value) {
 237   mach_port_t unused;
 238   *reprotection_start = reinterpret_cast<mach_vm_address_t>(default_zone);
 239   struct vm_region_basic_info_64 info;
 240   mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 241   kern_return_t result =
 242       mach_vm_region(mach_task_self(),
 243                      reprotection_start,
 244                      reprotection_length,
 245                      VM_REGION_BASIC_INFO_64,
 246                      reinterpret_cast<vm_region_info_t>(&info),
 247                      &count,
 248                      &unused);
 249   CHECK(result == KERN_SUCCESS);
 250
 251   result = mach_port_deallocate(mach_task_self(), unused);
 252   CHECK(result == KERN_SUCCESS);
 253
 254   // Does the region fully enclose the zone pointers? Possibly unwarranted
 255   // simplification used: using the size of a full version 8 malloc zone rather
 256   // than the actual smaller size if the passed-in zone is not version 8.
 257   CHECK(*reprotection_start <=
 258             reinterpret_cast<mach_vm_address_t>(default_zone));
 259   mach_vm_size_t zone_offset = reinterpret_cast<mach_vm_size_t>(default_zone) -
 260       reinterpret_cast<mach_vm_size_t>(*reprotection_start);
 261   CHECK(zone_offset + sizeof(ChromeMallocZone) <= *reprotection_length);
 262
 263   if (info.protection & VM_PROT_WRITE) {
 264     // No change needed; the zone is already writable.
 265     *reprotection_start = 0;
 266     *reprotection_length = 0;
 267     *reprotection_value = VM_PROT_NONE;
 268   } else {
 269     *reprotection_value = info.protection;
 270     result = mach_vm_protect(mach_task_self(),
 271                              *reprotection_start,
 272                              *reprotection_length,
 273                              false,
 274                              info.protection | VM_PROT_WRITE);
 275     CHECK(result == KERN_SUCCESS);
 276   }
 277 }
 278
 279 // === C malloc/calloc/valloc/realloc/posix_memalign ===
 280
 281 typedef void* (*malloc_type)(struct _malloc_zone_t* zone,
 282                              size_t size);
 283 typedef void* (*calloc_type)(struct _malloc_zone_t* zone,
 284                              size_t num_items,
 285                              size_t size);
 286 typedef void* (*valloc_type)(struct _malloc_zone_t* zone,
 287                              size_t size);
 288 typedef void (*free_type)(struct _malloc_zone_t* zone,
 289                           void* ptr);
 290 typedef void* (*realloc_type)(struct _malloc_zone_t* zone,
 291                               void* ptr,
 292                               size_t size);
 293 typedef void* (*memalign_type)(struct _malloc_zone_t* zone,
 294                                size_t alignment,
 295                                size_t size);
 296
 297 malloc_type g_old_malloc;
 298 calloc_type g_old_calloc;
 299 valloc_type g_old_valloc;
 300 free_type g_old_free;
 301 realloc_type g_old_realloc;
 302 memalign_type g_old_memalign;
 303
 304 malloc_type g_old_malloc_purgeable;
 305 calloc_type g_old_calloc_purgeable;
 306 valloc_type g_old_valloc_purgeable;
 307 free_type g_old_free_purgeable;
 308 realloc_type g_old_realloc_purgeable;
 309 memalign_type g_old_memalign_purgeable;
 310
 311 void* oom_killer_malloc(struct _malloc_zone_t* zone,
 312                         size_t size) {
 313 #if ARCH_CPU_32_BITS
 314   ScopedClearErrno clear_errno;
 315 #endif  // ARCH_CPU_32_BITS
 316   void* result = g_old_malloc(zone, size);
 317   if (!result && size)
 318     debug::BreakDebugger();
 319   return result;
 320 }
 321
 322 void* oom_killer_calloc(struct _malloc_zone_t* zone,
 323                         size_t num_items,
 324                         size_t size) {
 325 #if ARCH_CPU_32_BITS
 326   ScopedClearErrno clear_errno;
 327 #endif  // ARCH_CPU_32_BITS
 328   void* result = g_old_calloc(zone, num_items, size);
 329   if (!result && num_items && size)
 330     debug::BreakDebugger();
 331   return result;
 332 }
 333
 334 void* oom_killer_valloc(struct _malloc_zone_t* zone,
 335                         size_t size) {
 336 #if ARCH_CPU_32_BITS
 337   ScopedClearErrno clear_errno;
 338 #endif  // ARCH_CPU_32_BITS
 339   void* result = g_old_valloc(zone, size);
 340   if (!result && size)
 341     debug::BreakDebugger();
 342   return result;
 343 }
 344
 345 void oom_killer_free(struct _malloc_zone_t* zone,
 346                      void* ptr) {
 347 #if ARCH_CPU_32_BITS
 348   ScopedClearErrno clear_errno;
 349 #endif  // ARCH_CPU_32_BITS
 350   g_old_free(zone, ptr);
 351 }
 352
 353 void* oom_killer_realloc(struct _malloc_zone_t* zone,
 354                          void* ptr,
 355                          size_t size) {
 356 #if ARCH_CPU_32_BITS
 357   ScopedClearErrno clear_errno;
 358 #endif  // ARCH_CPU_32_BITS
 359   void* result = g_old_realloc(zone, ptr, size);
 360   if (!result && size)
 361     debug::BreakDebugger();
 362   return result;
 363 }
 364
 365 void* oom_killer_memalign(struct _malloc_zone_t* zone,
 366                           size_t alignment,
 367                           size_t size) {
 368 #if ARCH_CPU_32_BITS
 369   ScopedClearErrno clear_errno;
 370 #endif  // ARCH_CPU_32_BITS
 371   void* result = g_old_memalign(zone, alignment, size);
 372   // Only die if posix_memalign would have returned ENOMEM, since there are
 373   // other reasons why NULL might be returned (see
 374   // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
 375   if (!result && size && alignment >= sizeof(void*)
 376       && (alignment & (alignment - 1)) == 0) {
 377     debug::BreakDebugger();
 378   }
 379   return result;
 380 }
 381
 382 void* oom_killer_malloc_purgeable(struct _malloc_zone_t* zone,
 383                                   size_t size) {
 384 #if ARCH_CPU_32_BITS
 385   ScopedClearErrno clear_errno;
 386 #endif  // ARCH_CPU_32_BITS
 387   void* result = g_old_malloc_purgeable(zone, size);
 388   if (!result && size)
 389     debug::BreakDebugger();
 390   return result;
 391 }
 392
 393 void* oom_killer_calloc_purgeable(struct _malloc_zone_t* zone,
 394                                   size_t num_items,
 395                                   size_t size) {
 396 #if ARCH_CPU_32_BITS
 397   ScopedClearErrno clear_errno;
 398 #endif  // ARCH_CPU_32_BITS
 399   void* result = g_old_calloc_purgeable(zone, num_items, size);
 400   if (!result && num_items && size)
 401     debug::BreakDebugger();
 402   return result;
 403 }
 404
 405 void* oom_killer_valloc_purgeable(struct _malloc_zone_t* zone,
 406                                   size_t size) {
 407 #if ARCH_CPU_32_BITS
 408   ScopedClearErrno clear_errno;
 409 #endif  // ARCH_CPU_32_BITS
 410   void* result = g_old_valloc_purgeable(zone, size);
 411   if (!result && size)
 412     debug::BreakDebugger();
 413   return result;
 414 }
 415
 416 void oom_killer_free_purgeable(struct _malloc_zone_t* zone,
 417                                void* ptr) {
 418 #if ARCH_CPU_32_BITS
 419   ScopedClearErrno clear_errno;
 420 #endif  // ARCH_CPU_32_BITS
 421   g_old_free_purgeable(zone, ptr);
 422 }
 423
 424 void* oom_killer_realloc_purgeable(struct _malloc_zone_t* zone,
 425                                    void* ptr,
 426                                    size_t size) {
 427 #if ARCH_CPU_32_BITS
 428   ScopedClearErrno clear_errno;
 429 #endif  // ARCH_CPU_32_BITS
 430   void* result = g_old_realloc_purgeable(zone, ptr, size);
 431   if (!result && size)
 432     debug::BreakDebugger();
 433   return result;
 434 }
 435
 436 void* oom_killer_memalign_purgeable(struct _malloc_zone_t* zone,
 437                                     size_t alignment,
 438                                     size_t size) {
 439 #if ARCH_CPU_32_BITS
 440   ScopedClearErrno clear_errno;
 441 #endif  // ARCH_CPU_32_BITS
 442   void* result = g_old_memalign_purgeable(zone, alignment, size);
 443   // Only die if posix_memalign would have returned ENOMEM, since there are
 444   // other reasons why NULL might be returned (see
 445   // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c ).
 446   if (!result && size && alignment >= sizeof(void*)
 447       && (alignment & (alignment - 1)) == 0) {
 448     debug::BreakDebugger();
 449   }
 450   return result;
 451 }
 452
 453 // === C++ operator new ===
 454
 455 void oom_killer_new() {
 456   debug::BreakDebugger();
 457 }
 458
 459 // === Core Foundation CFAllocators ===
 460
 461 bool CanGetContextForCFAllocator() {
 462   return !base::mac::IsOSLaterThanMountainLion_DontCallThis();
 463 }
 464
 465 CFAllocatorContext* ContextForCFAllocator(CFAllocatorRef allocator) {
 466   if (base::mac::IsOSSnowLeopard()) {
 467     ChromeCFAllocatorLeopards* our_allocator =
 468         const_cast<ChromeCFAllocatorLeopards*>(
 469             reinterpret_cast<const ChromeCFAllocatorLeopards*>(allocator));
 470     return &our_allocator->_context;
 471   } else if (base::mac::IsOSLion() || base::mac::IsOSMountainLion()) {
 472     ChromeCFAllocatorLions* our_allocator =
 473         const_cast<ChromeCFAllocatorLions*>(
 474             reinterpret_cast<const ChromeCFAllocatorLions*>(allocator));
 475     return &our_allocator->_context;
 476   } else {
 477     return NULL;
 478   }
 479 }
 480
 481 CFAllocatorAllocateCallBack g_old_cfallocator_system_default;
 482 CFAllocatorAllocateCallBack g_old_cfallocator_malloc;
 483 CFAllocatorAllocateCallBack g_old_cfallocator_malloc_zone;
 484
 485 void* oom_killer_cfallocator_system_default(CFIndex alloc_size,
 486                                             CFOptionFlags hint,
 487                                             void* info) {
 488   void* result = g_old_cfallocator_system_default(alloc_size, hint, info);
 489   if (!result)
 490     debug::BreakDebugger();
 491   return result;
 492 }
 493
 494 void* oom_killer_cfallocator_malloc(CFIndex alloc_size,
 495                                     CFOptionFlags hint,
 496                                     void* info) {
 497   void* result = g_old_cfallocator_malloc(alloc_size, hint, info);
 498   if (!result)
 499     debug::BreakDebugger();
 500   return result;
 501 }
 502
 503 void* oom_killer_cfallocator_malloc_zone(CFIndex alloc_size,
 504                                          CFOptionFlags hint,
 505                                          void* info) {
 506   void* result = g_old_cfallocator_malloc_zone(alloc_size, hint, info);
 507   if (!result)
 508     debug::BreakDebugger();
 509   return result;
 510 }
 511
 512 // === Cocoa NSObject allocation ===
 513
 514 typedef id (*allocWithZone_t)(id, SEL, NSZone*);
 515 allocWithZone_t g_old_allocWithZone;
 516
 517 id oom_killer_allocWithZone(id self, SEL _cmd, NSZone* zone)
 518 {
 519   id result = g_old_allocWithZone(self, _cmd, zone);
 520   if (!result)
 521     debug::BreakDebugger();
 522   return result;
 523 }
 524
 525 }  // namespace
 526
 527 void* UncheckedMalloc(size_t size) {
 528   if (g_old_malloc) {
 529 #if ARCH_CPU_32_BITS
 530     ScopedClearErrno clear_errno;
 531     ThreadLocalBooleanAutoReset flag(g_unchecked_malloc.Pointer(), true);
 532 #endif  // ARCH_CPU_32_BITS
 533     return g_old_malloc(malloc_default_zone(), size);
 534   }
 535   return malloc(size);
 536 }
 537
 538 void EnableTerminationOnOutOfMemory() {
 539   if (g_oom_killer_enabled)
 540     return;
 541
 542   g_oom_killer_enabled = true;
 543
 544   // === C malloc/calloc/valloc/realloc/posix_memalign ===
 545
 546   // This approach is not perfect, as requests for amounts of memory larger than
 547   // MALLOC_ABSOLUTE_MAX_SIZE (currently SIZE_T_MAX - (2 * PAGE_SIZE)) will
 548   // still fail with a NULL rather than dying (see
 549   // http://opensource.apple.com/source/Libc/Libc-583/gen/malloc.c for details).
 550   // Unfortunately, it's the best we can do. Also note that this does not affect
 551   // allocations from non-default zones.
 552
 553   CHECK(!g_old_malloc && !g_old_calloc && !g_old_valloc && !g_old_realloc &&
 554         !g_old_memalign) << "Old allocators unexpectedly non-null";
 555
 556   CHECK(!g_old_malloc_purgeable && !g_old_calloc_purgeable &&
 557         !g_old_valloc_purgeable && !g_old_realloc_purgeable &&
 558         !g_old_memalign_purgeable) << "Old allocators unexpectedly non-null";
 559
 560 #if !defined(ADDRESS_SANITIZER)
 561   // Don't do anything special on OOM for the malloc zones replaced by
 562   // AddressSanitizer, as modifying or protecting them may not work correctly.
 563
 564   ChromeMallocZone* default_zone =
 565       reinterpret_cast<ChromeMallocZone*>(malloc_default_zone());
 566   ChromeMallocZone* purgeable_zone =
 567       reinterpret_cast<ChromeMallocZone*>(malloc_default_purgeable_zone());
 568
 569   mach_vm_address_t default_reprotection_start = 0;
 570   mach_vm_size_t default_reprotection_length = 0;
 571   vm_prot_t default_reprotection_value = VM_PROT_NONE;
 572   DeprotectMallocZone(default_zone,
 573                       &default_reprotection_start,
 574                       &default_reprotection_length,
 575                       &default_reprotection_value);
 576
 577   mach_vm_address_t purgeable_reprotection_start = 0;
 578   mach_vm_size_t purgeable_reprotection_length = 0;
 579   vm_prot_t purgeable_reprotection_value = VM_PROT_NONE;
 580   if (purgeable_zone) {
 581     DeprotectMallocZone(purgeable_zone,
 582                         &purgeable_reprotection_start,
 583                         &purgeable_reprotection_length,
 584                         &purgeable_reprotection_value);
 585   }
 586
 587   // Default zone
 588
 589   g_old_malloc = default_zone->malloc;
 590   g_old_calloc = default_zone->calloc;
 591   g_old_valloc = default_zone->valloc;
 592   g_old_free = default_zone->free;
 593   g_old_realloc = default_zone->realloc;
 594   CHECK(g_old_malloc && g_old_calloc && g_old_valloc && g_old_free &&
 595         g_old_realloc)
 596       << "Failed to get system allocation functions.";
 597
 598   default_zone->malloc = oom_killer_malloc;
 599   default_zone->calloc = oom_killer_calloc;
 600   default_zone->valloc = oom_killer_valloc;
 601   default_zone->free = oom_killer_free;
 602   default_zone->realloc = oom_killer_realloc;
 603
 604   if (default_zone->version >= 5) {
 605     g_old_memalign = default_zone->memalign;
 606     if (g_old_memalign)
 607       default_zone->memalign = oom_killer_memalign;
 608   }
 609
 610   // Purgeable zone (if it exists)
 611
 612   if (purgeable_zone) {
 613     g_old_malloc_purgeable = purgeable_zone->malloc;
 614     g_old_calloc_purgeable = purgeable_zone->calloc;
 615     g_old_valloc_purgeable = purgeable_zone->valloc;
 616     g_old_free_purgeable = purgeable_zone->free;
 617     g_old_realloc_purgeable = purgeable_zone->realloc;
 618     CHECK(g_old_malloc_purgeable && g_old_calloc_purgeable &&
 619           g_old_valloc_purgeable && g_old_free_purgeable &&
 620           g_old_realloc_purgeable)
 621         << "Failed to get system allocation functions.";
 622
 623     purgeable_zone->malloc = oom_killer_malloc_purgeable;
 624     purgeable_zone->calloc = oom_killer_calloc_purgeable;
 625     purgeable_zone->valloc = oom_killer_valloc_purgeable;
 626     purgeable_zone->free = oom_killer_free_purgeable;
 627     purgeable_zone->realloc = oom_killer_realloc_purgeable;
 628
 629     if (purgeable_zone->version >= 5) {
 630       g_old_memalign_purgeable = purgeable_zone->memalign;
 631       if (g_old_memalign_purgeable)
 632         purgeable_zone->memalign = oom_killer_memalign_purgeable;
 633     }
 634   }
 635
 636   // Restore protection if it was active.
 637
 638   if (default_reprotection_start) {
 639     kern_return_t result = mach_vm_protect(mach_task_self(),
 640                                            default_reprotection_start,
 641                                            default_reprotection_length,
 642                                            false,
 643                                            default_reprotection_value);
 644     CHECK(result == KERN_SUCCESS);
 645   }
 646
 647   if (purgeable_reprotection_start) {
 648     kern_return_t result = mach_vm_protect(mach_task_self(),
 649                                            purgeable_reprotection_start,
 650                                            purgeable_reprotection_length,
 651                                            false,
 652                                            purgeable_reprotection_value);
 653     CHECK(result == KERN_SUCCESS);
 654   }
 655 #endif
 656
 657   // === C malloc_zone_batch_malloc ===
 658
 659   // batch_malloc is omitted because the default malloc zone's implementation
 660   // only supports batch_malloc for "tiny" allocations from the free list. It
 661   // will fail for allocations larger than "tiny", and will only allocate as
 662   // many blocks as it's able to from the free list. These factors mean that it
 663   // can return less than the requested memory even in a non-out-of-memory
 664   // situation. There's no good way to detect whether a batch_malloc failure is
 665   // due to these other factors, or due to genuine memory or address space
 666   // exhaustion. The fact that it only allocates space from the "tiny" free list
 667   // means that it's likely that a failure will not be due to memory exhaustion.
 668   // Similarly, these constraints on batch_malloc mean that callers must always
 669   // be expecting to receive less memory than was requested, even in situations
 670   // where memory pressure is not a concern. Finally, the only public interface
 671   // to batch_malloc is malloc_zone_batch_malloc, which is specific to the
 672   // system's malloc implementation. It's unlikely that anyone's even heard of
 673   // it.
 674
 675   // === C++ operator new ===
 676
 677   // Yes, operator new does call through to malloc, but this will catch failures
 678   // that our imperfect handling of malloc cannot.
 679
 680   std::set_new_handler(oom_killer_new);
 681
 682 #ifndef ADDRESS_SANITIZER
 683   // === Core Foundation CFAllocators ===
 684
 685   // This will not catch allocation done by custom allocators, but will catch
 686   // all allocation done by system-provided ones.
 687
 688   CHECK(!g_old_cfallocator_system_default && !g_old_cfallocator_malloc &&
 689         !g_old_cfallocator_malloc_zone)
 690       << "Old allocators unexpectedly non-null";
 691
 692   bool cf_allocator_internals_known = CanGetContextForCFAllocator();
 693
 694   if (cf_allocator_internals_known) {
 695     CFAllocatorContext* context =
 696         ContextForCFAllocator(kCFAllocatorSystemDefault);
 697     CHECK(context) << "Failed to get context for kCFAllocatorSystemDefault.";
 698     g_old_cfallocator_system_default = context->allocate;
 699     CHECK(g_old_cfallocator_system_default)
 700         << "Failed to get kCFAllocatorSystemDefault allocation function.";
 701     context->allocate = oom_killer_cfallocator_system_default;
 702
 703     context = ContextForCFAllocator(kCFAllocatorMalloc);
 704     CHECK(context) << "Failed to get context for kCFAllocatorMalloc.";
 705     g_old_cfallocator_malloc = context->allocate;
 706     CHECK(g_old_cfallocator_malloc)
 707         << "Failed to get kCFAllocatorMalloc allocation function.";
 708     context->allocate = oom_killer_cfallocator_malloc;
 709
 710     context = ContextForCFAllocator(kCFAllocatorMallocZone);
 711     CHECK(context) << "Failed to get context for kCFAllocatorMallocZone.";
 712     g_old_cfallocator_malloc_zone = context->allocate;
 713     CHECK(g_old_cfallocator_malloc_zone)
 714         << "Failed to get kCFAllocatorMallocZone allocation function.";
 715     context->allocate = oom_killer_cfallocator_malloc_zone;
 716   } else {
 717     NSLog(@"Internals of CFAllocator not known; out-of-memory failures via "
 718         "CFAllocator will not result in termination. http://crbug.com/45650");
 719   }
 720 #endif
 721
 722   // === Cocoa NSObject allocation ===
 723
 724   // Note that both +[NSObject new] and +[NSObject alloc] call through to
 725   // +[NSObject allocWithZone:].
 726
 727   CHECK(!g_old_allocWithZone)
 728       << "Old allocator unexpectedly non-null";
 729
 730   Class nsobject_class = [NSObject class];
 731   Method orig_method = class_getClassMethod(nsobject_class,
 732                                             @selector(allocWithZone:));
 733   g_old_allocWithZone = reinterpret_cast<allocWithZone_t>(
 734       method_getImplementation(orig_method));
 735   CHECK(g_old_allocWithZone)
 736       << "Failed to get allocWithZone allocation function.";
 737   method_setImplementation(orig_method,
 738                            reinterpret_cast<IMP>(oom_killer_allocWithZone));
 739 }
 740
 741 ProcessId GetParentProcessId(ProcessHandle process) {
 742   struct kinfo_proc info;
 743   size_t length = sizeof(struct kinfo_proc);
 744   int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, process };
 745   if (sysctl(mib, 4, &info, &length, NULL, 0) < 0) {
 746     DPLOG(ERROR) << "sysctl";
 747     return -1;
 748   }
 749   if (length == 0)
 750     return -1;
 751   return info.kp_eproc.e_ppid;
 752 }
 753
 754 namespace {
 755
 756 const int kWaitBeforeKillSeconds = 2;
 757
 758 // Reap |child| process. This call blocks until completion.
 759 void BlockingReap(pid_t child) {
 760   const pid_t result = HANDLE_EINTR(waitpid(child, NULL, 0));
 761   if (result == -1) {
 762     DPLOG(ERROR) << "waitpid(" << child << ", NULL, 0)";
 763   }
 764 }
 765
// Waits for |timeout| seconds for the given |child| to exit and reap it. If
// the child doesn't exit within the time specified, kills it.
//
// |child| is the pid to wait on and |timeout| is the wait in seconds; both
// are expected to be positive (DCHECKed). Nothing is returned: failures are
// logged and the function falls back to killing and reaping |child|.
//
// This function takes two approaches: first, it tries to use kqueue to
// observe when the process exits. kevent can monitor a kqueue with a
// timeout, so this method is preferred to wait for a specified period of
// time. Once the kqueue indicates the process has exited, waitpid will reap
// the exited child. If the kqueue doesn't provide an exit event notification
// before the timeout expires, or if the kqueue fails or misbehaves, the
// process will be mercilessly killed and reaped.
//
// A child process passed to this function may be in one of several states:
// running, terminated and not yet reaped, and (apparently, and unfortunately)
// terminated and already reaped. Normally, a process will at least have been
// asked to exit before this function is called, but this is not required.
// If a process is terminating and unreaped, there may be a window between the
// time that kqueue will no longer recognize it and when it becomes an actual
// zombie that a non-blocking (WNOHANG) waitpid can reap. This condition is
// detected when kqueue indicates that the process is not running and a
// non-blocking waitpid fails to reap the process but indicates that it is
// still running. In this event, a blocking attempt to reap the process
// collects the known-dying child, preventing zombies from congregating.
//
// In the event that the kqueue misbehaves entirely, as it might under an
// EMFILE condition ("too many open files", or out of file descriptors), this
// function will forcibly kill and reap the child without delay. This
// eliminates another potential zombie vector. (If you're out of file
// descriptors, you're probably deep into something else, but that doesn't
// mean that zombies should be allowed to kick you while you're down.)
//
// The fact that this function seemingly can be called to wait on a child
// that's not only already terminated but already reaped is a bit of a
// problem: a reaped child's pid can be reclaimed and may refer to a distinct
// process in that case. The fact that this function can seemingly be called
// to wait on a process that's not even a child is also a problem: kqueue will
// work in that case, but waitpid won't, and killing a non-child might not be
// the best approach.
void WaitForChildToDie(pid_t child, int timeout) {
  DCHECK(child > 0);
  DCHECK(timeout > 0);

  // DON'T ADD ANY EARLY RETURNS TO THIS FUNCTION without ensuring that
  // |child| has been reaped. Specifically, even if a kqueue, kevent, or other
  // call fails, this function should fall back to the last resort of trying
  // to kill and reap the process. Not observing this rule will resurrect
  // zombies.

  int result;

  int kq = HANDLE_EINTR(kqueue());
  if (kq == -1) {
    // No kqueue available: skip the timed wait and go straight to the
    // kill-and-reap fallback at the bottom of the function.
    DPLOG(ERROR) << "kqueue()";
  } else {
    // NOTE(review): presumably closes |kq| when this scope ends, i.e. before
    // the kill-and-reap fallback runs -- confirm against file_util::ScopedFD.
    file_util::ScopedFD auto_close_kq(&kq);

    // Register interest in |child|'s exit (EVFILT_PROC / NOTE_EXIT). This
    // call only submits the change; it does not wait for any event.
    struct kevent change = {0};
    EV_SET(&change, child, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, NULL);
    result = HANDLE_EINTR(kevent(kq, &change, 1, NULL, 0, NULL));

    if (result == -1) {
      if (errno != ESRCH) {
        // Unexpected registration failure: log it and fall through to the
        // kill-and-reap fallback.
        DPLOG(ERROR) << "kevent (setup " << child << ")";
      } else {
        // At this point, one of the following has occurred:
        // 1. The process has died but has not yet been reaped.
        // 2. The process has died and has already been reaped.
        // 3. The process is in the process of dying. It's no longer
        //    kqueueable, but it may not be waitable yet either. Mark calls
        //    this case the "zombie death race".

        result = HANDLE_EINTR(waitpid(child, NULL, WNOHANG));

        if (result != 0) {
          // A positive result indicates case 1. waitpid succeeded and reaped
          // the child. A result of -1 indicates case 2. The child has already
          // been reaped. In both of these cases, no further action is
          // necessary.
          return;
        }

        // |result| is 0, indicating case 3. The process will be waitable in
        // short order. Fall back out of the kqueue code to kill it (for good
        // measure) and reap it.
      }
    } else {
      // Keep track of the elapsed time to be able to restart kevent if it's
      // interrupted.
      TimeDelta remaining_delta = TimeDelta::FromSeconds(timeout);
      TimeTicks deadline = TimeTicks::Now() + remaining_delta;
      // If the loop below never runs, |result| stays -1 and is reported as a
      // kevent wait failure.
      result = -1;
      struct kevent event = {0};
      while (remaining_delta.InMilliseconds() > 0) {
        const struct timespec remaining_timespec = remaining_delta.ToTimeSpec();
        // Wait for at most one event, for no longer than the time remaining.
        result = kevent(kq, NULL, 0, &event, 1, &remaining_timespec);
        if (result == -1 && errno == EINTR) {
          // Interrupted: recompute the time left until |deadline| and retry.
          // |result| is set to 0 so that, if no time is left and the loop
          // ends here, the outcome is handled like a timeout rather than an
          // error.
          remaining_delta = deadline - TimeTicks::Now();
          result = 0;
        } else {
          break;
        }
      }

      // A |result| of 0 (no event before the deadline) matches none of the
      // branches below and falls through to the kill-and-reap fallback, as do
      // the branches that only log.
      if (result == -1) {
        DPLOG(ERROR) << "kevent (wait " << child << ")";
      } else if (result > 1) {
        // Only one event slot was supplied, so more than one is unexpected.
        DLOG(ERROR) << "kevent (wait " << child << "): unexpected result "
                    << result;
      } else if (result == 1) {
        if ((event.fflags & NOTE_EXIT) &&
            (event.ident == static_cast<uintptr_t>(child))) {
          // The process is dead or dying. This won't block for long, if at
          // all.
          BlockingReap(child);
          return;
        } else {
          DLOG(ERROR) << "kevent (wait " << child
                      << "): unexpected event: fflags=" << event.fflags
                      << ", ident=" << event.ident;
        }
      }
    }
  }

  // The child is still alive, or is very freshly dead. Be sure by sending it
  // a signal. This is safe even if it's freshly dead, because it will be a
  // zombie (or on the way to zombiedom) and kill will return 0 even if the
  // signal is not delivered to a live process.
  result = kill(child, SIGKILL);
  if (result == -1) {
    // kill failed; no reap is attempted in this case.
    DPLOG(ERROR) << "kill(" << child << ", SIGKILL)";
  } else {
    // The child is definitely on the way out now. BlockingReap won't need to
    // wait for long, if at all.
    BlockingReap(child);
  }
}
 902
 903 }  // namespace
 904
// Makes sure |process| ends up terminated: hands it to WaitForChildToDie()
// with a timeout of kWaitBeforeKillSeconds seconds, which waits for it to
// exit, kills it if it has not exited in time, and attempts to reap it. The
// call is synchronous, so the caller is blocked while the wait is in
// progress.
void EnsureProcessTerminated(ProcessHandle process) {
  WaitForChildToDie(process, kWaitBeforeKillSeconds);
}
 908
 909 }  // namespace base