memory/build/mozjemalloc.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 // Portions of this file were originally under the following license:
   8 //
   9 // Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
  10 // All rights reserved.
  11 // Copyright (C) 2007-2017 Mozilla Foundation.
  12 //
  13 // Redistribution and use in source and binary forms, with or without
  14 // modification, are permitted provided that the following conditions
  15 // are met:
  16 // 1. Redistributions of source code must retain the above copyright
  17 //    notice(s), this list of conditions and the following disclaimer as
  18 //    the first lines of this file unmodified other than the possible
  19 //    addition of one or more copyright notices.
  20 // 2. Redistributions in binary form must reproduce the above copyright
  21 //    notice(s), this list of conditions and the following disclaimer in
  22 //    the documentation and/or other materials provided with the
  23 //    distribution.
  24 //
  25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28 // PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  29 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  32 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  33 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  34 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  35 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  36 //
  37 // *****************************************************************************
  38 //
  39 // This allocator implementation is designed to provide scalable performance
  40 // for multi-threaded programs on multi-processor systems.  The following
  41 // features are included for this purpose:
  42 //
  43 //   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  44 //     contention and cache sloshing.
  45 //
  46 //   + Cache line sharing between arenas is avoided for internal data
  47 //     structures.
  48 //
  49 //   + Memory is managed in chunks and runs (chunks can be split into runs),
  50 //     rather than as individual pages.  This provides a constant-time
  51 //     mechanism for associating allocations with particular arenas.
  52 //
  53 // Allocation requests are rounded up to the nearest size class, and no record
  54 // of the original request size is maintained.  Allocations are broken into
  55 // categories according to size class.  Assuming runtime defaults, the size
  56 // classes in each category are as follows (for x86, x86_64 and Apple Silicon):
  57 //
  58 //   |=========================================================|
  59 //   | Category | Subcategory    |     x86 |  x86_64 | Mac ARM |
  60 //   |---------------------------+---------+---------+---------|
  61 //   | Word size                 |  32 bit |  64 bit |  64 bit |
  62 //   | Page size                 |    4 Kb |    4 Kb |   16 Kb |
  63 //   |=========================================================|
  64 //   | Small    | Tiny           |    4/-w |      -w |       - |
  65 //   |          |                |       8 |    8/-w |       8 |
  66 //   |          |----------------+---------|---------|---------|
  67 //   |          | Quantum-spaced |      16 |      16 |      16 |
  68 //   |          |                |      32 |      32 |      32 |
  69 //   |          |                |      48 |      48 |      48 |
  70 //   |          |                |     ... |     ... |     ... |
  71 //   |          |                |     480 |     480 |     480 |
  72 //   |          |                |     496 |     496 |     496 |
  73 //   |          |----------------+---------|---------|---------|
  74 //   |          | Quantum-wide-  |     512 |     512 |     512 |
  75 //   |          | spaced         |     768 |     768 |     768 |
  76 //   |          |                |     ... |     ... |     ... |
  77 //   |          |                |    3584 |    3584 |    3584 |
  78 //   |          |                |    3840 |    3840 |    3840 |
  79 //   |          |----------------+---------|---------|---------|
  80 //   |          | Sub-page       |       - |       - |    4096 |
  81 //   |          |                |       - |       - |    8 kB |
  82 //   |=========================================================|
  83 //   | Large                     |    4 kB |    4 kB |       - |
  84 //   |                           |    8 kB |    8 kB |       - |
  85 //   |                           |   12 kB |   12 kB |       - |
  86 //   |                           |   16 kB |   16 kB |   16 kB |
  87 //   |                           |     ... |     ... |       - |
  88 //   |                           |   32 kB |   32 kB |   32 kB |
  89 //   |                           |     ... |     ... |     ... |
  90 //   |                           | 1008 kB | 1008 kB | 1008 kB |
  91 //   |                           | 1012 kB | 1012 kB |       - |
  92 //   |                           | 1016 kB | 1016 kB |       - |
  93 //   |                           | 1020 kB | 1020 kB |       - |
  94 //   |=========================================================|
  95 //   | Huge                      |    1 MB |    1 MB |    1 MB |
  96 //   |                           |    2 MB |    2 MB |    2 MB |
  97 //   |                           |    3 MB |    3 MB |    3 MB |
  98 //   |                           |     ... |     ... |     ... |
  99 //   |=========================================================|
 100 //
 101 // Legend:
 102 //   n:    Size class exists for this platform.
 103 //   n/-w: This size class doesn't exist on Windows (see kMinTinyClass).
 104 //   -:    This size class doesn't exist for this platform.
 105 //   ...:  Size classes follow a pattern here.
 106 //
 107 // NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an
 108 // allocation on Linux or Mac.  So on 32-bit *nix, the smallest bucket size is
 109 // 4 bytes, and on 64-bit, the smallest bucket size is 8 bytes.
 110 //
 111 // A different mechanism is used for each category:
 112 //
 113 //   Small : Each size class is segregated into its own set of runs.  Each run
 114 //           maintains a bitmap of which regions are free/allocated.
 115 //
 116 //   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 117 //           in the associated arena chunk header maps.
 118 //
 119 //   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 120 //          Metadata are stored in a separate red-black tree.
 121 //
 122 // *****************************************************************************
 123
 124 #include "mozmemory_wrap.h"
 125 #include "mozjemalloc.h"
 126 #include "mozjemalloc_types.h"
 127
 128 #include <cstring>
 129 #include <cerrno>
 130 #include <optional>
 131 #include <type_traits>
 132 #ifdef XP_WIN
 133 #  include <io.h>
 134 #  include <windows.h>
 135 #else
 136 #  include <sys/mman.h>
 137 #  include <unistd.h>
 138 #endif
 139 #ifdef XP_DARWIN
 140 #  include <libkern/OSAtomic.h>
 141 #  include <mach/mach_init.h>
 142 #  include <mach/vm_map.h>
 143 #endif
 144
 145 #include "mozilla/Atomics.h"
 146 #include "mozilla/Alignment.h"
 147 #include "mozilla/ArrayUtils.h"
 148 #include "mozilla/Assertions.h"
 149 #include "mozilla/CheckedInt.h"
 150 #include "mozilla/DebugOnly.h"
 151 #include "mozilla/DoublyLinkedList.h"
 152 #include "mozilla/HelperMacros.h"
 153 #include "mozilla/Likely.h"
 154 #include "mozilla/Literals.h"
 155 #include "mozilla/MathAlgorithms.h"
 156 #include "mozilla/RandomNum.h"
 157 // Note: MozTaggedAnonymousMmap() could call an LD_PRELOADed mmap
 158 // instead of the one defined here; use only MozTagAnonymousMemory().
 159 #include "mozilla/TaggedAnonymousMemory.h"
 160 #include "mozilla/ThreadLocal.h"
 161 #include "mozilla/UniquePtr.h"
 162 #include "mozilla/Unused.h"
 163 #include "mozilla/XorShift128PlusRNG.h"
 164 #include "mozilla/fallible.h"
 165 #include "rb.h"
 166 #include "Mutex.h"
 167 #include "PHC.h"
 168 #include "Utils.h"
 169
 170 #if defined(XP_WIN)
 171 #  include "mozmemory_utils.h"
 172 #endif
 173
 174 // For GetGeckoProcessType(), when it's used.
 175 #if defined(XP_WIN) && !defined(JS_STANDALONE)
 176 #  include "mozilla/ProcessType.h"
 177 #endif
 178
 179 using namespace mozilla;
 180
 181 // On Linux, we use madvise(MADV_DONTNEED) to release memory back to the
 182 // operating system.  If we release 1MB of live pages with MADV_DONTNEED, our
 183 // RSS will decrease by 1MB (almost) immediately.
 184 //
 185 // On Mac, we use madvise(MADV_FREE).  Unlike MADV_DONTNEED on Linux, MADV_FREE
 186 // on Mac doesn't cause the OS to release the specified pages immediately; the
 187 // OS keeps them in our process until the machine comes under memory pressure.
 188 //
 189 // It's therefore difficult to measure the process's RSS on Mac, since, in the
 190 // absence of memory pressure, the contribution from the heap to RSS will not
 191 // decrease due to our madvise calls.
 192 //
 193 // We therefore define MALLOC_DOUBLE_PURGE on Mac.  This causes jemalloc to
 194 // track which pages have been MADV_FREE'd.  You can then call
 195 // jemalloc_purge_freed_pages(), which will force the OS to release those
 196 // MADV_FREE'd pages, making the process's RSS reflect its true memory usage.
 197
 198 #ifdef XP_DARWIN
 199 #  define MALLOC_DOUBLE_PURGE
 200 #endif
 201
 202 #ifdef XP_WIN
 203 #  define MALLOC_DECOMMIT
 204 #endif
 205
 206 // Define MALLOC_RUNTIME_CONFIG depending on MOZ_DEBUG. Overriding this as
 207 // a build option allows us to build mozjemalloc/firefox without runtime asserts
 208 // but with runtime configuration. Making some testing easier.
 209
 210 #ifdef MOZ_DEBUG
 211 #  define MALLOC_RUNTIME_CONFIG
 212 #endif
 213
 214 // When MALLOC_STATIC_PAGESIZE is defined, the page size is fixed at
 215 // compile-time for better performance, as opposed to determined at
 216 // runtime. Some platforms can have different page sizes at runtime
 217 // depending on kernel configuration, so they are opted out by default.
 218 // Debug builds are opted out too, for test coverage.
 219 #ifndef MALLOC_RUNTIME_CONFIG
 220 #  if !defined(__ia64__) && !defined(__sparc__) && !defined(__mips__) &&       \
 221       !defined(__aarch64__) && !defined(__powerpc__) && !defined(XP_MACOSX) && \
 222       !defined(__loongarch__)
 223 #    define MALLOC_STATIC_PAGESIZE 1
 224 #  endif
 225 #endif
 226
 227 #ifdef XP_WIN
 228 #  define STDERR_FILENO 2
 229
 230 // Implement getenv without using malloc.
 231 static char mozillaMallocOptionsBuf[64];
 232
 233 #  define getenv xgetenv
 234 static char* getenv(const char* name) {
 235   if (GetEnvironmentVariableA(name, mozillaMallocOptionsBuf,
 236                               sizeof(mozillaMallocOptionsBuf)) > 0) {
 237     return mozillaMallocOptionsBuf;
 238   }
 239
 240   return nullptr;
 241 }
 242 #endif
 243
 244 #ifndef XP_WIN
 245 // Newer Linux systems support MADV_FREE, but we're not supporting
 246 // that properly. bug #1406304.
 247 #  if defined(XP_LINUX) && defined(MADV_FREE)
 248 #    undef MADV_FREE
 249 #  endif
 250 #  ifndef MADV_FREE
 251 #    define MADV_FREE MADV_DONTNEED
 252 #  endif
 253 #endif
 254
 255 // Some tools, such as /dev/dsp wrappers, LD_PRELOAD libraries that
 256 // happen to override mmap() and call dlsym() from their overridden
 257 // mmap(). The problem is that dlsym() calls malloc(), and this ends
 258 // up in a dead lock in jemalloc.
 259 // On these systems, we prefer to directly use the system call.
 260 // We do that for Linux systems and kfreebsd with GNU userland.
 261 // Note sanity checks are not done (alignment of offset, ...) because
 262 // the uses of mmap are pretty limited, in jemalloc.
 263 //
 264 // On Alpha, glibc has a bug that prevents syscall() to work for system
 265 // calls with 6 arguments.
 266 #if (defined(XP_LINUX) && !defined(__alpha__)) || \
 267     (defined(__FreeBSD_kernel__) && defined(__GLIBC__))
 268 #  include <sys/syscall.h>
 269 #  if defined(SYS_mmap) || defined(SYS_mmap2)
 270 static inline void* _mmap(void* addr, size_t length, int prot, int flags,
 271                           int fd, off_t offset) {
 272 // S390 only passes one argument to the mmap system call, which is a
 273 // pointer to a structure containing the arguments.
 274 #    ifdef __s390__
 275   struct {
 276     void* addr;
 277     size_t length;
 278     long prot;
 279     long flags;
 280     long fd;
 281     off_t offset;
 282   } args = {addr, length, prot, flags, fd, offset};
 283   return (void*)syscall(SYS_mmap, &args);
 284 #    else
 285 #      if defined(ANDROID) && defined(__aarch64__) && defined(SYS_mmap2)
 286   // Android NDK defines SYS_mmap2 for AArch64 despite it not supporting mmap2.
 287 #        undef SYS_mmap2
 288 #      endif
 289 #      ifdef SYS_mmap2
 290   return (void*)syscall(SYS_mmap2, addr, length, prot, flags, fd, offset >> 12);
 291 #      else
 292   return (void*)syscall(SYS_mmap, addr, length, prot, flags, fd, offset);
 293 #      endif
 294 #    endif
 295 }
 296 #    define mmap _mmap
 297 #    define munmap(a, l) syscall(SYS_munmap, a, l)
 298 #  endif
 299 #endif
 300
 301 // ***************************************************************************
 302 // Structures for chunk headers for chunks used for non-huge allocations.
 303
 304 struct arena_t;
 305
 306 // Each element of the chunk map corresponds to one page within the chunk.
 307 struct arena_chunk_map_t {
 308   // Linkage for run trees. Used for arena_t's tree or available runs.
 309   RedBlackTreeNode<arena_chunk_map_t> link;
 310
 311   // Run address (or size) and various flags are stored together.  The bit
 312   // layout looks like (assuming 32-bit system):
 313   //
 314   //   ???????? ???????? ????---b fmckdzla
 315   //
 316   // ? : Unallocated: Run address for first/last pages, unset for internal
 317   //                  pages.
 318   //     Small: Run address.
 319   //     Large: Run size for first page, unset for trailing pages.
 320   // - : Unused.
 321   // b : Busy?
 322   // f : Fresh memory?
 323   // m : MADV_FREE/MADV_DONTNEED'ed?
 324   // c : decommitted?
 325   // k : key?
 326   // d : dirty?
 327   // z : zeroed?
 328   // l : large?
 329   // a : allocated?
 330   //
 331   // Following are example bit patterns for consecutive pages from the three
 332   // types of runs.
 333   //
 334   // r : run address
 335   // s : run size
 336   // x : don't care
 337   // - : 0
 338   // [cdzla] : bit set
 339   //
 340   //   Unallocated:
 341   //     ssssssss ssssssss ssss---- --c-----
 342   //     xxxxxxxx xxxxxxxx xxxx---- ----d---
 343   //     ssssssss ssssssss ssss---- -----z--
 344   //
 345   //     Note that the size fields are set for the first and last unallocated
 346   //     page only.  The pages in-between have invalid/"don't care" size fields,
 347   //     they're not cleared during things such as coalescing free runs.
 348   //
 349   //     Pages before the first or after the last page in a free run must be
 350   //     allocated or busy.  Run coalescing depends on the sizes being set in
 351   //     the first and last page.  Purging pages and releasing chunks require
 352   //     that unallocated pages are always coalesced and the first page has a
 353   //     correct size.
 354   //
 355   //   Small:
 356   //     rrrrrrrr rrrrrrrr rrrr---- -------a
 357   //     rrrrrrrr rrrrrrrr rrrr---- -------a
 358   //     rrrrrrrr rrrrrrrr rrrr---- -------a
 359   //
 360   //   Large:
 361   //     ssssssss ssssssss ssss---- ------la
 362   //     -------- -------- -------- ------la
 363   //     -------- -------- -------- ------la
 364   //
 365   //     Note that only the first page has the size set.
 366   //
 367   size_t bits;
 368
 369 // A page can be in one of several states.
 370 //
 371 // CHUNK_MAP_ALLOCATED marks allocated pages, the only other bit that can be
 372 // combined is CHUNK_MAP_LARGE.
 373 //
 374 // CHUNK_MAP_LARGE may be combined with CHUNK_MAP_ALLOCATED to show that the
 375 // allocation is a "large" allocation (see SizeClass), rather than a run of
 376 // small allocations.  The interpretation of the gPageSizeMask bits depends onj
 377 // this bit, see the description above.
 378 //
 379 // CHUNK_MAP_DIRTY is used to mark pages that were allocated and are now freed.
 380 // They may contain their previous contents (or poison).  CHUNK_MAP_DIRTY, when
 381 // set, must be the only set bit.
 382 //
 383 // CHUNK_MAP_MADVISED marks pages which are madvised (with either MADV_DONTNEED
 384 // or MADV_FREE).  This is only valid if MALLOC_DECOMMIT is not defined.  When
 385 // set, it must be the only bit set.
 386 //
 387 // CHUNK_MAP_DECOMMITTED is used if CHUNK_MAP_DECOMMITTED is defined.  Unused
 388 // dirty pages may be decommitted and marked as CHUNK_MAP_DECOMMITTED.  They
 389 // must be re-committed with pages_commit() before they can be touched.
 390 //
 391 // CHUNK_MAP_FRESH is set on pages that have never been used before (the chunk
 392 // is newly allocated or they were decommitted and have now been recommitted.
 393 // CHUNK_MAP_FRESH is also used for "double purged" pages meaning that they were
 394 // madvised and later were unmapped and remapped to force them out of the
 395 // program's resident set.  This is enabled when MALLOC_DOUBLE_PURGE is defined
 396 // (eg on MacOS).
 397 //
 398 // CHUNK_MAP_BUSY is set by a thread when the thread wants to manipulate the
 399 // pages without holding a lock. Other threads must not touch these pages
 400 // regardless of whether they hold a lock.
 401 //
 402 // CHUNK_MAP_ZEROED is set on pages that are known to contain zeros.
 403 //
 404 // CHUNK_MAP_DIRTY, _DECOMMITED _MADVISED and _FRESH are always mutually
 405 // exclusive.
 406 //
 407 // CHUNK_MAP_KEY is never used on real pages, only on lookup keys.
 408 //
 409 #define CHUNK_MAP_BUSY ((size_t)0x100U)
 410 #define CHUNK_MAP_FRESH ((size_t)0x80U)
 411 #define CHUNK_MAP_MADVISED ((size_t)0x40U)
 412 #define CHUNK_MAP_DECOMMITTED ((size_t)0x20U)
 413 #define CHUNK_MAP_MADVISED_OR_DECOMMITTED \
 414   (CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED)
 415 #define CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED \
 416   (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED)
 417 #define CHUNK_MAP_FRESH_MADVISED_DECOMMITTED_OR_BUSY              \
 418   (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED | \
 419    CHUNK_MAP_BUSY)
 420 #define CHUNK_MAP_KEY ((size_t)0x10U)
 421 #define CHUNK_MAP_DIRTY ((size_t)0x08U)
 422 #define CHUNK_MAP_ZEROED ((size_t)0x04U)
 423 #define CHUNK_MAP_LARGE ((size_t)0x02U)
 424 #define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
 425 };
 426
 427 // Arena chunk header.
 428 struct arena_chunk_t {
 429   // Arena that owns the chunk.
 430   arena_t* arena;
 431
 432   // Linkage for the arena's tree of dirty chunks.
 433   RedBlackTreeNode<arena_chunk_t> link_dirty;
 434
 435 #ifdef MALLOC_DOUBLE_PURGE
 436   // If we're double-purging, we maintain a linked list of chunks which
 437   // have pages which have been madvise(MADV_FREE)'d but not explicitly
 438   // purged.
 439   //
 440   // We're currently lazy and don't remove a chunk from this list when
 441   // all its madvised pages are recommitted.
 442   DoublyLinkedListElement<arena_chunk_t> chunks_madvised_elem;
 443 #endif
 444
 445   // Number of dirty pages.
 446   size_t ndirty;
 447
 448   bool mIsPurging;
 449   bool mDying;
 450
 451   // Map of pages within chunk that keeps track of free/large/small.
 452   arena_chunk_map_t map[];  // Dynamically sized.
 453
 454   bool IsEmpty();
 455 };
 456
 457 // ***************************************************************************
 458 // Constants defining allocator size classes and behavior.
 459
 460 // Our size classes are inclusive ranges of memory sizes.  By describing the
 461 // minimums and how memory is allocated in each range the maximums can be
 462 // calculated.
 463
 464 // Smallest size class to support.  On Windows the smallest allocation size
 465 // must be 8 bytes on 32-bit, 16 bytes on 64-bit.  On Linux and Mac, even
 466 // malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003).
 467 #ifdef XP_WIN
 468 static const size_t kMinTinyClass = sizeof(void*) * 2;
 469 #else
 470 static const size_t kMinTinyClass = sizeof(void*);
 471 #endif
 472
 473 // Maximum tiny size class.
 474 static const size_t kMaxTinyClass = 8;
 475
 476 // Smallest quantum-spaced size classes. It could actually also be labelled a
 477 // tiny allocation, and is spaced as such from the largest tiny size class.
 478 // Tiny classes being powers of 2, this is twice as large as the largest of
 479 // them.
 480 static const size_t kMinQuantumClass = kMaxTinyClass * 2;
 481 static const size_t kMinQuantumWideClass = 512;
 482 static const size_t kMinSubPageClass = 4_KiB;
 483
 484 // Amount (quantum) separating quantum-spaced size classes.
 485 static const size_t kQuantum = 16;
 486 static const size_t kQuantumMask = kQuantum - 1;
 487 static const size_t kQuantumWide = 256;
 488 static const size_t kQuantumWideMask = kQuantumWide - 1;
 489
 490 static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum;
 491 static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide;
 492
 493 // We can optimise some divisions to shifts if these are powers of two.
 494 static_assert(mozilla::IsPowerOfTwo(kQuantum),
 495               "kQuantum is not a power of two");
 496 static_assert(mozilla::IsPowerOfTwo(kQuantumWide),
 497               "kQuantumWide is not a power of two");
 498
 499 static_assert(kMaxQuantumClass % kQuantum == 0,
 500               "kMaxQuantumClass is not a multiple of kQuantum");
 501 static_assert(kMaxQuantumWideClass % kQuantumWide == 0,
 502               "kMaxQuantumWideClass is not a multiple of kQuantumWide");
 503 static_assert(kQuantum < kQuantumWide,
 504               "kQuantum must be smaller than kQuantumWide");
 505 static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass),
 506               "kMinSubPageClass is not a power of two");
 507
 508 // Number of (2^n)-spaced tiny classes.
 509 static const size_t kNumTinyClasses =
 510     LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1;
 511
 512 // Number of quantum-spaced classes.  We add kQuantum(Max) before subtracting to
 513 // avoid underflow when a class is empty (Max<Min).
 514 static const size_t kNumQuantumClasses =
 515     (kMaxQuantumClass + kQuantum - kMinQuantumClass) / kQuantum;
 516 static const size_t kNumQuantumWideClasses =
 517     (kMaxQuantumWideClass + kQuantumWide - kMinQuantumWideClass) / kQuantumWide;
 518
 519 // Size and alignment of memory chunks that are allocated by the OS's virtual
 520 // memory system.
 521 static const size_t kChunkSize = 1_MiB;
 522 static const size_t kChunkSizeMask = kChunkSize - 1;
 523
 524 #ifdef MALLOC_STATIC_PAGESIZE
 525 // VM page size. It must divide the runtime CPU page size or the code
 526 // will abort.
 527 // Platform specific page size conditions copied from js/public/HeapAPI.h
 528 #  if defined(__powerpc64__)
 529 static const size_t gPageSize = 64_KiB;
 530 #  elif defined(__loongarch64)
 531 static const size_t gPageSize = 16_KiB;
 532 #  else
 533 static const size_t gPageSize = 4_KiB;
 534 #  endif
 535 static const size_t gRealPageSize = gPageSize;
 536
 537 #else
 538 // When MALLOC_OPTIONS contains one or several `P`s, the page size used
 539 // across the allocator is multiplied by 2 for each `P`, but we also keep
 540 // the real page size for code paths that need it. gPageSize is thus a
 541 // power of two greater or equal to gRealPageSize.
 542 static size_t gRealPageSize;
 543 static size_t gPageSize;
 544 #endif
 545
 546 #ifdef MALLOC_STATIC_PAGESIZE
 547 #  define DECLARE_GLOBAL(type, name)
 548 #  define DEFINE_GLOBALS
 549 #  define END_GLOBALS
 550 #  define DEFINE_GLOBAL(type) static const type
 551 #  define GLOBAL_LOG2 LOG2
 552 #  define GLOBAL_ASSERT_HELPER1(x) static_assert(x, #x)
 553 #  define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
 554 #  define GLOBAL_ASSERT(...)                                               \
 555     MACRO_CALL(                                                            \
 556         MOZ_PASTE_PREFIX_AND_ARG_COUNT(GLOBAL_ASSERT_HELPER, __VA_ARGS__), \
 557         (__VA_ARGS__))
 558 #  define GLOBAL_CONSTEXPR constexpr
 559 #else
 560 #  define DECLARE_GLOBAL(type, name) static type name;
 561 #  define DEFINE_GLOBALS static void DefineGlobals() {
 562 #  define END_GLOBALS }
 563 #  define DEFINE_GLOBAL(type)
 564 #  define GLOBAL_LOG2 FloorLog2
 565 #  define GLOBAL_ASSERT MOZ_RELEASE_ASSERT
 566 #  define GLOBAL_CONSTEXPR
 567 #endif
 568
 569 DECLARE_GLOBAL(size_t, gMaxSubPageClass)
 570 DECLARE_GLOBAL(uint8_t, gNumSubPageClasses)
 571 DECLARE_GLOBAL(uint8_t, gPageSize2Pow)
 572 DECLARE_GLOBAL(size_t, gPageSizeMask)
 573 DECLARE_GLOBAL(size_t, gChunkNumPages)
 574 DECLARE_GLOBAL(size_t, gChunkHeaderNumPages)
 575 DECLARE_GLOBAL(size_t, gMaxLargeClass)
 576
 577 DEFINE_GLOBALS
 578
 579 // Largest sub-page size class, or zero if there are none
 580 DEFINE_GLOBAL(size_t)
 581 gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0;
 582
 583 // Max size class for bins.
 584 #define gMaxBinClass \
 585   (gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass)
 586
 587 // Number of sub-page bins.
 588 DEFINE_GLOBAL(uint8_t)
 589 gNumSubPageClasses = []() GLOBAL_CONSTEXPR -> uint8_t {
 590   if GLOBAL_CONSTEXPR (gMaxSubPageClass != 0) {
 591     return FloorLog2(gMaxSubPageClass) - LOG2(kMinSubPageClass) + 1;
 592   }
 593   return 0;
 594 }();
 595
 596 DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize);
 597 DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1;
 598
 599 // Number of pages in a chunk.
 600 DEFINE_GLOBAL(size_t) gChunkNumPages = kChunkSize >> gPageSize2Pow;
 601
 602 // Number of pages necessary for a chunk header plus a guard page.
 603 DEFINE_GLOBAL(size_t)
 604 gChunkHeaderNumPages =
 605     1 + (((sizeof(arena_chunk_t) + sizeof(arena_chunk_map_t) * gChunkNumPages +
 606            gPageSizeMask) &
 607           ~gPageSizeMask) >>
 608          gPageSize2Pow);
 609
 610 // One chunk, minus the header, minus a guard page
 611 DEFINE_GLOBAL(size_t)
 612 gMaxLargeClass =
 613     kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow);
 614
 615 // Various sanity checks that regard configuration.
 616 GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize,
 617               "Page size is not a power of two");
 618 GLOBAL_ASSERT(kQuantum >= sizeof(void*));
 619 GLOBAL_ASSERT(kQuantum <= kQuantumWide);
 620 GLOBAL_ASSERT(!kNumQuantumWideClasses ||
 621               kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass));
 622
 623 GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass);
 624
 625 GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0);
 626 GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass);
 627 GLOBAL_ASSERT(kChunkSize >= gPageSize);
 628 GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize);
 629
 630 END_GLOBALS
 631
 632 // Recycle at most 128 MiB of chunks. This means we retain at most
 633 // 6.25% of the process address space on a 32-bit OS for later use.
 634 static const size_t gRecycleLimit = 128_MiB;
 635
 636 // The current amount of recycled bytes, updated atomically.
 637 static Atomic<size_t, ReleaseAcquire> gRecycledSize;
 638
 639 // Maximum number of dirty pages per arena.
 640 #define DIRTY_MAX_DEFAULT (1U << 8)
 641
 642 static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
 643
 644 // Return the smallest chunk multiple that is >= s.
 645 #define CHUNK_CEILING(s) (((s) + kChunkSizeMask) & ~kChunkSizeMask)
 646
 647 // Return the smallest cacheline multiple that is >= s.
 648 #define CACHELINE_CEILING(s) \
 649   (((s) + (kCacheLineSize - 1)) & ~(kCacheLineSize - 1))
 650
 651 // Return the smallest quantum multiple that is >= a.
 652 #define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask))
 653 #define QUANTUM_WIDE_CEILING(a) \
 654   (((a) + (kQuantumWideMask)) & ~(kQuantumWideMask))
 655
 656 // Return the smallest sub page-size  that is >= a.
 657 #define SUBPAGE_CEILING(a) (RoundUpPow2(a))
 658
 659 // Return the smallest pagesize multiple that is >= s.
 660 #define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask)
 661
 662 // Number of all the small-allocated classes
 663 #define NUM_SMALL_CLASSES                                          \
 664   (kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \
 665    gNumSubPageClasses)
 666
 667 // ***************************************************************************
 668 // MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
 669 #if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE)
 670 #  error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
 671 #endif
 672
 673 static void* base_alloc(size_t aSize);
 674
 675 // Set to true once the allocator has been initialized.
 676 #if defined(_MSC_VER) && !defined(__clang__)
 677 // MSVC may create a static initializer for an Atomic<bool>, which may actually
 678 // run after `malloc_init` has been called once, which triggers multiple
 679 // initializations.
 680 // We work around the problem by not using an Atomic<bool> at all. There is a
 681 // theoretical problem with using `malloc_initialized` non-atomically, but
 682 // practically, this is only true if `malloc_init` is never called before
 683 // threads are created.
 684 static bool malloc_initialized;
 685 #else
 686 static Atomic<bool, MemoryOrdering::ReleaseAcquire> malloc_initialized;
 687 #endif
 688
 689 // This lock must be held while bootstrapping us.
 690 static StaticMutex gInitLock MOZ_UNANNOTATED = {STATIC_MUTEX_INIT};
 691
 692 // ***************************************************************************
 693 // Statistics data structures.
 694
 695 struct arena_stats_t {
 696   // Number of bytes currently mapped.
 697   size_t mapped;
 698
 699   // Current number of committed pages (non madvised/decommitted)
 700   size_t committed;
 701
 702   // Per-size-category statistics.
 703   size_t allocated_small;
 704
 705   size_t allocated_large;
 706
 707   // The number of "memory operations" aka mallocs/frees.
 708   size_t operations;
 709 };
 710
 711 // ***************************************************************************
 712 // Extent data structures.
 713
 714 enum ChunkType {
 715   UNKNOWN_CHUNK,
 716   ZEROED_CHUNK,    // chunk only contains zeroes.
 717   ARENA_CHUNK,     // used to back arena runs created by arena_t::AllocRun.
 718   HUGE_CHUNK,      // used to back huge allocations (e.g. arena_t::MallocHuge).
 719   RECYCLED_CHUNK,  // chunk has been stored for future use by chunk_recycle.
 720 };
 721
 722 // Tree of extents.
 723 struct extent_node_t {
 724   union {
 725     // Linkage for the size/address-ordered tree for chunk recycling.
 726     RedBlackTreeNode<extent_node_t> mLinkBySize;
 727     // Arena id for huge allocations. It's meant to match mArena->mId,
 728     // which only holds true when the arena hasn't been disposed of.
 729     arena_id_t mArenaId;
 730   };
 731
 732   // Linkage for the address-ordered tree.
 733   RedBlackTreeNode<extent_node_t> mLinkByAddr;
 734
 735   // Pointer to the extent that this tree node is responsible for.
 736   void* mAddr;
 737
 738   // Total region size.
 739   size_t mSize;
 740
 741   union {
 742     // What type of chunk is there; used for chunk recycling.
 743     ChunkType mChunkType;
 744
 745     // A pointer to the associated arena, for huge allocations.
 746     arena_t* mArena;
 747   };
 748 };
 749
 750 struct ExtentTreeSzTrait {
 751   static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
 752     return aThis->mLinkBySize;
 753   }
 754
 755   static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
 756     Order ret = CompareInt(aNode->mSize, aOther->mSize);
 757     return (ret != Order::eEqual) ? ret
 758                                   : CompareAddr(aNode->mAddr, aOther->mAddr);
 759   }
 760 };
 761
 762 struct ExtentTreeTrait {
 763   static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
 764     return aThis->mLinkByAddr;
 765   }
 766
 767   static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
 768     return CompareAddr(aNode->mAddr, aOther->mAddr);
 769   }
 770 };
 771
 772 struct ExtentTreeBoundsTrait : public ExtentTreeTrait {
 773   static inline Order Compare(extent_node_t* aKey, extent_node_t* aNode) {
 774     uintptr_t key_addr = reinterpret_cast<uintptr_t>(aKey->mAddr);
 775     uintptr_t node_addr = reinterpret_cast<uintptr_t>(aNode->mAddr);
 776     size_t node_size = aNode->mSize;
 777
 778     // Is aKey within aNode?
 779     if (node_addr <= key_addr && key_addr < node_addr + node_size) {
 780       return Order::eEqual;
 781     }
 782
 783     return CompareAddr(aKey->mAddr, aNode->mAddr);
 784   }
 785 };
 786
 787 // Describe size classes to which allocations are rounded up to.
 788 // TODO: add large and huge types when the arena allocation code
 789 // changes in a way that allows it to be beneficial.
 790 class SizeClass {
 791  public:
 792   enum ClassType {
 793     Tiny,
 794     Quantum,
 795     QuantumWide,
 796     SubPage,
 797     Large,
 798   };
 799
 800   explicit inline SizeClass(size_t aSize) {
 801     if (aSize <= kMaxTinyClass) {
 802       mType = Tiny;
 803       mSize = std::max(RoundUpPow2(aSize), kMinTinyClass);
 804     } else if (aSize <= kMaxQuantumClass) {
 805       mType = Quantum;
 806       mSize = QUANTUM_CEILING(aSize);
 807     } else if (aSize <= kMaxQuantumWideClass) {
 808       mType = QuantumWide;
 809       mSize = QUANTUM_WIDE_CEILING(aSize);
 810     } else if (aSize <= gMaxSubPageClass) {
 811       mType = SubPage;
 812       mSize = SUBPAGE_CEILING(aSize);
 813     } else if (aSize <= gMaxLargeClass) {
 814       mType = Large;
 815       mSize = PAGE_CEILING(aSize);
 816     } else {
 817       MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Invalid size");
 818     }
 819   }
 820
 821   SizeClass& operator=(const SizeClass& aOther) = default;
 822
 823   bool operator==(const SizeClass& aOther) { return aOther.mSize == mSize; }
 824
 825   size_t Size() { return mSize; }
 826
 827   ClassType Type() { return mType; }
 828
 829   SizeClass Next() { return SizeClass(mSize + 1); }
 830
 831  private:
 832   ClassType mType;
 833   size_t mSize;
 834 };
 835
 836 // Fast division
 837 //
 838 // During deallocation we want to divide by the size class.  This class
 839 // provides a routine and sets up a constant as follows.
 840 //
 841 // To divide by a number D that is not a power of two we multiply by (2^17 /
 842 // D) and then right shift by 17 positions.
 843 //
 844 //   X / D
 845 //
 846 // becomes
 847 //
 848 //   (X * m) >> p
 849 //
 850 // Where m is calculated during the FastDivisor constructor similarly to:
 851 //
 852 //   m = 2^p / D
 853 //
 854 template <typename T>
 855 class FastDivisor {
 856  private:
 857   // The shift amount (p) is chosen to minimise the size of m while
 858   // working for divisors up to 65536 in steps of 16.  I arrived at 17
 859   // experimentally.  I wanted a low number to minimise the range of m
 860   // so it can fit in a uint16_t, 16 didn't work but 17 worked perfectly.
 861   //
 862   // We'd need to increase this if we allocated memory on smaller boundaries
 863   // than 16.
 864   static const unsigned p = 17;
 865
 866   // We can fit the inverted divisor in 16 bits, but we template it here for
 867   // convenience.
 868   T m;
 869
 870  public:
 871   // Needed so mBins can be constructed.
 872   FastDivisor() : m(0) {}
 873
 874   FastDivisor(unsigned div, unsigned max) {
 875     MOZ_ASSERT(div <= max);
 876
 877     // divide_inv_shift is large enough.
 878     MOZ_ASSERT((1U << p) >= div);
 879
 880     // The calculation here for m is formula 26 from Section
 881     // 10-9 "Unsigned Division by Divisors >= 1" in
 882     // Henry S. Warren, Jr.'s Hacker's Delight, 2nd Ed.
 883     unsigned m_ = ((1U << p) + div - 1 - (((1U << p) - 1) % div)) / div;
 884
 885     // Make sure that max * m does not overflow.
 886     MOZ_DIAGNOSTIC_ASSERT(max < UINT_MAX / m_);
 887
 888     MOZ_ASSERT(m_ <= std::numeric_limits<T>::max());
 889     m = static_cast<T>(m_);
 890
 891     // Initialisation made m non-zero.
 892     MOZ_ASSERT(m);
 893
 894     // Test that all the divisions in the range we expected would work.
 895 #ifdef MOZ_DEBUG
 896     for (unsigned num = 0; num < max; num += div) {
 897       MOZ_ASSERT(num / div == divide(num));
 898     }
 899 #endif
 900   }
 901
 902   // Note that this always occurs in uint32_t regardless of m's type.  If m is
 903   // a uint16_t it will be zero-extended before the multiplication.  We also use
 904   // uint32_t rather than something that could possibly be larger because it is
 905   // most-likely the cheapest multiplication.
 906   inline uint32_t divide(uint32_t num) const {
 907     // Check that m was initialised.
 908     MOZ_ASSERT(m);
 909     return (num * m) >> p;
 910   }
 911 };
 912
 913 template <typename T>
 914 unsigned inline operator/(unsigned num, FastDivisor<T> divisor) {
 915   return divisor.divide(num);
 916 }
 917
 918 // ***************************************************************************
 919 // Radix tree data structures.
 920 //
 921 // The number of bits passed to the template is the number of significant bits
 922 // in an address to do a radix lookup with.
 923 //
 924 // An address is looked up by splitting it in kBitsPerLevel bit chunks, except
 925 // the most significant bits, where the bit chunk is kBitsAtLevel1 which can be
 926 // different if Bits is not a multiple of kBitsPerLevel.
 927 //
 928 // With e.g. sizeof(void*)=4, Bits=16 and kBitsPerLevel=8, an address is split
 929 // like the following:
 930 // 0x12345678 -> mRoot[0x12][0x34]
 931 template <size_t Bits>
 932 class AddressRadixTree {
 933 // Size of each radix tree node (as a power of 2).
 934 // This impacts tree depth.
 935 #ifdef HAVE_64BIT_BUILD
 936   static const size_t kNodeSize = kCacheLineSize;
 937 #else
 938   static const size_t kNodeSize = 16_KiB;
 939 #endif
 940   static const size_t kBitsPerLevel = LOG2(kNodeSize) - LOG2(sizeof(void*));
 941   static const size_t kBitsAtLevel1 =
 942       (Bits % kBitsPerLevel) ? Bits % kBitsPerLevel : kBitsPerLevel;
 943   static const size_t kHeight = (Bits + kBitsPerLevel - 1) / kBitsPerLevel;
 944   static_assert(kBitsAtLevel1 + (kHeight - 1) * kBitsPerLevel == Bits,
 945                 "AddressRadixTree parameters don't work out");
 946
 947   Mutex mLock MOZ_UNANNOTATED;
 948   // We guard only the single slot creations and assume read-only is safe
 949   // at any time.
 950   void** mRoot;
 951
 952  public:
 953   bool Init() MOZ_REQUIRES(gInitLock) MOZ_EXCLUDES(mLock);
 954
 955   inline void* Get(void* aAddr) MOZ_EXCLUDES(mLock);
 956
 957   // Returns whether the value was properly set.
 958   inline bool Set(void* aAddr, void* aValue) MOZ_EXCLUDES(mLock);
 959
 960   inline bool Unset(void* aAddr) MOZ_EXCLUDES(mLock) {
 961     return Set(aAddr, nullptr);
 962   }
 963
 964  private:
 965   // GetSlotInternal is agnostic wrt mLock and used directly only in DEBUG
 966   // code.
 967   inline void** GetSlotInternal(void* aAddr, bool aCreate);
 968
 969   inline void** GetSlotIfExists(void* aAddr) MOZ_EXCLUDES(mLock) {
 970     return GetSlotInternal(aAddr, false);
 971   }
 972   inline void** GetOrCreateSlot(void* aAddr) MOZ_REQUIRES(mLock) {
 973     return GetSlotInternal(aAddr, true);
 974   }
 975 };
 976
 977 // ***************************************************************************
 978 // Arena data structures.
 979
 980 struct arena_bin_t;
 981
 982 struct ArenaChunkMapLink {
 983   static RedBlackTreeNode<arena_chunk_map_t>& GetTreeNode(
 984       arena_chunk_map_t* aThis) {
 985     return aThis->link;
 986   }
 987 };
 988
 989 struct ArenaAvailTreeTrait : public ArenaChunkMapLink {
 990   static inline Order Compare(arena_chunk_map_t* aNode,
 991                               arena_chunk_map_t* aOther) {
 992     size_t size1 = aNode->bits & ~gPageSizeMask;
 993     size_t size2 = aOther->bits & ~gPageSizeMask;
 994     Order ret = CompareInt(size1, size2);
 995     return (ret != Order::eEqual)
 996                ? ret
 997                : CompareAddr((aNode->bits & CHUNK_MAP_KEY) ? nullptr : aNode,
 998                              aOther);
 999   }
1000 };
1001
1002 struct ArenaDirtyChunkTrait {
1003   static RedBlackTreeNode<arena_chunk_t>& GetTreeNode(arena_chunk_t* aThis) {
1004     return aThis->link_dirty;
1005   }
1006
1007   static inline Order Compare(arena_chunk_t* aNode, arena_chunk_t* aOther) {
1008     MOZ_ASSERT(aNode);
1009     MOZ_ASSERT(aOther);
1010     return CompareAddr(aNode, aOther);
1011   }
1012 };
1013
1014 #ifdef MALLOC_DOUBLE_PURGE
1015 namespace mozilla {
1016
1017 template <>
1018 struct GetDoublyLinkedListElement<arena_chunk_t> {
1019   static DoublyLinkedListElement<arena_chunk_t>& Get(arena_chunk_t* aThis) {
1020     return aThis->chunks_madvised_elem;
1021   }
1022 };
1023 }  // namespace mozilla
1024 #endif
1025
1026 struct arena_run_t {
1027 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
1028   uint32_t mMagic;
1029 #  define ARENA_RUN_MAGIC 0x384adf93
1030
1031   // On 64-bit platforms, having the arena_bin_t pointer following
1032   // the mMagic field means there's padding between both fields, making
1033   // the run header larger than necessary.
1034   // But when MOZ_DIAGNOSTIC_ASSERT_ENABLED is not set, starting the
1035   // header with this field followed by the arena_bin_t pointer yields
1036   // the same padding. We do want the mMagic field to appear first, so
1037   // depending whether MOZ_DIAGNOSTIC_ASSERT_ENABLED is set or not, we
1038   // move some field to avoid padding.
1039
1040   // Number of free regions in run.
1041   unsigned mNumFree;
1042 #endif
1043
1044   // Used by arena_bin_t::mNonFullRuns.
1045   DoublyLinkedListElement<arena_run_t> mRunListElem;
1046
1047   // Bin this run is associated with.
1048   arena_bin_t* mBin;
1049
1050   // Index of first element that might have a free region.
1051   unsigned mRegionsMinElement;
1052
1053 #if !defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
1054   // Number of free regions in run.
1055   unsigned mNumFree;
1056 #endif
1057
1058   // Bitmask of in-use regions (0: in use, 1: free).
1059   unsigned mRegionsMask[];  // Dynamically sized.
1060 };
1061
1062 namespace mozilla {
1063
1064 template <>
1065 struct GetDoublyLinkedListElement<arena_run_t> {
1066   static DoublyLinkedListElement<arena_run_t>& Get(arena_run_t* aThis) {
1067     return aThis->mRunListElem;
1068   }
1069 };
1070
1071 }  // namespace mozilla
1072
1073 struct arena_bin_t {
1074   // We use a LIFO ("last-in-first-out") policy to refill non-full runs.
1075   //
1076   // This has the following reasons:
1077   // 1. It is cheap, as all our non-full-runs' book-keeping is O(1), no
1078   //    tree-balancing or walking is needed.
1079   // 2. It also helps to increase the probability for CPU cache hits for the
1080   //    book-keeping and the reused slots themselves, as the same memory was
1081   //    most recently touched during free, especially when used from the same
1082   //    core (or via the same shared cache, depending on the architecture).
1083   DoublyLinkedList<arena_run_t> mNonFullRuns;
1084
1085   // Bin's size class.
1086   size_t mSizeClass;
1087
1088   // Total number of regions in a run for this bin's size class.
1089   uint32_t mRunNumRegions;
1090
1091   // Number of elements in a run's mRegionsMask for this bin's size class.
1092   uint32_t mRunNumRegionsMask;
1093
1094   // Offset of first region in a run for this bin's size class.
1095   uint32_t mRunFirstRegionOffset;
1096
1097   // Current number of runs in this bin, full or otherwise.
1098   uint32_t mNumRuns;
1099
1100   // A constant for fast division by size class.  This value is 16 bits wide so
1101   // it is placed last.
1102   FastDivisor<uint16_t> mSizeDivisor;
1103
1104   // Total number of pages in a run for this bin's size class.
1105   uint8_t mRunSizePages;
1106
1107   // Amount of overhead runs are allowed to have.
1108   static constexpr double kRunOverhead = 1.6_percent;
1109   static constexpr double kRunRelaxedOverhead = 2.4_percent;
1110
1111   // Initialize a bin for the given size class.
1112   // The generated run sizes, for a page size of 4 KiB, are:
1113   //   size|run       size|run       size|run       size|run
1114   //  class|size     class|size     class|size     class|size
1115   //     4   4 KiB      8   4 KiB     16   4 KiB     32   4 KiB
1116   //    48   4 KiB     64   4 KiB     80   4 KiB     96   4 KiB
1117   //   112   4 KiB    128   8 KiB    144   4 KiB    160   8 KiB
1118   //   176   4 KiB    192   4 KiB    208   8 KiB    224   4 KiB
1119   //   240   8 KiB    256  16 KiB    272   8 KiB    288   4 KiB
1120   //   304  12 KiB    320  12 KiB    336   4 KiB    352   8 KiB
1121   //   368   4 KiB    384   8 KiB    400  20 KiB    416  16 KiB
1122   //   432  12 KiB    448   4 KiB    464  16 KiB    480   8 KiB
1123   //   496  20 KiB    512  32 KiB    768  16 KiB   1024  64 KiB
1124   //  1280  24 KiB   1536  32 KiB   1792  16 KiB   2048 128 KiB
1125   //  2304  16 KiB   2560  48 KiB   2816  36 KiB   3072  64 KiB
1126   //  3328  36 KiB   3584  32 KiB   3840  64 KiB
1127   inline void Init(SizeClass aSizeClass);
1128 };
1129
1130 // We try to keep the above structure aligned with common cache lines sizes,
1131 // often that's 64 bytes on x86 and ARM, we don't make assumptions for other
1132 // architectures.
1133 #if defined(__x86_64__) || defined(__aarch64__)
1134 // On 64bit platforms this structure is often 48 bytes
1135 // long, which means every other array element will be properly aligned.
1136 static_assert(sizeof(arena_bin_t) == 48);
1137 #elif defined(__x86__) || defined(__arm__)
1138 static_assert(sizeof(arena_bin_t) == 32);
1139 #endif
1140
1141 struct arena_t {
1142 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
1143   uint32_t mMagic;
1144 #  define ARENA_MAGIC 0x947d3d24
1145 #endif
1146
1147   // Linkage for the tree of arenas by id.
1148   // This just provides the memory to be used by the collection tree
1149   // and thus needs no arena_t::mLock.
1150   RedBlackTreeNode<arena_t> mLink;
1151
1152   // Arena id, that we keep away from the beginning of the struct so that
1153   // free list pointers in TypedBaseAlloc<arena_t> don't overflow in it,
1154   // and it keeps the value it had after the destructor.
1155   arena_id_t mId;
1156
1157   // Operations on this arena require that lock be locked. The MaybeMutex
1158   // class will elude locking if the arena is accessed from a single thread
1159   // only (currently only the main thread can be used like this).
1160   MaybeMutex mLock MOZ_UNANNOTATED;
1161
1162   // The lock is required to write to fields of mStats, but it is not needed to
1163   // read them, so long as inconsistents reads are okay (fields might not make
1164   // sense together).
1165   arena_stats_t mStats MOZ_GUARDED_BY(mLock);
1166
1167   // We can read the allocated counts from mStats without a lock:
1168   size_t AllocatedBytes() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
1169     return mStats.allocated_small + mStats.allocated_large;
1170   }
1171
1172   // We can read the operations field from mStats without a lock:
1173   size_t Operations() const MOZ_NO_THREAD_SAFETY_ANALYSIS {
1174     return mStats.operations;
1175   }
1176
1177  private:
1178   // Tree of dirty-page-containing chunks this arena manages.
1179   RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty
1180       MOZ_GUARDED_BY(mLock);
1181
1182 #ifdef MALLOC_DOUBLE_PURGE
1183   // Head of a linked list of MADV_FREE'd-page-containing chunks this
1184   // arena manages.
1185   DoublyLinkedList<arena_chunk_t> mChunksMAdvised MOZ_GUARDED_BY(mLock);
1186 #endif
1187
1188   // In order to avoid rapid chunk allocation/deallocation when an arena
1189   // oscillates right on the cusp of needing a new chunk, cache the most
1190   // recently freed chunk.  The spare is left in the arena's chunk trees
1191   // until it is deleted.
1192   //
1193   // There is one spare chunk per arena, rather than one spare total, in
1194   // order to avoid interactions between multiple threads that could make
1195   // a single spare inadequate.
1196   arena_chunk_t* mSpare MOZ_GUARDED_BY(mLock);
1197
1198   // A per-arena opt-in to randomize the offset of small allocations
1199   // Needs no lock, read-only.
1200   bool mRandomizeSmallAllocations;
1201
1202   // Whether this is a private arena. Multiple public arenas are just a
1203   // performance optimization and not a safety feature.
1204   //
1205   // Since, for example, we don't want thread-local arenas to grow too much, we
1206   // use the default arena for bigger allocations. We use this member to allow
1207   // realloc() to switch out of our arena if needed (which is not allowed for
1208   // private arenas for security).
1209   // Needs no lock, read-only.
1210   bool mIsPrivate;
1211
1212   // A pseudorandom number generator. Initially null, it gets initialized
1213   // on first use to avoid recursive malloc initialization (e.g. on OSX
1214   // arc4random allocates memory).
1215   mozilla::non_crypto::XorShift128PlusRNG* mPRNG MOZ_GUARDED_BY(mLock);
1216   bool mIsPRNGInitializing MOZ_GUARDED_BY(mLock);
1217
1218  public:
1219   // Current count of pages within unused runs that are potentially
1220   // dirty, and for which madvise(... MADV_FREE) has not been called.  By
1221   // tracking this, we can institute a limit on how much dirty unused
1222   // memory is mapped for each arena.
1223   size_t mNumDirty MOZ_GUARDED_BY(mLock);
1224
1225   // The current number of pages that are available without a system call (but
1226   // probably a page fault).
1227   size_t mNumMAdvised MOZ_GUARDED_BY(mLock);
1228   size_t mNumFresh MOZ_GUARDED_BY(mLock);
1229
1230   // Maximum value allowed for mNumDirty.
1231   // Needs no lock, read-only.
1232   size_t mMaxDirty;
1233
1234   // Needs no lock, read-only.
1235   int32_t mMaxDirtyIncreaseOverride;
1236   int32_t mMaxDirtyDecreaseOverride;
1237
1238  private:
1239   // Size/address-ordered tree of this arena's available runs.  This tree
1240   // is used for first-best-fit run allocation.
1241   RedBlackTree<arena_chunk_map_t, ArenaAvailTreeTrait> mRunsAvail
1242       MOZ_GUARDED_BY(mLock);
1243
1244  public:
1245   // mBins is used to store rings of free regions of the following sizes,
1246   // assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
1247   //
1248   //  | mBins[i] | size |
1249   //  +----------+------+
1250   //  |       0  |    2 |
1251   //  |       1  |    4 |
1252   //  |       2  |    8 |
1253   //  +----------+------+
1254   //  |       3  |   16 |
1255   //  |       4  |   32 |
1256   //  |       5  |   48 |
1257   //  |       6  |   64 |
1258   //  |          :      :
1259   //  |          :      :
1260   //  |      33  |  496 |
1261   //  |      34  |  512 |
1262   //  +----------+------+
1263   //  |      35  |  768 |
1264   //  |      36  | 1024 |
1265   //  |          :      :
1266   //  |          :      :
1267   //  |      46  | 3584 |
1268   //  |      47  | 3840 |
1269   //  +----------+------+
1270   arena_bin_t mBins[] MOZ_GUARDED_BY(mLock);  // Dynamically sized.
1271
1272   explicit arena_t(arena_params_t* aParams, bool aIsPrivate);
1273   ~arena_t();
1274
1275   void ResetSmallAllocRandomization();
1276
1277   void InitPRNG() MOZ_REQUIRES(mLock);
1278
1279  private:
1280   void InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages)
1281       MOZ_REQUIRES(mLock);
1282
1283   // Remove the chunk from the arena.  This removes it from all the page counts.
1284   // It assumes its run has already been removed and lets the caller clear
1285   // mSpare as necessary.
1286   bool RemoveChunk(arena_chunk_t* aChunk) MOZ_REQUIRES(mLock);
1287
1288   // This may return a chunk that should be destroyed with chunk_dealloc outside
1289   // of the arena lock.  It is not the same chunk as was passed in (since that
1290   // chunk now becomes mSpare).
1291   [[nodiscard]] arena_chunk_t* DemoteChunkToSpare(arena_chunk_t* aChunk)
1292       MOZ_REQUIRES(mLock);
1293
1294   // Try to merge the run with its neighbours. Returns the new index of the run
1295   // (since it may have merged with an earlier one).
1296   size_t TryCoalesce(arena_chunk_t* aChunk, size_t run_ind, size_t run_pages,
1297                      size_t size) MOZ_REQUIRES(mLock);
1298
1299   arena_run_t* AllocRun(size_t aSize, bool aLarge, bool aZero)
1300       MOZ_REQUIRES(mLock);
1301
1302   arena_chunk_t* DallocRun(arena_run_t* aRun, bool aDirty) MOZ_REQUIRES(mLock);
1303
1304   [[nodiscard]] bool SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
1305                               bool aZero) MOZ_REQUIRES(mLock);
1306
1307   void TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
1308                    size_t aNewSize) MOZ_REQUIRES(mLock);
1309
1310   void TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
1311                    size_t aNewSize, bool dirty) MOZ_REQUIRES(mLock);
1312
1313   arena_run_t* GetNewEmptyBinRun(arena_bin_t* aBin) MOZ_REQUIRES(mLock);
1314
1315   inline arena_run_t* GetNonFullBinRun(arena_bin_t* aBin) MOZ_REQUIRES(mLock);
1316
1317   inline uint8_t FindFreeBitInMask(uint32_t aMask, uint32_t& aRng)
1318       MOZ_REQUIRES(mLock);
1319
1320   inline void* ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin)
1321       MOZ_REQUIRES(mLock);
1322
1323   inline void* MallocSmall(size_t aSize, bool aZero) MOZ_EXCLUDES(mLock);
1324
1325   void* MallocLarge(size_t aSize, bool aZero) MOZ_EXCLUDES(mLock);
1326
1327   void* MallocHuge(size_t aSize, bool aZero) MOZ_EXCLUDES(mLock);
1328
1329   void* PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize)
1330       MOZ_EXCLUDES(mLock);
1331
1332   void* PallocHuge(size_t aSize, size_t aAlignment, bool aZero)
1333       MOZ_EXCLUDES(mLock);
1334
1335   void RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
1336                          size_t aOldSize) MOZ_EXCLUDES(mLock);
1337
1338   bool RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
1339                        size_t aOldSize) MOZ_EXCLUDES(mLock);
1340
1341   void* RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize)
1342       MOZ_EXCLUDES(mLock);
1343
1344   void* RallocHuge(void* aPtr, size_t aSize, size_t aOldSize)
1345       MOZ_EXCLUDES(mLock);
1346
1347  public:
1348   inline void* Malloc(size_t aSize, bool aZero) MOZ_EXCLUDES(mLock);
1349
1350   void* Palloc(size_t aAlignment, size_t aSize) MOZ_EXCLUDES(mLock);
1351
1352   // This may return a chunk that should be destroyed with chunk_dealloc outside
1353   // of the arena lock.  It is not the same chunk as was passed in (since that
1354   // chunk now becomes mSpare).
1355   [[nodiscard]] inline arena_chunk_t* DallocSmall(arena_chunk_t* aChunk,
1356                                                   void* aPtr,
1357                                                   arena_chunk_map_t* aMapElm)
1358       MOZ_REQUIRES(mLock);
1359
1360   [[nodiscard]] arena_chunk_t* DallocLarge(arena_chunk_t* aChunk, void* aPtr)
1361       MOZ_REQUIRES(mLock);
1362
1363   void* Ralloc(void* aPtr, size_t aSize, size_t aOldSize) MOZ_EXCLUDES(mLock);
1364
1365   size_t EffectiveMaxDirty();
1366
1367 #ifdef MALLOC_DECOMMIT
1368   // During a commit operation (for aReqPages) we have the opportunity of
1369   // commiting at most aRemPages additional pages.  How many should we commit to
1370   // amortise system calls?
1371   size_t ExtraCommitPages(size_t aReqPages, size_t aRemainingPages)
1372       MOZ_REQUIRES(mLock);
1373 #endif
1374
1375   // Purge some dirty pages.
1376   //
1377   // If this arena has more than EffectiveMaxDirty() dirty pages or aForce is
1378   // true, then purge one run of dirty pages.
1379   //
1380   // This must be called without the mLock held (it'll take the lock).
1381   //
1382   // To release more than a single run of pages then it's best to call Purge
1383   // in a loop.  It returns true if mNumDirty > EffectiveMaxDirty() so that the
1384   // caller knows if the loop should continue.
1385   //
1386   bool Purge(bool aForce = false) MOZ_EXCLUDES(mLock);
1387
1388   class PurgeInfo {
1389    private:
1390     size_t mDirtyInd = 0;
1391     size_t mDirtyNPages = 0;
1392     size_t mFreeRunInd = 0;
1393     size_t mFreeRunLen = 0;
1394
1395    public:
1396     arena_t& mArena;
1397
1398     arena_chunk_t* mChunk = nullptr;
1399
1400     size_t FreeRunLenBytes() const { return mFreeRunLen << gPageSize2Pow; }
1401
1402     // The last index of the free run.
1403     size_t FreeRunLastInd() const { return mFreeRunInd + mFreeRunLen - 1; }
1404
1405     void* DirtyPtr() const {
1406       return (void*)(uintptr_t(mChunk) + (mDirtyInd << gPageSize2Pow));
1407     }
1408
1409     size_t DirtyLenBytes() const { return mDirtyNPages << gPageSize2Pow; }
1410
1411     // Purging memory is seperated into 3 phases.
1412     //  * FindDirtyPages() which find the dirty pages in a chunk and marks the
1413     //    run and chunk as busy while holding the lock.
1414     //  * Release the pages (without the lock)
1415     //  * UpdatePagesAndCounts() which marks the dirty pages as not-dirty and
1416     //    updates other counters (while holding the lock).
1417     //
1418     // FindDirtyPages() will return false purging should not continue purging in
1419     // this chunk.  Either because it has no dirty pages or is dying.
1420     bool FindDirtyPages(bool aPurgedOnce) MOZ_REQUIRES(mArena.mLock);
1421
1422     // Returns a pair, the first field indicates if there are more dirty pages
1423     // remaining in the current chunk. The second field if non-null points to a
1424     // chunk that must be released by the caller.
1425     std::pair<bool, arena_chunk_t*> UpdatePagesAndCounts()
1426         MOZ_REQUIRES(mArena.mLock);
1427
1428     // FinishPurgingInChunk() is used whenever we decide to stop purging in a
1429     // chunk, This could be because there are no more dirty pages, or the chunk
1430     // is dying, or we hit the arena-level threshold.
1431     void FinishPurgingInChunk(bool aAddToMAdvised) MOZ_REQUIRES(mArena.mLock);
1432
1433     explicit PurgeInfo(arena_t& arena, arena_chunk_t* chunk)
1434         : mArena(arena), mChunk(chunk) {}
1435   };
1436
1437   void HardPurge();
1438
1439   bool IsMainThreadOnly() const { return !mLock.LockIsEnabled(); }
1440
1441   void* operator new(size_t aCount) = delete;
1442
1443   void* operator new(size_t aCount, const fallible_t&) noexcept;
1444
1445   void operator delete(void*);
1446 };
1447
1448 struct ArenaTreeTrait {
1449   static RedBlackTreeNode<arena_t>& GetTreeNode(arena_t* aThis) {
1450     return aThis->mLink;
1451   }
1452
1453   static inline Order Compare(arena_t* aNode, arena_t* aOther) {
1454     MOZ_ASSERT(aNode);
1455     MOZ_ASSERT(aOther);
1456     return CompareInt(aNode->mId, aOther->mId);
1457   }
1458 };
1459
1460 // Bookkeeping for all the arenas used by the allocator.
1461 // Arenas are separated in two categories:
1462 // - "private" arenas, used through the moz_arena_* API
1463 // - all the other arenas: the default arena, and thread-local arenas,
1464 //   used by the standard API.
1465 class ArenaCollection {
1466  public:
1467   bool Init() MOZ_REQUIRES(gInitLock) MOZ_EXCLUDES(mLock) {
1468     MOZ_PUSH_IGNORE_THREAD_SAFETY
1469     mArenas.Init();
1470     mPrivateArenas.Init();
1471     mMainThreadArenas.Init();
1472     MOZ_POP_THREAD_SAFETY
1473     arena_params_t params;
1474     // The main arena allows more dirty pages than the default for other arenas.
1475     params.mMaxDirty = opt_dirty_max;
1476     mDefaultArena =
1477         mLock.Init() ? CreateArena(/* aIsPrivate = */ false, &params) : nullptr;
1478     return bool(mDefaultArena);
1479   }
1480
1481   inline arena_t* GetById(arena_id_t aArenaId, bool aIsPrivate)
1482       MOZ_EXCLUDES(mLock);
1483
1484   arena_t* CreateArena(bool aIsPrivate, arena_params_t* aParams)
1485       MOZ_EXCLUDES(mLock);
1486
1487   void DisposeArena(arena_t* aArena) MOZ_EXCLUDES(mLock) {
1488     MutexAutoLock lock(mLock);
1489     Tree& tree =
1490         aArena->IsMainThreadOnly() ? mMainThreadArenas : mPrivateArenas;
1491
1492     MOZ_RELEASE_ASSERT(tree.Search(aArena), "Arena not in tree");
1493     tree.Remove(aArena);
1494     delete aArena;
1495   }
1496
1497   void SetDefaultMaxDirtyPageModifier(int32_t aModifier) {
1498     mDefaultMaxDirtyPageModifier = aModifier;
1499   }
1500   int32_t DefaultMaxDirtyPageModifier() { return mDefaultMaxDirtyPageModifier; }
1501
1502   using Tree = RedBlackTree<arena_t, ArenaTreeTrait>;
1503
1504   struct Iterator : Tree::Iterator {
1505     explicit Iterator(Tree* aTree, Tree* aSecondTree,
1506                       Tree* aThirdTree = nullptr)
1507         : Tree::Iterator(aTree),
1508           mSecondTree(aSecondTree),
1509           mThirdTree(aThirdTree) {}
1510
1511     Item<Iterator> begin() {
1512       return Item<Iterator>(this, *Tree::Iterator::begin());
1513     }
1514
1515     Item<Iterator> end() { return Item<Iterator>(this, nullptr); }
1516
1517     arena_t* Next() {
1518       arena_t* result = Tree::Iterator::Next();
1519       if (!result && mSecondTree) {
1520         new (this) Iterator(mSecondTree, mThirdTree);
1521         result = *Tree::Iterator::begin();
1522       }
1523       return result;
1524     }
1525
1526    private:
1527     Tree* mSecondTree;
1528     Tree* mThirdTree;
1529   };
1530
1531   Iterator iter() MOZ_REQUIRES(mLock) {
1532     if (IsOnMainThreadWeak()) {
1533       return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas);
1534     }
1535     return Iterator(&mArenas, &mPrivateArenas);
1536   }
1537
1538   Iterator iter_all() {
1539     return Iterator(&mArenas, &mPrivateArenas, &mMainThreadArenas);
1540   }
1541
1542   inline arena_t* GetDefault() { return mDefaultArena; }
1543
1544   Mutex mLock MOZ_UNANNOTATED;
1545
1546   // We're running on the main thread which is set by a call to SetMainThread().
1547   bool IsOnMainThread() const {
1548     return mMainThreadId.isSome() &&
1549            ThreadIdEqual(mMainThreadId.value(), GetThreadId());
1550   }
1551
1552   // We're running on the main thread or SetMainThread() has never been called.
1553   bool IsOnMainThreadWeak() const {
1554     return mMainThreadId.isNothing() || IsOnMainThread();
1555   }
1556
1557   // After a fork set the new thread ID in the child.
1558   // This is done as the first thing after a fork, before mLock even re-inits.
1559   void ResetMainThread() MOZ_EXCLUDES(mLock) {
1560     // The post fork handler in the child can run from a MacOS worker thread,
1561     // so we can't set our main thread to it here.  Instead we have to clear it.
1562     mMainThreadId = Nothing();
1563   }
1564
1565   void SetMainThread() MOZ_EXCLUDES(mLock) {
1566     MutexAutoLock lock(mLock);
1567     MOZ_ASSERT(mMainThreadId.isNothing());
1568     mMainThreadId = Some(GetThreadId());
1569   }
1570
1571  private:
1572   const static arena_id_t MAIN_THREAD_ARENA_BIT = 0x1;
1573
1574   // Can be called with or without lock, depending on aTree.
1575   inline arena_t* GetByIdInternal(Tree& aTree, arena_id_t aArenaId);
1576
1577   arena_id_t MakeRandArenaId(bool aIsMainThreadOnly) const MOZ_REQUIRES(mLock);
1578   static bool ArenaIdIsMainThreadOnly(arena_id_t aArenaId) {
1579     return aArenaId & MAIN_THREAD_ARENA_BIT;
1580   }
1581
1582   arena_t* mDefaultArena;
1583   arena_id_t mLastPublicArenaId MOZ_GUARDED_BY(mLock);
1584
1585   // Accessing mArenas and mPrivateArenas can only be done while holding mLock.
1586   Tree mArenas MOZ_GUARDED_BY(mLock);
1587   Tree mPrivateArenas MOZ_GUARDED_BY(mLock);
1588
1589   // Some mMainThreadArenas accesses to mMainThreadArenas can (and should) elude
1590   // the lock, see GetById().
1591   Tree mMainThreadArenas MOZ_GUARDED_BY(mLock);
1592
1593   Atomic<int32_t, MemoryOrdering::Relaxed> mDefaultMaxDirtyPageModifier;
1594   // This is never changed except for forking, and it does not need mLock.
1595   Maybe<ThreadId> mMainThreadId;
1596 };
1597
1598 MOZ_RUNINIT static ArenaCollection gArenas;
1599
1600 // ******
1601 // Chunks.
1602 static AddressRadixTree<(sizeof(void*) << 3) - LOG2(kChunkSize)> gChunkRTree;
1603
1604 // Protects chunk-related data structures.
1605 static Mutex chunks_mtx;
1606
1607 // Trees of chunks that were previously allocated (trees differ only in node
1608 // ordering).  These are used when allocating chunks, in an attempt to re-use
1609 // address space.  Depending on function, different tree orderings are needed,
1610 // which is why there are two trees with the same contents.
1611 static RedBlackTree<extent_node_t, ExtentTreeSzTrait> gChunksBySize
1612     MOZ_GUARDED_BY(chunks_mtx);
1613 static RedBlackTree<extent_node_t, ExtentTreeTrait> gChunksByAddress
1614     MOZ_GUARDED_BY(chunks_mtx);
1615
1616 // Protects huge allocation-related data structures.
1617 static Mutex huge_mtx;
1618
1619 // Tree of chunks that are stand-alone huge allocations.
1620 static RedBlackTree<extent_node_t, ExtentTreeTrait> huge
1621     MOZ_GUARDED_BY(huge_mtx);
1622
1623 // Huge allocation statistics.
1624 static size_t huge_allocated MOZ_GUARDED_BY(huge_mtx);
1625 static size_t huge_mapped MOZ_GUARDED_BY(huge_mtx);
1626 static size_t huge_operations MOZ_GUARDED_BY(huge_mtx);
1627
1628 // **************************
1629 // base (internal allocation).
1630
1631 static Mutex base_mtx;
1632
1633 // Current pages that are being used for internal memory allocations.  These
1634 // pages are carved up in cacheline-size quanta, so that there is no chance of
1635 // false cache line sharing.
1636 static void* base_pages MOZ_GUARDED_BY(base_mtx);
1637 static void* base_next_addr MOZ_GUARDED_BY(base_mtx);
1638 static void* base_next_decommitted MOZ_GUARDED_BY(base_mtx);
1639 // Address immediately past base_pages.
1640 static void* base_past_addr MOZ_GUARDED_BY(base_mtx);
1641 static size_t base_mapped MOZ_GUARDED_BY(base_mtx);
1642 static size_t base_committed MOZ_GUARDED_BY(base_mtx);
1643
1644 // ******
1645 // Arenas.
1646
1647 // The arena associated with the current thread (per
1648 // jemalloc_thread_local_arena) On OSX, __thread/thread_local circles back
1649 // calling malloc to allocate storage on first access on each thread, which
1650 // leads to an infinite loop, but pthread-based TLS somehow doesn't have this
1651 // problem.
1652 #if !defined(XP_DARWIN)
1653 static MOZ_THREAD_LOCAL(arena_t*) thread_arena;
1654 #else
1655 static detail::ThreadLocal<arena_t*, detail::ThreadLocalKeyStorage>
1656     thread_arena;
1657 #endif
1658
1659 // *****************************
1660 // Runtime configuration options.
1661
1662 #ifdef MALLOC_RUNTIME_CONFIG
1663 #  define MALLOC_RUNTIME_VAR static
1664 #else
1665 #  define MALLOC_RUNTIME_VAR static const
1666 #endif
1667
1668 enum PoisonType {
1669   NONE,
1670   SOME,
1671   ALL,
1672 };
1673
1674 MALLOC_RUNTIME_VAR bool opt_junk = false;
1675 MALLOC_RUNTIME_VAR bool opt_zero = false;
1676
1677 #ifdef EARLY_BETA_OR_EARLIER
1678 MALLOC_RUNTIME_VAR PoisonType opt_poison = ALL;
1679 #else
1680 MALLOC_RUNTIME_VAR PoisonType opt_poison = SOME;
1681 #endif
1682
1683 // Keep this larger than and ideally a multiple of kCacheLineSize;
1684 MALLOC_RUNTIME_VAR size_t opt_poison_size = 256;
1685 #ifndef MALLOC_RUNTIME_CONFIG
1686 static_assert(opt_poison_size >= kCacheLineSize);
1687 static_assert((opt_poison_size % kCacheLineSize) == 0);
1688 #endif
1689
1690 static bool opt_randomize_small = true;
1691
1692 // ***************************************************************************
1693 // Begin forward declarations.
1694
1695 static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase);
1696 static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType);
1697 #ifdef MOZ_DEBUG
1698 static void chunk_assert_zero(void* aPtr, size_t aSize);
1699 #endif
1700 static void huge_dalloc(void* aPtr, arena_t* aArena);
1701 static bool malloc_init_hard();
1702
1703 #ifndef XP_WIN
1704 #  ifdef XP_DARWIN
1705 #    define FORK_HOOK extern "C"
1706 #  else
1707 #    define FORK_HOOK static
1708 #  endif
1709 FORK_HOOK void _malloc_prefork(void);
1710 FORK_HOOK void _malloc_postfork_parent(void);
1711 FORK_HOOK void _malloc_postfork_child(void);
1712 #  ifdef XP_DARWIN
1713 FORK_HOOK void _malloc_postfork(void);
1714 #  endif
1715 #endif
1716
1717 // End forward declarations.
1718 // ***************************************************************************
1719
1720 // FreeBSD's pthreads implementation calls malloc(3), so the malloc
1721 // implementation has to take pains to avoid infinite recursion during
1722 // initialization.
1723 // Returns whether the allocator was successfully initialized.
1724 static inline bool malloc_init() {
1725   if (!malloc_initialized) {
1726     return malloc_init_hard();
1727   }
1728   return true;
1729 }
1730
1731 static void _malloc_message(const char* p) {
1732 #if !defined(XP_WIN)
1733 #  define _write write
1734 #endif
1735   // Pretend to check _write() errors to suppress gcc warnings about
1736   // warn_unused_result annotations in some versions of glibc headers.
1737   if (_write(STDERR_FILENO, p, (unsigned int)strlen(p)) < 0) {
1738     return;
1739   }
1740 }
1741
1742 template <typename... Args>
1743 static void _malloc_message(const char* p, Args... args) {
1744   _malloc_message(p);
1745   _malloc_message(args...);
1746 }
1747
1748 #ifdef ANDROID
1749 // Android's pthread.h does not declare pthread_atfork() until SDK 21.
1750 extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void),
1751                                          void (*)(void));
1752 #endif
1753
1754 // ***************************************************************************
1755 // Begin Utility functions/macros.
1756
1757 // Return the chunk address for allocation address a.
1758 static inline arena_chunk_t* GetChunkForPtr(const void* aPtr) {
1759   return (arena_chunk_t*)(uintptr_t(aPtr) & ~kChunkSizeMask);
1760 }
1761
1762 // Return the chunk offset of address a.
1763 static inline size_t GetChunkOffsetForPtr(const void* aPtr) {
1764   return (size_t)(uintptr_t(aPtr) & kChunkSizeMask);
1765 }
1766
1767 static inline const char* _getprogname(void) { return "<jemalloc>"; }
1768
1769 static inline void MaybePoison(void* aPtr, size_t aSize) {
1770   size_t size;
1771   switch (opt_poison) {
1772     case NONE:
1773       return;
1774     case SOME:
1775       size = std::min(aSize, opt_poison_size);
1776       break;
1777     case ALL:
1778       size = aSize;
1779       break;
1780   }
1781   MOZ_ASSERT(size != 0 && size <= aSize);
1782   memset(aPtr, kAllocPoison, size);
1783 }
1784
1785 // Fill the given range of memory with zeroes or junk depending on opt_junk and
1786 // opt_zero.
1787 static inline void ApplyZeroOrJunk(void* aPtr, size_t aSize) {
1788   if (opt_junk) {
1789     memset(aPtr, kAllocJunk, aSize);
1790   } else if (opt_zero) {
1791     memset(aPtr, 0, aSize);
1792   }
1793 }
1794
1795 // On Windows, delay crashing on OOM.
1796 #ifdef XP_WIN
1797
1798 // Implementation of VirtualAlloc wrapper (bug 1716727).
1799 namespace MozAllocRetries {
1800
1801 // Maximum retry count on OOM.
1802 constexpr size_t kMaxAttempts = 10;
1803 // Minimum delay time between retries. (The actual delay time may be larger. See
1804 // Microsoft's documentation for ::Sleep() for details.)
1805 constexpr size_t kDelayMs = 50;
1806
1807 using StallSpecs = ::mozilla::StallSpecs;
1808
1809 static constexpr StallSpecs maxStall = {.maxAttempts = kMaxAttempts,
1810                                         .delayMs = kDelayMs};
1811
1812 static inline StallSpecs GetStallSpecs() {
1813 #  if defined(JS_STANDALONE)
1814   // GetGeckoProcessType() isn't available in this configuration. (SpiderMonkey
1815   // on Windows mostly skips this in favor of directly calling ::VirtualAlloc(),
1816   // though, so it's probably not going to matter whether we stall here or not.)
1817   return maxStall;
1818 #  else
1819   switch (GetGeckoProcessType()) {
1820     // For the main process, stall for the maximum permissible time period. (The
1821     // main process is the most important one to keep alive.)
1822     case GeckoProcessType::GeckoProcessType_Default:
1823       return maxStall;
1824
1825     // For all other process types, stall for at most half as long.
1826     default:
1827       return {.maxAttempts = maxStall.maxAttempts / 2,
1828               .delayMs = maxStall.delayMs};
1829   }
1830 #  endif
1831 }
1832
1833 }  // namespace MozAllocRetries
1834
1835 namespace mozilla {
1836
1837 StallSpecs GetAllocatorStallSpecs() {
1838   return ::MozAllocRetries::GetStallSpecs();
1839 }
1840
1841 // Drop-in wrapper around VirtualAlloc. When out of memory, may attempt to stall
1842 // and retry rather than returning immediately, in hopes that the page file is
1843 // about to be expanded by Windows.
1844 //
1845 // Ref:
1846 // https://docs.microsoft.com/en-us/troubleshoot/windows-client/performance/slow-page-file-growth-memory-allocation-errors
1847 void* MozVirtualAlloc(LPVOID lpAddress, SIZE_T dwSize, DWORD flAllocationType,
1848                       DWORD flProtect) {
1849   using namespace MozAllocRetries;
1850
1851   DWORD const lastError = ::GetLastError();
1852
1853   constexpr auto IsOOMError = [] {
1854     switch (::GetLastError()) {
1855       // This is the usual error result from VirtualAlloc for OOM.
1856       case ERROR_COMMITMENT_LIMIT:
1857       // Although rare, this has also been observed in low-memory situations.
1858       // (Presumably this means Windows can't allocate enough kernel-side space
1859       // for its own internal representation of the process's virtual address
1860       // space.)
1861       case ERROR_NOT_ENOUGH_MEMORY:
1862         return true;
1863     }
1864     return false;
1865   };
1866
1867   {
1868     void* ptr = ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
1869     if (MOZ_LIKELY(ptr)) return ptr;
1870
1871     // We can't do anything for errors other than OOM...
1872     if (!IsOOMError()) return nullptr;
1873     // ... or if this wasn't a request to commit memory in the first place.
1874     // (This function has no strategy for resolving MEM_RESERVE failures.)
1875     if (!(flAllocationType & MEM_COMMIT)) return nullptr;
1876   }
1877
1878   // Retry as many times as desired (possibly zero).
1879   const StallSpecs stallSpecs = GetStallSpecs();
1880
1881   const auto ret =
1882       stallSpecs.StallAndRetry(&::Sleep, [&]() -> std::optional<void*> {
1883         void* ptr =
1884             ::VirtualAlloc(lpAddress, dwSize, flAllocationType, flProtect);
1885
1886         if (ptr) {
1887           // The OOM status has been handled, and should not be reported to
1888           // telemetry.
1889           if (IsOOMError()) {
1890             ::SetLastError(lastError);
1891           }
1892           return ptr;
1893         }
1894
1895         // Failure for some reason other than OOM.
1896         if (!IsOOMError()) {
1897           return nullptr;
1898         }
1899
1900         return std::nullopt;
1901       });
1902
1903   return ret.value_or(nullptr);
1904 }
1905
1906 }  // namespace mozilla
1907
1908 #endif  // XP_WIN
1909
1910 // ***************************************************************************
1911
1912 static inline void pages_decommit(void* aAddr, size_t aSize) {
1913 #ifdef XP_WIN
1914   // The region starting at addr may have been allocated in multiple calls
1915   // to VirtualAlloc and recycled, so decommitting the entire region in one
1916   // go may not be valid. However, since we allocate at least a chunk at a
1917   // time, we may touch any region in chunksized increments.
1918   size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1919   while (aSize > 0) {
1920     // This will cause Access Violation on read and write and thus act as a
1921     // guard page or region as well.
1922     if (!VirtualFree(aAddr, pages_size, MEM_DECOMMIT)) {
1923       MOZ_CRASH();
1924     }
1925     aAddr = (void*)((uintptr_t)aAddr + pages_size);
1926     aSize -= pages_size;
1927     pages_size = std::min(aSize, kChunkSize);
1928   }
1929 #else
1930   if (mmap(aAddr, aSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
1931            0) == MAP_FAILED) {
1932     // We'd like to report the OOM for our tooling, but we can't allocate
1933     // memory at this point, so avoid the use of printf.
1934     const char out_of_mappings[] =
1935         "[unhandlable oom] Failed to mmap, likely no more mappings "
1936         "available " __FILE__ " : " MOZ_STRINGIFY(__LINE__);
1937     if (errno == ENOMEM) {
1938 #  ifndef ANDROID
1939       fputs(out_of_mappings, stderr);
1940       fflush(stderr);
1941 #  endif
1942       MOZ_CRASH_ANNOTATE(out_of_mappings);
1943     }
1944     MOZ_REALLY_CRASH(__LINE__);
1945   }
1946   MozTagAnonymousMemory(aAddr, aSize, "jemalloc-decommitted");
1947 #endif
1948 }
1949
1950 // Commit pages. Returns whether pages were committed.
1951 [[nodiscard]] static inline bool pages_commit(void* aAddr, size_t aSize) {
1952 #ifdef XP_WIN
1953   // The region starting at addr may have been allocated in multiple calls
1954   // to VirtualAlloc and recycled, so committing the entire region in one
1955   // go may not be valid. However, since we allocate at least a chunk at a
1956   // time, we may touch any region in chunksized increments.
1957   size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1958   while (aSize > 0) {
1959     if (!MozVirtualAlloc(aAddr, pages_size, MEM_COMMIT, PAGE_READWRITE)) {
1960       return false;
1961     }
1962     aAddr = (void*)((uintptr_t)aAddr + pages_size);
1963     aSize -= pages_size;
1964     pages_size = std::min(aSize, kChunkSize);
1965   }
1966 #else
1967   if (mmap(aAddr, aSize, PROT_READ | PROT_WRITE,
1968            MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) {
1969     return false;
1970   }
1971   MozTagAnonymousMemory(aAddr, aSize, "jemalloc");
1972 #endif
1973   return true;
1974 }
1975
1976 // Initialize base allocation data structures.
1977 static void base_init() MOZ_REQUIRES(gInitLock) {
1978   base_mtx.Init();
1979   MOZ_PUSH_IGNORE_THREAD_SAFETY
1980   base_mapped = 0;
1981   base_committed = 0;
1982   MOZ_POP_THREAD_SAFETY
1983 }
1984
1985 static bool base_pages_alloc(size_t minsize) MOZ_REQUIRES(base_mtx) {
1986   size_t csize;
1987   size_t pminsize;
1988
1989   MOZ_ASSERT(minsize != 0);
1990   csize = CHUNK_CEILING(minsize);
1991   base_pages = chunk_alloc(csize, kChunkSize, true);
1992   if (!base_pages) {
1993     return true;
1994   }
1995   base_next_addr = base_pages;
1996   base_past_addr = (void*)((uintptr_t)base_pages + csize);
1997   // Leave enough pages for minsize committed, since otherwise they would
1998   // have to be immediately recommitted.
1999   pminsize = PAGE_CEILING(minsize);
2000   base_next_decommitted = (void*)((uintptr_t)base_pages + pminsize);
2001   if (pminsize < csize) {
2002     pages_decommit(base_next_decommitted, csize - pminsize);
2003   }
2004   base_mapped += csize;
2005   base_committed += pminsize;
2006
2007   return false;
2008 }
2009
2010 static void* base_alloc(size_t aSize) {
2011   void* ret;
2012   size_t csize;
2013
2014   // Round size up to nearest multiple of the cacheline size.
2015   csize = CACHELINE_CEILING(aSize);
2016
2017   MutexAutoLock lock(base_mtx);
2018   // Make sure there's enough space for the allocation.
2019   if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
2020     if (base_pages_alloc(csize)) {
2021       return nullptr;
2022     }
2023   }
2024   // Allocate.
2025   ret = base_next_addr;
2026   base_next_addr = (void*)((uintptr_t)base_next_addr + csize);
2027   // Make sure enough pages are committed for the new allocation.
2028   if ((uintptr_t)base_next_addr > (uintptr_t)base_next_decommitted) {
2029     void* pbase_next_addr = (void*)(PAGE_CEILING((uintptr_t)base_next_addr));
2030
2031     if (!pages_commit(
2032             base_next_decommitted,
2033             (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted)) {
2034       return nullptr;
2035     }
2036
2037     base_committed +=
2038         (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted;
2039     base_next_decommitted = pbase_next_addr;
2040   }
2041
2042   return ret;
2043 }
2044
2045 static void* base_calloc(size_t aNumber, size_t aSize) {
2046   void* ret = base_alloc(aNumber * aSize);
2047   if (ret) {
2048     memset(ret, 0, aNumber * aSize);
2049   }
2050   return ret;
2051 }
2052
2053 // A specialization of the base allocator with a free list.
2054 template <typename T>
2055 struct TypedBaseAlloc {
2056   static T* sFirstFree;
2057
2058   static size_t size_of() { return sizeof(T); }
2059
2060   static T* alloc() {
2061     T* ret;
2062
2063     base_mtx.Lock();
2064     if (sFirstFree) {
2065       ret = sFirstFree;
2066       sFirstFree = *(T**)ret;
2067       base_mtx.Unlock();
2068     } else {
2069       base_mtx.Unlock();
2070       ret = (T*)base_alloc(size_of());
2071     }
2072
2073     return ret;
2074   }
2075
2076   static void dealloc(T* aNode) {
2077     MutexAutoLock lock(base_mtx);
2078     *(T**)aNode = sFirstFree;
2079     sFirstFree = aNode;
2080   }
2081 };
2082
2083 using ExtentAlloc = TypedBaseAlloc<extent_node_t>;
2084
2085 template <>
2086 extent_node_t* ExtentAlloc::sFirstFree = nullptr;
2087
2088 template <>
2089 arena_t* TypedBaseAlloc<arena_t>::sFirstFree = nullptr;
2090
2091 template <>
2092 size_t TypedBaseAlloc<arena_t>::size_of() {
2093   // Allocate enough space for trailing bins.
2094   return sizeof(arena_t) + (sizeof(arena_bin_t) * NUM_SMALL_CLASSES);
2095 }
2096
2097 template <typename T>
2098 struct BaseAllocFreePolicy {
2099   void operator()(T* aPtr) { TypedBaseAlloc<T>::dealloc(aPtr); }
2100 };
2101
2102 using UniqueBaseNode =
2103     UniquePtr<extent_node_t, BaseAllocFreePolicy<extent_node_t>>;
2104
2105 // End Utility functions/macros.
2106 // ***************************************************************************
2107 // Begin chunk management functions.
2108
2109 #ifdef XP_WIN
2110
2111 static void* pages_map(void* aAddr, size_t aSize) {
2112   void* ret = nullptr;
2113   ret = MozVirtualAlloc(aAddr, aSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
2114   return ret;
2115 }
2116
2117 static void pages_unmap(void* aAddr, size_t aSize) {
2118   if (VirtualFree(aAddr, 0, MEM_RELEASE) == 0) {
2119     _malloc_message(_getprogname(), ": (malloc) Error in VirtualFree()\n");
2120   }
2121 }
2122 #else
2123
2124 static void pages_unmap(void* aAddr, size_t aSize) {
2125   if (munmap(aAddr, aSize) == -1) {
2126     char buf[64];
2127
2128     if (strerror_r(errno, buf, sizeof(buf)) == 0) {
2129       _malloc_message(_getprogname(), ": (malloc) Error in munmap(): ", buf,
2130                       "\n");
2131     }
2132   }
2133 }
2134
2135 static void* pages_map(void* aAddr, size_t aSize) {
2136   void* ret;
2137 #  if defined(__ia64__) || \
2138       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
2139   // The JS engine assumes that all allocated pointers have their high 17 bits
2140   // clear, which ia64's mmap doesn't support directly. However, we can emulate
2141   // it by passing mmap an "addr" parameter with those bits clear. The mmap will
2142   // return that address, or the nearest available memory above that address,
2143   // providing a near-guarantee that those bits are clear. If they are not, we
2144   // return nullptr below to indicate out-of-memory.
2145   //
2146   // The addr is chosen as 0x0000070000000000, which still allows about 120TB of
2147   // virtual address space.
2148   //
2149   // See Bug 589735 for more information.
2150   bool check_placement = true;
2151   if (!aAddr) {
2152     aAddr = (void*)0x0000070000000000;
2153     check_placement = false;
2154   }
2155 #  endif
2156
2157 #  if defined(__sparc__) && defined(__arch64__) && defined(__linux__)
2158   const uintptr_t start = 0x0000070000000000ULL;
2159   const uintptr_t end = 0x0000800000000000ULL;
2160
2161   // Copied from js/src/gc/Memory.cpp and adapted for this source
2162   uintptr_t hint;
2163   void* region = MAP_FAILED;
2164   for (hint = start; region == MAP_FAILED && hint + aSize <= end;
2165        hint += kChunkSize) {
2166     region = mmap((void*)hint, aSize, PROT_READ | PROT_WRITE,
2167                   MAP_PRIVATE | MAP_ANON, -1, 0);
2168     if (region != MAP_FAILED) {
2169       if (((size_t)region + (aSize - 1)) & 0xffff800000000000) {
2170         if (munmap(region, aSize)) {
2171           MOZ_ASSERT(errno == ENOMEM);
2172         }
2173         region = MAP_FAILED;
2174       }
2175     }
2176   }
2177   ret = region;
2178 #  else
2179   // We don't use MAP_FIXED here, because it can cause the *replacement*
2180   // of existing mappings, and we only want to create new mappings.
2181   ret =
2182       mmap(aAddr, aSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
2183   MOZ_ASSERT(ret);
2184 #  endif
2185   if (ret == MAP_FAILED) {
2186     ret = nullptr;
2187   }
2188 #  if defined(__ia64__) || \
2189       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
2190   // If the allocated memory doesn't have its upper 17 bits clear, consider it
2191   // as out of memory.
2192   else if ((long long)ret & 0xffff800000000000) {
2193     munmap(ret, aSize);
2194     ret = nullptr;
2195   }
2196   // If the caller requested a specific memory location, verify that's what mmap
2197   // returned.
2198   else if (check_placement && ret != aAddr) {
2199 #  else
2200   else if (aAddr && ret != aAddr) {
2201 #  endif
2202     // We succeeded in mapping memory, but not in the right place.
2203     pages_unmap(ret, aSize);
2204     ret = nullptr;
2205   }
2206   if (ret) {
2207     MozTagAnonymousMemory(ret, aSize, "jemalloc");
2208   }
2209
2210 #  if defined(__ia64__) || \
2211       (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
2212   MOZ_ASSERT(!ret || (!check_placement && ret) ||
2213              (check_placement && ret == aAddr));
2214 #  else
2215   MOZ_ASSERT(!ret || (!aAddr && ret != aAddr) || (aAddr && ret == aAddr));
2216 #  endif
2217   return ret;
2218 }
2219 #endif
2220
2221 #ifdef XP_DARWIN
2222 #  define VM_COPY_MIN kChunkSize
2223 static inline void pages_copy(void* dest, const void* src, size_t n) {
2224   MOZ_ASSERT((void*)((uintptr_t)dest & ~gPageSizeMask) == dest);
2225   MOZ_ASSERT(n >= VM_COPY_MIN);
2226   MOZ_ASSERT((void*)((uintptr_t)src & ~gPageSizeMask) == src);
2227
2228   kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
2229                             (vm_address_t)dest);
2230   if (r != KERN_SUCCESS) {
2231     MOZ_CRASH("vm_copy() failed");
2232   }
2233 }
2234
2235 #endif
2236
2237 template <size_t Bits>
2238 bool AddressRadixTree<Bits>::Init() {
2239   mLock.Init();
2240   mRoot = (void**)base_calloc(1 << kBitsAtLevel1, sizeof(void*));
2241   return mRoot;
2242 }
2243
2244 template <size_t Bits>
2245 void** AddressRadixTree<Bits>::GetSlotInternal(void* aAddr, bool aCreate) {
2246   uintptr_t key = reinterpret_cast<uintptr_t>(aAddr);
2247   uintptr_t subkey;
2248   unsigned i, lshift, height, bits;
2249   void** node;
2250   void** child;
2251
2252   for (i = lshift = 0, height = kHeight, node = mRoot; i < height - 1;
2253        i++, lshift += bits, node = child) {
2254     bits = i ? kBitsPerLevel : kBitsAtLevel1;
2255     subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
2256     child = (void**)node[subkey];
2257     if (!child && aCreate) {
2258       child = (void**)base_calloc(1 << kBitsPerLevel, sizeof(void*));
2259       if (child) {
2260         node[subkey] = child;
2261       }
2262     }
2263     if (!child) {
2264       return nullptr;
2265     }
2266   }
2267
2268   // node is a leaf, so it contains values rather than node
2269   // pointers.
2270   bits = i ? kBitsPerLevel : kBitsAtLevel1;
2271   subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
2272   return &node[subkey];
2273 }
2274
2275 template <size_t Bits>
2276 void* AddressRadixTree<Bits>::Get(void* aAddr) {
2277   void* ret = nullptr;
2278
2279   void** slot = GetSlotIfExists(aAddr);
2280
2281   if (slot) {
2282     ret = *slot;
2283   }
2284 #ifdef MOZ_DEBUG
2285   MutexAutoLock lock(mLock);
2286
2287   // Suppose that it were possible for a jemalloc-allocated chunk to be
2288   // munmap()ped, followed by a different allocator in another thread re-using
2289   // overlapping virtual memory, all without invalidating the cached rtree
2290   // value.  The result would be a false positive (the rtree would claim that
2291   // jemalloc owns memory that it had actually discarded).  I don't think this
2292   // scenario is possible, but the following assertion is a prudent sanity
2293   // check.
2294   if (!slot) {
2295     // In case a slot has been created in the meantime.
2296     slot = GetSlotInternal(aAddr, false);
2297   }
2298   if (slot) {
2299     // The MutexAutoLock above should act as a memory barrier, forcing
2300     // the compiler to emit a new read instruction for *slot.
2301     MOZ_ASSERT(ret == *slot);
2302   } else {
2303     MOZ_ASSERT(ret == nullptr);
2304   }
2305 #endif
2306   return ret;
2307 }
2308
2309 template <size_t Bits>
2310 bool AddressRadixTree<Bits>::Set(void* aAddr, void* aValue) {
2311   MutexAutoLock lock(mLock);
2312   void** slot = GetOrCreateSlot(aAddr);
2313   if (slot) {
2314     *slot = aValue;
2315   }
2316   return slot;
2317 }
2318
2319 // pages_trim, chunk_alloc_mmap_slow and chunk_alloc_mmap were cherry-picked
2320 // from upstream jemalloc 3.4.1 to fix Mozilla bug 956501.
2321
2322 // Return the offset between a and the nearest aligned address at or below a.
2323 #define ALIGNMENT_ADDR2OFFSET(a, alignment) \
2324   ((size_t)((uintptr_t)(a) & ((alignment) - 1)))
2325
2326 // Return the smallest alignment multiple that is >= s.
2327 #define ALIGNMENT_CEILING(s, alignment) \
2328   (((s) + ((alignment) - 1)) & (~((alignment) - 1)))
2329
2330 static void* pages_trim(void* addr, size_t alloc_size, size_t leadsize,
2331                         size_t size) {
2332   void* ret = (void*)((uintptr_t)addr + leadsize);
2333
2334   MOZ_ASSERT(alloc_size >= leadsize + size);
2335 #ifdef XP_WIN
2336   {
2337     void* new_addr;
2338
2339     pages_unmap(addr, alloc_size);
2340     new_addr = pages_map(ret, size);
2341     if (new_addr == ret) {
2342       return ret;
2343     }
2344     if (new_addr) {
2345       pages_unmap(new_addr, size);
2346     }
2347     return nullptr;
2348   }
2349 #else
2350   {
2351     size_t trailsize = alloc_size - leadsize - size;
2352
2353     if (leadsize != 0) {
2354       pages_unmap(addr, leadsize);
2355     }
2356     if (trailsize != 0) {
2357       pages_unmap((void*)((uintptr_t)ret + size), trailsize);
2358     }
2359     return ret;
2360   }
2361 #endif
2362 }
2363
2364 static void* chunk_alloc_mmap_slow(size_t size, size_t alignment) {
2365   void *ret, *pages;
2366   size_t alloc_size, leadsize;
2367
2368   alloc_size = size + alignment - gRealPageSize;
2369   // Beware size_t wrap-around.
2370   if (alloc_size < size) {
2371     return nullptr;
2372   }
2373   do {
2374     pages = pages_map(nullptr, alloc_size);
2375     if (!pages) {
2376       return nullptr;
2377     }
2378     leadsize =
2379         ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages;
2380     ret = pages_trim(pages, alloc_size, leadsize, size);
2381   } while (!ret);
2382
2383   MOZ_ASSERT(ret);
2384   return ret;
2385 }
2386
2387 static void* chunk_alloc_mmap(size_t size, size_t alignment) {
2388   void* ret;
2389   size_t offset;
2390
2391   // Ideally, there would be a way to specify alignment to mmap() (like
2392   // NetBSD has), but in the absence of such a feature, we have to work
2393   // hard to efficiently create aligned mappings. The reliable, but
2394   // slow method is to create a mapping that is over-sized, then trim the
2395   // excess. However, that always results in one or two calls to
2396   // pages_unmap().
2397   //
2398   // Optimistically try mapping precisely the right amount before falling
2399   // back to the slow method, with the expectation that the optimistic
2400   // approach works most of the time.
2401   ret = pages_map(nullptr, size);
2402   if (!ret) {
2403     return nullptr;
2404   }
2405   offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
2406   if (offset != 0) {
2407     pages_unmap(ret, size);
2408     return chunk_alloc_mmap_slow(size, alignment);
2409   }
2410
2411   MOZ_ASSERT(ret);
2412   return ret;
2413 }
2414
2415 // Purge and release the pages in the chunk of length `length` at `addr` to
2416 // the OS.
2417 // Returns whether the pages are guaranteed to be full of zeroes when the
2418 // function returns.
2419 // The force_zero argument explicitly requests that the memory is guaranteed
2420 // to be full of zeroes when the function returns.
2421 static bool pages_purge(void* addr, size_t length, bool force_zero) {
2422   pages_decommit(addr, length);
2423   return true;
2424 }
2425
2426 static void* chunk_recycle(size_t aSize, size_t aAlignment) {
2427   extent_node_t key;
2428
2429   size_t alloc_size = aSize + aAlignment - kChunkSize;
2430   // Beware size_t wrap-around.
2431   if (alloc_size < aSize) {
2432     return nullptr;
2433   }
2434   key.mAddr = nullptr;
2435   key.mSize = alloc_size;
2436   chunks_mtx.Lock();
2437   extent_node_t* node = gChunksBySize.SearchOrNext(&key);
2438   if (!node) {
2439     chunks_mtx.Unlock();
2440     return nullptr;
2441   }
2442   size_t leadsize = ALIGNMENT_CEILING((uintptr_t)node->mAddr, aAlignment) -
2443                     (uintptr_t)node->mAddr;
2444   MOZ_ASSERT(node->mSize >= leadsize + aSize);
2445   size_t trailsize = node->mSize - leadsize - aSize;
2446   void* ret = (void*)((uintptr_t)node->mAddr + leadsize);
2447
2448   // All recycled chunks are zeroed (because they're purged) before being
2449   // recycled.
2450   MOZ_ASSERT(node->mChunkType == ZEROED_CHUNK);
2451
2452   // Remove node from the tree.
2453   gChunksBySize.Remove(node);
2454   gChunksByAddress.Remove(node);
2455   if (leadsize != 0) {
2456     // Insert the leading space as a smaller chunk.
2457     node->mSize = leadsize;
2458     gChunksBySize.Insert(node);
2459     gChunksByAddress.Insert(node);
2460     node = nullptr;
2461   }
2462   if (trailsize != 0) {
2463     // Insert the trailing space as a smaller chunk.
2464     if (!node) {
2465       // An additional node is required, but
2466       // TypedBaseAlloc::alloc() can cause a new base chunk to be
2467       // allocated.  Drop chunks_mtx in order to avoid
2468       // deadlock, and if node allocation fails, deallocate
2469       // the result before returning an error.
2470       chunks_mtx.Unlock();
2471       node = ExtentAlloc::alloc();
2472       if (!node) {
2473         chunk_dealloc(ret, aSize, ZEROED_CHUNK);
2474         return nullptr;
2475       }
2476       chunks_mtx.Lock();
2477     }
2478     node->mAddr = (void*)((uintptr_t)(ret) + aSize);
2479     node->mSize = trailsize;
2480     node->mChunkType = ZEROED_CHUNK;
2481     gChunksBySize.Insert(node);
2482     gChunksByAddress.Insert(node);
2483     node = nullptr;
2484   }
2485
2486   gRecycledSize -= aSize;
2487
2488   chunks_mtx.Unlock();
2489
2490   if (node) {
2491     ExtentAlloc::dealloc(node);
2492   }
2493   if (!pages_commit(ret, aSize)) {
2494     return nullptr;
2495   }
2496
2497   return ret;
2498 }
2499
2500 static void chunks_init() MOZ_REQUIRES(gInitLock) {
2501   // Initialize chunks data.
2502   chunks_mtx.Init();
2503   MOZ_PUSH_IGNORE_THREAD_SAFETY
2504   gChunksBySize.Init();
2505   gChunksByAddress.Init();
2506   MOZ_POP_THREAD_SAFETY
2507 }
2508
2509 #ifdef XP_WIN
2510 // On Windows, calls to VirtualAlloc and VirtualFree must be matched, making it
2511 // awkward to recycle allocations of varying sizes. Therefore we only allow
2512 // recycling when the size equals the chunksize, unless deallocation is entirely
2513 // disabled.
2514 #  define CAN_RECYCLE(size) ((size) == kChunkSize)
2515 #else
2516 #  define CAN_RECYCLE(size) true
2517 #endif
2518
2519 // Allocates `size` bytes of system memory aligned for `alignment`.
2520 // `base` indicates whether the memory will be used for the base allocator
2521 // (e.g. base_alloc).
2522 // `zeroed` is an outvalue that returns whether the allocated memory is
2523 // guaranteed to be full of zeroes. It can be omitted when the caller doesn't
2524 // care about the result.
2525 static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase) {
2526   void* ret = nullptr;
2527
2528   MOZ_ASSERT(aSize != 0);
2529   MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
2530   MOZ_ASSERT(aAlignment != 0);
2531   MOZ_ASSERT((aAlignment & kChunkSizeMask) == 0);
2532
2533   // Base allocations can't be fulfilled by recycling because of
2534   // possible deadlock or infinite recursion.
2535   if (CAN_RECYCLE(aSize) && !aBase) {
2536     ret = chunk_recycle(aSize, aAlignment);
2537   }
2538   if (!ret) {
2539     ret = chunk_alloc_mmap(aSize, aAlignment);
2540   }
2541   if (ret && !aBase) {
2542     if (!gChunkRTree.Set(ret, ret)) {
2543       chunk_dealloc(ret, aSize, UNKNOWN_CHUNK);
2544       return nullptr;
2545     }
2546   }
2547
2548   MOZ_ASSERT(GetChunkOffsetForPtr(ret) == 0);
2549   return ret;
2550 }
2551
2552 #ifdef MOZ_DEBUG
2553 static void chunk_assert_zero(void* aPtr, size_t aSize) {
2554   size_t i;
2555   size_t* p = (size_t*)(uintptr_t)aPtr;
2556
2557   for (i = 0; i < aSize / sizeof(size_t); i++) {
2558     MOZ_ASSERT(p[i] == 0);
2559   }
2560 }
2561 #endif
2562
2563 static void chunk_record(void* aChunk, size_t aSize, ChunkType aType) {
2564   extent_node_t key;
2565
2566   if (aType != ZEROED_CHUNK) {
2567     if (pages_purge(aChunk, aSize, aType == HUGE_CHUNK)) {
2568       aType = ZEROED_CHUNK;
2569     }
2570   }
2571
2572   // Allocate a node before acquiring chunks_mtx even though it might not
2573   // be needed, because TypedBaseAlloc::alloc() may cause a new base chunk to
2574   // be allocated, which could cause deadlock if chunks_mtx were already
2575   // held.
2576   UniqueBaseNode xnode(ExtentAlloc::alloc());
2577   // Use xprev to implement conditional deferred deallocation of prev.
2578   UniqueBaseNode xprev;
2579
2580   // RAII deallocates xnode and xprev defined above after unlocking
2581   // in order to avoid potential dead-locks
2582   MutexAutoLock lock(chunks_mtx);
2583   key.mAddr = (void*)((uintptr_t)aChunk + aSize);
2584   extent_node_t* node = gChunksByAddress.SearchOrNext(&key);
2585   // Try to coalesce forward.
2586   if (node && node->mAddr == key.mAddr) {
2587     // Coalesce chunk with the following address range.  This does
2588     // not change the position within gChunksByAddress, so only
2589     // remove/insert from/into gChunksBySize.
2590     gChunksBySize.Remove(node);
2591     node->mAddr = aChunk;
2592     node->mSize += aSize;
2593     if (node->mChunkType != aType) {
2594       node->mChunkType = RECYCLED_CHUNK;
2595     }
2596     gChunksBySize.Insert(node);
2597   } else {
2598     // Coalescing forward failed, so insert a new node.
2599     if (!xnode) {
2600       // TypedBaseAlloc::alloc() failed, which is an exceedingly
2601       // unlikely failure.  Leak chunk; its pages have
2602       // already been purged, so this is only a virtual
2603       // memory leak.
2604       return;
2605     }
2606     node = xnode.release();
2607     node->mAddr = aChunk;
2608     node->mSize = aSize;
2609     node->mChunkType = aType;
2610     gChunksByAddress.Insert(node);
2611     gChunksBySize.Insert(node);
2612   }
2613
2614   // Try to coalesce backward.
2615   extent_node_t* prev = gChunksByAddress.Prev(node);
2616   if (prev && (void*)((uintptr_t)prev->mAddr + prev->mSize) == aChunk) {
2617     // Coalesce chunk with the previous address range.  This does
2618     // not change the position within gChunksByAddress, so only
2619     // remove/insert node from/into gChunksBySize.
2620     gChunksBySize.Remove(prev);
2621     gChunksByAddress.Remove(prev);
2622
2623     gChunksBySize.Remove(node);
2624     node->mAddr = prev->mAddr;
2625     node->mSize += prev->mSize;
2626     if (node->mChunkType != prev->mChunkType) {
2627       node->mChunkType = RECYCLED_CHUNK;
2628     }
2629     gChunksBySize.Insert(node);
2630
2631     xprev.reset(prev);
2632   }
2633
2634   gRecycledSize += aSize;
2635 }
2636
2637 static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType) {
2638   MOZ_ASSERT(aChunk);
2639   MOZ_ASSERT(GetChunkOffsetForPtr(aChunk) == 0);
2640   MOZ_ASSERT(aSize != 0);
2641   MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
2642
2643   gChunkRTree.Unset(aChunk);
2644
2645   if (CAN_RECYCLE(aSize)) {
2646     size_t recycled_so_far = gRecycledSize;
2647     // In case some race condition put us above the limit.
2648     if (recycled_so_far < gRecycleLimit) {
2649       size_t recycle_remaining = gRecycleLimit - recycled_so_far;
2650       size_t to_recycle;
2651       if (aSize > recycle_remaining) {
2652         to_recycle = recycle_remaining;
2653         // Drop pages that would overflow the recycle limit
2654         pages_trim(aChunk, aSize, 0, to_recycle);
2655       } else {
2656         to_recycle = aSize;
2657       }
2658       chunk_record(aChunk, to_recycle, aType);
2659       return;
2660     }
2661   }
2662
2663   pages_unmap(aChunk, aSize);
2664 }
2665
2666 #undef CAN_RECYCLE
2667
2668 // End chunk management functions.
2669 // ***************************************************************************
2670 // Begin arena.
2671
2672 static inline arena_t* thread_local_arena(bool enabled) {
2673   arena_t* arena;
2674
2675   if (enabled) {
2676     // The arena will essentially be leaked if this function is
2677     // called with `false`, but it doesn't matter at the moment.
2678     // because in practice nothing actually calls this function
2679     // with `false`, except maybe at shutdown.
2680     arena =
2681         gArenas.CreateArena(/* aIsPrivate = */ false, /* aParams = */ nullptr);
2682   } else {
2683     arena = gArenas.GetDefault();
2684   }
2685   thread_arena.set(arena);
2686   return arena;
2687 }
2688
2689 inline void MozJemalloc::jemalloc_thread_local_arena(bool aEnabled) {
2690   if (malloc_init()) {
2691     thread_local_arena(aEnabled);
2692   }
2693 }
2694
2695 // Choose an arena based on a per-thread value.
2696 static inline arena_t* choose_arena(size_t size) {
2697   arena_t* ret = nullptr;
2698
2699   // We can only use TLS if this is a PIC library, since for the static
2700   // library version, libc's malloc is used by TLS allocation, which
2701   // introduces a bootstrapping issue.
2702
2703   if (size > kMaxQuantumClass) {
2704     // Force the default arena for larger allocations.
2705     ret = gArenas.GetDefault();
2706   } else {
2707     // Check TLS to see if our thread has requested a pinned arena.
2708     ret = thread_arena.get();
2709     // If ret is non-null, it must not be in the first page.
2710     MOZ_DIAGNOSTIC_ASSERT_IF(ret, (size_t)ret >= gPageSize);
2711     if (!ret) {
2712       // Nothing in TLS. Pin this thread to the default arena.
2713       ret = thread_local_arena(false);
2714     }
2715   }
2716
2717   MOZ_DIAGNOSTIC_ASSERT(ret);
2718   return ret;
2719 }
2720
2721 inline uint8_t arena_t::FindFreeBitInMask(uint32_t aMask, uint32_t& aRng) {
2722   if (mPRNG != nullptr) {
2723     if (aRng == UINT_MAX) {
2724       aRng = mPRNG->next() % 32;
2725     }
2726     uint8_t bitIndex;
2727     // RotateRight asserts when provided bad input.
2728     aMask = aRng ? RotateRight(aMask, aRng)
2729                  : aMask;  // Rotate the mask a random number of slots
2730     bitIndex = CountTrailingZeroes32(aMask);
2731     return (bitIndex + aRng) % 32;
2732   }
2733   return CountTrailingZeroes32(aMask);
2734 }
2735
2736 inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) {
2737   void* ret;
2738   unsigned i, mask, bit, regind;
2739   uint32_t rndPos = UINT_MAX;
2740
2741   MOZ_DIAGNOSTIC_ASSERT(aRun->mMagic == ARENA_RUN_MAGIC);
2742   MOZ_ASSERT(aRun->mRegionsMinElement < aBin->mRunNumRegionsMask);
2743
2744   // Move the first check outside the loop, so that aRun->mRegionsMinElement can
2745   // be updated unconditionally, without the possibility of updating it
2746   // multiple times.
2747   i = aRun->mRegionsMinElement;
2748   mask = aRun->mRegionsMask[i];
2749   if (mask != 0) {
2750     bit = FindFreeBitInMask(mask, rndPos);
2751
2752     regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2753     MOZ_ASSERT(regind < aBin->mRunNumRegions);
2754     ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2755                   (aBin->mSizeClass * regind));
2756
2757     // Clear bit.
2758     mask ^= (1U << bit);
2759     aRun->mRegionsMask[i] = mask;
2760
2761     return ret;
2762   }
2763
2764   for (i++; i < aBin->mRunNumRegionsMask; i++) {
2765     mask = aRun->mRegionsMask[i];
2766     if (mask != 0) {
2767       bit = FindFreeBitInMask(mask, rndPos);
2768
2769       regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2770       MOZ_ASSERT(regind < aBin->mRunNumRegions);
2771       ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2772                     (aBin->mSizeClass * regind));
2773
2774       // Clear bit.
2775       mask ^= (1U << bit);
2776       aRun->mRegionsMask[i] = mask;
2777
2778       // Make a note that nothing before this element
2779       // contains a free region.
2780       aRun->mRegionsMinElement = i;  // Low payoff: + (mask == 0);
2781
2782       return ret;
2783     }
2784   }
2785   // Not reached.
2786   MOZ_DIAGNOSTIC_ASSERT(0);
2787   return nullptr;
2788 }
2789
2790 static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
2791                                         void* ptr, size_t size) {
2792   uint32_t diff, regind;
2793   unsigned elm, bit;
2794
2795   MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
2796
2797   // Avoid doing division with a variable divisor if possible.  Using
2798   // actual division here can reduce allocator throughput by over 20%!
2799   diff =
2800       (uint32_t)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset);
2801
2802   MOZ_ASSERT(diff <=
2803              (static_cast<unsigned>(bin->mRunSizePages) << gPageSize2Pow));
2804   regind = diff / bin->mSizeDivisor;
2805
2806   MOZ_DIAGNOSTIC_ASSERT(diff == regind * size);
2807   MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions);
2808
2809   elm = regind >> (LOG2(sizeof(int)) + 3);
2810   if (elm < run->mRegionsMinElement) {
2811     run->mRegionsMinElement = elm;
2812   }
2813   bit = regind - (elm << (LOG2(sizeof(int)) + 3));
2814   MOZ_RELEASE_ASSERT((run->mRegionsMask[elm] & (1U << bit)) == 0,
2815                      "Double-free?");
2816   run->mRegionsMask[elm] |= (1U << bit);
2817 }
2818
2819 bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
2820                        bool aZero) {
2821   arena_chunk_t* chunk = GetChunkForPtr(aRun);
2822   size_t old_ndirty = chunk->ndirty;
2823   size_t run_ind =
2824       (unsigned)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
2825   size_t total_pages =
2826       (chunk->map[run_ind].bits & ~gPageSizeMask) >> gPageSize2Pow;
2827   size_t need_pages = (aSize >> gPageSize2Pow);
2828   MOZ_ASSERT(need_pages > 0);
2829   MOZ_ASSERT(need_pages <= total_pages);
2830   size_t rem_pages = total_pages - need_pages;
2831
2832   MOZ_ASSERT((chunk->map[run_ind].bits & CHUNK_MAP_BUSY) == 0);
2833
2834 #ifdef MALLOC_DECOMMIT
2835   size_t i = 0;
2836   while (i < need_pages) {
2837     MOZ_ASSERT((chunk->map[run_ind + i].bits & CHUNK_MAP_BUSY) == 0);
2838
2839     // Commit decommitted pages if necessary.  If a decommitted
2840     // page is encountered, commit all needed adjacent decommitted
2841     // pages in one operation, in order to reduce system call
2842     // overhead.
2843     if (chunk->map[run_ind + i].bits & CHUNK_MAP_DECOMMITTED) {
2844       // Advance i+j to just past the index of the last page
2845       // to commit.  Clear CHUNK_MAP_DECOMMITTED along the way.
2846       size_t j;
2847       for (j = 0; i + j < need_pages &&
2848                   (chunk->map[run_ind + i + j].bits & CHUNK_MAP_DECOMMITTED);
2849            j++) {
2850         // DECOMMITTED, MADVISED and FRESH are mutually exclusive.
2851         MOZ_ASSERT((chunk->map[run_ind + i + j].bits &
2852                     (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED)) == 0);
2853       }
2854
2855       // Consider committing more pages to amortise calls to VirtualAlloc.
2856       // This only makes sense at the edge of our run hence the if condition
2857       // here.
2858       if (i + j == need_pages) {
2859         size_t extra_commit = ExtraCommitPages(j, rem_pages);
2860         for (; i + j < need_pages + extra_commit &&
2861                (chunk->map[run_ind + i + j].bits &
2862                 CHUNK_MAP_MADVISED_OR_DECOMMITTED);
2863              j++) {
2864           MOZ_ASSERT((chunk->map[run_ind + i + j].bits &
2865                       (CHUNK_MAP_FRESH | CHUNK_MAP_MADVISED)) == 0);
2866         }
2867       }
2868
2869       if (!pages_commit(
2870               (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)),
2871               j << gPageSize2Pow)) {
2872         return false;
2873       }
2874
2875       // pages_commit zeroes pages, so mark them as such if it succeeded.
2876       // That's checked further below to avoid manually zeroing the pages.
2877       for (size_t k = 0; k < j; k++) {
2878         chunk->map[run_ind + i + k].bits =
2879             (chunk->map[run_ind + i + k].bits & ~CHUNK_MAP_DECOMMITTED) |
2880             CHUNK_MAP_ZEROED | CHUNK_MAP_FRESH;
2881       }
2882
2883       mNumFresh += j;
2884       i += j;
2885     } else {
2886       i++;
2887     }
2888   }
2889 #endif
2890
2891   mRunsAvail.Remove(&chunk->map[run_ind]);
2892
2893   // Keep track of trailing unused pages for later use.
2894   if (rem_pages > 0) {
2895     chunk->map[run_ind + need_pages].bits =
2896         (rem_pages << gPageSize2Pow) |
2897         (chunk->map[run_ind + need_pages].bits & gPageSizeMask);
2898     chunk->map[run_ind + total_pages - 1].bits =
2899         (rem_pages << gPageSize2Pow) |
2900         (chunk->map[run_ind + total_pages - 1].bits & gPageSizeMask);
2901     mRunsAvail.Insert(&chunk->map[run_ind + need_pages]);
2902   }
2903
2904   for (size_t i = 0; i < need_pages; i++) {
2905     // Zero if necessary.
2906     if (aZero) {
2907       if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED) == 0) {
2908         memset((void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), 0,
2909                gPageSize);
2910         // CHUNK_MAP_ZEROED is cleared below.
2911       }
2912     }
2913
2914     // Update dirty page accounting.
2915     if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2916       chunk->ndirty--;
2917       mNumDirty--;
2918       // CHUNK_MAP_DIRTY is cleared below.
2919     } else if (chunk->map[run_ind + i].bits & CHUNK_MAP_MADVISED) {
2920       mStats.committed++;
2921       mNumMAdvised--;
2922     }
2923
2924     if (chunk->map[run_ind + i].bits & CHUNK_MAP_FRESH) {
2925       mStats.committed++;
2926       mNumFresh--;
2927     }
2928
2929     // This bit has already been cleared
2930     MOZ_ASSERT(!(chunk->map[run_ind + i].bits & CHUNK_MAP_DECOMMITTED));
2931
2932     // Initialize the chunk map.  This clears the dirty, zeroed and madvised
2933     // bits, decommitted is cleared above.
2934     if (aLarge) {
2935       chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2936     } else {
2937       chunk->map[run_ind + i].bits = size_t(aRun) | CHUNK_MAP_ALLOCATED;
2938     }
2939   }
2940
2941   // Set the run size only in the first element for large runs.  This is
2942   // primarily a debugging aid, since the lack of size info for trailing
2943   // pages only matters if the application tries to operate on an
2944   // interior pointer.
2945   if (aLarge) {
2946     chunk->map[run_ind].bits |= aSize;
2947   }
2948
2949   if (chunk->ndirty == 0 && old_ndirty > 0 && !chunk->mIsPurging) {
2950     mChunksDirty.Remove(chunk);
2951   }
2952   return true;
2953 }
2954
2955 void arena_t::InitChunk(arena_chunk_t* aChunk, size_t aMinCommittedPages) {
2956   mStats.mapped += kChunkSize;
2957
2958   aChunk->arena = this;
2959
2960   // Claim that no pages are in use, since the header is merely overhead.
2961   aChunk->ndirty = 0;
2962
2963   aChunk->mIsPurging = false;
2964   aChunk->mDying = false;
2965
2966   // Setup the chunk's pages in two phases.  First we mark which pages are
2967   // committed & decommitted and perform the decommit.  Then we update the map
2968   // to create the runs.
2969
2970   // Clear the bits for the real header pages.
2971   size_t i;
2972   for (i = 0; i < gChunkHeaderNumPages - 1; i++) {
2973     aChunk->map[i].bits = 0;
2974   }
2975   mStats.committed += gChunkHeaderNumPages - 1;
2976
2977   // Decommit the last header page (=leading page) as a guard.
2978   pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)), gPageSize);
2979   aChunk->map[i++].bits = CHUNK_MAP_DECOMMITTED;
2980
2981   // If MALLOC_DECOMMIT is enabled then commit only the pages we're about to
2982   // use.  Otherwise commit all of them.
2983 #ifdef MALLOC_DECOMMIT
2984   size_t n_fresh_pages =
2985       aMinCommittedPages +
2986       ExtraCommitPages(
2987           aMinCommittedPages,
2988           gChunkNumPages - gChunkHeaderNumPages - aMinCommittedPages - 1);
2989 #else
2990   size_t n_fresh_pages = gChunkNumPages - 1 - gChunkHeaderNumPages;
2991 #endif
2992
2993   // The committed pages are marked as Fresh.  Our caller, SplitRun will update
2994   // this when it uses them.
2995   for (size_t j = 0; j < n_fresh_pages; j++) {
2996     aChunk->map[i + j].bits = CHUNK_MAP_ZEROED | CHUNK_MAP_FRESH;
2997   }
2998   i += n_fresh_pages;
2999   mNumFresh += n_fresh_pages;
3000
3001 #ifndef MALLOC_DECOMMIT
3002   // If MALLOC_DECOMMIT isn't defined then all the pages are fresh and setup in
3003   // the loop above.
3004   MOZ_ASSERT(i == gChunkNumPages - 1);
3005 #endif
3006
3007   // If MALLOC_DECOMMIT is defined, then this will decommit the remainder of the
3008   // chunk plus the last page which is a guard page, if it is not defined it
3009   // will only decommit the guard page.
3010   pages_decommit((void*)(uintptr_t(aChunk) + (i << gPageSize2Pow)),
3011                  (gChunkNumPages - i) << gPageSize2Pow);
3012   for (; i < gChunkNumPages; i++) {
3013     aChunk->map[i].bits = CHUNK_MAP_DECOMMITTED;
3014   }
3015
3016   // aMinCommittedPages will create a valid run.
3017   MOZ_ASSERT(aMinCommittedPages > 0);
3018   MOZ_ASSERT(aMinCommittedPages <= gChunkNumPages - gChunkHeaderNumPages - 1);
3019
3020   // Create the run.
3021   aChunk->map[gChunkHeaderNumPages].bits |= gMaxLargeClass;
3022   aChunk->map[gChunkNumPages - 2].bits |= gMaxLargeClass;
3023   mRunsAvail.Insert(&aChunk->map[gChunkHeaderNumPages]);
3024
3025 #ifdef MALLOC_DOUBLE_PURGE
3026   new (&aChunk->chunks_madvised_elem) DoublyLinkedListElement<arena_chunk_t>();
3027 #endif
3028 }
3029
3030 bool arena_t::RemoveChunk(arena_chunk_t* aChunk) {
3031   aChunk->mDying = true;
3032
3033   // If the chunk has busy pages that means that a Purge() is in progress.
3034   // We can't remove the chunk now, instead Purge() will do it.
3035   if (aChunk->mIsPurging) {
3036     return false;
3037   }
3038
3039   if (aChunk->ndirty > 0) {
3040     MOZ_ASSERT(aChunk->arena == this);
3041     mChunksDirty.Remove(aChunk);
3042     mNumDirty -= aChunk->ndirty;
3043     mStats.committed -= aChunk->ndirty;
3044   }
3045
3046   // Count the number of madvised/fresh pages and update the stats.
3047   size_t madvised = 0;
3048   size_t fresh = 0;
3049   for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - 1; i++) {
3050     // There must not be any pages that are not fresh, madvised, decommitted
3051     // or dirty.
3052     MOZ_ASSERT(aChunk->map[i].bits &
3053                (CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED | CHUNK_MAP_DIRTY));
3054     MOZ_ASSERT((aChunk->map[i].bits & CHUNK_MAP_BUSY) == 0);
3055
3056     if (aChunk->map[i].bits & CHUNK_MAP_MADVISED) {
3057       madvised++;
3058     } else if (aChunk->map[i].bits & CHUNK_MAP_FRESH) {
3059       fresh++;
3060     }
3061   }
3062
3063   mNumMAdvised -= madvised;
3064   mNumFresh -= fresh;
3065
3066 #ifdef MALLOC_DOUBLE_PURGE
3067   if (mChunksMAdvised.ElementProbablyInList(aChunk)) {
3068     mChunksMAdvised.remove(aChunk);
3069   }
3070 #endif
3071
3072   mStats.mapped -= kChunkSize;
3073   mStats.committed -= gChunkHeaderNumPages - 1;
3074
3075   return true;
3076 }
3077
3078 bool arena_chunk_t::IsEmpty() {
3079   return (map[gChunkHeaderNumPages].bits &
3080           (~gPageSizeMask | CHUNK_MAP_ALLOCATED)) == gMaxLargeClass;
3081 }
3082
3083 arena_chunk_t* arena_t::DemoteChunkToSpare(arena_chunk_t* aChunk) {
3084   if (mSpare) {
3085     if (!RemoveChunk(mSpare)) {
3086       // If we can't remove the spare chunk now purge will finish removing it
3087       // later.  Set it to null so that the return below will return null and
3088       // our caller won't delete the chunk before Purge() is finished.
3089       mSpare = nullptr;
3090     }
3091   }
3092
3093   arena_chunk_t* chunk_dealloc = mSpare;
3094   mSpare = aChunk;
3095   return chunk_dealloc;
3096 }
3097
3098 arena_run_t* arena_t::AllocRun(size_t aSize, bool aLarge, bool aZero) {
3099   arena_run_t* run;
3100   arena_chunk_map_t* mapelm;
3101   arena_chunk_map_t key;
3102
3103   MOZ_ASSERT(aSize <= gMaxLargeClass);
3104   MOZ_ASSERT((aSize & gPageSizeMask) == 0);
3105
3106   // Search the arena's chunks for the lowest best fit.
3107   key.bits = aSize | CHUNK_MAP_KEY;
3108   mapelm = mRunsAvail.SearchOrNext(&key);
3109   if (mapelm) {
3110     arena_chunk_t* chunk = GetChunkForPtr(mapelm);
3111     size_t pageind =
3112         (uintptr_t(mapelm) - uintptr_t(chunk->map)) / sizeof(arena_chunk_map_t);
3113
3114     MOZ_ASSERT((chunk->map[pageind].bits & CHUNK_MAP_BUSY) == 0);
3115     run = (arena_run_t*)(uintptr_t(chunk) + (pageind << gPageSize2Pow));
3116   } else if (mSpare && !mSpare->mIsPurging) {
3117     // Use the spare.
3118     arena_chunk_t* chunk = mSpare;
3119     mSpare = nullptr;
3120     run = (arena_run_t*)(uintptr_t(chunk) +
3121                          (gChunkHeaderNumPages << gPageSize2Pow));
3122     // Insert the run into the tree of available runs.
3123     MOZ_ASSERT((chunk->map[gChunkHeaderNumPages].bits & CHUNK_MAP_BUSY) == 0);
3124     mRunsAvail.Insert(&chunk->map[gChunkHeaderNumPages]);
3125   } else {
3126     // No usable runs.  Create a new chunk from which to allocate
3127     // the run.
3128     arena_chunk_t* chunk =
3129         (arena_chunk_t*)chunk_alloc(kChunkSize, kChunkSize, false);
3130     if (!chunk) {
3131       return nullptr;
3132     }
3133
3134     InitChunk(chunk, aSize >> gPageSize2Pow);
3135     run = (arena_run_t*)(uintptr_t(chunk) +
3136                          (gChunkHeaderNumPages << gPageSize2Pow));
3137   }
3138   // Update page map.
3139   return SplitRun(run, aSize, aLarge, aZero) ? run : nullptr;
3140 }
3141
3142 size_t arena_t::EffectiveMaxDirty() {
3143   int32_t modifier = gArenas.DefaultMaxDirtyPageModifier();
3144   if (modifier) {
3145     int32_t arenaOverride =
3146         modifier > 0 ? mMaxDirtyIncreaseOverride : mMaxDirtyDecreaseOverride;
3147     if (arenaOverride) {
3148       modifier = arenaOverride;
3149     }
3150   }
3151
3152   return modifier >= 0 ? mMaxDirty << modifier : mMaxDirty >> -modifier;
3153 }
3154
3155 #ifdef MALLOC_DECOMMIT
3156
3157 size_t arena_t::ExtraCommitPages(size_t aReqPages, size_t aRemainingPages) {
3158   const int32_t modifier = gArenas.DefaultMaxDirtyPageModifier();
3159   if (modifier < 0) {
3160     return 0;
3161   }
3162
3163   // The maximum size of the page cache
3164   const size_t max_page_cache = EffectiveMaxDirty();
3165
3166   // The current size of the page cache, note that we use mNumFresh +
3167   // mNumMAdvised here but Purge() does not.
3168   const size_t page_cache = mNumDirty + mNumFresh + mNumMAdvised;
3169
3170   if (page_cache > max_page_cache) {
3171     // We're already exceeding our dirty page count even though we're trying
3172     // to allocate.  This can happen due to fragmentation.  Don't commit
3173     // excess memory since we're probably here due to a larger allocation and
3174     // small amounts of memory are certainly available in the page cache.
3175     return 0;
3176   }
3177   if (modifier > 0) {
3178     // If modifier is > 0 then we want to keep all the pages we can, but don't
3179     // exceed the size of the page cache.  The subtraction cannot underflow
3180     // because of the condition above.
3181     return std::min(aRemainingPages, max_page_cache - page_cache);
3182   }
3183
3184   // The rest is arbitrary and involves a some assumptions.  I've broken it down
3185   // into simple expressions to document them more clearly.
3186
3187   // Assumption 1: a quarter of EffectiveMaxDirty() is a sensible "minimum
3188   // target" for the dirty page cache.  Likewise 3 quarters is a sensible
3189   // "maximum target".  Note that for the maximum we avoid using the whole page
3190   // cache now so that a free that follows this allocation doesn't immeidatly
3191   // call Purge (churning memory).
3192   const size_t min = max_page_cache / 4;
3193   const size_t max = 3 * max_page_cache / 4;
3194
3195   // Assumption 2: Committing 32 pages at a time is sufficient to amortise
3196   // VirtualAlloc costs.
3197   size_t amortisation_threshold = 32;
3198
3199   // extra_pages is the number of additional pages needed to meet
3200   // amortisation_threshold.
3201   size_t extra_pages = aReqPages < amortisation_threshold
3202                            ? amortisation_threshold - aReqPages
3203                            : 0;
3204
3205   // If committing extra_pages isn't enough to hit the minimum target then
3206   // increase it.
3207   if (page_cache + extra_pages < min) {
3208     extra_pages = min - page_cache;
3209   } else if (page_cache + extra_pages > max) {
3210     // If committing extra_pages would exceed our maximum target then it may
3211     // still be useful to allocate extra pages.  One of the reasons this can
3212     // happen could be fragmentation of the cache,
3213
3214     // Therefore reduce the amortisation threshold so that we might allocate
3215     // some extra pages but avoid exceeding the dirty page cache.
3216     amortisation_threshold /= 2;
3217     extra_pages = std::min(aReqPages < amortisation_threshold
3218                                ? amortisation_threshold - aReqPages
3219                                : 0,
3220                            max_page_cache - page_cache);
3221   }
3222
3223   // Cap extra_pages to aRemainingPages and adjust aRemainingPages.  We will
3224   // commit at least this many extra pages.
3225   extra_pages = std::min(extra_pages, aRemainingPages);
3226
3227   // Finally if commiting a small number of additional pages now can prevent
3228   // a small commit later then try to commit a little more now, provided we
3229   // don't exceed max_page_cache.
3230   if ((aRemainingPages - extra_pages) < amortisation_threshold / 2 &&
3231       (page_cache + aRemainingPages) < max_page_cache) {
3232     return aRemainingPages;
3233   }
3234
3235   return extra_pages;
3236 }
3237 #endif
3238
3239 bool arena_t::Purge(bool aForce) {
3240   arena_chunk_t* chunk;
3241
3242   // The first critical section will find a chunk and mark dirty pages in it as
3243   // busy.
3244   {
3245     MaybeMutexAutoLock lock(mLock);
3246
3247 #ifdef MOZ_DEBUG
3248     size_t ndirty = 0;
3249     for (auto* chunk : mChunksDirty.iter()) {
3250       ndirty += chunk->ndirty;
3251     }
3252     // Not all dirty chunks are in mChunksDirty as others might be being Purged.
3253     MOZ_ASSERT(ndirty <= mNumDirty);
3254 #endif
3255
3256     if (mNumDirty <= (aForce ? 0 : EffectiveMaxDirty() >> 1)) {
3257       return false;
3258     }
3259
3260     // Take a single chunk and attempt to purge some of its dirty pages.  The
3261     // loop below will purge memory from the chunk until either:
3262     //  * The dirty page count for the arena hits its target,
3263     //  * Another thread attempts to delete this chunk, or
3264     //  * The chunk has no more dirty pages.
3265     // In any of these cases the loop will break and Purge() will return, which
3266     // means it may return before the arena meets its dirty page count target,
3267     // the return value is used by the caller to call Purge() again where it
3268     // will take the next chunk with dirty pages.
3269     chunk = mChunksDirty.Last();
3270     if (!chunk) {
3271       // There are chunks with dirty pages (because mNumDirty > 0 above) but
3272       // they're not in mChunksDirty.  That can happen if they're busy being
3273       // purged by other threads.
3274       return false;
3275     }
3276     MOZ_ASSERT(chunk->ndirty > 0);
3277
3278     // Mark the chunk as busy so it won't be deleted and remove it from
3279     // mChunksDirty so we're the only thread purging it.
3280     MOZ_ASSERT(!chunk->mIsPurging);
3281     mChunksDirty.Remove(chunk);
3282     chunk->mIsPurging = true;
3283   }  // MaybeMutexAutoLock
3284
3285   // True if we should continue purging memory from this arena.
3286   bool continue_purge_arena = true;
3287
3288   // True if we should continue purging memory in this chunk.
3289   bool continue_purge_chunk = true;
3290
3291   // True if at least one Purge operation has occured and therefore we need to
3292   // call FinishPurgingInChunk() before returning.
3293   bool purged_once = false;
3294
3295   while (continue_purge_chunk && continue_purge_arena) {
3296     // This structure is used to communicate between the two PurgePhase
3297     // functions.
3298     PurgeInfo purge_info(*this, chunk);
3299
3300     {
3301       // Phase 1: Find pages that need purging.
3302       MaybeMutexAutoLock lock(purge_info.mArena.mLock);
3303       MOZ_ASSERT(chunk->mIsPurging);
3304
3305       continue_purge_chunk = purge_info.FindDirtyPages(purged_once);
3306       continue_purge_arena =
3307           purge_info.mArena.mNumDirty > (aForce ? 0 : EffectiveMaxDirty() >> 1);
3308     }
3309     if (!continue_purge_chunk) {
3310       if (chunk->mDying) {
3311         // Phase one already unlinked the chunk from structures, we just need to
3312         // release the memory.
3313         chunk_dealloc((void*)chunk, kChunkSize, ARENA_CHUNK);
3314       }
3315       // There's nothing else to do here, our caller may execute Purge() again
3316       // if continue_purge_arena is true.
3317       return continue_purge_arena;
3318     }
3319
3320 #ifdef MALLOC_DECOMMIT
3321     pages_decommit(purge_info.DirtyPtr(), purge_info.DirtyLenBytes());
3322 #else
3323 #  ifdef XP_SOLARIS
3324     posix_madvise(purge_info.DirtyPtr(), purge_info.DirtyLenBytes(), MADV_FREE);
3325 #  else
3326     madvise(purge_info.DirtyPtr(), purge_info.DirtyLenBytes(), MADV_FREE);
3327 #  endif
3328 #endif
3329
3330     arena_chunk_t* chunk_to_release = nullptr;
3331
3332     {
3333       // Phase 2: Mark the pages with their final state (madvised or
3334       // decommitted) and fix up any other bookkeeping.
3335       MaybeMutexAutoLock lock(purge_info.mArena.mLock);
3336       MOZ_ASSERT(chunk->mIsPurging);
3337
3338       auto [cpc, ctr] = purge_info.UpdatePagesAndCounts();
3339       continue_purge_chunk = cpc;
3340       chunk_to_release = ctr;
3341       continue_purge_arena =
3342           purge_info.mArena.mNumDirty > (aForce ? 0 : EffectiveMaxDirty() >> 1);
3343
3344       if (!continue_purge_chunk || !continue_purge_arena) {
3345         // We're going to stop purging here so update the chunk's bookkeeping.
3346         purge_info.FinishPurgingInChunk(true);
3347       }
3348     }  // MaybeMutexAutoLock
3349
3350     // Phase 2 can release the spare chunk (not always == chunk) so an extra
3351     // parameter is used to return that chunk.
3352     if (chunk_to_release) {
3353       chunk_dealloc((void*)chunk_to_release, kChunkSize, ARENA_CHUNK);
3354     }
3355     purged_once = true;
3356   }
3357
3358   return continue_purge_arena;
3359 }
3360
3361 bool arena_t::PurgeInfo::FindDirtyPages(bool aPurgedOnce) {
3362   // It's possible that the previously dirty pages have now been
3363   // allocated or the chunk is dying.
3364   if (mChunk->ndirty == 0 || mChunk->mDying) {
3365     // Add the chunk to the mChunksMAdvised list if it's had at least one
3366     // madvise.
3367     FinishPurgingInChunk(aPurgedOnce);
3368     return false;
3369   }
3370
3371   // Look for the first dirty page, as we do record the beginning of the run
3372   // that contains the dirty page.
3373   bool previous_page_is_allocated = true;
3374   for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages - 1; i++) {
3375     size_t bits = mChunk->map[i].bits;
3376
3377     // We must not find any busy pages because this chunk shouldn't be in
3378     // the dirty list.
3379     MOZ_ASSERT((bits & CHUNK_MAP_BUSY) == 0);
3380
3381     // The first page belonging to a free run has the allocated bit clear
3382     // and a non-zero size. To distinguish it from the last page of a free
3383     // run we track the allocated bit of the previous page, if it's set
3384     // then this is the first.
3385     if ((bits & CHUNK_MAP_ALLOCATED) == 0 && (bits & ~gPageSizeMask) != 0 &&
3386         previous_page_is_allocated) {
3387       mFreeRunInd = i;
3388       mFreeRunLen = bits >> gPageSize2Pow;
3389     }
3390
3391     if (bits & CHUNK_MAP_DIRTY) {
3392       MOZ_ASSERT(
3393           (mChunk->map[i].bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0);
3394       mDirtyInd = i;
3395       break;
3396     }
3397
3398     previous_page_is_allocated = bits & CHUNK_MAP_ALLOCATED;
3399   }
3400   MOZ_ASSERT(mDirtyInd != 0);
3401   MOZ_ASSERT(mFreeRunInd >= gChunkHeaderNumPages);
3402   MOZ_ASSERT(mFreeRunInd <= mDirtyInd);
3403   MOZ_ASSERT(mFreeRunLen > 0);
3404
3405   // Look for the next not-dirty page, it could be the guard page at the end
3406   // of the chunk.
3407   for (size_t i = 0; mDirtyInd + i < gChunkNumPages; i++) {
3408     size_t& bits = mChunk->map[mDirtyInd + i].bits;
3409
3410     // We must not find any busy pages because this chunk shouldn't be in the
3411     // dirty list.
3412     MOZ_ASSERT(!(bits & CHUNK_MAP_BUSY));
3413
3414     if (!(bits & CHUNK_MAP_DIRTY)) {
3415       mDirtyNPages = i;
3416       break;
3417     }
3418     MOZ_ASSERT((bits & CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED) == 0);
3419     bits ^= CHUNK_MAP_DIRTY;
3420   }
3421   MOZ_ASSERT(mDirtyNPages > 0);
3422   MOZ_ASSERT(mDirtyNPages <= mChunk->ndirty);
3423   MOZ_ASSERT(mFreeRunInd + mFreeRunLen >= mDirtyInd + mDirtyNPages);
3424
3425   // Mark the run as busy so that another thread freeing memory won't try to
3426   // coalesce it.
3427   mChunk->map[mFreeRunInd].bits |= CHUNK_MAP_BUSY;
3428   mChunk->map[FreeRunLastInd()].bits |= CHUNK_MAP_BUSY;
3429
3430   mChunk->ndirty -= mDirtyNPages;
3431   mArena.mNumDirty -= mDirtyNPages;
3432
3433   // Before we unlock ensure that no other thread can allocate from these
3434   // pages.
3435   if (mArena.mSpare != mChunk) {
3436     mArena.mRunsAvail.Remove(&mChunk->map[mFreeRunInd]);
3437   }
3438   return true;
3439 }
3440
3441 std::pair<bool, arena_chunk_t*> arena_t::PurgeInfo::UpdatePagesAndCounts() {
3442   for (size_t i = 0; i < mDirtyNPages; i++) {
3443     // The page must not have any of the madvised, decommited or dirty bits
3444     // set.
3445     MOZ_ASSERT((mChunk->map[mDirtyInd + i].bits &
3446                 (CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED | CHUNK_MAP_DIRTY)) ==
3447                0);
3448 #ifdef MALLOC_DECOMMIT
3449     const size_t free_operation = CHUNK_MAP_DECOMMITTED;
3450 #else
3451     const size_t free_operation = CHUNK_MAP_MADVISED;
3452 #endif
3453     mChunk->map[mDirtyInd + i].bits ^= free_operation;
3454   }
3455
3456   // Remove the CHUNK_MAP_BUSY marks from the run.
3457 #ifdef MOZ_DEBUG
3458   MOZ_ASSERT(mChunk->map[mFreeRunInd].bits & CHUNK_MAP_BUSY);
3459   MOZ_ASSERT(mChunk->map[FreeRunLastInd()].bits & CHUNK_MAP_BUSY);
3460 #endif
3461   mChunk->map[mFreeRunInd].bits &= ~CHUNK_MAP_BUSY;
3462   mChunk->map[FreeRunLastInd()].bits &= ~CHUNK_MAP_BUSY;
3463
3464 #ifndef MALLOC_DECOMMIT
3465   mArena.mNumMAdvised += mDirtyNPages;
3466 #endif
3467
3468   mArena.mStats.committed -= mDirtyNPages;
3469
3470   if (mChunk->mDying) {
3471     // A dying chunk doesn't need to be coaleased, it will already have one
3472     // large run.
3473     MOZ_ASSERT(mFreeRunInd == gChunkHeaderNumPages &&
3474                mFreeRunLen == gChunkNumPages - gChunkHeaderNumPages - 1);
3475
3476     return std::make_pair(false, mChunk);
3477   }
3478
3479   bool was_empty = mChunk->IsEmpty();
3480   mFreeRunInd =
3481       mArena.TryCoalesce(mChunk, mFreeRunInd, mFreeRunLen, FreeRunLenBytes());
3482
3483   arena_chunk_t* chunk_to_release = nullptr;
3484   if (!was_empty && mChunk->IsEmpty()) {
3485     // This now-empty chunk will become the spare chunk and the spare
3486     // chunk will be returned for deletion.
3487     chunk_to_release = mArena.DemoteChunkToSpare(mChunk);
3488   }
3489
3490   if (mChunk != mArena.mSpare) {
3491     mArena.mRunsAvail.Insert(&mChunk->map[mFreeRunInd]);
3492   }
3493
3494   return std::make_pair(mChunk->ndirty != 0, chunk_to_release);
3495 }
3496
3497 void arena_t::PurgeInfo::FinishPurgingInChunk(bool aAddToMAdvised) {
3498   // If there's no more purge activity for this chunk then finish up while
3499   // we still have the lock.
3500   MOZ_ASSERT(mChunk->mIsPurging);
3501   mChunk->mIsPurging = false;
3502
3503   if (mChunk->mDying) {
3504     // Another thread tried to delete this chunk while we weren't holding
3505     // the lock.  Now it's our responsibility to finish deleting it.  First
3506     // clear its dirty pages so that RemoveChunk() doesn't try to remove it
3507     // from mChunksDirty because it won't be there.
3508     mArena.mNumDirty -= mChunk->ndirty;
3509     mArena.mStats.committed -= mChunk->ndirty;
3510     mChunk->ndirty = 0;
3511
3512     DebugOnly<bool> release_chunk = mArena.RemoveChunk(mChunk);
3513     // RemoveChunk() can't return false because mIsPurging was false
3514     // during the call.
3515     MOZ_ASSERT(release_chunk);
3516     return;
3517   }
3518
3519   if (mChunk->ndirty != 0) {
3520     mArena.mChunksDirty.Insert(mChunk);
3521   }
3522
3523 #ifdef MALLOC_DOUBLE_PURGE
3524   if (aAddToMAdvised) {
3525     // The chunk might already be in the list, but this
3526     // makes sure it's at the front.
3527     if (mArena.mChunksMAdvised.ElementProbablyInList(mChunk)) {
3528       mArena.mChunksMAdvised.remove(mChunk);
3529     }
3530     mArena.mChunksMAdvised.pushFront(mChunk);
3531   }
3532 #endif
3533 }
3534
3535 // run_pages and size make each-other redundant. But we use them both and the
3536 // caller computes both so this function requires both and will assert if they
3537 // are inconsistent.
3538 size_t arena_t::TryCoalesce(arena_chunk_t* aChunk, size_t run_ind,
3539                             size_t run_pages, size_t size) {
3540   // Copy in/out parameters to local variables so that we don't need '*'
3541   // operators throughout this code but also so that type checking is stricter
3542   // (references are too easily coerced).
3543   MOZ_ASSERT(size == run_pages << gPageSize2Pow);
3544
3545   // Try to coalesce forward.
3546   if (run_ind + run_pages < gChunkNumPages - 1 &&
3547       (aChunk->map[run_ind + run_pages].bits &
3548        (CHUNK_MAP_ALLOCATED | CHUNK_MAP_BUSY)) == 0) {
3549     size_t nrun_size = aChunk->map[run_ind + run_pages].bits & ~gPageSizeMask;
3550
3551     // Remove successor from tree of available runs; the coalesced run is
3552     // inserted later.
3553     mRunsAvail.Remove(&aChunk->map[run_ind + run_pages]);
3554
3555     size += nrun_size;
3556     run_pages = size >> gPageSize2Pow;
3557
3558     MOZ_DIAGNOSTIC_ASSERT((aChunk->map[run_ind + run_pages - 1].bits &
3559                            ~gPageSizeMask) == nrun_size);
3560     aChunk->map[run_ind].bits =
3561         size | (aChunk->map[run_ind].bits & gPageSizeMask);
3562     aChunk->map[run_ind + run_pages - 1].bits =
3563         size | (aChunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
3564   }
3565
3566   // Try to coalesce backward.
3567   if (run_ind > gChunkHeaderNumPages &&
3568       (aChunk->map[run_ind - 1].bits &
3569        (CHUNK_MAP_ALLOCATED | CHUNK_MAP_BUSY)) == 0) {
3570     size_t prun_size = aChunk->map[run_ind - 1].bits & ~gPageSizeMask;
3571
3572     run_ind -= prun_size >> gPageSize2Pow;
3573
3574     // Remove predecessor from tree of available runs; the coalesced run is
3575     // inserted later.
3576     mRunsAvail.Remove(&aChunk->map[run_ind]);
3577
3578     size += prun_size;
3579     run_pages = size >> gPageSize2Pow;
3580
3581     MOZ_DIAGNOSTIC_ASSERT((aChunk->map[run_ind].bits & ~gPageSizeMask) ==
3582                           prun_size);
3583     aChunk->map[run_ind].bits =
3584         size | (aChunk->map[run_ind].bits & gPageSizeMask);
3585     aChunk->map[run_ind + run_pages - 1].bits =
3586         size | (aChunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
3587   }
3588
3589   return run_ind;
3590 }
3591
3592 arena_chunk_t* arena_t::DallocRun(arena_run_t* aRun, bool aDirty) {
3593   arena_chunk_t* chunk;
3594   size_t size, run_ind, run_pages;
3595
3596   chunk = GetChunkForPtr(aRun);
3597   run_ind = (size_t)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
3598   MOZ_DIAGNOSTIC_ASSERT(run_ind >= gChunkHeaderNumPages);
3599   MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1);
3600   if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) {
3601     size = chunk->map[run_ind].bits & ~gPageSizeMask;
3602     run_pages = (size >> gPageSize2Pow);
3603   } else {
3604     run_pages = aRun->mBin->mRunSizePages;
3605     size = run_pages << gPageSize2Pow;
3606   }
3607
3608   // Mark pages as unallocated in the chunk map.
3609   if (aDirty) {
3610     size_t i;
3611
3612     for (i = 0; i < run_pages; i++) {
3613       MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) ==
3614                             0);
3615       chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
3616     }
3617
3618     if (chunk->ndirty == 0 && !chunk->mIsPurging) {
3619       mChunksDirty.Insert(chunk);
3620     }
3621     chunk->ndirty += run_pages;
3622     mNumDirty += run_pages;
3623   } else {
3624     size_t i;
3625
3626     for (i = 0; i < run_pages; i++) {
3627       chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
3628     }
3629   }
3630   chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & gPageSizeMask);
3631   chunk->map[run_ind + run_pages - 1].bits =
3632       size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
3633
3634   run_ind = TryCoalesce(chunk, run_ind, run_pages, size);
3635
3636   // Deallocate chunk if it is now completely unused.
3637   arena_chunk_t* chunk_dealloc = nullptr;
3638   if (chunk->IsEmpty()) {
3639     chunk_dealloc = DemoteChunkToSpare(chunk);
3640   } else {
3641     // Insert into tree of available runs, now that coalescing is complete.
3642     mRunsAvail.Insert(&chunk->map[run_ind]);
3643   }
3644
3645   return chunk_dealloc;
3646 }
3647
3648 void arena_t::TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun,
3649                           size_t aOldSize, size_t aNewSize) {
3650   size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
3651   size_t head_npages = (aOldSize - aNewSize) >> gPageSize2Pow;
3652
3653   MOZ_ASSERT(aOldSize > aNewSize);
3654
3655   // Update the chunk map so that arena_t::RunDalloc() can treat the
3656   // leading run as separately allocated.
3657   aChunk->map[pageind].bits =
3658       (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3659   aChunk->map[pageind + head_npages].bits =
3660       aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3661
3662   DebugOnly<arena_chunk_t*> no_chunk = DallocRun(aRun, false);
3663   // This will never release a chunk as there's still at least one allocated
3664   // run.
3665   MOZ_ASSERT(!no_chunk);
3666 }
3667
3668 void arena_t::TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun,
3669                           size_t aOldSize, size_t aNewSize, bool aDirty) {
3670   size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
3671   size_t npages = aNewSize >> gPageSize2Pow;
3672
3673   MOZ_ASSERT(aOldSize > aNewSize);
3674
3675   // Update the chunk map so that arena_t::RunDalloc() can treat the
3676   // trailing run as separately allocated.
3677   aChunk->map[pageind].bits = aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3678   aChunk->map[pageind + npages].bits =
3679       (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3680
3681   DebugOnly<arena_chunk_t*> no_chunk =
3682       DallocRun((arena_run_t*)(uintptr_t(aRun) + aNewSize), aDirty);
3683
3684   // This will never release a chunk as there's still at least one allocated
3685   // run.
3686   MOZ_ASSERT(!no_chunk);
3687 }
3688
3689 arena_run_t* arena_t::GetNewEmptyBinRun(arena_bin_t* aBin) {
3690   arena_run_t* run;
3691   unsigned i, remainder;
3692
3693   // Allocate a new run.
3694   run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow,
3695                  false, false);
3696   if (!run) {
3697     return nullptr;
3698   }
3699
3700   // Initialize run internals.
3701   run->mBin = aBin;
3702
3703   for (i = 0; i < aBin->mRunNumRegionsMask - 1; i++) {
3704     run->mRegionsMask[i] = UINT_MAX;
3705   }
3706   remainder = aBin->mRunNumRegions & ((1U << (LOG2(sizeof(int)) + 3)) - 1);
3707   if (remainder == 0) {
3708     run->mRegionsMask[i] = UINT_MAX;
3709   } else {
3710     // The last element has spare bits that need to be unset.
3711     run->mRegionsMask[i] =
3712         (UINT_MAX >> ((1U << (LOG2(sizeof(int)) + 3)) - remainder));
3713   }
3714
3715   run->mRegionsMinElement = 0;
3716
3717   run->mNumFree = aBin->mRunNumRegions;
3718 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
3719   run->mMagic = ARENA_RUN_MAGIC;
3720 #endif
3721
3722   // Make sure we continue to use this run for subsequent allocations.
3723   new (&run->mRunListElem) DoublyLinkedListElement<arena_run_t>();
3724   aBin->mNonFullRuns.pushFront(run);
3725
3726   aBin->mNumRuns++;
3727   return run;
3728 }
3729
3730 arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
3731   auto mrf_head = aBin->mNonFullRuns.begin();
3732   if (mrf_head) {
3733     // Take the head and if we are going to fill it, remove it from our list.
3734     arena_run_t* run = &(*mrf_head);
3735     if (run->mNumFree == 1) {
3736       aBin->mNonFullRuns.remove(run);
3737     }
3738     return run;
3739   }
3740   return GetNewEmptyBinRun(aBin);
3741 }
3742
3743 void arena_bin_t::Init(SizeClass aSizeClass) {
3744   size_t try_run_size;
3745   unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3746   // Size of the run header, excluding mRegionsMask.
3747   static const size_t kFixedHeaderSize = offsetof(arena_run_t, mRegionsMask);
3748
3749   MOZ_ASSERT(aSizeClass.Size() <= gMaxBinClass);
3750
3751   try_run_size = gPageSize;
3752
3753   mSizeClass = aSizeClass.Size();
3754   mNumRuns = 0;
3755
3756   // Run size expansion loop.
3757   while (true) {
3758     try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) +
3759                 1;  // Counter-act try_nregs-- in loop.
3760
3761     // The do..while loop iteratively reduces the number of regions until
3762     // the run header and the regions no longer overlap.  A closed formula
3763     // would be quite messy, since there is an interdependency between the
3764     // header's mask length and the number of regions.
3765     do {
3766       try_nregs--;
3767       try_mask_nelms =
3768           (try_nregs >> (LOG2(sizeof(int)) + 3)) +
3769           ((try_nregs & ((1U << (LOG2(sizeof(int)) + 3)) - 1)) ? 1 : 0);
3770       try_reg0_offset = try_run_size - (try_nregs * mSizeClass);
3771     } while (kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) >
3772              try_reg0_offset);
3773
3774     // Try to keep the run overhead below kRunOverhead.
3775     if (Fraction(try_reg0_offset, try_run_size) <= kRunOverhead) {
3776       break;
3777     }
3778
3779     // If the overhead is larger than the size class, it means the size class
3780     // is small and doesn't align very well with the header. It's desirable to
3781     // have smaller run sizes for them, so relax the overhead requirement.
3782     if (try_reg0_offset > mSizeClass) {
3783       if (Fraction(try_reg0_offset, try_run_size) <= kRunRelaxedOverhead) {
3784         break;
3785       }
3786     }
3787
3788     // The run header includes one bit per region of the given size. For sizes
3789     // small enough, the number of regions is large enough that growing the run
3790     // size barely moves the needle for the overhead because of all those bits.
3791     // For example, for a size of 8 bytes, adding 4KiB to the run size adds
3792     // close to 512 bits to the header, which is 64 bytes.
3793     // With such overhead, there is no way to get to the wanted overhead above,
3794     // so we give up if the required size for mRegionsMask more than doubles the
3795     // size of the run header.
3796     if (try_mask_nelms * sizeof(unsigned) >= kFixedHeaderSize) {
3797       break;
3798     }
3799
3800     // If next iteration is going to be larger than the largest possible large
3801     // size class, then we didn't find a setup where the overhead is small
3802     // enough, and we can't do better than the current settings, so just use
3803     // that.
3804     if (try_run_size + gPageSize > gMaxLargeClass) {
3805       break;
3806     }
3807
3808     // Try more aggressive settings.
3809     try_run_size += gPageSize;
3810   }
3811
3812   MOZ_ASSERT(kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) <=
3813              try_reg0_offset);
3814   MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);
3815
3816   // Our list management would break if mRunNumRegions == 1 and we should use
3817   // a large size class instead, anyways.
3818   MOZ_ASSERT(try_nregs > 1);
3819
3820   // Copy final settings.
3821   MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX);
3822   mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow);
3823   mRunNumRegions = try_nregs;
3824   mRunNumRegionsMask = try_mask_nelms;
3825   mRunFirstRegionOffset = try_reg0_offset;
3826   mSizeDivisor = FastDivisor<uint16_t>(aSizeClass.Size(), try_run_size);
3827 }
3828
3829 void arena_t::ResetSmallAllocRandomization() {
3830   if (MOZ_UNLIKELY(opt_randomize_small)) {
3831     MaybeMutexAutoLock lock(mLock);
3832     InitPRNG();
3833   }
3834   mRandomizeSmallAllocations = opt_randomize_small;
3835 }
3836
3837 void arena_t::InitPRNG() {
3838   // Both another thread could race and the code backing RandomUint64
3839   // (arc4random for example) may allocate memory while here, so we must
3840   // ensure to start the mPRNG initialization only once and to not hold
3841   // the lock while initializing.
3842   mIsPRNGInitializing = true;
3843   {
3844     mLock.Unlock();
3845     mozilla::Maybe<uint64_t> prngState1 = mozilla::RandomUint64();
3846     mozilla::Maybe<uint64_t> prngState2 = mozilla::RandomUint64();
3847     mLock.Lock();
3848
3849     mozilla::non_crypto::XorShift128PlusRNG prng(prngState1.valueOr(0),
3850                                                  prngState2.valueOr(0));
3851     if (mPRNG) {
3852       *mPRNG = prng;
3853     } else {
3854       void* backing =
3855           base_alloc(sizeof(mozilla::non_crypto::XorShift128PlusRNG));
3856       mPRNG = new (backing)
3857           mozilla::non_crypto::XorShift128PlusRNG(std::move(prng));
3858     }
3859   }
3860   mIsPRNGInitializing = false;
3861 }
3862
3863 void* arena_t::MallocSmall(size_t aSize, bool aZero) {
3864   void* ret;
3865   arena_bin_t* bin;
3866   arena_run_t* run;
3867   SizeClass sizeClass(aSize);
3868   aSize = sizeClass.Size();
3869
3870   switch (sizeClass.Type()) {
3871     case SizeClass::Tiny:
3872       bin = &mBins[FloorLog2(aSize / kMinTinyClass)];
3873       break;
3874     case SizeClass::Quantum:
3875       // Although we divide 2 things by kQuantum, the compiler will
3876       // reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a
3877       // single constant.
3878       bin = &mBins[kNumTinyClasses + (aSize / kQuantum) -
3879                    (kMinQuantumClass / kQuantum)];
3880       break;
3881     case SizeClass::QuantumWide:
3882       bin =
3883           &mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) -
3884                  (kMinQuantumWideClass / kQuantumWide)];
3885       break;
3886     case SizeClass::SubPage:
3887       bin =
3888           &mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses +
3889                  (FloorLog2(aSize) - LOG2(kMinSubPageClass))];
3890       break;
3891     default:
3892       MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type");
3893   }
3894   MOZ_DIAGNOSTIC_ASSERT(aSize == bin->mSizeClass);
3895
3896   {
3897     MaybeMutexAutoLock lock(mLock);
3898
3899 #ifdef MOZ_DEBUG
3900     bool isInitializingThread(false);
3901 #endif
3902
3903     if (MOZ_UNLIKELY(mRandomizeSmallAllocations && mPRNG == nullptr &&
3904                      !mIsPRNGInitializing)) {
3905 #ifdef MOZ_DEBUG
3906       isInitializingThread = true;
3907 #endif
3908       InitPRNG();
3909     }
3910
3911     MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG ||
3912                (mIsPRNGInitializing && !isInitializingThread));
3913
3914     run = GetNonFullBinRun(bin);
3915     if (MOZ_UNLIKELY(!run)) {
3916       return nullptr;
3917     }
3918     MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3919     MOZ_DIAGNOSTIC_ASSERT(run->mNumFree > 0);
3920     ret = ArenaRunRegAlloc(run, bin);
3921     MOZ_DIAGNOSTIC_ASSERT(ret);
3922     run->mNumFree--;
3923     if (!ret) {
3924       return nullptr;
3925     }
3926
3927     mStats.allocated_small += aSize;
3928     mStats.operations++;
3929   }
3930
3931   if (!aZero) {
3932     ApplyZeroOrJunk(ret, aSize);
3933   } else {
3934     memset(ret, 0, aSize);
3935   }
3936
3937   return ret;
3938 }
3939
3940 void* arena_t::MallocLarge(size_t aSize, bool aZero) {
3941   void* ret;
3942
3943   // Large allocation.
3944   aSize = PAGE_CEILING(aSize);
3945
3946   {
3947     MaybeMutexAutoLock lock(mLock);
3948     ret = AllocRun(aSize, true, aZero);
3949     if (!ret) {
3950       return nullptr;
3951     }
3952     mStats.allocated_large += aSize;
3953     mStats.operations++;
3954   }
3955
3956   if (!aZero) {
3957     ApplyZeroOrJunk(ret, aSize);
3958   }
3959
3960   return ret;
3961 }
3962
3963 void* arena_t::Malloc(size_t aSize, bool aZero) {
3964   MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
3965   MOZ_ASSERT(aSize != 0);
3966
3967   if (aSize <= gMaxBinClass) {
3968     return MallocSmall(aSize, aZero);
3969   }
3970   if (aSize <= gMaxLargeClass) {
3971     return MallocLarge(aSize, aZero);
3972   }
3973   return MallocHuge(aSize, aZero);
3974 }
3975
3976 // Only handles large allocations that require more than page alignment.
3977 void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
3978   void* ret;
3979   size_t offset;
3980   arena_chunk_t* chunk;
3981
3982   MOZ_ASSERT((aSize & gPageSizeMask) == 0);
3983   MOZ_ASSERT((aAlignment & gPageSizeMask) == 0);
3984
3985   {
3986     MaybeMutexAutoLock lock(mLock);
3987     ret = AllocRun(aAllocSize, true, false);
3988     if (!ret) {
3989       return nullptr;
3990     }
3991
3992     chunk = GetChunkForPtr(ret);
3993
3994     offset = uintptr_t(ret) & (aAlignment - 1);
3995     MOZ_ASSERT((offset & gPageSizeMask) == 0);
3996     MOZ_ASSERT(offset < aAllocSize);
3997     if (offset == 0) {
3998       TrimRunTail(chunk, (arena_run_t*)ret, aAllocSize, aSize, false);
3999     } else {
4000       size_t leadsize, trailsize;
4001
4002       leadsize = aAlignment - offset;
4003       if (leadsize > 0) {
4004         TrimRunHead(chunk, (arena_run_t*)ret, aAllocSize,
4005                     aAllocSize - leadsize);
4006         ret = (void*)(uintptr_t(ret) + leadsize);
4007       }
4008
4009       trailsize = aAllocSize - leadsize - aSize;
4010       if (trailsize != 0) {
4011         // Trim trailing space.
4012         MOZ_ASSERT(trailsize < aAllocSize);
4013         TrimRunTail(chunk, (arena_run_t*)ret, aSize + trailsize, aSize, false);
4014       }
4015     }
4016
4017     mStats.allocated_large += aSize;
4018     mStats.operations++;
4019   }
4020
4021   // Note that since Bug 1488780we don't attempt purge dirty memory on this code
4022   // path. In general there won't be dirty memory above the threshold after an
4023   // allocation, only after free.  The exception is if the dirty page threshold
4024   // has changed.
4025
4026   ApplyZeroOrJunk(ret, aSize);
4027   return ret;
4028 }
4029
4030 void* arena_t::Palloc(size_t aAlignment, size_t aSize) {
4031   void* ret;
4032   size_t ceil_size;
4033
4034   // Round size up to the nearest multiple of alignment.
4035   //
4036   // This done, we can take advantage of the fact that for each small
4037   // size class, every object is aligned at the smallest power of two
4038   // that is non-zero in the base two representation of the size.  For
4039   // example:
4040   //
4041   //   Size |   Base 2 | Minimum alignment
4042   //   -----+----------+------------------
4043   //     96 |  1100000 |  32
4044   //    144 | 10100000 |  32
4045   //    192 | 11000000 |  64
4046   //
4047   // Depending on runtime settings, it is possible that arena_malloc()
4048   // will further round up to a power of two, but that never causes
4049   // correctness issues.
4050   ceil_size = ALIGNMENT_CEILING(aSize, aAlignment);
4051
4052   // (ceil_size < aSize) protects against the combination of maximal
4053   // alignment and size greater than maximal alignment.
4054   if (ceil_size < aSize) {
4055     // size_t overflow.
4056     return nullptr;
4057   }
4058
4059   if (ceil_size <= gPageSize ||
4060       (aAlignment <= gPageSize && ceil_size <= gMaxLargeClass)) {
4061     ret = Malloc(ceil_size, false);
4062   } else {
4063     size_t run_size;
4064
4065     // We can't achieve sub-page alignment, so round up alignment
4066     // permanently; it makes later calculations simpler.
4067     aAlignment = PAGE_CEILING(aAlignment);
4068     ceil_size = PAGE_CEILING(aSize);
4069
4070     // (ceil_size < aSize) protects against very large sizes within
4071     // pagesize of SIZE_T_MAX.
4072     //
4073     // (ceil_size + aAlignment < ceil_size) protects against the
4074     // combination of maximal alignment and ceil_size large enough
4075     // to cause overflow.  This is similar to the first overflow
4076     // check above, but it needs to be repeated due to the new
4077     // ceil_size value, which may now be *equal* to maximal
4078     // alignment, whereas before we only detected overflow if the
4079     // original size was *greater* than maximal alignment.
4080     if (ceil_size < aSize || ceil_size + aAlignment < ceil_size) {
4081       // size_t overflow.
4082       return nullptr;
4083     }
4084
4085     // Calculate the size of the over-size run that arena_palloc()
4086     // would need to allocate in order to guarantee the alignment.
4087     if (ceil_size >= aAlignment) {
4088       run_size = ceil_size + aAlignment - gPageSize;
4089     } else {
4090       // It is possible that (aAlignment << 1) will cause
4091       // overflow, but it doesn't matter because we also
4092       // subtract pagesize, which in the case of overflow
4093       // leaves us with a very large run_size.  That causes
4094       // the first conditional below to fail, which means
4095       // that the bogus run_size value never gets used for
4096       // anything important.
4097       run_size = (aAlignment << 1) - gPageSize;
4098     }
4099
4100     if (run_size <= gMaxLargeClass) {
4101       ret = PallocLarge(aAlignment, ceil_size, run_size);
4102     } else if (aAlignment <= kChunkSize) {
4103       ret = MallocHuge(ceil_size, false);
4104     } else {
4105       ret = PallocHuge(ceil_size, aAlignment, false);
4106     }
4107   }
4108
4109   MOZ_ASSERT((uintptr_t(ret) & (aAlignment - 1)) == 0);
4110   return ret;
4111 }
4112
4113 class AllocInfo {
4114  public:
4115   template <bool Validate = false>
4116   static inline AllocInfo Get(const void* aPtr) {
4117     // If the allocator is not initialized, the pointer can't belong to it.
4118     if (Validate && !malloc_initialized) {
4119       return AllocInfo();
4120     }
4121
4122     auto chunk = GetChunkForPtr(aPtr);
4123     if (Validate) {
4124       if (!chunk || !gChunkRTree.Get(chunk)) {
4125         return AllocInfo();
4126       }
4127     }
4128
4129     if (chunk != aPtr) {
4130       MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
4131       size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
4132       return GetInChunk(aPtr, chunk, pageind);
4133     }
4134
4135     extent_node_t key;
4136
4137     // Huge allocation
4138     key.mAddr = chunk;
4139     MutexAutoLock lock(huge_mtx);
4140     extent_node_t* node = huge.Search(&key);
4141     if (Validate && !node) {
4142       return AllocInfo();
4143     }
4144     return AllocInfo(node->mSize, node);
4145   }
4146
4147   // Get the allocation information for a pointer we know is within a chunk
4148   // (Small or large, not huge).
4149   static inline AllocInfo GetInChunk(const void* aPtr, arena_chunk_t* aChunk,
4150                                      size_t pageind) {
4151     size_t mapbits = aChunk->map[pageind].bits;
4152     MOZ_DIAGNOSTIC_ASSERT((mapbits & CHUNK_MAP_ALLOCATED) != 0);
4153
4154     size_t size;
4155     if ((mapbits & CHUNK_MAP_LARGE) == 0) {
4156       arena_run_t* run = (arena_run_t*)(mapbits & ~gPageSizeMask);
4157       MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
4158       size = run->mBin->mSizeClass;
4159     } else {
4160       size = mapbits & ~gPageSizeMask;
4161       MOZ_DIAGNOSTIC_ASSERT(size != 0);
4162     }
4163
4164     return AllocInfo(size, aChunk);
4165   }
4166
4167   // Validate ptr before assuming that it points to an allocation.  Currently,
4168   // the following validation is performed:
4169   //
4170   // + Check that ptr is not nullptr.
4171   //
4172   // + Check that ptr lies within a mapped chunk.
4173   static inline AllocInfo GetValidated(const void* aPtr) {
4174     return Get<true>(aPtr);
4175   }
4176
4177   AllocInfo() : mSize(0), mChunk(nullptr) {}
4178
4179   explicit AllocInfo(size_t aSize, arena_chunk_t* aChunk)
4180       : mSize(aSize), mChunk(aChunk) {
4181     MOZ_ASSERT(mSize <= gMaxLargeClass);
4182   }
4183
4184   explicit AllocInfo(size_t aSize, extent_node_t* aNode)
4185       : mSize(aSize), mNode(aNode) {
4186     MOZ_ASSERT(mSize > gMaxLargeClass);
4187   }
4188
4189   size_t Size() { return mSize; }
4190
4191   arena_t* Arena() {
4192     if (mSize <= gMaxLargeClass) {
4193       return mChunk->arena;
4194     }
4195     // Best effort detection that we're not trying to access an already
4196     // disposed arena. In the case of a disposed arena, the memory location
4197     // pointed by mNode->mArena is either free (but still a valid memory
4198     // region, per TypedBaseAlloc<arena_t>), in which case its id was reset,
4199     // or has been reallocated for a new region, and its id is very likely
4200     // different (per randomness). In both cases, the id is unlikely to
4201     // match what it was for the disposed arena.
4202     MOZ_RELEASE_ASSERT(mNode->mArenaId == mNode->mArena->mId);
4203     return mNode->mArena;
4204   }
4205
4206   bool IsValid() const { return !!mSize; }
4207
4208  private:
4209   size_t mSize;
4210   union {
4211     // Pointer to the chunk associated with the allocation for small
4212     // and large allocations.
4213     arena_chunk_t* mChunk;
4214
4215     // Pointer to the extent node for huge allocations.
4216     extent_node_t* mNode;
4217   };
4218 };
4219
4220 inline void MozJemalloc::jemalloc_ptr_info(const void* aPtr,
4221                                            jemalloc_ptr_info_t* aInfo) {
4222   arena_chunk_t* chunk = GetChunkForPtr(aPtr);
4223
4224   // Is the pointer null, or within one chunk's size of null?
4225   // Alternatively, if the allocator is not initialized yet, the pointer
4226   // can't be known.
4227   if (!chunk || !malloc_initialized) {
4228     *aInfo = {TagUnknown, nullptr, 0, 0};
4229     return;
4230   }
4231
4232   // Look for huge allocations before looking for |chunk| in gChunkRTree.
4233   // This is necessary because |chunk| won't be in gChunkRTree if it's
4234   // the second or subsequent chunk in a huge allocation.
4235   extent_node_t* node;
4236   extent_node_t key;
4237   {
4238     MutexAutoLock lock(huge_mtx);
4239     key.mAddr = const_cast<void*>(aPtr);
4240     node =
4241         reinterpret_cast<RedBlackTree<extent_node_t, ExtentTreeBoundsTrait>*>(
4242             &huge)
4243             ->Search(&key);
4244     if (node) {
4245       *aInfo = {TagLiveAlloc, node->mAddr, node->mSize, node->mArena->mId};
4246       return;
4247     }
4248   }
4249
4250   // It's not a huge allocation. Check if we have a known chunk.
4251   if (!gChunkRTree.Get(chunk)) {
4252     *aInfo = {TagUnknown, nullptr, 0, 0};
4253     return;
4254   }
4255
4256   MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
4257
4258   // Get the page number within the chunk.
4259   size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
4260   if (pageind < gChunkHeaderNumPages) {
4261     // Within the chunk header.
4262     *aInfo = {TagUnknown, nullptr, 0, 0};
4263     return;
4264   }
4265
4266   size_t mapbits = chunk->map[pageind].bits;
4267
4268   if (!(mapbits & CHUNK_MAP_ALLOCATED)) {
4269     void* pageaddr = (void*)(uintptr_t(aPtr) & ~gPageSizeMask);
4270     *aInfo = {TagFreedPage, pageaddr, gPageSize, chunk->arena->mId};
4271     return;
4272   }
4273
4274   if (mapbits & CHUNK_MAP_LARGE) {
4275     // It's a large allocation. Only the first page of a large
4276     // allocation contains its size, so if the address is not in
4277     // the first page, scan back to find the allocation size.
4278     size_t size;
4279     while (true) {
4280       size = mapbits & ~gPageSizeMask;
4281       if (size != 0) {
4282         break;
4283       }
4284
4285       // The following two return paths shouldn't occur in
4286       // practice unless there is heap corruption.
4287       pageind--;
4288       MOZ_DIAGNOSTIC_ASSERT(pageind >= gChunkHeaderNumPages);
4289       if (pageind < gChunkHeaderNumPages) {
4290         *aInfo = {TagUnknown, nullptr, 0, 0};
4291         return;
4292       }
4293
4294       mapbits = chunk->map[pageind].bits;
4295       MOZ_DIAGNOSTIC_ASSERT(mapbits & CHUNK_MAP_LARGE);
4296       if (!(mapbits & CHUNK_MAP_LARGE)) {
4297         *aInfo = {TagUnknown, nullptr, 0, 0};
4298         return;
4299       }
4300     }
4301
4302     void* addr = ((char*)chunk) + (pageind << gPageSize2Pow);
4303     *aInfo = {TagLiveAlloc, addr, size, chunk->arena->mId};
4304     return;
4305   }
4306
4307   // It must be a small allocation.
4308   auto run = (arena_run_t*)(mapbits & ~gPageSizeMask);
4309   MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
4310
4311   // The allocation size is stored in the run metadata.
4312   size_t size = run->mBin->mSizeClass;
4313
4314   // Address of the first possible pointer in the run after its headers.
4315   uintptr_t reg0_addr = (uintptr_t)run + run->mBin->mRunFirstRegionOffset;
4316   if (aPtr < (void*)reg0_addr) {
4317     // In the run header.
4318     *aInfo = {TagUnknown, nullptr, 0, 0};
4319     return;
4320   }
4321
4322   // Position in the run.
4323   unsigned regind = ((uintptr_t)aPtr - reg0_addr) / size;
4324
4325   // Pointer to the allocation's base address.
4326   void* addr = (void*)(reg0_addr + regind * size);
4327
4328   // Check if the allocation has been freed.
4329   unsigned elm = regind >> (LOG2(sizeof(int)) + 3);
4330   unsigned bit = regind - (elm << (LOG2(sizeof(int)) + 3));
4331   PtrInfoTag tag =
4332       ((run->mRegionsMask[elm] & (1U << bit))) ? TagFreedAlloc : TagLiveAlloc;
4333
4334   *aInfo = {tag, addr, size, chunk->arena->mId};
4335 }
4336
4337 namespace Debug {
4338 // Helper for debuggers. We don't want it to be inlined and optimized out.
4339 MOZ_NEVER_INLINE jemalloc_ptr_info_t* jemalloc_ptr_info(const void* aPtr) {
4340   static jemalloc_ptr_info_t info;
4341   MozJemalloc::jemalloc_ptr_info(aPtr, &info);
4342   return &info;
4343 }
4344 }  // namespace Debug
4345
4346 arena_chunk_t* arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
4347                                     arena_chunk_map_t* aMapElm) {
4348   arena_run_t* run;
4349   arena_bin_t* bin;
4350   size_t size;
4351
4352   run = (arena_run_t*)(aMapElm->bits & ~gPageSizeMask);
4353   MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
4354   bin = run->mBin;
4355   size = bin->mSizeClass;
4356   MOZ_DIAGNOSTIC_ASSERT(uintptr_t(aPtr) >=
4357                         uintptr_t(run) + bin->mRunFirstRegionOffset);
4358
4359   arena_run_reg_dalloc(run, bin, aPtr, size);
4360   run->mNumFree++;
4361   arena_chunk_t* dealloc_chunk = nullptr;
4362
4363   if (run->mNumFree == bin->mRunNumRegions) {
4364     // This run is entirely freed, remove it from our bin.
4365 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
4366     run->mMagic = 0;
4367 #endif
4368     MOZ_ASSERT(bin->mNonFullRuns.ElementProbablyInList(run));
4369     bin->mNonFullRuns.remove(run);
4370     dealloc_chunk = DallocRun(run, true);
4371     bin->mNumRuns--;
4372   } else if (run->mNumFree == 1) {
4373     // This is first slot we freed from this run, start tracking.
4374     MOZ_ASSERT(!bin->mNonFullRuns.ElementProbablyInList(run));
4375     bin->mNonFullRuns.pushFront(run);
4376   }
4377   // else we just keep the run in mNonFullRuns where it is.
4378   // Note that we could move it to the head of the list here to get a strict
4379   // "most-recently-freed" order, but some of our benchmarks seem to be more
4380   // sensible to the increased overhead that this brings than to the order
4381   // supposedly slightly better for keeping CPU caches warm if we do.
4382   // In general we cannot foresee the future, so any order we choose might
4383   // perform different for different use cases and needs to be balanced with
4384   // the book-keeping overhead via measurements.
4385
4386   mStats.allocated_small -= size;
4387   mStats.operations++;
4388
4389   return dealloc_chunk;
4390 }
4391
4392 arena_chunk_t* arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) {
4393   MOZ_DIAGNOSTIC_ASSERT((uintptr_t(aPtr) & gPageSizeMask) == 0);
4394   size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
4395   size_t size = aChunk->map[pageind].bits & ~gPageSizeMask;
4396
4397   mStats.allocated_large -= size;
4398   mStats.operations++;
4399
4400   return DallocRun((arena_run_t*)aPtr, true);
4401 }
4402
4403 static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) {
4404   MOZ_ASSERT(aPtr);
4405   MOZ_ASSERT(aOffset != 0);
4406   MOZ_ASSERT(GetChunkOffsetForPtr(aPtr) == aOffset);
4407
4408   auto chunk = (arena_chunk_t*)((uintptr_t)aPtr - aOffset);
4409   auto arena = chunk->arena;
4410   MOZ_ASSERT(arena);
4411   MOZ_DIAGNOSTIC_ASSERT(arena->mMagic == ARENA_MAGIC);
4412   MOZ_RELEASE_ASSERT(!aArena || arena == aArena);
4413
4414   size_t pageind = aOffset >> gPageSize2Pow;
4415   if (opt_poison) {
4416     AllocInfo info = AllocInfo::GetInChunk(aPtr, chunk, pageind);
4417     MOZ_ASSERT(info.IsValid());
4418     MaybePoison(aPtr, info.Size());
4419   }
4420
4421   arena_chunk_t* chunk_dealloc_delay = nullptr;
4422   bool should_purge;
4423   {
4424     MaybeMutexAutoLock lock(arena->mLock);
4425     arena_chunk_map_t* mapelm = &chunk->map[pageind];
4426     MOZ_RELEASE_ASSERT(
4427         (mapelm->bits &
4428          (CHUNK_MAP_FRESH_MADVISED_OR_DECOMMITTED | CHUNK_MAP_ZEROED)) == 0,
4429         "Freeing in a page with bad bits.");
4430     MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0,
4431                        "Double-free?");
4432     if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
4433       // Small allocation.
4434       chunk_dealloc_delay = arena->DallocSmall(chunk, aPtr, mapelm);
4435     } else {
4436       // Large allocation.
4437       chunk_dealloc_delay = arena->DallocLarge(chunk, aPtr);
4438     }
4439
4440     should_purge = arena->mNumDirty > arena->EffectiveMaxDirty();
4441   }
4442
4443   if (chunk_dealloc_delay) {
4444     chunk_dealloc((void*)chunk_dealloc_delay, kChunkSize, ARENA_CHUNK);
4445   }
4446
4447   while (should_purge) {
4448     should_purge = arena->Purge();
4449   }
4450 }
4451
4452 static inline void idalloc(void* ptr, arena_t* aArena) {
4453   size_t offset;
4454
4455   MOZ_ASSERT(ptr);
4456
4457   offset = GetChunkOffsetForPtr(ptr);
4458   if (offset != 0) {
4459     arena_dalloc(ptr, offset, aArena);
4460   } else {
4461     huge_dalloc(ptr, aArena);
4462   }
4463 }
4464
4465 void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
4466                                 size_t aOldSize) {
4467   MOZ_ASSERT(aSize < aOldSize);
4468
4469   // Shrink the run, and make trailing pages available for other
4470   // allocations.
4471   bool should_purge;
4472   {
4473     MaybeMutexAutoLock lock(mLock);
4474     TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
4475     mStats.allocated_large -= aOldSize - aSize;
4476     mStats.operations++;
4477
4478     should_purge = mNumDirty > EffectiveMaxDirty();
4479   }
4480   while (should_purge) {
4481     should_purge = Purge();
4482   }
4483 }
4484
4485 // Returns whether reallocation was successful.
4486 bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
4487                               size_t aOldSize) {
4488   size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
4489   size_t npages = aOldSize >> gPageSize2Pow;
4490
4491   MaybeMutexAutoLock lock(mLock);
4492   MOZ_DIAGNOSTIC_ASSERT(aOldSize ==
4493                         (aChunk->map[pageind].bits & ~gPageSizeMask));
4494
4495   // Try to extend the run.
4496   MOZ_ASSERT(aSize > aOldSize);
4497   if (pageind + npages < gChunkNumPages - 1 &&
4498       (aChunk->map[pageind + npages].bits &
4499        (CHUNK_MAP_ALLOCATED | CHUNK_MAP_BUSY)) == 0 &&
4500       (aChunk->map[pageind + npages].bits & ~gPageSizeMask) >=
4501           aSize - aOldSize) {
4502     // The next run is available and sufficiently large.  Split the
4503     // following run, then merge the first part with the existing
4504     // allocation.
4505     if (!SplitRun((arena_run_t*)(uintptr_t(aChunk) +
4506                                  ((pageind + npages) << gPageSize2Pow)),
4507                   aSize - aOldSize, true, false)) {
4508       return false;
4509     }
4510
4511     aChunk->map[pageind].bits = aSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
4512     aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
4513
4514     mStats.allocated_large += aSize - aOldSize;
4515     mStats.operations++;
4516     return true;
4517   }
4518
4519   return false;
4520 }
4521
4522 void* arena_t::RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize) {
4523   void* ret;
4524   size_t copysize;
4525   SizeClass sizeClass(aSize);
4526
4527   // Try to avoid moving the allocation.
4528   if (aOldSize <= gMaxLargeClass && sizeClass.Size() == aOldSize) {
4529     if (aSize < aOldSize) {
4530       MaybePoison((void*)(uintptr_t(aPtr) + aSize), aOldSize - aSize);
4531     }
4532     return aPtr;
4533   }
4534   if (sizeClass.Type() == SizeClass::Large && aOldSize > gMaxBinClass &&
4535       aOldSize <= gMaxLargeClass) {
4536     arena_chunk_t* chunk = GetChunkForPtr(aPtr);
4537     if (sizeClass.Size() < aOldSize) {
4538       // Fill before shrinking in order to avoid a race.
4539       MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize);
4540       RallocShrinkLarge(chunk, aPtr, sizeClass.Size(), aOldSize);
4541       return aPtr;
4542     }
4543     if (RallocGrowLarge(chunk, aPtr, sizeClass.Size(), aOldSize)) {
4544       ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
4545       return aPtr;
4546     }
4547   }
4548
4549   // If we get here, then aSize and aOldSize are different enough that we
4550   // need to move the object or the run can't be expanded because the memory
4551   // after it is allocated or busy.  In that case, fall back to allocating new
4552   // space and copying. Allow non-private arenas to switch arenas.
4553   ret = (mIsPrivate ? this : choose_arena(aSize))->Malloc(aSize, false);
4554   if (!ret) {
4555     return nullptr;
4556   }
4557
4558   // Junk/zero-filling were already done by arena_t::Malloc().
4559   copysize = (aSize < aOldSize) ? aSize : aOldSize;
4560 #ifdef VM_COPY_MIN
4561   if (copysize >= VM_COPY_MIN) {
4562     pages_copy(ret, aPtr, copysize);
4563   } else
4564 #endif
4565   {
4566     memcpy(ret, aPtr, copysize);
4567   }
4568   idalloc(aPtr, this);
4569   return ret;
4570 }
4571
4572 void* arena_t::Ralloc(void* aPtr, size_t aSize, size_t aOldSize) {
4573   MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
4574   MOZ_ASSERT(aPtr);
4575   MOZ_ASSERT(aSize != 0);
4576
4577   return (aSize <= gMaxLargeClass) ? RallocSmallOrLarge(aPtr, aSize, aOldSize)
4578                                    : RallocHuge(aPtr, aSize, aOldSize);
4579 }
4580
4581 void* arena_t::operator new(size_t aCount, const fallible_t&) noexcept {
4582   MOZ_ASSERT(aCount == sizeof(arena_t));
4583   return TypedBaseAlloc<arena_t>::alloc();
4584 }
4585
4586 void arena_t::operator delete(void* aPtr) {
4587   TypedBaseAlloc<arena_t>::dealloc((arena_t*)aPtr);
4588 }
4589
4590 arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
4591   unsigned i;
4592
4593   memset(&mLink, 0, sizeof(mLink));
4594   memset(&mStats, 0, sizeof(arena_stats_t));
4595   mId = 0;
4596
4597   // Initialize chunks.
4598   mChunksDirty.Init();
4599 #ifdef MALLOC_DOUBLE_PURGE
4600   new (&mChunksMAdvised) DoublyLinkedList<arena_chunk_t>();
4601 #endif
4602   mSpare = nullptr;
4603
4604   mRandomizeSmallAllocations = opt_randomize_small;
4605   MaybeMutex::DoLock doLock = MaybeMutex::MUST_LOCK;
4606   if (aParams) {
4607     uint32_t randFlags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
4608     switch (randFlags) {
4609       case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED:
4610         mRandomizeSmallAllocations = true;
4611         break;
4612       case ARENA_FLAG_RANDOMIZE_SMALL_DISABLED:
4613         mRandomizeSmallAllocations = false;
4614         break;
4615       case ARENA_FLAG_RANDOMIZE_SMALL_DEFAULT:
4616       default:
4617         break;
4618     }
4619
4620     uint32_t threadFlags = aParams->mFlags & ARENA_FLAG_THREAD_MASK;
4621     if (threadFlags == ARENA_FLAG_THREAD_MAIN_THREAD_ONLY) {
4622       // At the moment we require that any ARENA_FLAG_THREAD_MAIN_THREAD_ONLY
4623       // arenas are created and therefore always accessed by the main thread.
4624       // This is for two reasons:
4625       //  * it allows jemalloc_stats to read their statistics (we also require
4626       //    that jemalloc_stats is only used on the main thread).
4627       //  * Only main-thread or threadsafe arenas can be guanteed to be in a
4628       //    consistent state after a fork() from the main thread.  If fork()
4629       //    occurs off-thread then the new child process cannot use these arenas
4630       //    (new children should usually exec() or exit() since other data may
4631       //    also be inconsistent).
4632       MOZ_ASSERT(gArenas.IsOnMainThread());
4633       MOZ_ASSERT(aIsPrivate);
4634       doLock = MaybeMutex::AVOID_LOCK_UNSAFE;
4635     }
4636
4637     mMaxDirtyIncreaseOverride = aParams->mMaxDirtyIncreaseOverride;
4638     mMaxDirtyDecreaseOverride = aParams->mMaxDirtyDecreaseOverride;
4639   } else {
4640     mMaxDirtyIncreaseOverride = 0;
4641     mMaxDirtyDecreaseOverride = 0;
4642   }
4643
4644   MOZ_RELEASE_ASSERT(mLock.Init(doLock));
4645
4646   mPRNG = nullptr;
4647   mIsPRNGInitializing = false;
4648
4649   mIsPrivate = aIsPrivate;
4650
4651   mNumDirty = 0;
4652   mNumFresh = 0;
4653   mNumMAdvised = 0;
4654   // The default maximum amount of dirty pages allowed on arenas is a fraction
4655   // of opt_dirty_max.
4656   mMaxDirty = (aParams && aParams->mMaxDirty) ? aParams->mMaxDirty
4657                                               : (opt_dirty_max / 8);
4658
4659   mRunsAvail.Init();
4660
4661   // Initialize bins.
4662   SizeClass sizeClass(1);
4663
4664   for (i = 0;; i++) {
4665     arena_bin_t& bin = mBins[i];
4666     bin.Init(sizeClass);
4667
4668     // SizeClass doesn't want sizes larger than gMaxBinClass for now.
4669     if (sizeClass.Size() == gMaxBinClass) {
4670       break;
4671     }
4672     sizeClass = sizeClass.Next();
4673   }
4674   MOZ_ASSERT(i == NUM_SMALL_CLASSES - 1);
4675
4676 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
4677   mMagic = ARENA_MAGIC;
4678 #endif
4679 }
4680
4681 arena_t::~arena_t() {
4682   size_t i;
4683   MaybeMutexAutoLock lock(mLock);
4684   MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(),
4685                      "Arena is still registered");
4686   MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large,
4687                      "Arena is not empty");
4688   if (mSpare) {
4689     chunk_dealloc(mSpare, kChunkSize, ARENA_CHUNK);
4690   }
4691   for (i = 0; i < NUM_SMALL_CLASSES; i++) {
4692     MOZ_RELEASE_ASSERT(mBins[i].mNonFullRuns.isEmpty(), "Bin is not empty");
4693   }
4694 #ifdef MOZ_DEBUG
4695   {
4696     MutexAutoLock lock(huge_mtx);
4697     // This is an expensive check, so we only do it on debug builds.
4698     for (auto node : huge.iter()) {
4699       MOZ_RELEASE_ASSERT(node->mArenaId != mId, "Arena has huge allocations");
4700     }
4701   }
4702 #endif
4703   mId = 0;
4704 }
4705
4706 arena_t* ArenaCollection::CreateArena(bool aIsPrivate,
4707                                       arena_params_t* aParams) {
4708   arena_t* ret = new (fallible) arena_t(aParams, aIsPrivate);
4709   if (!ret) {
4710     // Only reached if there is an OOM error.
4711
4712     // OOM here is quite inconvenient to propagate, since dealing with it
4713     // would require a check for failure in the fast path.  Instead, punt
4714     // by using the first arena.
4715     // In practice, this is an extremely unlikely failure.
4716     _malloc_message(_getprogname(), ": (malloc) Error initializing arena\n");
4717
4718     return mDefaultArena;
4719   }
4720
4721   MutexAutoLock lock(mLock);
4722
4723   // For public arenas, it's fine to just use incrementing arena id
4724   if (!aIsPrivate) {
4725     ret->mId = mLastPublicArenaId++;
4726     mArenas.Insert(ret);
4727     return ret;
4728   }
4729
4730   // For private arenas, generate a cryptographically-secure random id for the
4731   // new arena. If an attacker manages to get control of the process, this
4732   // should make it more difficult for them to "guess" the ID of a memory
4733   // arena, stopping them from getting data they may want
4734   Tree& tree = (ret->IsMainThreadOnly()) ? mMainThreadArenas : mPrivateArenas;
4735   arena_id_t arena_id;
4736   do {
4737     arena_id = MakeRandArenaId(ret->IsMainThreadOnly());
4738     // Keep looping until we ensure that the random number we just generated
4739     // isn't already in use by another active arena
4740   } while (GetByIdInternal(tree, arena_id));
4741
4742   ret->mId = arena_id;
4743   tree.Insert(ret);
4744   return ret;
4745 }
4746
4747 arena_id_t ArenaCollection::MakeRandArenaId(bool aIsMainThreadOnly) const {
4748   uint64_t rand;
4749   do {
4750     mozilla::Maybe<uint64_t> maybeRandomId = mozilla::RandomUint64();
4751     MOZ_RELEASE_ASSERT(maybeRandomId.isSome());
4752
4753     rand = maybeRandomId.value();
4754
4755     // Set or clear the least significant bit depending on if this is a
4756     // main-thread-only arena.  We use this in GetById.
4757     if (aIsMainThreadOnly) {
4758       rand = rand | MAIN_THREAD_ARENA_BIT;
4759     } else {
4760       rand = rand & ~MAIN_THREAD_ARENA_BIT;
4761     }
4762
4763     // Avoid 0 as an arena Id. We use 0 for disposed arenas.
4764   } while (rand == 0);
4765
4766   return arena_id_t(rand);
4767 }
4768
4769 // End arena.
4770 // ***************************************************************************
4771 // Begin general internal functions.
4772
4773 // Initialize huge allocation data.
4774 static void huge_init() MOZ_REQUIRES(gInitLock) {
4775   huge_mtx.Init();
4776   MOZ_PUSH_IGNORE_THREAD_SAFETY
4777   huge.Init();
4778   huge_allocated = 0;
4779   huge_mapped = 0;
4780   huge_operations = 0;
4781   MOZ_POP_THREAD_SAFETY
4782 }
4783
4784 void* arena_t::MallocHuge(size_t aSize, bool aZero) {
4785   return PallocHuge(aSize, kChunkSize, aZero);
4786 }
4787
4788 void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) {
4789   void* ret;
4790   size_t csize;
4791   size_t psize;
4792   extent_node_t* node;
4793
4794   // We're going to configure guard pages in the region between the
4795   // page-aligned size and the chunk-aligned size, so if those are the same
4796   // then we need to force that region into existence.
4797   csize = CHUNK_CEILING(aSize + gPageSize);
4798   if (csize < aSize) {
4799     // size is large enough to cause size_t wrap-around.
4800     return nullptr;
4801   }
4802
4803   // Allocate an extent node with which to track the chunk.
4804   node = ExtentAlloc::alloc();
4805   if (!node) {
4806     return nullptr;
4807   }
4808
4809   // Allocate one or more contiguous chunks for this request.
4810   ret = chunk_alloc(csize, aAlignment, false);
4811   if (!ret) {
4812     ExtentAlloc::dealloc(node);
4813     return nullptr;
4814   }
4815   psize = PAGE_CEILING(aSize);
4816 #ifdef MOZ_DEBUG
4817   if (aZero) {
4818     chunk_assert_zero(ret, psize);
4819   }
4820 #endif
4821
4822   // Insert node into huge.
4823   node->mAddr = ret;
4824   node->mSize = psize;
4825   node->mArena = this;
4826   node->mArenaId = mId;
4827
4828   {
4829     MutexAutoLock lock(huge_mtx);
4830     huge.Insert(node);
4831
4832     // Although we allocated space for csize bytes, we indicate that we've
4833     // allocated only psize bytes.
4834     //
4835     // If DECOMMIT is defined, this is a reasonable thing to do, since
4836     // we'll explicitly decommit the bytes in excess of psize.
4837     //
4838     // If DECOMMIT is not defined, then we're relying on the OS to be lazy
4839     // about how it allocates physical pages to mappings.  If we never
4840     // touch the pages in excess of psize, the OS won't allocate a physical
4841     // page, and we won't use more than psize bytes of physical memory.
4842     //
4843     // A correct program will only touch memory in excess of how much it
4844     // requested if it first calls malloc_usable_size and finds out how
4845     // much space it has to play with.  But because we set node->mSize =
4846     // psize above, malloc_usable_size will return psize, not csize, and
4847     // the program will (hopefully) never touch bytes in excess of psize.
4848     // Thus those bytes won't take up space in physical memory, and we can
4849     // reasonably claim we never "allocated" them in the first place.
4850     huge_allocated += psize;
4851     huge_mapped += csize;
4852     huge_operations++;
4853   }
4854
4855   pages_decommit((void*)((uintptr_t)ret + psize), csize - psize);
4856
4857   if (!aZero) {
4858     ApplyZeroOrJunk(ret, psize);
4859   }
4860
4861   return ret;
4862 }
4863
4864 void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
4865   void* ret;
4866   size_t copysize;
4867
4868   // Avoid moving the allocation if the size class would not change.
4869   if (aOldSize > gMaxLargeClass &&
4870       CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) {
4871     size_t psize = PAGE_CEILING(aSize);
4872     if (aSize < aOldSize) {
4873       MaybePoison((void*)((uintptr_t)aPtr + aSize), aOldSize - aSize);
4874     }
4875     if (psize < aOldSize) {
4876       extent_node_t key;
4877
4878       pages_decommit((void*)((uintptr_t)aPtr + psize), aOldSize - psize);
4879
4880       // Update recorded size.
4881       MutexAutoLock lock(huge_mtx);
4882       key.mAddr = const_cast<void*>(aPtr);
4883       extent_node_t* node = huge.Search(&key);
4884       MOZ_ASSERT(node);
4885       MOZ_ASSERT(node->mSize == aOldSize);
4886       MOZ_RELEASE_ASSERT(node->mArena == this);
4887       huge_allocated -= aOldSize - psize;
4888       huge_operations++;
4889       // No need to change huge_mapped, because we didn't (un)map anything.
4890       node->mSize = psize;
4891     } else if (psize > aOldSize) {
4892       if (!pages_commit((void*)((uintptr_t)aPtr + aOldSize),
4893                         psize - aOldSize)) {
4894         return nullptr;
4895       }
4896
4897       // We need to update the recorded size if the size increased,
4898       // so malloc_usable_size doesn't return a value smaller than
4899       // what was requested via realloc().
4900       extent_node_t key;
4901       MutexAutoLock lock(huge_mtx);
4902       key.mAddr = const_cast<void*>(aPtr);
4903       extent_node_t* node = huge.Search(&key);
4904       MOZ_ASSERT(node);
4905       MOZ_ASSERT(node->mSize == aOldSize);
4906       MOZ_RELEASE_ASSERT(node->mArena == this);
4907       huge_allocated += psize - aOldSize;
4908       huge_operations++;
4909       // No need to change huge_mapped, because we didn't
4910       // (un)map anything.
4911       node->mSize = psize;
4912     }
4913
4914     if (aSize > aOldSize) {
4915       ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
4916     }
4917     return aPtr;
4918   }
4919
4920   // If we get here, then aSize and aOldSize are different enough that we
4921   // need to use a different size class.  In that case, fall back to allocating
4922   // new space and copying. Allow non-private arenas to switch arenas.
4923   ret = (mIsPrivate ? this : choose_arena(aSize))->MallocHuge(aSize, false);
4924   if (!ret) {
4925     return nullptr;
4926   }
4927
4928   copysize = (aSize < aOldSize) ? aSize : aOldSize;
4929 #ifdef VM_COPY_MIN
4930   if (copysize >= VM_COPY_MIN) {
4931     pages_copy(ret, aPtr, copysize);
4932   } else
4933 #endif
4934   {
4935     memcpy(ret, aPtr, copysize);
4936   }
4937   idalloc(aPtr, this);
4938   return ret;
4939 }
4940
4941 static void huge_dalloc(void* aPtr, arena_t* aArena) {
4942   extent_node_t* node;
4943   size_t mapped = 0;
4944   {
4945     extent_node_t key;
4946     MutexAutoLock lock(huge_mtx);
4947
4948     // Extract from tree of huge allocations.
4949     key.mAddr = aPtr;
4950     node = huge.Search(&key);
4951     MOZ_RELEASE_ASSERT(node, "Double-free?");
4952     MOZ_ASSERT(node->mAddr == aPtr);
4953     MOZ_RELEASE_ASSERT(!aArena || node->mArena == aArena);
4954     // See AllocInfo::Arena.
4955     MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId);
4956     huge.Remove(node);
4957
4958     mapped = CHUNK_CEILING(node->mSize + gPageSize);
4959     huge_allocated -= node->mSize;
4960     huge_mapped -= mapped;
4961     huge_operations++;
4962   }
4963
4964   // Unmap chunk.
4965   chunk_dealloc(node->mAddr, mapped, HUGE_CHUNK);
4966
4967   ExtentAlloc::dealloc(node);
4968 }
4969
4970 size_t GetKernelPageSize() {
4971   static size_t kernel_page_size = ([]() {
4972 #ifdef XP_WIN
4973     SYSTEM_INFO info;
4974     GetSystemInfo(&info);
4975     return info.dwPageSize;
4976 #else
4977     long result = sysconf(_SC_PAGESIZE);
4978     MOZ_ASSERT(result != -1);
4979     return result;
4980 #endif
4981   })();
4982   return kernel_page_size;
4983 }
4984
4985 // Returns whether the allocator was successfully initialized.
4986 static bool malloc_init_hard() {
4987   unsigned i;
4988   const char* opts;
4989
4990   AutoLock<StaticMutex> lock(gInitLock);
4991
4992   if (malloc_initialized) {
4993     // Another thread initialized the allocator before this one
4994     // acquired gInitLock.
4995     return true;
4996   }
4997
4998   if (!thread_arena.init()) {
4999     return true;
5000   }
5001
5002   // Get page size and number of CPUs
5003   const size_t page_size = GetKernelPageSize();
5004   // We assume that the page size is a power of 2.
5005   MOZ_ASSERT(IsPowerOfTwo(page_size));
5006 #ifdef MALLOC_STATIC_PAGESIZE
5007   if (gPageSize % page_size) {
5008     _malloc_message(
5009         _getprogname(),
5010         "Compile-time page size does not divide the runtime one.\n");
5011     MOZ_CRASH();
5012   }
5013 #else
5014   gRealPageSize = gPageSize = page_size;
5015 #endif
5016
5017   // Get runtime configuration.
5018   if ((opts = getenv("MALLOC_OPTIONS"))) {
5019     for (i = 0; opts[i] != '\0'; i++) {
5020       // All options are single letters, some take a *prefix* numeric argument.
5021
5022       // Parse the argument.
5023       unsigned prefix_arg = 0;
5024       while (opts[i] >= '0' && opts[i] <= '9') {
5025         prefix_arg *= 10;
5026         prefix_arg += opts[i] - '0';
5027         i++;
5028       }
5029
5030       switch (opts[i]) {
5031         case 'f':
5032           opt_dirty_max >>= prefix_arg ? prefix_arg : 1;
5033           break;
5034         case 'F':
5035           prefix_arg = prefix_arg ? prefix_arg : 1;
5036           if (opt_dirty_max == 0) {
5037             opt_dirty_max = 1;
5038             prefix_arg--;
5039           }
5040           opt_dirty_max <<= prefix_arg;
5041           if (opt_dirty_max == 0) {
5042             // If the shift above overflowed all the bits then clamp the result
5043             // instead.  If we started with DIRTY_MAX_DEFAULT then this will
5044             // always be a power of two so choose the maximum power of two that
5045             // fits in a size_t.
5046             opt_dirty_max = size_t(1) << (sizeof(size_t) * CHAR_BIT - 1);
5047           }
5048           break;
5049 #ifdef MALLOC_RUNTIME_CONFIG
5050         case 'j':
5051           opt_junk = false;
5052           break;
5053         case 'J':
5054           opt_junk = true;
5055           break;
5056         case 'q':
5057           // The argument selects how much poisoning to do.
5058           opt_poison = NONE;
5059           break;
5060         case 'Q':
5061           if (opts[i + 1] == 'Q') {
5062             // Maximum poisoning.
5063             i++;
5064             opt_poison = ALL;
5065           } else {
5066             opt_poison = SOME;
5067             opt_poison_size = kCacheLineSize * prefix_arg;
5068           }
5069           break;
5070         case 'z':
5071           opt_zero = false;
5072           break;
5073         case 'Z':
5074           opt_zero = true;
5075           break;
5076 #  ifndef MALLOC_STATIC_PAGESIZE
5077         case 'P':
5078           MOZ_ASSERT(gPageSize >= 4_KiB);
5079           MOZ_ASSERT(gPageSize <= 64_KiB);
5080           prefix_arg = prefix_arg ? prefix_arg : 1;
5081           gPageSize <<= prefix_arg;
5082           // We know that if the shift causes gPageSize to be zero then it's
5083           // because it shifted all the bits off.  We didn't start with zero.
5084           // Therefore if gPageSize is out of bounds we set it to 64KiB.
5085           if (gPageSize < 4_KiB || gPageSize > 64_KiB) {
5086             gPageSize = 64_KiB;
5087           }
5088           break;
5089 #  endif
5090 #endif
5091         case 'r':
5092           opt_randomize_small = false;
5093           break;
5094         case 'R':
5095           opt_randomize_small = true;
5096           break;
5097         default: {
5098           char cbuf[2];
5099
5100           cbuf[0] = opts[i];
5101           cbuf[1] = '\0';
5102           _malloc_message(_getprogname(),
5103                           ": (malloc) Unsupported character "
5104                           "in malloc options: '",
5105                           cbuf, "'\n");
5106         }
5107       }
5108     }
5109   }
5110
5111 #ifndef MALLOC_STATIC_PAGESIZE
5112   DefineGlobals();
5113 #endif
5114   gRecycledSize = 0;
5115
5116   chunks_init();
5117   huge_init();
5118   base_init();
5119
5120   // Initialize arenas collection here.
5121   if (!gArenas.Init()) {
5122     return false;
5123   }
5124
5125   // Assign the default arena to the initial thread.
5126   thread_arena.set(gArenas.GetDefault());
5127
5128   if (!gChunkRTree.Init()) {
5129     return false;
5130   }
5131
5132   malloc_initialized = true;
5133
5134   // Dummy call so that the function is not removed by dead-code elimination
5135   Debug::jemalloc_ptr_info(nullptr);
5136
5137 #if !defined(XP_WIN) && !defined(XP_DARWIN)
5138   // Prevent potential deadlock on malloc locks after fork.
5139   pthread_atfork(_malloc_prefork, _malloc_postfork_parent,
5140                  _malloc_postfork_child);
5141 #endif
5142
5143   return true;
5144 }
5145
5146 // End general internal functions.
5147 // ***************************************************************************
5148 // Begin malloc(3)-compatible functions.
5149
5150 // The BaseAllocator class is a helper class that implements the base allocator
5151 // functions (malloc, calloc, realloc, free, memalign) for a given arena,
5152 // or an appropriately chosen arena (per choose_arena()) when none is given.
5153 struct BaseAllocator {
5154 #define MALLOC_DECL(name, return_type, ...) \
5155   inline return_type name(__VA_ARGS__);
5156
5157 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
5158 #include "malloc_decls.h"
5159
5160   explicit BaseAllocator(arena_t* aArena) : mArena(aArena) {}
5161
5162  private:
5163   arena_t* mArena;
5164 };
5165
5166 #define MALLOC_DECL(name, return_type, ...)                  \
5167   inline return_type MozJemalloc::name(                      \
5168       ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) {              \
5169     BaseAllocator allocator(nullptr);                        \
5170     return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
5171   }
5172 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
5173 #include "malloc_decls.h"
5174
5175 inline void* BaseAllocator::malloc(size_t aSize) {
5176   void* ret;
5177   arena_t* arena;
5178
5179   if (!malloc_init()) {
5180     ret = nullptr;
5181     goto RETURN;
5182   }
5183
5184   if (aSize == 0) {
5185     aSize = 1;
5186   }
5187   // If mArena is non-null, it must not be in the first page.
5188   MOZ_DIAGNOSTIC_ASSERT_IF(mArena, (size_t)mArena >= gPageSize);
5189   arena = mArena ? mArena : choose_arena(aSize);
5190   ret = arena->Malloc(aSize, /* aZero = */ false);
5191
5192 RETURN:
5193   if (!ret) {
5194     errno = ENOMEM;
5195   }
5196
5197   return ret;
5198 }
5199
5200 inline void* BaseAllocator::memalign(size_t aAlignment, size_t aSize) {
5201   MOZ_ASSERT(((aAlignment - 1) & aAlignment) == 0);
5202
5203   if (!malloc_init()) {
5204     return nullptr;
5205   }
5206
5207   if (aSize == 0) {
5208     aSize = 1;
5209   }
5210
5211   aAlignment = aAlignment < sizeof(void*) ? sizeof(void*) : aAlignment;
5212   arena_t* arena = mArena ? mArena : choose_arena(aSize);
5213   return arena->Palloc(aAlignment, aSize);
5214 }
5215
5216 inline void* BaseAllocator::calloc(size_t aNum, size_t aSize) {
5217   void* ret;
5218
5219   if (malloc_init()) {
5220     CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aSize;
5221     if (checkedSize.isValid()) {
5222       size_t allocSize = checkedSize.value();
5223       if (allocSize == 0) {
5224         allocSize = 1;
5225       }
5226       arena_t* arena = mArena ? mArena : choose_arena(allocSize);
5227       ret = arena->Malloc(allocSize, /* aZero = */ true);
5228     } else {
5229       ret = nullptr;
5230     }
5231   } else {
5232     ret = nullptr;
5233   }
5234
5235   if (!ret) {
5236     errno = ENOMEM;
5237   }
5238
5239   return ret;
5240 }
5241
5242 inline void* BaseAllocator::realloc(void* aPtr, size_t aSize) {
5243   void* ret;
5244
5245   if (aSize == 0) {
5246     aSize = 1;
5247   }
5248
5249   if (aPtr) {
5250     MOZ_RELEASE_ASSERT(malloc_initialized);
5251
5252     auto info = AllocInfo::Get(aPtr);
5253     auto arena = info.Arena();
5254     MOZ_RELEASE_ASSERT(!mArena || arena == mArena);
5255     ret = arena->Ralloc(aPtr, aSize, info.Size());
5256   } else {
5257     if (!malloc_init()) {
5258       ret = nullptr;
5259     } else {
5260       arena_t* arena = mArena ? mArena : choose_arena(aSize);
5261       ret = arena->Malloc(aSize, /* aZero = */ false);
5262     }
5263   }
5264
5265   if (!ret) {
5266     errno = ENOMEM;
5267   }
5268   return ret;
5269 }
5270
5271 inline void BaseAllocator::free(void* aPtr) {
5272   size_t offset;
5273
5274   // A version of idalloc that checks for nullptr pointer.
5275   offset = GetChunkOffsetForPtr(aPtr);
5276   if (offset != 0) {
5277     MOZ_RELEASE_ASSERT(malloc_initialized);
5278     arena_dalloc(aPtr, offset, mArena);
5279   } else if (aPtr) {
5280     MOZ_RELEASE_ASSERT(malloc_initialized);
5281     huge_dalloc(aPtr, mArena);
5282   }
5283 }
5284
5285 inline int MozJemalloc::posix_memalign(void** aMemPtr, size_t aAlignment,
5286                                        size_t aSize) {
5287   return AlignedAllocator<memalign>::posix_memalign(aMemPtr, aAlignment, aSize);
5288 }
5289
5290 inline void* MozJemalloc::aligned_alloc(size_t aAlignment, size_t aSize) {
5291   return AlignedAllocator<memalign>::aligned_alloc(aAlignment, aSize);
5292 }
5293
5294 inline void* MozJemalloc::valloc(size_t aSize) {
5295   return AlignedAllocator<memalign>::valloc(aSize);
5296 }
5297
5298 // End malloc(3)-compatible functions.
5299 // ***************************************************************************
5300 // Begin non-standard functions.
5301
5302 // This was added by Mozilla for use by SQLite.
5303 inline size_t MozJemalloc::malloc_good_size(size_t aSize) {
5304   if (aSize <= gMaxLargeClass) {
5305     // Small or large
5306     aSize = SizeClass(aSize).Size();
5307   } else {
5308     // Huge.  We use PAGE_CEILING to get psize, instead of using
5309     // CHUNK_CEILING to get csize.  This ensures that this
5310     // malloc_usable_size(malloc(n)) always matches
5311     // malloc_good_size(n).
5312     aSize = PAGE_CEILING(aSize);
5313   }
5314   return aSize;
5315 }
5316
5317 inline size_t MozJemalloc::malloc_usable_size(usable_ptr_t aPtr) {
5318   return AllocInfo::GetValidated(aPtr).Size();
5319 }
5320
5321 inline void MozJemalloc::jemalloc_stats_internal(
5322     jemalloc_stats_t* aStats, jemalloc_bin_stats_t* aBinStats) {
5323   size_t non_arena_mapped, chunk_header_size;
5324
5325   if (!aStats) {
5326     return;
5327   }
5328   if (!malloc_init()) {
5329     memset(aStats, 0, sizeof(*aStats));
5330     return;
5331   }
5332   if (aBinStats) {
5333     memset(aBinStats, 0, sizeof(jemalloc_bin_stats_t) * NUM_SMALL_CLASSES);
5334   }
5335
5336   // Gather runtime settings.
5337   aStats->opt_junk = opt_junk;
5338   aStats->opt_randomize_small = opt_randomize_small;
5339   aStats->opt_zero = opt_zero;
5340   aStats->quantum = kQuantum;
5341   aStats->quantum_max = kMaxQuantumClass;
5342   aStats->quantum_wide = kQuantumWide;
5343   aStats->quantum_wide_max = kMaxQuantumWideClass;
5344   aStats->subpage_max = gMaxSubPageClass;
5345   aStats->large_max = gMaxLargeClass;
5346   aStats->chunksize = kChunkSize;
5347   aStats->page_size = gPageSize;
5348   aStats->dirty_max = opt_dirty_max;
5349
5350   // Gather current memory usage statistics.
5351   aStats->narenas = 0;
5352   aStats->mapped = 0;
5353   aStats->allocated = 0;
5354   aStats->waste = 0;
5355   aStats->pages_dirty = 0;
5356   aStats->pages_fresh = 0;
5357   aStats->pages_madvised = 0;
5358   aStats->bookkeeping = 0;
5359   aStats->bin_unused = 0;
5360
5361   non_arena_mapped = 0;
5362
5363   // Get huge mapped/allocated.
5364   {
5365     MutexAutoLock lock(huge_mtx);
5366     non_arena_mapped += huge_mapped;
5367     aStats->allocated += huge_allocated;
5368     aStats->num_operations += huge_operations;
5369     MOZ_ASSERT(huge_mapped >= huge_allocated);
5370   }
5371
5372   // Get base mapped/allocated.
5373   {
5374     MutexAutoLock lock(base_mtx);
5375     non_arena_mapped += base_mapped;
5376     aStats->bookkeeping += base_committed;
5377     MOZ_ASSERT(base_mapped >= base_committed);
5378   }
5379
5380   gArenas.mLock.Lock();
5381
5382   // Stats can only read complete information if its run on the main thread.
5383   MOZ_ASSERT(gArenas.IsOnMainThreadWeak());
5384
5385   // Iterate over arenas.
5386   for (auto arena : gArenas.iter()) {
5387     // Cannot safely read stats for this arena and therefore stats would be
5388     // incomplete.
5389     MOZ_ASSERT(arena->mLock.SafeOnThisThread());
5390
5391     size_t arena_mapped, arena_allocated, arena_committed, arena_dirty,
5392         arena_fresh, arena_madvised, j, arena_unused, arena_headers;
5393
5394     arena_headers = 0;
5395     arena_unused = 0;
5396
5397     {
5398       MaybeMutexAutoLock lock(arena->mLock);
5399
5400       arena_mapped = arena->mStats.mapped;
5401
5402       // "committed" counts dirty and allocated memory.
5403       arena_committed = arena->mStats.committed << gPageSize2Pow;
5404
5405       arena_allocated =
5406           arena->mStats.allocated_small + arena->mStats.allocated_large;
5407
5408       arena_dirty = arena->mNumDirty << gPageSize2Pow;
5409       arena_fresh = arena->mNumFresh << gPageSize2Pow;
5410       arena_madvised = arena->mNumMAdvised << gPageSize2Pow;
5411
5412       aStats->num_operations += arena->mStats.operations;
5413
5414       for (j = 0; j < NUM_SMALL_CLASSES; j++) {
5415         arena_bin_t* bin = &arena->mBins[j];
5416         size_t bin_unused = 0;
5417         size_t num_non_full_runs = 0;
5418
5419         for (auto run : bin->mNonFullRuns) {
5420           bin_unused += run.mNumFree * bin->mSizeClass;
5421           num_non_full_runs++;
5422         }
5423
5424         arena_unused += bin_unused;
5425         arena_headers += bin->mNumRuns * bin->mRunFirstRegionOffset;
5426         if (aBinStats) {
5427           aBinStats[j].size = bin->mSizeClass;
5428           aBinStats[j].num_non_full_runs += num_non_full_runs;
5429           aBinStats[j].num_runs += bin->mNumRuns;
5430           aBinStats[j].bytes_unused += bin_unused;
5431           size_t bytes_per_run = static_cast<size_t>(bin->mRunSizePages)
5432                                  << gPageSize2Pow;
5433           aBinStats[j].bytes_total +=
5434               bin->mNumRuns * (bytes_per_run - bin->mRunFirstRegionOffset);
5435           aBinStats[j].bytes_per_run = bytes_per_run;
5436         }
5437       }
5438     }
5439
5440     MOZ_ASSERT(arena_mapped >= arena_committed);
5441     MOZ_ASSERT(arena_committed >= arena_allocated + arena_dirty);
5442
5443     aStats->mapped += arena_mapped;
5444     aStats->allocated += arena_allocated;
5445     aStats->pages_dirty += arena_dirty;
5446     aStats->pages_fresh += arena_fresh;
5447     aStats->pages_madvised += arena_madvised;
5448     // "waste" is committed memory that is neither dirty nor
5449     // allocated.  If you change this definition please update
5450     // memory/replace/logalloc/replay/Replay.cpp's jemalloc_stats calculation of
5451     // committed.
5452     MOZ_ASSERT(arena_committed >=
5453                (arena_allocated + arena_dirty + arena_unused + arena_headers));
5454     aStats->waste += arena_committed - arena_allocated - arena_dirty -
5455                      arena_unused - arena_headers;
5456     aStats->bin_unused += arena_unused;
5457     aStats->bookkeeping += arena_headers;
5458     aStats->narenas++;
5459   }
5460   gArenas.mLock.Unlock();
5461
5462   // Account for arena chunk headers in bookkeeping rather than waste.
5463   chunk_header_size =
5464       ((aStats->mapped / aStats->chunksize) * (gChunkHeaderNumPages - 1))
5465       << gPageSize2Pow;
5466
5467   aStats->mapped += non_arena_mapped;
5468   aStats->bookkeeping += chunk_header_size;
5469   aStats->waste -= chunk_header_size;
5470
5471   MOZ_ASSERT(aStats->mapped >= aStats->allocated + aStats->waste +
5472                                    aStats->pages_dirty + aStats->bookkeeping);
5473 }
5474
5475 inline void MozJemalloc::jemalloc_stats_lite(jemalloc_stats_lite_t* aStats) {
5476   if (!aStats) {
5477     return;
5478   }
5479   if (!malloc_init()) {
5480     memset(aStats, 0, sizeof(*aStats));
5481     return;
5482   }
5483
5484   aStats->allocated_bytes = 0;
5485   aStats->num_operations = 0;
5486
5487   // Get huge mapped/allocated.
5488   {
5489     MutexAutoLock lock(huge_mtx);
5490     aStats->allocated_bytes += huge_allocated;
5491     aStats->num_operations += huge_operations;
5492     MOZ_ASSERT(huge_mapped >= huge_allocated);
5493   }
5494
5495   {
5496     MutexAutoLock lock(gArenas.mLock);
5497     for (auto arena : gArenas.iter_all()) {
5498       // We don't need to lock the arena to access these fields.
5499       aStats->allocated_bytes += arena->AllocatedBytes();
5500       aStats->num_operations += arena->Operations();
5501     }
5502   }
5503 }
5504
5505 inline size_t MozJemalloc::jemalloc_stats_num_bins() {
5506   return NUM_SMALL_CLASSES;
5507 }
5508
5509 inline void MozJemalloc::jemalloc_set_main_thread() {
5510   MOZ_ASSERT(malloc_initialized);
5511   gArenas.SetMainThread();
5512 }
5513
5514 #ifdef MALLOC_DOUBLE_PURGE
5515
5516 // Explicitly remove all of this chunk's MADV_FREE'd pages from memory.
5517 static size_t hard_purge_chunk(arena_chunk_t* aChunk) {
5518   size_t total_npages = 0;
5519   // See similar logic in arena_t::Purge().
5520   for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages; i++) {
5521     // Find all adjacent pages with CHUNK_MAP_MADVISED set.
5522     size_t npages;
5523     for (npages = 0; aChunk->map[i + npages].bits & CHUNK_MAP_MADVISED &&
5524                      i + npages < gChunkNumPages;
5525          npages++) {
5526       // Turn off the page's CHUNK_MAP_MADVISED bit and turn on its
5527       // CHUNK_MAP_FRESH bit.
5528       MOZ_DIAGNOSTIC_ASSERT(!(aChunk->map[i + npages].bits &
5529                               (CHUNK_MAP_FRESH | CHUNK_MAP_DECOMMITTED)));
5530       aChunk->map[i + npages].bits ^= (CHUNK_MAP_MADVISED | CHUNK_MAP_FRESH);
5531     }
5532
5533     // We could use mincore to find out which pages are actually
5534     // present, but it's not clear that's better.
5535     if (npages > 0) {
5536       pages_decommit(((char*)aChunk) + (i << gPageSize2Pow),
5537                      npages << gPageSize2Pow);
5538       Unused << pages_commit(((char*)aChunk) + (i << gPageSize2Pow),
5539                              npages << gPageSize2Pow);
5540     }
5541     total_npages += npages;
5542     i += npages;
5543   }
5544
5545   return total_npages;
5546 }
5547
5548 // Explicitly remove all of this arena's MADV_FREE'd pages from memory.
5549 void arena_t::HardPurge() {
5550   MaybeMutexAutoLock lock(mLock);
5551
5552   while (!mChunksMAdvised.isEmpty()) {
5553     arena_chunk_t* chunk = mChunksMAdvised.popFront();
5554     size_t npages = hard_purge_chunk(chunk);
5555     mNumMAdvised -= npages;
5556     mNumFresh += npages;
5557   }
5558 }
5559
5560 inline void MozJemalloc::jemalloc_purge_freed_pages() {
5561   if (malloc_initialized) {
5562     MutexAutoLock lock(gArenas.mLock);
5563     MOZ_ASSERT(gArenas.IsOnMainThreadWeak());
5564     for (auto arena : gArenas.iter()) {
5565       arena->HardPurge();
5566     }
5567   }
5568 }
5569
5570 #else  // !defined MALLOC_DOUBLE_PURGE
5571
5572 inline void MozJemalloc::jemalloc_purge_freed_pages() {
5573   // Do nothing.
5574 }
5575
5576 #endif  // defined MALLOC_DOUBLE_PURGE
5577
5578 inline void MozJemalloc::jemalloc_free_dirty_pages(void) {
5579   if (malloc_initialized) {
5580     MutexAutoLock lock(gArenas.mLock);
5581     MOZ_ASSERT(gArenas.IsOnMainThreadWeak());
5582     for (auto* arena : gArenas.iter()) {
5583       bool do_purge = true;
5584       while (do_purge) {
5585         do_purge = arena->Purge(true);
5586       }
5587     }
5588   }
5589 }
5590
5591 inline void MozJemalloc::jemalloc_free_excess_dirty_pages(void) {
5592   if (malloc_initialized) {
5593     MutexAutoLock lock(gArenas.mLock);
5594     for (auto* arena : gArenas.iter()) {
5595       bool do_purge = true;
5596       while (do_purge) {
5597         do_purge = arena->Purge();
5598       }
5599     }
5600   }
5601 }
5602
5603 inline arena_t* ArenaCollection::GetByIdInternal(Tree& aTree,
5604                                                  arena_id_t aArenaId) {
5605   // Use AlignedStorage2 to avoid running the arena_t constructor, while
5606   // we only need it as a placeholder for mId.
5607   mozilla::AlignedStorage2<arena_t> key;
5608   key.addr()->mId = aArenaId;
5609   return aTree.Search(key.addr());
5610 }
5611
5612 inline arena_t* ArenaCollection::GetById(arena_id_t aArenaId, bool aIsPrivate) {
5613   if (!malloc_initialized) {
5614     return nullptr;
5615   }
5616
5617   Tree* tree = nullptr;
5618   if (aIsPrivate) {
5619     if (ArenaIdIsMainThreadOnly(aArenaId)) {
5620       // The main thread only arenas support lock free access, so it's desirable
5621       // to do GetById without taking mLock either.
5622       //
5623       // Races can occur between writers and writers, or between writers and
5624       // readers.  The only writer is the main thread and it will never race
5625       // against itself so we can elude the lock when the main thread is
5626       // reading.
5627       MOZ_ASSERT(IsOnMainThread());
5628       MOZ_PUSH_IGNORE_THREAD_SAFETY
5629       arena_t* result = GetByIdInternal(mMainThreadArenas, aArenaId);
5630       MOZ_POP_THREAD_SAFETY
5631       MOZ_RELEASE_ASSERT(result);
5632       return result;
5633     }
5634     tree = &mPrivateArenas;
5635   } else {
5636     tree = &mArenas;
5637   }
5638
5639   MutexAutoLock lock(mLock);
5640   arena_t* result = GetByIdInternal(*tree, aArenaId);
5641   MOZ_RELEASE_ASSERT(result);
5642   return result;
5643 }
5644
5645 inline arena_id_t MozJemalloc::moz_create_arena_with_params(
5646     arena_params_t* aParams) {
5647   if (malloc_init()) {
5648     arena_t* arena = gArenas.CreateArena(/* IsPrivate = */ true, aParams);
5649     return arena->mId;
5650   }
5651   return 0;
5652 }
5653
5654 inline void MozJemalloc::moz_dispose_arena(arena_id_t aArenaId) {
5655   arena_t* arena = gArenas.GetById(aArenaId, /* IsPrivate = */ true);
5656   MOZ_RELEASE_ASSERT(arena);
5657   gArenas.DisposeArena(arena);
5658 }
5659
5660 inline void MozJemalloc::moz_set_max_dirty_page_modifier(int32_t aModifier) {
5661   gArenas.SetDefaultMaxDirtyPageModifier(aModifier);
5662 }
5663
5664 inline void MozJemalloc::jemalloc_reset_small_alloc_randomization(
5665     bool aRandomizeSmall) {
5666   // When this process got forked by ForkServer then it inherited the existing
5667   // state of mozjemalloc. Specifically, parsing of MALLOC_OPTIONS has already
5668   // been done but it may not reflect anymore the current set of options after
5669   // the fork().
5670   //
5671   // Similar behavior is also present on Android where it is also required to
5672   // perform this step.
5673   //
5674   // Content process will have randomization on small malloc disabled via the
5675   // MALLOC_OPTIONS environment variable set by parent process, missing this
5676   // will lead to serious performance regressions because CPU prefetch will
5677   // break, cf bug 1912262.  However on forkserver-forked Content processes, the
5678   // environment is not yet reset when the postfork child handler is being
5679   // called.
5680   //
5681   // This API is here to allow those Content processes (spawned by ForkServer or
5682   // Android service) to notify jemalloc to turn off the randomization on small
5683   // allocations and perform the required reinitialization of already existing
5684   // arena's PRNG.  It is important to make sure that the PRNG state is properly
5685   // re-initialized otherwise child processes would share all the same state.
5686
5687   {
5688     AutoLock<StaticMutex> lock(gInitLock);
5689     opt_randomize_small = aRandomizeSmall;
5690   }
5691
5692   MutexAutoLock lock(gArenas.mLock);
5693   for (auto* arena : gArenas.iter()) {
5694     arena->ResetSmallAllocRandomization();
5695   }
5696 }
5697
5698 #define MALLOC_DECL(name, return_type, ...)                          \
5699   inline return_type MozJemalloc::moz_arena_##name(                  \
5700       arena_id_t aArenaId, ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
5701     BaseAllocator allocator(                                         \
5702         gArenas.GetById(aArenaId, /* IsPrivate = */ true));          \
5703     return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__));         \
5704   }
5705 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
5706 #include "malloc_decls.h"
5707
5708 // End non-standard functions.
5709 // ***************************************************************************
5710 #ifndef XP_WIN
5711 // Begin library-private functions, used by threading libraries for protection
5712 // of malloc during fork().  These functions are only called if the program is
5713 // running in threaded mode, so there is no need to check whether the program
5714 // is threaded here.
5715 //
5716 // Note that the only way to keep the main-thread-only arenas in a consistent
5717 // state for the child is if fork is called from the main thread only.  Or the
5718 // child must not use them, eg it should call exec().  We attempt to prevent the
5719 // child for accessing these arenas by refusing to re-initialise them.
5720 //
5721 // This is only accessed in the fork handlers while gArenas.mLock is held.
5722 static pthread_t gForkingThread;
5723
5724 #  ifdef XP_DARWIN
5725 // This is only accessed in the fork handlers while gArenas.mLock is held.
5726 static pid_t gForkingProcess;
5727 #  endif
5728
5729 FORK_HOOK
5730 void _malloc_prefork(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
5731   // Acquire all mutexes in a safe order.
5732   gArenas.mLock.Lock();
5733   gForkingThread = pthread_self();
5734 #  ifdef XP_DARWIN
5735   gForkingProcess = getpid();
5736 #  endif
5737
5738   for (auto arena : gArenas.iter()) {
5739     if (arena->mLock.LockIsEnabled()) {
5740       arena->mLock.Lock();
5741     }
5742   }
5743
5744   base_mtx.Lock();
5745
5746   huge_mtx.Lock();
5747 }
5748
5749 FORK_HOOK
5750 void _malloc_postfork_parent(void) MOZ_NO_THREAD_SAFETY_ANALYSIS {
5751   // Release all mutexes, now that fork() has completed.
5752   huge_mtx.Unlock();
5753
5754   base_mtx.Unlock();
5755
5756   for (auto arena : gArenas.iter()) {
5757     if (arena->mLock.LockIsEnabled()) {
5758       arena->mLock.Unlock();
5759     }
5760   }
5761
5762   gArenas.mLock.Unlock();
5763 }
5764
5765 FORK_HOOK
5766 void _malloc_postfork_child(void) {
5767   // Do this before iterating over the arenas.
5768   gArenas.ResetMainThread();
5769
5770   // Reinitialize all mutexes, now that fork() has completed.
5771   huge_mtx.Init();
5772
5773   base_mtx.Init();
5774
5775   MOZ_PUSH_IGNORE_THREAD_SAFETY
5776   for (auto arena : gArenas.iter()) {
5777     arena->mLock.Reinit(gForkingThread);
5778   }
5779   MOZ_POP_THREAD_SAFETY
5780
5781   gArenas.mLock.Init();
5782 }
5783
5784 #  ifdef XP_DARWIN
5785 FORK_HOOK
5786 void _malloc_postfork(void) {
5787   // On MacOS we need to check if this is running in the parent or child
5788   // process.
5789   bool is_in_parent = getpid() == gForkingProcess;
5790   gForkingProcess = 0;
5791   if (is_in_parent) {
5792     _malloc_postfork_parent();
5793   } else {
5794     _malloc_postfork_child();
5795   }
5796 }
5797 #  endif  // XP_DARWIN
5798 #endif    // ! XP_WIN
5799
5800 // End library-private functions.
5801 // ***************************************************************************
5802 #ifdef MOZ_REPLACE_MALLOC
5803 // Windows doesn't come with weak imports as they are possible with
5804 // LD_PRELOAD or DYLD_INSERT_LIBRARIES on Linux/OSX. On this platform,
5805 // the replacement functions are defined as variable pointers to the
5806 // function resolved with GetProcAddress() instead of weak definitions
5807 // of functions. On Android, the same needs to happen as well, because
5808 // the Android linker doesn't handle weak linking with non LD_PRELOADed
5809 // libraries, but LD_PRELOADing is not very convenient on Android, with
5810 // the zygote.
5811 #  ifdef XP_DARWIN
5812 #    define MOZ_REPLACE_WEAK __attribute__((weak_import))
5813 #  elif defined(XP_WIN) || defined(ANDROID)
5814 #    define MOZ_DYNAMIC_REPLACE_INIT
5815 #    define replace_init replace_init_decl
5816 #  elif defined(__GNUC__)
5817 #    define MOZ_REPLACE_WEAK __attribute__((weak))
5818 #  endif
5819
5820 #  include "replace_malloc.h"
5821
5822 #  define MALLOC_DECL(name, return_type, ...) CanonicalMalloc::name,
5823
5824 // The default malloc table, i.e. plain allocations. It never changes. It's
5825 // used by init(), and not used after that.
5826 static const malloc_table_t gDefaultMallocTable = {
5827 #  include "malloc_decls.h"
5828 };
5829
5830 // The malloc table installed by init(). It never changes from that point
5831 // onward. It will be the same as gDefaultMallocTable if no replace-malloc tool
5832 // is enabled at startup.
5833 static malloc_table_t gOriginalMallocTable = {
5834 #  include "malloc_decls.h"
5835 };
5836
5837 // The malloc table installed by jemalloc_replace_dynamic(). (Read the
5838 // comments above that function for more details.)
5839 static malloc_table_t gDynamicMallocTable = {
5840 #  include "malloc_decls.h"
5841 };
5842
5843 // This briefly points to gDefaultMallocTable at startup. After that, it points
5844 // to either gOriginalMallocTable or gDynamicMallocTable. It's atomic to avoid
5845 // races when switching between tables.
5846 static Atomic<malloc_table_t const*, mozilla::MemoryOrdering::Relaxed>
5847     gMallocTablePtr;
5848
5849 #  ifdef MOZ_DYNAMIC_REPLACE_INIT
5850 #    undef replace_init
5851 typedef decltype(replace_init_decl) replace_init_impl_t;
5852 static replace_init_impl_t* replace_init = nullptr;
5853 #  endif
5854
5855 #  ifdef XP_WIN
5856 typedef HMODULE replace_malloc_handle_t;
5857
5858 static replace_malloc_handle_t replace_malloc_handle() {
5859   wchar_t replace_malloc_lib[1024];
5860   if (GetEnvironmentVariableW(L"MOZ_REPLACE_MALLOC_LIB", replace_malloc_lib,
5861                               std::size(replace_malloc_lib)) > 0) {
5862     return LoadLibraryW(replace_malloc_lib);
5863   }
5864   return nullptr;
5865 }
5866
5867 #    define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
5868       (replace_init_impl_t*)GetProcAddress(handle, "replace_init")
5869
5870 #  elif defined(ANDROID)
5871 #    include <dlfcn.h>
5872
5873 typedef void* replace_malloc_handle_t;
5874
5875 static replace_malloc_handle_t replace_malloc_handle() {
5876   const char* replace_malloc_lib = getenv("MOZ_REPLACE_MALLOC_LIB");
5877   if (replace_malloc_lib && *replace_malloc_lib) {
5878     return dlopen(replace_malloc_lib, RTLD_LAZY);
5879   }
5880   return nullptr;
5881 }
5882
5883 #    define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
5884       (replace_init_impl_t*)dlsym(handle, "replace_init")
5885
5886 #  endif
5887
5888 static void replace_malloc_init_funcs(malloc_table_t*);
5889
5890 #  ifdef MOZ_REPLACE_MALLOC_STATIC
5891 extern "C" void logalloc_init(malloc_table_t*, ReplaceMallocBridge**);
5892
5893 extern "C" void dmd_init(malloc_table_t*, ReplaceMallocBridge**);
5894 #  endif
5895
5896 void phc_init(malloc_table_t*, ReplaceMallocBridge**);
5897
5898 bool Equals(const malloc_table_t& aTable1, const malloc_table_t& aTable2) {
5899   return memcmp(&aTable1, &aTable2, sizeof(malloc_table_t)) == 0;
5900 }
5901
5902 // Below is the malloc implementation overriding jemalloc and calling the
5903 // replacement functions if they exist.
5904 static ReplaceMallocBridge* gReplaceMallocBridge = nullptr;
5905 static void init() {
5906   malloc_table_t tempTable = gDefaultMallocTable;
5907
5908 #  ifdef MOZ_DYNAMIC_REPLACE_INIT
5909   replace_malloc_handle_t handle = replace_malloc_handle();
5910   if (handle) {
5911     replace_init = REPLACE_MALLOC_GET_INIT_FUNC(handle);
5912   }
5913 #  endif
5914
5915   // Set this *before* calling replace_init, otherwise if replace_init calls
5916   // malloc() we'll get an infinite loop.
5917   gMallocTablePtr = &gDefaultMallocTable;
5918
5919   // Pass in the default allocator table so replace functions can copy and use
5920   // it for their allocations. The replace_init() function should modify the
5921   // table if it wants to be active, otherwise leave it unmodified.
5922   if (replace_init) {
5923     replace_init(&tempTable, &gReplaceMallocBridge);
5924   }
5925 #  ifdef MOZ_REPLACE_MALLOC_STATIC
5926   if (Equals(tempTable, gDefaultMallocTable)) {
5927     logalloc_init(&tempTable, &gReplaceMallocBridge);
5928   }
5929 #    ifdef MOZ_DMD
5930   if (Equals(tempTable, gDefaultMallocTable)) {
5931     dmd_init(&tempTable, &gReplaceMallocBridge);
5932   }
5933 #    endif
5934 #  endif
5935   if (!Equals(tempTable, gDefaultMallocTable)) {
5936     replace_malloc_init_funcs(&tempTable);
5937   }
5938   gOriginalMallocTable = tempTable;
5939   gMallocTablePtr = &gOriginalMallocTable;
5940 }
5941
5942 // WARNING WARNING WARNING: this function should be used with extreme care. It
5943 // is not as general-purpose as it looks. It is currently used by
5944 // tools/profiler/core/memory_hooks.cpp for counting allocations and probably
5945 // should not be used for any other purpose.
5946 //
5947 // This function allows the original malloc table to be temporarily replaced by
5948 // a different malloc table. Or, if the argument is nullptr, it switches back to
5949 // the original malloc table.
5950 //
5951 // Limitations:
5952 //
5953 // - It is not threadsafe. If multiple threads pass it the same
5954 //   `replace_init_func` at the same time, there will be data races writing to
5955 //   the malloc_table_t within that function.
5956 //
5957 // - Only one replacement can be installed. No nesting is allowed.
5958 //
5959 // - The new malloc table must be able to free allocations made by the original
5960 //   malloc table, and upon removal the original malloc table must be able to
5961 //   free allocations made by the new malloc table. This means the new malloc
5962 //   table can only do simple things like recording extra information, while
5963 //   delegating actual allocation/free operations to the original malloc table.
5964 //
5965 MOZ_JEMALLOC_API void jemalloc_replace_dynamic(
5966     jemalloc_init_func replace_init_func) {
5967   if (replace_init_func) {
5968     malloc_table_t tempTable = gOriginalMallocTable;
5969     (*replace_init_func)(&tempTable, &gReplaceMallocBridge);
5970     if (!Equals(tempTable, gOriginalMallocTable)) {
5971       replace_malloc_init_funcs(&tempTable);
5972
5973       // Temporarily switch back to the original malloc table. In the
5974       // (supported) non-nested case, this is a no-op. But just in case this is
5975       // a (unsupported) nested call, it makes the overwriting of
5976       // gDynamicMallocTable less racy, because ongoing calls to malloc() and
5977       // friends won't go through gDynamicMallocTable.
5978       gMallocTablePtr = &gOriginalMallocTable;
5979
5980       gDynamicMallocTable = tempTable;
5981       gMallocTablePtr = &gDynamicMallocTable;
5982       // We assume that dynamic replaces don't occur close enough for a
5983       // thread to still have old copies of the table pointer when the 2nd
5984       // replace occurs.
5985     }
5986   } else {
5987     // Switch back to the original malloc table.
5988     gMallocTablePtr = &gOriginalMallocTable;
5989   }
5990 }
5991
5992 #  define MALLOC_DECL(name, return_type, ...)                           \
5993     inline return_type ReplaceMalloc::name(                             \
5994         ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) {                       \
5995       if (MOZ_UNLIKELY(!gMallocTablePtr)) {                             \
5996         init();                                                         \
5997       }                                                                 \
5998       return (*gMallocTablePtr).name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
5999     }
6000 #  include "malloc_decls.h"
6001
6002 MOZ_JEMALLOC_API struct ReplaceMallocBridge* get_bridge(void) {
6003   if (MOZ_UNLIKELY(!gMallocTablePtr)) {
6004     init();
6005   }
6006   return gReplaceMallocBridge;
6007 }
6008
6009 // posix_memalign, aligned_alloc, memalign and valloc all implement some kind
6010 // of aligned memory allocation. For convenience, a replace-malloc library can
6011 // skip defining replace_posix_memalign, replace_aligned_alloc and
6012 // replace_valloc, and default implementations will be automatically derived
6013 // from replace_memalign.
6014 static void replace_malloc_init_funcs(malloc_table_t* table) {
6015   if (table->posix_memalign == CanonicalMalloc::posix_memalign &&
6016       table->memalign != CanonicalMalloc::memalign) {
6017     table->posix_memalign =
6018         AlignedAllocator<ReplaceMalloc::memalign>::posix_memalign;
6019   }
6020   if (table->aligned_alloc == CanonicalMalloc::aligned_alloc &&
6021       table->memalign != CanonicalMalloc::memalign) {
6022     table->aligned_alloc =
6023         AlignedAllocator<ReplaceMalloc::memalign>::aligned_alloc;
6024   }
6025   if (table->valloc == CanonicalMalloc::valloc &&
6026       table->memalign != CanonicalMalloc::memalign) {
6027     table->valloc = AlignedAllocator<ReplaceMalloc::memalign>::valloc;
6028   }
6029   if (table->moz_create_arena_with_params ==
6030           CanonicalMalloc::moz_create_arena_with_params &&
6031       table->malloc != CanonicalMalloc::malloc) {
6032 #  define MALLOC_DECL(name, ...) \
6033     table->name = DummyArenaAllocator<ReplaceMalloc>::name;
6034 #  define MALLOC_FUNCS MALLOC_FUNCS_ARENA_BASE
6035 #  include "malloc_decls.h"
6036   }
6037   if (table->moz_arena_malloc == CanonicalMalloc::moz_arena_malloc &&
6038       table->malloc != CanonicalMalloc::malloc) {
6039 #  define MALLOC_DECL(name, ...) \
6040     table->name = DummyArenaAllocator<ReplaceMalloc>::name;
6041 #  define MALLOC_FUNCS MALLOC_FUNCS_ARENA_ALLOC
6042 #  include "malloc_decls.h"
6043   }
6044 }
6045
6046 #endif  // MOZ_REPLACE_MALLOC
6047 // ***************************************************************************
6048 // Definition of all the _impl functions
6049 // GENERIC_MALLOC_DECL2_MINGW is only used for the MinGW build, and aliases
6050 // the malloc funcs (e.g. malloc) to the je_ versions. It does not generate
6051 // aliases for the other functions (jemalloc and arena functions).
6052 //
6053 // We do need aliases for the other mozglue.def-redirected functions though,
6054 // these are done at the bottom of mozmemory_wrap.cpp
6055 #define GENERIC_MALLOC_DECL2_MINGW(name, name_impl, return_type, ...) \
6056   return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__))            \
6057       __attribute__((alias(MOZ_STRINGIFY(name_impl))));
6058
6059 #define GENERIC_MALLOC_DECL2(attributes, name, name_impl, return_type, ...)  \
6060   return_type name_impl(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) attributes { \
6061     return DefaultMalloc::name(ARGS_HELPER(ARGS, ##__VA_ARGS__));            \
6062   }
6063
6064 #ifndef __MINGW32__
6065 #  define GENERIC_MALLOC_DECL(attributes, name, return_type, ...)    \
6066     GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
6067                          ##__VA_ARGS__)
6068 #else
6069 #  define GENERIC_MALLOC_DECL(attributes, name, return_type, ...)    \
6070     GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
6071                          ##__VA_ARGS__)                              \
6072     GENERIC_MALLOC_DECL2_MINGW(name, name##_impl, return_type, ##__VA_ARGS__)
6073 #endif
6074
6075 #define NOTHROW_MALLOC_DECL(...) \
6076   MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (noexcept(true), __VA_ARGS__))
6077 #define MALLOC_DECL(...) \
6078   MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
6079 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
6080 #include "malloc_decls.h"
6081
6082 #undef GENERIC_MALLOC_DECL
6083 #define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \
6084   GENERIC_MALLOC_DECL2(attributes, name, name, return_type, ##__VA_ARGS__)
6085
6086 #define MALLOC_DECL(...) \
6087   MOZ_JEMALLOC_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
6088 #define MALLOC_FUNCS (MALLOC_FUNCS_JEMALLOC | MALLOC_FUNCS_ARENA)
6089 #include "malloc_decls.h"
6090 // ***************************************************************************
6091
6092 #ifdef HAVE_DLFCN_H
6093 #  include <dlfcn.h>
6094 #endif
6095
6096 #if defined(__GLIBC__) && !defined(__UCLIBC__)
6097 // glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
6098 // to inconsistently reference libc's malloc(3)-compatible functions
6099 // (bug 493541).
6100 //
6101 // These definitions interpose hooks in glibc.  The functions are actually
6102 // passed an extra argument for the caller return address, which will be
6103 // ignored.
6104
6105 extern "C" {
6106 MOZ_EXPORT void (*__free_hook)(void*) = free_impl;
6107 MOZ_EXPORT void* (*__malloc_hook)(size_t) = malloc_impl;
6108 MOZ_EXPORT void* (*__realloc_hook)(void*, size_t) = realloc_impl;
6109 MOZ_EXPORT void* (*__memalign_hook)(size_t, size_t) = memalign_impl;
6110 }
6111
6112 #elif defined(RTLD_DEEPBIND)
6113 // XXX On systems that support RTLD_GROUP or DF_1_GROUP, do their
6114 // implementations permit similar inconsistencies?  Should STV_SINGLETON
6115 // visibility be used for interposition where available?
6116 #  error \
6117       "Interposing malloc is unsafe on this system without libc malloc hooks."
6118 #endif
6119
6120 #ifdef XP_WIN
6121 MOZ_EXPORT void* _recalloc(void* aPtr, size_t aCount, size_t aSize) {
6122   size_t oldsize = aPtr ? AllocInfo::Get(aPtr).Size() : 0;
6123   CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aCount) * aSize;
6124
6125   if (!checkedSize.isValid()) {
6126     return nullptr;
6127   }
6128
6129   size_t newsize = checkedSize.value();
6130
6131   // In order for all trailing bytes to be zeroed, the caller needs to
6132   // use calloc(), followed by recalloc().  However, the current calloc()
6133   // implementation only zeros the bytes requested, so if recalloc() is
6134   // to work 100% correctly, calloc() will need to change to zero
6135   // trailing bytes.
6136   aPtr = DefaultMalloc::realloc(aPtr, newsize);
6137   if (aPtr && oldsize < newsize) {
6138     memset((void*)((uintptr_t)aPtr + oldsize), 0, newsize - oldsize);
6139   }
6140
6141   return aPtr;
6142 }
6143
6144 // This impl of _expand doesn't ever actually expand or shrink blocks: it
6145 // simply replies that you may continue using a shrunk block.
6146 MOZ_EXPORT void* _expand(void* aPtr, size_t newsize) {
6147   if (AllocInfo::Get(aPtr).Size() >= newsize) {
6148     return aPtr;
6149   }
6150
6151   return nullptr;
6152 }
6153
6154 MOZ_EXPORT size_t _msize(void* aPtr) {
6155   return DefaultMalloc::malloc_usable_size(aPtr);
6156 }
6157 #endif
6158
6159 #ifdef MOZ_PHC
6160 // Compile PHC and mozjemalloc together so that PHC can inline mozjemalloc.
6161 #  include "PHC.cpp"
6162 #endif