lib/libc/stdlib/jemalloc.c

   1 /*      $NetBSD: jemalloc.c,v 1.21 2010/03/04 22:48:31 enami Exp $      */
   2
   3 /*-
   4  * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice(s), this list of conditions and the following disclaimer as
  12  *    the first lines of this file unmodified other than the possible
  13  *    addition of one or more copyright notices.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice(s), this list of conditions and the following disclaimer in
  16  *    the documentation and/or other materials provided with the
  17  *    distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  *
  31  *******************************************************************************
  32  *
  33  * This allocator implementation is designed to provide scalable performance
  34  * for multi-threaded programs on multi-processor systems.  The following
  35  * features are included for this purpose:
  36  *
  37  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  38  *     contention and cache sloshing.
  39  *
  40  *   + Cache line sharing between arenas is avoided for internal data
  41  *     structures.
  42  *
  43  *   + Memory is managed in chunks and runs (chunks can be split into runs),
  44  *     rather than as individual pages.  This provides a constant-time
  45  *     mechanism for associating allocations with particular arenas.
  46  *
  47  * Allocation requests are rounded up to the nearest size class, and no record
  48  * of the original request size is maintained.  Allocations are broken into
  49  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  50  * and a 16 byte quantum, the size classes in each category are as follows:
  51  *
  52  *   |=====================================|
  53  *   | Category | Subcategory    |    Size |
  54  *   |=====================================|
  55  *   | Small    | Tiny           |       2 |
  56  *   |          |                |       4 |
  57  *   |          |                |       8 |
  58  *   |          |----------------+---------|
  59  *   |          | Quantum-spaced |      16 |
  60  *   |          |                |      32 |
  61  *   |          |                |      48 |
  62  *   |          |                |     ... |
  63  *   |          |                |     480 |
  64  *   |          |                |     496 |
  65  *   |          |                |     512 |
  66  *   |          |----------------+---------|
  67  *   |          | Sub-page       |    1 kB |
  68  *   |          |                |    2 kB |
  69  *   |=====================================|
  70  *   | Large                     |    4 kB |
  71  *   |                           |    8 kB |
  72  *   |                           |   12 kB |
  73  *   |                           |     ... |
  74  *   |                           | 1012 kB |
  75  *   |                           | 1016 kB |
  76  *   |                           | 1020 kB |
  77  *   |=====================================|
  78  *   | Huge                      |    1 MB |
  79  *   |                           |    2 MB |
  80  *   |                           |    3 MB |
  81  *   |                           |     ... |
  82  *   |=====================================|
  83  *
  84  * A different mechanism is used for each category:
  85  *
  86  *   Small : Each size class is segregated into its own set of runs.  Each run
  87  *           maintains a bitmap of which regions are free/allocated.
  88  *
  89  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
  90  *           in the associated arena chunk header maps.
  91  *
  92  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
  93  *          Metadata are stored in a separate red-black tree.
  94  *
  95  *******************************************************************************
  96  */
  97
  98 /* LINTLIBRARY */
  99
 100 #ifdef __NetBSD__
 101 #  define xutrace(a, b)         utrace("malloc", (a), (b))
 102 #  define __DECONST(x, y)       ((x)__UNCONST(y))
 103 #  define NO_TLS
 104 #else
 105 #  define xutrace(a, b)         utrace((a), (b))
 106 #endif  /* __NetBSD__ */
 107
 108 /*
 109  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 110  * defaults the A and J runtime options to off.  These settings are appropriate
 111  * for production systems.
 112  */
 113 #define MALLOC_PRODUCTION
 114
 115 #ifndef MALLOC_PRODUCTION
 116 #  define MALLOC_DEBUG
 117 #endif
 118
 119 #include <sys/cdefs.h>
 120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
 121 __RCSID("$NetBSD: jemalloc.c,v 1.21 2010/03/04 22:48:31 enami Exp $");
 122
 123 #ifdef __FreeBSD__
 124 #include "libc_private.h"
 125 #ifdef MALLOC_DEBUG
 126 #  define _LOCK_DEBUG
 127 #endif
 128 #include "spinlock.h"
 129 #endif
 130 #include "namespace.h"
 131 #include <sys/mman.h>
 132 #include <sys/param.h>
 133 #ifdef __FreeBSD__
 134 #include <sys/stddef.h>
 135 #endif
 136 #include <sys/time.h>
 137 #include <sys/types.h>
 138 #include <sys/sysctl.h>
 139 #include <sys/tree.h>
 140 #include <sys/uio.h>
 141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
 142
 143 #ifdef __FreeBSD__
 144 #include <machine/atomic.h>
 145 #include <machine/cpufunc.h>
 146 #endif
 147 #include <machine/vmparam.h>
 148
 149 #include <errno.h>
 150 #include <limits.h>
 151 #include <pthread.h>
 152 #include <sched.h>
 153 #include <stdarg.h>
 154 #include <stdbool.h>
 155 #include <stdio.h>
 156 #include <stdint.h>
 157 #include <stdlib.h>
 158 #include <string.h>
 159 #include <strings.h>
 160 #include <unistd.h>
 161
 162 #ifdef __NetBSD__
 163 #  include <reentrant.h>
 164 #  include "extern.h"
 165
 166 #define STRERROR_R(a, b, c)     __strerror_r(a, b, c);
 167 /*
 168  * A non localized version of strerror, that avoids bringing in
 169  * stdio and the locale code. All the malloc messages are in English
 170  * so why bother?
 171  */
 172 static int
 173 __strerror_r(int e, char *s, size_t l)
 174 {
 175         int rval;
 176         size_t slen;
 177
 178         if (e >= 0 && e < sys_nerr) {
 179                 slen = strlcpy(s, sys_errlist[e], l);
 180                 rval = 0;
 181         } else {
 182                 slen = snprintf_ss(s, l, "Unknown error %u", e);
 183                 rval = EINVAL;
 184         }
 185         return slen >= l ? ERANGE : rval;
 186 }
 187 #endif
 188
 189 #ifdef __FreeBSD__
 190 #define STRERROR_R(a, b, c)     strerror_r(a, b, c);
 191 #include "un-namespace.h"
 192 #endif
 193
 194 /* MALLOC_STATS enables statistics calculation. */
 195 #ifndef MALLOC_PRODUCTION
 196 #  define MALLOC_STATS
 197 #endif
 198
 199 #ifdef MALLOC_DEBUG
 200 #  ifdef NDEBUG
 201 #    undef NDEBUG
 202 #  endif
 203 #else
 204 #  ifndef NDEBUG
 205 #    define NDEBUG
 206 #  endif
 207 #endif
 208 #include <assert.h>
 209
 210 #ifdef MALLOC_DEBUG
 211    /* Disable inlining to make debugging easier. */
 212 #  define inline
 213 #endif
 214
 215 /* Size of stack-allocated buffer passed to strerror_r(). */
 216 #define STRERROR_BUF            64
 217
 218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
 219 #ifdef __i386__
 220 #  define QUANTUM_2POW_MIN      4
 221 #  define SIZEOF_PTR_2POW       2
 222 #  define USE_BRK
 223 #endif
 224 #ifdef __ia64__
 225 #  define QUANTUM_2POW_MIN      4
 226 #  define SIZEOF_PTR_2POW       3
 227 #endif
 228 #ifdef __alpha__
 229 #  define QUANTUM_2POW_MIN      4
 230 #  define SIZEOF_PTR_2POW       3
 231 #  define NO_TLS
 232 #endif
 233 #ifdef __sparc64__
 234 #  define QUANTUM_2POW_MIN      4
 235 #  define SIZEOF_PTR_2POW       3
 236 #  define NO_TLS
 237 #endif
 238 #ifdef __amd64__
 239 #  define QUANTUM_2POW_MIN      4
 240 #  define SIZEOF_PTR_2POW       3
 241 #endif
 242 #ifdef __arm__
 243 #  define QUANTUM_2POW_MIN      3
 244 #  define SIZEOF_PTR_2POW       2
 245 #  define USE_BRK
 246 #  define NO_TLS
 247 #endif
 248 #ifdef __powerpc__
 249 #  define QUANTUM_2POW_MIN      4
 250 #  define SIZEOF_PTR_2POW       2
 251 #  define USE_BRK
 252 #endif
 253 #if defined(__sparc__) && !defined(__sparc64__)
 254 #  define QUANTUM_2POW_MIN      4
 255 #  define SIZEOF_PTR_2POW       2
 256 #  define USE_BRK
 257 #endif
 258 #ifdef __vax__
 259 #  define QUANTUM_2POW_MIN      4
 260 #  define SIZEOF_PTR_2POW       2
 261 #  define USE_BRK
 262 #endif
 263 #ifdef __sh__
 264 #  define QUANTUM_2POW_MIN      4
 265 #  define SIZEOF_PTR_2POW       2
 266 #  define USE_BRK
 267 #endif
 268 #ifdef __m68k__
 269 #  define QUANTUM_2POW_MIN      4
 270 #  define SIZEOF_PTR_2POW       2
 271 #  define USE_BRK
 272 #endif
 273 #ifdef __mips__
 274 #  define QUANTUM_2POW_MIN      4
 275 #  define SIZEOF_PTR_2POW       2
 276 #  define USE_BRK
 277 #endif
 278 #ifdef __hppa__
 279 #  define QUANTUM_2POW_MIN     4
 280 #  define SIZEOF_PTR_2POW      2
 281 #  define USE_BRK
 282 #endif
 283
 284 #define SIZEOF_PTR              (1 << SIZEOF_PTR_2POW)
 285
 286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
 287 #ifndef SIZEOF_INT_2POW
 288 #  define SIZEOF_INT_2POW       2
 289 #endif
 290
 291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 292 #if (!defined(PIC) && !defined(NO_TLS))
 293 #  define NO_TLS
 294 #endif
 295
 296 /*
 297  * Size and alignment of memory chunks that are allocated by the OS's virtual
 298  * memory system.
 299  */
 300 #define CHUNK_2POW_DEFAULT      20
 301
 302 /*
 303  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 304  * so over-estimates are okay (up to a point), but under-estimates will
 305  * negatively affect performance.
 306  */
 307 #define CACHELINE_2POW          6
 308 #define CACHELINE               ((size_t)(1 << CACHELINE_2POW))
 309
 310 /* Smallest size class to support. */
 311 #define TINY_MIN_2POW           1
 312
 313 /*
 314  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 315  * a power of 2.  Above this size, allocations are rounded up to the nearest
 316  * power of 2.
 317  */
 318 #define SMALL_MAX_2POW_DEFAULT  9
 319 #define SMALL_MAX_DEFAULT       (1 << SMALL_MAX_2POW_DEFAULT)
 320
 321 /*
 322  * Maximum desired run header overhead.  Runs are sized as small as possible
 323  * such that this setting is still honored, without violating other constraints.
 324  * The goal is to make runs as small as possible without exceeding a per run
 325  * external fragmentation threshold.
 326  *
 327  * Note that it is possible to set this low enough that it cannot be honored
 328  * for some/all object sizes, since there is one bit of header overhead per
 329  * object (plus a constant).  In such cases, this constraint is relaxed.
 330  *
 331  * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
 332  * overhead for which RUN_MAX_OVRHD is relaxed.
 333  */
 334 #define RUN_MAX_OVRHD           0.015
 335 #define RUN_MAX_OVRHD_RELAX     1.5
 336
 337 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 338 #define RUN_MAX_SMALL_2POW      15
 339 #define RUN_MAX_SMALL           (1 << RUN_MAX_SMALL_2POW)
 340
 341 /******************************************************************************/
 342
 343 #ifdef __FreeBSD__
 344 /*
 345  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 346  * they require malloc()ed memory.
 347  */
 348 typedef struct {
 349         spinlock_t      lock;
 350 } malloc_mutex_t;
 351
 352 /* Set to true once the allocator has been initialized. */
 353 static bool malloc_initialized = false;
 354
 355 /* Used to avoid initialization races. */
 356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 357 #else
 358 #define malloc_mutex_t  mutex_t
 359
 360 /* Set to true once the allocator has been initialized. */
 361 static bool malloc_initialized = false;
 362
 363 /* Used to avoid initialization races. */
 364 static mutex_t init_lock = MUTEX_INITIALIZER;
 365 #endif
 366
 367 /******************************************************************************/
 368 /*
 369  * Statistics data structures.
 370  */
 371
 372 #ifdef MALLOC_STATS
 373
 374 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 375 struct malloc_bin_stats_s {
 376         /*
 377          * Number of allocation requests that corresponded to the size of this
 378          * bin.
 379          */
 380         uint64_t        nrequests;
 381
 382         /* Total number of runs created for this bin's size class. */
 383         uint64_t        nruns;
 384
 385         /*
 386          * Total number of runs reused by extracting them from the runs tree for
 387          * this bin's size class.
 388          */
 389         uint64_t        reruns;
 390
 391         /* High-water mark for this bin. */
 392         unsigned long   highruns;
 393
 394         /* Current number of runs in this bin. */
 395         unsigned long   curruns;
 396 };
 397
 398 typedef struct arena_stats_s arena_stats_t;
 399 struct arena_stats_s {
 400         /* Number of bytes currently mapped. */
 401         size_t          mapped;
 402
 403         /* Per-size-category statistics. */
 404         size_t          allocated_small;
 405         uint64_t        nmalloc_small;
 406         uint64_t        ndalloc_small;
 407
 408         size_t          allocated_large;
 409         uint64_t        nmalloc_large;
 410         uint64_t        ndalloc_large;
 411 };
 412
 413 typedef struct chunk_stats_s chunk_stats_t;
 414 struct chunk_stats_s {
 415         /* Number of chunks that were allocated. */
 416         uint64_t        nchunks;
 417
 418         /* High-water mark for number of chunks allocated. */
 419         unsigned long   highchunks;
 420
 421         /*
 422          * Current number of chunks allocated.  This value isn't maintained for
 423          * any other purpose, so keep track of it in order to be able to set
 424          * highchunks.
 425          */
 426         unsigned long   curchunks;
 427 };
 428
 429 #endif /* #ifdef MALLOC_STATS */
 430
 431 /******************************************************************************/
 432 /*
 433  * Chunk data structures.
 434  */
 435
 436 /* Tree of chunks. */
 437 typedef struct chunk_node_s chunk_node_t;
 438 struct chunk_node_s {
 439         /* Linkage for the chunk tree. */
 440         RB_ENTRY(chunk_node_s) link;
 441
 442         /*
 443          * Pointer to the chunk that this tree node is responsible for.  In some
 444          * (but certainly not all) cases, this data structure is placed at the
 445          * beginning of the corresponding chunk, so this field may point to this
 446          * node.
 447          */
 448         void    *chunk;
 449
 450         /* Total chunk size. */
 451         size_t  size;
 452 };
 453 typedef struct chunk_tree_s chunk_tree_t;
 454 RB_HEAD(chunk_tree_s, chunk_node_s);
 455
 456 /******************************************************************************/
 457 /*
 458  * Arena data structures.
 459  */
 460
 461 typedef struct arena_s arena_t;
 462 typedef struct arena_bin_s arena_bin_t;
 463
 464 typedef struct arena_chunk_map_s arena_chunk_map_t;
 465 struct arena_chunk_map_s {
 466         /* Number of pages in run. */
 467         uint32_t        npages;
 468         /*
 469          * Position within run.  For a free run, this is POS_FREE for the first
 470          * and last pages.  The POS_FREE special value makes it possible to
 471          * quickly coalesce free runs.
 472          *
 473          * This is the limiting factor for chunksize; there can be at most 2^31
 474          * pages in a run.
 475          */
 476 #define POS_FREE ((uint32_t)0xffffffffU)
 477         uint32_t        pos;
 478 };
 479
 480 /* Arena chunk header. */
 481 typedef struct arena_chunk_s arena_chunk_t;
 482 struct arena_chunk_s {
 483         /* Arena that owns the chunk. */
 484         arena_t *arena;
 485
 486         /* Linkage for the arena's chunk tree. */
 487         RB_ENTRY(arena_chunk_s) link;
 488
 489         /*
 490          * Number of pages in use.  This is maintained in order to make
 491          * detection of empty chunks fast.
 492          */
 493         uint32_t pages_used;
 494
 495         /*
 496          * Every time a free run larger than this value is created/coalesced,
 497          * this value is increased.  The only way that the value decreases is if
 498          * arena_run_alloc() fails to find a free run as large as advertised by
 499          * this value.
 500          */
 501         uint32_t max_frun_npages;
 502
 503         /*
 504          * Every time a free run that starts at an earlier page than this value
 505          * is created/coalesced, this value is decreased.  It is reset in a
 506          * similar fashion to max_frun_npages.
 507          */
 508         uint32_t min_frun_ind;
 509
 510         /*
 511          * Map of pages within chunk that keeps track of free/large/small.  For
 512          * free runs, only the map entries for the first and last pages are
 513          * kept up to date, so that free runs can be quickly coalesced.
 514          */
 515         arena_chunk_map_t map[1]; /* Dynamically sized. */
 516 };
 517 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
 518 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
 519
 520 typedef struct arena_run_s arena_run_t;
 521 struct arena_run_s {
 522         /* Linkage for run trees. */
 523         RB_ENTRY(arena_run_s) link;
 524
 525 #ifdef MALLOC_DEBUG
 526         uint32_t        magic;
 527 #  define ARENA_RUN_MAGIC 0x384adf93
 528 #endif
 529
 530         /* Bin this run is associated with. */
 531         arena_bin_t     *bin;
 532
 533         /* Index of first element that might have a free region. */
 534         unsigned        regs_minelm;
 535
 536         /* Number of free regions in run. */
 537         unsigned        nfree;
 538
 539         /* Bitmask of in-use regions (0: in use, 1: free). */
 540         unsigned        regs_mask[1]; /* Dynamically sized. */
 541 };
 542 typedef struct arena_run_tree_s arena_run_tree_t;
 543 RB_HEAD(arena_run_tree_s, arena_run_s);
 544
 545 struct arena_bin_s {
 546         /*
 547          * Current run being used to service allocations of this bin's size
 548          * class.
 549          */
 550         arena_run_t     *runcur;
 551
 552         /*
 553          * Tree of non-full runs.  This tree is used when looking for an
 554          * existing run when runcur is no longer usable.  We choose the
 555          * non-full run that is lowest in memory; this policy tends to keep
 556          * objects packed well, and it can also help reduce the number of
 557          * almost-empty chunks.
 558          */
 559         arena_run_tree_t runs;
 560
 561         /* Size of regions in a run for this bin's size class. */
 562         size_t          reg_size;
 563
 564         /* Total size of a run for this bin's size class. */
 565         size_t          run_size;
 566
 567         /* Total number of regions in a run for this bin's size class. */
 568         uint32_t        nregs;
 569
 570         /* Number of elements in a run's regs_mask for this bin's size class. */
 571         uint32_t        regs_mask_nelms;
 572
 573         /* Offset of first region in a run for this bin's size class. */
 574         uint32_t        reg0_offset;
 575
 576 #ifdef MALLOC_STATS
 577         /* Bin statistics. */
 578         malloc_bin_stats_t stats;
 579 #endif
 580 };
 581
 582 struct arena_s {
 583 #ifdef MALLOC_DEBUG
 584         uint32_t                magic;
 585 #  define ARENA_MAGIC 0x947d3d24
 586 #endif
 587
 588         /* All operations on this arena require that mtx be locked. */
 589         malloc_mutex_t          mtx;
 590
 591 #ifdef MALLOC_STATS
 592         arena_stats_t           stats;
 593 #endif
 594
 595         /*
 596          * Tree of chunks this arena manages.
 597          */
 598         arena_chunk_tree_t      chunks;
 599
 600         /*
 601          * In order to avoid rapid chunk allocation/deallocation when an arena
 602          * oscillates right on the cusp of needing a new chunk, cache the most
 603          * recently freed chunk.  This caching is disabled by opt_hint.
 604          *
 605          * There is one spare chunk per arena, rather than one spare total, in
 606          * order to avoid interactions between multiple threads that could make
 607          * a single spare inadequate.
 608          */
 609         arena_chunk_t *spare;
 610
 611         /*
 612          * bins is used to store rings of free regions of the following sizes,
 613          * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
 614          *
 615          *   bins[i] | size |
 616          *   --------+------+
 617          *        0  |    2 |
 618          *        1  |    4 |
 619          *        2  |    8 |
 620          *   --------+------+
 621          *        3  |   16 |
 622          *        4  |   32 |
 623          *        5  |   48 |
 624          *        6  |   64 |
 625          *           :      :
 626          *           :      :
 627          *       33  |  496 |
 628          *       34  |  512 |
 629          *   --------+------+
 630          *       35  | 1024 |
 631          *       36  | 2048 |
 632          *   --------+------+
 633          */
 634         arena_bin_t             bins[1]; /* Dynamically sized. */
 635 };
 636
 637 /******************************************************************************/
 638 /*
 639  * Data.
 640  */
 641
 642 /* Number of CPUs. */
 643 static unsigned         ncpus;
 644
 645 /* VM page size. */
 646 static size_t           pagesize;
 647 static size_t           pagesize_mask;
 648 static int              pagesize_2pow;
 649
 650 /* Various bin-related settings. */
 651 static size_t           bin_maxclass; /* Max size class for bins. */
 652 static unsigned         ntbins; /* Number of (2^n)-spaced tiny bins. */
 653 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 654 static unsigned         nsbins; /* Number of (2^n)-spaced sub-page bins. */
 655 static size_t           small_min;
 656 static size_t           small_max;
 657
 658 /* Various quantum-related settings. */
 659 static size_t           quantum;
 660 static size_t           quantum_mask; /* (quantum - 1). */
 661
 662 /* Various chunk-related settings. */
 663 static size_t           chunksize;
 664 static size_t           chunksize_mask; /* (chunksize - 1). */
 665 static int              chunksize_2pow;
 666 static unsigned         chunk_npages;
 667 static unsigned         arena_chunk_header_npages;
 668 static size_t           arena_maxclass; /* Max size class for arenas. */
 669
 670 /********/
 671 /*
 672  * Chunks.
 673  */
 674
 675 /* Protects chunk-related data structures. */
 676 static malloc_mutex_t   chunks_mtx;
 677
 678 /* Tree of chunks that are stand-alone huge allocations. */
 679 static chunk_tree_t     huge;
 680
 681 #ifdef USE_BRK
 682 /*
 683  * Try to use brk for chunk-size allocations, due to address space constraints.
 684  */
 685 /*
 686  * Protects sbrk() calls.  This must be separate from chunks_mtx, since
 687  * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
 688  * could cause recursive lock acquisition).
 689  */
 690 static malloc_mutex_t   brk_mtx;
 691 /* Result of first sbrk(0) call. */
 692 static void             *brk_base;
 693 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
 694 static void             *brk_prev;
 695 /* Current upper limit on brk addresses. */
 696 static void             *brk_max;
 697 #endif
 698
 699 #ifdef MALLOC_STATS
 700 /* Huge allocation statistics. */
 701 static uint64_t         huge_nmalloc;
 702 static uint64_t         huge_ndalloc;
 703 static uint64_t         huge_nralloc;
 704 static size_t           huge_allocated;
 705 #endif
 706
 707 /*
 708  * Tree of chunks that were previously allocated.  This is used when allocating
 709  * chunks, in an attempt to re-use address space.
 710  */
 711 static chunk_tree_t     old_chunks;
 712
 713 /****************************/
 714 /*
 715  * base (internal allocation).
 716  */
 717
 718 /*
 719  * Current pages that are being used for internal memory allocations.  These
 720  * pages are carved up in cacheline-size quanta, so that there is no chance of
 721  * false cache line sharing.
 722  */
 723 static void             *base_pages;
 724 static void             *base_next_addr;
 725 static void             *base_past_addr; /* Addr immediately past base_pages. */
 726 static chunk_node_t     *base_chunk_nodes; /* LIFO cache of chunk nodes. */
 727 static malloc_mutex_t   base_mtx;
 728 #ifdef MALLOC_STATS
 729 static size_t           base_mapped;
 730 #endif
 731
 732 /********/
 733 /*
 734  * Arenas.
 735  */
 736
 737 /*
 738  * Arenas that are used to service external requests.  Not all elements of the
 739  * arenas array are necessarily used; arenas are created lazily as needed.
 740  */
 741 static arena_t          **arenas;
 742 static unsigned         narenas;
 743 static unsigned         next_arena;
 744 static malloc_mutex_t   arenas_mtx; /* Protects arenas initialization. */
 745
 746 #ifndef NO_TLS
 747 /*
 748  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 749  * for allocations.
 750  */
 751 static __thread arena_t *arenas_map;
 752 #define get_arenas_map()        (arenas_map)
 753 #define set_arenas_map(x)       (arenas_map = x)
 754 #else
 755 static thread_key_t arenas_map_key;
 756 #define get_arenas_map()        thr_getspecific(arenas_map_key)
 757 #define set_arenas_map(x)       thr_setspecific(arenas_map_key, x)
 758 #endif
 759
 760 #ifdef MALLOC_STATS
 761 /* Chunk statistics. */
 762 static chunk_stats_t    stats_chunks;
 763 #endif
 764
 765 /*******************************/
 766 /*
 767  * Runtime configuration options.
 768  */
 769 const char      *_malloc_options;
 770
 771 #ifndef MALLOC_PRODUCTION
 772 static bool     opt_abort = true;
 773 static bool     opt_junk = true;
 774 #else
 775 static bool     opt_abort = false;
 776 static bool     opt_junk = false;
 777 #endif
 778 static bool     opt_hint = false;
 779 static bool     opt_print_stats = false;
 780 static int      opt_quantum_2pow = QUANTUM_2POW_MIN;
 781 static int      opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
 782 static int      opt_chunk_2pow = CHUNK_2POW_DEFAULT;
 783 static bool     opt_utrace = false;
 784 static bool     opt_sysv = false;
 785 static bool     opt_xmalloc = false;
 786 static bool     opt_zero = false;
 787 static int32_t  opt_narenas_lshift = 0;
 788
 789 typedef struct {
 790         void    *p;
 791         size_t  s;
 792         void    *r;
 793 } malloc_utrace_t;
 794
 795 #define UTRACE(a, b, c)                                                 \
 796         if (opt_utrace) {                                               \
 797                 malloc_utrace_t ut;                                     \
 798                 ut.p = a;                                               \
 799                 ut.s = b;                                               \
 800                 ut.r = c;                                               \
 801                 xutrace(&ut, sizeof(ut));                               \
 802         }
 803
 804 /******************************************************************************/
 805 /*
 806  * Begin function prototypes for non-inline static functions.
 807  */
 808
 809 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
 810                 const char *p4);
 811 #ifdef MALLOC_STATS
 812 static void     malloc_printf(const char *format, ...);
 813 #endif
 814 static char     *umax2s(uintmax_t x, char *s);
 815 static bool     base_pages_alloc(size_t minsize);
 816 static void     *base_alloc(size_t size);
 817 static chunk_node_t *base_chunk_node_alloc(void);
 818 static void     base_chunk_node_dealloc(chunk_node_t *node);
 819 #ifdef MALLOC_STATS
 820 static void     stats_print(arena_t *arena);
 821 #endif
 822 static void     *pages_map(void *addr, size_t size);
 823 static void     *pages_map_align(void *addr, size_t size, int align);
 824 static void     pages_unmap(void *addr, size_t size);
 825 static void     *chunk_alloc(size_t size);
 826 static void     chunk_dealloc(void *chunk, size_t size);
 827 static void     arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
 828 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
 829 static void     arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
 830 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
 831 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
 832 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
 833 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
 834 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
 835 static void     *arena_malloc(arena_t *arena, size_t size);
 836 static void     *arena_palloc(arena_t *arena, size_t alignment, size_t size,
 837     size_t alloc_size);
 838 static size_t   arena_salloc(const void *ptr);
 839 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
 840 static void     arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 841 static bool     arena_new(arena_t *arena);
 842 static arena_t  *arenas_extend(unsigned ind);
 843 static void     *huge_malloc(size_t size);
 844 static void     *huge_palloc(size_t alignment, size_t size);
 845 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
 846 static void     huge_dalloc(void *ptr);
 847 static void     *imalloc(size_t size);
 848 static void     *ipalloc(size_t alignment, size_t size);
 849 static void     *icalloc(size_t size);
 850 static size_t   isalloc(const void *ptr);
 851 static void     *iralloc(void *ptr, size_t size);
 852 static void     idalloc(void *ptr);
 853 static void     malloc_print_stats(void);
 854 static bool     malloc_init_hard(void);
 855
 856 /*
 857  * End function prototypes.
 858  */
 859 /******************************************************************************/
 860 /*
 861  * Begin mutex.
 862  */
 863
 864 #ifdef __NetBSD__
 865 #define malloc_mutex_init(m)    mutex_init(m, NULL)
 866 #define malloc_mutex_lock(m)    mutex_lock(m)
 867 #define malloc_mutex_unlock(m)  mutex_unlock(m)
 868 #else   /* __NetBSD__ */
 869 static inline void
 870 malloc_mutex_init(malloc_mutex_t *a_mutex)
 871 {
 872         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
 873
 874         a_mutex->lock = lock;
 875 }
 876
 877 static inline void
 878 malloc_mutex_lock(malloc_mutex_t *a_mutex)
 879 {
 880
 881         if (__isthreaded)
 882                 _SPINLOCK(&a_mutex->lock);
 883 }
 884
 885 static inline void
 886 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
 887 {
 888
 889         if (__isthreaded)
 890                 _SPINUNLOCK(&a_mutex->lock);
 891 }
 892 #endif  /* __NetBSD__ */
 893
 894 /*
 895  * End mutex.
 896  */
 897 /******************************************************************************/
 898 /*
 899  * Begin Utility functions/macros.
 900  */
 901
 902 /* Return the chunk address for allocation address a. */
 903 #define CHUNK_ADDR2BASE(a)                                              \
 904         ((void *)((uintptr_t)(a) & ~chunksize_mask))
 905
 906 /* Return the chunk offset of address a. */
 907 #define CHUNK_ADDR2OFFSET(a)                                            \
 908         ((size_t)((uintptr_t)(a) & chunksize_mask))
 909
 910 /* Return the smallest chunk multiple that is >= s. */
 911 #define CHUNK_CEILING(s)                                                \
 912         (((s) + chunksize_mask) & ~chunksize_mask)
 913
 914 /* Return the smallest cacheline multiple that is >= s. */
 915 #define CACHELINE_CEILING(s)                                            \
 916         (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
 917
 918 /* Return the smallest quantum multiple that is >= a. */
 919 #define QUANTUM_CEILING(a)                                              \
 920         (((a) + quantum_mask) & ~quantum_mask)
 921
 922 /* Return the smallest pagesize multiple that is >= s. */
 923 #define PAGE_CEILING(s)                                                 \
 924         (((s) + pagesize_mask) & ~pagesize_mask)
 925
 926 /* Compute the smallest power of 2 that is >= x. */
 927 static inline size_t
 928 pow2_ceil(size_t x)
 929 {
 930
 931         x--;
 932         x |= x >> 1;
 933         x |= x >> 2;
 934         x |= x >> 4;
 935         x |= x >> 8;
 936         x |= x >> 16;
 937 #if (SIZEOF_PTR == 8)
 938         x |= x >> 32;
 939 #endif
 940         x++;
 941         return (x);
 942 }
 943
 944 static void
 945 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 946 {
 947
 948         write(STDERR_FILENO, p1, strlen(p1));
 949         write(STDERR_FILENO, p2, strlen(p2));
 950         write(STDERR_FILENO, p3, strlen(p3));
 951         write(STDERR_FILENO, p4, strlen(p4));
 952 }
 953
 954 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
 955             const char *p4) = wrtmessage;
 956
 957 #ifdef MALLOC_STATS
 958 /*
 959  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 960  */
 961 static void
 962 malloc_printf(const char *format, ...)
 963 {
 964         char buf[4096];
 965         va_list ap;
 966
 967         va_start(ap, format);
 968         vsnprintf(buf, sizeof(buf), format, ap);
 969         va_end(ap);
 970         _malloc_message(buf, "", "", "");
 971 }
 972 #endif
 973
 974 /*
 975  * We don't want to depend on vsnprintf() for production builds, since that can
 976  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
 977  * integer printing functionality, so that malloc_printf() use can be limited to
 978  * MALLOC_STATS code.
 979  */
 980 #define UMAX2S_BUFSIZE  21
 981 static char *
 982 umax2s(uintmax_t x, char *s)
 983 {
 984         unsigned i;
 985
 986         /* Make sure UMAX2S_BUFSIZE is large enough. */
 987         /* LINTED */
 988         assert(sizeof(uintmax_t) <= 8);
 989
 990         i = UMAX2S_BUFSIZE - 1;
 991         s[i] = '\0';
 992         do {
 993                 i--;
 994                 s[i] = "0123456789"[(int)x % 10];
 995                 x /= (uintmax_t)10LL;
 996         } while (x > 0);
 997
 998         return (&s[i]);
 999 }
1000
1001 /******************************************************************************/
1002
1003 static bool
1004 base_pages_alloc(size_t minsize)
1005 {
1006         size_t csize = 0;
1007
1008 #ifdef USE_BRK
1009         /*
1010          * Do special brk allocation here, since base allocations don't need to
1011          * be chunk-aligned.
1012          */
1013         if (brk_prev != (void *)-1) {
1014                 void *brk_cur;
1015                 intptr_t incr;
1016
1017                 if (minsize != 0)
1018                         csize = CHUNK_CEILING(minsize);
1019
1020                 malloc_mutex_lock(&brk_mtx);
1021                 do {
1022                         /* Get the current end of brk. */
1023                         brk_cur = sbrk(0);
1024
1025                         /*
1026                          * Calculate how much padding is necessary to
1027                          * chunk-align the end of brk.  Don't worry about
1028                          * brk_cur not being chunk-aligned though.
1029                          */
1030                         incr = (intptr_t)chunksize
1031                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1032                         assert(incr >= 0);
1033                         if ((size_t)incr < minsize)
1034                                 incr += csize;
1035
1036                         brk_prev = sbrk(incr);
1037                         if (brk_prev == brk_cur) {
1038                                 /* Success. */
1039                                 malloc_mutex_unlock(&brk_mtx);
1040                                 base_pages = brk_cur;
1041                                 base_next_addr = base_pages;
1042                                 base_past_addr = (void *)((uintptr_t)base_pages
1043                                     + incr);
1044 #ifdef MALLOC_STATS
1045                                 base_mapped += incr;
1046 #endif
1047                                 return (false);
1048                         }
1049                 } while (brk_prev != (void *)-1);
1050                 malloc_mutex_unlock(&brk_mtx);
1051         }
1052         if (minsize == 0) {
1053                 /*
1054                  * Failure during initialization doesn't matter, so avoid
1055                  * falling through to the mmap-based page mapping code.
1056                  */
1057                 return (true);
1058         }
1059 #endif
1060         assert(minsize != 0);
1061         csize = PAGE_CEILING(minsize);
1062         base_pages = pages_map(NULL, csize);
1063         if (base_pages == NULL)
1064                 return (true);
1065         base_next_addr = base_pages;
1066         base_past_addr = (void *)((uintptr_t)base_pages + csize);
1067 #ifdef MALLOC_STATS
1068         base_mapped += csize;
1069 #endif
1070         return (false);
1071 }
1072
1073 static void *
1074 base_alloc(size_t size)
1075 {
1076         void *ret;
1077         size_t csize;
1078
1079         /* Round size up to nearest multiple of the cacheline size. */
1080         csize = CACHELINE_CEILING(size);
1081
1082         malloc_mutex_lock(&base_mtx);
1083
1084         /* Make sure there's enough space for the allocation. */
1085         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1086                 if (base_pages_alloc(csize)) {
1087                         ret = NULL;
1088                         goto RETURN;
1089                 }
1090         }
1091
1092         /* Allocate. */
1093         ret = base_next_addr;
1094         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1095
1096 RETURN:
1097         malloc_mutex_unlock(&base_mtx);
1098         return (ret);
1099 }
1100
1101 static chunk_node_t *
1102 base_chunk_node_alloc(void)
1103 {
1104         chunk_node_t *ret;
1105
1106         malloc_mutex_lock(&base_mtx);
1107         if (base_chunk_nodes != NULL) {
1108                 ret = base_chunk_nodes;
1109                 /* LINTED */
1110                 base_chunk_nodes = *(chunk_node_t **)ret;
1111                 malloc_mutex_unlock(&base_mtx);
1112         } else {
1113                 malloc_mutex_unlock(&base_mtx);
1114                 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1115         }
1116
1117         return (ret);
1118 }
1119
1120 static void
1121 base_chunk_node_dealloc(chunk_node_t *node)
1122 {
1123
1124         malloc_mutex_lock(&base_mtx);
1125         /* LINTED */
1126         *(chunk_node_t **)node = base_chunk_nodes;
1127         base_chunk_nodes = node;
1128         malloc_mutex_unlock(&base_mtx);
1129 }
1130
1131 /******************************************************************************/
1132
1133 #ifdef MALLOC_STATS
1134 static void
1135 stats_print(arena_t *arena)
1136 {
1137         unsigned i;
1138         int gap_start;
1139
1140         malloc_printf(
1141             "          allocated/mapped            nmalloc      ndalloc\n");
1142
1143         malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1144             arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1145             arena->stats.ndalloc_small);
1146         malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1147             arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1148             arena->stats.ndalloc_large);
1149         malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1150             arena->stats.allocated_small + arena->stats.allocated_large,
1151             arena->stats.mapped,
1152             arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1153             arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1154
1155         malloc_printf("bins:     bin   size regs pgs  requests   newruns"
1156             "    reruns maxruns curruns\n");
1157         for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1158                 if (arena->bins[i].stats.nrequests == 0) {
1159                         if (gap_start == -1)
1160                                 gap_start = i;
1161                 } else {
1162                         if (gap_start != -1) {
1163                                 if (i > gap_start + 1) {
1164                                         /* Gap of more than one size class. */
1165                                         malloc_printf("[%u..%u]\n",
1166                                             gap_start, i - 1);
1167                                 } else {
1168                                         /* Gap of one size class. */
1169                                         malloc_printf("[%u]\n", gap_start);
1170                                 }
1171                                 gap_start = -1;
1172                         }
1173                         malloc_printf(
1174                             "%13u %1s %4u %4u %3u %9llu %9llu"
1175                             " %9llu %7lu %7lu\n",
1176                             i,
1177                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1178                             arena->bins[i].reg_size,
1179                             arena->bins[i].nregs,
1180                             arena->bins[i].run_size >> pagesize_2pow,
1181                             arena->bins[i].stats.nrequests,
1182                             arena->bins[i].stats.nruns,
1183                             arena->bins[i].stats.reruns,
1184                             arena->bins[i].stats.highruns,
1185                             arena->bins[i].stats.curruns);
1186                 }
1187         }
1188         if (gap_start != -1) {
1189                 if (i > gap_start + 1) {
1190                         /* Gap of more than one size class. */
1191                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1192                 } else {
1193                         /* Gap of one size class. */
1194                         malloc_printf("[%u]\n", gap_start);
1195                 }
1196         }
1197 }
1198 #endif
1199
1200 /*
1201  * End Utility functions/macros.
1202  */
1203 /******************************************************************************/
1204 /*
1205  * Begin chunk management functions.
1206  */
1207
1208 #ifndef lint
1209 static inline int
1210 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1211 {
1212
1213         assert(a != NULL);
1214         assert(b != NULL);
1215
1216         if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1217                 return (-1);
1218         else if (a->chunk == b->chunk)
1219                 return (0);
1220         else
1221                 return (1);
1222 }
1223
1224 /* Generate red-black tree code for chunks. */
1225 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1226 #endif
1227
1228 static void *
1229 pages_map_align(void *addr, size_t size, int align)
1230 {
1231         void *ret;
1232
1233         /*
1234          * We don't use MAP_FIXED here, because it can cause the *replacement*
1235          * of existing mappings, and we only want to create new mappings.
1236          */
1237         ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1238             MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1239         assert(ret != NULL);
1240
1241         if (ret == MAP_FAILED)
1242                 ret = NULL;
1243         else if (addr != NULL && ret != addr) {
1244                 /*
1245                  * We succeeded in mapping memory, but not in the right place.
1246                  */
1247                 if (munmap(ret, size) == -1) {
1248                         char buf[STRERROR_BUF];
1249
1250                         STRERROR_R(errno, buf, sizeof(buf));
1251                         _malloc_message(getprogname(),
1252                             ": (malloc) Error in munmap(): ", buf, "\n");
1253                         if (opt_abort)
1254                                 abort();
1255                 }
1256                 ret = NULL;
1257         }
1258
1259         assert(ret == NULL || (addr == NULL && ret != addr)
1260             || (addr != NULL && ret == addr));
1261         return (ret);
1262 }
1263
1264 static void *
1265 pages_map(void *addr, size_t size)
1266 {
1267
1268         return pages_map_align(addr, size, 0);
1269 }
1270
1271 static void
1272 pages_unmap(void *addr, size_t size)
1273 {
1274
1275         if (munmap(addr, size) == -1) {
1276                 char buf[STRERROR_BUF];
1277
1278                 STRERROR_R(errno, buf, sizeof(buf));
1279                 _malloc_message(getprogname(),
1280                     ": (malloc) Error in munmap(): ", buf, "\n");
1281                 if (opt_abort)
1282                         abort();
1283         }
1284 }
1285
1286 static void *
1287 chunk_alloc(size_t size)
1288 {
1289         void *ret, *chunk;
1290         chunk_node_t *tchunk, *delchunk;
1291
1292         assert(size != 0);
1293         assert((size & chunksize_mask) == 0);
1294
1295         malloc_mutex_lock(&chunks_mtx);
1296
1297         if (size == chunksize) {
1298                 /*
1299                  * Check for address ranges that were previously chunks and try
1300                  * to use them.
1301                  */
1302
1303                 /* LINTED */
1304                 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1305                 while (tchunk != NULL) {
1306                         /* Found an address range.  Try to recycle it. */
1307
1308                         chunk = tchunk->chunk;
1309                         delchunk = tchunk;
1310                         /* LINTED */
1311                         tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1312
1313                         /* Remove delchunk from the tree. */
1314                         /* LINTED */
1315                         RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1316                         base_chunk_node_dealloc(delchunk);
1317
1318 #ifdef USE_BRK
1319                         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1320                             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1321                                 /* Re-use a previously freed brk chunk. */
1322                                 ret = chunk;
1323                                 goto RETURN;
1324                         }
1325 #endif
1326                         if ((ret = pages_map(chunk, size)) != NULL) {
1327                                 /* Success. */
1328                                 goto RETURN;
1329                         }
1330                 }
1331         }
1332
1333         /*
1334          * Try to over-allocate, but allow the OS to place the allocation
1335          * anywhere.  Beware of size_t wrap-around.
1336          */
1337         if (size + chunksize > size) {
1338                 if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1339                     != NULL) {
1340                         goto RETURN;
1341                 }
1342         }
1343
1344 #ifdef USE_BRK
1345         /*
1346          * Try to create allocations in brk, in order to make full use of
1347          * limited address space.
1348          */
1349         if (brk_prev != (void *)-1) {
1350                 void *brk_cur;
1351                 intptr_t incr;
1352
1353                 /*
1354                  * The loop is necessary to recover from races with other
1355                  * threads that are using brk for something other than malloc.
1356                  */
1357                 malloc_mutex_lock(&brk_mtx);
1358                 do {
1359                         /* Get the current end of brk. */
1360                         brk_cur = sbrk(0);
1361
1362                         /*
1363                          * Calculate how much padding is necessary to
1364                          * chunk-align the end of brk.
1365                          */
1366                         incr = (intptr_t)size
1367                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1368                         if (incr == (intptr_t)size) {
1369                                 ret = brk_cur;
1370                         } else {
1371                                 ret = (void *)((intptr_t)brk_cur + incr);
1372                                 incr += size;
1373                         }
1374
1375                         brk_prev = sbrk(incr);
1376                         if (brk_prev == brk_cur) {
1377                                 /* Success. */
1378                                 malloc_mutex_unlock(&brk_mtx);
1379                                 brk_max = (void *)((intptr_t)ret + size);
1380                                 goto RETURN;
1381                         }
1382                 } while (brk_prev != (void *)-1);
1383                 malloc_mutex_unlock(&brk_mtx);
1384         }
1385 #endif
1386
1387         /* All strategies for allocation failed. */
1388         ret = NULL;
1389 RETURN:
1390         if (ret != NULL) {
1391                 chunk_node_t key;
1392                 /*
1393                  * Clean out any entries in old_chunks that overlap with the
1394                  * memory we just allocated.
1395                  */
1396                 key.chunk = ret;
1397                 /* LINTED */
1398                 tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1399                 while (tchunk != NULL
1400                     && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1401                     && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1402                         delchunk = tchunk;
1403                         /* LINTED */
1404                         tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1405                         /* LINTED */
1406                         RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1407                         base_chunk_node_dealloc(delchunk);
1408                 }
1409
1410         }
1411 #ifdef MALLOC_STATS
1412         if (ret != NULL) {
1413                 stats_chunks.nchunks += (size / chunksize);
1414                 stats_chunks.curchunks += (size / chunksize);
1415         }
1416         if (stats_chunks.curchunks > stats_chunks.highchunks)
1417                 stats_chunks.highchunks = stats_chunks.curchunks;
1418 #endif
1419         malloc_mutex_unlock(&chunks_mtx);
1420
1421         assert(CHUNK_ADDR2BASE(ret) == ret);
1422         return (ret);
1423 }
1424
1425 static void
1426 chunk_dealloc(void *chunk, size_t size)
1427 {
1428         chunk_node_t *node;
1429
1430         assert(chunk != NULL);
1431         assert(CHUNK_ADDR2BASE(chunk) == chunk);
1432         assert(size != 0);
1433         assert((size & chunksize_mask) == 0);
1434
1435         malloc_mutex_lock(&chunks_mtx);
1436
1437 #ifdef USE_BRK
1438         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1439             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1440                 void *brk_cur;
1441
1442                 malloc_mutex_lock(&brk_mtx);
1443                 /* Get the current end of brk. */
1444                 brk_cur = sbrk(0);
1445
1446                 /*
1447                  * Try to shrink the data segment if this chunk is at the end
1448                  * of the data segment.  The sbrk() call here is subject to a
1449                  * race condition with threads that use brk(2) or sbrk(2)
1450                  * directly, but the alternative would be to leak memory for
1451                  * the sake of poorly designed multi-threaded programs.
1452                  */
1453                 if (brk_cur == brk_max
1454                     && (void *)((uintptr_t)chunk + size) == brk_max
1455                     && sbrk(-(intptr_t)size) == brk_max) {
1456                         malloc_mutex_unlock(&brk_mtx);
1457                         if (brk_prev == brk_max) {
1458                                 /* Success. */
1459                                 brk_prev = (void *)((intptr_t)brk_max
1460                                     - (intptr_t)size);
1461                                 brk_max = brk_prev;
1462                         }
1463                 } else {
1464                         size_t offset;
1465
1466                         malloc_mutex_unlock(&brk_mtx);
1467                         madvise(chunk, size, MADV_FREE);
1468
1469                         /*
1470                          * Iteratively create records of each chunk-sized
1471                          * memory region that 'chunk' is comprised of, so that
1472                          * the address range can be recycled if memory usage
1473                          * increases later on.
1474                          */
1475                         for (offset = 0; offset < size; offset += chunksize) {
1476                                 node = base_chunk_node_alloc();
1477                                 if (node == NULL)
1478                                         break;
1479
1480                                 node->chunk = (void *)((uintptr_t)chunk
1481                                     + (uintptr_t)offset);
1482                                 node->size = chunksize;
1483                                 /* LINTED */
1484                                 RB_INSERT(chunk_tree_s, &old_chunks, node);
1485                         }
1486                 }
1487         } else {
1488 #endif
1489                 pages_unmap(chunk, size);
1490
1491                 /*
1492                  * Make a record of the chunk's address, so that the address
1493                  * range can be recycled if memory usage increases later on.
1494                  * Don't bother to create entries if (size > chunksize), since
1495                  * doing so could cause scalability issues for truly gargantuan
1496                  * objects (many gigabytes or larger).
1497                  */
1498                 if (size == chunksize) {
1499                         node = base_chunk_node_alloc();
1500                         if (node != NULL) {
1501                                 node->chunk = (void *)(uintptr_t)chunk;
1502                                 node->size = chunksize;
1503                                 /* LINTED */
1504                                 RB_INSERT(chunk_tree_s, &old_chunks, node);
1505                         }
1506                 }
1507 #ifdef USE_BRK
1508         }
1509 #endif
1510
1511 #ifdef MALLOC_STATS
1512         stats_chunks.curchunks -= (size / chunksize);
1513 #endif
1514         malloc_mutex_unlock(&chunks_mtx);
1515 }
1516
1517 /*
1518  * End chunk management functions.
1519  */
1520 /******************************************************************************/
1521 /*
1522  * Begin arena.
1523  */
1524
1525 /*
1526  * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1527  *
1528  * We maintain at least one block of arenas.  Usually there are more.
1529  * The blocks are $ncpu arenas in size.  Whole blocks are 'hashed'
1530  * amongst threads.  To accomplish this, next_arena advances only in
1531  * ncpu steps.
1532  */
1533 static __noinline arena_t *
1534 choose_arena_hard(void)
1535 {
1536         unsigned i, curcpu;
1537         arena_t **map;
1538
1539         /* Initialize the current block of arenas and advance to next. */
1540         malloc_mutex_lock(&arenas_mtx);
1541         assert(next_arena % ncpus == 0);
1542         assert(narenas % ncpus == 0);
1543         map = &arenas[next_arena];
1544         set_arenas_map(map);
1545         for (i = 0; i < ncpus; i++) {
1546                 if (arenas[next_arena] == NULL)
1547                         arenas_extend(next_arena);
1548                 next_arena = (next_arena + 1) % narenas;
1549         }
1550         malloc_mutex_unlock(&arenas_mtx);
1551
1552         /*
1553          * If we were unable to allocate an arena above, then default to
1554          * the first arena, which is always present.
1555          */
1556         curcpu = thr_curcpu();
1557         if (map[curcpu] != NULL)
1558                 return map[curcpu];
1559         return arenas[0];
1560 }
1561
1562 static inline arena_t *
1563 choose_arena(void)
1564 {
1565         unsigned curcpu;
1566         arena_t **map;
1567
1568         map = get_arenas_map();
1569         curcpu = thr_curcpu();
1570         if (__predict_true(map != NULL && map[curcpu] != NULL))
1571                 return map[curcpu];
1572
1573         return choose_arena_hard();
1574 }
1575
1576 #ifndef lint
1577 static inline int
1578 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1579 {
1580
1581         assert(a != NULL);
1582         assert(b != NULL);
1583
1584         if ((uintptr_t)a < (uintptr_t)b)
1585                 return (-1);
1586         else if (a == b)
1587                 return (0);
1588         else
1589                 return (1);
1590 }
1591
1592 /* Generate red-black tree code for arena chunks. */
1593 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1594 #endif
1595
1596 #ifndef lint
1597 static inline int
1598 arena_run_comp(arena_run_t *a, arena_run_t *b)
1599 {
1600
1601         assert(a != NULL);
1602         assert(b != NULL);
1603
1604         if ((uintptr_t)a < (uintptr_t)b)
1605                 return (-1);
1606         else if (a == b)
1607                 return (0);
1608         else
1609                 return (1);
1610 }
1611
1612 /* Generate red-black tree code for arena runs. */
1613 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1614 #endif
1615
1616 static inline void *
1617 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1618 {
1619         void *ret;
1620         unsigned i, mask, bit, regind;
1621
1622         assert(run->magic == ARENA_RUN_MAGIC);
1623         assert(run->regs_minelm < bin->regs_mask_nelms);
1624
1625         /*
1626          * Move the first check outside the loop, so that run->regs_minelm can
1627          * be updated unconditionally, without the possibility of updating it
1628          * multiple times.
1629          */
1630         i = run->regs_minelm;
1631         mask = run->regs_mask[i];
1632         if (mask != 0) {
1633                 /* Usable allocation found. */
1634                 bit = ffs((int)mask) - 1;
1635
1636                 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1637                 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1638                     + (bin->reg_size * regind));
1639
1640                 /* Clear bit. */
1641                 mask ^= (1 << bit);
1642                 run->regs_mask[i] = mask;
1643
1644                 return (ret);
1645         }
1646
1647         for (i++; i < bin->regs_mask_nelms; i++) {
1648                 mask = run->regs_mask[i];
1649                 if (mask != 0) {
1650                         /* Usable allocation found. */
1651                         bit = ffs((int)mask) - 1;
1652
1653                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1654                         ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1655                             + (bin->reg_size * regind));
1656
1657                         /* Clear bit. */
1658                         mask ^= (1 << bit);
1659                         run->regs_mask[i] = mask;
1660
1661                         /*
1662                          * Make a note that nothing before this element
1663                          * contains a free region.
1664                          */
1665                         run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1666
1667                         return (ret);
1668                 }
1669         }
1670         /* Not reached. */
1671         /* LINTED */
1672         assert(0);
1673         return (NULL);
1674 }
1675
1676 static inline void
1677 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1678 {
1679         /*
1680          * To divide by a number D that is not a power of two we multiply
1681          * by (2^21 / D) and then right shift by 21 positions.
1682          *
1683          *   X / D
1684          *
1685          * becomes
1686          *
1687          *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1688          */
1689 #define SIZE_INV_SHIFT 21
1690 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1691         static const unsigned size_invs[] = {
1692             SIZE_INV(3),
1693             SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1694             SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1695             SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1696             SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1697             SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1698             SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1699             SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1700 #if (QUANTUM_2POW_MIN < 4)
1701             ,
1702             SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1703             SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1704             SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1705             SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1706             SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1707             SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1708             SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1709             SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1710 #endif
1711         };
1712         unsigned diff, regind, elm, bit;
1713
1714         /* LINTED */
1715         assert(run->magic == ARENA_RUN_MAGIC);
1716         assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1717             >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1718
1719         /*
1720          * Avoid doing division with a variable divisor if possible.  Using
1721          * actual division here can reduce allocator throughput by over 20%!
1722          */
1723         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1724         if ((size & (size - 1)) == 0) {
1725                 /*
1726                  * log2_table allows fast division of a power of two in the
1727                  * [1..128] range.
1728                  *
1729                  * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1730                  */
1731                 static const unsigned char log2_table[] = {
1732                     0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1733                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1734                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1735                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1736                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1737                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1738                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1739                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1740                 };
1741
1742                 if (size <= 128)
1743                         regind = (diff >> log2_table[size - 1]);
1744                 else if (size <= 32768)
1745                         regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1746                 else {
1747                         /*
1748                          * The page size is too large for us to use the lookup
1749                          * table.  Use real division.
1750                          */
1751                         regind = (unsigned)(diff / size);
1752                 }
1753         } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1754             << QUANTUM_2POW_MIN) + 2) {
1755                 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1756                 regind >>= SIZE_INV_SHIFT;
1757         } else {
1758                 /*
1759                  * size_invs isn't large enough to handle this size class, so
1760                  * calculate regind using actual division.  This only happens
1761                  * if the user increases small_max via the 'S' runtime
1762                  * configuration option.
1763                  */
1764                 regind = (unsigned)(diff / size);
1765         };
1766         assert(diff == regind * size);
1767         assert(regind < bin->nregs);
1768
1769         elm = regind >> (SIZEOF_INT_2POW + 3);
1770         if (elm < run->regs_minelm)
1771                 run->regs_minelm = elm;
1772         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1773         assert((run->regs_mask[elm] & (1 << bit)) == 0);
1774         run->regs_mask[elm] |= (1 << bit);
1775 #undef SIZE_INV
1776 #undef SIZE_INV_SHIFT
1777 }
1778
1779 static void
1780 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1781 {
1782         arena_chunk_t *chunk;
1783         unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1784         unsigned i;
1785
1786         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1787         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1788             >> pagesize_2pow);
1789         total_pages = chunk->map[run_ind].npages;
1790         need_pages = (unsigned)(size >> pagesize_2pow);
1791         assert(need_pages <= total_pages);
1792         rem_pages = total_pages - need_pages;
1793
1794         /* Split enough pages from the front of run to fit allocation size. */
1795         map_offset = run_ind;
1796         for (i = 0; i < need_pages; i++) {
1797                 chunk->map[map_offset + i].npages = need_pages;
1798                 chunk->map[map_offset + i].pos = i;
1799         }
1800
1801         /* Keep track of trailing unused pages for later use. */
1802         if (rem_pages > 0) {
1803                 /* Update map for trailing pages. */
1804                 map_offset += need_pages;
1805                 chunk->map[map_offset].npages = rem_pages;
1806                 chunk->map[map_offset].pos = POS_FREE;
1807                 chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1808                 chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1809         }
1810
1811         chunk->pages_used += need_pages;
1812 }
1813
1814 static arena_chunk_t *
1815 arena_chunk_alloc(arena_t *arena)
1816 {
1817         arena_chunk_t *chunk;
1818
1819         if (arena->spare != NULL) {
1820                 chunk = arena->spare;
1821                 arena->spare = NULL;
1822
1823                 /* LINTED */
1824                 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1825         } else {
1826                 chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1827                 if (chunk == NULL)
1828                         return (NULL);
1829 #ifdef MALLOC_STATS
1830                 arena->stats.mapped += chunksize;
1831 #endif
1832
1833                 chunk->arena = arena;
1834
1835                 /* LINTED */
1836                 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1837
1838                 /*
1839                  * Claim that no pages are in use, since the header is merely
1840                  * overhead.
1841                  */
1842                 chunk->pages_used = 0;
1843
1844                 chunk->max_frun_npages = chunk_npages -
1845                     arena_chunk_header_npages;
1846                 chunk->min_frun_ind = arena_chunk_header_npages;
1847
1848                 /*
1849                  * Initialize enough of the map to support one maximal free run.
1850                  */
1851                 chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1852                     arena_chunk_header_npages;
1853                 chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1854                 chunk->map[chunk_npages - 1].npages = chunk_npages -
1855                     arena_chunk_header_npages;
1856                 chunk->map[chunk_npages - 1].pos = POS_FREE;
1857         }
1858
1859         return (chunk);
1860 }
1861
1862 static void
1863 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1864 {
1865
1866         /*
1867          * Remove chunk from the chunk tree, regardless of whether this chunk
1868          * will be cached, so that the arena does not use it.
1869          */
1870         /* LINTED */
1871         RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1872
1873         if (opt_hint == false) {
1874                 if (arena->spare != NULL) {
1875                         chunk_dealloc((void *)arena->spare, chunksize);
1876 #ifdef MALLOC_STATS
1877                         arena->stats.mapped -= chunksize;
1878 #endif
1879                 }
1880                 arena->spare = chunk;
1881         } else {
1882                 assert(arena->spare == NULL);
1883                 chunk_dealloc((void *)chunk, chunksize);
1884 #ifdef MALLOC_STATS
1885                 arena->stats.mapped -= chunksize;
1886 #endif
1887         }
1888 }
1889
1890 static arena_run_t *
1891 arena_run_alloc(arena_t *arena, size_t size)
1892 {
1893         arena_chunk_t *chunk;
1894         arena_run_t *run;
1895         unsigned need_npages, limit_pages, compl_need_npages;
1896
1897         assert(size <= (chunksize - (arena_chunk_header_npages <<
1898             pagesize_2pow)));
1899         assert((size & pagesize_mask) == 0);
1900
1901         /*
1902          * Search through arena's chunks in address order for a free run that is
1903          * large enough.  Look for the first fit.
1904          */
1905         need_npages = (unsigned)(size >> pagesize_2pow);
1906         limit_pages = chunk_npages - arena_chunk_header_npages;
1907         compl_need_npages = limit_pages - need_npages;
1908         /* LINTED */
1909         RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1910                 /*
1911                  * Avoid searching this chunk if there are not enough
1912                  * contiguous free pages for there to possibly be a large
1913                  * enough free run.
1914                  */
1915                 if (chunk->pages_used <= compl_need_npages &&
1916                     need_npages <= chunk->max_frun_npages) {
1917                         arena_chunk_map_t *mapelm;
1918                         unsigned i;
1919                         unsigned max_frun_npages = 0;
1920                         unsigned min_frun_ind = chunk_npages;
1921
1922                         assert(chunk->min_frun_ind >=
1923                             arena_chunk_header_npages);
1924                         for (i = chunk->min_frun_ind; i < chunk_npages;) {
1925                                 mapelm = &chunk->map[i];
1926                                 if (mapelm->pos == POS_FREE) {
1927                                         if (mapelm->npages >= need_npages) {
1928                                                 run = (arena_run_t *)
1929                                                     ((uintptr_t)chunk + (i <<
1930                                                     pagesize_2pow));
1931                                                 /* Update page map. */
1932                                                 arena_run_split(arena, run,
1933                                                     size);
1934                                                 return (run);
1935                                         }
1936                                         if (mapelm->npages >
1937                                             max_frun_npages) {
1938                                                 max_frun_npages =
1939                                                     mapelm->npages;
1940                                         }
1941                                         if (i < min_frun_ind) {
1942                                                 min_frun_ind = i;
1943                                                 if (i < chunk->min_frun_ind)
1944                                                         chunk->min_frun_ind = i;
1945                                         }
1946                                 }
1947                                 i += mapelm->npages;
1948                         }
1949                         /*
1950                          * Search failure.  Reset cached chunk->max_frun_npages.
1951                          * chunk->min_frun_ind was already reset above (if
1952                          * necessary).
1953                          */
1954                         chunk->max_frun_npages = max_frun_npages;
1955                 }
1956         }
1957
1958         /*
1959          * No usable runs.  Create a new chunk from which to allocate the run.
1960          */
1961         chunk = arena_chunk_alloc(arena);
1962         if (chunk == NULL)
1963                 return (NULL);
1964         run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1965             pagesize_2pow));
1966         /* Update page map. */
1967         arena_run_split(arena, run, size);
1968         return (run);
1969 }
1970
1971 static void
1972 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1973 {
1974         arena_chunk_t *chunk;
1975         unsigned run_ind, run_pages;
1976
1977         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1978
1979         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1980             >> pagesize_2pow);
1981         assert(run_ind >= arena_chunk_header_npages);
1982         assert(run_ind < (chunksize >> pagesize_2pow));
1983         run_pages = (unsigned)(size >> pagesize_2pow);
1984         assert(run_pages == chunk->map[run_ind].npages);
1985
1986         /* Subtract pages from count of pages used in chunk. */
1987         chunk->pages_used -= run_pages;
1988
1989         /* Mark run as deallocated. */
1990         assert(chunk->map[run_ind].npages == run_pages);
1991         chunk->map[run_ind].pos = POS_FREE;
1992         assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1993         chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
1994
1995         /*
1996          * Tell the kernel that we don't need the data in this run, but only if
1997          * requested via runtime configuration.
1998          */
1999         if (opt_hint)
2000                 madvise(run, size, MADV_FREE);
2001
2002         /* Try to coalesce with neighboring runs. */
2003         if (run_ind > arena_chunk_header_npages &&
2004             chunk->map[run_ind - 1].pos == POS_FREE) {
2005                 unsigned prev_npages;
2006
2007                 /* Coalesce with previous run. */
2008                 prev_npages = chunk->map[run_ind - 1].npages;
2009                 run_ind -= prev_npages;
2010                 assert(chunk->map[run_ind].npages == prev_npages);
2011                 assert(chunk->map[run_ind].pos == POS_FREE);
2012                 run_pages += prev_npages;
2013
2014                 chunk->map[run_ind].npages = run_pages;
2015                 assert(chunk->map[run_ind].pos == POS_FREE);
2016                 chunk->map[run_ind + run_pages - 1].npages = run_pages;
2017                 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2018         }
2019
2020         if (run_ind + run_pages < chunk_npages &&
2021             chunk->map[run_ind + run_pages].pos == POS_FREE) {
2022                 unsigned next_npages;
2023
2024                 /* Coalesce with next run. */
2025                 next_npages = chunk->map[run_ind + run_pages].npages;
2026                 run_pages += next_npages;
2027                 assert(chunk->map[run_ind + run_pages - 1].npages ==
2028                     next_npages);
2029                 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2030
2031                 chunk->map[run_ind].npages = run_pages;
2032                 chunk->map[run_ind].pos = POS_FREE;
2033                 chunk->map[run_ind + run_pages - 1].npages = run_pages;
2034                 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2035         }
2036
2037         if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2038                 chunk->max_frun_npages = chunk->map[run_ind].npages;
2039         if (run_ind < chunk->min_frun_ind)
2040                 chunk->min_frun_ind = run_ind;
2041
2042         /* Deallocate chunk if it is now completely unused. */
2043         if (chunk->pages_used == 0)
2044                 arena_chunk_dealloc(arena, chunk);
2045 }
2046
2047 static arena_run_t *
2048 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2049 {
2050         arena_run_t *run;
2051         unsigned i, remainder;
2052
2053         /* Look for a usable run. */
2054         /* LINTED */
2055         if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2056                 /* run is guaranteed to have available space. */
2057                 /* LINTED */
2058                 RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2059 #ifdef MALLOC_STATS
2060                 bin->stats.reruns++;
2061 #endif
2062                 return (run);
2063         }
2064         /* No existing runs have any space available. */
2065
2066         /* Allocate a new run. */
2067         run = arena_run_alloc(arena, bin->run_size);
2068         if (run == NULL)
2069                 return (NULL);
2070
2071         /* Initialize run internals. */
2072         run->bin = bin;
2073
2074         for (i = 0; i < bin->regs_mask_nelms; i++)
2075                 run->regs_mask[i] = UINT_MAX;
2076         remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2077         if (remainder != 0) {
2078                 /* The last element has spare bits that need to be unset. */
2079                 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2080                     - remainder));
2081         }
2082
2083         run->regs_minelm = 0;
2084
2085         run->nfree = bin->nregs;
2086 #ifdef MALLOC_DEBUG
2087         run->magic = ARENA_RUN_MAGIC;
2088 #endif
2089
2090 #ifdef MALLOC_STATS
2091         bin->stats.nruns++;
2092         bin->stats.curruns++;
2093         if (bin->stats.curruns > bin->stats.highruns)
2094                 bin->stats.highruns = bin->stats.curruns;
2095 #endif
2096         return (run);
2097 }
2098
2099 /* bin->runcur must have space available before this function is called. */
2100 static inline void *
2101 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2102 {
2103         void *ret;
2104
2105         assert(run->magic == ARENA_RUN_MAGIC);
2106         assert(run->nfree > 0);
2107
2108         ret = arena_run_reg_alloc(run, bin);
2109         assert(ret != NULL);
2110         run->nfree--;
2111
2112         return (ret);
2113 }
2114
2115 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2116 static void *
2117 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2118 {
2119
2120         bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2121         if (bin->runcur == NULL)
2122                 return (NULL);
2123         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2124         assert(bin->runcur->nfree > 0);
2125
2126         return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2127 }
2128
2129 /*
2130  * Calculate bin->run_size such that it meets the following constraints:
2131  *
2132  *   *) bin->run_size >= min_run_size
2133  *   *) bin->run_size <= arena_maxclass
2134  *   *) bin->run_size <= RUN_MAX_SMALL
2135  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2136  *
2137  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2138  * also calculated here, since these settings are all interdependent.
2139  */
2140 static size_t
2141 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2142 {
2143         size_t try_run_size, good_run_size;
2144         unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2145         unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2146         float max_ovrhd = RUN_MAX_OVRHD;
2147
2148         assert(min_run_size >= pagesize);
2149         assert(min_run_size <= arena_maxclass);
2150         assert(min_run_size <= RUN_MAX_SMALL);
2151
2152         /*
2153          * Calculate known-valid settings before entering the run_size
2154          * expansion loop, so that the first part of the loop always copies
2155          * valid settings.
2156          *
2157          * The do..while loop iteratively reduces the number of regions until
2158          * the run header and the regions no longer overlap.  A closed formula
2159          * would be quite messy, since there is an interdependency between the
2160          * header's mask length and the number of regions.
2161          */
2162         try_run_size = min_run_size;
2163         try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2164             bin->reg_size) + 1); /* Counter-act the first line of the loop. */
2165         do {
2166                 try_nregs--;
2167                 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2168                     ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2169                 try_reg0_offset = (unsigned)(try_run_size -
2170                     (try_nregs * bin->reg_size));
2171         } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2172             > try_reg0_offset);
2173
2174         /* run_size expansion loop. */
2175         do {
2176                 /*
2177                  * Copy valid settings before trying more aggressive settings.
2178                  */
2179                 good_run_size = try_run_size;
2180                 good_nregs = try_nregs;
2181                 good_mask_nelms = try_mask_nelms;
2182                 good_reg0_offset = try_reg0_offset;
2183
2184                 /* Try more aggressive settings. */
2185                 try_run_size += pagesize;
2186                 try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2187                     bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2188                 do {
2189                         try_nregs--;
2190                         try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2191                             ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2192                             1 : 0);
2193                         try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2194                             bin->reg_size));
2195                 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
2196                     (try_mask_nelms - 1)) > try_reg0_offset);
2197         } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2198             && max_ovrhd > RUN_MAX_OVRHD_RELAX / ((float)(bin->reg_size << 3))
2199             && ((float)(try_reg0_offset)) / ((float)(try_run_size)) >
2200             max_ovrhd);
2201
2202         assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2203             <= good_reg0_offset);
2204         assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2205
2206         /* Copy final settings. */
2207         bin->run_size = good_run_size;
2208         bin->nregs = good_nregs;
2209         bin->regs_mask_nelms = good_mask_nelms;
2210         bin->reg0_offset = good_reg0_offset;
2211
2212         return (good_run_size);
2213 }
2214
2215 static void *
2216 arena_malloc(arena_t *arena, size_t size)
2217 {
2218         void *ret;
2219
2220         assert(arena != NULL);
2221         assert(arena->magic == ARENA_MAGIC);
2222         assert(size != 0);
2223         assert(QUANTUM_CEILING(size) <= arena_maxclass);
2224
2225         if (size <= bin_maxclass) {
2226                 arena_bin_t *bin;
2227                 arena_run_t *run;
2228
2229                 /* Small allocation. */
2230
2231                 if (size < small_min) {
2232                         /* Tiny. */
2233                         size = pow2_ceil(size);
2234                         bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2235                             1)))];
2236 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2237                         /*
2238                          * Bin calculation is always correct, but we may need
2239                          * to fix size for the purposes of assertions and/or
2240                          * stats accuracy.
2241                          */
2242                         if (size < (1 << TINY_MIN_2POW))
2243                                 size = (1 << TINY_MIN_2POW);
2244 #endif
2245                 } else if (size <= small_max) {
2246                         /* Quantum-spaced. */
2247                         size = QUANTUM_CEILING(size);
2248                         bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2249                             - 1];
2250                 } else {
2251                         /* Sub-page. */
2252                         size = pow2_ceil(size);
2253                         bin = &arena->bins[ntbins + nqbins
2254                             + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2255                 }
2256                 assert(size == bin->reg_size);
2257
2258                 malloc_mutex_lock(&arena->mtx);
2259                 if ((run = bin->runcur) != NULL && run->nfree > 0)
2260                         ret = arena_bin_malloc_easy(arena, bin, run);
2261                 else
2262                         ret = arena_bin_malloc_hard(arena, bin);
2263
2264                 if (ret == NULL) {
2265                         malloc_mutex_unlock(&arena->mtx);
2266                         return (NULL);
2267                 }
2268
2269 #ifdef MALLOC_STATS
2270                 bin->stats.nrequests++;
2271                 arena->stats.nmalloc_small++;
2272                 arena->stats.allocated_small += size;
2273 #endif
2274         } else {
2275                 /* Large allocation. */
2276                 size = PAGE_CEILING(size);
2277                 malloc_mutex_lock(&arena->mtx);
2278                 ret = (void *)arena_run_alloc(arena, size);
2279                 if (ret == NULL) {
2280                         malloc_mutex_unlock(&arena->mtx);
2281                         return (NULL);
2282                 }
2283 #ifdef MALLOC_STATS
2284                 arena->stats.nmalloc_large++;
2285                 arena->stats.allocated_large += size;
2286 #endif
2287         }
2288
2289         malloc_mutex_unlock(&arena->mtx);
2290
2291         if (opt_junk)
2292                 memset(ret, 0xa5, size);
2293         else if (opt_zero)
2294                 memset(ret, 0, size);
2295         return (ret);
2296 }
2297
2298 static inline void
2299 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2300     unsigned npages)
2301 {
2302         unsigned i;
2303
2304         assert(npages > 0);
2305
2306         /*
2307          * Modifiy the map such that arena_run_dalloc() sees the run as
2308          * separately allocated.
2309          */
2310         for (i = 0; i < npages; i++) {
2311                 chunk->map[pageind + i].npages = npages;
2312                 chunk->map[pageind + i].pos = i;
2313         }
2314         arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2315             pagesize_2pow)), npages << pagesize_2pow);
2316 }
2317
2318 /* Only handles large allocations that require more than page alignment. */
2319 static void *
2320 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2321 {
2322         void *ret;
2323         size_t offset;
2324         arena_chunk_t *chunk;
2325         unsigned pageind, i, npages;
2326
2327         assert((size & pagesize_mask) == 0);
2328         assert((alignment & pagesize_mask) == 0);
2329
2330         npages = (unsigned)(size >> pagesize_2pow);
2331
2332         malloc_mutex_lock(&arena->mtx);
2333         ret = (void *)arena_run_alloc(arena, alloc_size);
2334         if (ret == NULL) {
2335                 malloc_mutex_unlock(&arena->mtx);
2336                 return (NULL);
2337         }
2338
2339         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2340
2341         offset = (uintptr_t)ret & (alignment - 1);
2342         assert((offset & pagesize_mask) == 0);
2343         assert(offset < alloc_size);
2344         if (offset == 0) {
2345                 pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2346                     pagesize_2pow);
2347
2348                 /* Update the map for the run to be kept. */
2349                 for (i = 0; i < npages; i++) {
2350                         chunk->map[pageind + i].npages = npages;
2351                         assert(chunk->map[pageind + i].pos == i);
2352                 }
2353
2354                 /* Trim trailing space. */
2355                 arena_palloc_trim(arena, chunk, pageind + npages,
2356                     (unsigned)((alloc_size - size) >> pagesize_2pow));
2357         } else {
2358                 size_t leadsize, trailsize;
2359
2360                 leadsize = alignment - offset;
2361                 ret = (void *)((uintptr_t)ret + leadsize);
2362                 pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2363                     pagesize_2pow);
2364
2365                 /* Update the map for the run to be kept. */
2366                 for (i = 0; i < npages; i++) {
2367                         chunk->map[pageind + i].npages = npages;
2368                         chunk->map[pageind + i].pos = i;
2369                 }
2370
2371                 /* Trim leading space. */
2372                 arena_palloc_trim(arena, chunk,
2373                     (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2374                     (unsigned)(leadsize >> pagesize_2pow));
2375
2376                 trailsize = alloc_size - leadsize - size;
2377                 if (trailsize != 0) {
2378                         /* Trim trailing space. */
2379                         assert(trailsize < alloc_size);
2380                         arena_palloc_trim(arena, chunk, pageind + npages,
2381                             (unsigned)(trailsize >> pagesize_2pow));
2382                 }
2383         }
2384
2385 #ifdef MALLOC_STATS
2386         arena->stats.nmalloc_large++;
2387         arena->stats.allocated_large += size;
2388 #endif
2389         malloc_mutex_unlock(&arena->mtx);
2390
2391         if (opt_junk)
2392                 memset(ret, 0xa5, size);
2393         else if (opt_zero)
2394                 memset(ret, 0, size);
2395         return (ret);
2396 }
2397
2398 /* Return the size of the allocation pointed to by ptr. */
2399 static size_t
2400 arena_salloc(const void *ptr)
2401 {
2402         size_t ret;
2403         arena_chunk_t *chunk;
2404         arena_chunk_map_t *mapelm;
2405         unsigned pageind;
2406
2407         assert(ptr != NULL);
2408         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2409
2410         /*
2411          * No arena data structures that we query here can change in a way that
2412          * affects this function, so we don't need to lock.
2413          */
2414         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2415         pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2416             pagesize_2pow);
2417         mapelm = &chunk->map[pageind];
2418         if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2419             pagesize_2pow)) {
2420                 arena_run_t *run;
2421
2422                 pageind -= mapelm->pos;
2423
2424                 run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2425                     pagesize_2pow));
2426                 assert(run->magic == ARENA_RUN_MAGIC);
2427                 ret = run->bin->reg_size;
2428         } else
2429                 ret = mapelm->npages << pagesize_2pow;
2430
2431         return (ret);
2432 }
2433
2434 static void *
2435 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2436 {
2437         void *ret;
2438
2439         /* Avoid moving the allocation if the size class would not change. */
2440         if (size < small_min) {
2441                 if (oldsize < small_min &&
2442                     ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2443                     == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2444                         goto IN_PLACE;
2445         } else if (size <= small_max) {
2446                 if (oldsize >= small_min && oldsize <= small_max &&
2447                     (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2448                     == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2449                         goto IN_PLACE;
2450         } else {
2451                 /*
2452                  * We make no attempt to resize runs here, though it would be
2453                  * possible to do so.
2454                  */
2455                 if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2456                         goto IN_PLACE;
2457         }
2458
2459         /*
2460          * If we get here, then size and oldsize are different enough that we
2461          * need to use a different size class.  In that case, fall back to
2462          * allocating new space and copying.
2463          */
2464         ret = arena_malloc(choose_arena(), size);
2465         if (ret == NULL)
2466                 return (NULL);
2467
2468         /* Junk/zero-filling were already done by arena_malloc(). */
2469         if (size < oldsize)
2470                 memcpy(ret, ptr, size);
2471         else
2472                 memcpy(ret, ptr, oldsize);
2473         idalloc(ptr);
2474         return (ret);
2475 IN_PLACE:
2476         if (opt_junk && size < oldsize)
2477                 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2478         else if (opt_zero && size > oldsize)
2479                 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2480         return (ptr);
2481 }
2482
2483 static void
2484 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2485 {
2486         unsigned pageind;
2487         arena_chunk_map_t *mapelm;
2488         size_t size;
2489
2490         assert(arena != NULL);
2491         assert(arena->magic == ARENA_MAGIC);
2492         assert(chunk->arena == arena);
2493         assert(ptr != NULL);
2494         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2495
2496         pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2497             pagesize_2pow);
2498         mapelm = &chunk->map[pageind];
2499         if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2500             pagesize_2pow)) {
2501                 arena_run_t *run;
2502                 arena_bin_t *bin;
2503
2504                 /* Small allocation. */
2505
2506                 pageind -= mapelm->pos;
2507
2508                 run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2509                     pagesize_2pow));
2510                 assert(run->magic == ARENA_RUN_MAGIC);
2511                 bin = run->bin;
2512                 size = bin->reg_size;
2513
2514                 if (opt_junk)
2515                         memset(ptr, 0x5a, size);
2516
2517                 malloc_mutex_lock(&arena->mtx);
2518                 arena_run_reg_dalloc(run, bin, ptr, size);
2519                 run->nfree++;
2520
2521                 if (run->nfree == bin->nregs) {
2522                         /* Deallocate run. */
2523                         if (run == bin->runcur)
2524                                 bin->runcur = NULL;
2525                         else if (bin->nregs != 1) {
2526                                 /*
2527                                  * This block's conditional is necessary because
2528                                  * if the run only contains one region, then it
2529                                  * never gets inserted into the non-full runs
2530                                  * tree.
2531                                  */
2532                                 /* LINTED */
2533                                 RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2534                         }
2535 #ifdef MALLOC_DEBUG
2536                         run->magic = 0;
2537 #endif
2538                         arena_run_dalloc(arena, run, bin->run_size);
2539 #ifdef MALLOC_STATS
2540                         bin->stats.curruns--;
2541 #endif
2542                 } else if (run->nfree == 1 && run != bin->runcur) {
2543                         /*
2544                          * Make sure that bin->runcur always refers to the
2545                          * lowest non-full run, if one exists.
2546                          */
2547                         if (bin->runcur == NULL)
2548                                 bin->runcur = run;
2549                         else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2550                                 /* Switch runcur. */
2551                                 if (bin->runcur->nfree > 0) {
2552                                         /* Insert runcur. */
2553                                         /* LINTED */
2554                                         RB_INSERT(arena_run_tree_s, &bin->runs,
2555                                             bin->runcur);
2556                                 }
2557                                 bin->runcur = run;
2558                         } else {
2559                                 /* LINTED */
2560                                 RB_INSERT(arena_run_tree_s, &bin->runs, run);
2561                         }
2562                 }
2563 #ifdef MALLOC_STATS
2564                 arena->stats.allocated_small -= size;
2565                 arena->stats.ndalloc_small++;
2566 #endif
2567         } else {
2568                 /* Large allocation. */
2569
2570                 size = mapelm->npages << pagesize_2pow;
2571                 assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2572
2573                 if (opt_junk)
2574                         memset(ptr, 0x5a, size);
2575
2576                 malloc_mutex_lock(&arena->mtx);
2577                 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2578 #ifdef MALLOC_STATS
2579                 arena->stats.allocated_large -= size;
2580                 arena->stats.ndalloc_large++;
2581 #endif
2582         }
2583
2584         malloc_mutex_unlock(&arena->mtx);
2585 }
2586
2587 static bool
2588 arena_new(arena_t *arena)
2589 {
2590         unsigned i;
2591         arena_bin_t *bin;
2592         size_t prev_run_size;
2593
2594         malloc_mutex_init(&arena->mtx);
2595
2596 #ifdef MALLOC_STATS
2597         memset(&arena->stats, 0, sizeof(arena_stats_t));
2598 #endif
2599
2600         /* Initialize chunks. */
2601         RB_INIT(&arena->chunks);
2602         arena->spare = NULL;
2603
2604         /* Initialize bins. */
2605         prev_run_size = pagesize;
2606
2607         /* (2^n)-spaced tiny bins. */
2608         for (i = 0; i < ntbins; i++) {
2609                 bin = &arena->bins[i];
2610                 bin->runcur = NULL;
2611                 RB_INIT(&bin->runs);
2612
2613                 bin->reg_size = (1 << (TINY_MIN_2POW + i));
2614                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2615
2616 #ifdef MALLOC_STATS
2617                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2618 #endif
2619         }
2620
2621         /* Quantum-spaced bins. */
2622         for (; i < ntbins + nqbins; i++) {
2623                 bin = &arena->bins[i];
2624                 bin->runcur = NULL;
2625                 RB_INIT(&bin->runs);
2626
2627                 bin->reg_size = quantum * (i - ntbins + 1);
2628 /*
2629                 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2630 */
2631                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2632
2633 #ifdef MALLOC_STATS
2634                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2635 #endif
2636         }
2637
2638         /* (2^n)-spaced sub-page bins. */
2639         for (; i < ntbins + nqbins + nsbins; i++) {
2640                 bin = &arena->bins[i];
2641                 bin->runcur = NULL;
2642                 RB_INIT(&bin->runs);
2643
2644                 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2645
2646                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2647
2648 #ifdef MALLOC_STATS
2649                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2650 #endif
2651         }
2652
2653 #ifdef MALLOC_DEBUG
2654         arena->magic = ARENA_MAGIC;
2655 #endif
2656
2657         return (false);
2658 }
2659
2660 /* Create a new arena and insert it into the arenas array at index ind. */
2661 static arena_t *
2662 arenas_extend(unsigned ind)
2663 {
2664         arena_t *ret;
2665
2666         /* Allocate enough space for trailing bins. */
2667         ret = (arena_t *)base_alloc(sizeof(arena_t)
2668             + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2669         if (ret != NULL && arena_new(ret) == false) {
2670                 arenas[ind] = ret;
2671                 return (ret);
2672         }
2673         /* Only reached if there is an OOM error. */
2674
2675         /*
2676          * OOM here is quite inconvenient to propagate, since dealing with it
2677          * would require a check for failure in the fast path.  Instead, punt
2678          * by using arenas[0].  In practice, this is an extremely unlikely
2679          * failure.
2680          */
2681         _malloc_message(getprogname(),
2682             ": (malloc) Error initializing arena\n", "", "");
2683         if (opt_abort)
2684                 abort();
2685
2686         return (arenas[0]);
2687 }
2688
2689 /*
2690  * End arena.
2691  */
2692 /******************************************************************************/
2693 /*
2694  * Begin general internal functions.
2695  */
2696
2697 static void *
2698 huge_malloc(size_t size)
2699 {
2700         void *ret;
2701         size_t csize;
2702         chunk_node_t *node;
2703
2704         /* Allocate one or more contiguous chunks for this request. */
2705
2706         csize = CHUNK_CEILING(size);
2707         if (csize == 0) {
2708                 /* size is large enough to cause size_t wrap-around. */
2709                 return (NULL);
2710         }
2711
2712         /* Allocate a chunk node with which to track the chunk. */
2713         node = base_chunk_node_alloc();
2714         if (node == NULL)
2715                 return (NULL);
2716
2717         ret = chunk_alloc(csize);
2718         if (ret == NULL) {
2719                 base_chunk_node_dealloc(node);
2720                 return (NULL);
2721         }
2722
2723         /* Insert node into huge. */
2724         node->chunk = ret;
2725         node->size = csize;
2726
2727         malloc_mutex_lock(&chunks_mtx);
2728         RB_INSERT(chunk_tree_s, &huge, node);
2729 #ifdef MALLOC_STATS
2730         huge_nmalloc++;
2731         huge_allocated += csize;
2732 #endif
2733         malloc_mutex_unlock(&chunks_mtx);
2734
2735         if (opt_junk)
2736                 memset(ret, 0xa5, csize);
2737         else if (opt_zero)
2738                 memset(ret, 0, csize);
2739
2740         return (ret);
2741 }
2742
2743 /* Only handles large allocations that require more than chunk alignment. */
2744 static void *
2745 huge_palloc(size_t alignment, size_t size)
2746 {
2747         void *ret;
2748         size_t alloc_size, chunk_size, offset;
2749         chunk_node_t *node;
2750
2751         /*
2752          * This allocation requires alignment that is even larger than chunk
2753          * alignment.  This means that huge_malloc() isn't good enough.
2754          *
2755          * Allocate almost twice as many chunks as are demanded by the size or
2756          * alignment, in order to assure the alignment can be achieved, then
2757          * unmap leading and trailing chunks.
2758          */
2759         assert(alignment >= chunksize);
2760
2761         chunk_size = CHUNK_CEILING(size);
2762
2763         if (size >= alignment)
2764                 alloc_size = chunk_size + alignment - chunksize;
2765         else
2766                 alloc_size = (alignment << 1) - chunksize;
2767
2768         /* Allocate a chunk node with which to track the chunk. */
2769         node = base_chunk_node_alloc();
2770         if (node == NULL)
2771                 return (NULL);
2772
2773         ret = chunk_alloc(alloc_size);
2774         if (ret == NULL) {
2775                 base_chunk_node_dealloc(node);
2776                 return (NULL);
2777         }
2778
2779         offset = (uintptr_t)ret & (alignment - 1);
2780         assert((offset & chunksize_mask) == 0);
2781         assert(offset < alloc_size);
2782         if (offset == 0) {
2783                 /* Trim trailing space. */
2784                 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2785                     - chunk_size);
2786         } else {
2787                 size_t trailsize;
2788
2789                 /* Trim leading space. */
2790                 chunk_dealloc(ret, alignment - offset);
2791
2792                 ret = (void *)((uintptr_t)ret + (alignment - offset));
2793
2794                 trailsize = alloc_size - (alignment - offset) - chunk_size;
2795                 if (trailsize != 0) {
2796                     /* Trim trailing space. */
2797                     assert(trailsize < alloc_size);
2798                     chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2799                         trailsize);
2800                 }
2801         }
2802
2803         /* Insert node into huge. */
2804         node->chunk = ret;
2805         node->size = chunk_size;
2806
2807         malloc_mutex_lock(&chunks_mtx);
2808         RB_INSERT(chunk_tree_s, &huge, node);
2809 #ifdef MALLOC_STATS
2810         huge_nmalloc++;
2811         huge_allocated += chunk_size;
2812 #endif
2813         malloc_mutex_unlock(&chunks_mtx);
2814
2815         if (opt_junk)
2816                 memset(ret, 0xa5, chunk_size);
2817         else if (opt_zero)
2818                 memset(ret, 0, chunk_size);
2819
2820         return (ret);
2821 }
2822
2823 static void *
2824 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2825 {
2826         void *ret;
2827
2828         /* Avoid moving the allocation if the size class would not change. */
2829         if (oldsize > arena_maxclass &&
2830             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2831                 if (opt_junk && size < oldsize) {
2832                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2833                             - size);
2834                 } else if (opt_zero && size > oldsize) {
2835                         memset((void *)((uintptr_t)ptr + oldsize), 0, size
2836                             - oldsize);
2837                 }
2838                 return (ptr);
2839         }
2840
2841         if (CHUNK_ADDR2BASE(ptr) == ptr
2842 #ifdef USE_BRK
2843             && ((uintptr_t)ptr < (uintptr_t)brk_base
2844             || (uintptr_t)ptr >= (uintptr_t)brk_max)
2845 #endif
2846             ) {
2847                 chunk_node_t *node, key;
2848                 void *newptr;
2849                 size_t oldcsize;
2850                 size_t newcsize;
2851
2852                 newcsize = CHUNK_CEILING(size);
2853                 oldcsize = CHUNK_CEILING(oldsize);
2854                 assert(oldcsize != newcsize);
2855                 if (newcsize == 0) {
2856                         /* size_t wrap-around */
2857                         return (NULL);
2858                 }
2859
2860                 /*
2861                  * Remove the old region from the tree now.  If mremap()
2862                  * returns the region to the system, other thread may
2863                  * map it for same huge allocation and insert it to the
2864                  * tree before we acquire the mutex lock again.
2865                  */
2866                 malloc_mutex_lock(&chunks_mtx);
2867                 key.chunk = __DECONST(void *, ptr);
2868                 /* LINTED */
2869                 node = RB_FIND(chunk_tree_s, &huge, &key);
2870                 assert(node != NULL);
2871                 assert(node->chunk == ptr);
2872                 assert(node->size == oldcsize);
2873                 RB_REMOVE(chunk_tree_s, &huge, node);
2874                 malloc_mutex_unlock(&chunks_mtx);
2875
2876                 newptr = mremap(ptr, oldcsize, NULL, newcsize,
2877                     MAP_ALIGNED(chunksize_2pow));
2878                 if (newptr == MAP_FAILED) {
2879                         /* We still own the old region. */
2880                         malloc_mutex_lock(&chunks_mtx);
2881                         RB_INSERT(chunk_tree_s, &huge, node);
2882                         malloc_mutex_unlock(&chunks_mtx);
2883                 } else {
2884                         assert(CHUNK_ADDR2BASE(newptr) == newptr);
2885
2886                         /* Insert new or resized old region. */
2887                         malloc_mutex_lock(&chunks_mtx);
2888                         node->size = newcsize;
2889                         node->chunk = newptr;
2890                         RB_INSERT(chunk_tree_s, &huge, node);
2891 #ifdef MALLOC_STATS
2892                         huge_nralloc++;
2893                         huge_allocated += newcsize - oldcsize;
2894                         if (newcsize > oldcsize) {
2895                                 stats_chunks.curchunks +=
2896                                     (newcsize - oldcsize) / chunksize;
2897                                 if (stats_chunks.curchunks >
2898                                     stats_chunks.highchunks)
2899                                         stats_chunks.highchunks =
2900                                             stats_chunks.curchunks;
2901                         } else {
2902                                 stats_chunks.curchunks -=
2903                                     (oldcsize - newcsize) / chunksize;
2904                         }
2905 #endif
2906                         malloc_mutex_unlock(&chunks_mtx);
2907
2908                         if (opt_junk && size < oldsize) {
2909                                 memset((void *)((uintptr_t)newptr + size), 0x5a,
2910                                     newcsize - size);
2911                         } else if (opt_zero && size > oldsize) {
2912                                 memset((void *)((uintptr_t)newptr + oldsize), 0,
2913                                     size - oldsize);
2914                         }
2915                         return (newptr);
2916                 }
2917         }
2918
2919         /*
2920          * If we get here, then size and oldsize are different enough that we
2921          * need to use a different size class.  In that case, fall back to
2922          * allocating new space and copying.
2923          */
2924         ret = huge_malloc(size);
2925         if (ret == NULL)
2926                 return (NULL);
2927
2928         if (CHUNK_ADDR2BASE(ptr) == ptr) {
2929                 /* The old allocation is a chunk. */
2930                 if (size < oldsize)
2931                         memcpy(ret, ptr, size);
2932                 else
2933                         memcpy(ret, ptr, oldsize);
2934         } else {
2935                 /* The old allocation is a region. */
2936                 assert(oldsize < size);
2937                 memcpy(ret, ptr, oldsize);
2938         }
2939         idalloc(ptr);
2940         return (ret);
2941 }
2942
2943 static void
2944 huge_dalloc(void *ptr)
2945 {
2946         chunk_node_t key;
2947         chunk_node_t *node;
2948
2949         malloc_mutex_lock(&chunks_mtx);
2950
2951         /* Extract from tree of huge allocations. */
2952         key.chunk = ptr;
2953         /* LINTED */
2954         node = RB_FIND(chunk_tree_s, &huge, &key);
2955         assert(node != NULL);
2956         assert(node->chunk == ptr);
2957         /* LINTED */
2958         RB_REMOVE(chunk_tree_s, &huge, node);
2959
2960 #ifdef MALLOC_STATS
2961         huge_ndalloc++;
2962         huge_allocated -= node->size;
2963 #endif
2964
2965         malloc_mutex_unlock(&chunks_mtx);
2966
2967         /* Unmap chunk. */
2968 #ifdef USE_BRK
2969         if (opt_junk)
2970                 memset(node->chunk, 0x5a, node->size);
2971 #endif
2972         chunk_dealloc(node->chunk, node->size);
2973
2974         base_chunk_node_dealloc(node);
2975 }
2976
2977 static void *
2978 imalloc(size_t size)
2979 {
2980         void *ret;
2981
2982         assert(size != 0);
2983
2984         if (size <= arena_maxclass)
2985                 ret = arena_malloc(choose_arena(), size);
2986         else
2987                 ret = huge_malloc(size);
2988
2989         return (ret);
2990 }
2991
2992 static void *
2993 ipalloc(size_t alignment, size_t size)
2994 {
2995         void *ret;
2996         size_t ceil_size;
2997
2998         /*
2999          * Round size up to the nearest multiple of alignment.
3000          *
3001          * This done, we can take advantage of the fact that for each small
3002          * size class, every object is aligned at the smallest power of two
3003          * that is non-zero in the base two representation of the size.  For
3004          * example:
3005          *
3006          *   Size |   Base 2 | Minimum alignment
3007          *   -----+----------+------------------
3008          *     96 |  1100000 |  32
3009          *    144 | 10100000 |  32
3010          *    192 | 11000000 |  64
3011          *
3012          * Depending on runtime settings, it is possible that arena_malloc()
3013          * will further round up to a power of two, but that never causes
3014          * correctness issues.
3015          */
3016         ceil_size = (size + (alignment - 1)) & (-alignment);
3017         /*
3018          * (ceil_size < size) protects against the combination of maximal
3019          * alignment and size greater than maximal alignment.
3020          */
3021         if (ceil_size < size) {
3022                 /* size_t overflow. */
3023                 return (NULL);
3024         }
3025
3026         if (ceil_size <= pagesize || (alignment <= pagesize
3027             && ceil_size <= arena_maxclass))
3028                 ret = arena_malloc(choose_arena(), ceil_size);
3029         else {
3030                 size_t run_size;
3031
3032                 /*
3033                  * We can't achieve sub-page alignment, so round up alignment
3034                  * permanently; it makes later calculations simpler.
3035                  */
3036                 alignment = PAGE_CEILING(alignment);
3037                 ceil_size = PAGE_CEILING(size);
3038                 /*
3039                  * (ceil_size < size) protects against very large sizes within
3040                  * pagesize of SIZE_T_MAX.
3041                  *
3042                  * (ceil_size + alignment < ceil_size) protects against the
3043                  * combination of maximal alignment and ceil_size large enough
3044                  * to cause overflow.  This is similar to the first overflow
3045                  * check above, but it needs to be repeated due to the new
3046                  * ceil_size value, which may now be *equal* to maximal
3047                  * alignment, whereas before we only detected overflow if the
3048                  * original size was *greater* than maximal alignment.
3049                  */
3050                 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3051                         /* size_t overflow. */
3052                         return (NULL);
3053                 }
3054
3055                 /*
3056                  * Calculate the size of the over-size run that arena_palloc()
3057                  * would need to allocate in order to guarantee the alignment.
3058                  */
3059                 if (ceil_size >= alignment)
3060                         run_size = ceil_size + alignment - pagesize;
3061                 else {
3062                         /*
3063                          * It is possible that (alignment << 1) will cause
3064                          * overflow, but it doesn't matter because we also
3065                          * subtract pagesize, which in the case of overflow
3066                          * leaves us with a very large run_size.  That causes
3067                          * the first conditional below to fail, which means
3068                          * that the bogus run_size value never gets used for
3069                          * anything important.
3070                          */
3071                         run_size = (alignment << 1) - pagesize;
3072                 }
3073
3074                 if (run_size <= arena_maxclass) {
3075                         ret = arena_palloc(choose_arena(), alignment, ceil_size,
3076                             run_size);
3077                 } else if (alignment <= chunksize)
3078                         ret = huge_malloc(ceil_size);
3079                 else
3080                         ret = huge_palloc(alignment, ceil_size);
3081         }
3082
3083         assert(((uintptr_t)ret & (alignment - 1)) == 0);
3084         return (ret);
3085 }
3086
3087 static void *
3088 icalloc(size_t size)
3089 {
3090         void *ret;
3091
3092         if (size <= arena_maxclass) {
3093                 ret = arena_malloc(choose_arena(), size);
3094                 if (ret == NULL)
3095                         return (NULL);
3096                 memset(ret, 0, size);
3097         } else {
3098                 /*
3099                  * The virtual memory system provides zero-filled pages, so
3100                  * there is no need to do so manually, unless opt_junk is
3101                  * enabled, in which case huge_malloc() fills huge allocations
3102                  * with junk.
3103                  */
3104                 ret = huge_malloc(size);
3105                 if (ret == NULL)
3106                         return (NULL);
3107
3108                 if (opt_junk)
3109                         memset(ret, 0, size);
3110 #ifdef USE_BRK
3111                 else if ((uintptr_t)ret >= (uintptr_t)brk_base
3112                     && (uintptr_t)ret < (uintptr_t)brk_max) {
3113                         /*
3114                          * This may be a re-used brk chunk.  Therefore, zero
3115                          * the memory.
3116                          */
3117                         memset(ret, 0, size);
3118                 }
3119 #endif
3120         }
3121
3122         return (ret);
3123 }
3124
3125 static size_t
3126 isalloc(const void *ptr)
3127 {
3128         size_t ret;
3129         arena_chunk_t *chunk;
3130
3131         assert(ptr != NULL);
3132
3133         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3134         if (chunk != ptr) {
3135                 /* Region. */
3136                 assert(chunk->arena->magic == ARENA_MAGIC);
3137
3138                 ret = arena_salloc(ptr);
3139         } else {
3140                 chunk_node_t *node, key;
3141
3142                 /* Chunk (huge allocation). */
3143
3144                 malloc_mutex_lock(&chunks_mtx);
3145
3146                 /* Extract from tree of huge allocations. */
3147                 key.chunk = __DECONST(void *, ptr);
3148                 /* LINTED */
3149                 node = RB_FIND(chunk_tree_s, &huge, &key);
3150                 assert(node != NULL);
3151
3152                 ret = node->size;
3153
3154                 malloc_mutex_unlock(&chunks_mtx);
3155         }
3156
3157         return (ret);
3158 }
3159
3160 static void *
3161 iralloc(void *ptr, size_t size)
3162 {
3163         void *ret;
3164         size_t oldsize;
3165
3166         assert(ptr != NULL);
3167         assert(size != 0);
3168
3169         oldsize = isalloc(ptr);
3170
3171         if (size <= arena_maxclass)
3172                 ret = arena_ralloc(ptr, size, oldsize);
3173         else
3174                 ret = huge_ralloc(ptr, size, oldsize);
3175
3176         return (ret);
3177 }
3178
3179 static void
3180 idalloc(void *ptr)
3181 {
3182         arena_chunk_t *chunk;
3183
3184         assert(ptr != NULL);
3185
3186         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3187         if (chunk != ptr) {
3188                 /* Region. */
3189                 arena_dalloc(chunk->arena, chunk, ptr);
3190         } else
3191                 huge_dalloc(ptr);
3192 }
3193
3194 static void
3195 malloc_print_stats(void)
3196 {
3197
3198         if (opt_print_stats) {
3199                 char s[UMAX2S_BUFSIZE];
3200                 _malloc_message("___ Begin malloc statistics ___\n", "", "",
3201                     "");
3202                 _malloc_message("Assertions ",
3203 #ifdef NDEBUG
3204                     "disabled",
3205 #else
3206                     "enabled",
3207 #endif
3208                     "\n", "");
3209                 _malloc_message("Boolean MALLOC_OPTIONS: ",
3210                     opt_abort ? "A" : "a",
3211                     opt_junk ? "J" : "j",
3212                     opt_hint ? "H" : "h");
3213                 _malloc_message(opt_utrace ? "PU" : "Pu",
3214                     opt_sysv ? "V" : "v",
3215                     opt_xmalloc ? "X" : "x",
3216                     opt_zero ? "Z\n" : "z\n");
3217
3218                 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3219                 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3220                 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3221                     "\n", "");
3222                 _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3223                 _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3224                     "");
3225
3226                 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3227                 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3228
3229 #ifdef MALLOC_STATS
3230                 {
3231                         size_t allocated, mapped;
3232                         unsigned i;
3233                         arena_t *arena;
3234
3235                         /* Calculate and print allocated/mapped stats. */
3236
3237                         /* arenas. */
3238                         for (i = 0, allocated = 0; i < narenas; i++) {
3239                                 if (arenas[i] != NULL) {
3240                                         malloc_mutex_lock(&arenas[i]->mtx);
3241                                         allocated +=
3242                                             arenas[i]->stats.allocated_small;
3243                                         allocated +=
3244                                             arenas[i]->stats.allocated_large;
3245                                         malloc_mutex_unlock(&arenas[i]->mtx);
3246                                 }
3247                         }
3248
3249                         /* huge/base. */
3250                         malloc_mutex_lock(&chunks_mtx);
3251                         allocated += huge_allocated;
3252                         mapped = stats_chunks.curchunks * chunksize;
3253                         malloc_mutex_unlock(&chunks_mtx);
3254
3255                         malloc_mutex_lock(&base_mtx);
3256                         mapped += base_mapped;
3257                         malloc_mutex_unlock(&base_mtx);
3258
3259                         malloc_printf("Allocated: %zu, mapped: %zu\n",
3260                             allocated, mapped);
3261
3262                         /* Print chunk stats. */
3263                         {
3264                                 chunk_stats_t chunks_stats;
3265
3266                                 malloc_mutex_lock(&chunks_mtx);
3267                                 chunks_stats = stats_chunks;
3268                                 malloc_mutex_unlock(&chunks_mtx);
3269
3270                                 malloc_printf("chunks: nchunks   "
3271                                     "highchunks    curchunks\n");
3272                                 malloc_printf("  %13llu%13lu%13lu\n",
3273                                     chunks_stats.nchunks,
3274                                     chunks_stats.highchunks,
3275                                     chunks_stats.curchunks);
3276                         }
3277
3278                         /* Print chunk stats. */
3279                         malloc_printf(
3280                             "huge: nmalloc      ndalloc      "
3281                             "nralloc    allocated\n");
3282                         malloc_printf(" %12llu %12llu %12llu %12zu\n",
3283                             huge_nmalloc, huge_ndalloc, huge_nralloc,
3284                             huge_allocated);
3285
3286                         /* Print stats for each arena. */
3287                         for (i = 0; i < narenas; i++) {
3288                                 arena = arenas[i];
3289                                 if (arena != NULL) {
3290                                         malloc_printf(
3291                                             "\narenas[%u] @ %p\n", i, arena);
3292                                         malloc_mutex_lock(&arena->mtx);
3293                                         stats_print(arena);
3294                                         malloc_mutex_unlock(&arena->mtx);
3295                                 }
3296                         }
3297                 }
3298 #endif /* #ifdef MALLOC_STATS */
3299                 _malloc_message("--- End malloc statistics ---\n", "", "", "");
3300         }
3301 }
3302
3303 /*
3304  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3305  * implementation has to take pains to avoid infinite recursion during
3306  * initialization.
3307  */
3308 static inline bool
3309 malloc_init(void)
3310 {
3311
3312         if (malloc_initialized == false)
3313                 return (malloc_init_hard());
3314
3315         return (false);
3316 }
3317
3318 static bool
3319 malloc_init_hard(void)
3320 {
3321         unsigned i, j;
3322         ssize_t linklen;
3323         char buf[PATH_MAX + 1];
3324         const char *opts = "";
3325
3326         malloc_mutex_lock(&init_lock);
3327         if (malloc_initialized) {
3328                 /*
3329                  * Another thread initialized the allocator before this one
3330                  * acquired init_lock.
3331                  */
3332                 malloc_mutex_unlock(&init_lock);
3333                 return (false);
3334         }
3335
3336         /* Get number of CPUs. */
3337         {
3338                 int mib[2];
3339                 size_t len;
3340
3341                 mib[0] = CTL_HW;
3342                 mib[1] = HW_NCPU;
3343                 len = sizeof(ncpus);
3344                 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3345                         /* Error. */
3346                         ncpus = 1;
3347                 }
3348         }
3349
3350         /* Get page size. */
3351         {
3352                 long result;
3353
3354                 result = sysconf(_SC_PAGESIZE);
3355                 assert(result != -1);
3356                 pagesize = (unsigned) result;
3357
3358                 /*
3359                  * We assume that pagesize is a power of 2 when calculating
3360                  * pagesize_mask and pagesize_2pow.
3361                  */
3362                 assert(((result - 1) & result) == 0);
3363                 pagesize_mask = result - 1;
3364                 pagesize_2pow = ffs((int)result) - 1;
3365         }
3366
3367         for (i = 0; i < 3; i++) {
3368                 /* Get runtime configuration. */
3369                 switch (i) {
3370                 case 0:
3371                         if ((linklen = readlink("/etc/malloc.conf", buf,
3372                                                 sizeof(buf) - 1)) != -1) {
3373                                 /*
3374                                  * Use the contents of the "/etc/malloc.conf"
3375                                  * symbolic link's name.
3376                                  */
3377                                 buf[linklen] = '\0';
3378                                 opts = buf;
3379                         } else {
3380                                 /* No configuration specified. */
3381                                 buf[0] = '\0';
3382                                 opts = buf;
3383                         }
3384                         break;
3385                 case 1:
3386                         if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3387                             issetugid() == 0) {
3388                                 /*
3389                                  * Do nothing; opts is already initialized to
3390                                  * the value of the MALLOC_OPTIONS environment
3391                                  * variable.
3392                                  */
3393                         } else {
3394                                 /* No configuration specified. */
3395                                 buf[0] = '\0';
3396                                 opts = buf;
3397                         }
3398                         break;
3399                 case 2:
3400                         if (_malloc_options != NULL) {
3401                             /*
3402                              * Use options that were compiled into the program.
3403                              */
3404                             opts = _malloc_options;
3405                         } else {
3406                                 /* No configuration specified. */
3407                                 buf[0] = '\0';
3408                                 opts = buf;
3409                         }
3410                         break;
3411                 default:
3412                         /* NOTREACHED */
3413                         /* LINTED */
3414                         assert(false);
3415                 }
3416
3417                 for (j = 0; opts[j] != '\0'; j++) {
3418                         switch (opts[j]) {
3419                         case 'a':
3420                                 opt_abort = false;
3421                                 break;
3422                         case 'A':
3423                                 opt_abort = true;
3424                                 break;
3425                         case 'h':
3426                                 opt_hint = false;
3427                                 break;
3428                         case 'H':
3429                                 opt_hint = true;
3430                                 break;
3431                         case 'j':
3432                                 opt_junk = false;
3433                                 break;
3434                         case 'J':
3435                                 opt_junk = true;
3436                                 break;
3437                         case 'k':
3438                                 /*
3439                                  * Chunks always require at least one header
3440                                  * page, so chunks can never be smaller than
3441                                  * two pages.
3442                                  */
3443                                 if (opt_chunk_2pow > pagesize_2pow + 1)
3444                                         opt_chunk_2pow--;
3445                                 break;
3446                         case 'K':
3447                                 if (opt_chunk_2pow + 1 <
3448                                     (int)(sizeof(size_t) << 3))
3449                                         opt_chunk_2pow++;
3450                                 break;
3451                         case 'n':
3452                                 opt_narenas_lshift--;
3453                                 break;
3454                         case 'N':
3455                                 opt_narenas_lshift++;
3456                                 break;
3457                         case 'p':
3458                                 opt_print_stats = false;
3459                                 break;
3460                         case 'P':
3461                                 opt_print_stats = true;
3462                                 break;
3463                         case 'q':
3464                                 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3465                                         opt_quantum_2pow--;
3466                                 break;
3467                         case 'Q':
3468                                 if (opt_quantum_2pow < pagesize_2pow - 1)
3469                                         opt_quantum_2pow++;
3470                                 break;
3471                         case 's':
3472                                 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3473                                         opt_small_max_2pow--;
3474                                 break;
3475                         case 'S':
3476                                 if (opt_small_max_2pow < pagesize_2pow - 1)
3477                                         opt_small_max_2pow++;
3478                                 break;
3479                         case 'u':
3480                                 opt_utrace = false;
3481                                 break;
3482                         case 'U':
3483                                 opt_utrace = true;
3484                                 break;
3485                         case 'v':
3486                                 opt_sysv = false;
3487                                 break;
3488                         case 'V':
3489                                 opt_sysv = true;
3490                                 break;
3491                         case 'x':
3492                                 opt_xmalloc = false;
3493                                 break;
3494                         case 'X':
3495                                 opt_xmalloc = true;
3496                                 break;
3497                         case 'z':
3498                                 opt_zero = false;
3499                                 break;
3500                         case 'Z':
3501                                 opt_zero = true;
3502                                 break;
3503                         default: {
3504                                 char cbuf[2];
3505
3506                                 cbuf[0] = opts[j];
3507                                 cbuf[1] = '\0';
3508                                 _malloc_message(getprogname(),
3509                                     ": (malloc) Unsupported character in "
3510                                     "malloc options: '", cbuf, "'\n");
3511                         }
3512                         }
3513                 }
3514         }
3515
3516         /* Take care to call atexit() only once. */
3517         if (opt_print_stats) {
3518                 /* Print statistics at exit. */
3519                 atexit(malloc_print_stats);
3520         }
3521
3522         /* Set variables according to the value of opt_small_max_2pow. */
3523         if (opt_small_max_2pow < opt_quantum_2pow)
3524                 opt_small_max_2pow = opt_quantum_2pow;
3525         small_max = (1 << opt_small_max_2pow);
3526
3527         /* Set bin-related variables. */
3528         bin_maxclass = (pagesize >> 1);
3529         assert(opt_quantum_2pow >= TINY_MIN_2POW);
3530         ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3531         assert(ntbins <= opt_quantum_2pow);
3532         nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3533         nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3534
3535         /* Set variables according to the value of opt_quantum_2pow. */
3536         quantum = (1 << opt_quantum_2pow);
3537         quantum_mask = quantum - 1;
3538         if (ntbins > 0)
3539                 small_min = (quantum >> 1) + 1;
3540         else
3541                 small_min = 1;
3542         assert(small_min <= quantum);
3543
3544         /* Set variables according to the value of opt_chunk_2pow. */
3545         chunksize = (1LU << opt_chunk_2pow);
3546         chunksize_mask = chunksize - 1;
3547         chunksize_2pow = (unsigned)opt_chunk_2pow;
3548         chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3549         {
3550                 unsigned header_size;
3551
3552                 header_size = (unsigned)(sizeof(arena_chunk_t) +
3553                     (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3554                 arena_chunk_header_npages = (header_size >> pagesize_2pow);
3555                 if ((header_size & pagesize_mask) != 0)
3556                         arena_chunk_header_npages++;
3557         }
3558         arena_maxclass = chunksize - (arena_chunk_header_npages <<
3559             pagesize_2pow);
3560
3561         UTRACE(0, 0, 0);
3562
3563 #ifdef MALLOC_STATS
3564         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3565 #endif
3566
3567         /* Various sanity checks that regard configuration. */
3568         assert(quantum >= sizeof(void *));
3569         assert(quantum <= pagesize);
3570         assert(chunksize >= pagesize);
3571         assert(quantum * 4 <= chunksize);
3572
3573         /* Initialize chunks data. */
3574         malloc_mutex_init(&chunks_mtx);
3575         RB_INIT(&huge);
3576 #ifdef USE_BRK
3577         malloc_mutex_init(&brk_mtx);
3578         brk_base = sbrk(0);
3579         brk_prev = brk_base;
3580         brk_max = brk_base;
3581 #endif
3582 #ifdef MALLOC_STATS
3583         huge_nmalloc = 0;
3584         huge_ndalloc = 0;
3585         huge_nralloc = 0;
3586         huge_allocated = 0;
3587 #endif
3588         RB_INIT(&old_chunks);
3589
3590         /* Initialize base allocation data structures. */
3591 #ifdef MALLOC_STATS
3592         base_mapped = 0;
3593 #endif
3594 #ifdef USE_BRK
3595         /*
3596          * Allocate a base chunk here, since it doesn't actually have to be
3597          * chunk-aligned.  Doing this before allocating any other chunks allows
3598          * the use of space that would otherwise be wasted.
3599          */
3600         base_pages_alloc(0);
3601 #endif
3602         base_chunk_nodes = NULL;
3603         malloc_mutex_init(&base_mtx);
3604
3605         if (ncpus > 1) {
3606                 /*
3607                  * For SMP systems, create four times as many arenas as there
3608                  * are CPUs by default.
3609                  */
3610                 opt_narenas_lshift += 2;
3611         }
3612
3613 #ifdef NO_TLS
3614         /* Initialize arena key. */
3615         (void)thr_keycreate(&arenas_map_key, NULL);
3616 #endif
3617
3618         /* Determine how many arenas to use. */
3619         narenas = ncpus;
3620         if (opt_narenas_lshift > 0) {
3621                 if ((narenas << opt_narenas_lshift) > narenas)
3622                         narenas <<= opt_narenas_lshift;
3623                 /*
3624                  * Make sure not to exceed the limits of what base_malloc()
3625                  * can handle.
3626                  */
3627                 if (narenas * sizeof(arena_t *) > chunksize)
3628                         narenas = (unsigned)(chunksize / sizeof(arena_t *));
3629         } else if (opt_narenas_lshift < 0) {
3630                 if ((narenas << opt_narenas_lshift) < narenas)
3631                         narenas <<= opt_narenas_lshift;
3632                 /* Make sure there is at least one arena. */
3633                 if (narenas == 0)
3634                         narenas = 1;
3635         }
3636
3637         next_arena = 0;
3638
3639         /* Allocate and initialize arenas. */
3640         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3641         if (arenas == NULL) {
3642                 malloc_mutex_unlock(&init_lock);
3643                 return (true);
3644         }
3645         /*
3646          * Zero the array.  In practice, this should always be pre-zeroed,
3647          * since it was just mmap()ed, but let's be sure.
3648          */
3649         memset(arenas, 0, sizeof(arena_t *) * narenas);
3650
3651         /*
3652          * Initialize one arena here.  The rest are lazily created in
3653          * arena_choose_hard().
3654          */
3655         arenas_extend(0);
3656         if (arenas[0] == NULL) {
3657                 malloc_mutex_unlock(&init_lock);
3658                 return (true);
3659         }
3660
3661         malloc_mutex_init(&arenas_mtx);
3662
3663         malloc_initialized = true;
3664         malloc_mutex_unlock(&init_lock);
3665         return (false);
3666 }
3667
3668 /*
3669  * End general internal functions.
3670  */
3671 /******************************************************************************/
3672 /*
3673  * Begin malloc(3)-compatible functions.
3674  */
3675
3676 void *
3677 malloc(size_t size)
3678 {
3679         void *ret;
3680
3681         if (malloc_init()) {
3682                 ret = NULL;
3683                 goto RETURN;
3684         }
3685
3686         if (size == 0) {
3687                 if (opt_sysv == false)
3688                         size = 1;
3689                 else {
3690                         ret = NULL;
3691                         goto RETURN;
3692                 }
3693         }
3694
3695         ret = imalloc(size);
3696
3697 RETURN:
3698         if (ret == NULL) {
3699                 if (opt_xmalloc) {
3700                         _malloc_message(getprogname(),
3701                             ": (malloc) Error in malloc(): out of memory\n", "",
3702                             "");
3703                         abort();
3704                 }
3705                 errno = ENOMEM;
3706         }
3707
3708         UTRACE(0, size, ret);
3709         return (ret);
3710 }
3711
3712 int
3713 posix_memalign(void **memptr, size_t alignment, size_t size)
3714 {
3715         int ret;
3716         void *result;
3717
3718         if (malloc_init())
3719                 result = NULL;
3720         else {
3721                 /* Make sure that alignment is a large enough power of 2. */
3722                 if (((alignment - 1) & alignment) != 0
3723                     || alignment < sizeof(void *)) {
3724                         if (opt_xmalloc) {
3725                                 _malloc_message(getprogname(),
3726                                     ": (malloc) Error in posix_memalign(): "
3727                                     "invalid alignment\n", "", "");
3728                                 abort();
3729                         }
3730                         result = NULL;
3731                         ret = EINVAL;
3732                         goto RETURN;
3733                 }
3734
3735                 result = ipalloc(alignment, size);
3736         }
3737
3738         if (result == NULL) {
3739                 if (opt_xmalloc) {
3740                         _malloc_message(getprogname(),
3741                         ": (malloc) Error in posix_memalign(): out of memory\n",
3742                         "", "");
3743                         abort();
3744                 }
3745                 ret = ENOMEM;
3746                 goto RETURN;
3747         }
3748
3749         *memptr = result;
3750         ret = 0;
3751
3752 RETURN:
3753         UTRACE(0, size, result);
3754         return (ret);
3755 }
3756
3757 void *
3758 calloc(size_t num, size_t size)
3759 {
3760         void *ret;
3761         size_t num_size;
3762
3763         if (malloc_init()) {
3764                 num_size = 0;
3765                 ret = NULL;
3766                 goto RETURN;
3767         }
3768
3769         num_size = num * size;
3770         if (num_size == 0) {
3771                 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3772                         num_size = 1;
3773                 else {
3774                         ret = NULL;
3775                         goto RETURN;
3776                 }
3777         /*
3778          * Try to avoid division here.  We know that it isn't possible to
3779          * overflow during multiplication if neither operand uses any of the
3780          * most significant half of the bits in a size_t.
3781          */
3782         } else if ((unsigned long long)((num | size) &
3783            ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3784            (num_size / size != num)) {
3785                 /* size_t overflow. */
3786                 ret = NULL;
3787                 goto RETURN;
3788         }
3789
3790         ret = icalloc(num_size);
3791
3792 RETURN:
3793         if (ret == NULL) {
3794                 if (opt_xmalloc) {
3795                         _malloc_message(getprogname(),
3796                             ": (malloc) Error in calloc(): out of memory\n", "",
3797                             "");
3798                         abort();
3799                 }
3800                 errno = ENOMEM;
3801         }
3802
3803         UTRACE(0, num_size, ret);
3804         return (ret);
3805 }
3806
3807 void *
3808 realloc(void *ptr, size_t size)
3809 {
3810         void *ret;
3811
3812         if (size == 0) {
3813                 if (opt_sysv == false)
3814                         size = 1;
3815                 else {
3816                         if (ptr != NULL)
3817                                 idalloc(ptr);
3818                         ret = NULL;
3819                         goto RETURN;
3820                 }
3821         }
3822
3823         if (ptr != NULL) {
3824                 assert(malloc_initialized);
3825
3826                 ret = iralloc(ptr, size);
3827
3828                 if (ret == NULL) {
3829                         if (opt_xmalloc) {
3830                                 _malloc_message(getprogname(),
3831                                     ": (malloc) Error in realloc(): out of "
3832                                     "memory\n", "", "");
3833                                 abort();
3834                         }
3835                         errno = ENOMEM;
3836                 }
3837         } else {
3838                 if (malloc_init())
3839                         ret = NULL;
3840                 else
3841                         ret = imalloc(size);
3842
3843                 if (ret == NULL) {
3844                         if (opt_xmalloc) {
3845                                 _malloc_message(getprogname(),
3846                                     ": (malloc) Error in realloc(): out of "
3847                                     "memory\n", "", "");
3848                                 abort();
3849                         }
3850                         errno = ENOMEM;
3851                 }
3852         }
3853
3854 RETURN:
3855         UTRACE(ptr, size, ret);
3856         return (ret);
3857 }
3858
3859 void
3860 free(void *ptr)
3861 {
3862
3863         UTRACE(ptr, 0, 0);
3864         if (ptr != NULL) {
3865                 assert(malloc_initialized);
3866
3867                 idalloc(ptr);
3868         }
3869 }
3870
3871 /*
3872  * End malloc(3)-compatible functions.
3873  */
3874 /******************************************************************************/
3875 /*
3876  * Begin non-standard functions.
3877  */
3878 #ifndef __NetBSD__
3879 size_t
3880 malloc_usable_size(const void *ptr)
3881 {
3882
3883         assert(ptr != NULL);
3884
3885         return (isalloc(ptr));
3886 }
3887 #endif
3888
3889 /*
3890  * End non-standard functions.
3891  */
3892 /******************************************************************************/
3893 /*
3894  * Begin library-private functions, used by threading libraries for protection
3895  * of malloc during fork().  These functions are only called if the program is
3896  * running in threaded mode, so there is no need to check whether the program
3897  * is threaded here.
3898  */
3899
3900 void
3901 _malloc_prefork(void)
3902 {
3903         unsigned i;
3904
3905         /* Acquire all mutexes in a safe order. */
3906
3907         malloc_mutex_lock(&arenas_mtx);
3908         for (i = 0; i < narenas; i++) {
3909                 if (arenas[i] != NULL)
3910                         malloc_mutex_lock(&arenas[i]->mtx);
3911         }
3912         malloc_mutex_unlock(&arenas_mtx);
3913
3914         malloc_mutex_lock(&base_mtx);
3915
3916         malloc_mutex_lock(&chunks_mtx);
3917 }
3918
3919 void
3920 _malloc_postfork(void)
3921 {
3922         unsigned i;
3923
3924         /* Release all mutexes, now that fork() has completed. */
3925
3926         malloc_mutex_unlock(&chunks_mtx);
3927
3928         malloc_mutex_unlock(&base_mtx);
3929
3930         malloc_mutex_lock(&arenas_mtx);
3931         for (i = 0; i < narenas; i++) {
3932                 if (arenas[i] != NULL)
3933                         malloc_mutex_unlock(&arenas[i]->mtx);
3934         }
3935         malloc_mutex_unlock(&arenas_mtx);
3936 }
3937
3938 /*
3939  * End library-private functions.
3940  */
3941 /******************************************************************************/