/*	$NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $	*/

/*-
 * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs),
 *     rather than as individual pages.  This provides a constant-time
 *     mechanism for associating allocations with particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum, the size classes in each category are as follows:
 *
 *   |=====================================|
 *   | Category | Subcategory    |    Size |
 *   |=====================================|
 *   | Small    | Tiny           |       2 |
 *   |          |                |       4 |
 *   |          |                |       8 |
 *   |          |----------------+---------|
 *   |          | Quantum-spaced |      16 |
 *   |          |                |      32 |
 *   |          |                |      48 |
 *   |          |                |     ... |
 *   |          |                |     480 |
 *   |          |                |     496 |
 *   |          |                |     512 |
 *   |          |----------------+---------|
 *   |          | Sub-page       |    1 kB |
 *   |          |                |    2 kB |
 *   |=====================================|
 *   | Large                     |    4 kB |
 *   |                           |    8 kB |
 *   |                           |   12 kB |
 *   |                           |     ... |
 *   |                           | 1012 kB |
 *   |                           | 1016 kB |
 *   |                           | 1020 kB |
 *   |=====================================|
 *   | Huge                      |    1 MB |
 *   |                           |    2 MB |
 *   |                           |    3 MB |
 *   |                           |     ... |
 *   |=====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
 *
 *******************************************************************************
 */
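
/*
 * For example, with the default 16 byte quantum and 4 kB pages: a 7 byte
 * request is tiny and is served from the 8 byte class, a 24 byte request is
 * quantum-spaced and rounds up to 32, a 700 byte request is sub-page and
 * rounds up to 1 kB, a 5 kB request is large and rounds up to 8 kB (the next
 * page multiple), and requests too big to fit in a run within a chunk are
 * huge.
 */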
#ifdef __NetBSD__
#  define xutrace(a, b)		utrace("malloc", (a), (b))
#  define __DECONST(x, y)	((x)__UNCONST(y))
#else
#  define xutrace(a, b)		utrace((a), (b))
#endif	/* __NetBSD__ */

/*
 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 * defaults the A and J runtime options to off.  These settings are appropriate
 * for production systems.
 */
#define MALLOC_PRODUCTION

#ifndef MALLOC_PRODUCTION
#  define MALLOC_DEBUG
#endif

#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
__RCSID("$NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $");
#ifdef __FreeBSD__
#include "libc_private.h"
#ifdef MALLOC_DEBUG
#  define _LOCK_DEBUG
#endif
#include "spinlock.h"
#endif
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#ifdef __FreeBSD__
#include <sys/stddef.h>
#endif
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/uio.h>
#include <sys/ktrace.h> /* Must come after several other sys/ includes. */

#ifdef __FreeBSD__
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#endif
#include <machine/vmparam.h>

#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#ifdef __NetBSD__
#  include <reentrant.h>
#  include "extern.h"

#define STRERROR_R(a, b, c)	__strerror_r(a, b, c);
/*
 * A non localized version of strerror, that avoids bringing in
 * stdio and the locale code. All the malloc messages are in English
 * so why bother?
 */
int
__strerror_r(int e, char *s, size_t l)
{
	int rval;
	size_t slen;

	if (e >= 0 && e < sys_nerr) {
		slen = strlcpy(s, sys_errlist[e], l);
		rval = 0;
	} else {
		slen = snprintf_ss(s, l, "Unknown error %u", e);
		rval = EINVAL;
	}
	return slen >= l ? ERANGE : rval;
}
#else	/* __NetBSD__ */
#define STRERROR_R(a, b, c)	strerror_r(a, b, c);
#include "un-namespace.h"
#endif	/* __NetBSD__ */
/* MALLOC_STATS enables statistics calculation. */
#ifndef MALLOC_PRODUCTION
#  define MALLOC_STATS
#endif

#ifdef MALLOC_DEBUG
   /* Disable inlining to make debugging easier. */
#  define inline
#endif

/* Size of stack-allocated buffer passed to strerror_r(). */
#define	STRERROR_BUF		64
/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __ia64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#endif
#ifdef __alpha__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#  define NO_TLS
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#  define NO_TLS
#endif
#ifdef __amd64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#endif
#ifdef __arm__
#  define QUANTUM_2POW_MIN	3
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#  define NO_TLS
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#if defined(__sparc__) && !defined(__sparc64__)
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __vax__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __sh__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __m68k__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __mips__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif
#ifdef __hppa__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define USE_BRK
#endif

#define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)

/* sizeof(int) == (1 << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif

/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 */
#define	CHUNK_2POW_DEFAULT	20

/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define	CACHELINE_2POW		6
#define	CACHELINE		((size_t)(1 << CACHELINE_2POW))

/* Smallest size class to support. */
#define	TINY_MIN_2POW		1

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define	SMALL_MAX_2POW_DEFAULT	9
#define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since there is one bit of header overhead
 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 * that are so small that the per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 */
#define	RUN_BFP			12
/*                              \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U

/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
#define	RUN_MAX_SMALL_2POW	15
#define	RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
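
/*
 * With RUN_BFP == 12, overhead ratios are expressed in 1/4096ths of a run, so
 * RUN_MAX_OVRHD == 0x3d caps run header overhead at 61/4096, or roughly 1.5%
 * of the run size.
 */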
/******************************************************************************/
/*
 * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */
#ifdef __FreeBSD__
typedef struct {
	spinlock_t	lock;
} malloc_mutex_t;

/* Set to true once the allocator has been initialized. */
static bool	malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
#else
#define	malloc_mutex_t	mutex_t

/* Set to true once the allocator has been initialized. */
static bool	malloc_initialized = false;

/* Used to avoid initialization races. */
static mutex_t init_lock = MUTEX_INITIALIZER;
#endif
/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
	/*
	 * Number of allocation requests that corresponded to the size of this
	 * bin.
	 */
	uint64_t	nrequests;

	/* Total number of runs created for this bin's size class. */
	uint64_t	nruns;

	/*
	 * Total number of runs reused by extracting them from the runs tree for
	 * this bin's size class.
	 */
	uint64_t	reruns;

	/* High-water mark for this bin. */
	unsigned long	highruns;

	/* Current number of runs in this bin. */
	unsigned long	curruns;
};

typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
	/* Number of bytes currently mapped. */
	size_t		mapped;

	/* Per-size-category statistics. */
	size_t		allocated_small;
	uint64_t	nmalloc_small;
	uint64_t	ndalloc_small;

	size_t		allocated_large;
	uint64_t	nmalloc_large;
	uint64_t	ndalloc_large;
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
	/* Number of chunks that were allocated. */
	uint64_t	nchunks;

	/* High-water mark for number of chunks allocated. */
	unsigned long	highchunks;

	/*
	 * Current number of chunks allocated.  This value isn't maintained for
	 * any other purpose, so keep track of it in order to be able to set
	 * highchunks.
	 */
	unsigned long	curchunks;
};

#endif /* #ifdef MALLOC_STATS */
/******************************************************************************/
/*
 * Chunk data structures.
 */

/* Tree of chunks. */
typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
	/* Linkage for the chunk tree. */
	RB_ENTRY(chunk_node_s) link;

	/*
	 * Pointer to the chunk that this tree node is responsible for.  In some
	 * (but certainly not all) cases, this data structure is placed at the
	 * beginning of the corresponding chunk, so this field may point to this
	 * node.
	 */
	void	*chunk;

	/* Total chunk size. */
	size_t	size;
};
typedef struct chunk_tree_s chunk_tree_t;
RB_HEAD(chunk_tree_s, chunk_node_s);
/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
	/* Number of pages in run. */
	uint32_t	npages;
	/*
	 * Position within run.  For a free run, this is POS_FREE for the first
	 * and last pages.  The POS_FREE special value makes it possible to
	 * quickly coalesce free runs.
	 *
	 * This is the limiting factor for chunksize; there can be at most 2^31
	 * pages in a run.
	 */
#define	POS_FREE ((uint32_t)0xffffffffU)
	uint32_t	pos;
};
/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t		*arena;

	/* Linkage for the arena's chunk tree. */
	RB_ENTRY(arena_chunk_s) link;

	/*
	 * Number of pages in use.  This is maintained in order to make
	 * detection of empty chunks fast.
	 */
	uint32_t	pages_used;

	/*
	 * Every time a free run larger than this value is created/coalesced,
	 * this value is increased.  The only way that the value decreases is if
	 * arena_run_alloc() fails to find a free run as large as advertised by
	 * this value.
	 */
	uint32_t	max_frun_npages;

	/*
	 * Every time a free run that starts at an earlier page than this value
	 * is created/coalesced, this value is decreased.  It is reset in a
	 * similar fashion to max_frun_npages.
	 */
	uint32_t	min_frun_ind;

	/*
	 * Map of pages within chunk that keeps track of free/large/small.  For
	 * free runs, only the map entries for the first and last pages are
	 * kept up to date, so that free runs can be quickly coalesced.
	 */
	arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef struct arena_chunk_tree_s arena_chunk_tree_t;
RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
typedef struct arena_run_s arena_run_t;
struct arena_run_s {
	/* Linkage for run trees. */
	RB_ENTRY(arena_run_s) link;

#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Index of first element that might have a free region. */
	unsigned	regs_minelm;

	/* Number of free regions in run. */
	unsigned	nfree;

	/* Bitmask of in-use regions (0: in use, 1: free). */
	unsigned	regs_mask[1]; /* Dynamically sized. */
};
typedef struct arena_run_tree_s arena_run_tree_t;
RB_HEAD(arena_run_tree_s, arena_run_s);

struct arena_bin_s {
	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
	 * Tree of non-full runs.  This tree is used when looking for an
	 * existing run when runcur is no longer usable.  We choose the
	 * non-full run that is lowest in memory; this policy tends to keep
	 * objects packed well, and it can also help reduce the number of
	 * almost-empty chunks.
	 */
	arena_run_tree_t runs;

	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/* Number of elements in a run's regs_mask for this bin's size class. */
	uint32_t	regs_mask_nelms;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;

#ifdef MALLOC_STATS
	/* Bin statistics. */
	malloc_bin_stats_t stats;
#endif
};
struct arena_s {
#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

	/* All operations on this arena require that mtx be locked. */
	malloc_mutex_t	mtx;

#ifdef MALLOC_STATS
	arena_stats_t	stats;
#endif

	/*
	 * Tree of chunks this arena manages.
	 */
	arena_chunk_tree_t	chunks;

	/*
	 * In order to avoid rapid chunk allocation/deallocation when an arena
	 * oscillates right on the cusp of needing a new chunk, cache the most
	 * recently freed chunk.  This caching is disabled by opt_hint.
	 *
	 * There is one spare chunk per arena, rather than one spare total, in
	 * order to avoid interactions between multiple threads that could make
	 * a single spare inadequate.
	 */
	arena_chunk_t	*spare;

	/*
	 * bins is used to store rings of free regions of the following sizes,
	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
	 */
	arena_bin_t	bins[1]; /* Dynamically sized. */
};
/******************************************************************************/
/*
 * Data.
 */

/* Number of CPUs. */
static unsigned		ncpus;

/* VM page size. */
static size_t		pagesize;
static size_t		pagesize_mask;
static int		pagesize_2pow;

/* Various bin-related settings. */
static size_t		bin_maxclass; /* Max size class for bins. */
static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
static unsigned		nqbins; /* Number of quantum-spaced bins. */
static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
static size_t		small_min;
static size_t		small_max;

/* Various quantum-related settings. */
static size_t		quantum;
static size_t		quantum_mask; /* (quantum - 1). */

/* Various chunk-related settings. */
static size_t		chunksize;
static size_t		chunksize_mask; /* (chunksize - 1). */
static int		chunksize_2pow;
static unsigned		chunk_npages;
static unsigned		arena_chunk_header_npages;
static size_t		arena_maxclass; /* Max size class for arenas. */
/* Protects chunk-related data structures. */
static malloc_mutex_t	chunks_mtx;

/* Tree of chunks that are stand-alone huge allocations. */
static chunk_tree_t	huge;

#ifdef USE_BRK
/*
 * Try to use brk for chunk-size allocations, due to address space constraints.
 */
/*
 * Protects sbrk() calls.  This must be separate from chunks_mtx, since
 * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
 * could cause recursive lock acquisition).
 */
static malloc_mutex_t	brk_mtx;
/* Result of first sbrk(0) call. */
static void		*brk_base;
/* Current end of brk, or ((void *)-1) if brk is exhausted. */
static void		*brk_prev;
/* Current upper limit on brk addresses. */
static void		*brk_max;
#endif

#ifdef MALLOC_STATS
/* Huge allocation statistics. */
static uint64_t		huge_nmalloc;
static uint64_t		huge_ndalloc;
static uint64_t		huge_nralloc;
static size_t		huge_allocated;
#endif

/*
 * Tree of chunks that were previously allocated.  This is used when allocating
 * chunks, in an attempt to re-use address space.
 */
static chunk_tree_t	old_chunks;
/****************************/
/*
 * base (internal allocation).
 */

/*
 * Current pages that are being used for internal memory allocations.  These
 * pages are carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
static void		*base_pages;
static void		*base_next_addr;
static void		*base_past_addr; /* Addr immediately past base_pages. */
static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
static malloc_mutex_t	base_mtx;
#ifdef MALLOC_STATS
static size_t		base_mapped;
#endif

/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
static arena_t		**arenas;
static unsigned		narenas;
static unsigned		next_arena;
static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */

/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
#ifndef NO_TLS
static __thread arena_t	*arenas_map;
#define	get_arenas_map()	(arenas_map)
#define	set_arenas_map(x)	(arenas_map = x)
#else
static thread_key_t arenas_map_key;
#define	get_arenas_map()	thr_getspecific(arenas_map_key)
#define	set_arenas_map(x)	thr_setspecific(arenas_map_key, x)
#endif

#ifdef MALLOC_STATS
/* Chunk statistics. */
static chunk_stats_t	stats_chunks;
#endif
/*******************************/
/*
 * Runtime configuration options.
 */
const char	*_malloc_options;

#ifndef MALLOC_PRODUCTION
static bool	opt_abort = true;
static bool	opt_junk = true;
#else
static bool	opt_abort = false;
static bool	opt_junk = false;
#endif
static bool	opt_hint = false;
static bool	opt_print_stats = false;
static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
static bool	opt_utrace = false;
static bool	opt_sysv = false;
static bool	opt_xmalloc = false;
static bool	opt_zero = false;
static int32_t	opt_narenas_lshift = 0;
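
/*
 * These defaults are adjusted at startup by parsing single-letter flags from
 * _malloc_options (typically supplied via the MALLOC_OPTIONS environment
 * variable); the 'A' and 'J' flags mentioned in the MALLOC_PRODUCTION comment
 * above control opt_abort and opt_junk.
 */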
typedef struct {
	void	*p;
	size_t	s;
	void	*r;
} malloc_utrace_t;

#define	UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		malloc_utrace_t ut;				\
		ut.p = a;					\
		ut.s = b;					\
		ut.r = c;					\
		xutrace(&ut, sizeof(ut));			\
	}
/******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */

static void	wrtmessage(const char *p1, const char *p2, const char *p3,
		    const char *p4);
#ifdef MALLOC_STATS
static void	malloc_printf(const char *format, ...);
#endif
static char	*size_t2s(size_t x, char *s);
static bool	base_pages_alloc(size_t minsize);
static void	*base_alloc(size_t size);
static chunk_node_t *base_chunk_node_alloc(void);
static void	base_chunk_node_dealloc(chunk_node_t *node);
#ifdef MALLOC_STATS
static void	stats_print(arena_t *arena);
#endif
static void	*pages_map(void *addr, size_t size);
static void	*pages_map_align(void *addr, size_t size, int align);
static void	pages_unmap(void *addr, size_t size);
static void	*chunk_alloc(size_t size);
static void	chunk_dealloc(void *chunk, size_t size);
static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t	arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
static void	*arena_malloc(arena_t *arena, size_t size);
static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
		    size_t alloc_size);
static size_t	arena_salloc(const void *ptr);
static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
static bool	arena_new(arena_t *arena);
static arena_t	*arenas_extend(unsigned ind);
static void	*huge_malloc(size_t size);
static void	*huge_palloc(size_t alignment, size_t size);
static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
static void	huge_dalloc(void *ptr);
static void	*imalloc(size_t size);
static void	*ipalloc(size_t alignment, size_t size);
static void	*icalloc(size_t size);
static size_t	isalloc(const void *ptr);
static void	*iralloc(void *ptr, size_t size);
static void	idalloc(void *ptr);
static void	malloc_print_stats(void);
static bool	malloc_init_hard(void);

/*
 * End function prototypes.
 */
/******************************************************************************/
#ifdef __NetBSD__
#define	malloc_mutex_init(m)	mutex_init(m, NULL)
#define	malloc_mutex_lock(m)	mutex_lock(m)
#define	malloc_mutex_unlock(m)	mutex_unlock(m)
#else	/* __NetBSD__ */
static inline void
malloc_mutex_init(malloc_mutex_t *a_mutex)
{
	static const spinlock_t lock = _SPINLOCK_INITIALIZER;

	a_mutex->lock = lock;
}

static inline void
malloc_mutex_lock(malloc_mutex_t *a_mutex)
{

	if (__isthreaded)
		_SPINLOCK(&a_mutex->lock);
}

static inline void
malloc_mutex_unlock(malloc_mutex_t *a_mutex)
{

	if (__isthreaded)
		_SPINUNLOCK(&a_mutex->lock);
}
#endif	/* __NetBSD__ */
/******************************************************************************/
/*
 * Begin Utility functions/macros.
 */

/* Return the chunk address for allocation address a. */
#define	CHUNK_ADDR2BASE(a)						\
	((void *)((uintptr_t)(a) & ~chunksize_mask))

/* Return the chunk offset of address a. */
#define	CHUNK_ADDR2OFFSET(a)						\
	((size_t)((uintptr_t)(a) & chunksize_mask))

/* Return the smallest chunk multiple that is >= s. */
#define	CHUNK_CEILING(s)						\
	(((s) + chunksize_mask) & ~chunksize_mask)

/* Return the smallest cacheline multiple that is >= s. */
#define	CACHELINE_CEILING(s)						\
	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))

/* Return the smallest quantum multiple that is >= a. */
#define	QUANTUM_CEILING(a)						\
	(((a) + quantum_mask) & ~quantum_mask)

/* Return the smallest pagesize multiple that is >= s. */
#define	PAGE_CEILING(s)							\
	(((s) + pagesize_mask) & ~pagesize_mask)
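
/*
 * For example, with the default 16 byte quantum, 4 kB pages and 1 MB chunks:
 * QUANTUM_CEILING(24) == 32, CACHELINE_CEILING(65) == 128,
 * PAGE_CEILING(4097) == 8192, and CHUNK_ADDR2BASE(p) simply masks off the low
 * 20 bits of p, which is why chunks must be allocated chunksize-aligned.
 */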
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if (SIZEOF_PTR == 8)
	x |= x >> 32;
#endif
	x++;
	return (x);
}

static void
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
{

	write(STDERR_FILENO, p1, strlen(p1));
	write(STDERR_FILENO, p2, strlen(p2));
	write(STDERR_FILENO, p3, strlen(p3));
	write(STDERR_FILENO, p4, strlen(p4));
}

void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
	    const char *p4) = wrtmessage;
#ifdef MALLOC_STATS
/*
 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 */
static void
malloc_printf(const char *format, ...)
{
	char buf[4096];
	va_list ap;

	va_start(ap, format);
	vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	_malloc_message(buf, "", "", "");
}
#endif

/*
 * We don't want to depend on vsnprintf() for production builds, since that can
 * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
 * integer printing functionality, so that malloc_printf() use can be limited to
 * MALLOC_STATS code.
 */
#define	UMAX2S_BUFSIZE	21
static char *
size_t2s(size_t x, char *s)
{
	unsigned i;

	/* Make sure UMAX2S_BUFSIZE is large enough. */
	/* LINTED */
	assert(sizeof(size_t) <= 8);

	i = UMAX2S_BUFSIZE - 1;
	s[i] = '\0';
	do {
		i--;
		s[i] = "0123456789"[(int)x % 10];
		x /= (uintmax_t)10LL;
	} while (x > 0);

	return (&s[i]);
}
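
/*
 * The caller must supply a buffer of at least UMAX2S_BUFSIZE bytes; e.g.
 * size_t2s(1024, buf) writes the digits "1024" into the tail of buf and
 * returns a pointer to the first digit.
 */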
/******************************************************************************/

static bool
base_pages_alloc(size_t minsize)
{
	size_t csize;

#ifdef USE_BRK
	/*
	 * Do special brk allocation here, since base allocations don't need to
	 * be chunk-aligned.
	 */
	if (brk_prev != (void *)-1) {
		void *brk_cur;
		intptr_t incr;

		if (minsize != 0)
			csize = CHUNK_CEILING(minsize);

		malloc_mutex_lock(&brk_mtx);
		do {
			/* Get the current end of brk. */
			brk_cur = sbrk(0);

			/*
			 * Calculate how much padding is necessary to
			 * chunk-align the end of brk.  Don't worry about
			 * brk_cur not being chunk-aligned though.
			 */
			incr = (intptr_t)chunksize
			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
			if ((size_t)incr < minsize)
				incr += csize;

			brk_prev = sbrk(incr);
			if (brk_prev == brk_cur) {
				/* Success. */
				malloc_mutex_unlock(&brk_mtx);
				base_pages = brk_cur;
				base_next_addr = base_pages;
				base_past_addr = (void *)((uintptr_t)base_pages
				    + incr);
#ifdef MALLOC_STATS
				base_mapped += incr;
#endif
				return (false);
			}
		} while (brk_prev != (void *)-1);
		malloc_mutex_unlock(&brk_mtx);
	}
	if (minsize == 0) {
		/*
		 * Failure during initialization doesn't matter, so avoid
		 * falling through to the mmap-based page mapping code.
		 */
		return (true);
	}
#endif
	assert(minsize != 0);
	csize = PAGE_CEILING(minsize);
	base_pages = pages_map(NULL, csize);
	if (base_pages == NULL)
		return (true);
	base_next_addr = base_pages;
	base_past_addr = (void *)((uintptr_t)base_pages + csize);
#ifdef MALLOC_STATS
	base_mapped += csize;
#endif
	return (false);
}

static void *
base_alloc(size_t size)
{
	void *ret;
	size_t csize;

	/* Round size up to nearest multiple of the cacheline size. */
	csize = CACHELINE_CEILING(size);

	malloc_mutex_lock(&base_mtx);

	/* Make sure there's enough space for the allocation. */
	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
		if (base_pages_alloc(csize)) {
			ret = NULL;
			goto RETURN;
		}
	}

	/* Allocate. */
	ret = base_next_addr;
	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);

RETURN:
	malloc_mutex_unlock(&base_mtx);
	return (ret);
}
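
/*
 * base_alloc() is the allocator's internal bootstrap allocator; for instance,
 * base_chunk_node_alloc() below falls back to it for fresh chunk_node_t
 * records when the LIFO cache is empty.  Rounding requests up to CACHELINE
 * keeps unrelated internal structures on separate cache lines.
 */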
static chunk_node_t *
base_chunk_node_alloc(void)
{
	chunk_node_t *ret;

	malloc_mutex_lock(&base_mtx);
	if (base_chunk_nodes != NULL) {
		ret = base_chunk_nodes;
		/* LINTED */
		base_chunk_nodes = *(chunk_node_t **)ret;
		malloc_mutex_unlock(&base_mtx);
	} else {
		malloc_mutex_unlock(&base_mtx);
		ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
	}

	return (ret);
}

static void
base_chunk_node_dealloc(chunk_node_t *node)
{

	malloc_mutex_lock(&base_mtx);
	/* LINTED */
	*(chunk_node_t **)node = base_chunk_nodes;
	base_chunk_nodes = node;
	malloc_mutex_unlock(&base_mtx);
}

/******************************************************************************/
#ifdef MALLOC_STATS
static void
stats_print(arena_t *arena)
{
	unsigned i;
	int gap_start;

	malloc_printf(
	    "          allocated/mapped            nmalloc      ndalloc\n");

	malloc_printf("small: %12zu %-12s %12llu %12llu\n",
	    arena->stats.allocated_small, "", arena->stats.nmalloc_small,
	    arena->stats.ndalloc_small);
	malloc_printf("large: %12zu %-12s %12llu %12llu\n",
	    arena->stats.allocated_large, "", arena->stats.nmalloc_large,
	    arena->stats.ndalloc_large);
	malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
	    arena->stats.allocated_small + arena->stats.allocated_large,
	    arena->stats.mapped,
	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);

	malloc_printf("bins:     bin   size regs pgs  requests   newruns"
	    "    reruns maxruns curruns\n");
	for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
		if (arena->bins[i].stats.nrequests == 0) {
			if (gap_start == -1)
				gap_start = i;
		} else {
			if (gap_start != -1) {
				if (i > gap_start + 1) {
					/* Gap of more than one size class. */
					malloc_printf("[%u..%u]\n",
					    gap_start, i - 1);
				} else {
					/* Gap of one size class. */
					malloc_printf("[%u]\n", gap_start);
				}
				gap_start = -1;
			}
			malloc_printf(
			    "%13u %1s %4u %4u %3u %9llu %9llu"
			    " %9llu %7lu %7lu\n",
			    i,
			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
			    arena->bins[i].reg_size,
			    arena->bins[i].nregs,
			    arena->bins[i].run_size >> pagesize_2pow,
			    arena->bins[i].stats.nrequests,
			    arena->bins[i].stats.nruns,
			    arena->bins[i].stats.reruns,
			    arena->bins[i].stats.highruns,
			    arena->bins[i].stats.curruns);
		}
	}
	if (gap_start != -1) {
		if (i > gap_start + 1) {
			/* Gap of more than one size class. */
			malloc_printf("[%u..%u]\n", gap_start, i - 1);
		} else {
			/* Gap of one size class. */
			malloc_printf("[%u]\n", gap_start);
		}
	}
}
#endif

/*
 * End Utility functions/macros.
 */
/******************************************************************************/
/*
 * Begin chunk management functions.
 */

static inline int
chunk_comp(chunk_node_t *a, chunk_node_t *b)
{

	assert(a != NULL);
	assert(b != NULL);

	if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
		return (-1);
	else if (a->chunk == b->chunk)
		return (0);
	else
		return (1);
}

/* Generate red-black tree code for chunks. */
RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
static void *
pages_map_align(void *addr, size_t size, int align)
{
	void *ret;

	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	ret = mmap(addr, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
	assert(ret != NULL);

	if (ret == MAP_FAILED)
		ret = NULL;
	else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		if (munmap(ret, size) == -1) {
			char buf[STRERROR_BUF];

			STRERROR_R(errno, buf, sizeof(buf));
			_malloc_message(getprogname(),
			    ": (malloc) Error in munmap(): ", buf, "\n");
			if (opt_abort)
				abort();
		}
		ret = NULL;
	}

	assert(ret == NULL || (addr == NULL && ret != addr)
	    || (addr != NULL && ret == addr));
	return (ret);
}

static void *
pages_map(void *addr, size_t size)
{

	return pages_map_align(addr, size, 0);
}

static void
pages_unmap(void *addr, size_t size)
{

	if (munmap(addr, size) == -1) {
		char buf[STRERROR_BUF];

		STRERROR_R(errno, buf, sizeof(buf));
		_malloc_message(getprogname(),
		    ": (malloc) Error in munmap(): ", buf, "\n");
		if (opt_abort)
			abort();
	}
}
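
/*
 * pages_map_align() passes MAP_ALIGNED() to mmap() so that suitably aligned
 * memory comes straight from the kernel instead of being over-allocated and
 * trimmed; chunk_alloc() below requests chunksize_2pow alignment, which is
 * what allows CHUNK_ADDR2BASE() to recover a chunk pointer with a simple
 * mask.
 */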
1292 chunk_alloc(size_t size
)
1295 chunk_node_t
*tchunk
, *delchunk
;
1298 assert((size
& chunksize_mask
) == 0);
1300 malloc_mutex_lock(&chunks_mtx
);
1302 if (size
== chunksize
) {
1304 * Check for address ranges that were previously chunks and try
1309 tchunk
= RB_MIN(chunk_tree_s
, &old_chunks
);
1310 while (tchunk
!= NULL
) {
1311 /* Found an address range. Try to recycle it. */
1313 chunk
= tchunk
->chunk
;
1316 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1318 /* Remove delchunk from the tree. */
1320 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1321 base_chunk_node_dealloc(delchunk
);
1324 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1325 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1326 /* Re-use a previously freed brk chunk. */
1331 if ((ret
= pages_map(chunk
, size
)) != NULL
) {
1339 * Try to over-allocate, but allow the OS to place the allocation
1340 * anywhere. Beware of size_t wrap-around.
1342 if (size
+ chunksize
> size
) {
1343 if ((ret
= pages_map_align(NULL
, size
, chunksize_2pow
))
1351 * Try to create allocations in brk, in order to make full use of
1352 * limited address space.
1354 if (brk_prev
!= (void *)-1) {
1359 * The loop is necessary to recover from races with other
1360 * threads that are using brk for something other than malloc.
1362 malloc_mutex_lock(&brk_mtx
);
1364 /* Get the current end of brk. */
1368 * Calculate how much padding is necessary to
1369 * chunk-align the end of brk.
1371 incr
= (intptr_t)size
1372 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur
);
1373 if (incr
== (intptr_t)size
) {
1376 ret
= (void *)((intptr_t)brk_cur
+ incr
);
1380 brk_prev
= sbrk(incr
);
1381 if (brk_prev
== brk_cur
) {
1383 malloc_mutex_unlock(&brk_mtx
);
1384 brk_max
= (void *)((intptr_t)ret
+ size
);
1387 } while (brk_prev
!= (void *)-1);
1388 malloc_mutex_unlock(&brk_mtx
);
1392 /* All strategies for allocation failed. */
1398 * Clean out any entries in old_chunks that overlap with the
1399 * memory we just allocated.
1403 tchunk
= RB_NFIND(chunk_tree_s
, &old_chunks
, &key
);
1404 while (tchunk
!= NULL
1405 && (uintptr_t)tchunk
->chunk
>= (uintptr_t)ret
1406 && (uintptr_t)tchunk
->chunk
< (uintptr_t)ret
+ size
) {
1409 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1411 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1412 base_chunk_node_dealloc(delchunk
);
1418 stats_chunks
.nchunks
+= (size
/ chunksize
);
1419 stats_chunks
.curchunks
+= (size
/ chunksize
);
1421 if (stats_chunks
.curchunks
> stats_chunks
.highchunks
)
1422 stats_chunks
.highchunks
= stats_chunks
.curchunks
;
1424 malloc_mutex_unlock(&chunks_mtx
);
1426 assert(CHUNK_ADDR2BASE(ret
) == ret
);
1431 chunk_dealloc(void *chunk
, size_t size
)
1435 assert(chunk
!= NULL
);
1436 assert(CHUNK_ADDR2BASE(chunk
) == chunk
);
1438 assert((size
& chunksize_mask
) == 0);
1440 malloc_mutex_lock(&chunks_mtx
);
1443 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1444 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1447 malloc_mutex_lock(&brk_mtx
);
1448 /* Get the current end of brk. */
1452 * Try to shrink the data segment if this chunk is at the end
1453 * of the data segment. The sbrk() call here is subject to a
1454 * race condition with threads that use brk(2) or sbrk(2)
1455 * directly, but the alternative would be to leak memory for
1456 * the sake of poorly designed multi-threaded programs.
1458 if (brk_cur
== brk_max
1459 && (void *)((uintptr_t)chunk
+ size
) == brk_max
1460 && sbrk(-(intptr_t)size
) == brk_max
) {
1461 malloc_mutex_unlock(&brk_mtx
);
1462 if (brk_prev
== brk_max
) {
1464 brk_prev
= (void *)((intptr_t)brk_max
1471 malloc_mutex_unlock(&brk_mtx
);
1472 madvise(chunk
, size
, MADV_FREE
);
1475 * Iteratively create records of each chunk-sized
1476 * memory region that 'chunk' is comprised of, so that
1477 * the address range can be recycled if memory usage
1478 * increases later on.
1480 for (offset
= 0; offset
< size
; offset
+= chunksize
) {
1481 node
= base_chunk_node_alloc();
1485 node
->chunk
= (void *)((uintptr_t)chunk
1486 + (uintptr_t)offset
);
1487 node
->size
= chunksize
;
1489 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1494 pages_unmap(chunk
, size
);
1497 * Make a record of the chunk's address, so that the address
1498 * range can be recycled if memory usage increases later on.
1499 * Don't bother to create entries if (size > chunksize), since
1500 * doing so could cause scalability issues for truly gargantuan
1501 * objects (many gigabytes or larger).
1503 if (size
== chunksize
) {
1504 node
= base_chunk_node_alloc();
1506 node
->chunk
= (void *)(uintptr_t)chunk
;
1507 node
->size
= chunksize
;
1509 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1517 stats_chunks
.curchunks
-= (size
/ chunksize
);
1519 malloc_mutex_unlock(&chunks_mtx
);
1523 * End chunk management functions.
1525 /******************************************************************************/
1531 * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1533 * We maintain at least one block of arenas. Usually there are more.
1534 * The blocks are $ncpu arenas in size. Whole blocks are 'hashed'
1535 * amongst threads. To accomplish this, next_arena advances only in
1538 static __noinline arena_t
*
1539 choose_arena_hard(void)
1544 /* Initialize the current block of arenas and advance to next. */
1545 malloc_mutex_lock(&arenas_mtx
);
1546 assert(next_arena
% ncpus
== 0);
1547 assert(narenas
% ncpus
== 0);
1548 map
= &arenas
[next_arena
];
1549 set_arenas_map(map
);
1550 for (i
= 0; i
< ncpus
; i
++) {
1551 if (arenas
[next_arena
] == NULL
)
1552 arenas_extend(next_arena
);
1553 next_arena
= (next_arena
+ 1) % narenas
;
1555 malloc_mutex_unlock(&arenas_mtx
);
1558 * If we were unable to allocate an arena above, then default to
1559 * the first arena, which is always present.
1561 curcpu
= thr_curcpu();
1562 if (map
[curcpu
] != NULL
)
1567 static inline arena_t
*
1573 map
= get_arenas_map();
1574 curcpu
= thr_curcpu();
1575 if (__predict_true(map
!= NULL
&& map
[curcpu
] != NULL
))
1578 return choose_arena_hard();
1583 arena_chunk_comp(arena_chunk_t
*a
, arena_chunk_t
*b
)
1589 if ((uintptr_t)a
< (uintptr_t)b
)
1597 /* Generate red-black tree code for arena chunks. */
1598 RB_GENERATE_STATIC(arena_chunk_tree_s
, arena_chunk_s
, link
, arena_chunk_comp
);
1603 arena_run_comp(arena_run_t
*a
, arena_run_t
*b
)
1609 if ((uintptr_t)a
< (uintptr_t)b
)
1617 /* Generate red-black tree code for arena runs. */
1618 RB_GENERATE_STATIC(arena_run_tree_s
, arena_run_s
, link
, arena_run_comp
);
1621 static inline void *
1622 arena_run_reg_alloc(arena_run_t
*run
, arena_bin_t
*bin
)
1625 unsigned i
, mask
, bit
, regind
;
1627 assert(run
->magic
== ARENA_RUN_MAGIC
);
1628 assert(run
->regs_minelm
< bin
->regs_mask_nelms
);
1631 * Move the first check outside the loop, so that run->regs_minelm can
1632 * be updated unconditionally, without the possibility of updating it
1635 i
= run
->regs_minelm
;
1636 mask
= run
->regs_mask
[i
];
1638 /* Usable allocation found. */
1639 bit
= ffs((int)mask
) - 1;
1641 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
1642 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
1643 + (bin
->reg_size
* regind
));
1647 run
->regs_mask
[i
] = mask
;
1652 for (i
++; i
< bin
->regs_mask_nelms
; i
++) {
1653 mask
= run
->regs_mask
[i
];
1655 /* Usable allocation found. */
1656 bit
= ffs((int)mask
) - 1;
1658 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
1659 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
1660 + (bin
->reg_size
* regind
));
1664 run
->regs_mask
[i
] = mask
;
1667 * Make a note that nothing before this element
1668 * contains a free region.
1670 run
->regs_minelm
= i
; /* Low payoff: + (mask == 0); */
1682 arena_run_reg_dalloc(arena_run_t
*run
, arena_bin_t
*bin
, void *ptr
, size_t size
)
1685 * To divide by a number D that is not a power of two we multiply
1686 * by (2^21 / D) and then right shift by 21 positions.
1692 * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1694 #define SIZE_INV_SHIFT 21
1695 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1696 static const unsigned size_invs
[] = {
1698 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1699 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1700 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1701 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1702 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1703 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1704 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1705 #if (QUANTUM_2POW_MIN < 4)
1707 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1708 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1709 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1710 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1711 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1712 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1713 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1714 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1717 unsigned diff
, regind
, elm
, bit
;
1720 assert(run
->magic
== ARENA_RUN_MAGIC
);
1721 assert(((sizeof(size_invs
)) / sizeof(unsigned)) + 3
1722 >= (SMALL_MAX_DEFAULT
>> QUANTUM_2POW_MIN
));
1725 * Avoid doing division with a variable divisor if possible. Using
1726 * actual division here can reduce allocator throughput by over 20%!
1728 diff
= (unsigned)((uintptr_t)ptr
- (uintptr_t)run
- bin
->reg0_offset
);
1729 if ((size
& (size
- 1)) == 0) {
1731 * log2_table allows fast division of a power of two in the
1734 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1736 static const unsigned char log2_table
[] = {
1737 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1748 regind
= (diff
>> log2_table
[size
- 1]);
1749 else if (size
<= 32768)
1750 regind
= diff
>> (8 + log2_table
[(size
>> 8) - 1]);
1753 * The page size is too large for us to use the lookup
1754 * table. Use real division.
1756 regind
= (unsigned)(diff
/ size
);
1758 } else if (size
<= ((sizeof(size_invs
) / sizeof(unsigned))
1759 << QUANTUM_2POW_MIN
) + 2) {
1760 regind
= size_invs
[(size
>> QUANTUM_2POW_MIN
) - 3] * diff
;
1761 regind
>>= SIZE_INV_SHIFT
;
1764 * size_invs isn't large enough to handle this size class, so
1765 * calculate regind using actual division. This only happens
1766 * if the user increases small_max via the 'S' runtime
1767 * configuration option.
1769 regind
= (unsigned)(diff
/ size
);
1771 assert(diff
== regind
* size
);
1772 assert(regind
< bin
->nregs
);
1774 elm
= regind
>> (SIZEOF_INT_2POW
+ 3);
1775 if (elm
< run
->regs_minelm
)
1776 run
->regs_minelm
= elm
;
1777 bit
= regind
- (elm
<< (SIZEOF_INT_2POW
+ 3));
1778 assert((run
->regs_mask
[elm
] & (1 << bit
)) == 0);
1779 run
->regs_mask
[elm
] |= (1 << bit
);
1781 #undef SIZE_INV_SHIFT
1785 arena_run_split(arena_t
*arena
, arena_run_t
*run
, size_t size
)
1787 arena_chunk_t
*chunk
;
1788 unsigned run_ind
, map_offset
, total_pages
, need_pages
, rem_pages
;
1791 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
1792 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
1794 total_pages
= chunk
->map
[run_ind
].npages
;
1795 need_pages
= (unsigned)(size
>> pagesize_2pow
);
1796 assert(need_pages
<= total_pages
);
1797 rem_pages
= total_pages
- need_pages
;
1799 /* Split enough pages from the front of run to fit allocation size. */
1800 map_offset
= run_ind
;
1801 for (i
= 0; i
< need_pages
; i
++) {
1802 chunk
->map
[map_offset
+ i
].npages
= need_pages
;
1803 chunk
->map
[map_offset
+ i
].pos
= i
;
1806 /* Keep track of trailing unused pages for later use. */
1807 if (rem_pages
> 0) {
1808 /* Update map for trailing pages. */
1809 map_offset
+= need_pages
;
1810 chunk
->map
[map_offset
].npages
= rem_pages
;
1811 chunk
->map
[map_offset
].pos
= POS_FREE
;
1812 chunk
->map
[map_offset
+ rem_pages
- 1].npages
= rem_pages
;
1813 chunk
->map
[map_offset
+ rem_pages
- 1].pos
= POS_FREE
;
1816 chunk
->pages_used
+= need_pages
;
1819 static arena_chunk_t
*
1820 arena_chunk_alloc(arena_t
*arena
)
1822 arena_chunk_t
*chunk
;
1824 if (arena
->spare
!= NULL
) {
1825 chunk
= arena
->spare
;
1826 arena
->spare
= NULL
;
1829 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1831 chunk
= (arena_chunk_t
*)chunk_alloc(chunksize
);
1835 arena
->stats
.mapped
+= chunksize
;
1838 chunk
->arena
= arena
;
1841 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1844 * Claim that no pages are in use, since the header is merely
1847 chunk
->pages_used
= 0;
1849 chunk
->max_frun_npages
= chunk_npages
-
1850 arena_chunk_header_npages
;
1851 chunk
->min_frun_ind
= arena_chunk_header_npages
;
1854 * Initialize enough of the map to support one maximal free run.
1856 chunk
->map
[arena_chunk_header_npages
].npages
= chunk_npages
-
1857 arena_chunk_header_npages
;
1858 chunk
->map
[arena_chunk_header_npages
].pos
= POS_FREE
;
1859 chunk
->map
[chunk_npages
- 1].npages
= chunk_npages
-
1860 arena_chunk_header_npages
;
1861 chunk
->map
[chunk_npages
- 1].pos
= POS_FREE
;
1868 arena_chunk_dealloc(arena_t
*arena
, arena_chunk_t
*chunk
)
1872 * Remove chunk from the chunk tree, regardless of whether this chunk
1873 * will be cached, so that the arena does not use it.
1876 RB_REMOVE(arena_chunk_tree_s
, &chunk
->arena
->chunks
, chunk
);
1878 if (opt_hint
== false) {
1879 if (arena
->spare
!= NULL
) {
1880 chunk_dealloc((void *)arena
->spare
, chunksize
);
1882 arena
->stats
.mapped
-= chunksize
;
1885 arena
->spare
= chunk
;
1887 assert(arena
->spare
== NULL
);
1888 chunk_dealloc((void *)chunk
, chunksize
);
1890 arena
->stats
.mapped
-= chunksize
;
1895 static arena_run_t
*
1896 arena_run_alloc(arena_t
*arena
, size_t size
)
1898 arena_chunk_t
*chunk
;
1900 unsigned need_npages
, limit_pages
, compl_need_npages
;
1902 assert(size
<= (chunksize
- (arena_chunk_header_npages
<<
1904 assert((size
& pagesize_mask
) == 0);
1907 * Search through arena's chunks in address order for a free run that is
1908 * large enough. Look for the first fit.
1910 need_npages
= (unsigned)(size
>> pagesize_2pow
);
1911 limit_pages
= chunk_npages
- arena_chunk_header_npages
;
1912 compl_need_npages
= limit_pages
- need_npages
;
1914 RB_FOREACH(chunk
, arena_chunk_tree_s
, &arena
->chunks
) {
1916 * Avoid searching this chunk if there are not enough
1917 * contiguous free pages for there to possibly be a large
1920 if (chunk
->pages_used
<= compl_need_npages
&&
1921 need_npages
<= chunk
->max_frun_npages
) {
1922 arena_chunk_map_t
*mapelm
;
1924 unsigned max_frun_npages
= 0;
1925 unsigned min_frun_ind
= chunk_npages
;
1927 assert(chunk
->min_frun_ind
>=
1928 arena_chunk_header_npages
);
1929 for (i
= chunk
->min_frun_ind
; i
< chunk_npages
;) {
1930 mapelm
= &chunk
->map
[i
];
1931 if (mapelm
->pos
== POS_FREE
) {
1932 if (mapelm
->npages
>= need_npages
) {
1933 run
= (arena_run_t
*)
1934 ((uintptr_t)chunk
+ (i
<<
1936 /* Update page map. */
1937 arena_run_split(arena
, run
,
1941 if (mapelm
->npages
>
1946 if (i
< min_frun_ind
) {
1948 if (i
< chunk
->min_frun_ind
)
1949 chunk
->min_frun_ind
= i
;
1952 i
+= mapelm
->npages
;
1955 * Search failure. Reset cached chunk->max_frun_npages.
1956 * chunk->min_frun_ind was already reset above (if
1959 chunk
->max_frun_npages
= max_frun_npages
;
1964 * No usable runs. Create a new chunk from which to allocate the run.
1966 chunk
= arena_chunk_alloc(arena
);
1969 run
= (arena_run_t
*)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
1971 /* Update page map. */
1972 arena_run_split(arena
, run
, size
);
1977 arena_run_dalloc(arena_t
*arena
, arena_run_t
*run
, size_t size
)
1979 arena_chunk_t
*chunk
;
1980 unsigned run_ind
, run_pages
;
1982 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
1984 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
1986 assert(run_ind
>= arena_chunk_header_npages
);
1987 assert(run_ind
< (chunksize
>> pagesize_2pow
));
1988 run_pages
= (unsigned)(size
>> pagesize_2pow
);
1989 assert(run_pages
== chunk
->map
[run_ind
].npages
);
1991 /* Subtract pages from count of pages used in chunk. */
1992 chunk
->pages_used
-= run_pages
;
1994 /* Mark run as deallocated. */
1995 assert(chunk
->map
[run_ind
].npages
== run_pages
);
1996 chunk
->map
[run_ind
].pos
= POS_FREE
;
1997 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
== run_pages
);
1998 chunk
->map
[run_ind
+ run_pages
- 1].pos
= POS_FREE
;
2001 * Tell the kernel that we don't need the data in this run, but only if
2002 * requested via runtime configuration.
2005 madvise(run
, size
, MADV_FREE
);
2007 /* Try to coalesce with neighboring runs. */
2008 if (run_ind
> arena_chunk_header_npages
&&
2009 chunk
->map
[run_ind
- 1].pos
== POS_FREE
) {
2010 unsigned prev_npages
;
2012 /* Coalesce with previous run. */
2013 prev_npages
= chunk
->map
[run_ind
- 1].npages
;
2014 run_ind
-= prev_npages
;
2015 assert(chunk
->map
[run_ind
].npages
== prev_npages
);
2016 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2017 run_pages
+= prev_npages
;
2019 chunk
->map
[run_ind
].npages
= run_pages
;
2020 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2021 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2022 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2025 if (run_ind
+ run_pages
< chunk_npages
&&
2026 chunk
->map
[run_ind
+ run_pages
].pos
== POS_FREE
) {
2027 unsigned next_npages
;
2029 /* Coalesce with next run. */
2030 next_npages
= chunk
->map
[run_ind
+ run_pages
].npages
;
2031 run_pages
+= next_npages
;
2032 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
==
2034 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2036 chunk
->map
[run_ind
].npages
= run_pages
;
2037 chunk
->map
[run_ind
].pos
= POS_FREE
;
2038 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2039 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2042 if (chunk
->map
[run_ind
].npages
> chunk
->max_frun_npages
)
2043 chunk
->max_frun_npages
= chunk
->map
[run_ind
].npages
;
2044 if (run_ind
< chunk
->min_frun_ind
)
2045 chunk
->min_frun_ind
= run_ind
;
2047 /* Deallocate chunk if it is now completely unused. */
2048 if (chunk
->pages_used
== 0)
2049 arena_chunk_dealloc(arena
, chunk
);
2052 static arena_run_t
*
2053 arena_bin_nonfull_run_get(arena_t
*arena
, arena_bin_t
*bin
)
2056 unsigned i
, remainder
;
2058 /* Look for a usable run. */
2060 if ((run
= RB_MIN(arena_run_tree_s
, &bin
->runs
)) != NULL
) {
2061 /* run is guaranteed to have available space. */
2063 RB_REMOVE(arena_run_tree_s
, &bin
->runs
, run
);
2065 bin
->stats
.reruns
++;
2069 /* No existing runs have any space available. */
2071 /* Allocate a new run. */
2072 run
= arena_run_alloc(arena
, bin
->run_size
);
2076 /* Initialize run internals. */
2079 for (i
= 0; i
< bin
->regs_mask_nelms
; i
++)
2080 run
->regs_mask
[i
] = UINT_MAX
;
2081 remainder
= bin
->nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1);
2082 if (remainder
!= 0) {
2083 /* The last element has spare bits that need to be unset. */
2084 run
->regs_mask
[i
] = (UINT_MAX
>> ((1 << (SIZEOF_INT_2POW
+ 3))
2088 run
->regs_minelm
= 0;
2090 run
->nfree
= bin
->nregs
;
2092 run
->magic
= ARENA_RUN_MAGIC
;
2097 bin
->stats
.curruns
++;
2098 if (bin
->stats
.curruns
> bin
->stats
.highruns
)
2099 bin
->stats
.highruns
= bin
->stats
.curruns
;
2104 /* bin->runcur must have space available before this function is called. */
2105 static inline void *
2106 arena_bin_malloc_easy(arena_t
*arena
, arena_bin_t
*bin
, arena_run_t
*run
)
2110 assert(run
->magic
== ARENA_RUN_MAGIC
);
2111 assert(run
->nfree
> 0);
2113 ret
= arena_run_reg_alloc(run
, bin
);
2114 assert(ret
!= NULL
);
2120 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2122 arena_bin_malloc_hard(arena_t
*arena
, arena_bin_t
*bin
)
2125 bin
->runcur
= arena_bin_nonfull_run_get(arena
, bin
);
2126 if (bin
->runcur
== NULL
)
2128 assert(bin
->runcur
->magic
== ARENA_RUN_MAGIC
);
2129 assert(bin
->runcur
->nfree
> 0);
2131 return (arena_bin_malloc_easy(arena
, bin
, bin
->runcur
));
2135 * Calculate bin->run_size such that it meets the following constraints:
2137 * *) bin->run_size >= min_run_size
2138 * *) bin->run_size <= arena_maxclass
2139 * *) bin->run_size <= RUN_MAX_SMALL
2140 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2142 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2143 * also calculated here, since these settings are all interdependent.
2146 arena_bin_run_size_calc(arena_bin_t
*bin
, size_t min_run_size
)
2148 size_t try_run_size
, good_run_size
;
2149 unsigned good_nregs
, good_mask_nelms
, good_reg0_offset
;
2150 unsigned try_nregs
, try_mask_nelms
, try_reg0_offset
;
2152 assert(min_run_size
>= pagesize
);
2153 assert(min_run_size
<= arena_maxclass
);
2154 assert(min_run_size
<= RUN_MAX_SMALL
);
2157 * Calculate known-valid settings before entering the run_size
2158 * expansion loop, so that the first part of the loop always copies
2161 * The do..while loop iteratively reduces the number of regions until
2162 * the run header and the regions no longer overlap. A closed formula
2163 * would be quite messy, since there is an interdependency between the
2164 * header's mask length and the number of regions.
2166 try_run_size
= min_run_size
;
2167 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2168 bin
->reg_size
) + 1); /* Counter-act try_nregs-- in loop. */
2171 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2172 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ? 1 : 0);
2173 try_reg0_offset
= (unsigned)(try_run_size
-
2174 (try_nregs
* bin
->reg_size
));
2175 } while (sizeof(arena_run_t
) + (sizeof(unsigned) * (try_mask_nelms
- 1))
2178 /* run_size expansion loop. */
2181 * Copy valid settings before trying more aggressive settings.
2183 good_run_size
= try_run_size
;
2184 good_nregs
= try_nregs
;
2185 good_mask_nelms
= try_mask_nelms
;
2186 good_reg0_offset
= try_reg0_offset
;
2188 /* Try more aggressive settings. */
2189 try_run_size
+= pagesize
;
2190 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2191 bin
->reg_size
) + 1); /* Counter-act try_nregs-- in loop. */
2194 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2195 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ?
2197 try_reg0_offset
= (unsigned)(try_run_size
- (try_nregs
*
2199 } while (sizeof(arena_run_t
) + (sizeof(unsigned) *
2200 (try_mask_nelms
- 1)) > try_reg0_offset
);
2201 } while (try_run_size
<= arena_maxclass
&& try_run_size
<= RUN_MAX_SMALL
2202 && RUN_MAX_OVRHD
* (bin
->reg_size
<< 3) > RUN_MAX_OVRHD_RELAX
2203 && (try_reg0_offset
<< RUN_BFP
) > RUN_MAX_OVRHD
* try_run_size
);
2205 assert(sizeof(arena_run_t
) + (sizeof(unsigned) * (good_mask_nelms
- 1))
2206 <= good_reg0_offset
);
2207 assert((good_mask_nelms
<< (SIZEOF_INT_2POW
+ 3)) >= good_nregs
);
2209 /* Copy final settings. */
2210 bin
->run_size
= good_run_size
;
2211 bin
->nregs
= good_nregs
;
2212 bin
->regs_mask_nelms
= good_mask_nelms
;
2213 bin
->reg0_offset
= good_reg0_offset
;
2215 return (good_run_size
);
2219 arena_malloc(arena_t
*arena
, size_t size
)
2223 assert(arena
!= NULL
);
2224 assert(arena
->magic
== ARENA_MAGIC
);
2226 assert(QUANTUM_CEILING(size
) <= arena_maxclass
);
2228 if (size
<= bin_maxclass
) {
2232 /* Small allocation. */
2234 if (size
< small_min
) {
2236 size
= pow2_ceil(size
);
2237 bin
= &arena
->bins
[ffs((int)(size
>> (TINY_MIN_2POW
+
2239 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2241 * Bin calculation is always correct, but we may need
2242 * to fix size for the purposes of assertions and/or
2245 if (size
< (1 << TINY_MIN_2POW
))
2246 size
= (1 << TINY_MIN_2POW
);
2248 } else if (size
<= small_max
) {
2249 /* Quantum-spaced. */
2250 size
= QUANTUM_CEILING(size
);
2251 bin
= &arena
->bins
[ntbins
+ (size
>> opt_quantum_2pow
)
2255 size
= pow2_ceil(size
);
2256 bin
= &arena
->bins
[ntbins
+ nqbins
2257 + (ffs((int)(size
>> opt_small_max_2pow
)) - 2)];
2259 assert(size
== bin
->reg_size
);
2261 malloc_mutex_lock(&arena
->mtx
);
2262 if ((run
= bin
->runcur
) != NULL
&& run
->nfree
> 0)
2263 ret
= arena_bin_malloc_easy(arena
, bin
, run
);
2265 ret
= arena_bin_malloc_hard(arena
, bin
);
2268 malloc_mutex_unlock(&arena
->mtx
);
2273 bin
->stats
.nrequests
++;
2274 arena
->stats
.nmalloc_small
++;
2275 arena
->stats
.allocated_small
+= size
;
2278 /* Large allocation. */
2279 size
= PAGE_CEILING(size
);
2280 malloc_mutex_lock(&arena
->mtx
);
2281 ret
= (void *)arena_run_alloc(arena
, size
);
2283 malloc_mutex_unlock(&arena
->mtx
);
2287 arena
->stats
.nmalloc_large
++;
2288 arena
->stats
.allocated_large
+= size
;
2292 malloc_mutex_unlock(&arena
->mtx
);
2295 memset(ret
, 0xa5, size
);
2297 memset(ret
, 0, size
);
2302 arena_palloc_trim(arena_t
*arena
, arena_chunk_t
*chunk
, unsigned pageind
,
2310 * Modifiy the map such that arena_run_dalloc() sees the run as
2311 * separately allocated.
2313 for (i
= 0; i
< npages
; i
++) {
2314 chunk
->map
[pageind
+ i
].npages
= npages
;
2315 chunk
->map
[pageind
+ i
].pos
= i
;
2317 arena_run_dalloc(arena
, (arena_run_t
*)((uintptr_t)chunk
+ (pageind
<<
2318 pagesize_2pow
)), npages
<< pagesize_2pow
);
/* Only handles large allocations that require more than page alignment. */
static void *
arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
{
	void *ret;
	size_t offset;
	arena_chunk_t *chunk;
	unsigned pageind, i, npages;

	assert((size & pagesize_mask) == 0);
	assert((alignment & pagesize_mask) == 0);

	npages = (unsigned)(size >> pagesize_2pow);

	malloc_mutex_lock(&arena->mtx);
	ret = (void *)arena_run_alloc(arena, alloc_size);
	if (ret == NULL) {
		malloc_mutex_unlock(&arena->mtx);
		return (NULL);
	}

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);

	offset = (uintptr_t)ret & (alignment - 1);
	assert((offset & pagesize_mask) == 0);
	assert(offset < alloc_size);
	if (offset == 0) {
		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
		    pagesize_2pow);

		/* Update the map for the run to be kept. */
		for (i = 0; i < npages; i++) {
			chunk->map[pageind + i].npages = npages;
			assert(chunk->map[pageind + i].pos == i);
		}

		/* Trim trailing space. */
		arena_palloc_trim(arena, chunk, pageind + npages,
		    (unsigned)((alloc_size - size) >> pagesize_2pow));
	} else {
		size_t leadsize, trailsize;

		leadsize = alignment - offset;
		ret = (void *)((uintptr_t)ret + leadsize);
		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
		    pagesize_2pow);

		/* Update the map for the run to be kept. */
		for (i = 0; i < npages; i++) {
			chunk->map[pageind + i].npages = npages;
			chunk->map[pageind + i].pos = i;
		}

		/* Trim leading space. */
		arena_palloc_trim(arena, chunk,
		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
		    (unsigned)(leadsize >> pagesize_2pow));

		trailsize = alloc_size - leadsize - size;
		if (trailsize != 0) {
			/* Trim trailing space. */
			assert(trailsize < alloc_size);
			arena_palloc_trim(arena, chunk, pageind + npages,
			    (unsigned)(trailsize >> pagesize_2pow));
		}
	}

#ifdef MALLOC_STATS
	arena->stats.nmalloc_large++;
	arena->stats.allocated_large += size;
#endif
	malloc_mutex_unlock(&arena->mtx);

	if (opt_junk)
		memset(ret, 0xa5, size);
	else if (opt_zero)
		memset(ret, 0, size);
	return (ret);
}

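/*
 * Illustrative numbers for the trimming logic above, assuming 4 kB pages:
 * for a request of alignment == 8192 and size == 4096, ipalloc() asks for
 * alloc_size == (8192 << 1) - 4096 == 12288 (three pages).  If the run
 * returned by arena_run_alloc() starts on an odd page, offset == 4096, so
 * one leading page (leadsize) and one trailing page (trailsize) are handed
 * back via arena_palloc_trim(), leaving a single correctly aligned page.
 */
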
/* Return the size of the allocation pointed to by ptr. */
static size_t
arena_salloc(const void *ptr)
{
	size_t ret;
	arena_chunk_t *chunk;
	arena_chunk_map_t *mapelm;
	unsigned pageind;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	/*
	 * No arena data structures that we query here can change in a way that
	 * affects this function, so we don't need to lock.
	 */
	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
	    pagesize_2pow);
	mapelm = &chunk->map[pageind];
	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
	    pagesize_2pow)) {
		arena_run_t *run;

		pageind -= mapelm->pos;

		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
		    pagesize_2pow));
		assert(run->magic == ARENA_RUN_MAGIC);
		ret = run->bin->reg_size;
	} else
		ret = mapelm->npages << pagesize_2pow;

	return (ret);
}

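/*
 * Example of the lookup above, assuming 4 kB pages and 1 MB chunks:
 * CHUNK_ADDR2BASE() masks ptr down to its chunk, and pageind is simply
 * (ptr - chunk) >> pagesize_2pow.  For a pointer 0x5000 bytes into a chunk,
 * pageind == 5; if map[5].pos == 2, the run header lives at page 3 and the
 * allocation size is that run's bin->reg_size.
 */
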
static void *
arena_ralloc(void *ptr, size_t size, size_t oldsize)
{
	void *ret;

	/* Avoid moving the allocation if the size class would not change. */
	if (size < small_min) {
		if (oldsize < small_min &&
		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
			goto IN_PLACE;
	} else if (size <= small_max) {
		if (oldsize >= small_min && oldsize <= small_max &&
		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
			goto IN_PLACE;
	} else {
		/*
		 * We make no attempt to resize runs here, though it would be
		 * possible to do so.
		 */
		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
			goto IN_PLACE;
	}

	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class.  In that case, fall back to
	 * allocating new space and copying.
	 */
	ret = arena_malloc(choose_arena(), size);
	if (ret == NULL)
		return (NULL);

	/* Junk/zero-filling were already done by arena_malloc(). */
	if (size < oldsize)
		memcpy(ret, ptr, size);
	else
		memcpy(ret, ptr, oldsize);
	idalloc(ptr);
	return (ret);
IN_PLACE:
	if (opt_junk && size < oldsize)
		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
	else if (opt_zero && size > oldsize)
		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
	return (ptr);
}

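/*
 * Example of the in-place test above, assuming a 16 byte quantum: growing
 * an allocation from 40 to 45 bytes keeps QUANTUM_CEILING() at 48, so the
 * same quantum-spaced class is reused and only the junk/zero fill runs.
 * Growing from 40 to 80 bytes changes the class, so a new region is
 * allocated and the old contents are copied.
 */
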
static void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
	unsigned pageind;
	arena_chunk_map_t *mapelm;
	size_t size;

	assert(arena != NULL);
	assert(arena->magic == ARENA_MAGIC);
	assert(chunk->arena == arena);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
	    pagesize_2pow);
	mapelm = &chunk->map[pageind];
	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
	    pagesize_2pow)) {
		arena_run_t *run;
		arena_bin_t *bin;

		/* Small allocation. */

		pageind -= mapelm->pos;

		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
		    pagesize_2pow));
		assert(run->magic == ARENA_RUN_MAGIC);
		bin = run->bin;
		size = bin->reg_size;

		if (opt_junk)
			memset(ptr, 0x5a, size);

		malloc_mutex_lock(&arena->mtx);
		arena_run_reg_dalloc(run, bin, ptr, size);
		run->nfree++;

		if (run->nfree == bin->nregs) {
			/* Deallocate run. */
			if (run == bin->runcur)
				bin->runcur = NULL;
			else if (bin->nregs != 1) {
				/*
				 * This block's conditional is necessary because
				 * if the run only contains one region, then it
				 * never gets inserted into the non-full runs
				 * tree.
				 */
				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
			}
			arena_run_dalloc(arena, run, bin->run_size);
#ifdef MALLOC_STATS
			bin->stats.curruns--;
#endif
		} else if (run->nfree == 1 && run != bin->runcur) {
			/*
			 * Make sure that bin->runcur always refers to the
			 * lowest non-full run, if one exists.
			 */
			if (bin->runcur == NULL)
				bin->runcur = run;
			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
				/* Switch runcur. */
				if (bin->runcur->nfree > 0) {
					/* Insert runcur. */
					RB_INSERT(arena_run_tree_s, &bin->runs,
					    bin->runcur);
				}
				bin->runcur = run;
			} else {
				RB_INSERT(arena_run_tree_s, &bin->runs, run);
			}
		}
#ifdef MALLOC_STATS
		arena->stats.allocated_small -= size;
		arena->stats.ndalloc_small++;
#endif
	} else {
		/* Large allocation. */

		size = mapelm->npages << pagesize_2pow;
		assert((((uintptr_t)ptr) & pagesize_mask) == 0);

		if (opt_junk)
			memset(ptr, 0x5a, size);

		malloc_mutex_lock(&arena->mtx);
		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
#ifdef MALLOC_STATS
		arena->stats.allocated_large -= size;
		arena->stats.ndalloc_large++;
#endif
	}

	malloc_mutex_unlock(&arena->mtx);
}

static bool
arena_new(arena_t *arena)
{
	unsigned i;
	arena_bin_t *bin;
	size_t prev_run_size;

	malloc_mutex_init(&arena->mtx);

#ifdef MALLOC_STATS
	memset(&arena->stats, 0, sizeof(arena_stats_t));
#endif

	/* Initialize chunks. */
	RB_INIT(&arena->chunks);
	arena->spare = NULL;

	/* Initialize bins. */
	prev_run_size = pagesize;

	/* (2^n)-spaced tiny bins. */
	for (i = 0; i < ntbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = (1 << (TINY_MIN_2POW + i));
		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* Quantum-spaced bins. */
	for (; i < ntbins + nqbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = quantum * (i - ntbins + 1);
		/*
		 * pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
		 */
		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* (2^n)-spaced sub-page bins. */
	for (; i < ntbins + nqbins + nsbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));

		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

#ifdef MALLOC_DEBUG
	arena->magic = ARENA_MAGIC;
#endif

	return (false);
}

/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
	arena_t *ret;

	/* Allocate enough space for trailing bins. */
	ret = (arena_t *)base_alloc(sizeof(arena_t)
	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
	if (ret != NULL && arena_new(ret) == false) {
		arenas[ind] = ret;
		return (ret);
	}
	/* Only reached if there is an OOM error. */

	/*
	 * OOM here is quite inconvenient to propagate, since dealing with it
	 * would require a check for failure in the fast path.  Instead, punt
	 * by using arenas[0].  In practice, this is an extremely unlikely
	 * failure.
	 */
	_malloc_message(getprogname(),
	    ": (malloc) Error initializing arena\n", "", "");
	if (opt_abort)
		abort();

	return (arenas[0]);
}

/******************************************************************************/
/*
 * Begin general internal functions.
 */

static void *
huge_malloc(size_t size)
{
	void *ret;
	size_t csize;
	chunk_node_t *node;

	/* Allocate one or more contiguous chunks for this request. */

	csize = CHUNK_CEILING(size);
	if (csize == 0) {
		/* size is large enough to cause size_t wrap-around. */
		return (NULL);
	}

	/* Allocate a chunk node with which to track the chunk. */
	node = base_chunk_node_alloc();
	if (node == NULL)
		return (NULL);

	ret = chunk_alloc(csize);
	if (ret == NULL) {
		base_chunk_node_dealloc(node);
		return (NULL);
	}

	/* Insert node into huge. */
	node->chunk = ret;
	node->size = csize;

	malloc_mutex_lock(&chunks_mtx);
	RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
	huge_nmalloc++;
	huge_allocated += csize;
#endif
	malloc_mutex_unlock(&chunks_mtx);

	if (opt_junk)
		memset(ret, 0xa5, csize);
	else if (opt_zero)
		memset(ret, 0, csize);

	return (ret);
}

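/*
 * Example, assuming the default 1 MB chunk size: huge_malloc(1500000)
 * rounds csize up to 2 MB via CHUNK_CEILING(), allocates two contiguous
 * chunks, and records them under a single chunk_node_t in the huge tree.
 */
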
/* Only handles large allocations that require more than chunk alignment. */
static void *
huge_palloc(size_t alignment, size_t size)
{
	void *ret;
	size_t alloc_size, chunk_size, offset;
	chunk_node_t *node;

	/*
	 * This allocation requires alignment that is even larger than chunk
	 * alignment.  This means that huge_malloc() isn't good enough.
	 *
	 * Allocate almost twice as many chunks as are demanded by the size or
	 * alignment, in order to assure the alignment can be achieved, then
	 * unmap leading and trailing chunks.
	 */
	assert(alignment >= chunksize);

	chunk_size = CHUNK_CEILING(size);

	if (size >= alignment)
		alloc_size = chunk_size + alignment - chunksize;
	else
		alloc_size = (alignment << 1) - chunksize;

	/* Allocate a chunk node with which to track the chunk. */
	node = base_chunk_node_alloc();
	if (node == NULL)
		return (NULL);

	ret = chunk_alloc(alloc_size);
	if (ret == NULL) {
		base_chunk_node_dealloc(node);
		return (NULL);
	}

	offset = (uintptr_t)ret & (alignment - 1);
	assert((offset & chunksize_mask) == 0);
	assert(offset < alloc_size);
	if (offset == 0) {
		/* Trim trailing space. */
		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
		    - chunk_size);
	} else {
		size_t trailsize;

		/* Trim leading space. */
		chunk_dealloc(ret, alignment - offset);

		ret = (void *)((uintptr_t)ret + (alignment - offset));

		trailsize = alloc_size - (alignment - offset) - chunk_size;
		if (trailsize != 0) {
			/* Trim trailing space. */
			assert(trailsize < alloc_size);
			chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
			    trailsize);
		}
	}

	/* Insert node into huge. */
	node->chunk = ret;
	node->size = chunk_size;

	malloc_mutex_lock(&chunks_mtx);
	RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
	huge_nmalloc++;
	huge_allocated += chunk_size;
#endif
	malloc_mutex_unlock(&chunks_mtx);

	if (opt_junk)
		memset(ret, 0xa5, chunk_size);
	else if (opt_zero)
		memset(ret, 0, chunk_size);

	return (ret);
}

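/*
 * Example of the over-allocation above, assuming 1 MB chunks: for
 * alignment == 4 MB and size == 1 MB, alloc_size == (4 MB << 1) - 1 MB
 * == 7 MB.  Wherever the 7 MB region lands, it must contain a 4 MB-aligned
 * 1 MB chunk; the leading and trailing remainders are unmapped again with
 * chunk_dealloc().
 */
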
static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
	void *ret;

	/* Avoid moving the allocation if the size class would not change. */
	if (oldsize > arena_maxclass &&
	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
		if (opt_junk && size < oldsize) {
			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
			    - size);
		} else if (opt_zero && size > oldsize) {
			memset((void *)((uintptr_t)ptr + oldsize), 0, size
			    - oldsize);
		}
		return (ptr);
	}

	if (CHUNK_ADDR2BASE(ptr) == ptr
#ifdef USE_BRK
	    && ((uintptr_t)ptr < (uintptr_t)brk_base
	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
#endif
	    ) {
		chunk_node_t *node, key;
		void *newptr;
		size_t oldcsize;
		size_t newcsize;

		newcsize = CHUNK_CEILING(size);
		oldcsize = CHUNK_CEILING(oldsize);
		assert(oldcsize != newcsize);
		if (newcsize == 0) {
			/* size_t wrap-around */
			return (NULL);
		}

		/*
		 * Remove the old region from the tree now.  If mremap()
		 * returns the region to the system, other thread may
		 * map it for same huge allocation and insert it to the
		 * tree before we acquire the mutex lock again.
		 */
		malloc_mutex_lock(&chunks_mtx);
		key.chunk = __DECONST(void *, ptr);
		node = RB_FIND(chunk_tree_s, &huge, &key);
		assert(node != NULL);
		assert(node->chunk == ptr);
		assert(node->size == oldcsize);
		RB_REMOVE(chunk_tree_s, &huge, node);
		malloc_mutex_unlock(&chunks_mtx);

		newptr = mremap(ptr, oldcsize, NULL, newcsize,
		    MAP_ALIGNED(chunksize_2pow));
		if (newptr == MAP_FAILED) {
			/* We still own the old region. */
			malloc_mutex_lock(&chunks_mtx);
			RB_INSERT(chunk_tree_s, &huge, node);
			malloc_mutex_unlock(&chunks_mtx);
		} else {
			assert(CHUNK_ADDR2BASE(newptr) == newptr);

			/* Insert new or resized old region. */
			malloc_mutex_lock(&chunks_mtx);
			node->size = newcsize;
			node->chunk = newptr;
			RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
			huge_nralloc++;
			huge_allocated += newcsize - oldcsize;
			if (newcsize > oldcsize) {
				stats_chunks.curchunks +=
				    (newcsize - oldcsize) / chunksize;
				if (stats_chunks.curchunks >
				    stats_chunks.highchunks)
					stats_chunks.highchunks =
					    stats_chunks.curchunks;
			} else {
				stats_chunks.curchunks -=
				    (oldcsize - newcsize) / chunksize;
			}
#endif
			malloc_mutex_unlock(&chunks_mtx);

			if (opt_junk && size < oldsize) {
				memset((void *)((uintptr_t)newptr + size), 0x5a,
				    newcsize - size);
			} else if (opt_zero && size > oldsize) {
				memset((void *)((uintptr_t)newptr + oldsize), 0,
				    size - oldsize);
			}
			return (newptr);
		}
	}

	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class.  In that case, fall back to
	 * allocating new space and copying.
	 */
	ret = huge_malloc(size);
	if (ret == NULL)
		return (NULL);

	if (CHUNK_ADDR2BASE(ptr) == ptr) {
		/* The old allocation is a chunk. */
		if (size < oldsize)
			memcpy(ret, ptr, size);
		else
			memcpy(ret, ptr, oldsize);
	} else {
		/* The old allocation is a region. */
		assert(oldsize < size);
		memcpy(ret, ptr, oldsize);
	}
	idalloc(ptr);
	return (ret);
}

static void
huge_dalloc(void *ptr)
{
	chunk_node_t key;
	chunk_node_t *node;

	malloc_mutex_lock(&chunks_mtx);

	/* Extract from tree of huge allocations. */
	key.chunk = ptr;
	node = RB_FIND(chunk_tree_s, &huge, &key);
	assert(node != NULL);
	assert(node->chunk == ptr);
	RB_REMOVE(chunk_tree_s, &huge, node);

#ifdef MALLOC_STATS
	huge_ndalloc++;
	huge_allocated -= node->size;
#endif

	malloc_mutex_unlock(&chunks_mtx);

	/* Unmap chunk. */
	if (opt_junk)
		memset(node->chunk, 0x5a, node->size);

	chunk_dealloc(node->chunk, node->size);

	base_chunk_node_dealloc(node);
}

static void *
imalloc(size_t size)
{
	void *ret;

	assert(size != 0);

	if (size <= arena_maxclass)
		ret = arena_malloc(choose_arena(), size);
	else
		ret = huge_malloc(size);

	return (ret);
}

static void *
ipalloc(size_t alignment, size_t size)
{
	void *ret;
	size_t ceil_size;

	/*
	 * Round size up to the nearest multiple of alignment.
	 *
	 * This done, we can take advantage of the fact that for each small
	 * size class, every object is aligned at the smallest power of two
	 * that is non-zero in the base two representation of the size.  For
	 * example:
	 *
	 *   Size |   Base 2 | Minimum alignment
	 *   -----+----------+------------------
	 *    144 | 10100000 |                32
	 *    192 | 11000000 |                64
	 *
	 * Depending on runtime settings, it is possible that arena_malloc()
	 * will further round up to a power of two, but that never causes
	 * correctness issues.
	 */
	ceil_size = (size + (alignment - 1)) & (-alignment);
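	/*
	 * For example, with size == 100 and alignment == 32 the expression
	 * above yields (100 + 31) & ~31 == 128, the smallest multiple of 32
	 * that can hold 100 bytes ((-alignment) is simply the two's
	 * complement form of ~(alignment - 1)).
	 */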
	/*
	 * (ceil_size < size) protects against the combination of maximal
	 * alignment and size greater than maximal alignment.
	 */
	if (ceil_size < size) {
		/* size_t overflow. */
		return (NULL);
	}

	if (ceil_size <= pagesize || (alignment <= pagesize
	    && ceil_size <= arena_maxclass))
		ret = arena_malloc(choose_arena(), ceil_size);
	else {
		size_t run_size;

		/*
		 * We can't achieve sub-page alignment, so round up alignment
		 * permanently; it makes later calculations simpler.
		 */
		alignment = PAGE_CEILING(alignment);
		ceil_size = PAGE_CEILING(size);
		/*
		 * (ceil_size < size) protects against very large sizes within
		 * pagesize of SIZE_T_MAX.
		 *
		 * (ceil_size + alignment < ceil_size) protects against the
		 * combination of maximal alignment and ceil_size large enough
		 * to cause overflow.  This is similar to the first overflow
		 * check above, but it needs to be repeated due to the new
		 * ceil_size value, which may now be *equal* to maximal
		 * alignment, whereas before we only detected overflow if the
		 * original size was *greater* than maximal alignment.
		 */
		if (ceil_size < size || ceil_size + alignment < ceil_size) {
			/* size_t overflow. */
			return (NULL);
		}

		/*
		 * Calculate the size of the over-size run that arena_palloc()
		 * would need to allocate in order to guarantee the alignment.
		 */
		if (ceil_size >= alignment)
			run_size = ceil_size + alignment - pagesize;
		else {
			/*
			 * It is possible that (alignment << 1) will cause
			 * overflow, but it doesn't matter because we also
			 * subtract pagesize, which in the case of overflow
			 * leaves us with a very large run_size.  That causes
			 * the first conditional below to fail, which means
			 * that the bogus run_size value never gets used for
			 * anything important.
			 */
			run_size = (alignment << 1) - pagesize;
		}

		if (run_size <= arena_maxclass) {
			ret = arena_palloc(choose_arena(), alignment, ceil_size,
			    run_size);
		} else if (alignment <= chunksize)
			ret = huge_malloc(ceil_size);
		else
			ret = huge_palloc(alignment, ceil_size);
	}

	assert(((uintptr_t)ret & (alignment - 1)) == 0);
	return (ret);
}

static void *
icalloc(size_t size)
{
	void *ret;

	if (size <= arena_maxclass) {
		ret = arena_malloc(choose_arena(), size);
		if (ret == NULL)
			return (NULL);
		memset(ret, 0, size);
	} else {
		/*
		 * The virtual memory system provides zero-filled pages, so
		 * there is no need to do so manually, unless opt_junk is
		 * enabled, in which case huge_malloc() fills huge allocations
		 * with junk.
		 */
		ret = huge_malloc(size);
		if (ret == NULL)
			return (NULL);

		if (opt_junk)
			memset(ret, 0, size);
#ifdef USE_BRK
		else if ((uintptr_t)ret >= (uintptr_t)brk_base
		    && (uintptr_t)ret < (uintptr_t)brk_max) {
			/*
			 * This may be a re-used brk chunk.  Therefore, zero
			 * the memory.
			 */
			memset(ret, 0, size);
		}
#endif
	}

	return (ret);
}

static size_t
isalloc(const void *ptr)
{
	size_t ret;
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		assert(chunk->arena->magic == ARENA_MAGIC);

		ret = arena_salloc(ptr);
	} else {
		chunk_node_t *node, key;

		/* Chunk (huge allocation). */

		malloc_mutex_lock(&chunks_mtx);

		/* Extract from tree of huge allocations. */
		key.chunk = __DECONST(void *, ptr);
		node = RB_FIND(chunk_tree_s, &huge, &key);
		assert(node != NULL);

		ret = node->size;

		malloc_mutex_unlock(&chunks_mtx);
	}

	return (ret);
}

static void *
iralloc(void *ptr, size_t size)
{
	void *ret;
	size_t oldsize;

	assert(ptr != NULL);
	assert(size != 0);

	oldsize = isalloc(ptr);

	if (size <= arena_maxclass)
		ret = arena_ralloc(ptr, size, oldsize);
	else
		ret = huge_ralloc(ptr, size, oldsize);

	return (ret);
}

static void
idalloc(void *ptr)
{
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_dalloc(chunk->arena, chunk, ptr);
	} else
		huge_dalloc(ptr);
}

static void
malloc_print_stats(void)
{

	if (opt_print_stats) {
		char s[UMAX2S_BUFSIZE];

		_malloc_message("___ Begin malloc statistics ___\n", "", "",
		    "");
		_malloc_message("Assertions ",
#ifdef NDEBUG
		    "disabled",
#else
		    "enabled",
#endif
		    "\n", "");
		_malloc_message("Boolean MALLOC_OPTIONS: ",
		    opt_abort ? "A" : "a",
		    opt_junk ? "J" : "j",
		    opt_hint ? "H" : "h");
		_malloc_message(opt_utrace ? "PU" : "Pu",
		    opt_sysv ? "V" : "v",
		    opt_xmalloc ? "X" : "x",
		    opt_zero ? "Z\n" : "z\n");

		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
		    "\n", "");
		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
		    "");
		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
		    ")\n", "");

#ifdef MALLOC_STATS
		{
			size_t allocated, mapped;
			unsigned i;
			arena_t *arena;

			/* Calculate and print allocated/mapped stats. */

			/* arenas. */
			for (i = 0, allocated = 0; i < narenas; i++) {
				if (arenas[i] != NULL) {
					malloc_mutex_lock(&arenas[i]->mtx);
					allocated +=
					    arenas[i]->stats.allocated_small;
					allocated +=
					    arenas[i]->stats.allocated_large;
					malloc_mutex_unlock(&arenas[i]->mtx);
				}
			}

			/* huge/base. */
			malloc_mutex_lock(&chunks_mtx);
			allocated += huge_allocated;
			mapped = stats_chunks.curchunks * chunksize;
			malloc_mutex_unlock(&chunks_mtx);

			malloc_mutex_lock(&base_mtx);
			mapped += base_mapped;
			malloc_mutex_unlock(&base_mtx);

			malloc_printf("Allocated: %zu, mapped: %zu\n",
			    allocated, mapped);

			/* Print chunk stats. */
			{
				chunk_stats_t chunks_stats;

				malloc_mutex_lock(&chunks_mtx);
				chunks_stats = stats_chunks;
				malloc_mutex_unlock(&chunks_mtx);

				malloc_printf("chunks: nchunks   "
				    "highchunks    curchunks\n");
				malloc_printf("  %13llu%13lu%13lu\n",
				    chunks_stats.nchunks,
				    chunks_stats.highchunks,
				    chunks_stats.curchunks);
			}

			/* Print huge allocation stats. */
			malloc_printf("huge: nmalloc      ndalloc      "
			    "nralloc    allocated\n");
			malloc_printf(" %12llu %12llu %12llu %12zu\n",
			    huge_nmalloc, huge_ndalloc, huge_nralloc,
			    huge_allocated);

			/* Print stats for each arena. */
			for (i = 0; i < narenas; i++) {
				arena = arenas[i];
				if (arena != NULL) {
					malloc_printf(
					    "\narenas[%u] @ %p\n", i, arena);
					malloc_mutex_lock(&arena->mtx);
					stats_print(arena);
					malloc_mutex_unlock(&arena->mtx);
				}
			}
		}
#endif /* #ifdef MALLOC_STATS */
		_malloc_message("--- End malloc statistics ---\n", "", "", "");
	}
}

/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static inline bool
malloc_init(void)
{

	if (malloc_initialized == false)
		return (malloc_init_hard());

	return (false);
}

static bool
malloc_init_hard(void)
{
	unsigned i, j;
	ssize_t linklen;
	char buf[PATH_MAX + 1];
	const char *opts = "";

	malloc_mutex_lock(&init_lock);
	if (malloc_initialized) {
		/*
		 * Another thread initialized the allocator before this one
		 * acquired init_lock.
		 */
		malloc_mutex_unlock(&init_lock);
		return (false);
	}

	/* Get number of CPUs. */
	{
		int mib[2];
		size_t len;

		mib[0] = CTL_HW;
		mib[1] = HW_NCPU;
		len = sizeof(ncpus);
		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
			/* Error. */
			ncpus = 1;
		}
	}

	/* Get page size. */
	{
		long result;

		result = sysconf(_SC_PAGESIZE);
		assert(result != -1);
		pagesize = (unsigned) result;

		/*
		 * We assume that pagesize is a power of 2 when calculating
		 * pagesize_mask and pagesize_2pow.
		 */
		assert(((result - 1) & result) == 0);
		pagesize_mask = result - 1;
		pagesize_2pow = ffs((int)result) - 1;
	}

	for (i = 0; i < 3; i++) {
		/* Get runtime configuration. */
		switch (i) {
		case 0:
			if ((linklen = readlink("/etc/malloc.conf", buf,
			    sizeof(buf) - 1)) != -1) {
				/*
				 * Use the contents of the "/etc/malloc.conf"
				 * symbolic link's name.
				 */
				buf[linklen] = '\0';
				opts = buf;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 1:
			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
			    issetugid() == 0) {
				/*
				 * Do nothing; opts is already initialized to
				 * the value of the MALLOC_OPTIONS environment
				 * variable.
				 */
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 2:
			if (_malloc_options != NULL) {
				/*
				 * Use options that were compiled into the
				 * program.
				 */
				opts = _malloc_options;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		default:
			/* NOTREACHED */
			assert(false);
		}

		for (j = 0; opts[j] != '\0'; j++) {
			switch (opts[j]) {
			case 'k':
				/*
				 * Chunks always require at least one header
				 * page, so chunks can never be smaller than
				 * two pages.
				 */
				if (opt_chunk_2pow > pagesize_2pow + 1)
					opt_chunk_2pow--;
				break;
			case 'K':
				if (opt_chunk_2pow + 1 <
				    (int)(sizeof(size_t) << 3))
					opt_chunk_2pow++;
				break;
			case 'n':
				opt_narenas_lshift--;
				break;
			case 'N':
				opt_narenas_lshift++;
				break;
			case 'p':
				opt_print_stats = false;
				break;
			case 'P':
				opt_print_stats = true;
				break;
			case 'q':
				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
					opt_quantum_2pow--;
				break;
			case 'Q':
				if (opt_quantum_2pow < pagesize_2pow - 1)
					opt_quantum_2pow++;
				break;
			case 's':
				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
					opt_small_max_2pow--;
				break;
			case 'S':
				if (opt_small_max_2pow < pagesize_2pow - 1)
					opt_small_max_2pow++;
				break;
			case 'x':
				opt_xmalloc = false;
				break;
			default: {
				char cbuf[2];

				cbuf[0] = opts[j];
				cbuf[1] = '\0';
				_malloc_message(getprogname(),
				    ": (malloc) Unsupported character in "
				    "malloc options: '", cbuf, "'\n");
			}
			}
		}
	}

	/* Take care to call atexit() only once. */
	if (opt_print_stats) {
		/* Print statistics at exit. */
		atexit(malloc_print_stats);
	}

	/* Set variables according to the value of opt_small_max_2pow. */
	if (opt_small_max_2pow < opt_quantum_2pow)
		opt_small_max_2pow = opt_quantum_2pow;
	small_max = (1 << opt_small_max_2pow);

	/* Set bin-related variables. */
	bin_maxclass = (pagesize >> 1);
	assert(opt_quantum_2pow >= TINY_MIN_2POW);
	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
	assert(ntbins <= opt_quantum_2pow);
	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
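	/*
	 * With the defaults assumed in the size class table at the top of
	 * this file (4 kB pages, 16 byte quantum, 512 byte small_max), the
	 * settings above work out to ntbins == 3 (2, 4, 8), nqbins == 32
	 * (16 .. 512) and nsbins == 2 (1 kB, 2 kB).
	 */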
	/* Set variables according to the value of opt_quantum_2pow. */
	quantum = (1 << opt_quantum_2pow);
	quantum_mask = quantum - 1;
	if (ntbins > 0)
		small_min = (quantum >> 1) + 1;
	else
		small_min = 1;
	assert(small_min <= quantum);

	/* Set variables according to the value of opt_chunk_2pow. */
	chunksize = (1LU << opt_chunk_2pow);
	chunksize_mask = chunksize - 1;
	chunksize_2pow = (unsigned)opt_chunk_2pow;
	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
	{
		unsigned header_size;

		header_size = (unsigned)(sizeof(arena_chunk_t) +
		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
		arena_chunk_header_npages = (header_size >> pagesize_2pow);
		if ((header_size & pagesize_mask) != 0)
			arena_chunk_header_npages++;
	}
	arena_maxclass = chunksize - (arena_chunk_header_npages <<
	    pagesize_2pow);

#ifdef MALLOC_STATS
	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif

	/* Various sanity checks that regard configuration. */
	assert(quantum >= sizeof(void *));
	assert(quantum <= pagesize);
	assert(chunksize >= pagesize);
	assert(quantum * 4 <= chunksize);

	/* Initialize chunks data. */
	malloc_mutex_init(&chunks_mtx);
	RB_INIT(&huge);
#ifdef USE_BRK
	malloc_mutex_init(&brk_mtx);
	brk_base = sbrk(0);
	brk_prev = brk_base;
	brk_max = brk_base;
#endif
	RB_INIT(&old_chunks);

	/* Initialize base allocation data structures. */
#ifdef USE_BRK
	/*
	 * Allocate a base chunk here, since it doesn't actually have to be
	 * chunk-aligned.  Doing this before allocating any other chunks allows
	 * the use of space that would otherwise be wasted.
	 */
	base_pages_alloc(0);
#endif
	base_chunk_nodes = NULL;
	malloc_mutex_init(&base_mtx);

	if (ncpus > 1) {
		/*
		 * For SMP systems, create four times as many arenas as there
		 * are CPUs by default.
		 */
		opt_narenas_lshift += 2;
	}

	/* Initialize arena key. */
	(void)thr_keycreate(&arenas_map_key, NULL);

	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_malloc()
		 * can handle.
		 */
		if (narenas * sizeof(arena_t *) > chunksize)
			narenas = (unsigned)(chunksize / sizeof(arena_t *));
	} else if (opt_narenas_lshift < 0) {
		if ((narenas << opt_narenas_lshift) < narenas)
			narenas <<= opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}

	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array.  In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);

	/*
	 * Initialize one arena here.  The rest are lazily created in
	 * arena_choose_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	malloc_mutex_init(&arenas_mtx);

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}

/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */

void *
malloc(size_t size)
{
	void *ret;

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	}

	ret = imalloc(size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in malloc(): out of memory\n", "",
			    "");
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, size, ret);
	return (ret);
}

int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int ret;
	void *result;

	if (malloc_init())
		result = NULL;
	else {
		/* Make sure that alignment is a large enough power of 2. */
		if (((alignment - 1) & alignment) != 0
		    || alignment < sizeof(void *)) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in posix_memalign(): "
				    "invalid alignment\n", "", "");
				abort();
			}
			result = NULL;
			ret = EINVAL;
			goto RETURN;
		}

		result = ipalloc(alignment, size);
	}

	if (result == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in posix_memalign(): out of memory\n",
			    "", "");
			abort();
		}
		ret = ENOMEM;
		goto RETURN;
	}

	*memptr = result;
	ret = 0;

RETURN:
	UTRACE(0, size, result);
	return (ret);
}

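/*
 * The ((alignment - 1) & alignment) test above is the usual power-of-two
 * check: 64 & 63 == 0, so 64 is accepted, while 48 & 47 == 32, so 48 is
 * rejected with EINVAL as POSIX requires.
 */
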
void *
calloc(size_t num, size_t size)
{
	void *ret;
	size_t num_size;

	if (malloc_init()) {
		num_size = 0;
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
			num_size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	/*
	 * Try to avoid division here.  We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if ((unsigned long long)((num | size) &
	    ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
	    (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}

	ret = icalloc(num_size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in calloc(): out of memory\n", "",
			    "");
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, num_size, ret);
	return (ret);
}

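/*
 * Example of the overflow screen above with a 32-bit size_t: for
 * num == 0x10000 and size == 0x10000 the product wraps to 0, but
 * (num | size) has bits in the upper half of size_t, so the division test
 * runs, notices num_size / size != num, and calloc() fails instead of
 * returning a too-small buffer.  When both operands fit in the low half of
 * size_t the division is skipped entirely.
 */
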
void *
realloc(void *ptr, size_t size)
{
	void *ret;

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			if (ptr != NULL)
				idalloc(ptr);
			ret = NULL;
			goto RETURN;
		}
	}

	if (ptr != NULL) {
		assert(malloc_initialized);

		ret = iralloc(ptr, size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in realloc(): out of "
				    "memory\n", "", "");
				abort();
			}
			errno = ENOMEM;
		}
	} else {
		if (malloc_init())
			ret = NULL;
		else
			ret = imalloc(size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in realloc(): out of "
				    "memory\n", "", "");
				abort();
			}
			errno = ENOMEM;
		}
	}

RETURN:
	UTRACE(ptr, size, ret);
	return (ret);
}

void
free(void *ptr)
{

	UTRACE(ptr, 0, 0);
	if (ptr != NULL) {
		assert(malloc_initialized);

		idalloc(ptr);
	}
}

/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */

size_t
malloc_usable_size(const void *ptr)
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}

/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions, used by threading libraries for protection
 * of malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

void
_malloc_prefork(void)
{
	unsigned i;

	/* Acquire all mutexes in a safe order. */

	malloc_mutex_lock(&arenas_mtx);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != NULL)
			malloc_mutex_lock(&arenas[i]->mtx);
	}
	malloc_mutex_unlock(&arenas_mtx);

	malloc_mutex_lock(&base_mtx);

	malloc_mutex_lock(&chunks_mtx);
}

void
_malloc_postfork(void)
{
	unsigned i;

	/* Release all mutexes, now that fork() has completed. */

	malloc_mutex_unlock(&chunks_mtx);

	malloc_mutex_unlock(&base_mtx);

	malloc_mutex_lock(&arenas_mtx);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != NULL)
			malloc_mutex_unlock(&arenas[i]->mtx);
	}
	malloc_mutex_unlock(&arenas_mtx);
}

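/*
 * A threading library typically registers these handlers itself, for
 * example via
 *
 *	pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork);
 *
 * so that no malloc mutex is held across fork() and the child does not
 * inherit a lock taken by another thread.
 */
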
/*
 * End library-private functions.
 */
/******************************************************************************/