/* $NetBSD: jemalloc.c,v 1.38 2015/07/26 17:21:55 martin Exp $ */

/*-
 * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs),
 *     rather than as individual pages.  This provides a constant-time
 *     mechanism for associating allocations with particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum, the size classes in each category are as follows:
 *
 *   |=====================================|
 *   | Category | Subcategory    |    Size |
 *   |=====================================|
 *   | Small    | Tiny           |       2 |
 *   |          |                |       4 |
 *   |          |                |       8 |
 *   |          |----------------+---------|
 *   |          | Quantum-spaced |      16 |
 *   |          |                |      32 |
 *   |          |                |      48 |
 *   |          |                |     ... |
 *   |          |                |     512 |
 *   |          |----------------+---------|
 *   |          | Sub-page       |    1 kB |
 *   |          |                |    2 kB |
 *   |=====================================|
 *   | Large                     |    4 kB |
 *   |                           |    8 kB |
 *   |                           |     ... |
 *   |                           | 1020 kB |
 *   |=====================================|
 *   | Huge                      |    1 MB |
 *   |                           |    2 MB |
 *   |                           |     ... |
 *   |=====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
 *
 *******************************************************************************
 */
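
/*
 * Worked example of the size class rounding described above (a sketch that
 * assumes the default 16 byte quantum and 4 kB pages): a 100 byte request is
 * quantum-spaced and rounds up to 112 bytes; a 13000 byte request is "large"
 * and rounds up to the next page multiple, 16 kB; a 2 MB request exceeds the
 * largest arena-managed size class and is served as a "huge" allocation
 * backed by whole chunks.
 */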
#if defined(__NetBSD__) || defined(__minix)
#  define xutrace(a, b)		utrace("malloc", (a), (b))
#  define __DECONST(x, y)	((x)__UNCONST(y))
#else
#  define xutrace(a, b)		utrace((a), (b))
#endif	/* __NetBSD__ */
/*
 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 * defaults the A and J runtime options to off.  These settings are appropriate
 * for production systems.
 */
#define	MALLOC_PRODUCTION

#ifndef MALLOC_PRODUCTION
#  define MALLOC_DEBUG
#endif
#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
__RCSID("$NetBSD: jemalloc.c,v 1.38 2015/07/26 17:21:55 martin Exp $");

#include "libc_private.h"
#include "spinlock.h"
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stddef.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/ktrace.h>	/* Must come after several other sys/ includes. */

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/vmparam.h>

#  include <reentrant.h>
#define	STRERROR_R(a, b, c)	__strerror_r(a, b, c);
/*
 * A non localized version of strerror, that avoids bringing in
 * stdio and the locale code. All the malloc messages are in English
 * so why bother?
 */
static int
__strerror_r(int e, char *s, size_t l)
{
	int rval;
	size_t slen;

	if (e >= 0 && e < sys_nerr) {
		slen = strlcpy(s, sys_errlist[e], l);
		rval = 0;
	} else {
		slen = snprintf_ss(s, l, "Unknown error %u", e);
		rval = EINVAL;
	}
	return slen >= l ? ERANGE : rval;
}
#else
#define	STRERROR_R(a, b, c)	strerror_r(a, b, c);
#include "un-namespace.h"
#endif
/* MALLOC_STATS enables statistics calculation. */
#ifndef MALLOC_PRODUCTION
#  define MALLOC_STATS
#endif

/* Disable inlining to make debugging easier. */

/* Size of stack-allocated buffer passed to strerror_r(). */
#define	STRERROR_BUF		64
/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */

/*
 * If you touch the TINY_MIN_2POW definition for any architecture, please
 * make sure to adjust the corresponding definition for JEMALLOC_TINY_MIN_2POW
 * in the gcc 4.8 tree in dist/gcc/tree-ssa-ccp.c and verify that a native
 * gcc is still buildable!
 */

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#  define TINY_MIN_2POW		3

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#  define TINY_MIN_2POW		3

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	3
#  define TINY_MIN_2POW		3

#  define QUANTUM_2POW_MIN	3
#  define SIZEOF_PTR_2POW	2
#  define TINY_MIN_2POW		3

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#  define TINY_MIN_2POW		3

#if defined(__sparc__) && !defined(__sparc64__)
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#endif

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2

#if defined(__mips__) || defined(__riscv__)
#  define SIZEOF_PTR_2POW	3
#  define TINY_MIN_2POW		3
#  define SIZEOF_PTR_2POW	2
#  define QUANTUM_2POW_MIN	4
#endif

#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR_2POW	2
#define	SIZEOF_PTR		(1 << SIZEOF_PTR_2POW)

/* sizeof(int) == (1 << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 */
#define	CHUNK_2POW_DEFAULT	20

/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define	CACHELINE_2POW		6
#define	CACHELINE		((size_t)(1 << CACHELINE_2POW))

/* Smallest size class to support. */
#ifndef TINY_MIN_2POW
#define	TINY_MIN_2POW	2
#endif
/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define	SMALL_MAX_2POW_DEFAULT	9
#define	SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since there is one bit of header overhead
 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 * that are so small that the per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 */
#define	RUN_BFP			12
/*                              \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U
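
/*
 * Worked example of the fixed point overhead target above (a sketch, assuming
 * RUN_BFP is 12 as defined here): RUN_MAX_OVRHD is 0x3d = 61, so the desired
 * run header overhead is at most 61/2^12, i.e. roughly 1.5% of the run size.
 * arena_bin_run_size_calc() below grows the trial run size until the header
 * overhead falls under this ratio, or until another cap (such as
 * RUN_MAX_SMALL) takes effect.
 */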

/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
#define	RUN_MAX_SMALL_2POW	15
#define	RUN_MAX_SMALL		(1 << RUN_MAX_SMALL_2POW)
/******************************************************************************/
/*
 * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};

#define	malloc_mutex_t	mutex_t

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static mutex_t init_lock = MUTEX_INITIALIZER;
/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
	/*
	 * Number of allocation requests that corresponded to the size of this
	 * bin.
	 */
	uint64_t	nrequests;

	/* Total number of runs created for this bin's size class. */
	uint64_t	nruns;

	/*
	 * Total number of runs reused by extracting them from the runs tree for
	 * this bin's size class.
	 */
	uint64_t	reruns;

	/* High-water mark for this bin. */
	unsigned long	highruns;

	/* Current number of runs in this bin. */
	unsigned long	curruns;
};

typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
	/* Number of bytes currently mapped. */
	size_t		mapped;

	/* Per-size-category statistics. */
	size_t		allocated_small;
	uint64_t	nmalloc_small;
	uint64_t	ndalloc_small;

	size_t		allocated_large;
	uint64_t	nmalloc_large;
	uint64_t	ndalloc_large;
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
	/* Number of chunks that were allocated. */
	uint64_t	nchunks;

	/* High-water mark for number of chunks allocated. */
	unsigned long	highchunks;

	/*
	 * Current number of chunks allocated.  This value isn't maintained for
	 * any other purpose, so keep track of it in order to be able to set
	 * highchunks.
	 */
	unsigned long	curchunks;
};

#endif /* #ifdef MALLOC_STATS */
/******************************************************************************/
/*
 * Chunk data structures.
 */

/* Tree of chunks. */
typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
	/* Linkage for the chunk tree. */
	RB_ENTRY(chunk_node_s) link;

	/*
	 * Pointer to the chunk that this tree node is responsible for.  In some
	 * (but certainly not all) cases, this data structure is placed at the
	 * beginning of the corresponding chunk, so this field may point to this
	 * node.
	 */
	void	*chunk;

	/* Total chunk size. */
	size_t	size;
};
typedef struct chunk_tree_s chunk_tree_t;
RB_HEAD(chunk_tree_s, chunk_node_s);
/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
	/* Number of pages in run. */
	uint32_t	npages;
	/*
	 * Position within run.  For a free run, this is POS_FREE for the first
	 * and last pages.  The POS_FREE special value makes it possible to
	 * quickly coalesce free runs.
	 *
	 * This is the limiting factor for chunksize; there can be at most 2^31
	 * pages in a run.
	 */
#define	POS_FREE ((uint32_t)0xffffffffU)
	uint32_t	pos;
};

/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t		*arena;

	/* Linkage for the arena's chunk tree. */
	RB_ENTRY(arena_chunk_s) link;

	/*
	 * Number of pages in use.  This is maintained in order to make
	 * detection of empty chunks fast.
	 */
	uint32_t	pages_used;

	/*
	 * Every time a free run larger than this value is created/coalesced,
	 * this value is increased.  The only way that the value decreases is if
	 * arena_run_alloc() fails to find a free run as large as advertised by
	 * this value.
	 */
	uint32_t	max_frun_npages;

	/*
	 * Every time a free run that starts at an earlier page than this value
	 * is created/coalesced, this value is decreased.  It is reset in a
	 * similar fashion to max_frun_npages.
	 */
	uint32_t	min_frun_ind;

	/*
	 * Map of pages within chunk that keeps track of free/large/small.  For
	 * free runs, only the map entries for the first and last pages are
	 * kept up to date, so that free runs can be quickly coalesced.
	 */
	arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef struct arena_chunk_tree_s arena_chunk_tree_t;
RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
typedef struct arena_run_s arena_run_t;
struct arena_run_s {
	/* Linkage for run trees. */
	RB_ENTRY(arena_run_s) link;

#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Index of first element that might have a free region. */
	unsigned	regs_minelm;

	/* Number of free regions in run. */
	unsigned	nfree;

	/* Bitmask of in-use regions (0: in use, 1: free). */
	unsigned	regs_mask[1]; /* Dynamically sized. */
};
typedef struct arena_run_tree_s arena_run_tree_t;
RB_HEAD(arena_run_tree_s, arena_run_s);

struct arena_bin_s {
	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
	 * Tree of non-full runs.  This tree is used when looking for an
	 * existing run when runcur is no longer usable.  We choose the
	 * non-full run that is lowest in memory; this policy tends to keep
	 * objects packed well, and it can also help reduce the number of
	 * almost-empty chunks.
	 */
	arena_run_tree_t runs;

	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/* Number of elements in a run's regs_mask for this bin's size class. */
	uint32_t	regs_mask_nelms;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;

#ifdef MALLOC_STATS
	/* Bin statistics. */
	malloc_bin_stats_t stats;
#endif
};
struct arena_s {
#ifdef MALLOC_DEBUG
	uint32_t		magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

	/* All operations on this arena require that mtx be locked. */
	malloc_mutex_t		mtx;

#ifdef MALLOC_STATS
	arena_stats_t		stats;
#endif

	/*
	 * Tree of chunks this arena manages.
	 */
	arena_chunk_tree_t	chunks;

	/*
	 * In order to avoid rapid chunk allocation/deallocation when an arena
	 * oscillates right on the cusp of needing a new chunk, cache the most
	 * recently freed chunk.  This caching is disabled by opt_hint.
	 *
	 * There is one spare chunk per arena, rather than one spare total, in
	 * order to avoid interactions between multiple threads that could make
	 * a single spare inadequate.
	 */
	arena_chunk_t		*spare;

	/*
	 * bins is used to store rings of free regions of the following sizes,
	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
	 */
	arena_bin_t		bins[1]; /* Dynamically sized. */
};
/******************************************************************************/

/* Number of CPUs. */
static unsigned		ncpus;

static size_t		pagesize;
static size_t		pagesize_mask;
static int		pagesize_2pow;

/* Various bin-related settings. */
static size_t		bin_maxclass; /* Max size class for bins. */
static unsigned		ntbins; /* Number of (2^n)-spaced tiny bins. */
static unsigned		nqbins; /* Number of quantum-spaced bins. */
static unsigned		nsbins; /* Number of (2^n)-spaced sub-page bins. */
static size_t		small_min;
static size_t		small_max;

/* Various quantum-related settings. */
static size_t		quantum;
static size_t		quantum_mask; /* (quantum - 1). */

/* Various chunk-related settings. */
static size_t		chunksize;
static size_t		chunksize_mask; /* (chunksize - 1). */
static int		chunksize_2pow;
static unsigned		chunk_npages;
static unsigned		arena_chunk_header_npages;
static size_t		arena_maxclass; /* Max size class for arenas. */
/* Protects chunk-related data structures. */
static malloc_mutex_t	chunks_mtx;

/* Tree of chunks that are stand-alone huge allocations. */
static chunk_tree_t	huge;

/*
 * Try to use brk for chunk-size allocations, due to address space constraints.
 */
/*
 * Protects sbrk() calls.  This must be separate from chunks_mtx, since
 * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
 * could cause recursive lock acquisition).
 */
static malloc_mutex_t	brk_mtx;
/* Result of first sbrk(0) call. */
static void		*brk_base;
/* Current end of brk, or ((void *)-1) if brk is exhausted. */
static void		*brk_prev;
/* Current upper limit on brk addresses. */
static void		*brk_max;

/* Huge allocation statistics. */
static uint64_t		huge_nmalloc;
static uint64_t		huge_ndalloc;
static uint64_t		huge_nralloc;
static size_t		huge_allocated;

/*
 * Tree of chunks that were previously allocated.  This is used when allocating
 * chunks, in an attempt to re-use address space.
 */
static chunk_tree_t	old_chunks;
/****************************/
/*
 * base (internal allocation).
 */

/*
 * Current pages that are being used for internal memory allocations.  These
 * pages are carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
static void		*base_pages;
static void		*base_next_addr;
static void		*base_past_addr; /* Addr immediately past base_pages. */
static chunk_node_t	*base_chunk_nodes; /* LIFO cache of chunk nodes. */
static malloc_mutex_t	base_mtx;
static size_t		base_mapped;

/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
static arena_t		**arenas;
static unsigned		narenas;
static unsigned		next_arena;
static malloc_mutex_t	arenas_mtx; /* Protects arenas initialization. */
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
#ifndef NO_TLS
static __thread arena_t	**arenas_map;
#else
static arena_t		**arenas_map;
#endif

#if !defined(NO_TLS) || !defined(_REENTRANT)
#  define	get_arenas_map()	(arenas_map)
#  define	set_arenas_map(x)	(arenas_map = x)
#else

static thread_key_t arenas_map_key = -1;

static inline arena_t **
get_arenas_map(void)
{

	if (arenas_map_key == -1) {
		(void)thr_keycreate(&arenas_map_key, NULL);
		if (arenas_map != NULL) {
			thr_setspecific(arenas_map_key, arenas_map);
			arenas_map = NULL;
		}
	}

	return thr_getspecific(arenas_map_key);
}

static inline void
set_arenas_map(arena_t **a)
{

	if (arenas_map_key == -1) {
		(void)thr_keycreate(&arenas_map_key, NULL);
		if (arenas_map != NULL) {
			_DIAGASSERT(arenas_map == a);
			arenas_map = NULL;
		}
	}

	thr_setspecific(arenas_map_key, a);
}
#endif

/* Chunk statistics. */
static chunk_stats_t	stats_chunks;
/*******************************/
/*
 * Runtime configuration options.
 */
const char	*_malloc_options;

#ifndef MALLOC_PRODUCTION
static bool	opt_abort = true;
static bool	opt_junk = true;
#else
static bool	opt_abort = false;
static bool	opt_junk = false;
#endif
static bool	opt_hint = false;
static bool	opt_print_stats = false;
static int	opt_quantum_2pow = QUANTUM_2POW_MIN;
static int	opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
static int	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
static bool	opt_utrace = false;
static bool	opt_sysv = false;
static bool	opt_xmalloc = false;
static bool	opt_zero = false;
static int32_t	opt_narenas_lshift = 0;
#define	UTRACE(a, b, c)							\
	if (opt_utrace) {						\
		malloc_utrace_t ut;					\
		xutrace(&ut, sizeof(ut));				\
	}

/******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */

static void	wrtmessage(const char *p1, const char *p2, const char *p3,
		const char *p4);
static void	malloc_printf(const char *format, ...);
static char	*size_t2s(size_t x, char *s);
static bool	base_pages_alloc(size_t minsize);
static void	*base_alloc(size_t size);
static chunk_node_t *base_chunk_node_alloc(void);
static void	base_chunk_node_dealloc(chunk_node_t *node);
static void	stats_print(arena_t *arena);
static void	*pages_map(void *addr, size_t size);
static void	*pages_map_align(void *addr, size_t size, int align);
static void	pages_unmap(void *addr, size_t size);
static void	*chunk_alloc(size_t size);
static void	chunk_dealloc(void *chunk, size_t size);
static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t	arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
static void	*arena_malloc(arena_t *arena, size_t size);
static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
    size_t alloc_size);
static size_t	arena_salloc(const void *ptr);
static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
static void	arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
static bool	arena_new(arena_t *arena);
static arena_t	*arenas_extend(unsigned ind);
static void	*huge_malloc(size_t size);
static void	*huge_palloc(size_t alignment, size_t size);
static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
static void	huge_dalloc(void *ptr);
static void	*imalloc(size_t size);
static void	*ipalloc(size_t alignment, size_t size);
static void	*icalloc(size_t size);
static size_t	isalloc(const void *ptr);
static void	*iralloc(void *ptr, size_t size);
static void	idalloc(void *ptr);
static void	malloc_print_stats(void);
static bool	malloc_init_hard(void);

/*
 * End function prototypes.
 */
/******************************************************************************/
#ifdef __NetBSD__
#define	malloc_mutex_init(m)	mutex_init(m, NULL)
#define	malloc_mutex_lock(m)	mutex_lock(m)
#define	malloc_mutex_unlock(m)	mutex_unlock(m)
#else	/* __NetBSD__ */
static inline void
malloc_mutex_init(malloc_mutex_t *a_mutex)
{
	static const spinlock_t lock = _SPINLOCK_INITIALIZER;

	a_mutex->lock = lock;
}

static inline void
malloc_mutex_lock(malloc_mutex_t *a_mutex)
{

	_SPINLOCK(&a_mutex->lock);
}

static inline void
malloc_mutex_unlock(malloc_mutex_t *a_mutex)
{

	_SPINUNLOCK(&a_mutex->lock);
}
#endif	/* __NetBSD__ */
/******************************************************************************/
/*
 * Begin Utility functions/macros.
 */

/* Return the chunk address for allocation address a. */
#define	CHUNK_ADDR2BASE(a)						\
	((void *)((uintptr_t)(a) & ~chunksize_mask))

/* Return the chunk offset of address a. */
#define	CHUNK_ADDR2OFFSET(a)						\
	((size_t)((uintptr_t)(a) & chunksize_mask))

/* Return the smallest chunk multiple that is >= s. */
#define	CHUNK_CEILING(s)						\
	(((s) + chunksize_mask) & ~chunksize_mask)

/* Return the smallest cacheline multiple that is >= s. */
#define	CACHELINE_CEILING(s)						\
	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))

/* Return the smallest quantum multiple that is >= a. */
#define	QUANTUM_CEILING(a)						\
	(((a) + quantum_mask) & ~quantum_mask)

/* Return the smallest pagesize multiple that is >= s. */
#define	PAGE_CEILING(s)							\
	(((s) + pagesize_mask) & ~pagesize_mask)
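
/*
 * Worked examples of the ceiling macros above (a sketch, assuming the default
 * 16 byte quantum and 4 kB pages): QUANTUM_CEILING(100) = (100 + 15) & ~15 =
 * 112, and PAGE_CEILING(13000) = (13000 + 4095) & ~4095 = 16384.  The
 * mask-and-add trick works because quantum, pagesize, and chunksize are all
 * powers of two.
 */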
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if (SIZEOF_PTR == 8)
	x |= x >> 32;
#endif
	x++;
	return (x);
}
static void
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
{

	write(STDERR_FILENO, p1, strlen(p1));
	write(STDERR_FILENO, p2, strlen(p2));
	write(STDERR_FILENO, p3, strlen(p3));
	write(STDERR_FILENO, p4, strlen(p4));
}

void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
	    const char *p4) = wrtmessage;

/*
 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 */
static void
malloc_printf(const char *format, ...)
{
	char buf[4096];
	va_list ap;

	va_start(ap, format);
	vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	_malloc_message(buf, "", "", "");
}
/*
 * We don't want to depend on vsnprintf() for production builds, since that can
 * cause unnecessary bloat for static binaries.  size_t2s() provides minimal
 * integer printing functionality, so that malloc_printf() use can be limited to
 * MALLOC_STATS code.
 */
#define	UMAX2S_BUFSIZE	21
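/*
 * UMAX2S_BUFSIZE of 21 is sufficient: the largest value size_t2s() handles (a
 * 64-bit size_t, see the assert below) has at most 20 decimal digits, plus
 * one byte for the terminating NUL.
 */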
static char *
size_t2s(size_t x, char *s)
{
	unsigned i;

	/* Make sure UMAX2S_BUFSIZE is large enough. */
	assert(sizeof(size_t) <= 8);

	i = UMAX2S_BUFSIZE - 1;
	s[i] = '\0';
	do {
		i--;
		s[i] = "0123456789"[(int)x % 10];
		x /= (uintmax_t)10LL;
	} while (x > 0);

	return (&s[i]);
}
1084 base_pages_alloc(size_t minsize
)
1090 * Do special brk allocation here, since base allocations don't need to
1093 if (brk_prev
!= (void *)-1) {
1098 csize
= CHUNK_CEILING(minsize
);
1100 malloc_mutex_lock(&brk_mtx
);
1102 /* Get the current end of brk. */
1106 * Calculate how much padding is necessary to
1107 * chunk-align the end of brk. Don't worry about
1108 * brk_cur not being chunk-aligned though.
1110 incr
= (intptr_t)chunksize
1111 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur
);
1113 if ((size_t)incr
< minsize
)
1116 brk_prev
= sbrk(incr
);
1117 if (brk_prev
== brk_cur
) {
1119 malloc_mutex_unlock(&brk_mtx
);
1120 base_pages
= brk_cur
;
1121 base_next_addr
= base_pages
;
1122 base_past_addr
= (void *)((uintptr_t)base_pages
1125 base_mapped
+= incr
;
1129 } while (brk_prev
!= (void *)-1);
1130 malloc_mutex_unlock(&brk_mtx
);
1134 * Failure during initialization doesn't matter, so avoid
1135 * falling through to the mmap-based page mapping code.
1140 assert(minsize
!= 0);
1141 csize
= PAGE_CEILING(minsize
);
1142 base_pages
= pages_map(NULL
, csize
);
1143 if (base_pages
== NULL
)
1145 base_next_addr
= base_pages
;
1146 base_past_addr
= (void *)((uintptr_t)base_pages
+ csize
);
1148 base_mapped
+= csize
;
1154 base_alloc(size_t size
)
1159 /* Round size up to nearest multiple of the cacheline size. */
1160 csize
= CACHELINE_CEILING(size
);
1162 malloc_mutex_lock(&base_mtx
);
1164 /* Make sure there's enough space for the allocation. */
1165 if ((uintptr_t)base_next_addr
+ csize
> (uintptr_t)base_past_addr
) {
1166 if (base_pages_alloc(csize
)) {
1173 ret
= base_next_addr
;
1174 base_next_addr
= (void *)((uintptr_t)base_next_addr
+ csize
);
1177 malloc_mutex_unlock(&base_mtx
);
1181 static chunk_node_t
*
1182 base_chunk_node_alloc(void)
1186 malloc_mutex_lock(&base_mtx
);
1187 if (base_chunk_nodes
!= NULL
) {
1188 ret
= base_chunk_nodes
;
1190 base_chunk_nodes
= *(chunk_node_t
**)ret
;
1191 malloc_mutex_unlock(&base_mtx
);
1193 malloc_mutex_unlock(&base_mtx
);
1194 ret
= (chunk_node_t
*)base_alloc(sizeof(chunk_node_t
));
1201 base_chunk_node_dealloc(chunk_node_t
*node
)
1204 malloc_mutex_lock(&base_mtx
);
1206 *(chunk_node_t
**)node
= base_chunk_nodes
;
1207 base_chunk_nodes
= node
;
1208 malloc_mutex_unlock(&base_mtx
);
1211 /******************************************************************************/
1215 stats_print(arena_t
*arena
)
1221 " allocated/mapped nmalloc ndalloc\n");
1223 malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1224 arena
->stats
.allocated_small
, "", arena
->stats
.nmalloc_small
,
1225 arena
->stats
.ndalloc_small
);
1226 malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1227 arena
->stats
.allocated_large
, "", arena
->stats
.nmalloc_large
,
1228 arena
->stats
.ndalloc_large
);
1229 malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1230 arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
,
1231 arena
->stats
.mapped
,
1232 arena
->stats
.nmalloc_small
+ arena
->stats
.nmalloc_large
,
1233 arena
->stats
.ndalloc_small
+ arena
->stats
.ndalloc_large
);
1235 malloc_printf("bins: bin size regs pgs requests newruns"
1236 " reruns maxruns curruns\n");
1237 for (i
= 0, gap_start
= -1; i
< ntbins
+ nqbins
+ nsbins
; i
++) {
1238 if (arena
->bins
[i
].stats
.nrequests
== 0) {
1239 if (gap_start
== -1)
1242 if (gap_start
!= -1) {
1243 if (i
> gap_start
+ 1) {
1244 /* Gap of more than one size class. */
1245 malloc_printf("[%u..%u]\n",
1248 /* Gap of one size class. */
1249 malloc_printf("[%u]\n", gap_start
);
1254 "%13u %1s %4u %4u %3u %9llu %9llu"
1255 " %9llu %7lu %7lu\n",
1257 i
< ntbins
? "T" : i
< ntbins
+ nqbins
? "Q" : "S",
1258 arena
->bins
[i
].reg_size
,
1259 arena
->bins
[i
].nregs
,
1260 arena
->bins
[i
].run_size
>> pagesize_2pow
,
1261 arena
->bins
[i
].stats
.nrequests
,
1262 arena
->bins
[i
].stats
.nruns
,
1263 arena
->bins
[i
].stats
.reruns
,
1264 arena
->bins
[i
].stats
.highruns
,
1265 arena
->bins
[i
].stats
.curruns
);
1268 if (gap_start
!= -1) {
1269 if (i
> gap_start
+ 1) {
1270 /* Gap of more than one size class. */
1271 malloc_printf("[%u..%u]\n", gap_start
, i
- 1);
1273 /* Gap of one size class. */
1274 malloc_printf("[%u]\n", gap_start
);
1281 * End Utility functions/macros.
1283 /******************************************************************************/
1285 * Begin chunk management functions.
1290 chunk_comp(chunk_node_t
*a
, chunk_node_t
*b
)
1296 if ((uintptr_t)a
->chunk
< (uintptr_t)b
->chunk
)
1298 else if (a
->chunk
== b
->chunk
)
1304 /* Generate red-black tree code for chunks. */
1305 RB_GENERATE_STATIC(chunk_tree_s
, chunk_node_s
, link
, chunk_comp
);
1309 pages_map_align(void *addr
, size_t size
, int align
)
1314 * We don't use MAP_FIXED here, because it can cause the *replacement*
1315 * of existing mappings, and we only want to create new mappings.
1317 ret
= mmap(addr
, size
, PROT_READ
| PROT_WRITE
,
1318 MAP_PRIVATE
| MAP_ANON
| MAP_ALIGNED(align
), -1, 0);
1319 assert(ret
!= NULL
);
1321 if (ret
== MAP_FAILED
)
1323 else if (addr
!= NULL
&& ret
!= addr
) {
1325 * We succeeded in mapping memory, but not in the right place.
1327 if (munmap(ret
, size
) == -1) {
1328 char buf
[STRERROR_BUF
];
1330 STRERROR_R(errno
, buf
, sizeof(buf
));
1331 _malloc_message(getprogname(),
1332 ": (malloc) Error in munmap(): ", buf
, "\n");
1339 assert(ret
== NULL
|| (addr
== NULL
&& ret
!= addr
)
1340 || (addr
!= NULL
&& ret
== addr
));
1345 pages_map(void *addr
, size_t size
)
1348 return pages_map_align(addr
, size
, 0);
1352 pages_unmap(void *addr
, size_t size
)
1355 if (munmap(addr
, size
) == -1) {
1356 char buf
[STRERROR_BUF
];
1358 STRERROR_R(errno
, buf
, sizeof(buf
));
1359 _malloc_message(getprogname(),
1360 ": (malloc) Error in munmap(): ", buf
, "\n");
1367 chunk_alloc(size_t size
)
1370 chunk_node_t
*tchunk
, *delchunk
;
1373 assert((size
& chunksize_mask
) == 0);
1375 malloc_mutex_lock(&chunks_mtx
);
1377 if (size
== chunksize
) {
1379 * Check for address ranges that were previously chunks and try
1384 tchunk
= RB_MIN(chunk_tree_s
, &old_chunks
);
1385 while (tchunk
!= NULL
) {
1386 /* Found an address range. Try to recycle it. */
1388 chunk
= tchunk
->chunk
;
1391 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1393 /* Remove delchunk from the tree. */
1395 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1396 base_chunk_node_dealloc(delchunk
);
1399 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1400 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1401 /* Re-use a previously freed brk chunk. */
1406 if ((ret
= pages_map(chunk
, size
)) != NULL
) {
1414 * Try to over-allocate, but allow the OS to place the allocation
1415 * anywhere. Beware of size_t wrap-around.
1417 if (size
+ chunksize
> size
) {
1418 if ((ret
= pages_map_align(NULL
, size
, chunksize_2pow
))
1426 * Try to create allocations in brk, in order to make full use of
1427 * limited address space.
1429 if (brk_prev
!= (void *)-1) {
1434 * The loop is necessary to recover from races with other
1435 * threads that are using brk for something other than malloc.
1437 malloc_mutex_lock(&brk_mtx
);
1439 /* Get the current end of brk. */
1443 * Calculate how much padding is necessary to
1444 * chunk-align the end of brk.
1446 incr
= (intptr_t)size
1447 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur
);
1448 if (incr
== (intptr_t)size
) {
1451 ret
= (void *)((intptr_t)brk_cur
+ incr
);
1455 brk_prev
= sbrk(incr
);
1456 if (brk_prev
== brk_cur
) {
1458 malloc_mutex_unlock(&brk_mtx
);
1459 brk_max
= (void *)((intptr_t)ret
+ size
);
1462 } while (brk_prev
!= (void *)-1);
1463 malloc_mutex_unlock(&brk_mtx
);
1467 /* All strategies for allocation failed. */
1473 * Clean out any entries in old_chunks that overlap with the
1474 * memory we just allocated.
1478 tchunk
= RB_NFIND(chunk_tree_s
, &old_chunks
, &key
);
1479 while (tchunk
!= NULL
1480 && (uintptr_t)tchunk
->chunk
>= (uintptr_t)ret
1481 && (uintptr_t)tchunk
->chunk
< (uintptr_t)ret
+ size
) {
1484 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1486 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1487 base_chunk_node_dealloc(delchunk
);
1493 stats_chunks
.nchunks
+= (size
/ chunksize
);
1494 stats_chunks
.curchunks
+= (size
/ chunksize
);
1496 if (stats_chunks
.curchunks
> stats_chunks
.highchunks
)
1497 stats_chunks
.highchunks
= stats_chunks
.curchunks
;
1499 malloc_mutex_unlock(&chunks_mtx
);
1501 assert(CHUNK_ADDR2BASE(ret
) == ret
);
1506 chunk_dealloc(void *chunk
, size_t size
)
1510 assert(chunk
!= NULL
);
1511 assert(CHUNK_ADDR2BASE(chunk
) == chunk
);
1513 assert((size
& chunksize_mask
) == 0);
1515 malloc_mutex_lock(&chunks_mtx
);
1518 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1519 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1522 malloc_mutex_lock(&brk_mtx
);
1523 /* Get the current end of brk. */
1527 * Try to shrink the data segment if this chunk is at the end
1528 * of the data segment. The sbrk() call here is subject to a
1529 * race condition with threads that use brk(2) or sbrk(2)
1530 * directly, but the alternative would be to leak memory for
1531 * the sake of poorly designed multi-threaded programs.
1533 if (brk_cur
== brk_max
1534 && (void *)((uintptr_t)chunk
+ size
) == brk_max
1535 && sbrk(-(intptr_t)size
) == brk_max
) {
1536 malloc_mutex_unlock(&brk_mtx
);
1537 if (brk_prev
== brk_max
) {
1539 brk_prev
= (void *)((intptr_t)brk_max
1546 malloc_mutex_unlock(&brk_mtx
);
1547 madvise(chunk
, size
, MADV_FREE
);
1550 * Iteratively create records of each chunk-sized
1551 * memory region that 'chunk' is comprised of, so that
1552 * the address range can be recycled if memory usage
1553 * increases later on.
1555 for (offset
= 0; offset
< size
; offset
+= chunksize
) {
1556 node
= base_chunk_node_alloc();
1560 node
->chunk
= (void *)((uintptr_t)chunk
1561 + (uintptr_t)offset
);
1562 node
->size
= chunksize
;
1564 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1569 pages_unmap(chunk
, size
);
1572 * Make a record of the chunk's address, so that the address
1573 * range can be recycled if memory usage increases later on.
1574 * Don't bother to create entries if (size > chunksize), since
1575 * doing so could cause scalability issues for truly gargantuan
1576 * objects (many gigabytes or larger).
1578 if (size
== chunksize
) {
1579 node
= base_chunk_node_alloc();
1581 node
->chunk
= (void *)(uintptr_t)chunk
;
1582 node
->size
= chunksize
;
1584 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1592 stats_chunks
.curchunks
-= (size
/ chunksize
);
1594 malloc_mutex_unlock(&chunks_mtx
);
1598 * End chunk management functions.
1600 /******************************************************************************/
1606 * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1608 * We maintain at least one block of arenas. Usually there are more.
1609 * The blocks are $ncpu arenas in size. Whole blocks are 'hashed'
1610 * amongst threads. To accomplish this, next_arena advances only in
1613 static __noinline arena_t
*
1614 choose_arena_hard(void)
1619 /* Initialize the current block of arenas and advance to next. */
1620 malloc_mutex_lock(&arenas_mtx
);
1621 assert(next_arena
% ncpus
== 0);
1622 assert(narenas
% ncpus
== 0);
1623 map
= &arenas
[next_arena
];
1624 set_arenas_map(map
);
1625 for (i
= 0; i
< ncpus
; i
++) {
1626 if (arenas
[next_arena
] == NULL
)
1627 arenas_extend(next_arena
);
1628 next_arena
= (next_arena
+ 1) % narenas
;
1630 malloc_mutex_unlock(&arenas_mtx
);
1633 * If we were unable to allocate an arena above, then default to
1634 * the first arena, which is always present.
1636 curcpu
= thr_curcpu();
1637 if (map
[curcpu
] != NULL
)
1642 static inline arena_t
*
1648 map
= get_arenas_map();
1649 curcpu
= thr_curcpu();
1650 if (__predict_true(map
!= NULL
&& map
[curcpu
] != NULL
))
1653 return choose_arena_hard();
1658 arena_chunk_comp(arena_chunk_t
*a
, arena_chunk_t
*b
)
1664 if ((uintptr_t)a
< (uintptr_t)b
)
1672 /* Generate red-black tree code for arena chunks. */
1673 RB_GENERATE_STATIC(arena_chunk_tree_s
, arena_chunk_s
, link
, arena_chunk_comp
);
1678 arena_run_comp(arena_run_t
*a
, arena_run_t
*b
)
1684 if ((uintptr_t)a
< (uintptr_t)b
)
1692 /* Generate red-black tree code for arena runs. */
1693 RB_GENERATE_STATIC(arena_run_tree_s
, arena_run_s
, link
, arena_run_comp
);
1696 static inline void *
1697 arena_run_reg_alloc(arena_run_t
*run
, arena_bin_t
*bin
)
1700 unsigned i
, mask
, bit
, regind
;
1702 assert(run
->magic
== ARENA_RUN_MAGIC
);
1703 assert(run
->regs_minelm
< bin
->regs_mask_nelms
);
1706 * Move the first check outside the loop, so that run->regs_minelm can
1707 * be updated unconditionally, without the possibility of updating it
1710 i
= run
->regs_minelm
;
1711 mask
= run
->regs_mask
[i
];
1713 /* Usable allocation found. */
1714 bit
= ffs((int)mask
) - 1;
1716 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
1717 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
1718 + (bin
->reg_size
* regind
));
1722 run
->regs_mask
[i
] = mask
;
1727 for (i
++; i
< bin
->regs_mask_nelms
; i
++) {
1728 mask
= run
->regs_mask
[i
];
1730 /* Usable allocation found. */
1731 bit
= ffs((int)mask
) - 1;
1733 regind
= ((i
<< (SIZEOF_INT_2POW
+ 3)) + bit
);
1734 ret
= (void *)(((uintptr_t)run
) + bin
->reg0_offset
1735 + (bin
->reg_size
* regind
));
1739 run
->regs_mask
[i
] = mask
;
1742 * Make a note that nothing before this element
1743 * contains a free region.
1745 run
->regs_minelm
= i
; /* Low payoff: + (mask == 0); */
static inline void
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
{
	/*
	 * To divide by a number D that is not a power of two we multiply
	 * by (2^21 / D) and then right shift by 21 positions.
	 *
	 *   X / D
	 *
	 * becomes
	 *
	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
	 */
#define	SIZE_INV_SHIFT 21
#define	SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
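	/*
	 * Worked example of the multiply-and-shift division above (a sketch,
	 * assuming QUANTUM_2POW_MIN is 4, i.e. a 16 byte quantum): for a
	 * region size D = 48, SIZE_INV(3) = (2^21 / 48) + 1 = 43691, and a
	 * byte offset diff = 96 gives regind = (96 * 43691) >> 21 = 2, which
	 * matches 96 / 48 exactly without a hardware divide.
	 */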
1771 static const unsigned size_invs
[] = {
1773 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1774 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1775 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1776 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1777 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1778 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1779 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1780 #if (QUANTUM_2POW_MIN < 4)
1782 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1783 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1784 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1785 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1786 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1787 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1788 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1789 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1792 unsigned diff
, regind
, elm
, bit
;
1795 assert(run
->magic
== ARENA_RUN_MAGIC
);
1796 assert(((sizeof(size_invs
)) / sizeof(unsigned)) + 3
1797 >= (SMALL_MAX_DEFAULT
>> QUANTUM_2POW_MIN
));
1800 * Avoid doing division with a variable divisor if possible. Using
1801 * actual division here can reduce allocator throughput by over 20%!
1803 diff
= (unsigned)((uintptr_t)ptr
- (uintptr_t)run
- bin
->reg0_offset
);
1804 if ((size
& (size
- 1)) == 0) {
1806 * log2_table allows fast division of a power of two in the
1809 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1811 static const unsigned char log2_table
[] = {
1812 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1813 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1814 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1815 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1816 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1817 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1818 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1819 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1823 regind
= (diff
>> log2_table
[size
- 1]);
1824 else if (size
<= 32768)
1825 regind
= diff
>> (8 + log2_table
[(size
>> 8) - 1]);
1828 * The page size is too large for us to use the lookup
1829 * table. Use real division.
1831 regind
= (unsigned)(diff
/ size
);
1833 } else if (size
<= ((sizeof(size_invs
) / sizeof(unsigned))
1834 << QUANTUM_2POW_MIN
) + 2) {
1835 regind
= size_invs
[(size
>> QUANTUM_2POW_MIN
) - 3] * diff
;
1836 regind
>>= SIZE_INV_SHIFT
;
1839 * size_invs isn't large enough to handle this size class, so
1840 * calculate regind using actual division. This only happens
1841 * if the user increases small_max via the 'S' runtime
1842 * configuration option.
1844 regind
= (unsigned)(diff
/ size
);
1846 assert(diff
== regind
* size
);
1847 assert(regind
< bin
->nregs
);
1849 elm
= regind
>> (SIZEOF_INT_2POW
+ 3);
1850 if (elm
< run
->regs_minelm
)
1851 run
->regs_minelm
= elm
;
1852 bit
= regind
- (elm
<< (SIZEOF_INT_2POW
+ 3));
1853 assert((run
->regs_mask
[elm
] & (1 << bit
)) == 0);
1854 run
->regs_mask
[elm
] |= (1 << bit
);
1856 #undef SIZE_INV_SHIFT
1860 arena_run_split(arena_t
*arena
, arena_run_t
*run
, size_t size
)
1862 arena_chunk_t
*chunk
;
1863 unsigned run_ind
, map_offset
, total_pages
, need_pages
, rem_pages
;
1866 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
1867 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
1869 total_pages
= chunk
->map
[run_ind
].npages
;
1870 need_pages
= (unsigned)(size
>> pagesize_2pow
);
1871 assert(need_pages
<= total_pages
);
1872 rem_pages
= total_pages
- need_pages
;
1874 /* Split enough pages from the front of run to fit allocation size. */
1875 map_offset
= run_ind
;
1876 for (i
= 0; i
< need_pages
; i
++) {
1877 chunk
->map
[map_offset
+ i
].npages
= need_pages
;
1878 chunk
->map
[map_offset
+ i
].pos
= i
;
1881 /* Keep track of trailing unused pages for later use. */
1882 if (rem_pages
> 0) {
1883 /* Update map for trailing pages. */
1884 map_offset
+= need_pages
;
1885 chunk
->map
[map_offset
].npages
= rem_pages
;
1886 chunk
->map
[map_offset
].pos
= POS_FREE
;
1887 chunk
->map
[map_offset
+ rem_pages
- 1].npages
= rem_pages
;
1888 chunk
->map
[map_offset
+ rem_pages
- 1].pos
= POS_FREE
;
1891 chunk
->pages_used
+= need_pages
;
1894 static arena_chunk_t
*
1895 arena_chunk_alloc(arena_t
*arena
)
1897 arena_chunk_t
*chunk
;
1899 if (arena
->spare
!= NULL
) {
1900 chunk
= arena
->spare
;
1901 arena
->spare
= NULL
;
1904 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1906 chunk
= (arena_chunk_t
*)chunk_alloc(chunksize
);
1910 arena
->stats
.mapped
+= chunksize
;
1913 chunk
->arena
= arena
;
1916 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1919 * Claim that no pages are in use, since the header is merely
1922 chunk
->pages_used
= 0;
1924 chunk
->max_frun_npages
= chunk_npages
-
1925 arena_chunk_header_npages
;
1926 chunk
->min_frun_ind
= arena_chunk_header_npages
;
1929 * Initialize enough of the map to support one maximal free run.
1931 chunk
->map
[arena_chunk_header_npages
].npages
= chunk_npages
-
1932 arena_chunk_header_npages
;
1933 chunk
->map
[arena_chunk_header_npages
].pos
= POS_FREE
;
1934 chunk
->map
[chunk_npages
- 1].npages
= chunk_npages
-
1935 arena_chunk_header_npages
;
1936 chunk
->map
[chunk_npages
- 1].pos
= POS_FREE
;
1943 arena_chunk_dealloc(arena_t
*arena
, arena_chunk_t
*chunk
)
1947 * Remove chunk from the chunk tree, regardless of whether this chunk
1948 * will be cached, so that the arena does not use it.
1951 RB_REMOVE(arena_chunk_tree_s
, &chunk
->arena
->chunks
, chunk
);
1953 if (opt_hint
== false) {
1954 if (arena
->spare
!= NULL
) {
1955 chunk_dealloc((void *)arena
->spare
, chunksize
);
1957 arena
->stats
.mapped
-= chunksize
;
1960 arena
->spare
= chunk
;
1962 assert(arena
->spare
== NULL
);
1963 chunk_dealloc((void *)chunk
, chunksize
);
1965 arena
->stats
.mapped
-= chunksize
;
1970 static arena_run_t
*
1971 arena_run_alloc(arena_t
*arena
, size_t size
)
1973 arena_chunk_t
*chunk
;
1975 unsigned need_npages
, limit_pages
, compl_need_npages
;
1977 assert(size
<= (chunksize
- (arena_chunk_header_npages
<<
1979 assert((size
& pagesize_mask
) == 0);
1982 * Search through arena's chunks in address order for a free run that is
1983 * large enough. Look for the first fit.
1985 need_npages
= (unsigned)(size
>> pagesize_2pow
);
1986 limit_pages
= chunk_npages
- arena_chunk_header_npages
;
1987 compl_need_npages
= limit_pages
- need_npages
;
1989 RB_FOREACH(chunk
, arena_chunk_tree_s
, &arena
->chunks
) {
1991 * Avoid searching this chunk if there are not enough
1992 * contiguous free pages for there to possibly be a large
1995 if (chunk
->pages_used
<= compl_need_npages
&&
1996 need_npages
<= chunk
->max_frun_npages
) {
1997 arena_chunk_map_t
*mapelm
;
1999 unsigned max_frun_npages
= 0;
2000 unsigned min_frun_ind
= chunk_npages
;
2002 assert(chunk
->min_frun_ind
>=
2003 arena_chunk_header_npages
);
2004 for (i
= chunk
->min_frun_ind
; i
< chunk_npages
;) {
2005 mapelm
= &chunk
->map
[i
];
2006 if (mapelm
->pos
== POS_FREE
) {
2007 if (mapelm
->npages
>= need_npages
) {
2008 run
= (arena_run_t
*)
2009 ((uintptr_t)chunk
+ (i
<<
2011 /* Update page map. */
2012 arena_run_split(arena
, run
,
2016 if (mapelm
->npages
>
2021 if (i
< min_frun_ind
) {
2023 if (i
< chunk
->min_frun_ind
)
2024 chunk
->min_frun_ind
= i
;
2027 i
+= mapelm
->npages
;
2030 * Search failure. Reset cached chunk->max_frun_npages.
2031 * chunk->min_frun_ind was already reset above (if
2034 chunk
->max_frun_npages
= max_frun_npages
;
2039 * No usable runs. Create a new chunk from which to allocate the run.
2041 chunk
= arena_chunk_alloc(arena
);
2044 run
= (arena_run_t
*)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
2046 /* Update page map. */
2047 arena_run_split(arena
, run
, size
);
2052 arena_run_dalloc(arena_t
*arena
, arena_run_t
*run
, size_t size
)
2054 arena_chunk_t
*chunk
;
2055 unsigned run_ind
, run_pages
;
2057 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
2059 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
2061 assert(run_ind
>= arena_chunk_header_npages
);
2062 assert(run_ind
< (chunksize
>> pagesize_2pow
));
2063 run_pages
= (unsigned)(size
>> pagesize_2pow
);
2064 assert(run_pages
== chunk
->map
[run_ind
].npages
);
2066 /* Subtract pages from count of pages used in chunk. */
2067 chunk
->pages_used
-= run_pages
;
2069 /* Mark run as deallocated. */
2070 assert(chunk
->map
[run_ind
].npages
== run_pages
);
2071 chunk
->map
[run_ind
].pos
= POS_FREE
;
2072 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
== run_pages
);
2073 chunk
->map
[run_ind
+ run_pages
- 1].pos
= POS_FREE
;
2076 * Tell the kernel that we don't need the data in this run, but only if
2077 * requested via runtime configuration.
2080 madvise(run
, size
, MADV_FREE
);
2082 /* Try to coalesce with neighboring runs. */
2083 if (run_ind
> arena_chunk_header_npages
&&
2084 chunk
->map
[run_ind
- 1].pos
== POS_FREE
) {
2085 unsigned prev_npages
;
2087 /* Coalesce with previous run. */
2088 prev_npages
= chunk
->map
[run_ind
- 1].npages
;
2089 run_ind
-= prev_npages
;
2090 assert(chunk
->map
[run_ind
].npages
== prev_npages
);
2091 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2092 run_pages
+= prev_npages
;
2094 chunk
->map
[run_ind
].npages
= run_pages
;
2095 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2096 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2097 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2100 if (run_ind
+ run_pages
< chunk_npages
&&
2101 chunk
->map
[run_ind
+ run_pages
].pos
== POS_FREE
) {
2102 unsigned next_npages
;
2104 /* Coalesce with next run. */
2105 next_npages
= chunk
->map
[run_ind
+ run_pages
].npages
;
2106 run_pages
+= next_npages
;
2107 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
==
2109 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2111 chunk
->map
[run_ind
].npages
= run_pages
;
2112 chunk
->map
[run_ind
].pos
= POS_FREE
;
2113 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2114 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2117 if (chunk
->map
[run_ind
].npages
> chunk
->max_frun_npages
)
2118 chunk
->max_frun_npages
= chunk
->map
[run_ind
].npages
;
2119 if (run_ind
< chunk
->min_frun_ind
)
2120 chunk
->min_frun_ind
= run_ind
;
2122 /* Deallocate chunk if it is now completely unused. */
2123 if (chunk
->pages_used
== 0)
2124 arena_chunk_dealloc(arena
, chunk
);
2127 static arena_run_t
*
2128 arena_bin_nonfull_run_get(arena_t
*arena
, arena_bin_t
*bin
)
2131 unsigned i
, remainder
;
2133 /* Look for a usable run. */
2135 if ((run
= RB_MIN(arena_run_tree_s
, &bin
->runs
)) != NULL
) {
2136 /* run is guaranteed to have available space. */
2138 RB_REMOVE(arena_run_tree_s
, &bin
->runs
, run
);
2140 bin
->stats
.reruns
++;
2144 /* No existing runs have any space available. */
2146 /* Allocate a new run. */
2147 run
= arena_run_alloc(arena
, bin
->run_size
);
2151 /* Initialize run internals. */
2154 for (i
= 0; i
< bin
->regs_mask_nelms
; i
++)
2155 run
->regs_mask
[i
] = UINT_MAX
;
2156 remainder
= bin
->nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1);
2157 if (remainder
!= 0) {
2158 /* The last element has spare bits that need to be unset. */
2159 run
->regs_mask
[i
] = (UINT_MAX
>> ((1 << (SIZEOF_INT_2POW
+ 3))
2163 run
->regs_minelm
= 0;
2165 run
->nfree
= bin
->nregs
;
2167 run
->magic
= ARENA_RUN_MAGIC
;
2172 bin
->stats
.curruns
++;
2173 if (bin
->stats
.curruns
> bin
->stats
.highruns
)
2174 bin
->stats
.highruns
= bin
->stats
.curruns
;
2179 /* bin->runcur must have space available before this function is called. */
2180 static inline void *
2181 arena_bin_malloc_easy(arena_t
*arena
, arena_bin_t
*bin
, arena_run_t
*run
)
2185 assert(run
->magic
== ARENA_RUN_MAGIC
);
2186 assert(run
->nfree
> 0);
2188 ret
= arena_run_reg_alloc(run
, bin
);
2189 assert(ret
!= NULL
);
2195 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2197 arena_bin_malloc_hard(arena_t
*arena
, arena_bin_t
*bin
)
2200 bin
->runcur
= arena_bin_nonfull_run_get(arena
, bin
);
2201 if (bin
->runcur
== NULL
)
2203 assert(bin
->runcur
->magic
== ARENA_RUN_MAGIC
);
2204 assert(bin
->runcur
->nfree
> 0);
2206 return (arena_bin_malloc_easy(arena
, bin
, bin
->runcur
));
2210 * Calculate bin->run_size such that it meets the following constraints:
2212 * *) bin->run_size >= min_run_size
2213 * *) bin->run_size <= arena_maxclass
2214 * *) bin->run_size <= RUN_MAX_SMALL
2215 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2217 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2218 * also calculated here, since these settings are all interdependent.
2221 arena_bin_run_size_calc(arena_bin_t
*bin
, size_t min_run_size
)
2223 size_t try_run_size
, good_run_size
;
2224 unsigned good_nregs
, good_mask_nelms
, good_reg0_offset
;
2225 unsigned try_nregs
, try_mask_nelms
, try_reg0_offset
;
2227 assert(min_run_size
>= pagesize
);
2228 assert(min_run_size
<= arena_maxclass
);
2229 assert(min_run_size
<= RUN_MAX_SMALL
);
2232 * Calculate known-valid settings before entering the run_size
2233 * expansion loop, so that the first part of the loop always copies
2236 * The do..while loop iteratively reduces the number of regions until
2237 * the run header and the regions no longer overlap. A closed formula
2238 * would be quite messy, since there is an interdependency between the
2239 * header's mask length and the number of regions.
2241 try_run_size
= min_run_size
;
2242 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2243 bin
->reg_size
) + 1); /* Counter-act try_nregs-- in loop. */
2246 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2247 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ? 1 : 0);
2248 try_reg0_offset
= (unsigned)(try_run_size
-
2249 (try_nregs
* bin
->reg_size
));
2250 } while (sizeof(arena_run_t
) + (sizeof(unsigned) * (try_mask_nelms
- 1))
2253 /* run_size expansion loop. */
2256 * Copy valid settings before trying more aggressive settings.
2258 good_run_size
= try_run_size
;
2259 good_nregs
= try_nregs
;
2260 good_mask_nelms
= try_mask_nelms
;
2261 good_reg0_offset
= try_reg0_offset
;
2263 /* Try more aggressive settings. */
2264 try_run_size
+= pagesize
;
2265 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2266 bin
->reg_size
) + 1); /* Counter-act try_nregs-- in loop. */
2269 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2270 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ?
2272 try_reg0_offset
= (unsigned)(try_run_size
- (try_nregs
*
2274 } while (sizeof(arena_run_t
) + (sizeof(unsigned) *
2275 (try_mask_nelms
- 1)) > try_reg0_offset
);
2276 } while (try_run_size
<= arena_maxclass
&& try_run_size
<= RUN_MAX_SMALL
2277 && RUN_MAX_OVRHD
* (bin
->reg_size
<< 3) > RUN_MAX_OVRHD_RELAX
2278 && (try_reg0_offset
<< RUN_BFP
) > RUN_MAX_OVRHD
* try_run_size
);
2280 assert(sizeof(arena_run_t
) + (sizeof(unsigned) * (good_mask_nelms
- 1))
2281 <= good_reg0_offset
);
2282 assert((good_mask_nelms
<< (SIZEOF_INT_2POW
+ 3)) >= good_nregs
);
2284 /* Copy final settings. */
2285 bin
->run_size
= good_run_size
;
2286 bin
->nregs
= good_nregs
;
2287 bin
->regs_mask_nelms
= good_mask_nelms
;
2288 bin
->reg0_offset
= good_reg0_offset
;
2290 return (good_run_size
);
2294 arena_malloc(arena_t
*arena
, size_t size
)
2298 assert(arena
!= NULL
);
2299 assert(arena
->magic
== ARENA_MAGIC
);
2301 assert(QUANTUM_CEILING(size
) <= arena_maxclass
);
2303 if (size
<= bin_maxclass
) {
2307 /* Small allocation. */
2309 if (size
< small_min
) {
2311 size
= pow2_ceil(size
);
2312 bin
= &arena
->bins
[ffs((int)(size
>> (TINY_MIN_2POW
+
2314 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2316 * Bin calculation is always correct, but we may need
2317 * to fix size for the purposes of assertions and/or
2320 if (size
< (1 << TINY_MIN_2POW
))
2321 size
= (1 << TINY_MIN_2POW
);
2323 } else if (size
<= small_max
) {
2324 /* Quantum-spaced. */
2325 size
= QUANTUM_CEILING(size
);
2326 bin
= &arena
->bins
[ntbins
+ (size
>> opt_quantum_2pow
)
2330 size
= pow2_ceil(size
);
2331 bin
= &arena
->bins
[ntbins
+ nqbins
2332 + (ffs((int)(size
>> opt_small_max_2pow
)) - 2)];
2334 assert(size
== bin
->reg_size
);
2336 malloc_mutex_lock(&arena
->mtx
);
2337 if ((run
= bin
->runcur
) != NULL
&& run
->nfree
> 0)
2338 ret
= arena_bin_malloc_easy(arena
, bin
, run
);
2340 ret
= arena_bin_malloc_hard(arena
, bin
);
2343 malloc_mutex_unlock(&arena
->mtx
);
2348 bin
->stats
.nrequests
++;
2349 arena
->stats
.nmalloc_small
++;
2350 arena
->stats
.allocated_small
+= size
;
2353 /* Large allocation. */
2354 size
= PAGE_CEILING(size
);
2355 malloc_mutex_lock(&arena
->mtx
);
2356 ret
= (void *)arena_run_alloc(arena
, size
);
2358 malloc_mutex_unlock(&arena
->mtx
);
2362 arena
->stats
.nmalloc_large
++;
2363 arena
->stats
.allocated_large
+= size
;
2367 malloc_mutex_unlock(&arena
->mtx
);
2370 memset(ret
, 0xa5, size
);
2372 memset(ret
, 0, size
);
static void
arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
    unsigned npages)
{
	unsigned i;

	/*
	 * Modify the map such that arena_run_dalloc() sees the run as
	 * separately allocated.
	 */
	for (i = 0; i < npages; i++) {
		chunk->map[pageind + i].npages = npages;
		chunk->map[pageind + i].pos = i;
	}

	arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
	    pagesize_2pow)), npages << pagesize_2pow);
}

/* Only handles large allocations that require more than page alignment. */
static void *
arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
{
	void *ret;
	size_t offset;
	arena_chunk_t *chunk;
	unsigned pageind, i, npages;

	assert((size & pagesize_mask) == 0);
	assert((alignment & pagesize_mask) == 0);

	npages = (unsigned)(size >> pagesize_2pow);

	malloc_mutex_lock(&arena->mtx);
	ret = (void *)arena_run_alloc(arena, alloc_size);
	if (ret == NULL) {
		malloc_mutex_unlock(&arena->mtx);
		return (NULL);
	}

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);

	offset = (uintptr_t)ret & (alignment - 1);
	assert((offset & pagesize_mask) == 0);
	assert(offset < alloc_size);
	if (offset == 0) {
		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
		    pagesize_2pow);

		/* Update the map for the run to be kept. */
		for (i = 0; i < npages; i++) {
			chunk->map[pageind + i].npages = npages;
			assert(chunk->map[pageind + i].pos == i);
		}

		/* Trim trailing space. */
		arena_palloc_trim(arena, chunk, pageind + npages,
		    (unsigned)((alloc_size - size) >> pagesize_2pow));
	} else {
		size_t leadsize, trailsize;

		leadsize = alignment - offset;
		ret = (void *)((uintptr_t)ret + leadsize);
		pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
		    pagesize_2pow);

		/* Update the map for the run to be kept. */
		for (i = 0; i < npages; i++) {
			chunk->map[pageind + i].npages = npages;
			chunk->map[pageind + i].pos = i;
		}

		/* Trim leading space. */
		arena_palloc_trim(arena, chunk,
		    (unsigned)(pageind - (leadsize >> pagesize_2pow)),
		    (unsigned)(leadsize >> pagesize_2pow));

		trailsize = alloc_size - leadsize - size;
		if (trailsize != 0) {
			/* Trim trailing space. */
			assert(trailsize < alloc_size);
			arena_palloc_trim(arena, chunk, pageind + npages,
			    (unsigned)(trailsize >> pagesize_2pow));
		}
	}

#ifdef MALLOC_STATS
	arena->stats.nmalloc_large++;
	arena->stats.allocated_large += size;
#endif
	malloc_mutex_unlock(&arena->mtx);

	if (opt_junk)
		memset(ret, 0xa5, size);
	else if (opt_zero)
		memset(ret, 0, size);
	return (ret);
}

/* Return the size of the allocation pointed to by ptr. */
static size_t
arena_salloc(const void *ptr)
{
	size_t ret;
	arena_chunk_t *chunk;
	arena_chunk_map_t *mapelm;
	unsigned pageind;

	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	/*
	 * No arena data structures that we query here can change in a way that
	 * affects this function, so we don't need to lock.
	 */
	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
	    pagesize_2pow);
	mapelm = &chunk->map[pageind];
	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
	    pagesize_2pow)) {
		arena_run_t *run;

		/* Small allocation: the size is recorded in the run header. */
		pageind -= mapelm->pos;

		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
		    pagesize_2pow));
		assert(run->magic == ARENA_RUN_MAGIC);
		ret = run->bin->reg_size;
	} else {
		/* Large allocation: the size is recorded in the chunk map. */
		ret = mapelm->npages << pagesize_2pow;
	}

	return (ret);
}

static void *
arena_ralloc(void *ptr, size_t size, size_t oldsize)
{
	void *ret;

	/* Avoid moving the allocation if the size class would not change. */
	if (size < small_min) {
		if (oldsize < small_min &&
		    ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
		    == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
			goto IN_PLACE;
	} else if (size <= small_max) {
		if (oldsize >= small_min && oldsize <= small_max &&
		    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
		    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
			goto IN_PLACE;
	} else {
		/*
		 * We make no attempt to resize runs here, though it would be
		 * possible to do so.
		 */
		if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
			goto IN_PLACE;
	}

	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class.  In that case, fall back to
	 * allocating new space and copying.
	 */
	ret = arena_malloc(choose_arena(), size);
	if (ret == NULL)
		return (NULL);

	/* Junk/zero-filling were already done by arena_malloc(). */
	if (size < oldsize)
		memcpy(ret, ptr, size);
	else
		memcpy(ret, ptr, oldsize);

	idalloc(ptr);
	return (ret);
IN_PLACE:
	if (opt_junk && size < oldsize)
		memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
	else if (opt_zero && size > oldsize)
		memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
	return (ptr);
}

static void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
	size_t size;
	unsigned pageind;
	arena_chunk_map_t *mapelm;

	assert(arena != NULL);
	assert(arena->magic == ARENA_MAGIC);
	assert(chunk->arena == arena);
	assert(ptr != NULL);
	assert(CHUNK_ADDR2BASE(ptr) != ptr);

	pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
	    pagesize_2pow);
	mapelm = &chunk->map[pageind];
	if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
	    pagesize_2pow)) {
		arena_run_t *run;
		arena_bin_t *bin;

		/* Small allocation. */

		pageind -= mapelm->pos;

		run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
		    pagesize_2pow));
		assert(run->magic == ARENA_RUN_MAGIC);
		bin = run->bin;
		size = bin->reg_size;

		if (opt_junk)
			memset(ptr, 0x5a, size);

		malloc_mutex_lock(&arena->mtx);
		arena_run_reg_dalloc(run, bin, ptr, size);
		run->nfree++;

		if (run->nfree == bin->nregs) {
			/* Deallocate run. */
			if (run == bin->runcur)
				bin->runcur = NULL;
			else if (bin->nregs != 1) {
				/*
				 * This block's conditional is necessary because
				 * if the run only contains one region, then it
				 * never gets inserted into the non-full runs
				 * tree.
				 */
				RB_REMOVE(arena_run_tree_s, &bin->runs, run);
			}
			arena_run_dalloc(arena, run, bin->run_size);
#ifdef MALLOC_STATS
			bin->stats.curruns--;
#endif
		} else if (run->nfree == 1 && run != bin->runcur) {
			/*
			 * Make sure that bin->runcur always refers to the
			 * lowest non-full run, if one exists.
			 */
			if (bin->runcur == NULL)
				bin->runcur = run;
			else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
				/* Switch runcur. */
				if (bin->runcur->nfree > 0) {
					/* Insert runcur. */
					RB_INSERT(arena_run_tree_s, &bin->runs,
					    bin->runcur);
				}
				bin->runcur = run;
			} else {
				RB_INSERT(arena_run_tree_s, &bin->runs, run);
			}
		}
#ifdef MALLOC_STATS
		arena->stats.allocated_small -= size;
		arena->stats.ndalloc_small++;
#endif
	} else {
		/* Large allocation. */

		size = mapelm->npages << pagesize_2pow;
		assert((((uintptr_t)ptr) & pagesize_mask) == 0);

		if (opt_junk)
			memset(ptr, 0x5a, size);

		malloc_mutex_lock(&arena->mtx);
		arena_run_dalloc(arena, (arena_run_t *)ptr, size);
#ifdef MALLOC_STATS
		arena->stats.allocated_large -= size;
		arena->stats.ndalloc_large++;
#endif
	}

	malloc_mutex_unlock(&arena->mtx);
}

static bool
arena_new(arena_t *arena)
{
	unsigned i;
	arena_bin_t *bin;
	size_t prev_run_size;

	malloc_mutex_init(&arena->mtx);

#ifdef MALLOC_STATS
	memset(&arena->stats, 0, sizeof(arena_stats_t));
#endif

	/* Initialize chunks. */
	RB_INIT(&arena->chunks);
	arena->spare = NULL;

	/* Initialize bins. */
	prev_run_size = pagesize;

	/* (2^n)-spaced tiny bins. */
	for (i = 0; i < ntbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = (1 << (TINY_MIN_2POW + i));
		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* Quantum-spaced bins. */
	for (; i < ntbins + nqbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = quantum * (i - ntbins + 1);
/*
		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
*/
		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* (2^n)-spaced sub-page bins. */
	for (; i < ntbins + nqbins + nsbins; i++) {
		bin = &arena->bins[i];
		bin->runcur = NULL;
		RB_INIT(&bin->runs);

		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
		prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

#ifdef MALLOC_DEBUG
	arena->magic = ARENA_MAGIC;
#endif

	return (false);
}

/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
	arena_t *ret;

	/* Allocate enough space for trailing bins. */
	ret = (arena_t *)base_alloc(sizeof(arena_t)
	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
	if (ret != NULL && arena_new(ret) == false) {
		arenas[ind] = ret;
		return (ret);
	}
	/* Only reached if there is an OOM error. */

	/*
	 * OOM here is quite inconvenient to propagate, since dealing with it
	 * would require a check for failure in the fast path.  Instead, punt
	 * by using arenas[0].  In practice, this is an extremely unlikely
	 * failure.
	 */
	_malloc_message(getprogname(),
	    ": (malloc) Error initializing arena\n", "", "");
	if (opt_abort)
		abort();

	return (arenas[0]);
}

/******************************************************************************/
/*
 * Begin general internal functions.
 */

static void *
huge_malloc(size_t size)
{
	void *ret;
	size_t csize;
	chunk_node_t *node;

	/* Allocate one or more contiguous chunks for this request. */

	csize = CHUNK_CEILING(size);
	if (csize == 0) {
		/* size is large enough to cause size_t wrap-around. */
		return (NULL);
	}

	/* Allocate a chunk node with which to track the chunk. */
	node = base_chunk_node_alloc();
	if (node == NULL)
		return (NULL);

	ret = chunk_alloc(csize);
	if (ret == NULL) {
		base_chunk_node_dealloc(node);
		return (NULL);
	}

	/* Insert node into huge. */
	node->chunk = ret;
	node->size = csize;

	malloc_mutex_lock(&chunks_mtx);
	RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
	huge_nmalloc++;
	huge_allocated += csize;
#endif
	malloc_mutex_unlock(&chunks_mtx);

	if (opt_junk)
		memset(ret, 0xa5, csize);
	else if (opt_zero)
		memset(ret, 0, csize);

	return (ret);
}

/* Only handles large allocations that require more than chunk alignment. */
static void *
huge_palloc(size_t alignment, size_t size)
{
	void *ret;
	size_t alloc_size, chunk_size, offset;
	chunk_node_t *node;

	/*
	 * This allocation requires alignment that is even larger than chunk
	 * alignment.  This means that huge_malloc() isn't good enough.
	 *
	 * Allocate almost twice as many chunks as are demanded by the size or
	 * alignment, in order to assure the alignment can be achieved, then
	 * unmap leading and trailing chunks.
	 */
	assert(alignment >= chunksize);

	chunk_size = CHUNK_CEILING(size);

	if (size >= alignment)
		alloc_size = chunk_size + alignment - chunksize;
	else
		alloc_size = (alignment << 1) - chunksize;

	/* Allocate a chunk node with which to track the chunk. */
	node = base_chunk_node_alloc();
	if (node == NULL)
		return (NULL);

	ret = chunk_alloc(alloc_size);
	if (ret == NULL) {
		base_chunk_node_dealloc(node);
		return (NULL);
	}

	offset = (uintptr_t)ret & (alignment - 1);
	assert((offset & chunksize_mask) == 0);
	assert(offset < alloc_size);
	if (offset == 0) {
		/* Trim trailing space. */
		chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
		    - chunk_size);
	} else {
		size_t trailsize;

		/* Trim leading space. */
		chunk_dealloc(ret, alignment - offset);

		ret = (void *)((uintptr_t)ret + (alignment - offset));

		trailsize = alloc_size - (alignment - offset) - chunk_size;
		if (trailsize != 0) {
			/* Trim trailing space. */
			assert(trailsize < alloc_size);
			chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
			    trailsize);
		}
	}

	/* Insert node into huge. */
	node->chunk = ret;
	node->size = chunk_size;

	malloc_mutex_lock(&chunks_mtx);
	RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
	huge_nmalloc++;
	huge_allocated += chunk_size;
#endif
	malloc_mutex_unlock(&chunks_mtx);

	if (opt_junk)
		memset(ret, 0xa5, chunk_size);
	else if (opt_zero)
		memset(ret, 0, chunk_size);

	return (ret);
}

static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
	void *ret;

	/* Avoid moving the allocation if the size class would not change. */
	if (oldsize > arena_maxclass &&
	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
		if (opt_junk && size < oldsize) {
			memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
			    - size);
		} else if (opt_zero && size > oldsize) {
			memset((void *)((uintptr_t)ptr + oldsize), 0, size
			    - oldsize);
		}
		return (ptr);
	}

	if (CHUNK_ADDR2BASE(ptr) == ptr
#ifdef USE_BRK
	    && ((uintptr_t)ptr < (uintptr_t)brk_base
	    || (uintptr_t)ptr >= (uintptr_t)brk_max)
#endif
	    ) {
		chunk_node_t *node, key;
		void *newptr;
		size_t oldcsize;
		size_t newcsize;

		newcsize = CHUNK_CEILING(size);
		oldcsize = CHUNK_CEILING(oldsize);
		assert(oldcsize != newcsize);
		if (newcsize == 0) {
			/* size_t wrap-around */
			return (NULL);
		}

		/*
		 * Remove the old region from the tree now.  If mremap()
		 * returns the region to the system, another thread may
		 * map it for the same huge allocation and insert it into the
		 * tree before we acquire the mutex lock again.
		 */
		malloc_mutex_lock(&chunks_mtx);
		key.chunk = __DECONST(void *, ptr);
		node = RB_FIND(chunk_tree_s, &huge, &key);
		assert(node != NULL);
		assert(node->chunk == ptr);
		assert(node->size == oldcsize);
		RB_REMOVE(chunk_tree_s, &huge, node);
		malloc_mutex_unlock(&chunks_mtx);

		newptr = mremap(ptr, oldcsize, NULL, newcsize,
		    MAP_ALIGNED(chunksize_2pow));
		if (newptr == MAP_FAILED) {
			/* We still own the old region. */
			malloc_mutex_lock(&chunks_mtx);
			RB_INSERT(chunk_tree_s, &huge, node);
			malloc_mutex_unlock(&chunks_mtx);
		} else {
			assert(CHUNK_ADDR2BASE(newptr) == newptr);

			/* Insert new or resized old region. */
			malloc_mutex_lock(&chunks_mtx);
			node->size = newcsize;
			node->chunk = newptr;
			RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
			huge_nralloc++;
			huge_allocated += newcsize - oldcsize;
			if (newcsize > oldcsize) {
				stats_chunks.curchunks +=
				    (newcsize - oldcsize) / chunksize;
				if (stats_chunks.curchunks >
				    stats_chunks.highchunks)
					stats_chunks.highchunks =
					    stats_chunks.curchunks;
			} else {
				stats_chunks.curchunks -=
				    (oldcsize - newcsize) / chunksize;
			}
#endif
			malloc_mutex_unlock(&chunks_mtx);

			if (opt_junk && size < oldsize) {
				memset((void *)((uintptr_t)newptr + size), 0x5a,
				    oldsize - size);
			} else if (opt_zero && size > oldsize) {
				memset((void *)((uintptr_t)newptr + oldsize), 0,
				    size - oldsize);
			}
			return (newptr);
		}
	}

	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class.  In that case, fall back to
	 * allocating new space and copying.
	 */
	ret = huge_malloc(size);
	if (ret == NULL)
		return (NULL);

	if (CHUNK_ADDR2BASE(ptr) == ptr) {
		/* The old allocation is a chunk. */
		if (size < oldsize)
			memcpy(ret, ptr, size);
		else
			memcpy(ret, ptr, oldsize);
	} else {
		/* The old allocation is a region. */
		assert(oldsize < size);
		memcpy(ret, ptr, oldsize);
	}
	idalloc(ptr);
	return (ret);
}

static void
huge_dalloc(void *ptr)
{
	chunk_node_t key;
	chunk_node_t *node;

	malloc_mutex_lock(&chunks_mtx);

	/* Extract from tree of huge allocations. */
	key.chunk = ptr;
	node = RB_FIND(chunk_tree_s, &huge, &key);
	assert(node != NULL);
	assert(node->chunk == ptr);
	RB_REMOVE(chunk_tree_s, &huge, node);

#ifdef MALLOC_STATS
	huge_ndalloc++;
	huge_allocated -= node->size;
#endif

	malloc_mutex_unlock(&chunks_mtx);

	/* Unmap chunk. */
	if (opt_junk)
		memset(node->chunk, 0x5a, node->size);

	chunk_dealloc(node->chunk, node->size);

	base_chunk_node_dealloc(node);
}

static void *
imalloc(size_t size)
{
	void *ret;

	assert(size != 0);

	if (size <= arena_maxclass)
		ret = arena_malloc(choose_arena(), size);
	else
		ret = huge_malloc(size);

	return (ret);
}

static void *
ipalloc(size_t alignment, size_t size)
{
	void *ret;
	size_t ceil_size;

	/*
	 * Round size up to the nearest multiple of alignment.
	 *
	 * This done, we can take advantage of the fact that for each small
	 * size class, every object is aligned at the smallest power of two
	 * that is non-zero in the base two representation of the size.  For
	 * example:
	 *
	 *   Size |   Base 2 | Minimum alignment
	 *   -----+----------+------------------
	 *     96 |  1100000 |                32
	 *    144 | 10010000 |                16
	 *    192 | 11000000 |                64
	 *
	 * Depending on runtime settings, it is possible that arena_malloc()
	 * will further round up to a power of two, but that never causes
	 * correctness issues.
	 */
	ceil_size = (size + (alignment - 1)) & (-alignment);
	/*
	 * (ceil_size < size) protects against the combination of maximal
	 * alignment and size greater than maximal alignment.
	 */
	if (ceil_size < size) {
		/* size_t overflow. */
		return (NULL);
	}

	if (ceil_size <= pagesize || (alignment <= pagesize
	    && ceil_size <= arena_maxclass))
		ret = arena_malloc(choose_arena(), ceil_size);
	else {
		size_t run_size;

		/*
		 * We can't achieve sub-page alignment, so round up alignment
		 * permanently; it makes later calculations simpler.
		 */
		alignment = PAGE_CEILING(alignment);
		ceil_size = PAGE_CEILING(size);
		/*
		 * (ceil_size < size) protects against very large sizes within
		 * pagesize of SIZE_T_MAX.
		 *
		 * (ceil_size + alignment < ceil_size) protects against the
		 * combination of maximal alignment and ceil_size large enough
		 * to cause overflow.  This is similar to the first overflow
		 * check above, but it needs to be repeated due to the new
		 * ceil_size value, which may now be *equal* to maximal
		 * alignment, whereas before we only detected overflow if the
		 * original size was *greater* than maximal alignment.
		 */
		if (ceil_size < size || ceil_size + alignment < ceil_size) {
			/* size_t overflow. */
			return (NULL);
		}

		/*
		 * Calculate the size of the over-size run that arena_palloc()
		 * would need to allocate in order to guarantee the alignment.
		 */
		if (ceil_size >= alignment)
			run_size = ceil_size + alignment - pagesize;
		else {
			/*
			 * It is possible that (alignment << 1) will cause
			 * overflow, but it doesn't matter because we also
			 * subtract pagesize, which in the case of overflow
			 * leaves us with a very large run_size.  That causes
			 * the first conditional below to fail, which means
			 * that the bogus run_size value never gets used for
			 * anything important.
			 */
			run_size = (alignment << 1) - pagesize;
		}

		if (run_size <= arena_maxclass) {
			ret = arena_palloc(choose_arena(), alignment, ceil_size,
			    run_size);
		} else if (alignment <= chunksize)
			ret = huge_malloc(ceil_size);
		else
			ret = huge_palloc(alignment, ceil_size);
	}

	assert(((uintptr_t)ret & (alignment - 1)) == 0);
	return (ret);
}

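/*
 * Illustrative example (editorial sketch, not part of the original code):
 * for size == 50 and alignment == 16, ceil_size = (50 + 15) & -16 == 64,
 * which is <= pagesize, so the ordinary arena_malloc() path is taken; per
 * the comment above, every object in the 64 byte class is aligned to at
 * least 64 bytes, which satisfies the requested 16 byte alignment.
 */
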
static void *
icalloc(size_t size)
{
	void *ret;

	if (size <= arena_maxclass) {
		ret = arena_malloc(choose_arena(), size);
		if (ret == NULL)
			return (NULL);
		memset(ret, 0, size);
	} else {
		/*
		 * The virtual memory system provides zero-filled pages, so
		 * there is no need to do so manually, unless opt_junk is
		 * enabled, in which case huge_malloc() fills huge allocations
		 * with junk.
		 */
		ret = huge_malloc(size);
		if (ret == NULL)
			return (NULL);

		if (opt_junk)
			memset(ret, 0, size);
#ifdef USE_BRK
		else if ((uintptr_t)ret >= (uintptr_t)brk_base
		    && (uintptr_t)ret < (uintptr_t)brk_max) {
			/*
			 * This may be a re-used brk chunk.  Therefore, zero
			 * the memory.
			 */
			memset(ret, 0, size);
		}
#endif
	}

	return (ret);
}

static size_t
isalloc(const void *ptr)
{
	size_t ret;
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		assert(chunk->arena->magic == ARENA_MAGIC);

		ret = arena_salloc(ptr);
	} else {
		chunk_node_t *node, key;

		/* Chunk (huge allocation). */

		malloc_mutex_lock(&chunks_mtx);

		/* Extract from tree of huge allocations. */
		key.chunk = __DECONST(void *, ptr);
		node = RB_FIND(chunk_tree_s, &huge, &key);
		assert(node != NULL);

		ret = node->size;

		malloc_mutex_unlock(&chunks_mtx);
	}

	return (ret);
}

static void *
iralloc(void *ptr, size_t size)
{
	void *ret;
	size_t oldsize;

	assert(ptr != NULL);

	oldsize = isalloc(ptr);

	if (size <= arena_maxclass)
		ret = arena_ralloc(ptr, size, oldsize);
	else
		ret = huge_ralloc(ptr, size, oldsize);

	return (ret);
}

static void
idalloc(void *ptr)
{
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		arena_dalloc(chunk->arena, chunk, ptr);
	} else
		huge_dalloc(ptr);
}

static void
malloc_print_stats(void)
{

	if (opt_print_stats) {
		char s[UMAX2S_BUFSIZE];

		_malloc_message("___ Begin malloc statistics ___\n", "", "",
		    "");
		_malloc_message("Assertions ",
#ifdef NDEBUG
		    "disabled",
#else
		    "enabled",
#endif
		    "\n", "");
		_malloc_message("Boolean MALLOC_OPTIONS: ",
		    opt_abort ? "A" : "a",
		    opt_junk ? "J" : "j",
		    opt_hint ? "H" : "h");
		_malloc_message(opt_utrace ? "PU" : "Pu",
		    opt_sysv ? "V" : "v",
		    opt_xmalloc ? "X" : "x",
		    opt_zero ? "Z\n" : "z\n");

		_malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
		_malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
		_malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
		    "\n", "");
		_malloc_message("Quantum size: ", size_t2s(quantum, s), "\n",
		    "");
		_malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
		    "");
		_malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
		_malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
		    ")\n", "");

#ifdef MALLOC_STATS
		{
			size_t allocated, mapped;
			unsigned i;
			arena_t *arena;

			/* Calculate and print allocated/mapped stats. */

			/* arenas. */
			for (i = 0, allocated = 0; i < narenas; i++) {
				if (arenas[i] != NULL) {
					malloc_mutex_lock(&arenas[i]->mtx);
					allocated +=
					    arenas[i]->stats.allocated_small;
					allocated +=
					    arenas[i]->stats.allocated_large;
					malloc_mutex_unlock(&arenas[i]->mtx);
				}
			}

			/* huge/base. */
			malloc_mutex_lock(&chunks_mtx);
			allocated += huge_allocated;
			mapped = stats_chunks.curchunks * chunksize;
			malloc_mutex_unlock(&chunks_mtx);

			malloc_mutex_lock(&base_mtx);
			mapped += base_mapped;
			malloc_mutex_unlock(&base_mtx);

			malloc_printf("Allocated: %zu, mapped: %zu\n",
			    allocated, mapped);

			/* Print chunk stats. */
			{
				chunk_stats_t chunks_stats;

				malloc_mutex_lock(&chunks_mtx);
				chunks_stats = stats_chunks;
				malloc_mutex_unlock(&chunks_mtx);

				malloc_printf("chunks: nchunks   "
				    "highchunks    curchunks\n");
				malloc_printf("  %13llu%13lu%13lu\n",
				    chunks_stats.nchunks,
				    chunks_stats.highchunks,
				    chunks_stats.curchunks);
			}

			/* Print huge allocation stats. */
			malloc_printf(
			    "huge: nmalloc      ndalloc      "
			    "nralloc      allocated\n");
			malloc_printf(" %12llu %12llu %12llu %12zu\n",
			    huge_nmalloc, huge_ndalloc, huge_nralloc,
			    huge_allocated);

			/* Print stats for each arena. */
			for (i = 0; i < narenas; i++) {
				arena = arenas[i];
				if (arena != NULL) {
					malloc_printf(
					    "\narenas[%u] @ %p\n", i, arena);
					malloc_mutex_lock(&arena->mtx);
					stats_print(arena);
					malloc_mutex_unlock(&arena->mtx);
				}
			}
		}
#endif /* #ifdef MALLOC_STATS */
		_malloc_message("--- End malloc statistics ---\n", "", "", "");
	}
}

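/*
 * Illustrative usage note (editorial sketch, not part of the original code):
 * this report is driven by the option flags parsed in malloc_init_hard()
 * below; for example, running a program with MALLOC_OPTIONS=P in the
 * environment enables opt_print_stats, and the statistics are then emitted
 * from an atexit(3) handler when the program exits normally.
 */
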
/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static inline bool
malloc_init(void)
{

	if (malloc_initialized == false)
		return (malloc_init_hard());

	return (false);
}

static bool
malloc_init_hard(void)
{
	unsigned i, j;
	int linklen;
	char buf[PATH_MAX + 1];
	const char *opts = "";

	malloc_mutex_lock(&init_lock);
	if (malloc_initialized) {
		/*
		 * Another thread initialized the allocator before this one
		 * acquired init_lock.
		 */
		malloc_mutex_unlock(&init_lock);
		return (false);
	}

	/* Get number of CPUs. */
	{
		int mib[2];
		size_t len;

		mib[0] = CTL_HW;
		mib[1] = HW_NCPU;
		len = sizeof(ncpus);
		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
			/* Error. */
			ncpus = 1;
		}
	}

	/* Get page size. */
	{
		long result;

		result = sysconf(_SC_PAGESIZE);
		assert(result != -1);
		pagesize = (unsigned) result;

		/*
		 * We assume that pagesize is a power of 2 when calculating
		 * pagesize_mask and pagesize_2pow.
		 */
		assert(((result - 1) & result) == 0);
		pagesize_mask = result - 1;
		pagesize_2pow = ffs((int)result) - 1;
	}

	for (i = 0; i < 3; i++) {
		/* Get runtime configuration. */
		switch (i) {
		case 0:
			if ((linklen = readlink("/etc/malloc.conf", buf,
			    sizeof(buf) - 1)) != -1) {
				/*
				 * Use the contents of the "/etc/malloc.conf"
				 * symbolic link's name.
				 */
				buf[linklen] = '\0';
				opts = buf;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 1:
			if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
			    issetugid() == 0) {
				/*
				 * Do nothing; opts is already initialized to
				 * the value of the MALLOC_OPTIONS environment
				 * variable.
				 */
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 2:
			if (_malloc_options != NULL) {
				/*
				 * Use options that were compiled into the
				 * program.
				 */
				opts = _malloc_options;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		default:
			/* NOTREACHED */
			assert(false);
		}

		for (j = 0; opts[j] != '\0'; j++) {
			switch (opts[j]) {
			case 'k':
				/*
				 * Chunks always require at least one header
				 * page, so chunks can never be smaller than
				 * two pages.
				 */
				if (opt_chunk_2pow > pagesize_2pow + 1)
					opt_chunk_2pow--;
				break;
			case 'K':
				if (opt_chunk_2pow + 1 <
				    (int)(sizeof(size_t) << 3))
					opt_chunk_2pow++;
				break;
			case 'n':
				opt_narenas_lshift--;
				break;
			case 'N':
				opt_narenas_lshift++;
				break;
			case 'p':
				opt_print_stats = false;
				break;
			case 'P':
				opt_print_stats = true;
				break;
			case 'q':
				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
					opt_quantum_2pow--;
				break;
			case 'Q':
				if (opt_quantum_2pow < pagesize_2pow - 1)
					opt_quantum_2pow++;
				break;
			case 's':
				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
					opt_small_max_2pow--;
				break;
			case 'S':
				if (opt_small_max_2pow < pagesize_2pow - 1)
					opt_small_max_2pow++;
				break;
			case 'x':
				opt_xmalloc = false;
				break;
			default: {
				char cbuf[2];

				cbuf[0] = opts[j];
				cbuf[1] = '\0';
				_malloc_message(getprogname(),
				    ": (malloc) Unsupported character in "
				    "malloc options: '", cbuf, "'\n");
			}
			}
		}
	}

	/* Take care to call atexit() only once. */
	if (opt_print_stats) {
		/* Print statistics at exit. */
		atexit(malloc_print_stats);
	}

	/* Set variables according to the value of opt_small_max_2pow. */
	if (opt_small_max_2pow < opt_quantum_2pow)
		opt_small_max_2pow = opt_quantum_2pow;
	small_max = (1 << opt_small_max_2pow);

	/* Set bin-related variables. */
	bin_maxclass = (pagesize >> 1);
	assert(opt_quantum_2pow >= TINY_MIN_2POW);
	ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
	assert(ntbins <= opt_quantum_2pow);
	nqbins = (unsigned)(small_max >> opt_quantum_2pow);
	nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);

	/* Set variables according to the value of opt_quantum_2pow. */
	quantum = (1 << opt_quantum_2pow);
	quantum_mask = quantum - 1;
	if (ntbins > 0)
		small_min = (quantum >> 1) + 1;
	else
		small_min = 1;
	assert(small_min <= quantum);

	/* Set variables according to the value of opt_chunk_2pow. */
	chunksize = (1LU << opt_chunk_2pow);
	chunksize_mask = chunksize - 1;
	chunksize_2pow = (unsigned)opt_chunk_2pow;
	chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
	{
		unsigned header_size;

		header_size = (unsigned)(sizeof(arena_chunk_t) +
		    (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
		arena_chunk_header_npages = (header_size >> pagesize_2pow);
		if ((header_size & pagesize_mask) != 0)
			arena_chunk_header_npages++;
	}
	arena_maxclass = chunksize - (arena_chunk_header_npages <<
	    pagesize_2pow);

#ifdef MALLOC_STATS
	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif

	/* Various sanity checks that regard configuration. */
	assert(quantum >= sizeof(void *));
	assert(quantum <= pagesize);
	assert(chunksize >= pagesize);
	assert(quantum * 4 <= chunksize);

	/* Initialize chunks data. */
	malloc_mutex_init(&chunks_mtx);
	RB_INIT(&huge);
#ifdef USE_BRK
	malloc_mutex_init(&brk_mtx);
	brk_base = sbrk(0);
	brk_prev = brk_base;
	brk_max = brk_base;
#endif
	RB_INIT(&old_chunks);

	/* Initialize base allocation data structures. */
#ifdef USE_BRK
	/*
	 * Allocate a base chunk here, since it doesn't actually have to be
	 * chunk-aligned.  Doing this before allocating any other chunks allows
	 * the use of space that would otherwise be wasted.
	 */
	base_pages_alloc(0);
#endif
	base_chunk_nodes = NULL;
	malloc_mutex_init(&base_mtx);

	if (ncpus > 1) {
		/*
		 * For SMP systems, create four times as many arenas as there
		 * are CPUs by default.
		 */
		opt_narenas_lshift += 2;
	}

	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_malloc()
		 * can handle.
		 */
		if (narenas * sizeof(arena_t *) > chunksize)
			narenas = (unsigned)(chunksize / sizeof(arena_t *));
	} else if (opt_narenas_lshift < 0) {
		if ((narenas << opt_narenas_lshift) < narenas)
			narenas <<= opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}

	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array.  In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);

	/*
	 * Initialize one arena here.  The rest are lazily created in
	 * arena_choose_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	malloc_mutex_init(&arenas_mtx);

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}

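/*
 * Illustrative example (editorial sketch, not part of the original code):
 * assuming the default opt_narenas_lshift of 0 and no MALLOC_OPTIONS
 * overrides, a 4-CPU machine takes the SMP path above (opt_narenas_lshift
 * += 2), so narenas = 4 << 2 = 16, i.e. four arenas per CPU.
 */
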
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */

void *
malloc(size_t size)
{
	void *ret;

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	}

	ret = imalloc(size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in malloc(): out of memory\n", "",
			    "");
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, size, ret);
	return (ret);
}

int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int ret;
	void *result;

	if (malloc_init())
		result = NULL;
	else {
		/* Make sure that alignment is a large enough power of 2. */
		if (((alignment - 1) & alignment) != 0
		    || alignment < sizeof(void *)) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in posix_memalign(): "
				    "invalid alignment\n", "", "");
				abort();
			}
			result = NULL;
			ret = EINVAL;
			goto RETURN;
		}

		result = ipalloc(alignment, size);
	}

	if (result == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in posix_memalign(): out of memory\n",
			    "", "");
			abort();
		}
		ret = ENOMEM;
		goto RETURN;
	}

	*memptr = result;
	ret = 0;

RETURN:
	UTRACE(0, size, result);
	return (ret);
}

void *
calloc(size_t num, size_t size)
{
	void *ret;
	size_t num_size;

	if (malloc_init()) {
		num_size = 0;
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
			num_size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	/*
	 * Try to avoid division here.  We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if ((unsigned long long)((num | size) &
	    ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
	    (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}

	ret = icalloc(num_size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			_malloc_message(getprogname(),
			    ": (malloc) Error in calloc(): out of memory\n", "",
			    "");
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, num_size, ret);
	return (ret);
}

void *
realloc(void *ptr, size_t size)
{
	void *ret;

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			if (ptr != NULL)
				idalloc(ptr);
			ret = NULL;
			goto RETURN;
		}
	}

	if (ptr != NULL) {
		assert(malloc_initialized);

		ret = iralloc(ptr, size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in realloc(): out of "
				    "memory\n", "", "");
				abort();
			}
			errno = ENOMEM;
		}
	} else {
		if (malloc_init())
			ret = NULL;
		else
			ret = imalloc(size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				_malloc_message(getprogname(),
				    ": (malloc) Error in realloc(): out of "
				    "memory\n", "", "");
				abort();
			}
			errno = ENOMEM;
		}
	}

RETURN:
	UTRACE(ptr, size, ret);
	return (ret);
}

void
free(void *ptr)
{

	UTRACE(ptr, 0, 0);
	if (ptr != NULL) {
		assert(malloc_initialized);

		idalloc(ptr);
	}
}

/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */

size_t
malloc_usable_size(const void *ptr)
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}

/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions, used by threading libraries for protection
 * of malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

3969 /******************************************************************************/
3971 * Begin library-private functions, used by threading libraries for protection
3972 * of malloc during fork(). These functions are only called if the program is
3973 * running in threaded mode, so there is no need to check whether the program
3978 _malloc_prefork(void)
3982 /* Acquire all mutexes in a safe order. */
3984 malloc_mutex_lock(&arenas_mtx
);
3985 for (i
= 0; i
< narenas
; i
++) {
3986 if (arenas
[i
] != NULL
)
3987 malloc_mutex_lock(&arenas
[i
]->mtx
);
3990 malloc_mutex_lock(&base_mtx
);
3992 malloc_mutex_lock(&chunks_mtx
);
3996 _malloc_postfork(void)
4000 /* Release all mutexes, now that fork() has completed. */
4002 malloc_mutex_unlock(&chunks_mtx
);
4004 malloc_mutex_unlock(&base_mtx
);
4006 for (i
= 0; i
< narenas
; i
++) {
4007 if (arenas
[i
] != NULL
)
4008 malloc_mutex_unlock(&arenas
[i
]->mtx
);
4010 malloc_mutex_unlock(&arenas_mtx
);
/*
 * End library-private functions.
 */
/******************************************************************************/