1 /* $NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $ */
3 /*-
4 * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice(s), this list of conditions and the following disclaimer as
12 * the first lines of this file unmodified other than the possible
13 * addition of one or more copyright notices.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice(s), this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *******************************************************************************
33 * This allocator implementation is designed to provide scalable performance
34 * for multi-threaded programs on multi-processor systems. The following
35 * features are included for this purpose:
37 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
38 * contention and cache sloshing.
40 * + Cache line sharing between arenas is avoided for internal data
41 * structures.
43 * + Memory is managed in chunks and runs (chunks can be split into runs),
44 * rather than as individual pages. This provides a constant-time
45 * mechanism for associating allocations with particular arenas.
47 * Allocation requests are rounded up to the nearest size class, and no record
48 * of the original request size is maintained. Allocations are broken into
49 * categories according to size class. Assuming runtime defaults, 4 kB pages
 50  * and a 16-byte quantum, the size classes in each category are as follows:
52 * |=====================================|
53 * | Category | Subcategory | Size |
54 * |=====================================|
55 * | Small | Tiny | 2 |
56 * | | | 4 |
57 * | | | 8 |
58 * | |----------------+---------|
59 * | | Quantum-spaced | 16 |
60 * | | | 32 |
61 * | | | 48 |
62 * | | | ... |
63 * | | | 480 |
64 * | | | 496 |
65 * | | | 512 |
66 * | |----------------+---------|
67 * | | Sub-page | 1 kB |
68 * | | | 2 kB |
69 * |=====================================|
70 * | Large | 4 kB |
71 * | | 8 kB |
72 * | | 12 kB |
73 * | | ... |
74 * | | 1012 kB |
75 * | | 1016 kB |
76 * | | 1020 kB |
77 * |=====================================|
78 * | Huge | 1 MB |
79 * | | 2 MB |
80 * | | 3 MB |
81 * | | ... |
82 * |=====================================|
84 * A different mechanism is used for each category:
86 * Small : Each size class is segregated into its own set of runs. Each run
87 * maintains a bitmap of which regions are free/allocated.
89 * Large : Each allocation is backed by a dedicated run. Metadata are stored
90 * in the associated arena chunk header maps.
92 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93 * Metadata are stored in a separate red-black tree.
95 *******************************************************************************
98 /* LINTLIBRARY */
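/*
 * Editorial sketch, not part of the original source: a minimal illustration
 * of how a request size falls into the categories tabulated above, assuming
 * the documented defaults (16-byte quantum, 4 kB pages, 1 MB chunks).  The
 * function name and the hard-coded thresholds are assumptions made for the
 * example only; the allocator computes the real boundaries at runtime from
 * opt_quantum_2pow, opt_small_max_2pow and opt_chunk_2pow.
 */
static const char *
example_size_category(unsigned long size)
{
	const unsigned long quantum_assumed = 16;	/* 2^QUANTUM_2POW_MIN */
	const unsigned long small_max_assumed = 512;	/* SMALL_MAX_DEFAULT */
	const unsigned long page_assumed = 4096;	/* assumed 4 kB page */
	const unsigned long chunk_assumed = 1UL << 20;	/* CHUNK_2POW_DEFAULT */

	if (size <= quantum_assumed / 2)
		return ("small/tiny: rounded up to a power of 2, at least 2");
	else if (size <= small_max_assumed)
		return ("small/quantum-spaced: rounded up to a multiple of 16");
	else if (size <= page_assumed / 2)
		return ("small/sub-page: rounded up to a power of 2");
	else if (size <= chunk_assumed - page_assumed)	/* header pages ignored */
		return ("large: rounded up to a multiple of the page size");
	else
		return ("huge: rounded up to a multiple of the chunk size");
}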
100 #ifdef __NetBSD__
101 # define xutrace(a, b) utrace("malloc", (a), (b))
102 # define __DECONST(x, y) ((x)__UNCONST(y))
103 # define NO_TLS
104 #else
105 # define xutrace(a, b) utrace((a), (b))
106 #endif /* __NetBSD__ */
109 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
110 * defaults the A and J runtime options to off. These settings are appropriate
111 * for production systems.
113 #define MALLOC_PRODUCTION
115 #ifndef MALLOC_PRODUCTION
116 # define MALLOC_DEBUG
117 #endif
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.28 2012/03/21 14:32:22 christos Exp $");
123 #ifdef __FreeBSD__
124 #include "libc_private.h"
125 #ifdef MALLOC_DEBUG
126 # define _LOCK_DEBUG
127 #endif
128 #include "spinlock.h"
129 #endif
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
133 #ifdef __FreeBSD__
134 #include <sys/stddef.h>
135 #endif
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
140 #include <sys/uio.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
143 #ifdef __FreeBSD__
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
146 #endif
147 #include <machine/vmparam.h>
149 #include <errno.h>
150 #include <limits.h>
151 #include <pthread.h>
152 #include <sched.h>
153 #include <stdarg.h>
154 #include <stdbool.h>
155 #include <stdio.h>
156 #include <stdint.h>
157 #include <stdlib.h>
158 #include <string.h>
159 #include <strings.h>
160 #include <unistd.h>
162 #ifdef __NetBSD__
163 # include <reentrant.h>
164 # include "extern.h"
166 #define STRERROR_R(a, b, c) __strerror_r(a, b, c);
 167 /*
 168  * A non-localized version of strerror, that avoids bringing in
 169  * stdio and the locale code. All the malloc messages are in English
 170  * so why bother?
 171  */
 172 static int
 173 __strerror_r(int e, char *s, size_t l)
 174 {
 175 	int rval;
 176 	size_t slen;
 178 	if (e >= 0 && e < sys_nerr) {
 179 		slen = strlcpy(s, sys_errlist[e], l);
 180 		rval = 0;
 181 	} else {
 182 		slen = snprintf_ss(s, l, "Unknown error %u", e);
 183 		rval = EINVAL;
 184 	}
 185 	return slen >= l ? ERANGE : rval;
 186 }
 187 #endif
189 #ifdef __FreeBSD__
190 #define STRERROR_R(a, b, c) strerror_r(a, b, c);
191 #include "un-namespace.h"
192 #endif
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 # define MALLOC_STATS
197 #endif
199 #ifdef MALLOC_DEBUG
200 # ifdef NDEBUG
201 # undef NDEBUG
202 # endif
203 #else
204 # ifndef NDEBUG
205 # define NDEBUG
206 # endif
207 #endif
208 #include <assert.h>
210 #ifdef MALLOC_DEBUG
211 /* Disable inlining to make debugging easier. */
212 # define inline
213 #endif
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define STRERROR_BUF 64
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
219 #ifdef __i386__
220 # define QUANTUM_2POW_MIN 4
221 # define SIZEOF_PTR_2POW 2
222 # define USE_BRK
223 #endif
224 #ifdef __ia64__
225 # define QUANTUM_2POW_MIN 4
226 # define SIZEOF_PTR_2POW 3
227 #endif
228 #ifdef __alpha__
229 # define QUANTUM_2POW_MIN 4
230 # define SIZEOF_PTR_2POW 3
231 # define NO_TLS
232 #endif
233 #ifdef __sparc64__
234 # define QUANTUM_2POW_MIN 4
235 # define SIZEOF_PTR_2POW 3
236 # define NO_TLS
237 #endif
238 #ifdef __amd64__
239 # define QUANTUM_2POW_MIN 4
240 # define SIZEOF_PTR_2POW 3
241 #endif
242 #ifdef __arm__
243 # define QUANTUM_2POW_MIN 3
244 # define SIZEOF_PTR_2POW 2
245 # define USE_BRK
246 # define NO_TLS
247 #endif
248 #ifdef __powerpc__
249 # define QUANTUM_2POW_MIN 4
250 # define SIZEOF_PTR_2POW 2
251 # define USE_BRK
252 #endif
253 #if defined(__sparc__) && !defined(__sparc64__)
254 # define QUANTUM_2POW_MIN 4
255 # define SIZEOF_PTR_2POW 2
256 # define USE_BRK
257 #endif
258 #ifdef __vax__
259 # define QUANTUM_2POW_MIN 4
260 # define SIZEOF_PTR_2POW 2
261 # define USE_BRK
262 #endif
263 #ifdef __sh__
264 # define QUANTUM_2POW_MIN 4
265 # define SIZEOF_PTR_2POW 2
266 # define USE_BRK
267 #endif
268 #ifdef __m68k__
269 # define QUANTUM_2POW_MIN 4
270 # define SIZEOF_PTR_2POW 2
271 # define USE_BRK
272 #endif
273 #ifdef __mips__
274 # define QUANTUM_2POW_MIN 4
275 # define SIZEOF_PTR_2POW 2
276 # define USE_BRK
277 #endif
278 #ifdef __hppa__
279 # define QUANTUM_2POW_MIN 4
280 # define SIZEOF_PTR_2POW 2
281 # define USE_BRK
282 #endif
284 #define SIZEOF_PTR (1 << SIZEOF_PTR_2POW)
286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
287 #ifndef SIZEOF_INT_2POW
288 # define SIZEOF_INT_2POW 2
289 #endif
291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
292 #if (!defined(PIC) && !defined(NO_TLS))
293 # define NO_TLS
294 #endif
297 * Size and alignment of memory chunks that are allocated by the OS's virtual
298 * memory system.
300 #define CHUNK_2POW_DEFAULT 20
303 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
304 * so over-estimates are okay (up to a point), but under-estimates will
305 * negatively affect performance.
307 #define CACHELINE_2POW 6
308 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
310 /* Smallest size class to support. */
311 #define TINY_MIN_2POW 1
314 * Maximum size class that is a multiple of the quantum, but not (necessarily)
315 * a power of 2. Above this size, allocations are rounded up to the nearest
316 * power of 2.
318 #define SMALL_MAX_2POW_DEFAULT 9
319 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
322 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
323 * as small as possible such that this setting is still honored, without
324 * violating other constraints. The goal is to make runs as small as possible
325 * without exceeding a per run external fragmentation threshold.
327 * We use binary fixed point math for overhead computations, where the binary
328 * point is implicitly RUN_BFP bits to the left.
330 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
331 * honored for some/all object sizes, since there is one bit of header overhead
332 * per object (plus a constant). This constraint is relaxed (ignored) for runs
333 * that are so small that the per-region overhead is greater than:
 335  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
337 #define RUN_BFP 12
338 /* \/ Implicit binary fixed point. */
339 #define RUN_MAX_OVRHD 0x0000003dU
340 #define RUN_MAX_OVRHD_RELAX 0x00001800U
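/*
 * Editorial sketch, not part of the original source: RUN_MAX_OVRHD is a
 * binary fixed-point fraction with RUN_BFP (12) bits after the binary point,
 * so the target is a header overhead of at most 0x3d / 2^12, i.e. roughly
 * 1.5% of a run.  The hypothetical helper below spells out the comparison
 * this implies; the authoritative logic lives in arena_bin_run_size_calc().
 */
static bool
example_run_ovrhd_acceptable(size_t header_size, size_t run_size)
{

	/*
	 * header_size / run_size <= RUN_MAX_OVRHD / 2^RUN_BFP, rearranged to
	 * avoid division.  For example, a 48-byte header in a 4096-byte run
	 * gives 48 << 12 == 196608 <= 0x3d * 4096 == 249856, so it passes.
	 */
	return ((header_size << RUN_BFP) <= RUN_MAX_OVRHD * run_size);
}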
342 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
343 #define RUN_MAX_SMALL_2POW 15
344 #define RUN_MAX_SMALL (1 << RUN_MAX_SMALL_2POW)
346 /******************************************************************************/
348 #ifdef __FreeBSD__
350 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
351 * they require malloc()ed memory.
353 typedef struct {
354 spinlock_t lock;
355 } malloc_mutex_t;
357 /* Set to true once the allocator has been initialized. */
358 static bool malloc_initialized = false;
360 /* Used to avoid initialization races. */
361 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
362 #else
363 #define malloc_mutex_t mutex_t
365 /* Set to true once the allocator has been initialized. */
366 static bool malloc_initialized = false;
368 /* Used to avoid initialization races. */
369 static mutex_t init_lock = MUTEX_INITIALIZER;
370 #endif
372 /******************************************************************************/
374 * Statistics data structures.
377 #ifdef MALLOC_STATS
379 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
380 struct malloc_bin_stats_s {
382 * Number of allocation requests that corresponded to the size of this
383 * bin.
385 uint64_t nrequests;
387 /* Total number of runs created for this bin's size class. */
388 uint64_t nruns;
391 * Total number of runs reused by extracting them from the runs tree for
392 * this bin's size class.
394 uint64_t reruns;
396 /* High-water mark for this bin. */
397 unsigned long highruns;
399 /* Current number of runs in this bin. */
400 unsigned long curruns;
403 typedef struct arena_stats_s arena_stats_t;
404 struct arena_stats_s {
405 /* Number of bytes currently mapped. */
406 size_t mapped;
408 /* Per-size-category statistics. */
409 size_t allocated_small;
410 uint64_t nmalloc_small;
411 uint64_t ndalloc_small;
413 size_t allocated_large;
414 uint64_t nmalloc_large;
415 uint64_t ndalloc_large;
418 typedef struct chunk_stats_s chunk_stats_t;
419 struct chunk_stats_s {
420 /* Number of chunks that were allocated. */
421 uint64_t nchunks;
423 /* High-water mark for number of chunks allocated. */
424 unsigned long highchunks;
 427 	 * Current number of chunks allocated. This value is not needed for
 428 	 * anything else; it is maintained only so that highchunks can be
 429 	 * kept up to date.
431 unsigned long curchunks;
434 #endif /* #ifdef MALLOC_STATS */
436 /******************************************************************************/
438 * Chunk data structures.
441 /* Tree of chunks. */
442 typedef struct chunk_node_s chunk_node_t;
443 struct chunk_node_s {
444 /* Linkage for the chunk tree. */
445 RB_ENTRY(chunk_node_s) link;
448 * Pointer to the chunk that this tree node is responsible for. In some
449 * (but certainly not all) cases, this data structure is placed at the
450 * beginning of the corresponding chunk, so this field may point to this
451 * node.
453 void *chunk;
455 /* Total chunk size. */
456 size_t size;
458 typedef struct chunk_tree_s chunk_tree_t;
459 RB_HEAD(chunk_tree_s, chunk_node_s);
461 /******************************************************************************/
463 * Arena data structures.
466 typedef struct arena_s arena_t;
467 typedef struct arena_bin_s arena_bin_t;
469 typedef struct arena_chunk_map_s arena_chunk_map_t;
470 struct arena_chunk_map_s {
471 /* Number of pages in run. */
472 uint32_t npages;
474 * Position within run. For a free run, this is POS_FREE for the first
475 * and last pages. The POS_FREE special value makes it possible to
476 * quickly coalesce free runs.
478 * This is the limiting factor for chunksize; there can be at most 2^31
479 * pages in a run.
481 #define POS_FREE ((uint32_t)0xffffffffU)
482 uint32_t pos;
485 /* Arena chunk header. */
486 typedef struct arena_chunk_s arena_chunk_t;
487 struct arena_chunk_s {
488 /* Arena that owns the chunk. */
489 arena_t *arena;
491 /* Linkage for the arena's chunk tree. */
492 RB_ENTRY(arena_chunk_s) link;
495 * Number of pages in use. This is maintained in order to make
496 * detection of empty chunks fast.
498 uint32_t pages_used;
501 * Every time a free run larger than this value is created/coalesced,
502 * this value is increased. The only way that the value decreases is if
503 * arena_run_alloc() fails to find a free run as large as advertised by
504 * this value.
506 uint32_t max_frun_npages;
509 * Every time a free run that starts at an earlier page than this value
510 * is created/coalesced, this value is decreased. It is reset in a
511 * similar fashion to max_frun_npages.
513 uint32_t min_frun_ind;
516 * Map of pages within chunk that keeps track of free/large/small. For
517 * free runs, only the map entries for the first and last pages are
518 * kept up to date, so that free runs can be quickly coalesced.
520 arena_chunk_map_t map[1]; /* Dynamically sized. */
522 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
523 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
525 typedef struct arena_run_s arena_run_t;
526 struct arena_run_s {
527 /* Linkage for run trees. */
528 RB_ENTRY(arena_run_s) link;
530 #ifdef MALLOC_DEBUG
531 uint32_t magic;
532 # define ARENA_RUN_MAGIC 0x384adf93
533 #endif
535 /* Bin this run is associated with. */
536 arena_bin_t *bin;
538 /* Index of first element that might have a free region. */
539 unsigned regs_minelm;
541 /* Number of free regions in run. */
542 unsigned nfree;
544 /* Bitmask of in-use regions (0: in use, 1: free). */
545 unsigned regs_mask[1]; /* Dynamically sized. */
547 typedef struct arena_run_tree_s arena_run_tree_t;
548 RB_HEAD(arena_run_tree_s, arena_run_s);
550 struct arena_bin_s {
552 * Current run being used to service allocations of this bin's size
553 * class.
555 arena_run_t *runcur;
558 * Tree of non-full runs. This tree is used when looking for an
559 * existing run when runcur is no longer usable. We choose the
560 * non-full run that is lowest in memory; this policy tends to keep
561 * objects packed well, and it can also help reduce the number of
562 * almost-empty chunks.
564 arena_run_tree_t runs;
566 /* Size of regions in a run for this bin's size class. */
567 size_t reg_size;
569 /* Total size of a run for this bin's size class. */
570 size_t run_size;
572 /* Total number of regions in a run for this bin's size class. */
573 uint32_t nregs;
575 /* Number of elements in a run's regs_mask for this bin's size class. */
576 uint32_t regs_mask_nelms;
578 /* Offset of first region in a run for this bin's size class. */
579 uint32_t reg0_offset;
581 #ifdef MALLOC_STATS
582 /* Bin statistics. */
583 malloc_bin_stats_t stats;
584 #endif
587 struct arena_s {
588 #ifdef MALLOC_DEBUG
589 uint32_t magic;
590 # define ARENA_MAGIC 0x947d3d24
591 #endif
593 /* All operations on this arena require that mtx be locked. */
594 malloc_mutex_t mtx;
596 #ifdef MALLOC_STATS
597 arena_stats_t stats;
598 #endif
601 * Tree of chunks this arena manages.
603 arena_chunk_tree_t chunks;
606 * In order to avoid rapid chunk allocation/deallocation when an arena
607 * oscillates right on the cusp of needing a new chunk, cache the most
608 * recently freed chunk. This caching is disabled by opt_hint.
610 * There is one spare chunk per arena, rather than one spare total, in
611 * order to avoid interactions between multiple threads that could make
612 * a single spare inadequate.
614 arena_chunk_t *spare;
617 * bins is used to store rings of free regions of the following sizes,
618 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
620 * bins[i] | size |
621 * --------+------+
622 * 0 | 2 |
623 * 1 | 4 |
624 * 2 | 8 |
625 * --------+------+
626 * 3 | 16 |
627 * 4 | 32 |
628 * 5 | 48 |
629 * 6 | 64 |
630 * : :
631 * : :
632 * 33 | 496 |
633 * 34 | 512 |
634 * --------+------+
635 * 35 | 1024 |
636 * 36 | 2048 |
637 * --------+------+
 639 	arena_bin_t bins[1]; /* Dynamically sized. */
 640 };
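/*
 * Editorial sketch, not part of the original source: how a small request
 * size maps onto a bin index under the defaults shown in the table above
 * (16-byte quantum, 4 kB pages, so ntbins == 3, nqbins == 32, nsbins == 2).
 * The function name and the hard-coded cut-offs are assumptions for
 * illustration; the allocator derives the real values at runtime.
 */
static unsigned
example_small_size2bin(size_t size)
{
	const unsigned ntbins_assumed = 3;	/* tiny bins hold 2, 4 and 8 bytes */
	unsigned pow;

	if (size <= 8) {
		/* Tiny: bins 0..2. */
		for (pow = 1; ((size_t)1 << pow) < (size < 2 ? 2 : size); pow++)
			continue;
		return (pow - 1);
	} else if (size <= 512) {
		/* Quantum-spaced: bins 3..34 hold 16, 32, ..., 512 bytes. */
		return (ntbins_assumed + (unsigned)((size + 15) >> 4) - 1);
	} else {
		/* Sub-page: bins 35 and 36 hold 1 kB and 2 kB. */
		return (ntbins_assumed + 32 + (size <= 1024 ? 0 : 1));
	}
}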
642 /******************************************************************************/
644 * Data.
647 /* Number of CPUs. */
648 static unsigned ncpus;
650 /* VM page size. */
651 static size_t pagesize;
652 static size_t pagesize_mask;
653 static int pagesize_2pow;
655 /* Various bin-related settings. */
656 static size_t bin_maxclass; /* Max size class for bins. */
657 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
658 static unsigned nqbins; /* Number of quantum-spaced bins. */
659 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
660 static size_t small_min;
661 static size_t small_max;
663 /* Various quantum-related settings. */
664 static size_t quantum;
665 static size_t quantum_mask; /* (quantum - 1). */
667 /* Various chunk-related settings. */
668 static size_t chunksize;
669 static size_t chunksize_mask; /* (chunksize - 1). */
670 static int chunksize_2pow;
671 static unsigned chunk_npages;
672 static unsigned arena_chunk_header_npages;
673 static size_t arena_maxclass; /* Max size class for arenas. */
675 /********/
677 * Chunks.
680 /* Protects chunk-related data structures. */
681 static malloc_mutex_t chunks_mtx;
683 /* Tree of chunks that are stand-alone huge allocations. */
684 static chunk_tree_t huge;
686 #ifdef USE_BRK
688 * Try to use brk for chunk-size allocations, due to address space constraints.
691 * Protects sbrk() calls. This must be separate from chunks_mtx, since
692 * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
693 * could cause recursive lock acquisition).
695 static malloc_mutex_t brk_mtx;
696 /* Result of first sbrk(0) call. */
697 static void *brk_base;
698 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
699 static void *brk_prev;
700 /* Current upper limit on brk addresses. */
701 static void *brk_max;
702 #endif
704 #ifdef MALLOC_STATS
705 /* Huge allocation statistics. */
706 static uint64_t huge_nmalloc;
707 static uint64_t huge_ndalloc;
708 static uint64_t huge_nralloc;
709 static size_t huge_allocated;
710 #endif
713 * Tree of chunks that were previously allocated. This is used when allocating
714 * chunks, in an attempt to re-use address space.
716 static chunk_tree_t old_chunks;
718 /****************************/
720 * base (internal allocation).
724 * Current pages that are being used for internal memory allocations. These
725 * pages are carved up in cacheline-size quanta, so that there is no chance of
726 * false cache line sharing.
728 static void *base_pages;
729 static void *base_next_addr;
730 static void *base_past_addr; /* Addr immediately past base_pages. */
731 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
732 static malloc_mutex_t base_mtx;
733 #ifdef MALLOC_STATS
734 static size_t base_mapped;
735 #endif
737 /********/
739 * Arenas.
743 * Arenas that are used to service external requests. Not all elements of the
744 * arenas array are necessarily used; arenas are created lazily as needed.
746 static arena_t **arenas;
747 static unsigned narenas;
748 static unsigned next_arena;
749 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
751 #ifndef NO_TLS
753 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
754 * for allocations.
756 static __thread arena_t *arenas_map;
757 #define get_arenas_map() (arenas_map)
758 #define set_arenas_map(x) (arenas_map = x)
759 #else
760 static thread_key_t arenas_map_key;
761 #define get_arenas_map() thr_getspecific(arenas_map_key)
762 #define set_arenas_map(x) thr_setspecific(arenas_map_key, x)
763 #endif
765 #ifdef MALLOC_STATS
766 /* Chunk statistics. */
767 static chunk_stats_t stats_chunks;
768 #endif
770 /*******************************/
772 * Runtime configuration options.
774 const char *_malloc_options;
776 #ifndef MALLOC_PRODUCTION
777 static bool opt_abort = true;
778 static bool opt_junk = true;
779 #else
780 static bool opt_abort = false;
781 static bool opt_junk = false;
782 #endif
783 static bool opt_hint = false;
784 static bool opt_print_stats = false;
785 static int opt_quantum_2pow = QUANTUM_2POW_MIN;
786 static int opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
787 static int opt_chunk_2pow = CHUNK_2POW_DEFAULT;
788 static bool opt_utrace = false;
789 static bool opt_sysv = false;
790 static bool opt_xmalloc = false;
791 static bool opt_zero = false;
792 static int32_t opt_narenas_lshift = 0;
794 typedef struct {
795 void *p;
796 size_t s;
797 void *r;
798 } malloc_utrace_t;
800 #define UTRACE(a, b, c) \
801 if (opt_utrace) { \
802 malloc_utrace_t ut; \
803 ut.p = a; \
804 ut.s = b; \
805 ut.r = c; \
806 xutrace(&ut, sizeof(ut)); \
809 /******************************************************************************/
811 * Begin function prototypes for non-inline static functions.
814 static void wrtmessage(const char *p1, const char *p2, const char *p3,
815 const char *p4);
816 #ifdef MALLOC_STATS
817 static void malloc_printf(const char *format, ...);
818 #endif
819 static char *size_t2s(size_t x, char *s);
820 static bool base_pages_alloc(size_t minsize);
821 static void *base_alloc(size_t size);
822 static chunk_node_t *base_chunk_node_alloc(void);
823 static void base_chunk_node_dealloc(chunk_node_t *node);
824 #ifdef MALLOC_STATS
825 static void stats_print(arena_t *arena);
826 #endif
827 static void *pages_map(void *addr, size_t size);
828 static void *pages_map_align(void *addr, size_t size, int align);
829 static void pages_unmap(void *addr, size_t size);
830 static void *chunk_alloc(size_t size);
831 static void chunk_dealloc(void *chunk, size_t size);
832 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
833 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
834 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
835 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
836 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
837 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
838 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
839 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
840 static void *arena_malloc(arena_t *arena, size_t size);
841 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
842 size_t alloc_size);
843 static size_t arena_salloc(const void *ptr);
844 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
845 static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
846 static bool arena_new(arena_t *arena);
847 static arena_t *arenas_extend(unsigned ind);
848 static void *huge_malloc(size_t size);
849 static void *huge_palloc(size_t alignment, size_t size);
850 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
851 static void huge_dalloc(void *ptr);
852 static void *imalloc(size_t size);
853 static void *ipalloc(size_t alignment, size_t size);
854 static void *icalloc(size_t size);
855 static size_t isalloc(const void *ptr);
856 static void *iralloc(void *ptr, size_t size);
857 static void idalloc(void *ptr);
858 static void malloc_print_stats(void);
859 static bool malloc_init_hard(void);
862 * End function prototypes.
864 /******************************************************************************/
866 * Begin mutex.
869 #ifdef __NetBSD__
870 #define malloc_mutex_init(m) mutex_init(m, NULL)
871 #define malloc_mutex_lock(m) mutex_lock(m)
872 #define malloc_mutex_unlock(m) mutex_unlock(m)
873 #else /* __NetBSD__ */
874 static inline void
875 malloc_mutex_init(malloc_mutex_t *a_mutex)
877 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
879 a_mutex->lock = lock;
882 static inline void
883 malloc_mutex_lock(malloc_mutex_t *a_mutex)
886 if (__isthreaded)
887 _SPINLOCK(&a_mutex->lock);
890 static inline void
891 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
894 if (__isthreaded)
895 _SPINUNLOCK(&a_mutex->lock);
897 #endif /* __NetBSD__ */
900 * End mutex.
902 /******************************************************************************/
904 * Begin Utility functions/macros.
907 /* Return the chunk address for allocation address a. */
908 #define CHUNK_ADDR2BASE(a) \
909 ((void *)((uintptr_t)(a) & ~chunksize_mask))
911 /* Return the chunk offset of address a. */
912 #define CHUNK_ADDR2OFFSET(a) \
913 ((size_t)((uintptr_t)(a) & chunksize_mask))
915 /* Return the smallest chunk multiple that is >= s. */
916 #define CHUNK_CEILING(s) \
917 (((s) + chunksize_mask) & ~chunksize_mask)
919 /* Return the smallest cacheline multiple that is >= s. */
920 #define CACHELINE_CEILING(s) \
921 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
923 /* Return the smallest quantum multiple that is >= a. */
924 #define QUANTUM_CEILING(a) \
925 (((a) + quantum_mask) & ~quantum_mask)
927 /* Return the smallest pagesize multiple that is >= s. */
928 #define PAGE_CEILING(s) \
929 (((s) + pagesize_mask) & ~pagesize_mask)
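/*
 * Editorial worked example, not part of the original source: with a 4 kB
 * page (pagesize_mask == 4095), the 64-byte CACHELINE defined earlier and a
 * 16-byte quantum, the ceiling macros round up as follows:
 *
 *	PAGE_CEILING(5000)    == (5000 + 4095) & ~4095 == 8192
 *	CACHELINE_CEILING(70) == (70 + 63) & ~63       == 128
 *	QUANTUM_CEILING(17)   == (17 + 15) & ~15       == 32
 *
 * i.e. each macro rounds its argument up to the next multiple of the
 * corresponding power-of-two size, and leaves exact multiples unchanged.
 */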
931 /* Compute the smallest power of 2 that is >= x. */
932 static inline size_t
933 pow2_ceil(size_t x)
936 x--;
937 x |= x >> 1;
938 x |= x >> 2;
939 x |= x >> 4;
940 x |= x >> 8;
941 x |= x >> 16;
942 #if (SIZEOF_PTR == 8)
943 x |= x >> 32;
944 #endif
945 x++;
946 return (x);
949 static void
950 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
953 write(STDERR_FILENO, p1, strlen(p1));
954 write(STDERR_FILENO, p2, strlen(p2));
955 write(STDERR_FILENO, p3, strlen(p3));
956 write(STDERR_FILENO, p4, strlen(p4));
959 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
960 const char *p4) = wrtmessage;
962 #ifdef MALLOC_STATS
964 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
966 static void
967 malloc_printf(const char *format, ...)
969 char buf[4096];
970 va_list ap;
972 va_start(ap, format);
973 vsnprintf(buf, sizeof(buf), format, ap);
974 va_end(ap);
975 _malloc_message(buf, "", "", "");
977 #endif
980 * We don't want to depend on vsnprintf() for production builds, since that can
981 * cause unnecessary bloat for static binaries. size_t2s() provides minimal
982 * integer printing functionality, so that malloc_printf() use can be limited to
983 * MALLOC_STATS code.
985 #define UMAX2S_BUFSIZE 21
986 static char *
987 size_t2s(size_t x, char *s)
989 unsigned i;
991 /* Make sure UMAX2S_BUFSIZE is large enough. */
992 /* LINTED */
993 assert(sizeof(size_t) <= 8);
995 i = UMAX2S_BUFSIZE - 1;
996 s[i] = '\0';
997 do {
998 i--;
999 s[i] = "0123456789"[(int)x % 10];
1000 x /= (uintmax_t)10LL;
1001 } while (x > 0);
 1003 	return (&s[i]);
 1004 }
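/*
 * Editorial usage sketch, not part of the original source: size_t2s() fills
 * the caller's buffer from the end and returns a pointer into it, so the
 * return value, not the start of the buffer, is what must be printed:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	const char *s = size_t2s(12345, buf);
 *
 * Here s points at the string "12345" somewhere inside buf, not at buf[0].
 */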
1006 /******************************************************************************/
1008 static bool
1009 base_pages_alloc(size_t minsize)
1011 size_t csize = 0;
1013 #ifdef USE_BRK
1015 * Do special brk allocation here, since base allocations don't need to
1016 * be chunk-aligned.
1018 if (brk_prev != (void *)-1) {
1019 void *brk_cur;
1020 intptr_t incr;
1022 if (minsize != 0)
1023 csize = CHUNK_CEILING(minsize);
1025 malloc_mutex_lock(&brk_mtx);
1026 do {
1027 /* Get the current end of brk. */
1028 brk_cur = sbrk(0);
1031 * Calculate how much padding is necessary to
1032 * chunk-align the end of brk. Don't worry about
1033 * brk_cur not being chunk-aligned though.
1035 incr = (intptr_t)chunksize
1036 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1037 assert(incr >= 0);
1038 if ((size_t)incr < minsize)
1039 incr += csize;
1041 brk_prev = sbrk(incr);
1042 if (brk_prev == brk_cur) {
1043 /* Success. */
1044 malloc_mutex_unlock(&brk_mtx);
1045 base_pages = brk_cur;
1046 base_next_addr = base_pages;
1047 base_past_addr = (void *)((uintptr_t)base_pages
1048 + incr);
1049 #ifdef MALLOC_STATS
1050 base_mapped += incr;
1051 #endif
1052 return (false);
1054 } while (brk_prev != (void *)-1);
1055 malloc_mutex_unlock(&brk_mtx);
1057 if (minsize == 0) {
1059 * Failure during initialization doesn't matter, so avoid
1060 * falling through to the mmap-based page mapping code.
1062 return (true);
1064 #endif
1065 assert(minsize != 0);
1066 csize = PAGE_CEILING(minsize);
1067 base_pages = pages_map(NULL, csize);
1068 if (base_pages == NULL)
1069 return (true);
1070 base_next_addr = base_pages;
1071 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1072 #ifdef MALLOC_STATS
1073 base_mapped += csize;
1074 #endif
1075 return (false);
1078 static void *
1079 base_alloc(size_t size)
1081 void *ret;
1082 size_t csize;
1084 /* Round size up to nearest multiple of the cacheline size. */
1085 csize = CACHELINE_CEILING(size);
1087 malloc_mutex_lock(&base_mtx);
1089 /* Make sure there's enough space for the allocation. */
1090 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1091 if (base_pages_alloc(csize)) {
1092 ret = NULL;
1093 goto RETURN;
1097 /* Allocate. */
1098 ret = base_next_addr;
1099 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1101 RETURN:
1102 malloc_mutex_unlock(&base_mtx);
1103 return (ret);
1106 static chunk_node_t *
1107 base_chunk_node_alloc(void)
1109 chunk_node_t *ret;
1111 malloc_mutex_lock(&base_mtx);
1112 if (base_chunk_nodes != NULL) {
1113 ret = base_chunk_nodes;
1114 /* LINTED */
1115 base_chunk_nodes = *(chunk_node_t **)ret;
1116 malloc_mutex_unlock(&base_mtx);
1117 } else {
1118 malloc_mutex_unlock(&base_mtx);
1119 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1122 return (ret);
1125 static void
1126 base_chunk_node_dealloc(chunk_node_t *node)
1129 malloc_mutex_lock(&base_mtx);
1130 /* LINTED */
1131 *(chunk_node_t **)node = base_chunk_nodes;
1132 base_chunk_nodes = node;
1133 malloc_mutex_unlock(&base_mtx);
1136 /******************************************************************************/
1138 #ifdef MALLOC_STATS
1139 static void
1140 stats_print(arena_t *arena)
1142 unsigned i;
1143 int gap_start;
1145 malloc_printf(
1146 " allocated/mapped nmalloc ndalloc\n");
1148 malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1149 arena->stats.allocated_small, "", arena->stats.nmalloc_small,
1150 arena->stats.ndalloc_small);
1151 malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1152 arena->stats.allocated_large, "", arena->stats.nmalloc_large,
1153 arena->stats.ndalloc_large);
1154 malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1155 arena->stats.allocated_small + arena->stats.allocated_large,
1156 arena->stats.mapped,
1157 arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1158 arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1160 malloc_printf("bins: bin size regs pgs requests newruns"
1161 " reruns maxruns curruns\n");
1162 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1163 if (arena->bins[i].stats.nrequests == 0) {
1164 if (gap_start == -1)
1165 gap_start = i;
1166 } else {
1167 if (gap_start != -1) {
1168 if (i > gap_start + 1) {
1169 /* Gap of more than one size class. */
1170 malloc_printf("[%u..%u]\n",
1171 gap_start, i - 1);
1172 } else {
1173 /* Gap of one size class. */
1174 malloc_printf("[%u]\n", gap_start);
1176 gap_start = -1;
1178 malloc_printf(
1179 "%13u %1s %4u %4u %3u %9llu %9llu"
1180 " %9llu %7lu %7lu\n",
1182 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1183 arena->bins[i].reg_size,
1184 arena->bins[i].nregs,
1185 arena->bins[i].run_size >> pagesize_2pow,
1186 arena->bins[i].stats.nrequests,
1187 arena->bins[i].stats.nruns,
1188 arena->bins[i].stats.reruns,
1189 arena->bins[i].stats.highruns,
1190 arena->bins[i].stats.curruns);
1193 if (gap_start != -1) {
1194 if (i > gap_start + 1) {
1195 /* Gap of more than one size class. */
1196 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1197 } else {
1198 /* Gap of one size class. */
1199 malloc_printf("[%u]\n", gap_start);
1203 #endif
1206 * End Utility functions/macros.
1208 /******************************************************************************/
1210 * Begin chunk management functions.
1213 #ifndef lint
1214 static inline int
1215 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1218 assert(a != NULL);
1219 assert(b != NULL);
1221 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1222 return (-1);
1223 else if (a->chunk == b->chunk)
1224 return (0);
1225 else
1226 return (1);
1229 /* Generate red-black tree code for chunks. */
1230 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1231 #endif
1233 static void *
1234 pages_map_align(void *addr, size_t size, int align)
1236 void *ret;
1239 * We don't use MAP_FIXED here, because it can cause the *replacement*
1240 * of existing mappings, and we only want to create new mappings.
1242 ret = mmap(addr, size, PROT_READ | PROT_WRITE,
1243 MAP_PRIVATE | MAP_ANON | MAP_ALIGNED(align), -1, 0);
1244 assert(ret != NULL);
1246 if (ret == MAP_FAILED)
1247 ret = NULL;
1248 else if (addr != NULL && ret != addr) {
1250 * We succeeded in mapping memory, but not in the right place.
1252 if (munmap(ret, size) == -1) {
1253 char buf[STRERROR_BUF];
1255 STRERROR_R(errno, buf, sizeof(buf));
1256 _malloc_message(getprogname(),
1257 ": (malloc) Error in munmap(): ", buf, "\n");
1258 if (opt_abort)
1259 abort();
1261 ret = NULL;
1264 assert(ret == NULL || (addr == NULL && ret != addr)
1265 || (addr != NULL && ret == addr));
1266 return (ret);
1269 static void *
1270 pages_map(void *addr, size_t size)
1273 return pages_map_align(addr, size, 0);
1276 static void
1277 pages_unmap(void *addr, size_t size)
1280 if (munmap(addr, size) == -1) {
1281 char buf[STRERROR_BUF];
1283 STRERROR_R(errno, buf, sizeof(buf));
1284 _malloc_message(getprogname(),
1285 ": (malloc) Error in munmap(): ", buf, "\n");
1286 if (opt_abort)
1287 abort();
1291 static void *
1292 chunk_alloc(size_t size)
1294 void *ret, *chunk;
1295 chunk_node_t *tchunk, *delchunk;
1297 assert(size != 0);
1298 assert((size & chunksize_mask) == 0);
1300 malloc_mutex_lock(&chunks_mtx);
1302 if (size == chunksize) {
1304 * Check for address ranges that were previously chunks and try
1305 * to use them.
1308 /* LINTED */
1309 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1310 while (tchunk != NULL) {
1311 /* Found an address range. Try to recycle it. */
1313 chunk = tchunk->chunk;
1314 delchunk = tchunk;
1315 /* LINTED */
1316 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1318 /* Remove delchunk from the tree. */
1319 /* LINTED */
1320 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1321 base_chunk_node_dealloc(delchunk);
1323 #ifdef USE_BRK
1324 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1325 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1326 /* Re-use a previously freed brk chunk. */
1327 ret = chunk;
1328 goto RETURN;
1330 #endif
1331 if ((ret = pages_map(chunk, size)) != NULL) {
1332 /* Success. */
1333 goto RETURN;
1339 * Try to over-allocate, but allow the OS to place the allocation
1340 * anywhere. Beware of size_t wrap-around.
1342 if (size + chunksize > size) {
1343 if ((ret = pages_map_align(NULL, size, chunksize_2pow))
1344 != NULL) {
1345 goto RETURN;
1349 #ifdef USE_BRK
1351 * Try to create allocations in brk, in order to make full use of
1352 * limited address space.
1354 if (brk_prev != (void *)-1) {
1355 void *brk_cur;
1356 intptr_t incr;
1359 * The loop is necessary to recover from races with other
1360 * threads that are using brk for something other than malloc.
1362 malloc_mutex_lock(&brk_mtx);
1363 do {
1364 /* Get the current end of brk. */
1365 brk_cur = sbrk(0);
1368 * Calculate how much padding is necessary to
1369 * chunk-align the end of brk.
1371 incr = (intptr_t)size
1372 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1373 if (incr == (intptr_t)size) {
1374 ret = brk_cur;
1375 } else {
1376 ret = (void *)((intptr_t)brk_cur + incr);
1377 incr += size;
1380 brk_prev = sbrk(incr);
1381 if (brk_prev == brk_cur) {
1382 /* Success. */
1383 malloc_mutex_unlock(&brk_mtx);
1384 brk_max = (void *)((intptr_t)ret + size);
1385 goto RETURN;
1387 } while (brk_prev != (void *)-1);
1388 malloc_mutex_unlock(&brk_mtx);
1390 #endif
1392 /* All strategies for allocation failed. */
1393 ret = NULL;
1394 RETURN:
1395 if (ret != NULL) {
1396 chunk_node_t key;
1398 * Clean out any entries in old_chunks that overlap with the
1399 * memory we just allocated.
1401 key.chunk = ret;
1402 /* LINTED */
1403 tchunk = RB_NFIND(chunk_tree_s, &old_chunks, &key);
1404 while (tchunk != NULL
1405 && (uintptr_t)tchunk->chunk >= (uintptr_t)ret
1406 && (uintptr_t)tchunk->chunk < (uintptr_t)ret + size) {
1407 delchunk = tchunk;
1408 /* LINTED */
1409 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1410 /* LINTED */
1411 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1412 base_chunk_node_dealloc(delchunk);
1416 #ifdef MALLOC_STATS
1417 if (ret != NULL) {
1418 stats_chunks.nchunks += (size / chunksize);
1419 stats_chunks.curchunks += (size / chunksize);
1421 if (stats_chunks.curchunks > stats_chunks.highchunks)
1422 stats_chunks.highchunks = stats_chunks.curchunks;
1423 #endif
1424 malloc_mutex_unlock(&chunks_mtx);
1426 assert(CHUNK_ADDR2BASE(ret) == ret);
1427 return (ret);
1430 static void
1431 chunk_dealloc(void *chunk, size_t size)
1433 chunk_node_t *node;
1435 assert(chunk != NULL);
1436 assert(CHUNK_ADDR2BASE(chunk) == chunk);
1437 assert(size != 0);
1438 assert((size & chunksize_mask) == 0);
1440 malloc_mutex_lock(&chunks_mtx);
1442 #ifdef USE_BRK
1443 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1444 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1445 void *brk_cur;
1447 malloc_mutex_lock(&brk_mtx);
1448 /* Get the current end of brk. */
1449 brk_cur = sbrk(0);
1452 * Try to shrink the data segment if this chunk is at the end
1453 * of the data segment. The sbrk() call here is subject to a
1454 * race condition with threads that use brk(2) or sbrk(2)
1455 * directly, but the alternative would be to leak memory for
1456 * the sake of poorly designed multi-threaded programs.
1458 if (brk_cur == brk_max
1459 && (void *)((uintptr_t)chunk + size) == brk_max
1460 && sbrk(-(intptr_t)size) == brk_max) {
1461 malloc_mutex_unlock(&brk_mtx);
1462 if (brk_prev == brk_max) {
1463 /* Success. */
1464 brk_prev = (void *)((intptr_t)brk_max
1465 - (intptr_t)size);
1466 brk_max = brk_prev;
1468 } else {
1469 size_t offset;
1471 malloc_mutex_unlock(&brk_mtx);
1472 madvise(chunk, size, MADV_FREE);
1475 * Iteratively create records of each chunk-sized
 1476 			 * memory region that 'chunk' comprises, so that
1477 * the address range can be recycled if memory usage
1478 * increases later on.
1480 for (offset = 0; offset < size; offset += chunksize) {
1481 node = base_chunk_node_alloc();
1482 if (node == NULL)
1483 break;
1485 node->chunk = (void *)((uintptr_t)chunk
1486 + (uintptr_t)offset);
1487 node->size = chunksize;
1488 /* LINTED */
1489 RB_INSERT(chunk_tree_s, &old_chunks, node);
1492 } else {
1493 #endif
1494 pages_unmap(chunk, size);
1497 * Make a record of the chunk's address, so that the address
1498 * range can be recycled if memory usage increases later on.
1499 * Don't bother to create entries if (size > chunksize), since
1500 * doing so could cause scalability issues for truly gargantuan
1501 * objects (many gigabytes or larger).
1503 if (size == chunksize) {
1504 node = base_chunk_node_alloc();
1505 if (node != NULL) {
1506 node->chunk = (void *)(uintptr_t)chunk;
1507 node->size = chunksize;
1508 /* LINTED */
1509 RB_INSERT(chunk_tree_s, &old_chunks, node);
1512 #ifdef USE_BRK
1514 #endif
1516 #ifdef MALLOC_STATS
1517 stats_chunks.curchunks -= (size / chunksize);
1518 #endif
1519 malloc_mutex_unlock(&chunks_mtx);
1523 * End chunk management functions.
1525 /******************************************************************************/
1527 * Begin arena.
1531 * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1533 * We maintain at least one block of arenas. Usually there are more.
1534 * The blocks are $ncpu arenas in size. Whole blocks are 'hashed'
1535 * amongst threads. To accomplish this, next_arena advances only in
1536 * ncpu steps.
1538 static __noinline arena_t *
1539 choose_arena_hard(void)
1541 unsigned i, curcpu;
1542 arena_t **map;
1544 /* Initialize the current block of arenas and advance to next. */
1545 malloc_mutex_lock(&arenas_mtx);
1546 assert(next_arena % ncpus == 0);
1547 assert(narenas % ncpus == 0);
1548 map = &arenas[next_arena];
1549 set_arenas_map(map);
1550 for (i = 0; i < ncpus; i++) {
1551 if (arenas[next_arena] == NULL)
1552 arenas_extend(next_arena);
1553 next_arena = (next_arena + 1) % narenas;
1555 malloc_mutex_unlock(&arenas_mtx);
1558 * If we were unable to allocate an arena above, then default to
1559 * the first arena, which is always present.
1561 curcpu = thr_curcpu();
1562 if (map[curcpu] != NULL)
1563 return map[curcpu];
1564 return arenas[0];
1567 static inline arena_t *
1568 choose_arena(void)
1570 unsigned curcpu;
1571 arena_t **map;
1573 map = get_arenas_map();
1574 curcpu = thr_curcpu();
1575 if (__predict_true(map != NULL && map[curcpu] != NULL))
1576 return map[curcpu];
1578 return choose_arena_hard();
1581 #ifndef lint
1582 static inline int
1583 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1586 assert(a != NULL);
1587 assert(b != NULL);
1589 if ((uintptr_t)a < (uintptr_t)b)
1590 return (-1);
1591 else if (a == b)
1592 return (0);
1593 else
1594 return (1);
1597 /* Generate red-black tree code for arena chunks. */
1598 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1599 #endif
1601 #ifndef lint
1602 static inline int
1603 arena_run_comp(arena_run_t *a, arena_run_t *b)
1606 assert(a != NULL);
1607 assert(b != NULL);
1609 if ((uintptr_t)a < (uintptr_t)b)
1610 return (-1);
1611 else if (a == b)
1612 return (0);
1613 else
1614 return (1);
1617 /* Generate red-black tree code for arena runs. */
1618 RB_GENERATE_STATIC(arena_run_tree_s, arena_run_s, link, arena_run_comp);
1619 #endif
1621 static inline void *
1622 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1624 void *ret;
1625 unsigned i, mask, bit, regind;
1627 assert(run->magic == ARENA_RUN_MAGIC);
1628 assert(run->regs_minelm < bin->regs_mask_nelms);
1631 * Move the first check outside the loop, so that run->regs_minelm can
1632 * be updated unconditionally, without the possibility of updating it
1633 * multiple times.
1635 i = run->regs_minelm;
1636 mask = run->regs_mask[i];
1637 if (mask != 0) {
1638 /* Usable allocation found. */
1639 bit = ffs((int)mask) - 1;
1641 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1642 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1643 + (bin->reg_size * regind));
1645 /* Clear bit. */
1646 mask ^= (1 << bit);
1647 run->regs_mask[i] = mask;
1649 return (ret);
1652 for (i++; i < bin->regs_mask_nelms; i++) {
1653 mask = run->regs_mask[i];
1654 if (mask != 0) {
1655 /* Usable allocation found. */
1656 bit = ffs((int)mask) - 1;
1658 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1659 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1660 + (bin->reg_size * regind));
1662 /* Clear bit. */
1663 mask ^= (1 << bit);
1664 run->regs_mask[i] = mask;
1667 * Make a note that nothing before this element
1668 * contains a free region.
1670 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
1672 return (ret);
1675 /* Not reached. */
1676 /* LINTED */
1677 assert(0);
1678 return (NULL);
1681 static inline void
1682 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1685 * To divide by a number D that is not a power of two we multiply
1686 * by (2^21 / D) and then right shift by 21 positions.
1688 * X / D
1690 * becomes
1692 * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1694 #define SIZE_INV_SHIFT 21
1695 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
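	/*
	 * Editorial worked example, not part of the original source, assuming
	 * QUANTUM_2POW_MIN == 4: for the 48-byte size class,
	 * size_invs[(48 >> 4) - 3] == SIZE_INV(3) == (2^21 / 48) + 1 == 43691.
	 * Freeing the third region of a run gives diff == 96, and
	 * (96 * 43691) >> 21 == 4194336 >> 21 == 2, the same regind that
	 * 96 / 48 would produce, but without a variable-divisor division.
	 */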
1696 static const unsigned size_invs[] = {
1697 SIZE_INV(3),
1698 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1699 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1700 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1701 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1702 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1703 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1704 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1705 #if (QUANTUM_2POW_MIN < 4)
1707 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1708 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1709 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1710 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1711 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1712 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1713 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1714 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1715 #endif
1717 unsigned diff, regind, elm, bit;
1719 /* LINTED */
1720 assert(run->magic == ARENA_RUN_MAGIC);
1721 assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1722 >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1725 * Avoid doing division with a variable divisor if possible. Using
1726 * actual division here can reduce allocator throughput by over 20%!
1728 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1729 if ((size & (size - 1)) == 0) {
 1731 		 * log2_table allows fast division by a power-of-two divisor in
 1732 		 * the [1..128] range.
1734 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1736 static const unsigned char log2_table[] = {
1737 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1747 if (size <= 128)
1748 regind = (diff >> log2_table[size - 1]);
1749 else if (size <= 32768)
1750 regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1751 else {
1753 * The page size is too large for us to use the lookup
1754 * table. Use real division.
1756 regind = (unsigned)(diff / size);
1758 } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1759 << QUANTUM_2POW_MIN) + 2) {
1760 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1761 regind >>= SIZE_INV_SHIFT;
1762 } else {
1764 * size_invs isn't large enough to handle this size class, so
1765 * calculate regind using actual division. This only happens
1766 * if the user increases small_max via the 'S' runtime
1767 * configuration option.
1769 regind = (unsigned)(diff / size);
1771 assert(diff == regind * size);
1772 assert(regind < bin->nregs);
1774 elm = regind >> (SIZEOF_INT_2POW + 3);
1775 if (elm < run->regs_minelm)
1776 run->regs_minelm = elm;
1777 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1778 assert((run->regs_mask[elm] & (1 << bit)) == 0);
1779 run->regs_mask[elm] |= (1 << bit);
1780 #undef SIZE_INV
1781 #undef SIZE_INV_SHIFT
1784 static void
1785 arena_run_split(arena_t *arena, arena_run_t *run, size_t size)
1787 arena_chunk_t *chunk;
1788 unsigned run_ind, map_offset, total_pages, need_pages, rem_pages;
1789 unsigned i;
1791 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1792 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1793 >> pagesize_2pow);
1794 total_pages = chunk->map[run_ind].npages;
1795 need_pages = (unsigned)(size >> pagesize_2pow);
1796 assert(need_pages <= total_pages);
1797 rem_pages = total_pages - need_pages;
1799 /* Split enough pages from the front of run to fit allocation size. */
1800 map_offset = run_ind;
1801 for (i = 0; i < need_pages; i++) {
1802 chunk->map[map_offset + i].npages = need_pages;
1803 chunk->map[map_offset + i].pos = i;
1806 /* Keep track of trailing unused pages for later use. */
1807 if (rem_pages > 0) {
1808 /* Update map for trailing pages. */
1809 map_offset += need_pages;
1810 chunk->map[map_offset].npages = rem_pages;
1811 chunk->map[map_offset].pos = POS_FREE;
1812 chunk->map[map_offset + rem_pages - 1].npages = rem_pages;
1813 chunk->map[map_offset + rem_pages - 1].pos = POS_FREE;
1816 chunk->pages_used += need_pages;
1819 static arena_chunk_t *
1820 arena_chunk_alloc(arena_t *arena)
1822 arena_chunk_t *chunk;
1824 if (arena->spare != NULL) {
1825 chunk = arena->spare;
1826 arena->spare = NULL;
1828 /* LINTED */
1829 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1830 } else {
1831 chunk = (arena_chunk_t *)chunk_alloc(chunksize);
1832 if (chunk == NULL)
1833 return (NULL);
1834 #ifdef MALLOC_STATS
1835 arena->stats.mapped += chunksize;
1836 #endif
1838 chunk->arena = arena;
1840 /* LINTED */
1841 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1844 * Claim that no pages are in use, since the header is merely
1845 * overhead.
1847 chunk->pages_used = 0;
1849 chunk->max_frun_npages = chunk_npages -
1850 arena_chunk_header_npages;
1851 chunk->min_frun_ind = arena_chunk_header_npages;
1854 * Initialize enough of the map to support one maximal free run.
1856 chunk->map[arena_chunk_header_npages].npages = chunk_npages -
1857 arena_chunk_header_npages;
1858 chunk->map[arena_chunk_header_npages].pos = POS_FREE;
1859 chunk->map[chunk_npages - 1].npages = chunk_npages -
1860 arena_chunk_header_npages;
1861 chunk->map[chunk_npages - 1].pos = POS_FREE;
1864 return (chunk);
1867 static void
1868 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
1872 * Remove chunk from the chunk tree, regardless of whether this chunk
1873 * will be cached, so that the arena does not use it.
1875 /* LINTED */
1876 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1878 if (opt_hint == false) {
1879 if (arena->spare != NULL) {
1880 chunk_dealloc((void *)arena->spare, chunksize);
1881 #ifdef MALLOC_STATS
1882 arena->stats.mapped -= chunksize;
1883 #endif
1885 arena->spare = chunk;
1886 } else {
1887 assert(arena->spare == NULL);
1888 chunk_dealloc((void *)chunk, chunksize);
1889 #ifdef MALLOC_STATS
1890 arena->stats.mapped -= chunksize;
1891 #endif
1895 static arena_run_t *
1896 arena_run_alloc(arena_t *arena, size_t size)
1898 arena_chunk_t *chunk;
1899 arena_run_t *run;
1900 unsigned need_npages, limit_pages, compl_need_npages;
1902 assert(size <= (chunksize - (arena_chunk_header_npages <<
1903 pagesize_2pow)));
1904 assert((size & pagesize_mask) == 0);
 1907 	 * Search through the arena's chunks in address order for a free run that is
1908 * large enough. Look for the first fit.
1910 need_npages = (unsigned)(size >> pagesize_2pow);
1911 limit_pages = chunk_npages - arena_chunk_header_npages;
1912 compl_need_npages = limit_pages - need_npages;
1913 /* LINTED */
1914 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1916 * Avoid searching this chunk if there are not enough
1917 * contiguous free pages for there to possibly be a large
1918 * enough free run.
1920 if (chunk->pages_used <= compl_need_npages &&
1921 need_npages <= chunk->max_frun_npages) {
1922 arena_chunk_map_t *mapelm;
1923 unsigned i;
1924 unsigned max_frun_npages = 0;
1925 unsigned min_frun_ind = chunk_npages;
1927 assert(chunk->min_frun_ind >=
1928 arena_chunk_header_npages);
1929 for (i = chunk->min_frun_ind; i < chunk_npages;) {
1930 mapelm = &chunk->map[i];
1931 if (mapelm->pos == POS_FREE) {
1932 if (mapelm->npages >= need_npages) {
1933 run = (arena_run_t *)
1934 ((uintptr_t)chunk + (i <<
1935 pagesize_2pow));
1936 /* Update page map. */
1937 arena_run_split(arena, run,
1938 size);
1939 return (run);
1941 if (mapelm->npages >
1942 max_frun_npages) {
1943 max_frun_npages =
1944 mapelm->npages;
1946 if (i < min_frun_ind) {
1947 min_frun_ind = i;
1948 if (i < chunk->min_frun_ind)
1949 chunk->min_frun_ind = i;
1952 i += mapelm->npages;
1955 * Search failure. Reset cached chunk->max_frun_npages.
1956 * chunk->min_frun_ind was already reset above (if
1957 * necessary).
1959 chunk->max_frun_npages = max_frun_npages;
1964 * No usable runs. Create a new chunk from which to allocate the run.
1966 chunk = arena_chunk_alloc(arena);
1967 if (chunk == NULL)
1968 return (NULL);
1969 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
1970 pagesize_2pow));
1971 /* Update page map. */
1972 arena_run_split(arena, run, size);
1973 return (run);
1976 static void
1977 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1979 arena_chunk_t *chunk;
1980 unsigned run_ind, run_pages;
1982 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1984 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1985 >> pagesize_2pow);
1986 assert(run_ind >= arena_chunk_header_npages);
1987 assert(run_ind < (chunksize >> pagesize_2pow));
1988 run_pages = (unsigned)(size >> pagesize_2pow);
1989 assert(run_pages == chunk->map[run_ind].npages);
1991 /* Subtract pages from count of pages used in chunk. */
1992 chunk->pages_used -= run_pages;
1994 /* Mark run as deallocated. */
1995 assert(chunk->map[run_ind].npages == run_pages);
1996 chunk->map[run_ind].pos = POS_FREE;
1997 assert(chunk->map[run_ind + run_pages - 1].npages == run_pages);
1998 chunk->map[run_ind + run_pages - 1].pos = POS_FREE;
2001 * Tell the kernel that we don't need the data in this run, but only if
2002 * requested via runtime configuration.
2004 if (opt_hint)
2005 madvise(run, size, MADV_FREE);
2007 /* Try to coalesce with neighboring runs. */
2008 if (run_ind > arena_chunk_header_npages &&
2009 chunk->map[run_ind - 1].pos == POS_FREE) {
2010 unsigned prev_npages;
2012 /* Coalesce with previous run. */
2013 prev_npages = chunk->map[run_ind - 1].npages;
2014 run_ind -= prev_npages;
2015 assert(chunk->map[run_ind].npages == prev_npages);
2016 assert(chunk->map[run_ind].pos == POS_FREE);
2017 run_pages += prev_npages;
2019 chunk->map[run_ind].npages = run_pages;
2020 assert(chunk->map[run_ind].pos == POS_FREE);
2021 chunk->map[run_ind + run_pages - 1].npages = run_pages;
2022 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2025 if (run_ind + run_pages < chunk_npages &&
2026 chunk->map[run_ind + run_pages].pos == POS_FREE) {
2027 unsigned next_npages;
2029 /* Coalesce with next run. */
2030 next_npages = chunk->map[run_ind + run_pages].npages;
2031 run_pages += next_npages;
2032 assert(chunk->map[run_ind + run_pages - 1].npages ==
2033 next_npages);
2034 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2036 chunk->map[run_ind].npages = run_pages;
2037 chunk->map[run_ind].pos = POS_FREE;
2038 chunk->map[run_ind + run_pages - 1].npages = run_pages;
2039 assert(chunk->map[run_ind + run_pages - 1].pos == POS_FREE);
2042 if (chunk->map[run_ind].npages > chunk->max_frun_npages)
2043 chunk->max_frun_npages = chunk->map[run_ind].npages;
2044 if (run_ind < chunk->min_frun_ind)
2045 chunk->min_frun_ind = run_ind;
2047 /* Deallocate chunk if it is now completely unused. */
2048 if (chunk->pages_used == 0)
2049 arena_chunk_dealloc(arena, chunk);
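/*
 * Illustrative sketch, not part of the allocator: arena_run_dalloc() above
 * stores the run length at both the first and the last page of a free run,
 * which is what makes backward coalescing O(1) (a boundary-tag scheme on
 * the page map).  The demo_* names and DEMO_POS_FREE value are
 * hypothetical.
 */
#if 0	/* example only, never compiled */
#define	DEMO_POS_FREE	0xffffffffU

struct demo_mapelm {
	unsigned	npages;
	unsigned	pos;
};

/*
 * Free run_pages pages starting at run_ind, merging with free neighbors.
 * Returns the index of the (possibly larger) resulting free run.
 */
static unsigned
demo_free_run(struct demo_mapelm *map, unsigned npages_total,
    unsigned run_ind, unsigned run_pages)
{

	/* Merge with the previous run, found via its trailing tag. */
	if (run_ind > 0 && map[run_ind - 1].pos == DEMO_POS_FREE) {
		unsigned prev_npages = map[run_ind - 1].npages;

		run_ind -= prev_npages;
		run_pages += prev_npages;
	}
	/* Merge with the next run. */
	if (run_ind + run_pages < npages_total &&
	    map[run_ind + run_pages].pos == DEMO_POS_FREE)
		run_pages += map[run_ind + run_pages].npages;

	/* Rewrite the boundary tags of the merged run. */
	map[run_ind].npages = run_pages;
	map[run_ind].pos = DEMO_POS_FREE;
	map[run_ind + run_pages - 1].npages = run_pages;
	map[run_ind + run_pages - 1].pos = DEMO_POS_FREE;
	return (run_ind);
}
#endif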
2052 static arena_run_t *
2053 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2055 arena_run_t *run;
2056 unsigned i, remainder;
2058 /* Look for a usable run. */
2059 /* LINTED */
2060 if ((run = RB_MIN(arena_run_tree_s, &bin->runs)) != NULL) {
2061 /* run is guaranteed to have available space. */
2062 /* LINTED */
2063 RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2064 #ifdef MALLOC_STATS
2065 bin->stats.reruns++;
2066 #endif
2067 return (run);
2069 /* No existing runs have any space available. */
2071 /* Allocate a new run. */
2072 run = arena_run_alloc(arena, bin->run_size);
2073 if (run == NULL)
2074 return (NULL);
2076 /* Initialize run internals. */
2077 run->bin = bin;
2079 for (i = 0; i < bin->regs_mask_nelms; i++)
2080 run->regs_mask[i] = UINT_MAX;
2081 remainder = bin->nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1);
2082 if (remainder != 0) {
2083 /* The last element has spare bits that need to be unset. */
2084 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2085 - remainder));
2088 run->regs_minelm = 0;
2090 run->nfree = bin->nregs;
2091 #ifdef MALLOC_DEBUG
2092 run->magic = ARENA_RUN_MAGIC;
2093 #endif
2095 #ifdef MALLOC_STATS
2096 bin->stats.nruns++;
2097 bin->stats.curruns++;
2098 if (bin->stats.curruns > bin->stats.highruns)
2099 bin->stats.highruns = bin->stats.curruns;
2100 #endif
2101 return (run);
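/*
 * Illustrative sketch, not part of the allocator: a fresh run's free-region
 * bitmap is initialized to all ones, and the unused high bits of the last
 * mask word are then cleared so that a region index >= nregs can never be
 * handed out.  A simplified version, assuming 32-bit unsigned words (the
 * real code derives the width from SIZEOF_INT_2POW); the demo_* name is
 * hypothetical.
 */
#if 0	/* example only, never compiled */
#include <assert.h>
#include <limits.h>

static void
demo_init_regs_mask(unsigned *regs_mask, unsigned nelms, unsigned nregs)
{
	const unsigned bits = sizeof(unsigned) * CHAR_BIT;
	unsigned i, remainder;

	assert(nelms * bits >= nregs);
	for (i = 0; i < nelms; i++)
		regs_mask[i] = UINT_MAX;	/* all regions free */
	remainder = nregs % bits;
	if (remainder != 0) {
		/* e.g. nregs == 40, bits == 32: keep only 8 low bits set. */
		regs_mask[nelms - 1] = UINT_MAX >> (bits - remainder);
	}
}
#endif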
2104 /* bin->runcur must have space available before this function is called. */
2105 static inline void *
2106 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2108 void *ret;
2110 assert(run->magic == ARENA_RUN_MAGIC);
2111 assert(run->nfree > 0);
2113 ret = arena_run_reg_alloc(run, bin);
2114 assert(ret != NULL);
2115 run->nfree--;
2117 return (ret);
2120 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2121 static void *
2122 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2125 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2126 if (bin->runcur == NULL)
2127 return (NULL);
2128 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2129 assert(bin->runcur->nfree > 0);
2131 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2135 * Calculate bin->run_size such that it meets the following constraints:
2137 * *) bin->run_size >= min_run_size
2138 * *) bin->run_size <= arena_maxclass
2139 * *) bin->run_size <= RUN_MAX_SMALL
2140 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2142 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2143 * also calculated here, since these settings are all interdependent.
2145 static size_t
2146 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2148 size_t try_run_size, good_run_size;
2149 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2150 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2152 assert(min_run_size >= pagesize);
2153 assert(min_run_size <= arena_maxclass);
2154 assert(min_run_size <= RUN_MAX_SMALL);
2157 * Calculate known-valid settings before entering the run_size
2158 * expansion loop, so that the first part of the loop always copies
2159 * valid settings.
2161 * The do..while loop iteratively reduces the number of regions until
2162 * the run header and the regions no longer overlap. A closed formula
2163 * would be quite messy, since there is an interdependency between the
2164 * header's mask length and the number of regions.
2166 try_run_size = min_run_size;
2167 try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2168 bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2169 do {
2170 try_nregs--;
2171 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2172 ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2173 try_reg0_offset = (unsigned)(try_run_size -
2174 (try_nregs * bin->reg_size));
2175 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2176 > try_reg0_offset);
2178 /* run_size expansion loop. */
2179 do {
2181 * Copy valid settings before trying more aggressive settings.
2183 good_run_size = try_run_size;
2184 good_nregs = try_nregs;
2185 good_mask_nelms = try_mask_nelms;
2186 good_reg0_offset = try_reg0_offset;
2188 /* Try more aggressive settings. */
2189 try_run_size += pagesize;
2190 try_nregs = (unsigned)(((try_run_size - sizeof(arena_run_t)) /
2191 bin->reg_size) + 1); /* Counter-act try_nregs-- in loop. */
2192 do {
2193 try_nregs--;
2194 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2195 ((try_nregs & ((1 << (SIZEOF_INT_2POW + 3)) - 1)) ?
2196 1 : 0);
2197 try_reg0_offset = (unsigned)(try_run_size - (try_nregs *
2198 bin->reg_size));
2199 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
2200 (try_mask_nelms - 1)) > try_reg0_offset);
2201 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2202 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2203 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2205 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2206 <= good_reg0_offset);
2207 assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2209 /* Copy final settings. */
2210 bin->run_size = good_run_size;
2211 bin->nregs = good_nregs;
2212 bin->regs_mask_nelms = good_mask_nelms;
2213 bin->reg0_offset = good_reg0_offset;
2215 return (good_run_size);
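/*
 * Worked example, not part of the allocator: the calculation above grows
 * the run size one page at a time and, for each candidate size, solves for
 * the largest number of regions whose header (struct plus one bitmap bit
 * per region) does not overlap region 0.  The 20-byte base header and
 * 4-byte mask words below are assumptions made only for this example;
 * demo_* names are hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stdio.h>
#include <stddef.h>

static unsigned
demo_nregs(size_t run_size, size_t reg_size, size_t *reg0_offset)
{
	const size_t base_hdr = 20;	/* assumed header size, in bytes */
	size_t nregs, mask_words, off;

	nregs = (run_size - base_hdr) / reg_size + 1;	/* over-estimate */
	do {
		nregs--;
		mask_words = (nregs + 31) / 32;
		off = run_size - nregs * reg_size;	/* offset of region 0 */
	} while (base_hdr + 4 * (mask_words - 1) > off);
	*reg0_offset = off;
	return ((unsigned)nregs);
}

int
main(void)
{
	size_t off;
	unsigned n = demo_nregs(4096, 16, &off);	/* one 4 kB page */

	printf("%u regions of 16 bytes, region 0 at offset %zu\n", n, off);
	/* Prints: 253 regions of 16 bytes, region 0 at offset 48 */
	return (0);
}
#endif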
2218 static void *
2219 arena_malloc(arena_t *arena, size_t size)
2221 void *ret;
2223 assert(arena != NULL);
2224 assert(arena->magic == ARENA_MAGIC);
2225 assert(size != 0);
2226 assert(QUANTUM_CEILING(size) <= arena_maxclass);
2228 if (size <= bin_maxclass) {
2229 arena_bin_t *bin;
2230 arena_run_t *run;
2232 /* Small allocation. */
2234 if (size < small_min) {
2235 /* Tiny. */
2236 size = pow2_ceil(size);
2237 bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2238 1)))];
2239 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2241 * Bin calculation is always correct, but we may need
2242 * to fix size for the purposes of assertions and/or
2243 * stats accuracy.
2245 if (size < (1 << TINY_MIN_2POW))
2246 size = (1 << TINY_MIN_2POW);
2247 #endif
2248 } else if (size <= small_max) {
2249 /* Quantum-spaced. */
2250 size = QUANTUM_CEILING(size);
2251 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2252 - 1];
2253 } else {
2254 /* Sub-page. */
2255 size = pow2_ceil(size);
2256 bin = &arena->bins[ntbins + nqbins
2257 + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2259 assert(size == bin->reg_size);
2261 malloc_mutex_lock(&arena->mtx);
2262 if ((run = bin->runcur) != NULL && run->nfree > 0)
2263 ret = arena_bin_malloc_easy(arena, bin, run);
2264 else
2265 ret = arena_bin_malloc_hard(arena, bin);
2267 if (ret == NULL) {
2268 malloc_mutex_unlock(&arena->mtx);
2269 return (NULL);
2272 #ifdef MALLOC_STATS
2273 bin->stats.nrequests++;
2274 arena->stats.nmalloc_small++;
2275 arena->stats.allocated_small += size;
2276 #endif
2277 } else {
2278 /* Large allocation. */
2279 size = PAGE_CEILING(size);
2280 malloc_mutex_lock(&arena->mtx);
2281 ret = (void *)arena_run_alloc(arena, size);
2282 if (ret == NULL) {
2283 malloc_mutex_unlock(&arena->mtx);
2284 return (NULL);
2286 #ifdef MALLOC_STATS
2287 arena->stats.nmalloc_large++;
2288 arena->stats.allocated_large += size;
2289 #endif
2292 malloc_mutex_unlock(&arena->mtx);
2294 if (opt_junk)
2295 memset(ret, 0xa5, size);
2296 else if (opt_zero)
2297 memset(ret, 0, size);
2298 return (ret);
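/*
 * Illustrative sketch, not part of the allocator: arena_malloc() above
 * routes small requests to one of three groups of bins: tiny (powers of
 * two below the quantum), quantum-spaced, and sub-page (powers of two up
 * to half a page).  The sketch rounds a request to its size class using
 * assumed defaults (16-byte quantum, small_max = 512); the demo_* name is
 * hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stdio.h>
#include <stddef.h>

static size_t
demo_small_size_class(size_t size)
{
	const size_t quantum = 16, small_min = 9, small_max = 512;
	size_t s;

	if (size < small_min) {
		/* Tiny: round up to a power of two, minimum 2. */
		for (s = 2; s < size; s <<= 1)
			;
		return (s);
	}
	if (size <= small_max) {
		/* Quantum-spaced: round up to a multiple of the quantum. */
		return ((size + quantum - 1) & ~(quantum - 1));
	}
	/* Sub-page: round up to a power of two (1 kB or 2 kB). */
	for (s = small_max << 1; s < size; s <<= 1)
		;
	return (s);
}

int
main(void)
{

	printf("7 -> %zu, 100 -> %zu, 600 -> %zu\n",
	    demo_small_size_class(7), demo_small_size_class(100),
	    demo_small_size_class(600));	/* 8, 112, 1024 */
	return (0);
}
#endif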
2301 static inline void
2302 arena_palloc_trim(arena_t *arena, arena_chunk_t *chunk, unsigned pageind,
2303 unsigned npages)
2305 unsigned i;
2307 assert(npages > 0);
2310 * Modify the map such that arena_run_dalloc() sees the run as
2311 * separately allocated.
2313 for (i = 0; i < npages; i++) {
2314 chunk->map[pageind + i].npages = npages;
2315 chunk->map[pageind + i].pos = i;
2317 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)chunk + (pageind <<
2318 pagesize_2pow)), npages << pagesize_2pow);
2321 /* Only handles large allocations that require more than page alignment. */
2322 static void *
2323 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2325 void *ret;
2326 size_t offset;
2327 arena_chunk_t *chunk;
2328 unsigned pageind, i, npages;
2330 assert((size & pagesize_mask) == 0);
2331 assert((alignment & pagesize_mask) == 0);
2333 npages = (unsigned)(size >> pagesize_2pow);
2335 malloc_mutex_lock(&arena->mtx);
2336 ret = (void *)arena_run_alloc(arena, alloc_size);
2337 if (ret == NULL) {
2338 malloc_mutex_unlock(&arena->mtx);
2339 return (NULL);
2342 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
2344 offset = (uintptr_t)ret & (alignment - 1);
2345 assert((offset & pagesize_mask) == 0);
2346 assert(offset < alloc_size);
2347 if (offset == 0) {
2348 pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2349 pagesize_2pow);
2351 /* Update the map for the run to be kept. */
2352 for (i = 0; i < npages; i++) {
2353 chunk->map[pageind + i].npages = npages;
2354 assert(chunk->map[pageind + i].pos == i);
2357 /* Trim trailing space. */
2358 arena_palloc_trim(arena, chunk, pageind + npages,
2359 (unsigned)((alloc_size - size) >> pagesize_2pow));
2360 } else {
2361 size_t leadsize, trailsize;
2363 leadsize = alignment - offset;
2364 ret = (void *)((uintptr_t)ret + leadsize);
2365 pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
2366 pagesize_2pow);
2368 /* Update the map for the run to be kept. */
2369 for (i = 0; i < npages; i++) {
2370 chunk->map[pageind + i].npages = npages;
2371 chunk->map[pageind + i].pos = i;
2374 /* Trim leading space. */
2375 arena_palloc_trim(arena, chunk,
2376 (unsigned)(pageind - (leadsize >> pagesize_2pow)),
2377 (unsigned)(leadsize >> pagesize_2pow));
2379 trailsize = alloc_size - leadsize - size;
2380 if (trailsize != 0) {
2381 /* Trim trailing space. */
2382 assert(trailsize < alloc_size);
2383 arena_palloc_trim(arena, chunk, pageind + npages,
2384 (unsigned)(trailsize >> pagesize_2pow));
2388 #ifdef MALLOC_STATS
2389 arena->stats.nmalloc_large++;
2390 arena->stats.allocated_large += size;
2391 #endif
2392 malloc_mutex_unlock(&arena->mtx);
2394 if (opt_junk)
2395 memset(ret, 0xa5, size);
2396 else if (opt_zero)
2397 memset(ret, 0, size);
2398 return (ret);
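/*
 * Illustrative sketch, not part of the allocator: arena_palloc() above
 * over-allocates a run and then trims pages off the front and back so that
 * the kept portion starts on the requested alignment.  The arithmetic is
 * isolated below; all quantities are in bytes, alignment is a power of
 * two, and the demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
#include <assert.h>
#include <stdint.h>
#include <stddef.h>

static uintptr_t
demo_align_within_run(uintptr_t run, size_t alloc_size, size_t alignment,
    size_t size, size_t *leadsize, size_t *trailsize)
{
	size_t offset = run & (alignment - 1);
	uintptr_t ret;

	*leadsize = (offset == 0) ? 0 : alignment - offset;
	ret = run + *leadsize;
	*trailsize = alloc_size - *leadsize - size;
	assert(*leadsize + size <= alloc_size);
	assert((ret & (alignment - 1)) == 0);
	return (ret);
}
#endif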
2401 /* Return the size of the allocation pointed to by ptr. */
2402 static size_t
2403 arena_salloc(const void *ptr)
2405 size_t ret;
2406 arena_chunk_t *chunk;
2407 arena_chunk_map_t *mapelm;
2408 unsigned pageind;
2410 assert(ptr != NULL);
2411 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2414 * No arena data structures that we query here can change in a way that
2415 * affects this function, so we don't need to lock.
2417 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2418 pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2419 pagesize_2pow);
2420 mapelm = &chunk->map[pageind];
2421 if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2422 pagesize_2pow)) {
2423 arena_run_t *run;
2425 pageind -= mapelm->pos;
2427 run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2428 pagesize_2pow));
2429 assert(run->magic == ARENA_RUN_MAGIC);
2430 ret = run->bin->reg_size;
2431 } else
2432 ret = mapelm->npages << pagesize_2pow;
2434 return (ret);
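/*
 * Illustrative note, not part of the allocator: arena_salloc() (and
 * arena_dalloc() below) find the run that a small pointer belongs to by
 * exploiting the fact that every page of a run records its position within
 * the run in map[].pos, so subtracting pos from the page index always
 * lands on the run's first page, where the arena_run_t header lives.  The
 * demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
static unsigned
demo_run_first_page(const unsigned *pos_map, unsigned pageind)
{

	return (pageind - pos_map[pageind]);
}
#endif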
2437 static void *
2438 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2440 void *ret;
2442 /* Avoid moving the allocation if the size class would not change. */
2443 if (size < small_min) {
2444 if (oldsize < small_min &&
2445 ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
2446 == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
2447 goto IN_PLACE;
2448 } else if (size <= small_max) {
2449 if (oldsize >= small_min && oldsize <= small_max &&
2450 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2451 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2452 goto IN_PLACE;
2453 } else {
2455 * We make no attempt to resize runs here, though it would be
2456 * possible to do so.
2458 if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
2459 goto IN_PLACE;
2463 * If we get here, then size and oldsize are different enough that we
2464 * need to use a different size class. In that case, fall back to
2465 * allocating new space and copying.
2467 ret = arena_malloc(choose_arena(), size);
2468 if (ret == NULL)
2469 return (NULL);
2471 /* Junk/zero-filling were already done by arena_malloc(). */
2472 if (size < oldsize)
2473 memcpy(ret, ptr, size);
2474 else
2475 memcpy(ret, ptr, oldsize);
2476 idalloc(ptr);
2477 return (ret);
2478 IN_PLACE:
2479 if (opt_junk && size < oldsize)
2480 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
2481 else if (opt_zero && size > oldsize)
2482 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
2483 return (ptr);
2486 static void
2487 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2489 unsigned pageind;
2490 arena_chunk_map_t *mapelm;
2491 size_t size;
2493 assert(arena != NULL);
2494 assert(arena->magic == ARENA_MAGIC);
2495 assert(chunk->arena == arena);
2496 assert(ptr != NULL);
2497 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2499 pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
2500 pagesize_2pow);
2501 mapelm = &chunk->map[pageind];
2502 if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
2503 pagesize_2pow)) {
2504 arena_run_t *run;
2505 arena_bin_t *bin;
2507 /* Small allocation. */
2509 pageind -= mapelm->pos;
2511 run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
2512 pagesize_2pow));
2513 assert(run->magic == ARENA_RUN_MAGIC);
2514 bin = run->bin;
2515 size = bin->reg_size;
2517 if (opt_junk)
2518 memset(ptr, 0x5a, size);
2520 malloc_mutex_lock(&arena->mtx);
2521 arena_run_reg_dalloc(run, bin, ptr, size);
2522 run->nfree++;
2524 if (run->nfree == bin->nregs) {
2525 /* Deallocate run. */
2526 if (run == bin->runcur)
2527 bin->runcur = NULL;
2528 else if (bin->nregs != 1) {
2530 * This block's conditional is necessary because
2531 * if the run only contains one region, then it
2532 * never gets inserted into the non-full runs
2533 * tree.
2535 /* LINTED */
2536 RB_REMOVE(arena_run_tree_s, &bin->runs, run);
2538 #ifdef MALLOC_DEBUG
2539 run->magic = 0;
2540 #endif
2541 arena_run_dalloc(arena, run, bin->run_size);
2542 #ifdef MALLOC_STATS
2543 bin->stats.curruns--;
2544 #endif
2545 } else if (run->nfree == 1 && run != bin->runcur) {
2547 * Make sure that bin->runcur always refers to the
2548 * lowest non-full run, if one exists.
2550 if (bin->runcur == NULL)
2551 bin->runcur = run;
2552 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
2553 /* Switch runcur. */
2554 if (bin->runcur->nfree > 0) {
2555 /* Insert runcur. */
2556 /* LINTED */
2557 RB_INSERT(arena_run_tree_s, &bin->runs,
2558 bin->runcur);
2560 bin->runcur = run;
2561 } else {
2562 /* LINTED */
2563 RB_INSERT(arena_run_tree_s, &bin->runs, run);
2566 #ifdef MALLOC_STATS
2567 arena->stats.allocated_small -= size;
2568 arena->stats.ndalloc_small++;
2569 #endif
2570 } else {
2571 /* Large allocation. */
2573 size = mapelm->npages << pagesize_2pow;
2574 assert((((uintptr_t)ptr) & pagesize_mask) == 0);
2576 if (opt_junk)
2577 memset(ptr, 0x5a, size);
2579 malloc_mutex_lock(&arena->mtx);
2580 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2581 #ifdef MALLOC_STATS
2582 arena->stats.allocated_large -= size;
2583 arena->stats.ndalloc_large++;
2584 #endif
2587 malloc_mutex_unlock(&arena->mtx);
2590 static bool
2591 arena_new(arena_t *arena)
2593 unsigned i;
2594 arena_bin_t *bin;
2595 size_t prev_run_size;
2597 malloc_mutex_init(&arena->mtx);
2599 #ifdef MALLOC_STATS
2600 memset(&arena->stats, 0, sizeof(arena_stats_t));
2601 #endif
2603 /* Initialize chunks. */
2604 RB_INIT(&arena->chunks);
2605 arena->spare = NULL;
2607 /* Initialize bins. */
2608 prev_run_size = pagesize;
2610 /* (2^n)-spaced tiny bins. */
2611 for (i = 0; i < ntbins; i++) {
2612 bin = &arena->bins[i];
2613 bin->runcur = NULL;
2614 RB_INIT(&bin->runs);
2616 bin->reg_size = (1 << (TINY_MIN_2POW + i));
2617 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2619 #ifdef MALLOC_STATS
2620 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2621 #endif
2624 /* Quantum-spaced bins. */
2625 for (; i < ntbins + nqbins; i++) {
2626 bin = &arena->bins[i];
2627 bin->runcur = NULL;
2628 RB_INIT(&bin->runs);
2630 bin->reg_size = quantum * (i - ntbins + 1);
2632 /* pow2_size = pow2_ceil(quantum * (i - ntbins + 1)); */
2634 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2636 #ifdef MALLOC_STATS
2637 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2638 #endif
2641 /* (2^n)-spaced sub-page bins. */
2642 for (; i < ntbins + nqbins + nsbins; i++) {
2643 bin = &arena->bins[i];
2644 bin->runcur = NULL;
2645 RB_INIT(&bin->runs);
2647 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2649 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
2651 #ifdef MALLOC_STATS
2652 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2653 #endif
2656 #ifdef MALLOC_DEBUG
2657 arena->magic = ARENA_MAGIC;
2658 #endif
2660 return (false);
2663 /* Create a new arena and insert it into the arenas array at index ind. */
2664 static arena_t *
2665 arenas_extend(unsigned ind)
2667 arena_t *ret;
2669 /* Allocate enough space for trailing bins. */
2670 ret = (arena_t *)base_alloc(sizeof(arena_t)
2671 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2672 if (ret != NULL && arena_new(ret) == false) {
2673 arenas[ind] = ret;
2674 return (ret);
2676 /* Only reached if there is an OOM error. */
2679 * OOM here is quite inconvenient to propagate, since dealing with it
2680 * would require a check for failure in the fast path. Instead, punt
2681 * by using arenas[0]. In practice, this is an extremely unlikely
2682 * failure.
2684 _malloc_message(getprogname(),
2685 ": (malloc) Error initializing arena\n", "", "");
2686 if (opt_abort)
2687 abort();
2689 return (arenas[0]);
2693 * End arena.
2695 /******************************************************************************/
2697 * Begin general internal functions.
2700 static void *
2701 huge_malloc(size_t size)
2703 void *ret;
2704 size_t csize;
2705 chunk_node_t *node;
2707 /* Allocate one or more contiguous chunks for this request. */
2709 csize = CHUNK_CEILING(size);
2710 if (csize == 0) {
2711 /* size is large enough to cause size_t wrap-around. */
2712 return (NULL);
2715 /* Allocate a chunk node with which to track the chunk. */
2716 node = base_chunk_node_alloc();
2717 if (node == NULL)
2718 return (NULL);
2720 ret = chunk_alloc(csize);
2721 if (ret == NULL) {
2722 base_chunk_node_dealloc(node);
2723 return (NULL);
2726 /* Insert node into huge. */
2727 node->chunk = ret;
2728 node->size = csize;
2730 malloc_mutex_lock(&chunks_mtx);
2731 RB_INSERT(chunk_tree_s, &huge, node);
2732 #ifdef MALLOC_STATS
2733 huge_nmalloc++;
2734 huge_allocated += csize;
2735 #endif
2736 malloc_mutex_unlock(&chunks_mtx);
2738 if (opt_junk)
2739 memset(ret, 0xa5, csize);
2740 else if (opt_zero)
2741 memset(ret, 0, csize);
2743 return (ret);
2746 /* Only handles large allocations that require more than chunk alignment. */
2747 static void *
2748 huge_palloc(size_t alignment, size_t size)
2750 void *ret;
2751 size_t alloc_size, chunk_size, offset;
2752 chunk_node_t *node;
2755 * This allocation requires alignment that is even larger than chunk
2756 * alignment. This means that huge_malloc() isn't good enough.
2758 * Allocate almost twice as many chunks as are demanded by the size or
2759 * alignment, in order to ensure that the alignment can be achieved, then
2760 * unmap leading and trailing chunks.
2762 assert(alignment >= chunksize);
2764 chunk_size = CHUNK_CEILING(size);
2766 if (size >= alignment)
2767 alloc_size = chunk_size + alignment - chunksize;
2768 else
2769 alloc_size = (alignment << 1) - chunksize;
2771 /* Allocate a chunk node with which to track the chunk. */
2772 node = base_chunk_node_alloc();
2773 if (node == NULL)
2774 return (NULL);
2776 ret = chunk_alloc(alloc_size);
2777 if (ret == NULL) {
2778 base_chunk_node_dealloc(node);
2779 return (NULL);
2782 offset = (uintptr_t)ret & (alignment - 1);
2783 assert((offset & chunksize_mask) == 0);
2784 assert(offset < alloc_size);
2785 if (offset == 0) {
2786 /* Trim trailing space. */
2787 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
2788 - chunk_size);
2789 } else {
2790 size_t trailsize;
2792 /* Trim leading space. */
2793 chunk_dealloc(ret, alignment - offset);
2795 ret = (void *)((uintptr_t)ret + (alignment - offset));
2797 trailsize = alloc_size - (alignment - offset) - chunk_size;
2798 if (trailsize != 0) {
2799 /* Trim trailing space. */
2800 assert(trailsize < alloc_size);
2801 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
2802 trailsize);
2806 /* Insert node into huge. */
2807 node->chunk = ret;
2808 node->size = chunk_size;
2810 malloc_mutex_lock(&chunks_mtx);
2811 RB_INSERT(chunk_tree_s, &huge, node);
2812 #ifdef MALLOC_STATS
2813 huge_nmalloc++;
2814 huge_allocated += chunk_size;
2815 #endif
2816 malloc_mutex_unlock(&chunks_mtx);
2818 if (opt_junk)
2819 memset(ret, 0xa5, chunk_size);
2820 else if (opt_zero)
2821 memset(ret, 0, chunk_size);
2823 return (ret);
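/*
 * Illustrative sketch, not part of the allocator: the sizing rule used by
 * huge_palloc() above.  Requesting alignment - chunksize extra bytes is
 * enough to guarantee that an `alignment'-aligned, chunk_size-byte region
 * fits somewhere inside the mapping, since chunk_alloc() already returns
 * chunk-aligned memory.  The demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stddef.h>

static size_t
demo_huge_alloc_size(size_t chunk_size, size_t alignment, size_t chunksize)
{

	if (chunk_size >= alignment)
		return (chunk_size + alignment - chunksize);
	return ((alignment << 1) - chunksize);
}
#endif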
2826 static void *
2827 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2829 void *ret;
2831 /* Avoid moving the allocation if the size class would not change. */
2832 if (oldsize > arena_maxclass &&
2833 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
2834 if (opt_junk && size < oldsize) {
2835 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
2836 - size);
2837 } else if (opt_zero && size > oldsize) {
2838 memset((void *)((uintptr_t)ptr + oldsize), 0, size
2839 - oldsize);
2841 return (ptr);
2844 if (CHUNK_ADDR2BASE(ptr) == ptr
2845 #ifdef USE_BRK
2846 && ((uintptr_t)ptr < (uintptr_t)brk_base
2847 || (uintptr_t)ptr >= (uintptr_t)brk_max)
2848 #endif
2850 chunk_node_t *node, key;
2851 void *newptr;
2852 size_t oldcsize;
2853 size_t newcsize;
2855 newcsize = CHUNK_CEILING(size);
2856 oldcsize = CHUNK_CEILING(oldsize);
2857 assert(oldcsize != newcsize);
2858 if (newcsize == 0) {
2859 /* size_t wrap-around */
2860 return (NULL);
2864 * Remove the old region from the tree now. If mremap()
2865 * returns the region to the system, another thread may
2866 * map it for the same huge allocation and insert it into
2867 * the tree before we acquire the mutex again.
2869 malloc_mutex_lock(&chunks_mtx);
2870 key.chunk = __DECONST(void *, ptr);
2871 /* LINTED */
2872 node = RB_FIND(chunk_tree_s, &huge, &key);
2873 assert(node != NULL);
2874 assert(node->chunk == ptr);
2875 assert(node->size == oldcsize);
2876 RB_REMOVE(chunk_tree_s, &huge, node);
2877 malloc_mutex_unlock(&chunks_mtx);
2879 newptr = mremap(ptr, oldcsize, NULL, newcsize,
2880 MAP_ALIGNED(chunksize_2pow));
2881 if (newptr == MAP_FAILED) {
2882 /* We still own the old region. */
2883 malloc_mutex_lock(&chunks_mtx);
2884 RB_INSERT(chunk_tree_s, &huge, node);
2885 malloc_mutex_unlock(&chunks_mtx);
2886 } else {
2887 assert(CHUNK_ADDR2BASE(newptr) == newptr);
2889 /* Insert new or resized old region. */
2890 malloc_mutex_lock(&chunks_mtx);
2891 node->size = newcsize;
2892 node->chunk = newptr;
2893 RB_INSERT(chunk_tree_s, &huge, node);
2894 #ifdef MALLOC_STATS
2895 huge_nralloc++;
2896 huge_allocated += newcsize - oldcsize;
2897 if (newcsize > oldcsize) {
2898 stats_chunks.curchunks +=
2899 (newcsize - oldcsize) / chunksize;
2900 if (stats_chunks.curchunks >
2901 stats_chunks.highchunks)
2902 stats_chunks.highchunks =
2903 stats_chunks.curchunks;
2904 } else {
2905 stats_chunks.curchunks -=
2906 (oldcsize - newcsize) / chunksize;
2908 #endif
2909 malloc_mutex_unlock(&chunks_mtx);
2911 if (opt_junk && size < oldsize) {
2912 memset((void *)((uintptr_t)newptr + size), 0x5a,
2913 newcsize - size);
2914 } else if (opt_zero && size > oldsize) {
2915 memset((void *)((uintptr_t)newptr + oldsize), 0,
2916 size - oldsize);
2918 return (newptr);
2923 * If we get here, then size and oldsize are different enough that we
2924 * need to use a different size class. In that case, fall back to
2925 * allocating new space and copying.
2927 ret = huge_malloc(size);
2928 if (ret == NULL)
2929 return (NULL);
2931 if (CHUNK_ADDR2BASE(ptr) == ptr) {
2932 /* The old allocation is a chunk. */
2933 if (size < oldsize)
2934 memcpy(ret, ptr, size);
2935 else
2936 memcpy(ret, ptr, oldsize);
2937 } else {
2938 /* The old allocation is a region. */
2939 assert(oldsize < size);
2940 memcpy(ret, ptr, oldsize);
2942 idalloc(ptr);
2943 return (ret);
2946 static void
2947 huge_dalloc(void *ptr)
2949 chunk_node_t key;
2950 chunk_node_t *node;
2952 malloc_mutex_lock(&chunks_mtx);
2954 /* Extract from tree of huge allocations. */
2955 key.chunk = ptr;
2956 /* LINTED */
2957 node = RB_FIND(chunk_tree_s, &huge, &key);
2958 assert(node != NULL);
2959 assert(node->chunk == ptr);
2960 /* LINTED */
2961 RB_REMOVE(chunk_tree_s, &huge, node);
2963 #ifdef MALLOC_STATS
2964 huge_ndalloc++;
2965 huge_allocated -= node->size;
2966 #endif
2968 malloc_mutex_unlock(&chunks_mtx);
2970 /* Unmap chunk. */
2971 #ifdef USE_BRK
2972 if (opt_junk)
2973 memset(node->chunk, 0x5a, node->size);
2974 #endif
2975 chunk_dealloc(node->chunk, node->size);
2977 base_chunk_node_dealloc(node);
2980 static void *
2981 imalloc(size_t size)
2983 void *ret;
2985 assert(size != 0);
2987 if (size <= arena_maxclass)
2988 ret = arena_malloc(choose_arena(), size);
2989 else
2990 ret = huge_malloc(size);
2992 return (ret);
2995 static void *
2996 ipalloc(size_t alignment, size_t size)
2998 void *ret;
2999 size_t ceil_size;
3002 * Round size up to the nearest multiple of alignment.
3004 * This done, we can take advantage of the fact that for each small
3005 * size class, every object is aligned at the smallest power of two
3006 * that is non-zero in the base two representation of the size. For
3007 * example:
3009 * Size | Base 2 | Minimum alignment
3010 * -----+----------+------------------
3011 * 96 | 1100000 | 32
3012 * 144 | 10100000 | 32
3013 * 192 | 11000000 | 64
3015 * Depending on runtime settings, it is possible that arena_malloc()
3016 * will further round up to a power of two, but that never causes
3017 * correctness issues.
3019 ceil_size = (size + (alignment - 1)) & (-alignment);
3021 * (ceil_size < size) protects against the combination of maximal
3022 * alignment and size greater than maximal alignment.
3024 if (ceil_size < size) {
3025 /* size_t overflow. */
3026 return (NULL);
3029 if (ceil_size <= pagesize || (alignment <= pagesize
3030 && ceil_size <= arena_maxclass))
3031 ret = arena_malloc(choose_arena(), ceil_size);
3032 else {
3033 size_t run_size;
3036 * We can't achieve sub-page alignment, so round up alignment
3037 * permanently; it makes later calculations simpler.
3039 alignment = PAGE_CEILING(alignment);
3040 ceil_size = PAGE_CEILING(size);
3042 * (ceil_size < size) protects against very large sizes within
3043 * pagesize of SIZE_T_MAX.
3045 * (ceil_size + alignment < ceil_size) protects against the
3046 * combination of maximal alignment and ceil_size large enough
3047 * to cause overflow. This is similar to the first overflow
3048 * check above, but it needs to be repeated due to the new
3049 * ceil_size value, which may now be *equal* to maximal
3050 * alignment, whereas before we only detected overflow if the
3051 * original size was *greater* than maximal alignment.
3053 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3054 /* size_t overflow. */
3055 return (NULL);
3059 * Calculate the size of the over-size run that arena_palloc()
3060 * would need to allocate in order to guarantee the alignment.
3062 if (ceil_size >= alignment)
3063 run_size = ceil_size + alignment - pagesize;
3064 else {
3066 * It is possible that (alignment << 1) will cause
3067 * overflow, but it doesn't matter because we also
3068 * subtract pagesize, which in the case of overflow
3069 * leaves us with a very large run_size. That causes
3070 * the first conditional below to fail, which means
3071 * that the bogus run_size value never gets used for
3072 * anything important.
3074 run_size = (alignment << 1) - pagesize;
3077 if (run_size <= arena_maxclass) {
3078 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3079 run_size);
3080 } else if (alignment <= chunksize)
3081 ret = huge_malloc(ceil_size);
3082 else
3083 ret = huge_palloc(alignment, ceil_size);
3086 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3087 return (ret);
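/*
 * Worked example, not part of the allocator: the two size computations in
 * ipalloc() above, assuming a 4 kB page.  For alignment <= pagesize the
 * request only needs to be rounded up to a multiple of the alignment;
 * otherwise the over-size run that arena_palloc() carves up is
 * ceil_size + alignment - pagesize.
 */
#if 0	/* example only, never compiled */
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	const size_t pagesize = 4096;
	size_t alignment, size, ceil_size, run_size;

	/* Small alignment: rounding alone is sufficient. */
	alignment = 64;
	size = 100;
	ceil_size = (size + (alignment - 1)) & ~(alignment - 1);
	printf("align 64, size 100 -> ceil_size %zu\n", ceil_size);   /* 128 */

	/* Alignment larger than a page: compute the over-size run. */
	alignment = 8192;
	size = 5000;
	ceil_size = (size + (pagesize - 1)) & ~(pagesize - 1);	     /* 8192 */
	run_size = (ceil_size >= alignment) ?
	    ceil_size + alignment - pagesize : (alignment << 1) - pagesize;
	printf("align 8192, size 5000 -> run_size %zu\n", run_size); /* 12288 */
	return (0);
}
#endif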
3090 static void *
3091 icalloc(size_t size)
3093 void *ret;
3095 if (size <= arena_maxclass) {
3096 ret = arena_malloc(choose_arena(), size);
3097 if (ret == NULL)
3098 return (NULL);
3099 memset(ret, 0, size);
3100 } else {
3102 * The virtual memory system provides zero-filled pages, so
3103 * there is no need to do so manually, unless opt_junk is
3104 * enabled, in which case huge_malloc() fills huge allocations
3105 * with junk.
3107 ret = huge_malloc(size);
3108 if (ret == NULL)
3109 return (NULL);
3111 if (opt_junk)
3112 memset(ret, 0, size);
3113 #ifdef USE_BRK
3114 else if ((uintptr_t)ret >= (uintptr_t)brk_base
3115 && (uintptr_t)ret < (uintptr_t)brk_max) {
3117 * This may be a re-used brk chunk. Therefore, zero
3118 * the memory.
3120 memset(ret, 0, size);
3122 #endif
3125 return (ret);
3128 static size_t
3129 isalloc(const void *ptr)
3131 size_t ret;
3132 arena_chunk_t *chunk;
3134 assert(ptr != NULL);
3136 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3137 if (chunk != ptr) {
3138 /* Region. */
3139 assert(chunk->arena->magic == ARENA_MAGIC);
3141 ret = arena_salloc(ptr);
3142 } else {
3143 chunk_node_t *node, key;
3145 /* Chunk (huge allocation). */
3147 malloc_mutex_lock(&chunks_mtx);
3149 /* Extract from tree of huge allocations. */
3150 key.chunk = __DECONST(void *, ptr);
3151 /* LINTED */
3152 node = RB_FIND(chunk_tree_s, &huge, &key);
3153 assert(node != NULL);
3155 ret = node->size;
3157 malloc_mutex_unlock(&chunks_mtx);
3160 return (ret);
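/*
 * Illustrative sketch, not part of the allocator: isalloc() (and idalloc()
 * below) distinguish huge allocations from arena-managed ones by masking
 * the pointer down to its chunk base.  Chunks are chunksize-aligned and
 * chunksize is a power of two, so a pointer that equals its own chunk base
 * must be a huge allocation.  The demo_* names are hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stdint.h>
#include <stddef.h>

static void *
demo_chunk_base(const void *ptr, size_t chunksize)
{

	return ((void *)((uintptr_t)ptr & ~(uintptr_t)(chunksize - 1)));
}

static int
demo_is_huge(const void *ptr, size_t chunksize)
{

	return (demo_chunk_base(ptr, chunksize) == ptr);
}
#endif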
3163 static void *
3164 iralloc(void *ptr, size_t size)
3166 void *ret;
3167 size_t oldsize;
3169 assert(ptr != NULL);
3170 assert(size != 0);
3172 oldsize = isalloc(ptr);
3174 if (size <= arena_maxclass)
3175 ret = arena_ralloc(ptr, size, oldsize);
3176 else
3177 ret = huge_ralloc(ptr, size, oldsize);
3179 return (ret);
3182 static void
3183 idalloc(void *ptr)
3185 arena_chunk_t *chunk;
3187 assert(ptr != NULL);
3189 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3190 if (chunk != ptr) {
3191 /* Region. */
3192 arena_dalloc(chunk->arena, chunk, ptr);
3193 } else
3194 huge_dalloc(ptr);
3197 static void
3198 malloc_print_stats(void)
3201 if (opt_print_stats) {
3202 char s[UMAX2S_BUFSIZE];
3203 _malloc_message("___ Begin malloc statistics ___\n", "", "",
3204 "");
3205 _malloc_message("Assertions ",
3206 #ifdef NDEBUG
3207 "disabled",
3208 #else
3209 "enabled",
3210 #endif
3211 "\n", "");
3212 _malloc_message("Boolean MALLOC_OPTIONS: ",
3213 opt_abort ? "A" : "a",
3214 opt_junk ? "J" : "j",
3215 opt_hint ? "H" : "h");
3216 _malloc_message(opt_utrace ? "PU" : "Pu",
3217 opt_sysv ? "V" : "v",
3218 opt_xmalloc ? "X" : "x",
3219 opt_zero ? "Z\n" : "z\n");
3221 _malloc_message("CPUs: ", size_t2s(ncpus, s), "\n", "");
3222 _malloc_message("Max arenas: ", size_t2s(narenas, s), "\n", "");
3223 _malloc_message("Pointer size: ", size_t2s(sizeof(void *), s),
3224 "\n", "");
3225 _malloc_message("Quantum size: ", size_t2s(quantum, s), "\n", "");
3226 _malloc_message("Max small size: ", size_t2s(small_max, s), "\n",
3227 "");
3229 _malloc_message("Chunk size: ", size_t2s(chunksize, s), "", "");
3230 _malloc_message(" (2^", size_t2s((size_t)opt_chunk_2pow, s),
3231 ")\n", "");
3233 #ifdef MALLOC_STATS
3235 size_t allocated, mapped;
3236 unsigned i;
3237 arena_t *arena;
3239 /* Calculate and print allocated/mapped stats. */
3241 /* arenas. */
3242 for (i = 0, allocated = 0; i < narenas; i++) {
3243 if (arenas[i] != NULL) {
3244 malloc_mutex_lock(&arenas[i]->mtx);
3245 allocated +=
3246 arenas[i]->stats.allocated_small;
3247 allocated +=
3248 arenas[i]->stats.allocated_large;
3249 malloc_mutex_unlock(&arenas[i]->mtx);
3253 /* huge/base. */
3254 malloc_mutex_lock(&chunks_mtx);
3255 allocated += huge_allocated;
3256 mapped = stats_chunks.curchunks * chunksize;
3257 malloc_mutex_unlock(&chunks_mtx);
3259 malloc_mutex_lock(&base_mtx);
3260 mapped += base_mapped;
3261 malloc_mutex_unlock(&base_mtx);
3263 malloc_printf("Allocated: %zu, mapped: %zu\n",
3264 allocated, mapped);
3266 /* Print chunk stats. */
3268 chunk_stats_t chunks_stats;
3270 malloc_mutex_lock(&chunks_mtx);
3271 chunks_stats = stats_chunks;
3272 malloc_mutex_unlock(&chunks_mtx);
3274 malloc_printf("chunks: nchunks "
3275 "highchunks curchunks\n");
3276 malloc_printf(" %13llu%13lu%13lu\n",
3277 chunks_stats.nchunks,
3278 chunks_stats.highchunks,
3279 chunks_stats.curchunks);
3282 /* Print huge allocation stats. */
3283 malloc_printf(
3284 "huge: nmalloc ndalloc "
3285 "nralloc allocated\n");
3286 malloc_printf(" %12llu %12llu %12llu %12zu\n",
3287 huge_nmalloc, huge_ndalloc, huge_nralloc,
3288 huge_allocated);
3290 /* Print stats for each arena. */
3291 for (i = 0; i < narenas; i++) {
3292 arena = arenas[i];
3293 if (arena != NULL) {
3294 malloc_printf(
3295 "\narenas[%u] @ %p\n", i, arena);
3296 malloc_mutex_lock(&arena->mtx);
3297 stats_print(arena);
3298 malloc_mutex_unlock(&arena->mtx);
3302 #endif /* #ifdef MALLOC_STATS */
3303 _malloc_message("--- End malloc statistics ---\n", "", "", "");
3308 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3309 * implementation has to take pains to avoid infinite recursion during
3310 * initialization.
3312 static inline bool
3313 malloc_init(void)
3316 if (malloc_initialized == false)
3317 return (malloc_init_hard());
3319 return (false);
3322 static bool
3323 malloc_init_hard(void)
3325 unsigned i, j;
3326 ssize_t linklen;
3327 char buf[PATH_MAX + 1];
3328 const char *opts = "";
3329 int serrno;
3331 malloc_mutex_lock(&init_lock);
3332 if (malloc_initialized) {
3334 * Another thread initialized the allocator before this one
3335 * acquired init_lock.
3337 malloc_mutex_unlock(&init_lock);
3338 return (false);
3341 serrno = errno;
3342 /* Get number of CPUs. */
3344 int mib[2];
3345 size_t len;
3347 mib[0] = CTL_HW;
3348 mib[1] = HW_NCPU;
3349 len = sizeof(ncpus);
3350 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3351 /* Error. */
3352 ncpus = 1;
3356 /* Get page size. */
3358 long result;
3360 result = sysconf(_SC_PAGESIZE);
3361 assert(result != -1);
3362 pagesize = (unsigned) result;
3365 * We assume that pagesize is a power of 2 when calculating
3366 * pagesize_mask and pagesize_2pow.
3368 assert(((result - 1) & result) == 0);
3369 pagesize_mask = result - 1;
3370 pagesize_2pow = ffs((int)result) - 1;
3373 for (i = 0; i < 3; i++) {
3374 /* Get runtime configuration. */
3375 switch (i) {
3376 case 0:
3377 if ((linklen = readlink("/etc/malloc.conf", buf,
3378 sizeof(buf) - 1)) != -1) {
3380 * Use the contents of the "/etc/malloc.conf"
3381 * symbolic link's name.
3383 buf[linklen] = '\0';
3384 opts = buf;
3385 } else {
3386 /* No configuration specified. */
3387 buf[0] = '\0';
3388 opts = buf;
3390 break;
3391 case 1:
3392 if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
3393 issetugid() == 0) {
3395 * Do nothing; opts is already initialized to
3396 * the value of the MALLOC_OPTIONS environment
3397 * variable.
3399 } else {
3400 /* No configuration specified. */
3401 buf[0] = '\0';
3402 opts = buf;
3404 break;
3405 case 2:
3406 if (_malloc_options != NULL) {
3408 * Use options that were compiled into the program.
3410 opts = _malloc_options;
3411 } else {
3412 /* No configuration specified. */
3413 buf[0] = '\0';
3414 opts = buf;
3416 break;
3417 default:
3418 /* NOTREACHED */
3419 /* LINTED */
3420 assert(false);
3423 for (j = 0; opts[j] != '\0'; j++) {
3424 switch (opts[j]) {
3425 case 'a':
3426 opt_abort = false;
3427 break;
3428 case 'A':
3429 opt_abort = true;
3430 break;
3431 case 'h':
3432 opt_hint = false;
3433 break;
3434 case 'H':
3435 opt_hint = true;
3436 break;
3437 case 'j':
3438 opt_junk = false;
3439 break;
3440 case 'J':
3441 opt_junk = true;
3442 break;
3443 case 'k':
3445 * Chunks always require at least one header
3446 * page, so chunks can never be smaller than
3447 * two pages.
3449 if (opt_chunk_2pow > pagesize_2pow + 1)
3450 opt_chunk_2pow--;
3451 break;
3452 case 'K':
3453 if (opt_chunk_2pow + 1 <
3454 (int)(sizeof(size_t) << 3))
3455 opt_chunk_2pow++;
3456 break;
3457 case 'n':
3458 opt_narenas_lshift--;
3459 break;
3460 case 'N':
3461 opt_narenas_lshift++;
3462 break;
3463 case 'p':
3464 opt_print_stats = false;
3465 break;
3466 case 'P':
3467 opt_print_stats = true;
3468 break;
3469 case 'q':
3470 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3471 opt_quantum_2pow--;
3472 break;
3473 case 'Q':
3474 if (opt_quantum_2pow < pagesize_2pow - 1)
3475 opt_quantum_2pow++;
3476 break;
3477 case 's':
3478 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3479 opt_small_max_2pow--;
3480 break;
3481 case 'S':
3482 if (opt_small_max_2pow < pagesize_2pow - 1)
3483 opt_small_max_2pow++;
3484 break;
3485 case 'u':
3486 opt_utrace = false;
3487 break;
3488 case 'U':
3489 opt_utrace = true;
3490 break;
3491 case 'v':
3492 opt_sysv = false;
3493 break;
3494 case 'V':
3495 opt_sysv = true;
3496 break;
3497 case 'x':
3498 opt_xmalloc = false;
3499 break;
3500 case 'X':
3501 opt_xmalloc = true;
3502 break;
3503 case 'z':
3504 opt_zero = false;
3505 break;
3506 case 'Z':
3507 opt_zero = true;
3508 break;
3509 default: {
3510 char cbuf[2];
3512 cbuf[0] = opts[j];
3513 cbuf[1] = '\0';
3514 _malloc_message(getprogname(),
3515 ": (malloc) Unsupported character in "
3516 "malloc options: '", cbuf, "'\n");
3521 errno = serrno;
3523 /* Take care to call atexit() only once. */
3524 if (opt_print_stats) {
3525 /* Print statistics at exit. */
3526 atexit(malloc_print_stats);
3529 /* Set variables according to the value of opt_small_max_2pow. */
3530 if (opt_small_max_2pow < opt_quantum_2pow)
3531 opt_small_max_2pow = opt_quantum_2pow;
3532 small_max = (1 << opt_small_max_2pow);
3534 /* Set bin-related variables. */
3535 bin_maxclass = (pagesize >> 1);
3536 assert(opt_quantum_2pow >= TINY_MIN_2POW);
3537 ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
3538 assert(ntbins <= opt_quantum_2pow);
3539 nqbins = (unsigned)(small_max >> opt_quantum_2pow);
3540 nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
3542 /* Set variables according to the value of opt_quantum_2pow. */
3543 quantum = (1 << opt_quantum_2pow);
3544 quantum_mask = quantum - 1;
3545 if (ntbins > 0)
3546 small_min = (quantum >> 1) + 1;
3547 else
3548 small_min = 1;
3549 assert(small_min <= quantum);
3551 /* Set variables according to the value of opt_chunk_2pow. */
3552 chunksize = (1LU << opt_chunk_2pow);
3553 chunksize_mask = chunksize - 1;
3554 chunksize_2pow = (unsigned)opt_chunk_2pow;
3555 chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
3557 unsigned header_size;
3559 header_size = (unsigned)(sizeof(arena_chunk_t) +
3560 (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
3561 arena_chunk_header_npages = (header_size >> pagesize_2pow);
3562 if ((header_size & pagesize_mask) != 0)
3563 arena_chunk_header_npages++;
3565 arena_maxclass = chunksize - (arena_chunk_header_npages <<
3566 pagesize_2pow);
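/*
 * Worked example, not part of the allocator: the derived parameters
 * computed above, under assumed defaults of 4 kB pages, a 16-byte quantum,
 * small_max = 512 and 1 MB chunks, and assuming the chunk header plus its
 * per-page map entries fit in a single page.
 */
#if 0	/* example only, never compiled */
#include <stdio.h>
#include <stddef.h>

int
main(void)
{
	const unsigned pagesize_2pow = 12, quantum_2pow = 4;
	const unsigned small_max_2pow = 9, tiny_min_2pow = 1;
	const size_t chunksize = 1U << 20;

	unsigned ntbins = quantum_2pow - tiny_min_2pow;		  /* 3: 2,4,8 */
	unsigned nqbins = (1U << small_max_2pow) >> quantum_2pow; /* 32 */
	unsigned nsbins = pagesize_2pow - small_max_2pow - 1;	  /* 2: 1k,2k */
	size_t chunk_npages = chunksize >> pagesize_2pow;	  /* 256 */
	size_t header_pages = 1;		/* assumed for this example */
	size_t arena_maxclass = chunksize - (header_pages << pagesize_2pow);

	printf("bins: %u tiny + %u quantum-spaced + %u sub-page\n",
	    ntbins, nqbins, nsbins);
	printf("%zu pages per chunk, arena_maxclass = %zu bytes\n",
	    chunk_npages, arena_maxclass);
	return (0);
}
#endif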
3568 UTRACE(0, 0, 0);
3570 #ifdef MALLOC_STATS
3571 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3572 #endif
3574 /* Various sanity checks that regard configuration. */
3575 assert(quantum >= sizeof(void *));
3576 assert(quantum <= pagesize);
3577 assert(chunksize >= pagesize);
3578 assert(quantum * 4 <= chunksize);
3580 /* Initialize chunks data. */
3581 malloc_mutex_init(&chunks_mtx);
3582 RB_INIT(&huge);
3583 #ifdef USE_BRK
3584 malloc_mutex_init(&brk_mtx);
3585 brk_base = sbrk(0);
3586 brk_prev = brk_base;
3587 brk_max = brk_base;
3588 #endif
3589 #ifdef MALLOC_STATS
3590 huge_nmalloc = 0;
3591 huge_ndalloc = 0;
3592 huge_nralloc = 0;
3593 huge_allocated = 0;
3594 #endif
3595 RB_INIT(&old_chunks);
3597 /* Initialize base allocation data structures. */
3598 #ifdef MALLOC_STATS
3599 base_mapped = 0;
3600 #endif
3601 #ifdef USE_BRK
3603 * Allocate a base chunk here, since it doesn't actually have to be
3604 * chunk-aligned. Doing this before allocating any other chunks allows
3605 * the use of space that would otherwise be wasted.
3607 base_pages_alloc(0);
3608 #endif
3609 base_chunk_nodes = NULL;
3610 malloc_mutex_init(&base_mtx);
3612 if (ncpus > 1) {
3614 * For SMP systems, create four times as many arenas as there
3615 * are CPUs by default.
3617 opt_narenas_lshift += 2;
3620 #ifdef NO_TLS
3621 /* Initialize arena key. */
3622 (void)thr_keycreate(&arenas_map_key, NULL);
3623 #endif
3625 /* Determine how many arenas to use. */
3626 narenas = ncpus;
3627 if (opt_narenas_lshift > 0) {
3628 if ((narenas << opt_narenas_lshift) > narenas)
3629 narenas <<= opt_narenas_lshift;
3631 * Make sure not to exceed the limits of what base_malloc()
3632 * can handle.
3634 if (narenas * sizeof(arena_t *) > chunksize)
3635 narenas = (unsigned)(chunksize / sizeof(arena_t *));
3636 } else if (opt_narenas_lshift < 0) {
3637 if ((narenas << opt_narenas_lshift) < narenas)
3638 narenas <<= opt_narenas_lshift;
3639 /* Make sure there is at least one arena. */
3640 if (narenas == 0)
3641 narenas = 1;
3644 next_arena = 0;
3646 /* Allocate and initialize arenas. */
3647 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3648 if (arenas == NULL) {
3649 malloc_mutex_unlock(&init_lock);
3650 return (true);
3653 * Zero the array. In practice, this should always be pre-zeroed,
3654 * since it was just mmap()ed, but let's be sure.
3656 memset(arenas, 0, sizeof(arena_t *) * narenas);
3659 * Initialize one arena here. The rest are lazily created in
3660 * arena_choose_hard().
3662 arenas_extend(0);
3663 if (arenas[0] == NULL) {
3664 malloc_mutex_unlock(&init_lock);
3665 return (true);
3668 malloc_mutex_init(&arenas_mtx);
3670 malloc_initialized = true;
3671 malloc_mutex_unlock(&init_lock);
3672 return (false);
3676 * End general internal functions.
3678 /******************************************************************************/
3680 * Begin malloc(3)-compatible functions.
3683 void *
3684 malloc(size_t size)
3686 void *ret;
3688 if (malloc_init()) {
3689 ret = NULL;
3690 goto RETURN;
3693 if (size == 0) {
3694 if (opt_sysv == false)
3695 size = 1;
3696 else {
3697 ret = NULL;
3698 goto RETURN;
3702 ret = imalloc(size);
3704 RETURN:
3705 if (ret == NULL) {
3706 if (opt_xmalloc) {
3707 _malloc_message(getprogname(),
3708 ": (malloc) Error in malloc(): out of memory\n", "",
3709 "");
3710 abort();
3712 errno = ENOMEM;
3715 UTRACE(0, size, ret);
3716 return (ret);
3720 posix_memalign(void **memptr, size_t alignment, size_t size)
3722 int ret;
3723 void *result;
3725 if (malloc_init())
3726 result = NULL;
3727 else {
3728 /* Make sure that alignment is a large enough power of 2. */
3729 if (((alignment - 1) & alignment) != 0
3730 || alignment < sizeof(void *)) {
3731 if (opt_xmalloc) {
3732 _malloc_message(getprogname(),
3733 ": (malloc) Error in posix_memalign(): "
3734 "invalid alignment\n", "", "");
3735 abort();
3737 result = NULL;
3738 ret = EINVAL;
3739 goto RETURN;
3742 result = ipalloc(alignment, size);
3745 if (result == NULL) {
3746 if (opt_xmalloc) {
3747 _malloc_message(getprogname(),
3748 ": (malloc) Error in posix_memalign(): out of memory\n",
3749 "", "");
3750 abort();
3752 ret = ENOMEM;
3753 goto RETURN;
3756 *memptr = result;
3757 ret = 0;
3759 RETURN:
3760 UTRACE(0, size, result);
3761 return (ret);
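/*
 * Illustrative note, not part of the allocator: the alignment validation
 * in posix_memalign() above relies on the standard bit trick that a
 * nonzero x is a power of two exactly when x & (x - 1) == 0; POSIX
 * additionally requires alignment >= sizeof(void *).  The demo_* name is
 * hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stddef.h>

static int
demo_valid_alignment(size_t alignment)
{

	return (alignment >= sizeof(void *) &&
	    ((alignment - 1) & alignment) == 0);
}
#endif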
3764 void *
3765 calloc(size_t num, size_t size)
3767 void *ret;
3768 size_t num_size;
3770 if (malloc_init()) {
3771 num_size = 0;
3772 ret = NULL;
3773 goto RETURN;
3776 num_size = num * size;
3777 if (num_size == 0) {
3778 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3779 num_size = 1;
3780 else {
3781 ret = NULL;
3782 goto RETURN;
3785 * Try to avoid division here. We know that it isn't possible to
3786 * overflow during multiplication if neither operand uses any of the
3787 * most significant half of the bits in a size_t.
3789 } else if ((unsigned long long)((num | size) &
3790 ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
3791 (num_size / size != num)) {
3792 /* size_t overflow. */
3793 ret = NULL;
3794 goto RETURN;
3797 ret = icalloc(num_size);
3799 RETURN:
3800 if (ret == NULL) {
3801 if (opt_xmalloc) {
3802 _malloc_message(getprogname(),
3803 ": (malloc) Error in calloc(): out of memory\n", "",
3804 "");
3805 abort();
3807 errno = ENOMEM;
3810 UTRACE(0, num_size, ret);
3811 return (ret);
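/*
 * Illustrative sketch, not part of the allocator: the overflow shortcut
 * used by calloc() above.  If neither operand has a bit set in the upper
 * half of a size_t, the product cannot overflow, so the division-based
 * check only runs in the rare remaining cases.  Assumes CHAR_BIT == 8;
 * the demo_* name is hypothetical.
 */
#if 0	/* example only, never compiled */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
demo_mul_overflows(size_t num, size_t size)
{
	const unsigned half_bits = sizeof(size_t) * 4;	/* half of the bits */
	const size_t high_half = SIZE_MAX << half_bits;

	if (((num | size) & high_half) == 0)
		return (false);		/* both operands fit in half a word */
	return (size != 0 && num * size / size != num);
}
#endif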
3814 void *
3815 realloc(void *ptr, size_t size)
3817 void *ret;
3819 if (size == 0) {
3820 if (opt_sysv == false)
3821 size = 1;
3822 else {
3823 if (ptr != NULL)
3824 idalloc(ptr);
3825 ret = NULL;
3826 goto RETURN;
3830 if (ptr != NULL) {
3831 assert(malloc_initialized);
3833 ret = iralloc(ptr, size);
3835 if (ret == NULL) {
3836 if (opt_xmalloc) {
3837 _malloc_message(getprogname(),
3838 ": (malloc) Error in realloc(): out of "
3839 "memory\n", "", "");
3840 abort();
3842 errno = ENOMEM;
3844 } else {
3845 if (malloc_init())
3846 ret = NULL;
3847 else
3848 ret = imalloc(size);
3850 if (ret == NULL) {
3851 if (opt_xmalloc) {
3852 _malloc_message(getprogname(),
3853 ": (malloc) Error in realloc(): out of "
3854 "memory\n", "", "");
3855 abort();
3857 errno = ENOMEM;
3861 RETURN:
3862 UTRACE(ptr, size, ret);
3863 return (ret);
3866 void
3867 free(void *ptr)
3870 UTRACE(ptr, 0, 0);
3871 if (ptr != NULL) {
3872 assert(malloc_initialized);
3874 idalloc(ptr);
3879 * End malloc(3)-compatible functions.
3881 /******************************************************************************/
3883 * Begin non-standard functions.
3885 #ifndef __NetBSD__
3886 size_t
3887 malloc_usable_size(const void *ptr)
3890 assert(ptr != NULL);
3892 return (isalloc(ptr));
3894 #endif
3897 * End non-standard functions.
3899 /******************************************************************************/
3901 * Begin library-private functions, used by threading libraries for protection
3902 * of malloc during fork(). These functions are only called if the program is
3903 * running in threaded mode, so there is no need to check whether the program
3904 * is threaded here.
3907 void
3908 _malloc_prefork(void)
3910 unsigned i;
3912 /* Acquire all mutexes in a safe order. */
3914 malloc_mutex_lock(&arenas_mtx);
3915 for (i = 0; i < narenas; i++) {
3916 if (arenas[i] != NULL)
3917 malloc_mutex_lock(&arenas[i]->mtx);
3919 malloc_mutex_unlock(&arenas_mtx);
3921 malloc_mutex_lock(&base_mtx);
3923 malloc_mutex_lock(&chunks_mtx);
3926 void
3927 _malloc_postfork(void)
3929 unsigned i;
3931 /* Release all mutexes, now that fork() has completed. */
3933 malloc_mutex_unlock(&chunks_mtx);
3935 malloc_mutex_unlock(&base_mtx);
3937 malloc_mutex_lock(&arenas_mtx);
3938 for (i = 0; i < narenas; i++) {
3939 if (arenas[i] != NULL)
3940 malloc_mutex_unlock(&arenas[i]->mtx);
3942 malloc_mutex_unlock(&arenas_mtx);
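/*
 * Illustrative sketch, not part of this file: how a threading library
 * might register the hooks above with pthread_atfork(3), so that fork()
 * never snapshots the allocator while one of its mutexes is held.  The
 * actual registration lives in the threading library, and the demo_* name
 * is hypothetical; using _malloc_postfork() as the child handler assumes
 * the mutexes can simply be unlocked in the child.
 */
#if 0	/* example only, never compiled */
#include <pthread.h>

void	_malloc_prefork(void);
void	_malloc_postfork(void);

static void
demo_register_malloc_fork_hooks(void)
{

	(void)pthread_atfork(_malloc_prefork, _malloc_postfork,
	    _malloc_postfork);
}
#endif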
3946 * End library-private functions.
3948 /******************************************************************************/