1 /* $NetBSD: jemalloc.c,v 1.21 2010/03/04 22:48:31 enami Exp $ */
4 * Copyright (C) 2006,2007 Jason Evans <jasone@FreeBSD.org>.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice(s), this list of conditions and the following disclaimer as
12 * the first lines of this file unmodified other than the possible
13 * addition of one or more copyright notices.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice(s), this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 *******************************************************************************
33 * This allocator implementation is designed to provide scalable performance
34 * for multi-threaded programs on multi-processor systems. The following
35 * features are included for this purpose:
37 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
38 * contention and cache sloshing.
40 * + Cache line sharing between arenas is avoided for internal data
43 * + Memory is managed in chunks and runs (chunks can be split into runs),
44 * rather than as individual pages. This provides a constant-time
45 * mechanism for associating allocations with particular arenas.
47 * Allocation requests are rounded up to the nearest size class, and no record
48 * of the original request size is maintained. Allocations are broken into
49 * categories according to size class. Assuming runtime defaults, 4 kB pages
50 * and a 16 byte quantum, the size classes in each category are as follows:
52 * |=====================================|
53 * | Category | Subcategory | Size |
54 * |=====================================|
55 * | Small | Tiny | 2 |
58 * | |----------------+---------|
59 * | | Quantum-spaced | 16 |
66 * | |----------------+---------|
67 * | | Sub-page | 1 kB |
69 * |=====================================|
77 * |=====================================|
82 * |=====================================|
84 * A different mechanism is used for each category:
86 * Small : Each size class is segregated into its own set of runs. Each run
87 * maintains a bitmap of which regions are free/allocated.
89 * Large : Each allocation is backed by a dedicated run. Metadata are stored
90 * in the associated arena chunk header maps.
92 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
93 * Metadata are stored in a separate red-black tree.
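 *
 * Illustrative examples of the rounding above (a sketch only, assuming the
 * runtime defaults of 4 kB pages and a 16 byte quantum; these requests and
 * results are not taken from the code):
 *
 *   malloc(3)     -> tiny,           rounded up to 4 bytes
 *   malloc(100)   -> quantum-spaced, rounded up to 112 bytes
 *   malloc(1500)  -> sub-page,       rounded up to 2 kB
 *   malloc(5000)  -> large,          rounded up to 8 kB (a page multiple)
 *   malloc(2 MB)  -> huge,           rounded up to a multiple of the chunk size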
95 *******************************************************************************
101 # define xutrace(a, b) utrace("malloc", (a), (b))
102 # define __DECONST(x, y) ((x)__UNCONST(y))
105 # define xutrace(a, b) utrace((a), (b))
106 #endif /* __NetBSD__ */
109 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
110 * defaults the A and J runtime options to off. These settings are appropriate
111 * for production systems.
113 #define MALLOC_PRODUCTION
115 #ifndef MALLOC_PRODUCTION
116 # define MALLOC_DEBUG
119 #include <sys/cdefs.h>
120 /* __FBSDID("$FreeBSD: src/lib/libc/stdlib/malloc.c,v 1.147 2007/06/15 22:00:16 jasone Exp $"); */
121 __RCSID("$NetBSD: jemalloc.c,v 1.21 2010/03/04 22:48:31 enami Exp $");
124 #include "libc_private.h"
128 #include "spinlock.h"
130 #include "namespace.h"
131 #include <sys/mman.h>
132 #include <sys/param.h>
134 #include <sys/stddef.h>
136 #include <sys/time.h>
137 #include <sys/types.h>
138 #include <sys/sysctl.h>
139 #include <sys/tree.h>
141 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
144 #include <machine/atomic.h>
145 #include <machine/cpufunc.h>
147 #include <machine/vmparam.h>
163 # include <reentrant.h>
166 #define STRERROR_R(a, b, c) __strerror_r(a, b, c);
168 * A non-localized version of strerror() that avoids bringing in
169 * stdio and the locale code. All the malloc messages are in English
173 __strerror_r(int e, char *s, size_t l)
178 if (e >= 0 && e < sys_nerr) {
179 slen = strlcpy(s, sys_errlist[e], l);
182 slen = snprintf_ss(s, l, "Unknown error %u", e);
185 return slen >= l ? ERANGE : rval;
190 #define STRERROR_R(a, b, c) strerror_r(a, b, c);
191 #include "un-namespace.h"
194 /* MALLOC_STATS enables statistics calculation. */
195 #ifndef MALLOC_PRODUCTION
196 # define MALLOC_STATS
211 /* Disable inlining to make debugging easier. */
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define STRERROR_BUF 64
218 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
220 # define QUANTUM_2POW_MIN 4
221 # define SIZEOF_PTR_2POW 2
225 # define QUANTUM_2POW_MIN 4
226 # define SIZEOF_PTR_2POW 3
229 # define QUANTUM_2POW_MIN 4
230 # define SIZEOF_PTR_2POW 3
234 # define QUANTUM_2POW_MIN 4
235 # define SIZEOF_PTR_2POW 3
239 # define QUANTUM_2POW_MIN 4
240 # define SIZEOF_PTR_2POW 3
243 # define QUANTUM_2POW_MIN 3
244 # define SIZEOF_PTR_2POW 2
249 # define QUANTUM_2POW_MIN 4
250 # define SIZEOF_PTR_2POW 2
253 #if defined(__sparc__) && !defined(__sparc64__)
254 # define QUANTUM_2POW_MIN 4
255 # define SIZEOF_PTR_2POW 2
259 # define QUANTUM_2POW_MIN 4
260 # define SIZEOF_PTR_2POW 2
264 # define QUANTUM_2POW_MIN 4
265 # define SIZEOF_PTR_2POW 2
269 # define QUANTUM_2POW_MIN 4
270 # define SIZEOF_PTR_2POW 2
274 # define QUANTUM_2POW_MIN 4
275 # define SIZEOF_PTR_2POW 2
279 # define QUANTUM_2POW_MIN 4
280 # define SIZEOF_PTR_2POW 2
284 #define SIZEOF_PTR (1 << SIZEOF_PTR_2POW)
286 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
287 #ifndef SIZEOF_INT_2POW
288 # define SIZEOF_INT_2POW 2
291 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
292 #if (!defined(PIC) && !defined(NO_TLS))
297 * Size and alignment of memory chunks that are allocated by the OS's virtual
300 #define CHUNK_2POW_DEFAULT 20
303 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
304 * so over-estimates are okay (up to a point), but under-estimates will
305 * negatively affect performance.
307 #define CACHELINE_2POW 6
308 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
310 /* Smallest size class to support. */
311 #define TINY_MIN_2POW 1
314 * Maximum size class that is a multiple of the quantum, but not (necessarily)
315 * a power of 2. Above this size, allocations are rounded up to the nearest
318 #define SMALL_MAX_2POW_DEFAULT 9
319 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
322 * Maximum desired run header overhead. Runs are sized as small as possible
323 * such that this setting is still honored, without violating other constraints.
324 * The goal is to make runs as small as possible without exceeding a per run
325 * external fragmentation threshold.
327 * Note that it is possible to set this low enough that it cannot be honored
328 * for some/all object sizes, since there is one bit of header overhead per
329 * object (plus a constant). In such cases, this constraint is relaxed.
331 * RUN_MAX_OVRHD_RELAX specifies the maximum number of bits per region of
332 * overhead for which RUN_MAX_OVRHD is relaxed.
334 #define RUN_MAX_OVRHD 0.015
335 #define RUN_MAX_OVRHD_RELAX 1.5
337 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
338 #define RUN_MAX_SMALL_2POW 15
339 #define RUN_MAX_SMALL (1 << RUN_MAX_SMALL_2POW)
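/*
 * Worked example of the overhead cap (an illustration with assumed sizes,
 * not code): a 4 kB run of 16 byte regions holds on the order of 250
 * regions, so its header needs ~250 bitmap bits (~32 bytes) plus the fixed
 * arena_run_t fields, i.e. roughly 1-2% of the run, which is about at the
 * RUN_MAX_OVRHD limit.  For the smallest tiny classes one bit per region is
 * already more than 1.5% of a region (one bit per 2-byte region is ~6%),
 * which is the case RUN_MAX_OVRHD_RELAX exists to relax.
 */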
341 /******************************************************************************/
345 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
346 * they require malloc()ed memory.
352 /* Set to true once the allocator has been initialized. */
353 static bool malloc_initialized = false;
355 /* Used to avoid initialization races. */
356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
358 #define malloc_mutex_t mutex_t
360 /* Set to true once the allocator has been initialized. */
361 static bool malloc_initialized = false;
363 /* Used to avoid initialization races. */
364 static mutex_t init_lock = MUTEX_INITIALIZER;
367 /******************************************************************************/
369 * Statistics data structures.
374 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
375 struct malloc_bin_stats_s {
377 * Number of allocation requests that corresponded to the size of this
382 /* Total number of runs created for this bin's size class. */
386 * Total number of runs reused by extracting them from the runs tree for
387 * this bin's size class.
391 /* High-water mark for this bin. */
392 unsigned long highruns;
394 /* Current number of runs in this bin. */
395 unsigned long curruns;
398 typedef struct arena_stats_s arena_stats_t;
399 struct arena_stats_s {
400 /* Number of bytes currently mapped. */
403 /* Per-size-category statistics. */
404 size_t allocated_small;
405 uint64_t nmalloc_small;
406 uint64_t ndalloc_small;
408 size_t allocated_large;
409 uint64_t nmalloc_large;
410 uint64_t ndalloc_large;
413 typedef struct chunk_stats_s chunk_stats_t;
414 struct chunk_stats_s {
415 /* Number of chunks that were allocated. */
418 /* High-water mark for number of chunks allocated. */
419 unsigned long highchunks;
422 * Current number of chunks allocated. This value isn't maintained for
423 * any other purpose, so keep track of it in order to be able to set
426 unsigned long curchunks;
429 #endif /* #ifdef MALLOC_STATS */
431 /******************************************************************************/
433 * Chunk data structures.
436 /* Tree of chunks. */
437 typedef struct chunk_node_s chunk_node_t;
438 struct chunk_node_s {
439 /* Linkage for the chunk tree. */
440 RB_ENTRY(chunk_node_s) link;
443 * Pointer to the chunk that this tree node is responsible for. In some
444 * (but certainly not all) cases, this data structure is placed at the
445 * beginning of the corresponding chunk, so this field may point to this
450 /* Total chunk size. */
453 typedef struct chunk_tree_s chunk_tree_t;
454 RB_HEAD(chunk_tree_s, chunk_node_s);
456 /******************************************************************************/
458 * Arena data structures.
461 typedef struct arena_s arena_t;
462 typedef struct arena_bin_s arena_bin_t;
464 typedef struct arena_chunk_map_s arena_chunk_map_t;
465 struct arena_chunk_map_s {
466 /* Number of pages in run. */
469 * Position within run. For a free run, this is POS_FREE for the first
470 * and last pages. The POS_FREE special value makes it possible to
471 * quickly coalesce free runs.
473 * This is the limiting factor for chunksize; there can be at most 2^31
476 #define POS_FREE ((uint32_t)0xffffffffU)
480 /* Arena chunk header. */
481 typedef struct arena_chunk_s arena_chunk_t;
482 struct arena_chunk_s {
483 /* Arena that owns the chunk. */
486 /* Linkage for the arena's chunk tree. */
487 RB_ENTRY(arena_chunk_s) link;
490 * Number of pages in use. This is maintained in order to make
491 * detection of empty chunks fast.
496 * Every time a free run larger than this value is created/coalesced,
497 * this value is increased. The only way that the value decreases is if
498 * arena_run_alloc() fails to find a free run as large as advertised by
501 uint32_t max_frun_npages;
504 * Every time a free run that starts at an earlier page than this value
505 * is created/coalesced, this value is decreased. It is reset in a
506 * similar fashion to max_frun_npages.
508 uint32_t min_frun_ind;
511 * Map of pages within chunk that keeps track of free/large/small. For
512 * free runs, only the map entries for the first and last pages are
513 * kept up to date, so that free runs can be quickly coalesced.
515 arena_chunk_map_t map[1]; /* Dynamically sized. */
517 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
518 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
520 typedef struct arena_run_s arena_run_t;
522 /* Linkage for run trees. */
523 RB_ENTRY(arena_run_s) link;
527 # define ARENA_RUN_MAGIC 0x384adf93
530 /* Bin this run is associated with. */
533 /* Index of first element that might have a free region. */
534 unsigned regs_minelm;
536 /* Number of free regions in run. */
539 /* Bitmask of in-use regions (0: in use, 1: free). */
540 unsigned regs_mask[1]; /* Dynamically sized. */
542 typedef struct arena_run_tree_s arena_run_tree_t;
543 RB_HEAD(arena_run_tree_s, arena_run_s);
547 * Current run being used to service allocations of this bin's size
553 * Tree of non-full runs. This tree is used when looking for an
554 * existing run when runcur is no longer usable. We choose the
555 * non-full run that is lowest in memory; this policy tends to keep
556 * objects packed well, and it can also help reduce the number of
557 * almost-empty chunks.
559 arena_run_tree_t runs;
561 /* Size of regions in a run for this bin's size class. */
564 /* Total size of a run for this bin's size class. */
567 /* Total number of regions in a run for this bin's size class. */
570 /* Number of elements in a run's regs_mask for this bin's size class. */
571 uint32_t regs_mask_nelms;
573 /* Offset of first region in a run for this bin's size class. */
574 uint32_t reg0_offset;
577 /* Bin statistics. */
578 malloc_bin_stats_t stats;
585 # define ARENA_MAGIC 0x947d3d24
588 /* All operations on this arena require that mtx be locked. */
596 * Tree of chunks this arena manages.
598 arena_chunk_tree_t chunks;
601 * In order to avoid rapid chunk allocation/deallocation when an arena
602 * oscillates right on the cusp of needing a new chunk, cache the most
603 * recently freed chunk. This caching is disabled by opt_hint.
605 * There is one spare chunk per arena, rather than one spare total, in
606 * order to avoid interactions between multiple threads that could make
607 * a single spare inadequate.
609 arena_chunk_t *spare;
612 * bins is used to store rings of free regions of the following sizes,
613 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
634 arena_bin_t bins[1]; /* Dynamically sized. */
637 /******************************************************************************/
642 /* Number of CPUs. */
643 static unsigned ncpus;
646 static size_t pagesize;
647 static size_t pagesize_mask;
648 static int pagesize_2pow;
650 /* Various bin-related settings. */
651 static size_t bin_maxclass; /* Max size class for bins. */
652 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
653 static unsigned nqbins; /* Number of quantum-spaced bins. */
654 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
655 static size_t small_min;
656 static size_t small_max;
658 /* Various quantum-related settings. */
659 static size_t quantum;
660 static size_t quantum_mask; /* (quantum - 1). */
662 /* Various chunk-related settings. */
663 static size_t chunksize;
664 static size_t chunksize_mask; /* (chunksize - 1). */
665 static int chunksize_2pow;
666 static unsigned chunk_npages;
667 static unsigned arena_chunk_header_npages;
668 static size_t arena_maxclass; /* Max size class for arenas. */
675 /* Protects chunk-related data structures. */
676 static malloc_mutex_t chunks_mtx;
678 /* Tree of chunks that are stand-alone huge allocations. */
679 static chunk_tree_t huge;
683 * Try to use brk for chunk-size allocations, due to address space constraints.
686 * Protects sbrk() calls. This must be separate from chunks_mtx, since
687 * base_pages_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
688 * could cause recursive lock acquisition).
690 static malloc_mutex_t brk_mtx;
691 /* Result of first sbrk(0) call. */
692 static void *brk_base;
693 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
694 static void *brk_prev;
695 /* Current upper limit on brk addresses. */
696 static void *brk_max;
700 /* Huge allocation statistics. */
701 static uint64_t huge_nmalloc;
702 static uint64_t huge_ndalloc;
703 static uint64_t huge_nralloc;
704 static size_t huge_allocated;
708 * Tree of chunks that were previously allocated. This is used when allocating
709 * chunks, in an attempt to re-use address space.
711 static chunk_tree_t old_chunks;
713 /****************************/
715 * base (internal allocation).
719 * Current pages that are being used for internal memory allocations. These
720 * pages are carved up in cacheline-size quanta, so that there is no chance of
721 * false cache line sharing.
723 static void *base_pages;
724 static void *base_next_addr;
725 static void *base_past_addr; /* Addr immediately past base_pages. */
726 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
727 static malloc_mutex_t base_mtx;
729 static size_t base_mapped;
738 * Arenas that are used to service external requests. Not all elements of the
739 * arenas array are necessarily used; arenas are created lazily as needed.
741 static arena_t **arenas;
742 static unsigned narenas;
743 static unsigned next_arena;
744 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
748 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
751 static __thread arena_t *arenas_map;
752 #define get_arenas_map() (arenas_map)
753 #define set_arenas_map(x) (arenas_map = x)
755 static thread_key_t arenas_map_key;
756 #define get_arenas_map() thr_getspecific(arenas_map_key)
757 #define set_arenas_map(x) thr_setspecific(arenas_map_key, x)
761 /* Chunk statistics. */
762 static chunk_stats_t stats_chunks;
765 /*******************************/
767 * Runtime configuration options.
769 const char *_malloc_options;
771 #ifndef MALLOC_PRODUCTION
772 static bool opt_abort = true;
773 static bool opt_junk = true;
775 static bool opt_abort = false;
776 static bool opt_junk = false;
778 static bool opt_hint = false;
779 static bool opt_print_stats = false;
780 static int opt_quantum_2pow = QUANTUM_2POW_MIN;
781 static int opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
782 static int opt_chunk_2pow = CHUNK_2POW_DEFAULT;
783 static bool opt_utrace = false;
784 static bool opt_sysv = false;
785 static bool opt_xmalloc = false;
786 static bool opt_zero = false;
787 static int32_t opt_narenas_lshift = 0;
795 #define UTRACE(a, b, c) \
797 malloc_utrace_t ut; \
801 xutrace(&ut, sizeof(ut)); \
804 /******************************************************************************/
806 * Begin function prototypes for non-inline static functions.
809 static void wrtmessage(const char *p1, const char *p2, const char *p3,
812 static void malloc_printf(const char *format, ...);
814 static char *umax2s(uintmax_t x, char *s);
815 static bool base_pages_alloc(size_t minsize);
816 static void *base_alloc(size_t size);
817 static chunk_node_t *base_chunk_node_alloc(void);
818 static void base_chunk_node_dealloc(chunk_node_t *node);
820 static void stats_print(arena_t *arena);
822 static void *pages_map(void *addr, size_t size);
823 static void *pages_map_align(void *addr, size_t size, int align);
824 static void pages_unmap(void *addr, size_t size);
825 static void *chunk_alloc(size_t size);
826 static void chunk_dealloc(void *chunk, size_t size);
827 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size);
828 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
829 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
830 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size);
831 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
832 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
833 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
834 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
835 static void *arena_malloc(arena_t *arena, size_t size);
836 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
838 static size_t arena_salloc(const void *ptr);
839 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
840 static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
841 static bool arena_new(arena_t *arena);
842 static arena_t *arenas_extend(unsigned ind);
843 static void *huge_malloc(size_t size);
844 static void *huge_palloc(size_t alignment, size_t size);
845 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
846 static void huge_dalloc(void *ptr);
847 static void *imalloc(size_t size);
848 static void *ipalloc(size_t alignment, size_t size);
849 static void *icalloc(size_t size);
850 static size_t isalloc(const void *ptr);
851 static void *iralloc(void *ptr, size_t size);
852 static void idalloc(void *ptr);
853 static void malloc_print_stats(void);
854 static bool malloc_init_hard(void);
857 * End function prototypes.
859 /******************************************************************************/
865 #define malloc_mutex_init(m) mutex_init(m, NULL)
866 #define malloc_mutex_lock(m) mutex_lock(m)
867 #define malloc_mutex_unlock(m) mutex_unlock(m)
868 #else /* __NetBSD__ */
870 malloc_mutex_init(malloc_mutex_t *a_mutex)
872 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
874 a_mutex->lock = lock;
878 malloc_mutex_lock(malloc_mutex_t *a_mutex)
882 _SPINLOCK(&a_mutex->lock);
886 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
890 _SPINUNLOCK(&a_mutex->lock);
892 #endif /* __NetBSD__ */
897 /******************************************************************************/
899 * Begin Utility functions/macros.
902 /* Return the chunk address for allocation address a. */
903 #define CHUNK_ADDR2BASE(a) \
904 ((void *)((uintptr_t)(a) & ~chunksize_mask))
906 /* Return the chunk offset of address a. */
907 #define CHUNK_ADDR2OFFSET(a) \
908 ((size_t)((uintptr_t)(a) & chunksize_mask))
910 /* Return the smallest chunk multiple that is >= s. */
911 #define CHUNK_CEILING(s) \
912 (((s) + chunksize_mask) & ~chunksize_mask)
914 /* Return the smallest cacheline multiple that is >= s. */
915 #define CACHELINE_CEILING(s) \
916 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
918 /* Return the smallest quantum multiple that is >= a. */
919 #define QUANTUM_CEILING(a) \
920 (((a) + quantum_mask) & ~quantum_mask)
922 /* Return the smallest pagesize multiple that is >= s. */
923 #define PAGE_CEILING(s) \
924 (((s) + pagesize_mask) & ~pagesize_mask)
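/*
 * Example of the ceiling macros above (illustrative values, assuming 4 kB
 * pages, a 16 byte quantum, and the default 1 MB chunk):
 *
 *   QUANTUM_CEILING(100) == 112        (next multiple of 16)
 *   PAGE_CEILING(5000)   == 8192       (next multiple of 4096)
 *   CHUNK_CEILING(1)     == 1048576    (next multiple of chunksize)
 *   CHUNK_ADDR2BASE(p)   == p with its low chunksize bits cleared, i.e. the
 *                           base address of the chunk containing p
 */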
926 /* Compute the smallest power of 2 that is >= x. */
937 #if (SIZEOF_PTR == 8)
945 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
948 write(STDERR_FILENO, p1, strlen(p1));
949 write(STDERR_FILENO, p2, strlen(p2));
950 write(STDERR_FILENO, p3, strlen(p3));
951 write(STDERR_FILENO, p4, strlen(p4));
954 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
955 const char *p4) = wrtmessage;
959 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
962 malloc_printf(const char *format, ...)
967 va_start(ap, format);
968 vsnprintf(buf, sizeof(buf), format, ap);
970 _malloc_message(buf, "", "", "");
975 * We don't want to depend on vsnprintf() for production builds, since that can
976 * cause unnecessary bloat for static binaries. umax2s() provides minimal
977 * integer printing functionality, so that malloc_printf() use can be limited to
980 #define UMAX2S_BUFSIZE 21
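/*
 * Usage sketch for umax2s() (illustration only; the call below is an
 * assumed example, not taken from this file): the digits are written at the
 * tail of the caller-supplied buffer and a pointer into that buffer is
 * returned, so the buffer must stay live while the result is used.
 *
 *   char buf[UMAX2S_BUFSIZE];
 *   _malloc_message("nchunks: ", umax2s(stats_chunks.nchunks, buf), "\n", "");
 */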
982 umax2s(uintmax_t x, char *s)
986 /* Make sure UMAX2S_BUFSIZE is large enough. */
988 assert(sizeof(uintmax_t) <= 8);
990 i = UMAX2S_BUFSIZE - 1;
994 s[i] = "0123456789"[(int)x % 10];
995 x /= (uintmax_t)10LL;
1001 /******************************************************************************/
1004 base_pages_alloc(size_t minsize
)
1010 * Do special brk allocation here, since base allocations don't need to
1013 if (brk_prev
!= (void *)-1) {
1018 csize
= CHUNK_CEILING(minsize
);
1020 malloc_mutex_lock(&brk_mtx
);
1022 /* Get the current end of brk. */
1026 * Calculate how much padding is necessary to
1027 * chunk-align the end of brk. Don't worry about
1028 * brk_cur not being chunk-aligned though.
1030 incr
= (intptr_t)chunksize
1031 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur
);
1033 if ((size_t)incr
< minsize
)
1036 brk_prev
= sbrk(incr
);
1037 if (brk_prev
== brk_cur
) {
1039 malloc_mutex_unlock(&brk_mtx
);
1040 base_pages
= brk_cur
;
1041 base_next_addr
= base_pages
;
1042 base_past_addr
= (void *)((uintptr_t)base_pages
1045 base_mapped
+= incr
;
1049 } while (brk_prev
!= (void *)-1);
1050 malloc_mutex_unlock(&brk_mtx
);
1054 * Failure during initialization doesn't matter, so avoid
1055 * falling through to the mmap-based page mapping code.
1060 assert(minsize
!= 0);
1061 csize
= PAGE_CEILING(minsize
);
1062 base_pages
= pages_map(NULL
, csize
);
1063 if (base_pages
== NULL
)
1065 base_next_addr
= base_pages
;
1066 base_past_addr
= (void *)((uintptr_t)base_pages
+ csize
);
1068 base_mapped
+= csize
;
1074 base_alloc(size_t size
)
1079 /* Round size up to nearest multiple of the cacheline size. */
1080 csize
= CACHELINE_CEILING(size
);
1082 malloc_mutex_lock(&base_mtx
);
1084 /* Make sure there's enough space for the allocation. */
1085 if ((uintptr_t)base_next_addr
+ csize
> (uintptr_t)base_past_addr
) {
1086 if (base_pages_alloc(csize
)) {
1093 ret
= base_next_addr
;
1094 base_next_addr
= (void *)((uintptr_t)base_next_addr
+ csize
);
1097 malloc_mutex_unlock(&base_mtx
);
1101 static chunk_node_t
*
1102 base_chunk_node_alloc(void)
1106 malloc_mutex_lock(&base_mtx
);
1107 if (base_chunk_nodes
!= NULL
) {
1108 ret
= base_chunk_nodes
;
1110 base_chunk_nodes
= *(chunk_node_t
**)ret
;
1111 malloc_mutex_unlock(&base_mtx
);
1113 malloc_mutex_unlock(&base_mtx
);
1114 ret
= (chunk_node_t
*)base_alloc(sizeof(chunk_node_t
));
1121 base_chunk_node_dealloc(chunk_node_t
*node
)
1124 malloc_mutex_lock(&base_mtx
);
1126 *(chunk_node_t
**)node
= base_chunk_nodes
;
1127 base_chunk_nodes
= node
;
1128 malloc_mutex_unlock(&base_mtx
);
1131 /******************************************************************************/
1135 stats_print(arena_t
*arena
)
1141 " allocated/mapped nmalloc ndalloc\n");
1143 malloc_printf("small: %12zu %-12s %12llu %12llu\n",
1144 arena
->stats
.allocated_small
, "", arena
->stats
.nmalloc_small
,
1145 arena
->stats
.ndalloc_small
);
1146 malloc_printf("large: %12zu %-12s %12llu %12llu\n",
1147 arena
->stats
.allocated_large
, "", arena
->stats
.nmalloc_large
,
1148 arena
->stats
.ndalloc_large
);
1149 malloc_printf("total: %12zu/%-12zu %12llu %12llu\n",
1150 arena
->stats
.allocated_small
+ arena
->stats
.allocated_large
,
1151 arena
->stats
.mapped
,
1152 arena
->stats
.nmalloc_small
+ arena
->stats
.nmalloc_large
,
1153 arena
->stats
.ndalloc_small
+ arena
->stats
.ndalloc_large
);
1155 malloc_printf("bins: bin size regs pgs requests newruns"
1156 " reruns maxruns curruns\n");
1157 for (i
= 0, gap_start
= -1; i
< ntbins
+ nqbins
+ nsbins
; i
++) {
1158 if (arena
->bins
[i
].stats
.nrequests
== 0) {
1159 if (gap_start
== -1)
1162 if (gap_start
!= -1) {
1163 if (i
> gap_start
+ 1) {
1164 /* Gap of more than one size class. */
1165 malloc_printf("[%u..%u]\n",
1168 /* Gap of one size class. */
1169 malloc_printf("[%u]\n", gap_start
);
1174 "%13u %1s %4u %4u %3u %9llu %9llu"
1175 " %9llu %7lu %7lu\n",
1177 i
< ntbins
? "T" : i
< ntbins
+ nqbins
? "Q" : "S",
1178 arena
->bins
[i
].reg_size
,
1179 arena
->bins
[i
].nregs
,
1180 arena
->bins
[i
].run_size
>> pagesize_2pow
,
1181 arena
->bins
[i
].stats
.nrequests
,
1182 arena
->bins
[i
].stats
.nruns
,
1183 arena
->bins
[i
].stats
.reruns
,
1184 arena
->bins
[i
].stats
.highruns
,
1185 arena
->bins
[i
].stats
.curruns
);
1188 if (gap_start
!= -1) {
1189 if (i
> gap_start
+ 1) {
1190 /* Gap of more than one size class. */
1191 malloc_printf("[%u..%u]\n", gap_start
, i
- 1);
1193 /* Gap of one size class. */
1194 malloc_printf("[%u]\n", gap_start
);
1201 * End Utility functions/macros.
1203 /******************************************************************************/
1205 * Begin chunk management functions.
1210 chunk_comp(chunk_node_t
*a
, chunk_node_t
*b
)
1216 if ((uintptr_t)a
->chunk
< (uintptr_t)b
->chunk
)
1218 else if (a
->chunk
== b
->chunk
)
1224 /* Generate red-black tree code for chunks. */
1225 RB_GENERATE_STATIC(chunk_tree_s
, chunk_node_s
, link
, chunk_comp
);
1229 pages_map_align(void *addr
, size_t size
, int align
)
1234 * We don't use MAP_FIXED here, because it can cause the *replacement*
1235 * of existing mappings, and we only want to create new mappings.
1237 ret
= mmap(addr
, size
, PROT_READ
| PROT_WRITE
,
1238 MAP_PRIVATE
| MAP_ANON
| MAP_ALIGNED(align
), -1, 0);
1239 assert(ret
!= NULL
);
1241 if (ret
== MAP_FAILED
)
1243 else if (addr
!= NULL
&& ret
!= addr
) {
1245 * We succeeded in mapping memory, but not in the right place.
1247 if (munmap(ret
, size
) == -1) {
1248 char buf
[STRERROR_BUF
];
1250 STRERROR_R(errno
, buf
, sizeof(buf
));
1251 _malloc_message(getprogname(),
1252 ": (malloc) Error in munmap(): ", buf
, "\n");
1259 assert(ret
== NULL
|| (addr
== NULL
&& ret
!= addr
)
1260 || (addr
!= NULL
&& ret
== addr
));
1265 pages_map(void *addr
, size_t size
)
1268 return pages_map_align(addr
, size
, 0);
1272 pages_unmap(void *addr
, size_t size
)
1275 if (munmap(addr
, size
) == -1) {
1276 char buf
[STRERROR_BUF
];
1278 STRERROR_R(errno
, buf
, sizeof(buf
));
1279 _malloc_message(getprogname(),
1280 ": (malloc) Error in munmap(): ", buf
, "\n");
1287 chunk_alloc(size_t size
)
1290 chunk_node_t
*tchunk
, *delchunk
;
1293 assert((size
& chunksize_mask
) == 0);
1295 malloc_mutex_lock(&chunks_mtx
);
1297 if (size
== chunksize
) {
1299 * Check for address ranges that were previously chunks and try
1304 tchunk
= RB_MIN(chunk_tree_s
, &old_chunks
);
1305 while (tchunk
!= NULL
) {
1306 /* Found an address range. Try to recycle it. */
1308 chunk
= tchunk
->chunk
;
1311 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1313 /* Remove delchunk from the tree. */
1315 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1316 base_chunk_node_dealloc(delchunk
);
1319 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1320 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1321 /* Re-use a previously freed brk chunk. */
1326 if ((ret
= pages_map(chunk
, size
)) != NULL
) {
1334 * Try to over-allocate, but allow the OS to place the allocation
1335 * anywhere. Beware of size_t wrap-around.
1337 if (size
+ chunksize
> size
) {
1338 if ((ret
= pages_map_align(NULL
, size
, chunksize_2pow
))
1346 * Try to create allocations in brk, in order to make full use of
1347 * limited address space.
1349 if (brk_prev
!= (void *)-1) {
1354 * The loop is necessary to recover from races with other
1355 * threads that are using brk for something other than malloc.
1357 malloc_mutex_lock(&brk_mtx
);
1359 /* Get the current end of brk. */
1363 * Calculate how much padding is necessary to
1364 * chunk-align the end of brk.
1366 incr
= (intptr_t)size
1367 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur
);
1368 if (incr
== (intptr_t)size
) {
1371 ret
= (void *)((intptr_t)brk_cur
+ incr
);
1375 brk_prev
= sbrk(incr
);
1376 if (brk_prev
== brk_cur
) {
1378 malloc_mutex_unlock(&brk_mtx
);
1379 brk_max
= (void *)((intptr_t)ret
+ size
);
1382 } while (brk_prev
!= (void *)-1);
1383 malloc_mutex_unlock(&brk_mtx
);
1387 /* All strategies for allocation failed. */
1393 * Clean out any entries in old_chunks that overlap with the
1394 * memory we just allocated.
1398 tchunk
= RB_NFIND(chunk_tree_s
, &old_chunks
, &key
);
1399 while (tchunk
!= NULL
1400 && (uintptr_t)tchunk
->chunk
>= (uintptr_t)ret
1401 && (uintptr_t)tchunk
->chunk
< (uintptr_t)ret
+ size
) {
1404 tchunk
= RB_NEXT(chunk_tree_s
, &old_chunks
, delchunk
);
1406 RB_REMOVE(chunk_tree_s
, &old_chunks
, delchunk
);
1407 base_chunk_node_dealloc(delchunk
);
1413 stats_chunks
.nchunks
+= (size
/ chunksize
);
1414 stats_chunks
.curchunks
+= (size
/ chunksize
);
1416 if (stats_chunks
.curchunks
> stats_chunks
.highchunks
)
1417 stats_chunks
.highchunks
= stats_chunks
.curchunks
;
1419 malloc_mutex_unlock(&chunks_mtx
);
1421 assert(CHUNK_ADDR2BASE(ret
) == ret
);
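/*
 * Note on the "if (size + chunksize > size)" test used earlier in this
 * function: it is the usual unsigned wrap-around guard.  If size were within
 * chunksize of SIZE_MAX, the sum would wrap and compare <= size, so the
 * aligned-mapping attempt is skipped instead of requesting a bogus length.
 * (Illustrative explanation only.)
 */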
1426 chunk_dealloc(void *chunk
, size_t size
)
1430 assert(chunk
!= NULL
);
1431 assert(CHUNK_ADDR2BASE(chunk
) == chunk
);
1433 assert((size
& chunksize_mask
) == 0);
1435 malloc_mutex_lock(&chunks_mtx
);
1438 if ((uintptr_t)chunk
>= (uintptr_t)brk_base
1439 && (uintptr_t)chunk
< (uintptr_t)brk_max
) {
1442 malloc_mutex_lock(&brk_mtx
);
1443 /* Get the current end of brk. */
1447 * Try to shrink the data segment if this chunk is at the end
1448 * of the data segment. The sbrk() call here is subject to a
1449 * race condition with threads that use brk(2) or sbrk(2)
1450 * directly, but the alternative would be to leak memory for
1451 * the sake of poorly designed multi-threaded programs.
1453 if (brk_cur
== brk_max
1454 && (void *)((uintptr_t)chunk
+ size
) == brk_max
1455 && sbrk(-(intptr_t)size
) == brk_max
) {
1456 malloc_mutex_unlock(&brk_mtx
);
1457 if (brk_prev
== brk_max
) {
1459 brk_prev
= (void *)((intptr_t)brk_max
1466 malloc_mutex_unlock(&brk_mtx
);
1467 madvise(chunk
, size
, MADV_FREE
);
1470 * Iteratively create records of each chunk-sized
1471 * memory region that 'chunk' is comprised of, so that
1472 * the address range can be recycled if memory usage
1473 * increases later on.
1475 for (offset
= 0; offset
< size
; offset
+= chunksize
) {
1476 node
= base_chunk_node_alloc();
1480 node
->chunk
= (void *)((uintptr_t)chunk
1481 + (uintptr_t)offset
);
1482 node
->size
= chunksize
;
1484 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1489 pages_unmap(chunk
, size
);
1492 * Make a record of the chunk's address, so that the address
1493 * range can be recycled if memory usage increases later on.
1494 * Don't bother to create entries if (size > chunksize), since
1495 * doing so could cause scalability issues for truly gargantuan
1496 * objects (many gigabytes or larger).
1498 if (size
== chunksize
) {
1499 node
= base_chunk_node_alloc();
1501 node
->chunk
= (void *)(uintptr_t)chunk
;
1502 node
->size
= chunksize
;
1504 RB_INSERT(chunk_tree_s
, &old_chunks
, node
);
1512 stats_chunks
.curchunks
-= (size
/ chunksize
);
1514 malloc_mutex_unlock(&chunks_mtx
);
1518 * End chunk management functions.
1520 /******************************************************************************/
1526 * Choose an arena based on a per-thread and (optimistically) per-CPU value.
1528 * We maintain at least one block of arenas. Usually there are more.
1529 * The blocks are $ncpu arenas in size. Whole blocks are 'hashed'
1530 * amongst threads. To accomplish this, next_arena advances only in
1533 static __noinline arena_t *
1534 choose_arena_hard(void)
1539 /* Initialize the current block of arenas and advance to next. */
1540 malloc_mutex_lock(&arenas_mtx);
1541 assert(next_arena % ncpus == 0);
1542 assert(narenas % ncpus == 0);
1543 map = &arenas[next_arena];
1544 set_arenas_map(map);
1545 for (i = 0; i < ncpus; i++) {
1546 if (arenas[next_arena] == NULL)
1547 arenas_extend(next_arena);
1548 next_arena = (next_arena + 1) % narenas;
1550 malloc_mutex_unlock(&arenas_mtx);
1553 * If we were unable to allocate an arena above, then default to
1554 * the first arena, which is always present.
1556 curcpu = thr_curcpu();
1557 if (map[curcpu] != NULL)
1562 static inline arena_t *
1568 map = get_arenas_map();
1569 curcpu = thr_curcpu();
1570 if (__predict_true(map != NULL && map[curcpu] != NULL))
1573 return choose_arena_hard();
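/*
 * Illustrative sketch of the mapping above (assumed numbers): with
 * ncpus == 4 and narenas == 8, the first thread to reach
 * choose_arena_hard() is handed the block &arenas[0..3] and the next
 * thread &arenas[4..7]; each thread then indexes its block with
 * thr_curcpu(), so a thread currently on CPU 2 uses map[2].  If that slot
 * could not be allocated, the code falls back to the first arena, which
 * always exists.
 */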
1578 arena_chunk_comp(arena_chunk_t
*a
, arena_chunk_t
*b
)
1584 if ((uintptr_t)a
< (uintptr_t)b
)
1592 /* Generate red-black tree code for arena chunks. */
1593 RB_GENERATE_STATIC(arena_chunk_tree_s
, arena_chunk_s
, link
, arena_chunk_comp
);
1598 arena_run_comp(arena_run_t
*a
, arena_run_t
*b
)
1604 if ((uintptr_t)a
< (uintptr_t)b
)
1612 /* Generate red-black tree code for arena runs. */
1613 RB_GENERATE_STATIC(arena_run_tree_s
, arena_run_s
, link
, arena_run_comp
);
1616 static inline void *
1617 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1620 unsigned i, mask, bit, regind;
1622 assert(run->magic == ARENA_RUN_MAGIC);
1623 assert(run->regs_minelm < bin->regs_mask_nelms);
1626 * Move the first check outside the loop, so that run->regs_minelm can
1627 * be updated unconditionally, without the possibility of updating it
1630 i = run->regs_minelm;
1631 mask = run->regs_mask[i];
1633 /* Usable allocation found. */
1634 bit = ffs((int)mask) - 1;
1636 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1637 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1638 + (bin->reg_size * regind));
1642 run->regs_mask[i] = mask;
1647 for (i++; i < bin->regs_mask_nelms; i++) {
1648 mask = run->regs_mask[i];
1650 /* Usable allocation found. */
1651 bit = ffs((int)mask) - 1;
1653 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1654 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
1655 + (bin->reg_size * regind));
1659 run->regs_mask[i] = mask;
1662 * Make a note that nothing before this element
1663 * contains a free region.
1665 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
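/*
 * Sketch of the bitmap scan above (illustration only, assuming 32-bit
 * unsigned ints, i.e. SIZEOF_INT_2POW == 2): if regs_mask[0] == 0x00000008,
 * ffs() reports bit 3 as the lowest set (free) bit, so
 * regind == (0 << 5) + 3 == 3 and the returned region address is
 * run + reg0_offset + 3 * reg_size.  Clearing that bit before storing the
 * mask back marks region 3 as in use (0 = in use, 1 = free).
 */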
1677 arena_run_reg_dalloc(arena_run_t
*run
, arena_bin_t
*bin
, void *ptr
, size_t size
)
1680 * To divide by a number D that is not a power of two we multiply
1681 * by (2^21 / D) and then right shift by 21 positions.
1687 * (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
1689 #define SIZE_INV_SHIFT 21
1690 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
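/*
 * Worked example of the reciprocal trick (illustration only): for region
 * size D == 48, SIZE_INV(3) == ((1 << 21) / 48) + 1 == 43691.  A byte
 * offset diff == 96 then yields (96 * 43691) >> 21 == 2, i.e. the third
 * region, with no hardware divide.  The "+ 1" rounds the reciprocal up so
 * that the truncating shift never underestimates the quotient for offsets
 * that can actually occur within a run.
 */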
1691 static const unsigned size_invs
[] = {
1693 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1694 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1695 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1696 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1697 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1698 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1699 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1700 #if (QUANTUM_2POW_MIN < 4)
1702 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1703 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1704 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1705 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1706 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1707 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1708 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1709 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1712 unsigned diff
, regind
, elm
, bit
;
1715 assert(run
->magic
== ARENA_RUN_MAGIC
);
1716 assert(((sizeof(size_invs
)) / sizeof(unsigned)) + 3
1717 >= (SMALL_MAX_DEFAULT
>> QUANTUM_2POW_MIN
));
1720 * Avoid doing division with a variable divisor if possible. Using
1721 * actual division here can reduce allocator throughput by over 20%!
1723 diff
= (unsigned)((uintptr_t)ptr
- (uintptr_t)run
- bin
->reg0_offset
);
1724 if ((size
& (size
- 1)) == 0) {
1726 * log2_table allows fast division of a power of two in the
1729 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1731 static const unsigned char log2_table
[] = {
1732 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1733 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1734 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1735 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1736 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1737 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1743 regind
= (diff
>> log2_table
[size
- 1]);
1744 else if (size
<= 32768)
1745 regind
= diff
>> (8 + log2_table
[(size
>> 8) - 1]);
1748 * The page size is too large for us to use the lookup
1749 * table. Use real division.
1751 regind
= (unsigned)(diff
/ size
);
1753 } else if (size
<= ((sizeof(size_invs
) / sizeof(unsigned))
1754 << QUANTUM_2POW_MIN
) + 2) {
1755 regind
= size_invs
[(size
>> QUANTUM_2POW_MIN
) - 3] * diff
;
1756 regind
>>= SIZE_INV_SHIFT
;
1759 * size_invs isn't large enough to handle this size class, so
1760 * calculate regind using actual division. This only happens
1761 * if the user increases small_max via the 'S' runtime
1762 * configuration option.
1764 regind
= (unsigned)(diff
/ size
);
1766 assert(diff
== regind
* size
);
1767 assert(regind
< bin
->nregs
);
1769 elm
= regind
>> (SIZEOF_INT_2POW
+ 3);
1770 if (elm
< run
->regs_minelm
)
1771 run
->regs_minelm
= elm
;
1772 bit
= regind
- (elm
<< (SIZEOF_INT_2POW
+ 3));
1773 assert((run
->regs_mask
[elm
] & (1 << bit
)) == 0);
1774 run
->regs_mask
[elm
] |= (1 << bit
);
1776 #undef SIZE_INV_SHIFT
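/*
 * Sketch of the bitmap update above (illustration only, 32-bit ints
 * assumed): freeing region regind == 37 gives elm == 37 >> 5 == 1 and
 * bit == 37 - (1 << 5) == 5, so bit 5 of regs_mask[1] is set back to 1
 * (free), and regs_minelm is lowered to 1 if it was larger so the next
 * arena_run_reg_alloc() starts scanning no later than that element.
 */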
1780 arena_run_split(arena_t
*arena
, arena_run_t
*run
, size_t size
)
1782 arena_chunk_t
*chunk
;
1783 unsigned run_ind
, map_offset
, total_pages
, need_pages
, rem_pages
;
1786 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
1787 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
1789 total_pages
= chunk
->map
[run_ind
].npages
;
1790 need_pages
= (unsigned)(size
>> pagesize_2pow
);
1791 assert(need_pages
<= total_pages
);
1792 rem_pages
= total_pages
- need_pages
;
1794 /* Split enough pages from the front of run to fit allocation size. */
1795 map_offset
= run_ind
;
1796 for (i
= 0; i
< need_pages
; i
++) {
1797 chunk
->map
[map_offset
+ i
].npages
= need_pages
;
1798 chunk
->map
[map_offset
+ i
].pos
= i
;
1801 /* Keep track of trailing unused pages for later use. */
1802 if (rem_pages
> 0) {
1803 /* Update map for trailing pages. */
1804 map_offset
+= need_pages
;
1805 chunk
->map
[map_offset
].npages
= rem_pages
;
1806 chunk
->map
[map_offset
].pos
= POS_FREE
;
1807 chunk
->map
[map_offset
+ rem_pages
- 1].npages
= rem_pages
;
1808 chunk
->map
[map_offset
+ rem_pages
- 1].pos
= POS_FREE
;
1811 chunk
->pages_used
+= need_pages
;
1814 static arena_chunk_t
*
1815 arena_chunk_alloc(arena_t
*arena
)
1817 arena_chunk_t
*chunk
;
1819 if (arena
->spare
!= NULL
) {
1820 chunk
= arena
->spare
;
1821 arena
->spare
= NULL
;
1824 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1826 chunk
= (arena_chunk_t
*)chunk_alloc(chunksize
);
1830 arena
->stats
.mapped
+= chunksize
;
1833 chunk
->arena
= arena
;
1836 RB_INSERT(arena_chunk_tree_s
, &arena
->chunks
, chunk
);
1839 * Claim that no pages are in use, since the header is merely
1842 chunk
->pages_used
= 0;
1844 chunk
->max_frun_npages
= chunk_npages
-
1845 arena_chunk_header_npages
;
1846 chunk
->min_frun_ind
= arena_chunk_header_npages
;
1849 * Initialize enough of the map to support one maximal free run.
1851 chunk
->map
[arena_chunk_header_npages
].npages
= chunk_npages
-
1852 arena_chunk_header_npages
;
1853 chunk
->map
[arena_chunk_header_npages
].pos
= POS_FREE
;
1854 chunk
->map
[chunk_npages
- 1].npages
= chunk_npages
-
1855 arena_chunk_header_npages
;
1856 chunk
->map
[chunk_npages
- 1].pos
= POS_FREE
;
1863 arena_chunk_dealloc(arena_t
*arena
, arena_chunk_t
*chunk
)
1867 * Remove chunk from the chunk tree, regardless of whether this chunk
1868 * will be cached, so that the arena does not use it.
1871 RB_REMOVE(arena_chunk_tree_s
, &chunk
->arena
->chunks
, chunk
);
1873 if (opt_hint
== false) {
1874 if (arena
->spare
!= NULL
) {
1875 chunk_dealloc((void *)arena
->spare
, chunksize
);
1877 arena
->stats
.mapped
-= chunksize
;
1880 arena
->spare
= chunk
;
1882 assert(arena
->spare
== NULL
);
1883 chunk_dealloc((void *)chunk
, chunksize
);
1885 arena
->stats
.mapped
-= chunksize
;
1890 static arena_run_t
*
1891 arena_run_alloc(arena_t
*arena
, size_t size
)
1893 arena_chunk_t
*chunk
;
1895 unsigned need_npages
, limit_pages
, compl_need_npages
;
1897 assert(size
<= (chunksize
- (arena_chunk_header_npages
<<
1899 assert((size
& pagesize_mask
) == 0);
1902 * Search through arena's chunks in address order for a free run that is
1903 * large enough. Look for the first fit.
1905 need_npages
= (unsigned)(size
>> pagesize_2pow
);
1906 limit_pages
= chunk_npages
- arena_chunk_header_npages
;
1907 compl_need_npages
= limit_pages
- need_npages
;
1909 RB_FOREACH(chunk
, arena_chunk_tree_s
, &arena
->chunks
) {
1911 * Avoid searching this chunk if there are not enough
1912 * contiguous free pages for there to possibly be a large
1915 if (chunk
->pages_used
<= compl_need_npages
&&
1916 need_npages
<= chunk
->max_frun_npages
) {
1917 arena_chunk_map_t
*mapelm
;
1919 unsigned max_frun_npages
= 0;
1920 unsigned min_frun_ind
= chunk_npages
;
1922 assert(chunk
->min_frun_ind
>=
1923 arena_chunk_header_npages
);
1924 for (i
= chunk
->min_frun_ind
; i
< chunk_npages
;) {
1925 mapelm
= &chunk
->map
[i
];
1926 if (mapelm
->pos
== POS_FREE
) {
1927 if (mapelm
->npages
>= need_npages
) {
1928 run
= (arena_run_t
*)
1929 ((uintptr_t)chunk
+ (i
<<
1931 /* Update page map. */
1932 arena_run_split(arena
, run
,
1936 if (mapelm
->npages
>
1941 if (i
< min_frun_ind
) {
1943 if (i
< chunk
->min_frun_ind
)
1944 chunk
->min_frun_ind
= i
;
1947 i
+= mapelm
->npages
;
1950 * Search failure. Reset cached chunk->max_frun_npages.
1951 * chunk->min_frun_ind was already reset above (if
1954 chunk
->max_frun_npages
= max_frun_npages
;
1959 * No usable runs. Create a new chunk from which to allocate the run.
1961 chunk
= arena_chunk_alloc(arena
);
1964 run
= (arena_run_t
*)((uintptr_t)chunk
+ (arena_chunk_header_npages
<<
1966 /* Update page map. */
1967 arena_run_split(arena
, run
, size
);
1972 arena_run_dalloc(arena_t
*arena
, arena_run_t
*run
, size_t size
)
1974 arena_chunk_t
*chunk
;
1975 unsigned run_ind
, run_pages
;
1977 chunk
= (arena_chunk_t
*)CHUNK_ADDR2BASE(run
);
1979 run_ind
= (unsigned)(((uintptr_t)run
- (uintptr_t)chunk
)
1981 assert(run_ind
>= arena_chunk_header_npages
);
1982 assert(run_ind
< (chunksize
>> pagesize_2pow
));
1983 run_pages
= (unsigned)(size
>> pagesize_2pow
);
1984 assert(run_pages
== chunk
->map
[run_ind
].npages
);
1986 /* Subtract pages from count of pages used in chunk. */
1987 chunk
->pages_used
-= run_pages
;
1989 /* Mark run as deallocated. */
1990 assert(chunk
->map
[run_ind
].npages
== run_pages
);
1991 chunk
->map
[run_ind
].pos
= POS_FREE
;
1992 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
== run_pages
);
1993 chunk
->map
[run_ind
+ run_pages
- 1].pos
= POS_FREE
;
1996 * Tell the kernel that we don't need the data in this run, but only if
1997 * requested via runtime configuration.
2000 madvise(run
, size
, MADV_FREE
);
2002 /* Try to coalesce with neighboring runs. */
2003 if (run_ind
> arena_chunk_header_npages
&&
2004 chunk
->map
[run_ind
- 1].pos
== POS_FREE
) {
2005 unsigned prev_npages
;
2007 /* Coalesce with previous run. */
2008 prev_npages
= chunk
->map
[run_ind
- 1].npages
;
2009 run_ind
-= prev_npages
;
2010 assert(chunk
->map
[run_ind
].npages
== prev_npages
);
2011 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2012 run_pages
+= prev_npages
;
2014 chunk
->map
[run_ind
].npages
= run_pages
;
2015 assert(chunk
->map
[run_ind
].pos
== POS_FREE
);
2016 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2017 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2020 if (run_ind
+ run_pages
< chunk_npages
&&
2021 chunk
->map
[run_ind
+ run_pages
].pos
== POS_FREE
) {
2022 unsigned next_npages
;
2024 /* Coalesce with next run. */
2025 next_npages
= chunk
->map
[run_ind
+ run_pages
].npages
;
2026 run_pages
+= next_npages
;
2027 assert(chunk
->map
[run_ind
+ run_pages
- 1].npages
==
2029 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2031 chunk
->map
[run_ind
].npages
= run_pages
;
2032 chunk
->map
[run_ind
].pos
= POS_FREE
;
2033 chunk
->map
[run_ind
+ run_pages
- 1].npages
= run_pages
;
2034 assert(chunk
->map
[run_ind
+ run_pages
- 1].pos
== POS_FREE
);
2037 if (chunk
->map
[run_ind
].npages
> chunk
->max_frun_npages
)
2038 chunk
->max_frun_npages
= chunk
->map
[run_ind
].npages
;
2039 if (run_ind
< chunk
->min_frun_ind
)
2040 chunk
->min_frun_ind
= run_ind
;
2042 /* Deallocate chunk if it is now completely unused. */
2043 if (chunk
->pages_used
== 0)
2044 arena_chunk_dealloc(arena
, chunk
);
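/*
 * Coalescing example for the code above (illustrative page numbers): if a
 * 3-page run starting at page 10 is freed while pages 7-9 and 13-14 are
 * already free, the backward merge moves run_ind to 7 (run_pages becomes 6)
 * and the forward merge absorbs pages 13-14 (run_pages becomes 8).  Only
 * map[7] and map[14] need their npages/POS_FREE entries kept current, which
 * is why free-run bookkeeping only touches the first and last pages.
 */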
2047 static arena_run_t
*
2048 arena_bin_nonfull_run_get(arena_t
*arena
, arena_bin_t
*bin
)
2051 unsigned i
, remainder
;
2053 /* Look for a usable run. */
2055 if ((run
= RB_MIN(arena_run_tree_s
, &bin
->runs
)) != NULL
) {
2056 /* run is guaranteed to have available space. */
2058 RB_REMOVE(arena_run_tree_s
, &bin
->runs
, run
);
2060 bin
->stats
.reruns
++;
2064 /* No existing runs have any space available. */
2066 /* Allocate a new run. */
2067 run
= arena_run_alloc(arena
, bin
->run_size
);
2071 /* Initialize run internals. */
2074 for (i
= 0; i
< bin
->regs_mask_nelms
; i
++)
2075 run
->regs_mask
[i
] = UINT_MAX
;
2076 remainder
= bin
->nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1);
2077 if (remainder
!= 0) {
2078 /* The last element has spare bits that need to be unset. */
2079 run
->regs_mask
[i
] = (UINT_MAX
>> ((1 << (SIZEOF_INT_2POW
+ 3))
2083 run
->regs_minelm
= 0;
2085 run
->nfree
= bin
->nregs
;
2087 run
->magic
= ARENA_RUN_MAGIC
;
2092 bin
->stats
.curruns
++;
2093 if (bin
->stats
.curruns
> bin
->stats
.highruns
)
2094 bin
->stats
.highruns
= bin
->stats
.curruns
;
2099 /* bin->runcur must have space available before this function is called. */
2100 static inline void *
2101 arena_bin_malloc_easy(arena_t
*arena
, arena_bin_t
*bin
, arena_run_t
*run
)
2105 assert(run
->magic
== ARENA_RUN_MAGIC
);
2106 assert(run
->nfree
> 0);
2108 ret
= arena_run_reg_alloc(run
, bin
);
2109 assert(ret
!= NULL
);
2115 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2117 arena_bin_malloc_hard(arena_t
*arena
, arena_bin_t
*bin
)
2120 bin
->runcur
= arena_bin_nonfull_run_get(arena
, bin
);
2121 if (bin
->runcur
== NULL
)
2123 assert(bin
->runcur
->magic
== ARENA_RUN_MAGIC
);
2124 assert(bin
->runcur
->nfree
> 0);
2126 return (arena_bin_malloc_easy(arena
, bin
, bin
->runcur
));
2130 * Calculate bin->run_size such that it meets the following constraints:
2132 * *) bin->run_size >= min_run_size
2133 * *) bin->run_size <= arena_maxclass
2134 * *) bin->run_size <= RUN_MAX_SMALL
2135 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2137 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2138 * also calculated here, since these settings are all interdependent.
2141 arena_bin_run_size_calc(arena_bin_t
*bin
, size_t min_run_size
)
2143 size_t try_run_size
, good_run_size
;
2144 unsigned good_nregs
, good_mask_nelms
, good_reg0_offset
;
2145 unsigned try_nregs
, try_mask_nelms
, try_reg0_offset
;
2146 float max_ovrhd
= RUN_MAX_OVRHD
;
2148 assert(min_run_size
>= pagesize
);
2149 assert(min_run_size
<= arena_maxclass
);
2150 assert(min_run_size
<= RUN_MAX_SMALL
);
2153 * Calculate known-valid settings before entering the run_size
2154 * expansion loop, so that the first part of the loop always copies
2157 * The do..while loop iteratively reduces the number of regions until
2158 * the run header and the regions no longer overlap. A closed formula
2159 * would be quite messy, since there is an interdependency between the
2160 * header's mask length and the number of regions.
2162 try_run_size
= min_run_size
;
2163 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2164 bin
->reg_size
) + 1); /* Counter-act the first line of the loop. */
2167 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2168 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ? 1 : 0);
2169 try_reg0_offset
= (unsigned)(try_run_size
-
2170 (try_nregs
* bin
->reg_size
));
2171 } while (sizeof(arena_run_t
) + (sizeof(unsigned) * (try_mask_nelms
- 1))
2174 /* run_size expansion loop. */
2177 * Copy valid settings before trying more aggressive settings.
2179 good_run_size
= try_run_size
;
2180 good_nregs
= try_nregs
;
2181 good_mask_nelms
= try_mask_nelms
;
2182 good_reg0_offset
= try_reg0_offset
;
2184 /* Try more aggressive settings. */
2185 try_run_size
+= pagesize
;
2186 try_nregs
= (unsigned)(((try_run_size
- sizeof(arena_run_t
)) /
2187 bin
->reg_size
) + 1); /* Counter-act try_nregs-- in loop. */
2190 try_mask_nelms
= (try_nregs
>> (SIZEOF_INT_2POW
+ 3)) +
2191 ((try_nregs
& ((1 << (SIZEOF_INT_2POW
+ 3)) - 1)) ?
2193 try_reg0_offset
= (unsigned)(try_run_size
- (try_nregs
*
2195 } while (sizeof(arena_run_t
) + (sizeof(unsigned) *
2196 (try_mask_nelms
- 1)) > try_reg0_offset
);
2197 } while (try_run_size
<= arena_maxclass
&& try_run_size
<= RUN_MAX_SMALL
2198 && max_ovrhd
> RUN_MAX_OVRHD_RELAX
/ ((float)(bin
->reg_size
<< 3))
2199 && ((float)(try_reg0_offset
)) / ((float)(try_run_size
)) >
2202 assert(sizeof(arena_run_t
) + (sizeof(unsigned) * (good_mask_nelms
- 1))
2203 <= good_reg0_offset
);
2204 assert((good_mask_nelms
<< (SIZEOF_INT_2POW
+ 3)) >= good_nregs
);
2206 /* Copy final settings. */
2207 bin
->run_size
= good_run_size
;
2208 bin
->nregs
= good_nregs
;
2209 bin
->regs_mask_nelms
= good_mask_nelms
;
2210 bin
->reg0_offset
= good_reg0_offset
;
2212 return (good_run_size
);
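/*
 * Example of the sizing loop above (illustrative, with assumed values): for
 * reg_size == 512 and min_run_size == 4096, the first pass settles on about
 * 7 regions, leaving try_reg0_offset == 4096 - 7 * 512 == 512 bytes of
 * header/padding, i.e. 12.5% overhead.  Since that exceeds RUN_MAX_OVRHD,
 * the loop keeps adding a page until the overhead ratio drops low enough or
 * one of the size caps (arena_maxclass, RUN_MAX_SMALL) stops it, and the
 * last settings that satisfied the constraints are the ones copied into the
 * bin.
 */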
2216 arena_malloc(arena_t
*arena
, size_t size
)
2220 assert(arena
!= NULL
);
2221 assert(arena
->magic
== ARENA_MAGIC
);
2223 assert(QUANTUM_CEILING(size
) <= arena_maxclass
);
2225 if (size
<= bin_maxclass
) {
2229 /* Small allocation. */
2231 if (size
< small_min
) {
2233 size
= pow2_ceil(size
);
2234 bin
= &arena
->bins
[ffs((int)(size
>> (TINY_MIN_2POW
+
2236 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2238 * Bin calculation is always correct, but we may need
2239 * to fix size for the purposes of assertions and/or
2242 if (size
< (1 << TINY_MIN_2POW
))
2243 size
= (1 << TINY_MIN_2POW
);
2245 } else if (size
<= small_max
) {
2246 /* Quantum-spaced. */
2247 size
= QUANTUM_CEILING(size
);
2248 bin
= &arena
->bins
[ntbins
+ (size
>> opt_quantum_2pow
)
2252 size
= pow2_ceil(size
);
2253 bin
= &arena
->bins
[ntbins
+ nqbins
2254 + (ffs((int)(size
>> opt_small_max_2pow
)) - 2)];
2256 assert(size
== bin
->reg_size
);
2258 malloc_mutex_lock(&arena
->mtx
);
2259 if ((run
= bin
->runcur
) != NULL
&& run
->nfree
> 0)
2260 ret
= arena_bin_malloc_easy(arena
, bin
, run
);
2262 ret
= arena_bin_malloc_hard(arena
, bin
);
2265 malloc_mutex_unlock(&arena
->mtx
);
2270 bin
->stats
.nrequests
++;
2271 arena
->stats
.nmalloc_small
++;
2272 arena
->stats
.allocated_small
+= size
;
2275 /* Large allocation. */
2276 size
= PAGE_CEILING(size
);
2277 malloc_mutex_lock(&arena
->mtx
);
2278 ret
= (void *)arena_run_alloc(arena
, size
);
2280 malloc_mutex_unlock(&arena
->mtx
);
2284 arena
->stats
.nmalloc_large
++;
2285 arena
->stats
.allocated_large
+= size
;
2289 malloc_mutex_unlock(&arena
->mtx
);
2292 memset(ret
, 0xa5, size
);
2294 memset(ret
, 0, size
);
2299 arena_palloc_trim(arena_t
*arena
, arena_chunk_t
*chunk
, unsigned pageind
,
2307 * Modify the map such that arena_run_dalloc() sees the run as
2308 * separately allocated.
2310 for (i
= 0; i
< npages
; i
++) {
2311 chunk
->map
[pageind
+ i
].npages
= npages
;
2312 chunk
->map
[pageind
+ i
].pos
= i
;
2314 arena_run_dalloc(arena
, (arena_run_t
*)((uintptr_t)chunk
+ (pageind
<<
2315 pagesize_2pow
)), npages
<< pagesize_2pow
);
/* Only handles large allocations that require more than page alignment. */
static void *
arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
{
    void *ret;
    size_t offset;
    arena_chunk_t *chunk;
    unsigned pageind, i, npages;

    assert((size & pagesize_mask) == 0);
    assert((alignment & pagesize_mask) == 0);

    npages = (unsigned)(size >> pagesize_2pow);

    malloc_mutex_lock(&arena->mtx);
    ret = (void *)arena_run_alloc(arena, alloc_size);
    if (ret == NULL) {
        malloc_mutex_unlock(&arena->mtx);
        return (NULL);
    }

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);

    offset = (uintptr_t)ret & (alignment - 1);
    assert((offset & pagesize_mask) == 0);
    assert(offset < alloc_size);
    if (offset == 0) {
        pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
            pagesize_2pow);

        /* Update the map for the run to be kept. */
        for (i = 0; i < npages; i++) {
            chunk->map[pageind + i].npages = npages;
            assert(chunk->map[pageind + i].pos == i);
        }

        /* Trim trailing space. */
        arena_palloc_trim(arena, chunk, pageind + npages,
            (unsigned)((alloc_size - size) >> pagesize_2pow));
    } else {
        size_t leadsize, trailsize;

        leadsize = alignment - offset;
        ret = (void *)((uintptr_t)ret + leadsize);
        pageind = (unsigned)(((uintptr_t)ret - (uintptr_t)chunk) >>
            pagesize_2pow);

        /* Update the map for the run to be kept. */
        for (i = 0; i < npages; i++) {
            chunk->map[pageind + i].npages = npages;
            chunk->map[pageind + i].pos = i;
        }

        /* Trim leading space. */
        arena_palloc_trim(arena, chunk,
            (unsigned)(pageind - (leadsize >> pagesize_2pow)),
            (unsigned)(leadsize >> pagesize_2pow));

        trailsize = alloc_size - leadsize - size;
        if (trailsize != 0) {
            /* Trim trailing space. */
            assert(trailsize < alloc_size);
            arena_palloc_trim(arena, chunk, pageind + npages,
                (unsigned)(trailsize >> pagesize_2pow));
        }
    }

#ifdef MALLOC_STATS
    arena->stats.nmalloc_large++;
    arena->stats.allocated_large += size;
#endif
    malloc_mutex_unlock(&arena->mtx);

    if (opt_junk)
        memset(ret, 0xa5, size);
    else if (opt_zero)
        memset(ret, 0, size);
    return (ret);
}
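
/*
 * For illustration (a worked example, assuming 4 kB pages): a caller that
 * needs a 4096-byte run aligned to 8192 passes an alloc_size of
 * (8192 << 1) - 4096 == 12288 (three pages), as computed by ipalloc().  Any
 * three-page run necessarily contains an 8192-aligned page, and the unused
 * leading and/or trailing pages are handed back via arena_palloc_trim().
 */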
/* Return the size of the allocation pointed to by ptr. */
static size_t
arena_salloc(const void *ptr)
{
    size_t ret;
    arena_chunk_t *chunk;
    arena_chunk_map_t *mapelm;
    unsigned pageind;

    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    /*
     * No arena data structures that we query here can change in a way that
     * affects this function, so we don't need to lock.
     */
    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
        pagesize_2pow);
    mapelm = &chunk->map[pageind];
    if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
        pagesize_2pow)) {
        arena_run_t *run;

        /* Small allocation: the size is recorded in the run header. */
        pageind -= mapelm->pos;

        run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
            pagesize_2pow));
        assert(run->magic == ARENA_RUN_MAGIC);
        ret = run->bin->reg_size;
    } else {
        /* Large allocation: the size is recorded in the chunk map. */
        ret = mapelm->npages << pagesize_2pow;
    }

    return (ret);
}
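
/*
 * For illustration: in arena_salloc() above, map[pageind].pos records how
 * many pages into its run the page lies, so "pageind -= mapelm->pos" lands on
 * the page that holds the run header.  A pointer into the third page of a
 * small run, for example, has pos == 2 and backs up two pages to reach the
 * arena_run_t, whose bin supplies reg_size.
 */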
static void *
arena_ralloc(void *ptr, size_t size, size_t oldsize)
{
    void *ret;

    /* Avoid moving the allocation if the size class would not change. */
    if (size < small_min) {
        if (oldsize < small_min &&
            ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
            == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
            goto IN_PLACE;
    } else if (size <= small_max) {
        if (oldsize >= small_min && oldsize <= small_max &&
            (QUANTUM_CEILING(size) >> opt_quantum_2pow)
            == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
            goto IN_PLACE;
    } else {
        /*
         * We make no attempt to resize runs here, though it would be
         * possible to do so.
         */
        if (oldsize > small_max && PAGE_CEILING(size) == oldsize)
            goto IN_PLACE;
    }

    /*
     * If we get here, then size and oldsize are different enough that we
     * need to use a different size class.  In that case, fall back to
     * allocating new space and copying.
     */
    ret = arena_malloc(choose_arena(), size);
    if (ret == NULL)
        return (NULL);

    /* Junk/zero-filling were already done by arena_malloc(). */
    if (size < oldsize)
        memcpy(ret, ptr, size);
    else
        memcpy(ret, ptr, oldsize);

    idalloc(ptr);
    return (ret);
IN_PLACE:
    if (opt_junk && size < oldsize)
        memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
    else if (opt_zero && size > oldsize)
        memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
    return (ptr);
}
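
/*
 * For illustration (assuming the default 16-byte quantum): both 100 and 112
 * quantum-ceil to 112, so shrinking a 112-byte region to 100 bytes in
 * arena_ralloc() takes the IN_PLACE path; only the trailing 12 bytes are
 * junk-filled (with 'J') or left untouched, and no copy occurs.
 */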
static void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
    unsigned pageind;
    arena_chunk_map_t *mapelm;
    size_t size;

    assert(arena != NULL);
    assert(arena->magic == ARENA_MAGIC);
    assert(chunk->arena == arena);
    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    pageind = (unsigned)(((uintptr_t)ptr - (uintptr_t)chunk) >>
        pagesize_2pow);
    mapelm = &chunk->map[pageind];
    if (mapelm->pos != 0 || ptr != (char *)((uintptr_t)chunk) + (pageind <<
        pagesize_2pow)) {
        arena_run_t *run;
        arena_bin_t *bin;

        /* Small allocation. */

        pageind -= mapelm->pos;

        run = (arena_run_t *)((uintptr_t)chunk + (pageind <<
            pagesize_2pow));
        assert(run->magic == ARENA_RUN_MAGIC);
        bin = run->bin;
        size = bin->reg_size;

        if (opt_junk)
            memset(ptr, 0x5a, size);

        malloc_mutex_lock(&arena->mtx);
        arena_run_reg_dalloc(run, bin, ptr, size);
        run->nfree++;

        if (run->nfree == bin->nregs) {
            /* Deallocate run. */
            if (run == bin->runcur)
                bin->runcur = NULL;
            else if (bin->nregs != 1) {
                /*
                 * This block's conditional is necessary because
                 * if the run only contains one region, then it
                 * never gets inserted into the non-full runs
                 * tree.
                 */
                RB_REMOVE(arena_run_tree_s, &bin->runs, run);
            }
            arena_run_dalloc(arena, run, bin->run_size);
#ifdef MALLOC_STATS
            bin->stats.curruns--;
#endif
        } else if (run->nfree == 1 && run != bin->runcur) {
            /*
             * Make sure that bin->runcur always refers to the
             * lowest non-full run, if one exists.
             */
            if (bin->runcur == NULL)
                bin->runcur = run;
            else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
                /* Switch runcur. */
                if (bin->runcur->nfree > 0) {
                    /* Insert runcur. */
                    RB_INSERT(arena_run_tree_s, &bin->runs,
                        bin->runcur);
                }
                bin->runcur = run;
            } else {
                RB_INSERT(arena_run_tree_s, &bin->runs, run);
            }
        }
#ifdef MALLOC_STATS
        arena->stats.allocated_small -= size;
        arena->stats.ndalloc_small++;
#endif
    } else {
        /* Large allocation. */

        size = mapelm->npages << pagesize_2pow;
        assert((((uintptr_t)ptr) & pagesize_mask) == 0);

        if (opt_junk)
            memset(ptr, 0x5a, size);

        malloc_mutex_lock(&arena->mtx);
        arena_run_dalloc(arena, (arena_run_t *)ptr, size);
#ifdef MALLOC_STATS
        arena->stats.allocated_large -= size;
        arena->stats.ndalloc_large++;
#endif
    }

    malloc_mutex_unlock(&arena->mtx);
}
static bool
arena_new(arena_t *arena)
{
    unsigned i;
    arena_bin_t *bin;
    size_t prev_run_size;

    malloc_mutex_init(&arena->mtx);

#ifdef MALLOC_STATS
    memset(&arena->stats, 0, sizeof(arena_stats_t));
#endif

    /* Initialize chunks. */
    RB_INIT(&arena->chunks);
    arena->spare = NULL;

    /* Initialize bins. */
    prev_run_size = pagesize;

    /* (2^n)-spaced tiny bins. */
    for (i = 0; i < ntbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        RB_INIT(&bin->runs);

        bin->reg_size = (1 << (TINY_MIN_2POW + i));
        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

    /* Quantum-spaced bins. */
    for (; i < ntbins + nqbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        RB_INIT(&bin->runs);

        bin->reg_size = quantum * (i - ntbins + 1);
        /*
         * pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
         */
        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

    /* (2^n)-spaced sub-page bins. */
    for (; i < ntbins + nqbins + nsbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        RB_INIT(&bin->runs);

        bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

    arena->magic = ARENA_MAGIC;

    return (false);
}
/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
    arena_t *ret;

    /* Allocate enough space for trailing bins. */
    ret = (arena_t *)base_alloc(sizeof(arena_t)
        + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
    if (ret != NULL && arena_new(ret) == false) {
        arenas[ind] = ret;
        return (ret);
    }
    /* Only reached if there is an OOM error. */

    /*
     * OOM here is quite inconvenient to propagate, since dealing with it
     * would require a check for failure in the fast path.  Instead, punt
     * by using arenas[0].  In practice, this is an extremely unlikely
     * failure.
     */
    _malloc_message(getprogname(),
        ": (malloc) Error initializing arena\n", "", "");
    if (opt_abort)
        abort();

    return (arenas[0]);
}
/******************************************************************************/
/*
 * Begin general internal functions.
 */
static void *
huge_malloc(size_t size)
{
    void *ret;
    size_t csize;
    chunk_node_t *node;

    /* Allocate one or more contiguous chunks for this request. */

    csize = CHUNK_CEILING(size);
    if (csize == 0) {
        /* size is large enough to cause size_t wrap-around. */
        return (NULL);
    }

    /* Allocate a chunk node with which to track the chunk. */
    node = base_chunk_node_alloc();
    if (node == NULL)
        return (NULL);

    ret = chunk_alloc(csize);
    if (ret == NULL) {
        base_chunk_node_dealloc(node);
        return (NULL);
    }

    /* Insert node into huge. */
    node->chunk = ret;
    node->size = csize;

    malloc_mutex_lock(&chunks_mtx);
    RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
    huge_nmalloc++;
    huge_allocated += csize;
#endif
    malloc_mutex_unlock(&chunks_mtx);

    if (opt_junk)
        memset(ret, 0xa5, csize);
    else if (opt_zero)
        memset(ret, 0, csize);

    return (ret);
}
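
/*
 * For illustration (assuming a 1 MB chunk size): a 1.5 MB request in
 * huge_malloc() above is chunk-ceiled to 2 MB, satisfied by two contiguous
 * chunks, and tracked by a single chunk_node_t keyed on the base address in
 * the "huge" red-black tree.
 */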
/* Only handles large allocations that require more than chunk alignment. */
static void *
huge_palloc(size_t alignment, size_t size)
{
    void *ret;
    size_t alloc_size, chunk_size, offset;
    chunk_node_t *node;

    /*
     * This allocation requires alignment that is even larger than chunk
     * alignment.  This means that huge_malloc() isn't good enough.
     *
     * Allocate almost twice as many chunks as are demanded by the size or
     * alignment, in order to assure the alignment can be achieved, then
     * unmap leading and trailing chunks.
     */
    assert(alignment >= chunksize);

    chunk_size = CHUNK_CEILING(size);

    if (size >= alignment)
        alloc_size = chunk_size + alignment - chunksize;
    else
        alloc_size = (alignment << 1) - chunksize;

    /* Allocate a chunk node with which to track the chunk. */
    node = base_chunk_node_alloc();
    if (node == NULL)
        return (NULL);

    ret = chunk_alloc(alloc_size);
    if (ret == NULL) {
        base_chunk_node_dealloc(node);
        return (NULL);
    }

    offset = (uintptr_t)ret & (alignment - 1);
    assert((offset & chunksize_mask) == 0);
    assert(offset < alloc_size);
    if (offset == 0) {
        /* Trim trailing space. */
        chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
            - chunk_size);
    } else {
        size_t trailsize;

        /* Trim leading space. */
        chunk_dealloc(ret, alignment - offset);

        ret = (void *)((uintptr_t)ret + (alignment - offset));

        trailsize = alloc_size - (alignment - offset) - chunk_size;
        if (trailsize != 0) {
            /* Trim trailing space. */
            assert(trailsize < alloc_size);
            chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
                trailsize);
        }
    }

    /* Insert node into huge. */
    node->chunk = ret;
    node->size = chunk_size;

    malloc_mutex_lock(&chunks_mtx);
    RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
    huge_nmalloc++;
    huge_allocated += chunk_size;
#endif
    malloc_mutex_unlock(&chunks_mtx);

    if (opt_junk)
        memset(ret, 0xa5, chunk_size);
    else if (opt_zero)
        memset(ret, 0, chunk_size);

    return (ret);
}
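
/*
 * For illustration (a worked example, assuming a 1 MB chunk size): for
 * size = 1 MB and alignment = 4 MB, chunk_size is 1 MB and, since
 * size < alignment, alloc_size == (4 MB << 1) - 1 MB == 7 MB.  A 7 MB mapping
 * always contains a 4 MB-aligned 1 MB chunk; the leading and trailing chunks
 * are then returned with chunk_dealloc().
 */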
static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
    void *ret;

    /* Avoid moving the allocation if the size class would not change. */
    if (oldsize > arena_maxclass &&
        CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
        if (opt_junk && size < oldsize) {
            memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
                - size);
        } else if (opt_zero && size > oldsize) {
            memset((void *)((uintptr_t)ptr + oldsize), 0, size
                - oldsize);
        }
        return (ptr);
    }

    if (CHUNK_ADDR2BASE(ptr) == ptr
#ifdef USE_BRK
        && ((uintptr_t)ptr < (uintptr_t)brk_base
        || (uintptr_t)ptr >= (uintptr_t)brk_max)
#endif
        ) {
        chunk_node_t *node, key;
        void *newptr;
        size_t oldcsize;
        size_t newcsize;

        newcsize = CHUNK_CEILING(size);
        oldcsize = CHUNK_CEILING(oldsize);
        assert(oldcsize != newcsize);
        if (newcsize == 0) {
            /* size_t wrap-around */
            return (NULL);
        }

        /*
         * Remove the old region from the tree now.  If mremap()
         * returns the region to the system, another thread may
         * map it for the same huge allocation and insert it into the
         * tree before we acquire the mutex lock again.
         */
        malloc_mutex_lock(&chunks_mtx);
        key.chunk = __DECONST(void *, ptr);
        node = RB_FIND(chunk_tree_s, &huge, &key);
        assert(node != NULL);
        assert(node->chunk == ptr);
        assert(node->size == oldcsize);
        RB_REMOVE(chunk_tree_s, &huge, node);
        malloc_mutex_unlock(&chunks_mtx);

        newptr = mremap(ptr, oldcsize, NULL, newcsize,
            MAP_ALIGNED(chunksize_2pow));
        if (newptr == MAP_FAILED) {
            /* We still own the old region. */
            malloc_mutex_lock(&chunks_mtx);
            RB_INSERT(chunk_tree_s, &huge, node);
            malloc_mutex_unlock(&chunks_mtx);
        } else {
            assert(CHUNK_ADDR2BASE(newptr) == newptr);

            /* Insert new or resized old region. */
            malloc_mutex_lock(&chunks_mtx);
            node->size = newcsize;
            node->chunk = newptr;
            RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
            huge_nralloc++;
            huge_allocated += newcsize - oldcsize;
            if (newcsize > oldcsize) {
                stats_chunks.curchunks +=
                    (newcsize - oldcsize) / chunksize;
                if (stats_chunks.curchunks >
                    stats_chunks.highchunks)
                    stats_chunks.highchunks =
                        stats_chunks.curchunks;
            } else {
                stats_chunks.curchunks -=
                    (oldcsize - newcsize) / chunksize;
            }
#endif
            malloc_mutex_unlock(&chunks_mtx);

            if (opt_junk && size < oldsize) {
                memset((void *)((uintptr_t)newptr + size), 0x5a,
                    oldsize - size);
            } else if (opt_zero && size > oldsize) {
                memset((void *)((uintptr_t)newptr + oldsize), 0,
                    size - oldsize);
            }
            return (newptr);
        }
    }

    /*
     * If we get here, then size and oldsize are different enough that we
     * need to use a different size class.  In that case, fall back to
     * allocating new space and copying.
     */
    ret = huge_malloc(size);
    if (ret == NULL)
        return (NULL);

    if (CHUNK_ADDR2BASE(ptr) == ptr) {
        /* The old allocation is a chunk. */
        if (size < oldsize)
            memcpy(ret, ptr, size);
        else
            memcpy(ret, ptr, oldsize);
    } else {
        /* The old allocation is a region. */
        assert(oldsize < size);
        memcpy(ret, ptr, oldsize);
    }
    idalloc(ptr);
    return (ret);
}
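
/*
 * For illustration (assuming a 1 MB chunk size): growing a huge allocation
 * from 1.2 MB to 1.8 MB leaves CHUNK_CEILING() at 2 MB for both sizes, so the
 * early return in huge_ralloc() hands back the same pointer; only when the
 * chunk ceiling changes does the mremap()/copy machinery run.
 */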
static void
huge_dalloc(void *ptr)
{
    chunk_node_t key;
    chunk_node_t *node;

    malloc_mutex_lock(&chunks_mtx);

    /* Extract from tree of huge allocations. */
    key.chunk = ptr;
    node = RB_FIND(chunk_tree_s, &huge, &key);
    assert(node != NULL);
    assert(node->chunk == ptr);
    RB_REMOVE(chunk_tree_s, &huge, node);

#ifdef MALLOC_STATS
    huge_ndalloc++;
    huge_allocated -= node->size;
#endif

    malloc_mutex_unlock(&chunks_mtx);

    /* Unmap chunk. */
#ifdef USE_BRK
    if (opt_junk)
        memset(node->chunk, 0x5a, node->size);
#endif
    chunk_dealloc(node->chunk, node->size);

    base_chunk_node_dealloc(node);
}
static void *
imalloc(size_t size)
{
    void *ret;

    if (size <= arena_maxclass)
        ret = arena_malloc(choose_arena(), size);
    else
        ret = huge_malloc(size);

    return (ret);
}
static void *
ipalloc(size_t alignment, size_t size)
{
    void *ret;
    size_t ceil_size;

    /*
     * Round size up to the nearest multiple of alignment.
     *
     * This done, we can take advantage of the fact that for each small
     * size class, every object is aligned at the smallest power of two
     * that is non-zero in the base two representation of the size.  For
     * example:
     *
     *   Size |   Base 2 | Minimum alignment
     *   -----+----------+------------------
     *    144 | 10010000 |                16
     *    192 | 11000000 |                64
     *
     * Depending on runtime settings, it is possible that arena_malloc()
     * will further round up to a power of two, but that never causes
     * correctness issues.
     */
    ceil_size = (size + (alignment - 1)) & (-alignment);
    /*
     * (ceil_size < size) protects against the combination of maximal
     * alignment and size greater than maximal alignment.
     */
    if (ceil_size < size) {
        /* size_t overflow. */
        return (NULL);
    }

    if (ceil_size <= pagesize || (alignment <= pagesize
        && ceil_size <= arena_maxclass))
        ret = arena_malloc(choose_arena(), ceil_size);
    else {
        size_t run_size;

        /*
         * We can't achieve sub-page alignment, so round up alignment
         * permanently; it makes later calculations simpler.
         */
        alignment = PAGE_CEILING(alignment);
        ceil_size = PAGE_CEILING(size);
        /*
         * (ceil_size < size) protects against very large sizes within
         * pagesize of SIZE_T_MAX.
         *
         * (ceil_size + alignment < ceil_size) protects against the
         * combination of maximal alignment and ceil_size large enough
         * to cause overflow.  This is similar to the first overflow
         * check above, but it needs to be repeated due to the new
         * ceil_size value, which may now be *equal* to maximal
         * alignment, whereas before we only detected overflow if the
         * original size was *greater* than maximal alignment.
         */
        if (ceil_size < size || ceil_size + alignment < ceil_size) {
            /* size_t overflow. */
            return (NULL);
        }

        /*
         * Calculate the size of the over-size run that arena_palloc()
         * would need to allocate in order to guarantee the alignment.
         */
        if (ceil_size >= alignment)
            run_size = ceil_size + alignment - pagesize;
        else {
            /*
             * It is possible that (alignment << 1) will cause
             * overflow, but it doesn't matter because we also
             * subtract pagesize, which in the case of overflow
             * leaves us with a very large run_size.  That causes
             * the first conditional below to fail, which means
             * that the bogus run_size value never gets used for
             * anything important.
             */
            run_size = (alignment << 1) - pagesize;
        }

        if (run_size <= arena_maxclass) {
            ret = arena_palloc(choose_arena(), alignment, ceil_size,
                run_size);
        } else if (alignment <= chunksize)
            ret = huge_malloc(ceil_size);
        else
            ret = huge_palloc(alignment, ceil_size);
    }

    assert(((uintptr_t)ret & (alignment - 1)) == 0);
    return (ret);
}
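
/*
 * For illustration (a worked example of ipalloc() above): for alignment = 64
 * and size = 100, ceil_size == (100 + 63) & ~63 == 128, which is <= pagesize,
 * so arena_malloc() is used; the 128-byte size class is aligned to its lowest
 * set bit (128), which satisfies the 64-byte alignment request.
 */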
static void *
icalloc(size_t size)
{
    void *ret;

    if (size <= arena_maxclass) {
        ret = arena_malloc(choose_arena(), size);
        if (ret == NULL)
            return (NULL);
        memset(ret, 0, size);
    } else {
        /*
         * The virtual memory system provides zero-filled pages, so
         * there is no need to do so manually, unless opt_junk is
         * enabled, in which case huge_malloc() fills huge allocations
         * with junk.
         */
        ret = huge_malloc(size);
        if (ret == NULL)
            return (NULL);

        if (opt_junk)
            memset(ret, 0, size);
#ifdef USE_BRK
        else if ((uintptr_t)ret >= (uintptr_t)brk_base
            && (uintptr_t)ret < (uintptr_t)brk_max) {
            /*
             * This may be a re-used brk chunk.  Therefore, zero
             * the memory.
             */
            memset(ret, 0, size);
        }
#endif
    }

    return (ret);
}
static size_t
isalloc(const void *ptr)
{
    size_t ret;
    arena_chunk_t *chunk;

    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        assert(chunk->arena->magic == ARENA_MAGIC);

        ret = arena_salloc(ptr);
    } else {
        chunk_node_t *node, key;

        /* Chunk (huge allocation). */

        malloc_mutex_lock(&chunks_mtx);

        /* Extract from tree of huge allocations. */
        key.chunk = __DECONST(void *, ptr);
        node = RB_FIND(chunk_tree_s, &huge, &key);
        assert(node != NULL);

        ret = node->size;

        malloc_mutex_unlock(&chunks_mtx);
    }

    return (ret);
}
static void *
iralloc(void *ptr, size_t size)
{
    void *ret;
    size_t oldsize;

    assert(ptr != NULL);

    oldsize = isalloc(ptr);

    if (size <= arena_maxclass)
        ret = arena_ralloc(ptr, size, oldsize);
    else
        ret = huge_ralloc(ptr, size, oldsize);

    return (ret);
}
static void
idalloc(void *ptr)
{
    arena_chunk_t *chunk;

    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        arena_dalloc(chunk->arena, chunk, ptr);
    } else
        huge_dalloc(ptr);
}
static void
malloc_print_stats(void)
{

    if (opt_print_stats) {
        char s[UMAX2S_BUFSIZE];

        _malloc_message("___ Begin malloc statistics ___\n", "", "",
            "");
        _malloc_message("Assertions ",
#ifdef NDEBUG
            "disabled",
#else
            "enabled",
#endif
            "\n", "");
        _malloc_message("Boolean MALLOC_OPTIONS: ",
            opt_abort ? "A" : "a",
            opt_junk ? "J" : "j",
            opt_hint ? "H" : "h");
        _malloc_message(opt_utrace ? "PU" : "Pu",
            opt_sysv ? "V" : "v",
            opt_xmalloc ? "X" : "x",
            opt_zero ? "Z\n" : "z\n");

        _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
        _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
        _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
            "\n", "");
        _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
        _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
            "");

        _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
        _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");

#ifdef MALLOC_STATS
        {
            size_t allocated, mapped;
            unsigned i;
            arena_t *arena;

            /* Calculate and print allocated/mapped stats. */

            /* arenas. */
            for (i = 0, allocated = 0; i < narenas; i++) {
                if (arenas[i] != NULL) {
                    malloc_mutex_lock(&arenas[i]->mtx);
                    allocated +=
                        arenas[i]->stats.allocated_small;
                    allocated +=
                        arenas[i]->stats.allocated_large;
                    malloc_mutex_unlock(&arenas[i]->mtx);
                }
            }

            /* huge/base. */
            malloc_mutex_lock(&chunks_mtx);
            allocated += huge_allocated;
            mapped = stats_chunks.curchunks * chunksize;
            malloc_mutex_unlock(&chunks_mtx);

            malloc_mutex_lock(&base_mtx);
            mapped += base_mapped;
            malloc_mutex_unlock(&base_mtx);

            malloc_printf("Allocated: %zu, mapped: %zu\n",
                allocated, mapped);

            /* Print chunk stats. */
            {
                chunk_stats_t chunks_stats;

                malloc_mutex_lock(&chunks_mtx);
                chunks_stats = stats_chunks;
                malloc_mutex_unlock(&chunks_mtx);

                malloc_printf("chunks:       nchunks   "
                    "highchunks    curchunks\n");
                malloc_printf(" %13llu%13lu%13lu\n",
                    chunks_stats.nchunks,
                    chunks_stats.highchunks,
                    chunks_stats.curchunks);
            }

            /* Print huge allocation stats. */
            malloc_printf(
                "huge:      nmalloc      ndalloc      "
                "nralloc    allocated\n");
            malloc_printf(" %12llu %12llu %12llu %12zu\n",
                huge_nmalloc, huge_ndalloc, huge_nralloc,
                huge_allocated);

            /* Print stats for each arena. */
            for (i = 0; i < narenas; i++) {
                arena = arenas[i];
                if (arena != NULL) {
                    malloc_printf(
                        "\narenas[%u] @ %p\n", i, arena);
                    malloc_mutex_lock(&arena->mtx);
                    stats_print(arena);
                    malloc_mutex_unlock(&arena->mtx);
                }
            }
        }
#endif /* #ifdef MALLOC_STATS */
        _malloc_message("--- End malloc statistics ---\n", "", "", "");
    }
}
/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static inline bool
malloc_init(void)
{

    if (malloc_initialized == false)
        return (malloc_init_hard());

    return (false);
}
static bool
malloc_init_hard(void)
{
    unsigned i, j;
    int linklen;
    char buf[PATH_MAX + 1];
    const char *opts = "";

    malloc_mutex_lock(&init_lock);
    if (malloc_initialized) {
        /*
         * Another thread initialized the allocator before this one
         * acquired init_lock.
         */
        malloc_mutex_unlock(&init_lock);
        return (false);
    }

    /* Get number of CPUs. */
    {
        int mib[2];
        size_t len;

        mib[0] = CTL_HW;
        mib[1] = HW_NCPU;
        len = sizeof(ncpus);
        if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
            /* Error. */
            ncpus = 1;
        }
    }

    /* Get page size. */
    {
        long result;

        result = sysconf(_SC_PAGESIZE);
        assert(result != -1);
        pagesize = (unsigned) result;

        /*
         * We assume that pagesize is a power of 2 when calculating
         * pagesize_mask and pagesize_2pow.
         */
        assert(((result - 1) & result) == 0);
        pagesize_mask = result - 1;
        pagesize_2pow = ffs((int)result) - 1;
    }

    for (i = 0; i < 3; i++) {
        /* Get runtime configuration. */
        switch (i) {
        case 0:
            if ((linklen = readlink("/etc/malloc.conf", buf,
                sizeof(buf) - 1)) != -1) {
                /*
                 * Use the contents of the "/etc/malloc.conf"
                 * symbolic link's name.
                 */
                buf[linklen] = '\0';
                opts = buf;
            } else {
                /* No configuration specified. */
                opts = "";
            }
            break;
        case 1:
            if ((opts = getenv("MALLOC_OPTIONS")) != NULL &&
                issetugid() == 0) {
                /*
                 * Do nothing; opts is already initialized to
                 * the value of the MALLOC_OPTIONS environment
                 * variable.
                 */
            } else {
                /* No configuration specified. */
                opts = "";
            }
            break;
        case 2:
            if (_malloc_options != NULL) {
                /*
                 * Use options that were compiled into the
                 * program.
                 */
                opts = _malloc_options;
            } else {
                /* No configuration specified. */
                opts = "";
            }
            break;
        default:
            /* NOTREACHED */
            assert(false);
        }

        for (j = 0; opts[j] != '\0'; j++) {
            switch (opts[j]) {
            /*
             * The simple boolean flag characters (A/a, H/h, J/j,
             * U/u, V/v, Z/z) toggle their corresponding opt_*
             * variables in the same fashion as 'x'/'X' below.
             */
            case 'k':
                /*
                 * Chunks always require at least one header
                 * page, so chunks can never be smaller than
                 * two pages.
                 */
                if (opt_chunk_2pow > pagesize_2pow + 1)
                    opt_chunk_2pow--;
                break;
            case 'K':
                if (opt_chunk_2pow + 1 <
                    (int)(sizeof(size_t) << 3))
                    opt_chunk_2pow++;
                break;
            case 'n':
                opt_narenas_lshift--;
                break;
            case 'N':
                opt_narenas_lshift++;
                break;
            case 'p':
                opt_print_stats = false;
                break;
            case 'P':
                opt_print_stats = true;
                break;
            case 'q':
                if (opt_quantum_2pow > QUANTUM_2POW_MIN)
                    opt_quantum_2pow--;
                break;
            case 'Q':
                if (opt_quantum_2pow < pagesize_2pow - 1)
                    opt_quantum_2pow++;
                break;
            case 's':
                if (opt_small_max_2pow > QUANTUM_2POW_MIN)
                    opt_small_max_2pow--;
                break;
            case 'S':
                if (opt_small_max_2pow < pagesize_2pow - 1)
                    opt_small_max_2pow++;
                break;
            case 'x':
                opt_xmalloc = false;
                break;
            case 'X':
                opt_xmalloc = true;
                break;
            default: {
                char cbuf[2];

                cbuf[0] = opts[j];
                cbuf[1] = '\0';
                _malloc_message(getprogname(),
                    ": (malloc) Unsupported character in "
                    "malloc options: '", cbuf, "'\n");
            }
            }
        }
    }

    /* Take care to call atexit() only once. */
    if (opt_print_stats) {
        /* Print statistics at exit. */
        atexit(malloc_print_stats);
    }
    /* Set variables according to the value of opt_small_max_2pow. */
    if (opt_small_max_2pow < opt_quantum_2pow)
        opt_small_max_2pow = opt_quantum_2pow;
    small_max = (1 << opt_small_max_2pow);

    /* Set bin-related variables. */
    bin_maxclass = (pagesize >> 1);
    assert(opt_quantum_2pow >= TINY_MIN_2POW);
    ntbins = (unsigned)(opt_quantum_2pow - TINY_MIN_2POW);
    assert(ntbins <= opt_quantum_2pow);
    nqbins = (unsigned)(small_max >> opt_quantum_2pow);
    nsbins = (unsigned)(pagesize_2pow - opt_small_max_2pow - 1);
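
    /*
     * For illustration (assuming the defaults of 4 kB pages, a 16-byte
     * quantum (opt_quantum_2pow == 4, TINY_MIN_2POW == 1) and
     * opt_small_max_2pow == 9): ntbins == 3, nqbins == 32 and nsbins == 2,
     * for a total of 37 bins per arena.
     */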
    /* Set variables according to the value of opt_quantum_2pow. */
    quantum = (1 << opt_quantum_2pow);
    quantum_mask = quantum - 1;
    if (ntbins > 0)
        small_min = (quantum >> 1) + 1;
    else
        small_min = 1;
    assert(small_min <= quantum);

    /* Set variables according to the value of opt_chunk_2pow. */
    chunksize = (1LU << opt_chunk_2pow);
    chunksize_mask = chunksize - 1;
    chunksize_2pow = (unsigned)opt_chunk_2pow;
    chunk_npages = (unsigned)(chunksize >> pagesize_2pow);
    {
        unsigned header_size;

        header_size = (unsigned)(sizeof(arena_chunk_t) +
            (sizeof(arena_chunk_map_t) * (chunk_npages - 1)));
        arena_chunk_header_npages = (header_size >> pagesize_2pow);
        if ((header_size & pagesize_mask) != 0)
            arena_chunk_header_npages++;
    }
    arena_maxclass = chunksize - (arena_chunk_header_npages <<
        pagesize_2pow);

#ifdef MALLOC_STATS
    memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif

    /* Various sanity checks that regard configuration. */
    assert(quantum >= sizeof(void *));
    assert(quantum <= pagesize);
    assert(chunksize >= pagesize);
    assert(quantum * 4 <= chunksize);

    /* Initialize chunks data. */
    malloc_mutex_init(&chunks_mtx);
    RB_INIT(&huge);
#ifdef USE_BRK
    malloc_mutex_init(&brk_mtx);
    brk_base = sbrk(0);
    brk_prev = brk_base;
    brk_max = brk_base;
#endif
#ifdef MALLOC_STATS
    huge_nmalloc = 0;
    huge_ndalloc = 0;
    huge_nralloc = 0;
    huge_allocated = 0;
#endif
    RB_INIT(&old_chunks);

    /* Initialize base allocation data structures. */
#ifdef MALLOC_STATS
    base_mapped = 0;
#endif
#ifdef USE_BRK
    /*
     * Allocate a base chunk here, since it doesn't actually have to be
     * chunk-aligned.  Doing this before allocating any other chunks allows
     * the use of space that would otherwise be wasted.
     */
    base_pages_alloc(0);
#endif
    base_chunk_nodes = NULL;
    malloc_mutex_init(&base_mtx);

    if (ncpus > 1) {
        /*
         * For SMP systems, create four times as many arenas as there
         * are CPUs by default.
         */
        opt_narenas_lshift += 2;
    }

    /* Initialize arena key. */
    (void)thr_keycreate(&arenas_map_key, NULL);

    /* Determine how many arenas to use. */
    narenas = ncpus;
    if (opt_narenas_lshift > 0) {
        if ((narenas << opt_narenas_lshift) > narenas)
            narenas <<= opt_narenas_lshift;
        /*
         * Make sure not to exceed the limits of what base_malloc()
         * can handle.
         */
        if (narenas * sizeof(arena_t *) > chunksize)
            narenas = (unsigned)(chunksize / sizeof(arena_t *));
    } else if (opt_narenas_lshift < 0) {
        if ((narenas << opt_narenas_lshift) < narenas)
            narenas <<= opt_narenas_lshift;
        /* Make sure there is at least one arena. */
        if (narenas == 0)
            narenas = 1;
    }

    /* Allocate and initialize arenas. */
    arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
    if (arenas == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }
    /*
     * Zero the array.  In practice, this should always be pre-zeroed,
     * since it was just mmap()ed, but let's be sure.
     */
    memset(arenas, 0, sizeof(arena_t *) * narenas);

    /*
     * Initialize one arena here.  The rest are lazily created in
     * arena_choose_hard().
     */
    arenas_extend(0);
    if (arenas[0] == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }

    malloc_mutex_init(&arenas_mtx);

    malloc_initialized = true;
    malloc_mutex_unlock(&init_lock);
    return (false);
}
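
/*
 * For illustration: with default options on a 4-CPU machine,
 * malloc_init_hard() starts from narenas = ncpus = 4 and, because ncpus > 1
 * added 2 to opt_narenas_lshift, ends up with 4 << 2 == 16 arenas.
 */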
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */
void *
malloc(size_t size)
{
    void *ret;

    if (malloc_init()) {
        ret = NULL;
        goto RETURN;
    }

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    }

    ret = imalloc(size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            _malloc_message(getprogname(),
                ": (malloc) Error in malloc(): out of memory\n", "",
                "");
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, size, ret);
    return (ret);
}
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
    int ret;
    void *result;

    if (malloc_init())
        result = NULL;
    else {
        /* Make sure that alignment is a large enough power of 2. */
        if (((alignment - 1) & alignment) != 0
            || alignment < sizeof(void *)) {
            if (opt_xmalloc) {
                _malloc_message(getprogname(),
                    ": (malloc) Error in posix_memalign(): "
                    "invalid alignment\n", "", "");
                abort();
            }
            result = NULL;
            ret = EINVAL;
            goto RETURN;
        }

        result = ipalloc(alignment, size);
    }

    if (result == NULL) {
        if (opt_xmalloc) {
            _malloc_message(getprogname(),
                ": (malloc) Error in posix_memalign(): out of memory\n",
                "", "");
            abort();
        }
        ret = ENOMEM;
        goto RETURN;
    }

    *memptr = result;
    ret = 0;

RETURN:
    UTRACE(0, size, result);
    return (ret);
}
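
/*
 * Illustrative usage of posix_memalign() (a sketch, not part of the original
 * source):
 *
 *    void *p;
 *    if (posix_memalign(&p, 4096, 8192) == 0) {
 *        ... p is 4096-byte aligned ...
 *        free(p);
 *    }
 *
 * An alignment such as 48 would fail the power-of-two check above and return
 * EINVAL without touching *memptr.
 */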
void *
calloc(size_t num, size_t size)
{
    void *ret;
    size_t num_size;

    if (malloc_init()) {
        num_size = 0;
        ret = NULL;
        goto RETURN;
    }

    num_size = num * size;
    if (num_size == 0) {
        if ((opt_sysv == false) && ((num == 0) || (size == 0)))
            num_size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    /*
     * Try to avoid division here.  We know that it isn't possible to
     * overflow during multiplication if neither operand uses any of the
     * most significant half of the bits in a size_t.
     */
    } else if ((unsigned long long)((num | size) &
        ((unsigned long long)SIZE_T_MAX << (sizeof(size_t) << 2))) &&
        (num_size / size != num)) {
        /* size_t overflow. */
        ret = NULL;
        goto RETURN;
    }

    ret = icalloc(num_size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            _malloc_message(getprogname(),
                ": (malloc) Error in calloc(): out of memory\n", "",
                "");
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, num_size, ret);
    return (ret);
}
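
/*
 * A worked example of the overflow check in calloc() above: with a 32-bit
 * size_t the mask is SIZE_T_MAX << 16 == 0xffff0000, so if both num and size
 * fit in 16 bits the product cannot wrap and the division is skipped
 * entirely.  Otherwise num_size / size != num exposes the wrap; e.g.
 * num == 0x10000 and size == 0x10001 multiply to 0x10000 after truncation,
 * and 0x10000 / 0x10001 == 0 != num.
 */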
void *
realloc(void *ptr, size_t size)
{
    void *ret;

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
        else {
            if (ptr != NULL)
                idalloc(ptr);
            ret = NULL;
            goto RETURN;
        }
    }

    if (ptr != NULL) {
        assert(malloc_initialized);

        ret = iralloc(ptr, size);

        if (ret == NULL) {
            if (opt_xmalloc) {
                _malloc_message(getprogname(),
                    ": (malloc) Error in realloc(): out of "
                    "memory\n", "", "");
                abort();
            }
            errno = ENOMEM;
        }
    } else {
        if (malloc_init())
            ret = NULL;
        else
            ret = imalloc(size);

        if (ret == NULL) {
            if (opt_xmalloc) {
                _malloc_message(getprogname(),
                    ": (malloc) Error in realloc(): out of "
                    "memory\n", "", "");
                abort();
            }
            errno = ENOMEM;
        }
    }

RETURN:
    UTRACE(ptr, size, ret);
    return (ret);
}
void
free(void *ptr)
{

    UTRACE(ptr, 0, 0);
    if (ptr != NULL) {
        assert(malloc_initialized);

        idalloc(ptr);
    }
}
/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */
size_t
malloc_usable_size(const void *ptr)
{

    assert(ptr != NULL);

    return (isalloc(ptr));
}
/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions, used by threading libraries for protection
 * of malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */
void
_malloc_prefork(void)
{
    unsigned i;

    /* Acquire all mutexes in a safe order. */

    malloc_mutex_lock(&arenas_mtx);
    for (i = 0; i < narenas; i++) {
        if (arenas[i] != NULL)
            malloc_mutex_lock(&arenas[i]->mtx);
    }
    malloc_mutex_unlock(&arenas_mtx);

    malloc_mutex_lock(&base_mtx);

    malloc_mutex_lock(&chunks_mtx);
}
void
_malloc_postfork(void)
{
    unsigned i;

    /* Release all mutexes, now that fork() has completed. */

    malloc_mutex_unlock(&chunks_mtx);

    malloc_mutex_unlock(&base_mtx);

    malloc_mutex_lock(&arenas_mtx);
    for (i = 0; i < narenas; i++) {
        if (arenas[i] != NULL)
            malloc_mutex_unlock(&arenas[i]->mtx);
    }
    malloc_mutex_unlock(&arenas_mtx);
}
/*
 * End library-private functions.
 */
/******************************************************************************/