4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 #include "mtmalloc_impl.h"
38 #include <sys/param.h>
39 #include <sys/sysmacros.h>
42 * To turn on the asserts just compile -DDEBUG
52 * The MT hot malloc implementation contained herein is designed to be
53 * plug-compatible with the libc version of malloc. It is not intended
54 * to replace that implementation until we decide that it is ok to break
55 * customer apps (Solaris 3.0).
57 * For requests up to 2^^16, the allocator initializes itself into NCPUS
58 * worth of chains of caches. When a memory request is made, the calling thread
59 * is vectored into one of NCPUS worth of caches. The LWP id gives us a cheap,
60 * contention-reducing index to use, eventually, this should be replaced with
61 * the actual CPU sequence number, when an interface to get it is available.
63 * Once the thread is vectored into one of the list of caches the real
64 * allocation of the memory begins. The size is determined to figure out which
65 * bucket the allocation should be satisfied from. The management of free
66 * buckets is done via a bitmask. A free bucket is represented by a 1. The
67 * first free bit represents the first free bucket. The position of the bit,
68 * represents the position of the bucket in the arena.
70 * When the memory from the arena is handed out, the address of the cache
71 * control structure is written in the word preceeding the returned memory.
72 * This cache control address is used during free() to mark the buffer free
73 * in the cache control structure.
75 * When all available memory in a cache has been depleted, a new chunk of memory
76 * is allocated via sbrk(). The new cache is allocated from this chunk of memory
77 * and initialized in the function create_cache(). New caches are installed at
78 * the front of a singly linked list of the same size memory pools. This helps
79 * to ensure that there will tend to be available memory in the beginning of the
82 * Long linked lists hurt performance. To decrease this effect, there is a
83 * tunable, requestsize, that bumps up the sbrk allocation size and thus
84 * increases the number of available blocks within an arena. We also keep
85 * a "hint" for each cache list, which is the last cache in the list allocated
86 * from. This lowers the cost of searching if there are a lot of fully
87 * allocated blocks at the front of the list.
89 * For requests greater than 2^^16 (oversize allocations), there are two pieces
90 * of overhead. There is the OVERHEAD used to hold the cache addr
91 * (&oversize_list), plus an oversize_t structure to further describe the block.
93 * The oversize list is kept as defragmented as possible by coalescing
94 * freed oversized allocations with adjacent neighbors.
96 * Addresses handed out are stored in a hash table, and are aligned on
97 * MTMALLOC_MIN_ALIGN-byte boundaries at both ends. Request sizes are rounded-up
98 * where necessary in order to achieve this. This eases the implementation of
99 * MTDEBUGPATTERN and MTINITPATTERN, particularly where coalescing occurs.
101 * A memalign allocation takes memalign header overhead. There's two
102 * types of memalign headers distinguished by MTMALLOC_MEMALIGN_MAGIC
103 * and MTMALLOC_MEMALIGN_MIN_MAGIC. When the size of memory taken to
104 * get to the aligned address from malloc'ed address is the minimum size
105 * OVERHEAD, we create a header taking only one OVERHEAD space with magic
106 * number MTMALLOC_MEMALIGN_MIN_MAGIC, and we know by subtracting OVERHEAD
107 * from memaligned address, we can get to the malloc'ed address. Otherwise,
108 * we create a memalign header taking two OVERHEAD space, one stores
109 * MTMALLOC_MEMALIGN_MAGIC magic number, the other one points back to the
113 #if defined(__i386) || defined(__amd64)
114 #include <arpa/inet.h> /* for htonl() */
117 static void * morecore(size_t);
118 static void create_cache(cache_t
*, size_t bufsize
, uint_t hunks
);
119 static void * malloc_internal(size_t, percpu_t
*);
120 static void * oversize(size_t);
121 static oversize_t
*find_oversize(size_t);
122 static void add_oversize(oversize_t
*);
123 static void copy_pattern(uint32_t, void *, size_t);
124 static void * verify_pattern(uint32_t, void *, size_t);
125 static void reinit_cpu_list(void);
126 static void reinit_cache(cache_t
*);
127 static void free_oversize(oversize_t
*);
128 static oversize_t
*oversize_header_alloc(uintptr_t, size_t);
131 * oversize hash table stuff
133 #define NUM_BUCKETS 67 /* must be prime */
134 #define HASH_OVERSIZE(caddr) ((uintptr_t)(caddr) % NUM_BUCKETS)
135 oversize_t
*ovsz_hashtab
[NUM_BUCKETS
];
137 #define ALIGN(x, a) ((((uintptr_t)(x) + ((uintptr_t)(a) - 1)) \
138 & ~((uintptr_t)(a) - 1)))
140 /* need this to deal with little endianess of x86 */
141 #if defined(__i386) || defined(__amd64)
142 #define FLIP_EM(x) htonl((x))
144 #define FLIP_EM(x) (x)
147 #define INSERT_ONLY 0
148 #define COALESCE_LEFT 0x00000001
149 #define COALESCE_RIGHT 0x00000002
150 #define COALESCE_WITH_BOTH_SIDES (COALESCE_LEFT | COALESCE_RIGHT)
152 #define OVERHEAD 8 /* size needed to write cache addr */
153 #define HUNKSIZE 8192 /* just a multiplier */
155 #define MAX_CACHED_SHIFT 16 /* 64K is the max cached size */
156 #define MAX_CACHED (1 << MAX_CACHED_SHIFT)
157 #define MIN_CACHED_SHIFT 4 /* smaller requests rounded up */
158 #define MTMALLOC_MIN_ALIGN 8 /* min guaranteed alignment */
160 /* maximum size before overflow */
161 #define MAX_MTMALLOC (SIZE_MAX - (SIZE_MAX % MTMALLOC_MIN_ALIGN) \
164 #define NUM_CACHES (MAX_CACHED_SHIFT - MIN_CACHED_SHIFT + 1)
165 #define CACHELIST_SIZE ALIGN(NUM_CACHES * sizeof (cache_head_t), \
166 CACHE_COHERENCY_UNIT)
168 #define MINSIZE 9 /* for requestsize, tunable */
169 #define MAXSIZE 256 /* arbitrary, big enough, for requestsize */
171 #define FREEPATTERN 0xdeadbeef /* debug fill pattern for free buf */
172 #define INITPATTERN 0xbaddcafe /* debug fill pattern for new buf */
174 #define misaligned(p) ((unsigned)(p) & (sizeof (int) - 1))
175 #define IS_OVERSIZE(x, y) (((x) < (y)) && (((x) > MAX_CACHED)? 1 : 0))
177 static long requestsize
= MINSIZE
; /* 9 pages per cache; tunable; 9 is min */
179 static uint_t cpu_mask
;
180 static curcpu_func curcpu
;
182 static int32_t debugopt
;
183 static int32_t reinit
;
185 static percpu_t
*cpu_list
;
186 static oversize_t oversize_list
;
187 static mutex_t oversize_lock
= DEFAULTMUTEX
;
189 static int ncpus
= 0;
191 #define MTMALLOC_OVERSIZE_MAGIC ((uintptr_t)&oversize_list)
192 #define MTMALLOC_MEMALIGN_MAGIC ((uintptr_t)&oversize_list + 1)
193 #define MTMALLOC_MEMALIGN_MIN_MAGIC ((uintptr_t)&oversize_list + 2)
196 * We require allocations handed out to be aligned on MTMALLOC_MIN_ALIGN-byte
197 * boundaries. We round up sizeof (oversize_t) (when necessary) to ensure that
200 #define OVSZ_SIZE (ALIGN(sizeof (oversize_t), MTMALLOC_MIN_ALIGN))
201 #define OVSZ_HEADER_SIZE (OVSZ_SIZE + OVERHEAD)
204 * memalign header takes 2 OVERHEAD space. One for memalign magic, and the
205 * other one points back to the start address of originally allocated space.
207 #define MEMALIGN_HEADER_SIZE 2 * OVERHEAD
208 #define MEMALIGN_HEADER_ALLOC(x, shift, malloc_addr)\
209 if (shift == OVERHEAD)\
210 *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
211 MTMALLOC_MEMALIGN_MIN_MAGIC; \
213 *((uintptr_t *)((caddr_t)x - OVERHEAD)) = \
214 MTMALLOC_MEMALIGN_MAGIC; \
215 *((uintptr_t *)((caddr_t)x - 2 * OVERHEAD)) = \
216 (uintptr_t)malloc_addr; \
220 * Add big to the oversize hash table at the head of the relevant bucket.
223 insert_hash(oversize_t
*big
)
225 caddr_t ret
= big
->addr
;
226 int bucket
= HASH_OVERSIZE(ret
);
228 assert(MUTEX_HELD(&oversize_lock
));
229 big
->hash_next
= ovsz_hashtab
[bucket
];
230 ovsz_hashtab
[bucket
] = big
;
236 percpu_t
*list_rotor
;
239 if (bytes
> MAX_CACHED
)
240 return (oversize(bytes
));
242 list_index
= (curcpu() & cpu_mask
);
244 list_rotor
= &cpu_list
[list_index
];
246 return (malloc_internal(bytes
, list_rotor
));
250 realloc(void * ptr
, size_t bytes
)
252 void *new, *data_ptr
;
258 return (malloc(bytes
));
266 mem
= (caddr_t
)ptr
- OVERHEAD
;
269 * Optimization possibility :
271 * q = realloc(p, 64);
272 * q can be same as p.
273 * Apply this optimization for the normal
274 * sized caches for now.
276 if (*(uintptr_t *)mem
< MTMALLOC_OVERSIZE_MAGIC
||
277 *(uintptr_t *)mem
> MTMALLOC_MEMALIGN_MIN_MAGIC
) {
278 cacheptr
= (cache_t
*)*(uintptr_t *)mem
;
279 if (bytes
<= (cacheptr
->mt_size
- OVERHEAD
))
289 * If new == ptr, ptr has previously been freed. Passing a freed pointer
290 * to realloc() is not allowed - unless the caller specifically states
291 * otherwise, in which case we must avoid freeing ptr (ie new) before we
292 * return new. There is (obviously) no requirement to memcpy() ptr to
293 * new before we return.
296 if (!(debugopt
& MTDOUBLEFREE
))
301 if (*(uintptr_t *)mem
== MTMALLOC_MEMALIGN_MAGIC
) {
303 ptr
= (void *)*(uintptr_t *)mem
;
304 mem
= (caddr_t
)ptr
- OVERHEAD
;
305 shift
= (size_t)((uintptr_t)data_ptr
- (uintptr_t)ptr
);
306 } else if (*(uintptr_t *)mem
== MTMALLOC_MEMALIGN_MIN_MAGIC
) {
312 if (*(uintptr_t *)mem
== MTMALLOC_OVERSIZE_MAGIC
) {
315 old
= (oversize_t
*)(mem
- OVSZ_SIZE
);
316 (void) memcpy(new, data_ptr
, MIN(bytes
, old
->size
- shift
));
321 cacheptr
= (cache_t
*)*(uintptr_t *)mem
;
323 (void) memcpy(new, data_ptr
,
324 MIN(cacheptr
->mt_size
- OVERHEAD
- shift
, bytes
));
331 calloc(size_t nelem
, size_t bytes
)
336 if (nelem
== 0 || bytes
== 0) {
339 size
= nelem
* bytes
;
341 /* check for overflow */
342 if ((size
/ nelem
) != bytes
) {
351 (void) memset(ptr
, 0, size
);
365 int32_t which_bit
, num_bytes
;
370 mem
= (caddr_t
)ptr
- OVERHEAD
;
372 if (*(uintptr_t *)mem
== MTMALLOC_MEMALIGN_MAGIC
) {
374 ptr
= (void *)*(uintptr_t *)mem
;
375 mem
= (caddr_t
)ptr
- OVERHEAD
;
376 } else if (*(uintptr_t *)mem
== MTMALLOC_MEMALIGN_MIN_MAGIC
) {
381 if (*(uintptr_t *)mem
== MTMALLOC_OVERSIZE_MAGIC
) {
382 oversize_t
*big
, **opp
;
385 big
= (oversize_t
*)(mem
- OVSZ_SIZE
);
386 (void) mutex_lock(&oversize_lock
);
388 bucket
= HASH_OVERSIZE(big
->addr
);
389 for (opp
= &ovsz_hashtab
[bucket
]; *opp
!= NULL
;
390 opp
= &(*opp
)->hash_next
)
395 if (!(debugopt
& MTDOUBLEFREE
))
397 (void) mutex_unlock(&oversize_lock
);
401 *opp
= big
->hash_next
; /* remove big from the hash table */
402 big
->hash_next
= NULL
;
404 if (debugopt
& MTDEBUGPATTERN
)
405 copy_pattern(FREEPATTERN
, ptr
, big
->size
);
407 (void) mutex_unlock(&oversize_lock
);
411 cacheptr
= (cache_t
*)*(uintptr_t *)mem
;
412 freeblocks
= cacheptr
->mt_freelist
;
415 * This is the distance measured in bits into the arena.
416 * The value of offset is in bytes but there is a 1-1 correlation
417 * between distance into the arena and distance into the
420 offset
= mem
- cacheptr
->mt_arena
;
423 * i is total number of bits to offset into freelist bitmask.
426 i
= offset
/ cacheptr
->mt_size
;
431 * which_bit is the bit offset into the byte in the freelist.
432 * if our freelist bitmask looks like 0xf3 and we are freeing
433 * block 5 (ie: the 6th block) our mask will be 0xf7 after
434 * the free. Things go left to right that's why the mask is 0x80
437 which_bit
= i
- (num_bytes
<< 3);
439 mask
= 0x80 >> which_bit
;
441 freeblocks
+= num_bytes
;
443 if (debugopt
& MTDEBUGPATTERN
)
444 copy_pattern(FREEPATTERN
, ptr
, cacheptr
->mt_size
- OVERHEAD
);
446 (void) mutex_lock(&cacheptr
->mt_cache_lock
);
448 if (*freeblocks
& mask
) {
449 if (!(debugopt
& MTDOUBLEFREE
))
453 cacheptr
->mt_nfree
++;
456 (void) mutex_unlock(&cacheptr
->mt_cache_lock
);
460 memalign(size_t alignment
, size_t size
)
467 if (size
== 0 || alignment
== 0 || misaligned(alignment
) ||
468 (alignment
& (alignment
- 1)) != 0) {
473 /* <= MTMALLOC_MIN_ALIGN, malloc can provide directly */
474 if (alignment
<= MTMALLOC_MIN_ALIGN
)
475 return (malloc(size
));
477 alloc_size
= size
+ alignment
- MTMALLOC_MIN_ALIGN
;
479 if (alloc_size
< size
) { /* overflow */
484 alloc_buf
= malloc(alloc_size
);
486 if (alloc_buf
== NULL
)
487 /* malloc sets errno */
491 * If alloc_size > MAX_CACHED, malloc() will have returned a multiple of
492 * MTMALLOC_MIN_ALIGN, having rounded-up alloc_size if necessary. Since
493 * we will use alloc_size to return the excess fragments to the free
494 * list, we also round-up alloc_size if necessary.
496 if ((alloc_size
> MAX_CACHED
) &&
497 (alloc_size
& (MTMALLOC_MIN_ALIGN
- 1)))
498 alloc_size
= ALIGN(alloc_size
, MTMALLOC_MIN_ALIGN
);
500 if ((offset
= (uintptr_t)alloc_buf
& (alignment
- 1)) == 0) {
501 /* aligned correctly */
503 size_t frag_size
= alloc_size
-
504 (size
+ MTMALLOC_MIN_ALIGN
+ OVSZ_HEADER_SIZE
);
507 * If the leftover piece of the memory > MAX_CACHED,
508 * split off the piece and return it back to the freelist.
510 if (IS_OVERSIZE(frag_size
, alloc_size
)) {
511 oversize_t
*orig
, *tail
;
514 taddr
= ALIGN((uintptr_t)alloc_buf
+ size
,
516 data_size
= taddr
- (uintptr_t)alloc_buf
;
517 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
519 frag_size
= orig
->size
- data_size
-
521 orig
->size
= data_size
;
522 tail
= oversize_header_alloc(taddr
, frag_size
);
527 uchar_t oversize_bits
= 0;
528 size_t head_sz
, data_sz
, tail_sz
;
529 uintptr_t ret_addr
, taddr
, shift
, tshift
;
530 oversize_t
*orig
, *tail
, *big
;
533 /* needs to be aligned */
534 shift
= alignment
- offset
;
536 assert(shift
>= MTMALLOC_MIN_ALIGN
);
538 ret_addr
= ((uintptr_t)alloc_buf
+ shift
);
539 ret_buf
= (void *)ret_addr
;
541 if (alloc_size
<= MAX_CACHED
) {
542 MEMALIGN_HEADER_ALLOC(ret_addr
, shift
, alloc_buf
);
547 * Only check for the fragments when the memory is allocted
548 * from oversize_list. Split off a fragment and return it
549 * to the oversize freelist when it's > MAX_CACHED.
552 head_sz
= shift
- MAX(MEMALIGN_HEADER_SIZE
, OVSZ_HEADER_SIZE
);
554 tail_sz
= alloc_size
-
555 (shift
+ size
+ MTMALLOC_MIN_ALIGN
+ OVSZ_HEADER_SIZE
);
557 oversize_bits
|= IS_OVERSIZE(head_sz
, alloc_size
) |
558 IS_OVERSIZE(size
, alloc_size
) << DATA_SHIFT
|
559 IS_OVERSIZE(tail_sz
, alloc_size
) << TAIL_SHIFT
;
561 switch (oversize_bits
) {
564 MEMALIGN_HEADER_ALLOC(ret_addr
, shift
,
569 * If we can extend data > MAX_CACHED and have
570 * head still > MAX_CACHED, we split head-end
571 * as the case of head-end and data oversized,
572 * otherwise just create memalign header.
574 tsize
= (shift
+ size
) - (MAX_CACHED
+ 8 +
575 MTMALLOC_MIN_ALIGN
+ OVSZ_HEADER_SIZE
);
577 if (!IS_OVERSIZE(tsize
, alloc_size
)) {
578 MEMALIGN_HEADER_ALLOC(ret_addr
, shift
,
582 tsize
+= OVSZ_HEADER_SIZE
;
583 taddr
= ALIGN((uintptr_t)alloc_buf
+
584 tsize
, MTMALLOC_MIN_ALIGN
);
585 tshift
= ret_addr
- taddr
;
586 MEMALIGN_HEADER_ALLOC(ret_addr
, tshift
,
589 shift
= ret_addr
- (uintptr_t)alloc_buf
;
592 case HEAD_AND_DATA_OVERSIZE
:
594 * Split off the head fragment and
595 * return it back to oversize freelist.
596 * Create oversize header for the piece
597 * of (data + tail fragment).
599 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
601 big
= oversize_header_alloc(ret_addr
-
602 OVSZ_HEADER_SIZE
, (orig
->size
- shift
));
603 (void) mutex_lock(&oversize_lock
);
605 (void) mutex_unlock(&oversize_lock
);
606 orig
->size
= shift
- OVSZ_HEADER_SIZE
;
608 /* free up the head fragment */
613 * If we can extend data > MAX_CACHED and have
614 * tail-end still > MAX_CACHED, we split tail
615 * end, otherwise just create memalign header.
617 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
619 tsize
= orig
->size
- (MAX_CACHED
+ 8 +
620 shift
+ OVSZ_HEADER_SIZE
+
622 if (!IS_OVERSIZE(tsize
, alloc_size
)) {
623 MEMALIGN_HEADER_ALLOC(ret_addr
, shift
,
627 size
= MAX_CACHED
+ 8;
630 case DATA_AND_TAIL_OVERSIZE
:
632 * Split off the tail fragment and
633 * return it back to oversize freelist.
634 * Create memalign header and adjust
635 * the size for the piece of
636 * (head fragment + data).
638 taddr
= ALIGN(ret_addr
+ size
,
640 data_sz
= (size_t)(taddr
-
641 (uintptr_t)alloc_buf
);
642 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
644 tsize
= orig
->size
- data_sz
;
645 orig
->size
= data_sz
;
646 MEMALIGN_HEADER_ALLOC(ret_buf
, shift
,
648 tsize
-= OVSZ_HEADER_SIZE
;
649 tail
= oversize_header_alloc(taddr
, tsize
);
652 case HEAD_AND_TAIL_OVERSIZE
:
654 * Split off the head fragment.
655 * We try to free up tail-end when we can
656 * extend data size to (MAX_CACHED + 8)
657 * and remain tail-end oversized.
658 * The bottom line is all split pieces
659 * should be oversize in size.
661 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
663 tsize
= orig
->size
- (MAX_CACHED
+ 8 +
664 OVSZ_HEADER_SIZE
+ shift
+
667 if (!IS_OVERSIZE(tsize
, alloc_size
)) {
669 * If the chunk is not big enough
670 * to make both data and tail oversize
671 * we just keep them as one piece.
673 big
= oversize_header_alloc(ret_addr
-
676 (void) mutex_lock(&oversize_lock
);
678 (void) mutex_unlock(&oversize_lock
);
679 orig
->size
= shift
- OVSZ_HEADER_SIZE
;
684 * extend data size > MAX_CACHED
685 * and handle it as head, data, tail
688 size
= MAX_CACHED
+ 8;
693 * split off the head and tail fragments,
694 * return them back to the oversize freelist.
695 * Alloc oversize header for data seg.
697 orig
= (oversize_t
*)((uintptr_t)alloc_buf
-
700 orig
->size
= shift
- OVSZ_HEADER_SIZE
;
703 taddr
= ALIGN(ret_addr
+ size
,
705 data_sz
= taddr
- ret_addr
;
706 assert(tsize
> (shift
+ data_sz
+
709 (shift
+ data_sz
+ OVSZ_HEADER_SIZE
);
711 /* create oversize header for data seg */
712 big
= oversize_header_alloc(ret_addr
-
713 OVSZ_HEADER_SIZE
, data_sz
);
714 (void) mutex_lock(&oversize_lock
);
716 (void) mutex_unlock(&oversize_lock
);
718 /* create oversize header for tail fragment */
719 tail
= oversize_header_alloc(taddr
, tail_sz
);
723 /* should not reach here */
734 static unsigned pagesize
;
740 pagesize
= sysconf(_SC_PAGESIZE
);
742 return (memalign(pagesize
, size
));
746 mallocctl(int cmd
, long value
)
752 * Reinitialize free blocks in case malloc() is called prior
755 if (value
&& !(debugopt
& cmd
)) {
769 if (value
>= MINSIZE
&& value
<= MAXSIZE
)
778 * Initialization function, called from the init section of the library.
779 * No locking is required here because we are single-threaded during
780 * library initialization.
788 size_t cache_space_needed
;
791 curcpu_func new_curcpu
;
793 percpu_t
*new_cpu_list
;
799 * Get a decent "current cpu identifier", to be used to reduce
800 * contention. Eventually, this should be replaced by an interface
801 * to get the actual CPU sequence number in libthread/liblwp.
803 new_curcpu
= (curcpu_func
)thr_self
;
804 if ((ncpus
= 2 * sysconf(_SC_NPROCESSORS_CONF
)) <= 0)
805 ncpus
= 4; /* decent default value */
807 /* round ncpus up to a power of 2 */
808 while (ncpus
& (ncpus
- 1))
811 new_cpu_mask
= ncpus
- 1; /* create the cpu mask */
814 * We now do some magic with the brk. What we want to get in the
815 * end is a bunch of well-aligned stuff in a big initial allocation.
816 * Along the way, we do sanity checks to make sure no one else has
817 * touched the brk (which shouldn't happen, but it's always good to
820 * First, make sure sbrk is sane, and store the current brk in oldbrk.
822 oldbrk
= (uintptr_t)sbrk(0);
823 if ((void *)oldbrk
== (void *)-1)
824 abort(); /* sbrk is broken -- we're doomed. */
827 * Now, align the brk to a multiple of CACHE_COHERENCY_UNIT, so that
828 * the percpu structures and cache lists will be properly aligned.
830 * 2. All hunks will be page-aligned, assuming HUNKSIZE >= PAGESIZE,
831 * so they can be paged out individually.
833 newbrk
= ALIGN(oldbrk
, CACHE_COHERENCY_UNIT
);
834 if (newbrk
!= oldbrk
&& (uintptr_t)sbrk(newbrk
- oldbrk
) != oldbrk
)
835 abort(); /* sbrk is broken -- we're doomed. */
838 * For each cpu, there is one percpu_t and a list of caches
840 cache_space_needed
= ncpus
* (sizeof (percpu_t
) + CACHELIST_SIZE
);
842 new_cpu_list
= (percpu_t
*)sbrk(cache_space_needed
);
844 if (new_cpu_list
== (percpu_t
*)-1 ||
845 (uintptr_t)new_cpu_list
!= newbrk
)
846 abort(); /* sbrk is broken -- we're doomed. */
849 * Finally, align the brk to HUNKSIZE so that all hunks are
850 * page-aligned, to avoid edge-effects.
853 newbrk
= (uintptr_t)new_cpu_list
+ cache_space_needed
;
855 padding
= ALIGN(newbrk
, HUNKSIZE
) - newbrk
;
857 if (padding
> 0 && (uintptr_t)sbrk(padding
) != newbrk
)
858 abort(); /* sbrk is broken -- we're doomed. */
860 list_addr
= ((uintptr_t)new_cpu_list
+ (sizeof (percpu_t
) * ncpus
));
862 /* initialize the percpu list */
863 for (i
= 0; i
< ncpus
; i
++) {
864 new_cpu_list
[i
].mt_caches
= (cache_head_t
*)list_addr
;
865 for (j
= 0; j
< NUM_CACHES
; j
++) {
866 new_cpu_list
[i
].mt_caches
[j
].mt_cache
= NULL
;
867 new_cpu_list
[i
].mt_caches
[j
].mt_hint
= NULL
;
870 (void) mutex_init(&new_cpu_list
[i
].mt_parent_lock
,
873 /* get the correct cache list alignment */
874 list_addr
+= CACHELIST_SIZE
;
878 * Initialize oversize listhead
880 oversize_list
.next_bysize
= &oversize_list
;
881 oversize_list
.prev_bysize
= &oversize_list
;
882 oversize_list
.next_byaddr
= &oversize_list
;
883 oversize_list
.prev_byaddr
= &oversize_list
;
884 oversize_list
.addr
= NULL
;
885 oversize_list
.size
= 0; /* sentinal */
888 * Now install the global variables.
891 cpu_mask
= new_cpu_mask
;
892 cpu_list
= new_cpu_list
;
896 create_cache(cache_t
*cp
, size_t size
, uint_t chunksize
)
900 (void) mutex_init(&cp
->mt_cache_lock
, USYNC_THREAD
, NULL
);
902 cp
->mt_freelist
= ((caddr_t
)cp
+ sizeof (cache_t
));
903 cp
->mt_span
= chunksize
* HUNKSIZE
- sizeof (cache_t
);
904 cp
->mt_hunks
= chunksize
;
906 * rough calculation. We will need to adjust later.
908 nblocks
= cp
->mt_span
/ cp
->mt_size
;
910 if (nblocks
== 0) { /* less than 8 free blocks in this pool */
911 int32_t numblocks
= 0;
912 long i
= cp
->mt_span
;
913 size_t sub
= cp
->mt_size
;
921 cp
->mt_arena
= (caddr_t
)ALIGN(cp
->mt_freelist
+ 8, 8);
922 cp
->mt_nfree
= numblocks
;
923 while (numblocks
--) {
924 mask
|= 0x80 >> numblocks
;
926 *(cp
->mt_freelist
) = mask
;
928 cp
->mt_arena
= (caddr_t
)ALIGN((caddr_t
)cp
->mt_freelist
+
930 /* recompute nblocks */
931 nblocks
= (uintptr_t)((caddr_t
)cp
->mt_freelist
+
932 cp
->mt_span
- cp
->mt_arena
) / cp
->mt_size
;
933 cp
->mt_nfree
= ((nblocks
>> 3) << 3);
934 /* Set everything to free */
935 (void) memset(cp
->mt_freelist
, 0xff, nblocks
>> 3);
938 if (debugopt
& MTDEBUGPATTERN
)
939 copy_pattern(FREEPATTERN
, cp
->mt_arena
, cp
->mt_size
* nblocks
);
945 reinit_cpu_list(void)
947 oversize_t
*wp
= oversize_list
.next_bysize
;
950 cache_head_t
*cachehead
;
952 /* Reinitialize free oversize blocks. */
953 (void) mutex_lock(&oversize_lock
);
954 if (debugopt
& MTDEBUGPATTERN
)
955 for (; wp
!= &oversize_list
; wp
= wp
->next_bysize
)
956 copy_pattern(FREEPATTERN
, wp
->addr
, wp
->size
);
957 (void) mutex_unlock(&oversize_lock
);
959 /* Reinitialize free blocks. */
960 for (cpuptr
= &cpu_list
[0]; cpuptr
< &cpu_list
[ncpus
]; cpuptr
++) {
961 (void) mutex_lock(&cpuptr
->mt_parent_lock
);
962 for (cachehead
= &cpuptr
->mt_caches
[0]; cachehead
<
963 &cpuptr
->mt_caches
[NUM_CACHES
]; cachehead
++) {
964 for (thiscache
= cachehead
->mt_cache
; thiscache
!= NULL
;
965 thiscache
= thiscache
->mt_next
) {
966 (void) mutex_lock(&thiscache
->mt_cache_lock
);
967 if (thiscache
->mt_nfree
== 0) {
969 &thiscache
->mt_cache_lock
);
972 if (thiscache
!= NULL
)
973 reinit_cache(thiscache
);
974 (void) mutex_unlock(&thiscache
->mt_cache_lock
);
977 (void) mutex_unlock(&cpuptr
->mt_parent_lock
);
983 reinit_cache(cache_t
*thiscache
)
985 uint32_t *freeblocks
; /* not a uintptr_t on purpose */
989 freeblocks
= (uint32_t *)thiscache
->mt_freelist
;
990 while (freeblocks
< (uint32_t *)thiscache
->mt_arena
) {
991 if (*freeblocks
& 0xffffffff) {
992 for (i
= 0; i
< 32; i
++) {
993 if (FLIP_EM(*freeblocks
) & (0x80000000 >> i
)) {
994 n
= (uintptr_t)(((freeblocks
-
995 (uint32_t *)thiscache
->mt_freelist
)
996 << 5) + i
) * thiscache
->mt_size
;
997 ret
= thiscache
->mt_arena
+ n
;
999 copy_pattern(FREEPATTERN
, ret
,
1000 thiscache
->mt_size
);
1009 malloc_internal(size_t size
, percpu_t
*cpuptr
)
1011 cache_head_t
*cachehead
;
1012 cache_t
*thiscache
, *hintcache
;
1013 int32_t i
, n
, logsz
, bucket
;
1015 uint32_t *freeblocks
; /* not a uintptr_t on purpose */
1018 logsz
= MIN_CACHED_SHIFT
;
1020 while (size
> (1 << logsz
))
1023 bucket
= logsz
- MIN_CACHED_SHIFT
;
1025 (void) mutex_lock(&cpuptr
->mt_parent_lock
);
1028 * Find a cache of the appropriate size with free buffers.
1030 * We don't need to lock each cache as we check their mt_nfree count,
1032 * 1. We are only looking for caches with mt_nfree > 0. If a
1033 * free happens during our search, it will increment mt_nfree,
1034 * which will not effect the test.
1035 * 2. Allocations can decrement mt_nfree, but they can't happen
1036 * as long as we hold mt_parent_lock.
1039 cachehead
= &cpuptr
->mt_caches
[bucket
];
1041 /* Search through the list, starting at the mt_hint */
1042 thiscache
= cachehead
->mt_hint
;
1044 while (thiscache
!= NULL
&& thiscache
->mt_nfree
== 0)
1045 thiscache
= thiscache
->mt_next
;
1047 if (thiscache
== NULL
) {
1048 /* wrap around -- search up to the hint */
1049 thiscache
= cachehead
->mt_cache
;
1050 hintcache
= cachehead
->mt_hint
;
1052 while (thiscache
!= NULL
&& thiscache
!= hintcache
&&
1053 thiscache
->mt_nfree
== 0)
1054 thiscache
= thiscache
->mt_next
;
1056 if (thiscache
== hintcache
)
1061 if (thiscache
== NULL
) { /* there are no free caches */
1062 int32_t thisrequest
= requestsize
;
1063 int32_t buffer_size
= (1 << logsz
) + OVERHEAD
;
1065 thiscache
= (cache_t
*)morecore(thisrequest
* HUNKSIZE
);
1067 if (thiscache
== (cache_t
*)-1) {
1068 (void) mutex_unlock(&cpuptr
->mt_parent_lock
);
1072 create_cache(thiscache
, buffer_size
, thisrequest
);
1074 /* link in the new block at the beginning of the list */
1075 thiscache
->mt_next
= cachehead
->mt_cache
;
1076 cachehead
->mt_cache
= thiscache
;
1079 /* update the hint to the cache we found or created */
1080 cachehead
->mt_hint
= thiscache
;
1082 /* thiscache now points to a cache with available space */
1083 (void) mutex_lock(&thiscache
->mt_cache_lock
);
1085 freeblocks
= (uint32_t *)thiscache
->mt_freelist
;
1086 while (freeblocks
< (uint32_t *)thiscache
->mt_arena
) {
1087 if (*freeblocks
& 0xffffffff)
1090 if (freeblocks
< (uint32_t *)thiscache
->mt_arena
&&
1091 *freeblocks
& 0xffffffff)
1094 if (freeblocks
< (uint32_t *)thiscache
->mt_arena
&&
1095 *freeblocks
& 0xffffffff)
1098 if (freeblocks
< (uint32_t *)thiscache
->mt_arena
&&
1099 *freeblocks
& 0xffffffff)
1105 * the offset from mt_freelist to freeblocks is the offset into
1106 * the arena. Be sure to include the offset into freeblocks
1107 * of the bitmask. n is the offset.
1109 for (i
= 0; i
< 32; ) {
1110 if (FLIP_EM(*freeblocks
) & (0x80000000 >> i
++))
1112 if (FLIP_EM(*freeblocks
) & (0x80000000 >> i
++))
1114 if (FLIP_EM(*freeblocks
) & (0x80000000 >> i
++))
1116 if (FLIP_EM(*freeblocks
) & (0x80000000 >> i
++))
1119 index
= 0x80000000 >> --i
;
1122 *freeblocks
&= FLIP_EM(~index
);
1124 thiscache
->mt_nfree
--;
1126 (void) mutex_unlock(&thiscache
->mt_cache_lock
);
1127 (void) mutex_unlock(&cpuptr
->mt_parent_lock
);
1129 n
= (uintptr_t)(((freeblocks
- (uint32_t *)thiscache
->mt_freelist
) << 5)
1130 + i
) * thiscache
->mt_size
;
1132 * Now you have the offset in n, you've changed the free mask
1133 * in the freelist. Nothing left to do but find the block
1134 * in the arena and put the value of thiscache in the word
1135 * ahead of the handed out address and return the memory
1138 ret
= thiscache
->mt_arena
+ n
;
1140 /* Store the cache addr for this buf. Makes free go fast. */
1141 *(uintptr_t *)ret
= (uintptr_t)thiscache
;
1144 * This assert makes sure we don't hand out memory that is not
1145 * owned by this cache.
1147 assert(ret
+ thiscache
->mt_size
<= thiscache
->mt_freelist
+
1148 thiscache
->mt_span
);
1152 assert(((uintptr_t)ret
& 7) == 0); /* are we 8 byte aligned */
1154 if (reinit
== 0 && (debugopt
& MTDEBUGPATTERN
))
1155 if (verify_pattern(FREEPATTERN
, ret
, size
))
1156 abort(); /* reference after free */
1158 if (debugopt
& MTINITBUFFER
)
1159 copy_pattern(INITPATTERN
, ret
, size
);
1160 return ((void *)ret
);
1164 morecore(size_t bytes
)
1168 if (bytes
> LONG_MAX
) {
1171 * The request size is too big. We need to do this in
1172 * chunks. Sbrk only takes an int for an arg.
1174 if (bytes
== ULONG_MAX
)
1175 return ((void *)-1);
1180 if (sbrk(wad
) == (void *)-1) {
1182 (void) sbrk(-LONG_MAX
);
1183 return ((void *)-1);
1196 oversize(size_t size
)
1201 /* make sure we will not overflow */
1202 if (size
> MAX_MTMALLOC
) {
1208 * Since we ensure every address we hand back is
1209 * MTMALLOC_MIN_ALIGN-byte aligned, ALIGNing size ensures that the
1210 * memory handed out is MTMALLOC_MIN_ALIGN-byte aligned at both ends.
1211 * This eases the implementation of MTDEBUGPATTERN and MTINITPATTERN,
1212 * particularly where coalescing occurs.
1214 size
= ALIGN(size
, MTMALLOC_MIN_ALIGN
);
1217 * The idea with the global lock is that we are sure to
1218 * block in the kernel anyway since given an oversize alloc
1219 * we are sure to have to call morecore();
1221 (void) mutex_lock(&oversize_lock
);
1223 if ((big
= find_oversize(size
)) != NULL
) {
1224 if (reinit
== 0 && (debugopt
& MTDEBUGPATTERN
))
1225 if (verify_pattern(FREEPATTERN
, big
->addr
, size
))
1226 abort(); /* reference after free */
1228 /* Get more 8-byte aligned memory from heap */
1229 ret
= morecore(size
+ OVSZ_HEADER_SIZE
);
1230 if (ret
== (caddr_t
)-1) {
1231 (void) mutex_unlock(&oversize_lock
);
1235 big
= oversize_header_alloc((uintptr_t)ret
, size
);
1241 if (debugopt
& MTINITBUFFER
)
1242 copy_pattern(INITPATTERN
, ret
, size
);
1244 (void) mutex_unlock(&oversize_lock
);
1245 assert(((uintptr_t)ret
& 7) == 0); /* are we 8 byte aligned */
1246 return ((void *)ret
);
1250 insert_oversize(oversize_t
*op
, oversize_t
*nx
)
1254 /* locate correct insertion point in size-ordered list */
1255 for (sp
= oversize_list
.next_bysize
;
1256 sp
!= &oversize_list
&& (op
->size
> sp
->size
);
1257 sp
= sp
->next_bysize
)
1260 /* link into size-ordered list */
1261 op
->next_bysize
= sp
;
1262 op
->prev_bysize
= sp
->prev_bysize
;
1263 op
->prev_bysize
->next_bysize
= op
;
1264 op
->next_bysize
->prev_bysize
= op
;
1267 * link item into address-ordered list
1268 * (caller provides insertion point as an optimization)
1270 op
->next_byaddr
= nx
;
1271 op
->prev_byaddr
= nx
->prev_byaddr
;
1272 op
->prev_byaddr
->next_byaddr
= op
;
1273 op
->next_byaddr
->prev_byaddr
= op
;
1278 unlink_oversize(oversize_t
*lp
)
1280 /* unlink from address list */
1281 lp
->prev_byaddr
->next_byaddr
= lp
->next_byaddr
;
1282 lp
->next_byaddr
->prev_byaddr
= lp
->prev_byaddr
;
1284 /* unlink from size list */
1285 lp
->prev_bysize
->next_bysize
= lp
->next_bysize
;
1286 lp
->next_bysize
->prev_bysize
= lp
->prev_bysize
;
1290 position_oversize_by_size(oversize_t
*op
)
1294 if (op
->size
> op
->next_bysize
->size
||
1295 op
->size
< op
->prev_bysize
->size
) {
1297 /* unlink from size list */
1298 op
->prev_bysize
->next_bysize
= op
->next_bysize
;
1299 op
->next_bysize
->prev_bysize
= op
->prev_bysize
;
1301 /* locate correct insertion point in size-ordered list */
1302 for (sp
= oversize_list
.next_bysize
;
1303 sp
!= &oversize_list
&& (op
->size
> sp
->size
);
1304 sp
= sp
->next_bysize
)
1307 /* link into size-ordered list */
1308 op
->next_bysize
= sp
;
1309 op
->prev_bysize
= sp
->prev_bysize
;
1310 op
->prev_bysize
->next_bysize
= op
;
1311 op
->next_bysize
->prev_bysize
= op
;
1316 add_oversize(oversize_t
*lp
)
1318 int merge_flags
= INSERT_ONLY
;
1319 oversize_t
*nx
; /* ptr to item right of insertion point */
1320 oversize_t
*pv
; /* ptr to item left of insertion point */
1321 uint_t size_lp
, size_pv
, size_nx
;
1322 uintptr_t endp_lp
, endp_pv
, endp_nx
;
1325 * Locate insertion point in address-ordered list
1328 for (nx
= oversize_list
.next_byaddr
;
1329 nx
!= &oversize_list
&& (lp
->addr
> nx
->addr
);
1330 nx
= nx
->next_byaddr
)
1334 * Determine how to add chunk to oversize freelist
1337 size_lp
= OVSZ_HEADER_SIZE
+ lp
->size
;
1338 endp_lp
= ALIGN((uintptr_t)lp
+ size_lp
, MTMALLOC_MIN_ALIGN
);
1339 size_lp
= endp_lp
- (uintptr_t)lp
;
1341 pv
= nx
->prev_byaddr
;
1345 size_pv
= OVSZ_HEADER_SIZE
+ pv
->size
;
1346 endp_pv
= ALIGN((uintptr_t)pv
+ size_pv
,
1347 MTMALLOC_MIN_ALIGN
);
1348 size_pv
= endp_pv
- (uintptr_t)pv
;
1350 /* Check for adjacency with left chunk */
1351 if ((uintptr_t)lp
== endp_pv
)
1352 merge_flags
|= COALESCE_LEFT
;
1357 /* Check for adjacency with right chunk */
1358 if ((uintptr_t)nx
== endp_lp
) {
1359 size_nx
= OVSZ_HEADER_SIZE
+ nx
->size
;
1360 endp_nx
= ALIGN((uintptr_t)nx
+ size_nx
,
1361 MTMALLOC_MIN_ALIGN
);
1362 size_nx
= endp_nx
- (uintptr_t)nx
;
1363 merge_flags
|= COALESCE_RIGHT
;
1368 * If MTDEBUGPATTERN==1, lp->addr will have been overwritten with
1369 * FREEPATTERN for lp->size bytes. If we can merge, the oversize
1370 * header(s) that will also become part of the memory available for
1371 * reallocation (ie lp and/or nx) must also be overwritten with
1372 * FREEPATTERN or we will SIGABRT when this memory is next reallocated.
1374 switch (merge_flags
) {
1376 case INSERT_ONLY
: /* Coalescing not possible */
1377 insert_oversize(lp
, nx
);
1380 pv
->size
+= size_lp
;
1381 position_oversize_by_size(pv
);
1382 if (debugopt
& MTDEBUGPATTERN
)
1383 copy_pattern(FREEPATTERN
, lp
, OVSZ_HEADER_SIZE
);
1385 case COALESCE_RIGHT
:
1386 unlink_oversize(nx
);
1387 lp
->size
+= size_nx
;
1388 insert_oversize(lp
, pv
->next_byaddr
);
1389 if (debugopt
& MTDEBUGPATTERN
)
1390 copy_pattern(FREEPATTERN
, nx
, OVSZ_HEADER_SIZE
);
1392 case COALESCE_WITH_BOTH_SIDES
: /* Merge (with right) to the left */
1393 pv
->size
+= size_lp
+ size_nx
;
1394 unlink_oversize(nx
);
1395 position_oversize_by_size(pv
);
1396 if (debugopt
& MTDEBUGPATTERN
) {
1397 copy_pattern(FREEPATTERN
, lp
, OVSZ_HEADER_SIZE
);
1398 copy_pattern(FREEPATTERN
, nx
, OVSZ_HEADER_SIZE
);
1405 * Find memory on our list that is at least size big. If we find a block that is
1406 * big enough, we break it up and return the associated oversize_t struct back
1407 * to the calling client. Any leftover piece of that block is returned to the
1411 find_oversize(size_t size
)
1413 oversize_t
*wp
= oversize_list
.next_bysize
;
1414 while (wp
!= &oversize_list
&& size
> wp
->size
)
1415 wp
= wp
->next_bysize
;
1417 if (wp
== &oversize_list
) /* empty list or nothing big enough */
1419 /* breaking up a chunk of memory */
1420 if ((long)((wp
->size
- (size
+ OVSZ_HEADER_SIZE
+ MTMALLOC_MIN_ALIGN
)))
1425 off
= (caddr_t
)ALIGN(wp
->addr
+ size
,
1426 MTMALLOC_MIN_ALIGN
);
1428 wp
->size
= (size_t)(off
- wp
->addr
);
1429 np
= oversize_header_alloc((uintptr_t)off
,
1430 osize
- (wp
->size
+ OVSZ_HEADER_SIZE
));
1431 if ((long)np
->size
< 0)
1433 unlink_oversize(wp
);
1436 unlink_oversize(wp
);
1442 copy_pattern(uint32_t pattern
, void *buf_arg
, size_t size
)
1444 uint32_t *bufend
= (uint32_t *)((char *)buf_arg
+ size
);
1445 uint32_t *buf
= buf_arg
;
1447 while (buf
< bufend
- 3) {
1448 buf
[3] = buf
[2] = buf
[1] = buf
[0] = pattern
;
1451 while (buf
< bufend
)
1456 verify_pattern(uint32_t pattern
, void *buf_arg
, size_t size
)
1458 uint32_t *bufend
= (uint32_t *)((char *)buf_arg
+ size
);
1461 for (buf
= buf_arg
; buf
< bufend
; buf
++)
1462 if (*buf
!= pattern
)
1468 free_oversize(oversize_t
*ovp
)
1470 assert(((uintptr_t)ovp
->addr
& 7) == 0); /* are we 8 byte aligned */
1471 assert(ovp
->size
> MAX_CACHED
);
1473 ovp
->next_bysize
= ovp
->prev_bysize
= NULL
;
1474 ovp
->next_byaddr
= ovp
->prev_byaddr
= NULL
;
1475 (void) mutex_lock(&oversize_lock
);
1477 (void) mutex_unlock(&oversize_lock
);
1481 oversize_header_alloc(uintptr_t mem
, size_t size
)
1483 oversize_t
*ovsz_hdr
;
1485 assert(size
> MAX_CACHED
);
1487 ovsz_hdr
= (oversize_t
*)mem
;
1488 ovsz_hdr
->prev_bysize
= NULL
;
1489 ovsz_hdr
->next_bysize
= NULL
;
1490 ovsz_hdr
->prev_byaddr
= NULL
;
1491 ovsz_hdr
->next_byaddr
= NULL
;
1492 ovsz_hdr
->hash_next
= NULL
;
1493 ovsz_hdr
->size
= size
;
1495 *(uintptr_t *)mem
= MTMALLOC_OVERSIZE_MAGIC
;
1497 assert(((uintptr_t)mem
& 7) == 0); /* are we 8 byte aligned */
1498 ovsz_hdr
->addr
= (caddr_t
)mem
;
1506 cache_head_t
*cachehead
;
1509 (void) mutex_lock(&oversize_lock
);
1510 for (cpuptr
= &cpu_list
[0]; cpuptr
< &cpu_list
[ncpus
]; cpuptr
++) {
1511 (void) mutex_lock(&cpuptr
->mt_parent_lock
);
1512 for (cachehead
= &cpuptr
->mt_caches
[0];
1513 cachehead
< &cpuptr
->mt_caches
[NUM_CACHES
];
1515 for (thiscache
= cachehead
->mt_cache
;
1517 thiscache
= thiscache
->mt_next
) {
1519 &thiscache
->mt_cache_lock
);
1529 cache_head_t
*cachehead
;
1532 for (cpuptr
= &cpu_list
[ncpus
- 1]; cpuptr
>= &cpu_list
[0]; cpuptr
--) {
1533 for (cachehead
= &cpuptr
->mt_caches
[NUM_CACHES
- 1];
1534 cachehead
>= &cpuptr
->mt_caches
[0];
1536 for (thiscache
= cachehead
->mt_cache
;
1538 thiscache
= thiscache
->mt_next
) {
1539 (void) mutex_unlock(
1540 &thiscache
->mt_cache_lock
);
1543 (void) mutex_unlock(&cpuptr
->mt_parent_lock
);
1545 (void) mutex_unlock(&oversize_lock
);
1548 #pragma init(malloc_init)
1553 * This works in the init section for this library
1554 * because setup_caches() doesn't call anything in libc
1555 * that calls malloc(). If it did, disaster would ensue.
1557 * For this to work properly, this library must be the first
1558 * one to have its init section called (after libc) by the
1559 * dynamic linker. If some other library's init section
1560 * ran first and called malloc(), disaster would ensue.
1561 * Because this is an interposer library for malloc(), the
1562 * dynamic linker arranges for its init section to run first.
1564 (void) setup_caches();
1566 (void) pthread_atfork(malloc_prepare
, malloc_release
, malloc_release
);