/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
31 /*#define AMALLOC_EAGER_FREE*/
32 /*#define AMALLOC_TRIM_MIDBLOCKS*/
33 /*#define AMALLOC_USE_RANDOM_ROVER*/
37 #define OBJ_ANY 0x0400
39 static ULONG dosallocmem_attrib
= PAG_READ
| PAG_WRITE
|
40 #ifdef CODEGEN_USE_HEAP
45 #ifndef CODEGEN_USE_HEAP
46 #define PROT_HEAP (PROT_READ | PROT_WRITE)
48 #define PROT_HEAP (PROT_READ | PROT_WRITE | PROT_EXEC)
53 #define ARENA_SIZE ((size_t)1 << ARENA_BITS)
55 #define MIDBLOCK_BITS 9
56 #define MIDBLOCK_SIZE ((size_t)1 << MIDBLOCK_BITS)
58 #define SMALL_BLOCK_TRIES 16
61 #define DIRECT_LIMIT 512
62 #define N_CLASSES (DIRECT_LIMIT / MIN_ALLOC)
63 #define SIZE_TO_INDEX(s) (((unsigned)(s) + MIN_ALLOC - 1) / MIN_ALLOC)
64 #define INDEX_TO_CLASS(i) (!(i) ? 0 : (i) - 1)
65 #define CLASS_TO_SIZE(c) (((c) + 1) * MIN_ALLOC)
67 #define ARENA_MIDBLOCKS (ARENA_SIZE / MIDBLOCK_SIZE)
69 #define BITMAP_BITS EFFICIENT_WORD_SIZE
70 #define bitmap_t cat4(uint,BITMAP_BITS,_,t)
73 struct tree midblock_free
;
74 struct tree_entry arena_entry
;
75 ushort_efficient_t max_midblock_run
;
76 uchar_efficient_t attached
;
77 ushort_efficient_t map
[ARENA_MIDBLOCKS
];
80 #define MAP_FREE 0x8000
82 #define ARENA_PREFIX (round_up(sizeof(struct arena), MIDBLOCK_SIZE) >> MIDBLOCK_BITS)
83 #define MIDBLOCK_LIMIT ((ARENA_SIZE - (ARENA_PREFIX << MIDBLOCK_BITS)) / 2)
86 struct tree_entry free_entry
;
89 atomic_type bitmap_t atomic_map
;
90 #ifdef AMALLOC_EAGER_FREE
93 uchar_efficient_t reserved_bits
;
94 uchar_efficient_t cls
;
95 ushort_efficient_t size
;
96 ushort_efficient_t reciprocal
;
97 #define RECIPROCAL_BASE 0x8000
104 struct tree_entry entry
;
109 union midblock
*small_runs
[N_CLASSES
+ 1];
110 tls_destructor_t destructor
;
113 static union midblock full_midblock
;
115 static struct per_thread thread1
;
117 static tls_decl(struct per_thread
*, per_thread
);
119 static uchar_efficient_t amalloc_threads_initialized
;
121 static struct tree arena_tree
;
122 static mutex_t arena_tree_mutex
;
124 static struct tree huge_tree
;
125 static mutex_t huge_tree_mutex
;
127 static size_t page_size
;
129 static struct small_block_cache
{
130 union midblock
**array
;
132 size_t allocated_size
;
133 #ifndef AMALLOC_USE_RANDOM_ROVER
139 } small_block_cache
[N_CLASSES
];
142 #if BITMAP_BITS == 16
143 #define BITMAP_ASM_X86_SUFFIX "w"
144 #elif BITMAP_BITS == 32
145 #define BITMAP_ASM_X86_SUFFIX "l"
146 #elif BITMAP_BITS == 64
147 #define BITMAP_ASM_X86_SUFFIX "q"
150 static attr_always_inline
size_t find_bit(bitmap_t bitmap
)
152 #if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
153 __asm__ ("rep; bsf"BITMAP_ASM_X86_SUFFIX
" %1, %0" : "=r"(bitmap
) : "r"(bitmap
) : "cc");
156 #if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_RBIT) && defined(HAVE_ARM_ASSEMBLER_CLZ)
157 if (likely(cpu_test_feature(CPU_FEATURE_armv6t2
))) {
158 __asm__ (ARM_ASM_PREFIX
"rbit %0, %1; clz %0, %0" : "=r"(bitmap
) : "r"(bitmap
) : "cc");
162 #if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
163 if (likely(cpu_test_feature(CPU_FEATURE_cix
))) {
164 __asm__ (".arch ev68; cttz %1, %0" : "=r"(bitmap
) : "r"(bitmap
));
168 #if defined(ARCH_RISCV64) && defined(HAVE_GCC_ASSEMBLER) && !defined(__riscv_zbb)
169 if (likely(cpu_test_feature(CPU_FEATURE_zbb
))) {
170 if (BITMAP_BITS
== 32) {
171 __asm__ ("mv t6, %1; .word 0x601f9f9b; mv %0, t6" : "=r"(bitmap
) : "r"(bitmap
) : "t6");
174 if (BITMAP_BITS
== 64) {
175 __asm__ ("mv t6, %1; .word 0x601f9f93; mv %0, t6" : "=r"(bitmap
) : "r"(bitmap
) : "t6");
180 #if defined(ARCH_S390_64) && defined(HAVE_GCC_ASSEMBLER) && !(__ARCH__ >= 7)
181 if (likely(cpu_test_feature(CPU_FEATURE_extended_imm
))) {
182 if (BITMAP_BITS
== 32) {
183 __asm__("llgfr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap
) : "r"(bitmap
& -bitmap
) : "r0", "r1", "cc");
186 if (BITMAP_BITS
== 64) {
187 __asm__("lgr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap
) : "r"(bitmap
& -bitmap
) : "r0", "r1", "cc");
192 #if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
193 __asm__ ("popc %1, %0" : "=r"(bitmap
) : "r"((bitmap
- 1) & ~bitmap
));
196 #if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_CTZ)
197 return __builtin_ctz(bitmap
);
198 #elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_CTZ)
199 return __builtin_ctzll(bitmap
);
200 #elif BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_FFS)
201 return ffs(bitmap
) - 1;
202 #elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_FFSLL)
203 return ffsll(bitmap
) - 1;
218 static attr_always_inline
unsigned count_bits(bitmap_t bitmap
)
220 #if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86) && defined(HAVE_X86_ASSEMBLER_POPCNT)
221 if (likely(cpu_test_feature(CPU_FEATURE_popcnt
))) {
222 __asm__ ("popcnt"BITMAP_ASM_X86_SUFFIX
" %1, %0" : "=r"(bitmap
) : "r"(bitmap
) : "cc");
226 #if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_VFP)
227 if (likely(cpu_test_feature(CPU_FEATURE_neon
))) {
228 __asm__ (ARM_ASM_PREFIX
" \n\
229 vld1.32 d0[0], [ %0 ] \n\
232 vpaddl.u16 d0, d0 \n\
233 vst1.32 d0[0], [ %0 ] \n\
234 " : : "r"(&bitmap
) : "d0", "memory");
238 #if BITMAP_BITS == 64 && defined(INLINE_ASM_GCC_ARM64)
239 if (likely(cpu_test_feature(CPU_FEATURE_neon
))) {
241 __asm__ (ARM_ASM_PREFIX
" \n\
246 " : "=r"(result
) : "r"(bitmap
) : "d0");
250 #if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
251 __asm__ ("popc %1, %0" : "=r"(bitmap
) : "r"(bitmap
));
254 #if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
255 if (likely(cpu_test_feature(CPU_FEATURE_cix
))) {
256 __asm__ (".arch ev68; ctpop %1, %0" : "=r"(bitmap
) : "r"(bitmap
));
260 #if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_POPCOUNT)
261 return __builtin_popcount(bitmap
);
262 #elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_POPCOUNT)
263 return __builtin_popcountll(bitmap
);
268 bitmap
&= bitmap
- 1, ret
++;
274 static attr_always_inline
uint16_t div_16(uint16_t x
, uint16_t y
)
276 #if defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_SDIV_UDIV)
277 if (likely(cpu_test_feature(CPU_FEATURE_idiv
))) {
279 __asm__ (ARM_ASM_PREFIX
"udiv %0, %1, %2" : "=r"(result
) : "r"((unsigned)x
), "r"((unsigned)y
));
286 static attr_always_inline
void amalloc_atomic_or(atomic_type bitmap_t
*bitmap
, bitmap_t value
)
288 #if defined(HAVE_C11_ATOMICS)
289 atomic_fetch_or_explicit(bitmap
, value
, memory_order_release
);
290 #elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
291 __asm__
volatile ("lock; or"BITMAP_ASM_X86_SUFFIX
" %2, %0":"=m"(*bitmap
):"m"(*bitmap
),"ir"(value
):"cc","memory");
292 #elif defined(HAVE_SYNC_AND_FETCH)
293 __sync_or_and_fetch(bitmap
, value
);
295 if (likely(amalloc_threads_initialized
))
296 address_lock((void *)bitmap
, DEPTH_ARENA
);
298 if (likely(amalloc_threads_initialized
))
299 address_unlock((void *)bitmap
, DEPTH_ARENA
);
303 static attr_always_inline bitmap_t
amalloc_atomic_xchg(atomic_type bitmap_t
*bitmap
, bitmap_t value
)
305 #if defined(HAVE_C11_ATOMICS)
306 return atomic_exchange_explicit(bitmap
, value
, memory_order_acquire
);
307 #elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
308 __asm__
volatile ("xchg"BITMAP_ASM_X86_SUFFIX
" %1, %0":"=m"(*bitmap
),"=r"(value
):"m"(*bitmap
),"1"(value
):"memory");
310 #elif defined(HAVE_SYNC_AND_FETCH)
314 } while (unlikely(!__sync_bool_compare_and_swap(bitmap
, v
, value
)));
318 if (likely(amalloc_threads_initialized
))
319 address_lock((void *)bitmap
, DEPTH_ARENA
);
322 if (likely(amalloc_threads_initialized
))
323 address_unlock((void *)bitmap
, DEPTH_ARENA
);
329 static inline struct arena
*addr_to_arena(void *addr
)
331 return num_to_ptr(ptr_to_num(addr
) & ~((1 << ARENA_BITS
) - 1));
334 static inline union midblock
*idx_to_midblock(struct arena
*a
, unsigned midblock
)
336 return cast_ptr(union midblock
*, cast_ptr(char *, a
) + (midblock
* MIDBLOCK_SIZE
));
339 static inline unsigned midblock_to_idx(struct arena
*a
, void *m
)
341 return (ptr_to_num(m
) - ptr_to_num(a
)) >> MIDBLOCK_BITS
;
345 static void huge_tree_lock(void);
346 static void huge_tree_unlock(void);
350 static struct list pad_list
;
352 struct pad_descriptor
{
353 struct list pad_entry
;
358 static void amalloc_do_unmap(void *ptr
, size_t attr_unused size
)
364 if (r
== ERROR_INTERRUPT
)
366 internal(file_line
, "amalloc_do_unmap: DosFreeMem(%p) returned error: %lu", ptr
, r
);
370 static void *do_dosallocmem(size_t size
, bool commit
)
375 r
= DosAllocMem(&ptr
, size
, dosallocmem_attrib
| (commit
? PAG_COMMIT
: 0));
377 if (r
== ERROR_INTERRUPT
)
384 static bool do_commitmem(void *ptr
, size_t size
)
388 r
= DosSetMem(ptr
, size
, PAG_READ
| PAG_WRITE
|
389 #ifdef CODEGEN_USE_HEAP
394 if (r
== ERROR_INTERRUPT
)
401 static void *os2_alloc_aligned(size_t size
)
404 size_t offset
, aligned_size
;
408 aligned_size
= round_up(size
, ARENA_SIZE
);
409 if (unlikely(!aligned_size
))
412 ptr
= do_dosallocmem(aligned_size
, likely(size
== aligned_size
));
415 offset
= ptr_to_num(ptr
) & (ARENA_SIZE
- 1);
416 if (unlikely(offset
!= 0)) {
417 struct pad_descriptor
*pd
;
418 amalloc_do_unmap(ptr
, size
);
419 pad
= do_dosallocmem(ARENA_SIZE
- offset
, false);
422 pd
= malloc(sizeof(struct pad_descriptor
));
424 amalloc_do_unmap(pad
, ARENA_SIZE
- offset
);
428 pd
->size
= ARENA_SIZE
- offset
;
429 list_add(&pad_list
, &pd
->pad_entry
);
432 if (unlikely(size
!= aligned_size
)) {
433 if (unlikely(!do_commitmem(ptr
, size
))) {
434 amalloc_do_unmap(ptr
, aligned_size
);
449 bool os2_test_for_32bit_tcpip(const char *ptr
);
451 static void amalloc_os_init(void)
453 if (dosallocmem_attrib
& OBJ_ANY
) {
455 ptr
= do_dosallocmem(0x1000, true);
457 if (ptr_to_num(ptr
) < 0x20000000UL
) {
458 dosallocmem_attrib
&= ~OBJ_ANY
;
460 if (!os2_test_for_32bit_tcpip(ptr
))
461 dosallocmem_attrib
&= ~OBJ_ANY
;
463 amalloc_do_unmap(ptr
, 0x1000);
465 dosallocmem_attrib
&= ~OBJ_ANY
;
468 list_init(&pad_list
);
472 static void amalloc_os_done(void)
474 while (!list_is_empty(&pad_list
)) {
475 struct pad_descriptor
*pd
= get_struct(pad_list
.prev
, struct pad_descriptor
, pad_entry
);
476 list_del(&pd
->pad_entry
);
477 amalloc_do_unmap(pd
->base
, pd
->size
);
484 static inline void amalloc_do_unmap(void *ptr
, size_t size
)
486 os_munmap(ptr
, size
, false);
489 static void amalloc_os_init(void)
491 page_size
= os_getpagesize();
494 static void amalloc_os_done(void)
500 #ifdef POINTER_COMPRESSION_POSSIBLE
502 static bool do_enable_mapping(void *ptr
, size_t size
, bool attr_unused clr
)
504 #if defined(OS_CYGWIN) || defined(OS_WIN32)
506 if (unlikely(!os_mprotect(ptr
, size
, PROT_HEAP
, &sink
)))
509 memset(ptr
, 0, size
);
513 void *r
= os_mmap(ptr
, size
, PROT_HEAP
, MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_FIXED
, handle_none
, 0, &sink
);
514 if (likely(r
== MAP_FAILED
))
516 if (unlikely(r
!= ptr
))
517 internal(file_line
, "do_enable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r
, ptr
);
522 static void do_disable_mapping(void *ptr
, size_t size
)
524 #if defined(OS_CYGWIN) || defined(OS_WIN32)
526 if (unlikely(!os_mprotect(ptr
, size
, PROT_NONE
, &err
))) {
527 warning("failed to clear existing mapping: mprotect(%p, %"PRIxMAX
") returned error: %s", ptr
, (uintmax_t)size
, error_decode(err
));
532 void *r
= os_mmap(ptr
, size
, PROT_NONE
, MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_NORESERVE
| MAP_FIXED
, handle_none
, 0, &err
);
533 if (unlikely(r
== MAP_FAILED
)) {
534 warning("failed to clear existing mapping: os_mmap(%p, %"PRIxMAX
") returned error: %s", ptr
, (uintmax_t)size
, error_decode(err
));
537 if (unlikely(r
!= ptr
))
538 internal(file_line
, "do_disable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r
, ptr
);
542 static struct tree rmap_tree
;
544 static struct reserved_map
{
545 struct tree_entry free_entry
;
549 static mutex_t rmap_mutex
;
551 #define RESERVED_MAP_ENTRIES ((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE - ARENA_BITS))
553 static void rmap_lock(void)
555 if (unlikely(!amalloc_threads_initialized
))
557 mutex_lock(&rmap_mutex
);
560 static void rmap_unlock(void)
562 if (unlikely(!amalloc_threads_initialized
))
564 mutex_unlock(&rmap_mutex
);
567 static int rmap_compare(const struct tree_entry
*entry
, uintptr_t idx2
)
569 struct reserved_map
*rmap1
= get_struct(entry
, struct reserved_map
, free_entry
);
570 unsigned idx1
= rmap1
- rmap
;
572 len1
= rmap
[idx1
].f
+ 1 - idx1
;
573 len2
= rmap
[idx2
].f
+ 1 - idx2
;
575 return len1
< len2
? -1 : 1;
581 static void reserve_free_merge_run(unsigned idx
, unsigned len
)
583 struct tree_insert_position ins
;
584 if (rmap
[idx
- 1].f
) {
585 unsigned more
= idx
- rmap
[idx
- 1].f
;
588 tree_delete(&rmap
[idx
].free_entry
);
590 if (idx
+ len
< RESERVED_MAP_ENTRIES
&& rmap
[idx
+ len
].f
) {
591 unsigned more
= rmap
[idx
+ len
].f
+ 1 - (idx
+ len
);
592 tree_delete(&rmap
[idx
+ len
].free_entry
);
595 rmap
[idx
].f
= idx
+ len
- 1;
596 rmap
[idx
+ len
- 1].f
= idx
;
597 tree_find_for_insert(&rmap_tree
, rmap_compare
, idx
, &ins
);
598 tree_insert_after_find(&rmap
[idx
].free_entry
, &ins
);
601 static void reserve_sub_alloc(unsigned idx1
, unsigned len1
, unsigned idx
, unsigned len
)
603 struct reserved_map
*rmap1
= &rmap
[idx1
];
605 rmap
[idx
+ len
- 1].f
= 0;
606 tree_delete(&rmap1
->free_entry
);
608 reserve_free_merge_run(idx1
, idx
- idx1
);
610 if (idx
+ len
< idx1
+ len1
) {
611 reserve_free_merge_run(idx
+ len
, (idx1
+ len1
) - (idx
+ len
));
615 static int rmap_find(const struct tree_entry
*entry
, uintptr_t len
)
617 struct reserved_map
*rmap1
= get_struct(entry
, struct reserved_map
, free_entry
);
618 unsigned idx1
= rmap1
- rmap
;
619 unsigned len1
= rmap
[idx1
].f
+ 1 - idx1
;
627 static unsigned reserve_alloc_run(unsigned al
, unsigned len
)
629 struct tree_entry
*ee
;
630 ee
= tree_find_best(&rmap_tree
, rmap_find
, len
+ al
- 1);
631 if (likely(ee
!= NULL
)) {
632 struct reserved_map
*rmap1
= get_struct(ee
, struct reserved_map
, free_entry
);
633 unsigned idx1
= rmap1
- rmap
;
634 unsigned len1
= rmap
[idx1
].f
+ 1 - idx1
;
635 unsigned idx
= round_up(idx1
, al
);
636 reserve_sub_alloc(idx1
, len1
, idx
, len
);
642 static bool reserve_realloc_run(unsigned start
, unsigned old_len
, unsigned new_len
)
645 if (unlikely(old_len
>= new_len
)) {
646 if (unlikely(old_len
== new_len
))
648 rmap
[start
+ new_len
- 1].f
= 0;
649 reserve_free_merge_run(start
+ new_len
, old_len
- new_len
);
652 if (unlikely(start
+ old_len
== RESERVED_MAP_ENTRIES
))
654 if (!rmap
[start
+ old_len
].f
)
656 xlen
= rmap
[start
+ old_len
].f
+ 1 - (start
+ old_len
);
657 if (xlen
< new_len
- old_len
)
659 tree_delete(&rmap
[start
+ old_len
].free_entry
);
660 rmap
[start
+ new_len
- 1].f
= 0;
661 if (xlen
> new_len
- old_len
)
662 reserve_free_merge_run(start
+ new_len
, xlen
- (new_len
- old_len
));
666 static unsigned reserve_last_run(unsigned len
)
668 struct tree_entry
*e
;
669 struct reserved_map
*rmap1
;
671 unsigned best_idx
= 0;
672 for (e
= tree_first(&rmap_tree
); e
; e
= tree_next(e
)) {
673 rmap1
= get_struct(e
, struct reserved_map
, free_entry
);
675 len1
= rmap
[idx1
].f
+ 1 - idx1
;
676 if (likely(len1
>= len
)) {
681 if (unlikely(!best_idx
))
685 len1
= rmap
[idx1
].f
+ 1 - idx1
;
686 reserve_sub_alloc(idx1
, len1
, idx1
+ len1
- len
, len
);
687 return idx1
+ len1
- len
;
690 bool amalloc_ptrcomp_try_reserve_range(void *ptr
, size_t length
)
693 struct tree_entry
*e
;
695 if (unlikely(!length
))
698 if (unlikely(ptr_to_num(ptr
) & (ARENA_SIZE
- 1)))
700 idx
= ptr_to_num(ptr
) >> ARENA_BITS
;
701 len
= (length
+ ARENA_SIZE
- 1) >> ARENA_BITS
;
705 for (e
= tree_first(&rmap_tree
); e
; e
= tree_next(e
)) {
706 struct reserved_map
*rmap1
= get_struct(e
, struct reserved_map
, free_entry
);
707 unsigned idx1
= rmap1
- rmap
;
708 unsigned len1
= rmap
[idx1
].f
+ 1 - idx1
;
709 if (idx
>= idx1
&& idx
+ len
<= idx1
+ len1
) {
710 reserve_sub_alloc(idx1
, len1
, idx
, len
);
721 static bool try_to_reserve_memory(void *ptr
, size_t len
)
727 MAP_FIXED
| MAP_EXCL
|
731 if (ptr_to_num(ptr
) + len
> (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE
))
734 res
= os_mmap(ptr
, len
, PROT_NONE
, MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_NORESERVE
| extra_flags
, handle_none
, 0, &sink
);
735 if (unlikely(res
== MAP_FAILED
))
739 os_munmap(res
, len
, false);
746 static void reserve_memory(void)
749 tree_init(&rmap_tree
);
750 rmap
= os_mmap(NULL
, RESERVED_MAP_ENTRIES
* sizeof(struct reserved_map
), PROT_HEAP
, MAP_PRIVATE
| MAP_ANONYMOUS
, handle_none
, 0, NULL
);
751 #if defined(OS_CYGWIN)
752 step
= ARENA_SIZE
* 256;
753 ptr
= round_up((uintptr_t)1 << 34, step
);
754 #elif defined(__sun__)
756 ptr
= round_up((uintptr_t)1 << 31, step
);
761 while (ptr
< (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE
)) {
764 if (!try_to_reserve_memory(num_to_ptr(ptr
), step
)) {
768 os_munmap(num_to_ptr(ptr
), step
, false);
770 len
= ((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE
)) - ptr
;
771 if (try_to_reserve_memory(num_to_ptr(ptr
), len
)) {
772 reserve_free_merge_run(ptr
>> ARENA_BITS
, len
>> ARENA_BITS
);
777 bit
= (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE
- 1);
779 for (; bit
>= step
; bit
>>= 1) {
780 if (try_to_reserve_memory(num_to_ptr(ptr
), len
| bit
)) {
781 os_munmap(num_to_ptr(ptr
), len
| bit
, false);
787 if (likely(try_to_reserve_memory(num_to_ptr(ptr
), len
))) {
788 reserve_free_merge_run(ptr
>> ARENA_BITS
, len
>> ARENA_BITS
);
797 if (unlikely(tree_is_empty(&rmap_tree
)))
798 fatal("unable to reserve any memory for pointer compression");
801 static void unreserve_memory(void)
803 while (!tree_is_empty(&rmap_tree
)) {
804 struct reserved_map
*rmap1
= get_struct(tree_any(&rmap_tree
), struct reserved_map
, free_entry
);
805 unsigned idx1
= rmap1
- rmap
;
806 unsigned len1
= rmap
[idx1
].f
+ 1 - idx1
;
807 amalloc_do_unmap(num_to_ptr((uintptr_t)idx1
<< ARENA_BITS
), (size_t)len1
<< ARENA_BITS
);
808 tree_delete(&rmap
[idx1
].free_entry
);
810 amalloc_do_unmap(rmap
, RESERVED_MAP_ENTRIES
* sizeof(struct reserved_map
));
816 void amalloc_run_free(void *ptr
, size_t length
)
818 if (unlikely(!length
))
820 #ifdef POINTER_COMPRESSION_POSSIBLE
821 if (pointer_compression_enabled
) {
822 unsigned idx
= ptr_to_num(ptr
) >> ARENA_BITS
;
823 size_t l
= (length
+ ARENA_SIZE
- 1) >> ARENA_BITS
;
824 do_disable_mapping(ptr
, l
<< ARENA_BITS
);
826 reserve_free_merge_run(idx
, l
);
831 amalloc_do_unmap(ptr
, length
);
834 void *amalloc_run_alloc(size_t al
, size_t length
, bool attr_unused clr
, bool attr_unused saved
)
836 size_t attr_unused extra_length
;
838 void attr_unused
*base_address
;
839 uintptr_t attr_unused nptr
, aptr
, fptr
, eptr
;
844 if (unlikely(al
< page_size
))
847 length
= round_up(length
, page_size
);
848 if (unlikely(!length
))
851 #ifdef POINTER_COMPRESSION_POSSIBLE
852 if (pointer_compression_enabled
) {
855 if (unlikely(al
< ARENA_SIZE
))
858 l
= round_up(length
, ARENA_SIZE
) >> ARENA_BITS
;
861 if (unlikely(l
!= (unsigned)l
))
865 res
= reserve_last_run(l
);
867 res
= reserve_alloc_run(al
, l
);
871 ptr
= num_to_ptr((uintptr_t)res
<< ARENA_BITS
);
872 if (unlikely(!do_enable_mapping(ptr
, length
, clr
))) {
874 reserve_free_merge_run(res
, l
);
883 if (unlikely(saved
)) {
884 base_address
= num_to_ptr(round_down(ptr_to_num(&saved
) / 2, page_size
));
888 if (unlikely(al
> ARENA_SIZE
))
890 ptr
= os2_alloc_aligned(length
);
891 #elif defined(MAP_ALIGNED) && !defined(__minix__)
892 if (unlikely(al
!= (bitmap_t
)al
))
894 ptr
= os_mmap(base_address
, length
, PROT_HEAP
, MAP_PRIVATE
| MAP_ANONYMOUS
| MAP_ALIGNED(find_bit(al
)), handle_none
, 0, &sink
);
895 if (unlikely(ptr
== MAP_FAILED
))
897 if (unlikely((ptr_to_num(ptr
) & (al
- 1)) != 0))
898 fatal("os_mmap returned unaligned pointer: %p, required alignment %lx", ptr
, (unsigned long)ARENA_SIZE
);
900 extra_length
= length
+ al
- page_size
;
901 if (unlikely(extra_length
< length
))
904 ptr
= os_mmap(base_address
, extra_length
, PROT_HEAP
, MAP_PRIVATE
| MAP_ANONYMOUS
, handle_none
, 0, &sink
);
906 if (unlikely(ptr
== MAP_FAILED
))
909 nptr
= ptr_to_num(ptr
);
910 aptr
= round_up(nptr
, al
);
911 fptr
= aptr
+ length
;
912 eptr
= nptr
+ extra_length
;
913 amalloc_run_free(num_to_ptr(nptr
), aptr
- nptr
);
914 amalloc_run_free(num_to_ptr(fptr
), eptr
- fptr
);
916 ptr
= num_to_ptr(aptr
);
919 #ifdef POINTER_COMPRESSION_POSSIBLE
923 #if !defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && defined(MADV_HUGEPAGE)
924 if (length
== ARENA_SIZE
) {
925 int madv
= MADV_HUGEPAGE
;
927 EINTR_LOOP(r
, madvise(ptr
, length
, madv
));
928 /*if (unlikely(r == -1)) {
930 warning("madvise(%d) failed: %d, %s", madv, er, error_decode(error_from_errno(EC_SYSCALL, er)));
938 static void *amalloc_run_realloc(void *ptr
, size_t old_length
, size_t new_length
)
942 old_length
= round_up(old_length
, page_size
);
943 new_length
= round_up(new_length
, page_size
);
944 if (unlikely(!new_length
))
947 #ifdef POINTER_COMPRESSION_POSSIBLE
948 if (pointer_compression_enabled
) {
950 size_t old_length_arenas
= round_up(old_length
, ARENA_SIZE
) >> ARENA_BITS
;
951 size_t new_length_arenas
= round_up(new_length
, ARENA_SIZE
) >> ARENA_BITS
;
952 if (unlikely(!new_length_arenas
))
954 if (unlikely(new_length_arenas
!= (unsigned)new_length_arenas
))
957 f
= reserve_realloc_run(ptr_to_num(ptr
) >> ARENA_BITS
, old_length_arenas
, new_length_arenas
);
959 /*debug("try realloc: %p, %d", ptr, f);*/
961 if (likely(old_length
< new_length
)) {
962 if (unlikely(!do_enable_mapping(cast_ptr(char *, ptr
) + old_length
, new_length
- old_length
, false))) {
964 reserve_free_merge_run((ptr_to_num(ptr
) >> ARENA_BITS
) + old_length_arenas
, new_length_arenas
- old_length_arenas
);
968 } else if (old_length
> new_length
) {
969 do_disable_mapping(cast_ptr(char *, ptr
) + new_length
, old_length
- new_length
);
977 #if defined(OS_OS2) || defined(OS_WIN32)
978 if (new_length
== old_length
)
982 if (unlikely(new_length
<= old_length
)) {
983 amalloc_run_free(cast_ptr(char *, ptr
) + new_length
, old_length
- new_length
);
987 #if defined(OS_HAS_MREMAP)
990 void *r
= os_mremap(ptr
, old_length
, new_length
, 0, NULL
, &sink
);
991 if (r
!= MAP_FAILED
) {
992 if (unlikely(r
!= ptr
))
993 internal(file_line
, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r
, ptr
);
1000 n
= os_mmap(cast_ptr(char *, ptr
) + old_length
, new_length
- old_length
, PROT_HEAP
, MAP_PRIVATE
| MAP_ANONYMOUS
, handle_none
, 0, &sink
);
1001 if (likely(n
!= MAP_FAILED
)) {
1002 if (n
== cast_ptr(char *, ptr
) + old_length
)
1004 amalloc_do_unmap(n
, new_length
- old_length
);
1009 n
= amalloc_run_alloc(ARENA_SIZE
, new_length
, false, false);
1013 #if defined(OS_HAS_MREMAP) && defined(MREMAP_FIXED)
1016 void *r
= os_mremap(ptr
, old_length
, new_length
, MREMAP_MAYMOVE
| MREMAP_FIXED
, n
, &sink
);
1017 if (likely(r
!= MAP_FAILED
)) {
1018 if (unlikely(r
!= n
))
1019 internal(file_line
, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r
, n
);
1025 memcpy(n
, ptr
, old_length
);
1026 amalloc_run_free(ptr
, old_length
);
1032 static inline void sbc_lock(struct small_block_cache
*sbc
)
1034 if (likely(amalloc_threads_initialized
))
1035 mutex_lock(&sbc
->mutex
);
1038 static inline void sbc_unlock(struct small_block_cache
*sbc
)
1040 if (likely(amalloc_threads_initialized
))
1041 mutex_unlock(&sbc
->mutex
);
1044 static inline void arena_lock(struct arena
*a
)
1046 if (unlikely(!amalloc_threads_initialized
))
1048 address_lock(a
, DEPTH_ARENA
);
1051 static inline void arena_unlock(struct arena
*a
)
1053 if (unlikely(!amalloc_threads_initialized
))
1055 address_unlock(a
, DEPTH_ARENA
);
1058 static void arena_tree_lock(void)
1060 if (unlikely(!amalloc_threads_initialized
))
1062 mutex_lock(&arena_tree_mutex
);
1065 static void arena_tree_unlock(void)
1067 if (unlikely(!amalloc_threads_initialized
))
1069 mutex_unlock(&arena_tree_mutex
);
1072 static void huge_tree_lock(void)
1074 if (unlikely(!amalloc_threads_initialized
))
1076 mutex_lock(&huge_tree_mutex
);
1079 static void huge_tree_unlock(void)
1081 if (unlikely(!amalloc_threads_initialized
))
1083 mutex_unlock(&huge_tree_mutex
);
1086 static int huge_tree_compare(const struct tree_entry
*entry
, uintptr_t addr
)
1088 struct huge_entry
*e
= get_struct(entry
, struct huge_entry
, entry
);
1089 if (ptr_to_num(e
->ptr
) < addr
)
1091 if (ptr_to_num(e
->ptr
) > addr
)
1096 static void huge_tree_insert(struct huge_entry
*e
)
1098 struct tree_insert_position ins
;
1099 struct tree_entry attr_unused
*ee
;
1101 ee
= tree_find_for_insert(&huge_tree
, huge_tree_compare
, ptr_to_num(e
->ptr
), &ins
);
1102 ajla_assert_lo(ee
== NULL
, (file_line
, "huge_tree_insert: entry for address %p is already present", e
->ptr
));
1103 tree_insert_after_find(&e
->entry
, &ins
);
1107 static struct huge_entry
*huge_tree_delete(void *ptr
)
1109 struct tree_entry
*entry
;
1111 entry
= tree_find(&huge_tree
, huge_tree_compare
, ptr_to_num(ptr
));
1112 ajla_assert_lo(entry
!= NULL
, (file_line
, "huge_tree_delete: entry for address %p not found", ptr
));
1115 return get_struct(entry
, struct huge_entry
, entry
);
1119 static attr_always_inline
struct per_thread
*amalloc_per_thread(void)
1121 if (unlikely(!amalloc_threads_initialized
))
1124 return tls_get(struct per_thread
*, per_thread
);
1127 static void arena_detach(struct arena
*a
);
1128 static bool amalloc_detach_small(struct small_block_cache
*sbc
, union midblock
*m
);
1129 #ifdef AMALLOC_EAGER_FREE
1130 static void amalloc_test_free_small(struct small_block_cache
*sbc
, size_t idx
, union midblock
*m
);
1133 static void set_small_run(struct per_thread
*pt
, unsigned i
, union midblock
*m
)
1136 pt
->small_runs
[0] = m
;
1137 pt
->small_runs
[i
+ 1] = m
;
1140 static void detach_pt_data(struct per_thread
*pt
)
1144 for (i
= 0; i
< N_CLASSES
; i
++) {
1145 union midblock
*m
= pt
->small_runs
[i
+ 1];
1146 struct small_block_cache
*sbc
= &small_block_cache
[i
];
1147 #ifdef AMALLOC_EAGER_FREE
1150 set_small_run(pt
, i
, NULL
);
1151 if (m
== &full_midblock
)
1154 amalloc_atomic_or(&m
->s
.atomic_map
, m
->s
.map
);
1156 if (unlikely(!amalloc_detach_small(sbc
, m
))) {
1158 fatal("arealloc failed: %d, %s", er
, error_decode(error_from_errno(EC_SYSCALL
, er
)));
1160 #ifdef AMALLOC_EAGER_FREE
1164 #ifdef AMALLOC_EAGER_FREE
1165 amalloc_test_free_small(sbc
, idx
, m
);
1169 pt
->arena
= BAD_POINTER_1
;
1177 static void per_thread_destructor(tls_destructor_t
*destr
)
1179 struct per_thread
*pt
= get_struct(destr
, struct per_thread
, destructor
);
1184 static void amalloc_per_thread_init(struct per_thread
*pt
)
1188 for (i
= 0; i
< N_CLASSES
; i
++)
1189 set_small_run(pt
, i
, &full_midblock
);
1192 static bool amalloc_per_thread_alloc(void)
1194 struct per_thread
*pt
;
1195 pt
= malloc(sizeof(struct per_thread
));
1198 amalloc_per_thread_init(pt
);
1199 tls_destructor(&pt
->destructor
, per_thread_destructor
);
1200 tls_set(struct per_thread
*, per_thread
, pt
);
1204 static int arena_tree_compare(const struct tree_entry
*entry
, uintptr_t len
)
1206 struct arena
*a
= get_struct(entry
, struct arena
, arena_entry
);
1207 if (a
->max_midblock_run
< len
)
1212 static void arena_insert_locked(struct arena
*a
)
1214 struct tree_insert_position ins
;
1215 struct tree_entry attr_unused
*ee
;
1216 if (a
->max_midblock_run
== ARENA_MIDBLOCKS
- ARENA_PREFIX
) {
1217 /*debug("freeing empty arena %p", a);*/
1218 amalloc_run_free(a
, ARENA_SIZE
);
1221 ee
= tree_find_for_insert(&arena_tree
, arena_tree_compare
, a
->max_midblock_run
, &ins
);
1222 ajla_assert(ee
== NULL
, (file_line
, "arena_insert_locked: entry for address %p is already present", a
));
1223 tree_insert_after_find(&a
->arena_entry
, &ins
);
1226 static void arena_relink(struct arena
*a
, unsigned new_run
)
1229 bool full_free
= new_run
== ARENA_MIDBLOCKS
- ARENA_PREFIX
;
1230 if (new_run
>= (unsigned)a
->max_midblock_run
* 2 || full_free
) {
1233 tree_delete(&a
->arena_entry
);
1234 a
->max_midblock_run
= new_run
;
1235 arena_insert_locked(a
);
1237 arena_tree_unlock();
1242 static int midblock_compare(const struct tree_entry
*entry
, uintptr_t idx2
)
1244 union midblock
*m1
= get_struct(entry
, union midblock
, free_entry
);
1245 struct arena
*a
= addr_to_arena(m1
);
1246 unsigned idx1
= midblock_to_idx(a
, m1
);
1247 unsigned len1
, len2
;
1248 ajla_assert(a
->map
[idx1
] & MAP_FREE
, (file_line
, "midblock_compare: idx1 is not free"));
1249 ajla_assert(a
->map
[idx2
] & MAP_FREE
, (file_line
, "midblock_compare: idx2 is not free"));
1250 len1
= (a
->map
[idx1
] & ~MAP_FREE
) + 1 - idx1
;
1251 len2
= (a
->map
[idx2
] & ~MAP_FREE
) + 1 - idx2
;
1253 return len1
< len2
? -1 : 1;
1259 static void arena_free_midblock(struct arena
*a
, unsigned start
, unsigned len
)
1261 struct tree_insert_position ins
;
1262 a
->map
[start
] = MAP_FREE
| (start
+ len
- 1);
1263 a
->map
[start
+ len
- 1] = MAP_FREE
| start
;
1264 tree_find_for_insert(&a
->midblock_free
, midblock_compare
, start
, &ins
);
1265 tree_insert_after_find(&idx_to_midblock(a
, start
)->free_entry
, &ins
);
1268 static void arena_free_merge_midblock(struct arena
*a
, unsigned start
, unsigned len
)
1270 uintptr_t attr_unused start_ptr
, end_ptr
;
1271 if (start
&& a
->map
[start
- 1] & MAP_FREE
) {
1272 unsigned more
= start
- (a
->map
[start
- 1] & ~MAP_FREE
);
1275 tree_delete(&idx_to_midblock(a
, start
)->free_entry
);
1277 if (start
+ len
< ARENA_MIDBLOCKS
&& a
->map
[start
+ len
] & MAP_FREE
) {
1278 unsigned more
= (a
->map
[start
+ len
] & ~MAP_FREE
) + 1 - (start
+ len
);
1279 tree_delete(&idx_to_midblock(a
, start
+ len
)->free_entry
);
1282 #if defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && ((defined(__linux__) && defined(MADV_DONTNEED)) || (!defined(__linux__) && defined(MADV_FREE)))
1283 start_ptr
= round_up(ptr_to_num(idx_to_midblock(a
, start
)), page_size
);
1284 end_ptr
= round_down(ptr_to_num(idx_to_midblock(a
, start
+ len
)), page_size
);
1285 if (start_ptr
< end_ptr
) {
1287 int madv
= MADV_DONTNEED
;
1289 int madv
= MADV_FREE
;
1292 EINTR_LOOP(r
, madvise(num_to_ptr(start_ptr
), end_ptr
- start_ptr
, madv
));
1293 if (unlikely(r
== -1)) {
1295 warning("madvise(%d) failed: %d, %s", madv
, er
, error_decode(error_from_errno(EC_SYSCALL
, er
)));
1299 arena_free_midblock(a
, start
, len
);
1300 arena_relink(a
, len
);
1303 static void arena_grab_midblocks(struct arena
*a
, unsigned idx1
, unsigned idx
, unsigned len
)
1306 union midblock
*m
= idx_to_midblock(a
, idx1
);
1307 tree_delete(&m
->free_entry
);
1308 found_end
= (a
->map
[idx1
] & ~MAP_FREE
) + 1;
1309 if (unlikely(idx
> idx1
))
1310 arena_free_midblock(a
, idx1
, idx
- idx1
);
1311 if (found_end
> idx
+ len
)
1312 arena_free_midblock(a
, idx
+ len
, found_end
- (idx
+ len
));
1315 static int midblock_find(const struct tree_entry
*entry
, uintptr_t len
)
1317 union midblock
*m1
= get_struct(entry
, union midblock
, free_entry
);
1318 struct arena
*a
= addr_to_arena(m1
);
1319 unsigned idx1
= midblock_to_idx(a
, m1
);
1320 unsigned len1
= (a
->map
[idx1
] & ~MAP_FREE
) + 1 - idx1
;
1328 static union midblock
*arena_alloc_midblock(struct arena
*a
, unsigned midblock_alignment
, unsigned midblocks
)
1330 struct tree_entry
*ee
;
1331 ee
= tree_find_best(&a
->midblock_free
, midblock_find
, midblocks
+ midblock_alignment
- 1);
1332 if (likely(ee
!= NULL
)) {
1333 union midblock
*m
= get_struct(ee
, union midblock
, free_entry
);
1334 unsigned best
= midblock_to_idx(a
, m
);
1335 unsigned best_aligned
= round_up(best
, midblock_alignment
);
1336 arena_grab_midblocks(a
, best
, best_aligned
, midblocks
);
1337 a
->map
[best_aligned
] = best_aligned
+ midblocks
- 1;
1338 a
->map
[best_aligned
+ midblocks
- 1] = best_aligned
;
1339 return idx_to_midblock(a
, best_aligned
);
1344 static bool arena_try_realloc_midblock(struct arena
*a
, unsigned base
, unsigned orig_len
, unsigned new_len
)
1347 if (unlikely(new_len
<= orig_len
)) {
1348 if (new_len
== orig_len
)
1350 a
->map
[base
] = base
+ new_len
- 1;
1351 a
->map
[base
+ new_len
- 1] = base
;
1352 arena_free_merge_midblock(a
, base
+ new_len
, orig_len
- new_len
);
1354 if (unlikely(base
+ new_len
> ARENA_MIDBLOCKS
))
1356 if (!(a
->map
[base
+ orig_len
] & MAP_FREE
))
1358 free_len
= (a
->map
[base
+ orig_len
] & ~MAP_FREE
) + 1 - (base
+ orig_len
);
1359 if (free_len
< new_len
- orig_len
)
1361 arena_grab_midblocks(a
, base
+ orig_len
, base
+ orig_len
, new_len
- orig_len
);
1362 a
->map
[base
] = base
+ new_len
- 1;
1363 a
->map
[base
+ new_len
- 1] = base
;
1368 static struct arena
*arena_alloc(void)
1371 a
= amalloc_run_alloc(ARENA_SIZE
, ARENA_SIZE
, false, false);
1374 tree_init(&a
->midblock_free
);
1375 a
->map
[0] = ARENA_PREFIX
- 1;
1376 a
->map
[ARENA_PREFIX
- 1] = 0;
1378 arena_free_midblock(a
, ARENA_PREFIX
, ARENA_MIDBLOCKS
- ARENA_PREFIX
);
1379 /*debug("allocating arena %p", a);*/
1384 static unsigned arena_compute_max_run(struct arena
*a
)
1386 struct tree_entry
*e
= tree_last(&a
->midblock_free
);
1390 union midblock
*m
= get_struct(e
, union midblock
, free_entry
);
1391 unsigned idx
= midblock_to_idx(a
, m
);
1392 return (a
->map
[idx
] & ~MAP_FREE
) + 1 - idx
;
1396 static void arena_detach(struct arena
*a
)
1398 a
->max_midblock_run
= arena_compute_max_run(a
);
1400 a
->attached
= false;
1401 arena_insert_locked(a
);
1402 arena_tree_unlock();
1405 static int arena_tree_find(const struct tree_entry
*entry
, uintptr_t len
)
1407 struct arena
*a
= get_struct(entry
, struct arena
, arena_entry
);
1408 if (a
->max_midblock_run
< len
)
1410 if (a
->max_midblock_run
> len
)
1415 static struct arena
*arena_attach(unsigned midblocks
)
1418 struct tree_entry
*ee
;
1420 ee
= tree_find_best(&arena_tree
, arena_tree_find
, midblocks
);
1422 a
= get_struct(ee
, struct arena
, arena_entry
);
1424 tree_delete(&a
->arena_entry
);
1425 arena_tree_unlock();
1428 arena_tree_unlock();
1429 return arena_alloc();
1432 static void *amalloc_mid(struct per_thread
*pt
, unsigned midblock_alignment
, unsigned midblocks
)
1434 struct arena
*a
= pt
->arena
;
1435 bool looped
= false;
1436 if (likely(a
!= NULL
)) {
1440 m
= arena_alloc_midblock(a
, midblock_alignment
, midblocks
);
1441 if (likely(m
!= NULL
)) {
1449 if (likely(!looped
))
1450 a
= arena_attach(midblocks
+ midblock_alignment
- 1);
1460 static void *amalloc_huge(size_t al
, size_t size
, bool clr
)
1462 struct huge_entry
*e
;
1463 void *ptr
= amalloc_run_alloc(al
, size
, clr
, false);
1466 e
= malloc(sizeof(struct huge_entry
));
1468 amalloc_run_free(ptr
, size
);
1473 huge_tree_insert(e
);
/*
 * Slow path of amalloc()/acalloc() for sizes above DIRECT_LIMIT:
 * requests larger than MIDBLOCK_LIMIT go to the huge allocator with
 * arena-sized alignment; everything else is rounded up to whole
 * midblocks and served from the per-thread arena.  When "clr" is set,
 * midblock results are zeroed here (the huge path clears internally).
 */
1477 static attr_noinline
void *amalloc_mid_huge(struct per_thread
*pt
, size_t size
, bool clr
)
1479 if (unlikely(size
> MIDBLOCK_LIMIT
)) {
1480 return amalloc_huge(ARENA_SIZE
, size
, clr
);
1482 unsigned midblocks
= round_up(size
, MIDBLOCK_SIZE
) >> MIDBLOCK_BITS
;
1483 void *ptr
= amalloc_mid(pt
, 1, midblocks
);
1484 if (likely(ptr
!= NULL
) && clr
)
1485 memset(ptr
, 0, size
);
/*
 * Aligned-allocation slow path.  Requests whose size plus alignment
 * exceed MIDBLOCK_LIMIT are routed to amalloc_huge() (the branch for
 * al < ARENA_SIZE at line 1493 has its body, original line 1494,
 * missing from this fragment - presumably it raises the alignment).
 * Otherwise both size and alignment are rounded up to whole midblocks
 * and served by amalloc_mid().  Per-thread state is created lazily on
 * first use; "clr" zeroes the midblock result here.
 */
1490 static attr_noinline
void *amemalign_mid_huge(size_t al
, size_t size
, bool clr
)
1492 if (size
+ al
> MIDBLOCK_LIMIT
) {
1493 if (al
< ARENA_SIZE
)
1495 return amalloc_huge(al
, size
, clr
);
1497 unsigned midblocks
, midblock_alignment
;
1499 struct per_thread
*pt
= amalloc_per_thread();
1500 if (unlikely(!pt
)) {
1501 if (unlikely(!amalloc_per_thread_alloc()))
1503 pt
= amalloc_per_thread();
1505 midblocks
= round_up(size
, MIDBLOCK_SIZE
) >> MIDBLOCK_BITS
;
1506 midblock_alignment
= round_up(al
, MIDBLOCK_SIZE
) >> MIDBLOCK_BITS
;
1507 ptr
= amalloc_mid(pt
, midblock_alignment
, midblocks
);
1508 if (likely(ptr
!= NULL
) && clr
)
1509 memset(ptr
, 0, size
);
1514 static unsigned reserved_bits(unsigned cls
)
1516 size_t size
= CLASS_TO_SIZE(cls
);
1517 unsigned reserved_bits
= div_16(sizeof(full_midblock
.s
) + size
- 1, size
);
1518 return reserved_bits
;
/*
 * Create a fresh small-object run for size class "cls": one slab of up
 * to BITMAP_BITS objects carved from whole midblocks.  Every map[]
 * slot covered by the run points back at its first midblock so afree()
 * can locate the run header.  The first reserved_bits() bitmap bits
 * stay clear because those slots overlap the header itself.
 * NOTE(review): the failure check after amalloc_mid() and the
 * assignments of s.size / s.cls (original lines 1528-1529, 1542-1543)
 * are not visible in this fragment.
 */
1521 static union midblock
*create_small_run(struct per_thread
*pt
, unsigned cls
)
1524 unsigned idx
, i
, res
;
1525 size_t size
= CLASS_TO_SIZE(cls
);
/* Enough midblocks to hold BITMAP_BITS objects of this size. */
1526 unsigned midblocks
= (size
* BITMAP_BITS
+ MIDBLOCK_SIZE
- 1) >> MIDBLOCK_BITS
;
1527 union midblock
*m
= amalloc_mid(pt
, 1, midblocks
);
1530 a
= addr_to_arena(m
);
1531 idx
= midblock_to_idx(a
, m
);
1532 for (i
= 0; i
< midblocks
; i
++) {
1533 a
->map
[idx
+ i
] = idx
;
1535 res
= reserved_bits(cls
);
/* All non-reserved slots free in the private map; atomic map empty. */
1536 m
->s
.map
= (bitmap_t
)-1 << res
;
1537 store_relaxed(&m
->s
.atomic_map
, 0);
1538 #ifdef AMALLOC_EAGER_FREE
/* Not in any small_block_cache slot yet. */
1539 m
->s
.index
= (size_t)-1;
1541 m
->s
.reserved_bits
= res
;
/* Precomputed reciprocal for fast offset -> bit division in afree(). */
1544 m
->s
.reciprocal
= div_16(RECIPROCAL_BASE
+ size
- 1, size
);
/*
 * Return a completely-free small run's midblocks to its arena, merging
 * them with neighbouring free runs.  The midblock count is recomputed
 * from the run's object size, mirroring create_small_run().
 * NOTE(review): arena locking around the merge (original line 1554) is
 * not visible in this fragment - confirm against the full file.
 */
1548 static void amalloc_free_small(union midblock
*m
)
1550 struct arena
*a
= addr_to_arena(m
);
1551 unsigned idx
= midblock_to_idx(a
, m
);
1552 unsigned midblocks
= (m
->s
.size
* BITMAP_BITS
+ MIDBLOCK_SIZE
- 1) >> MIDBLOCK_BITS
;
1553 /*debug("midblock (%d): %lx / %lx", m->s.cls, m->s.map, m->s.atomic_map);*/
1555 arena_free_merge_midblock(a
, idx
, midblocks
);
1559 #ifdef AMALLOC_EAGER_FREE
/*
 * (AMALLOC_EAGER_FREE only)  If the cached run "m" is still at cache
 * slot "idx" and its atomic map shows every non-reserved bit free,
 * remove it from the cache (swap-with-last, fixing the moved entry's
 * stored index) and release its midblocks back to the arena.
 * NOTE(review): cache-mutex handling and the re-check that clears "m"
 * when the test fails (original lines 1562, 1565-1568) are not
 * visible in this fragment.
 */
1560 static attr_noinline
void amalloc_test_free_small(struct small_block_cache
*sbc
, size_t idx
, union midblock
*m
)
1563 if (likely(idx
< sbc
->array_size
) && likely(sbc
->array
[idx
] == m
) && likely(load_relaxed(&m
->s
.atomic_map
) == (bitmap_t
)-1 << m
->s
.reserved_bits
)) {
/* Swap-with-last removal; the moved entry learns its new slot. */
1564 (sbc
->array
[idx
] = sbc
->array
[--sbc
->array_size
])->s
.index
= idx
;
1569 if (likely(m
!= NULL
)) {
1570 amalloc_free_small(m
);
1571 /*debug("freed small");*/
/*
 * Park the thread's current run "m" for its size class in the shared
 * small_block_cache so other threads (or a later amalloc_small_empty)
 * may adopt it.  Grows the cache array by doubling, with an overflow
 * check on the byte count.  Returns false on failure.
 * NOTE(review): mutex handling, the failure returns, and the
 * EAGER_FREE index assignment (original lines around 1583, 1585-1587,
 * 1593-1594) are not visible in this fragment.
 */
1576 static bool amalloc_detach_small(struct small_block_cache
*sbc
, union midblock
*m
)
1579 if (unlikely(sbc
->array_size
== sbc
->allocated_size
)) {
1581 size_t s
= sbc
->allocated_size
* 2;
/* Guard the multiplication below against size_t overflow. */
1582 if (unlikely(s
> (size_t)-1 / sizeof(union midblock
*)))
1584 a
= arealloc(sbc
->array
, s
* sizeof(union midblock
*));
1588 sbc
->allocated_size
= s
;
1589 /*debug("amalloc_detach_small: %ld - %ld - %p", sbc - small_block_cache, s, a);*/
1591 index
= sbc
->array_size
++;
1592 #ifdef AMALLOC_EAGER_FREE
1595 sbc
->array
[index
] = m
;
/*
 * Slow path of amalloc_small(): the thread's current run for this size
 * class has no free bit in its private map.  Detach it into the shared
 * cache, then probe up to SMALL_BLOCK_TRIES cached runs (round-robin
 * rover, or rand_r() when AMALLOC_USE_RANDOM_ROVER) and adopt the one
 * with the most free bits.  Completely-free runs encountered while an
 * equally-free candidate is already held are released back to their
 * arena.  If nothing usable is cached, a brand-new run is created.
 * Finally one bit is claimed and the corresponding object returned.
 * NOTE(review): the cache-mutex lock/unlock lines and several loop /
 * brace lines are missing from this fragmentary extraction.
 */
1599 static attr_noinline
void *amalloc_small_empty(struct per_thread
*pt
, size_t size
)
1601 unsigned idx
= SIZE_TO_INDEX(size
);
1602 unsigned cls
= INDEX_TO_CLASS(idx
);
1603 unsigned i
, best_idx
, best_count
, bit
, all_free
;
/* cls + 1 == SIZE_TO_INDEX(size): small_runs[] is indexed by index, not class. */
1604 union midblock
*m
= pt
->small_runs
[cls
+ 1];
1605 struct small_block_cache
*sbc
= &small_block_cache
[cls
];
1607 if (likely(m
!= &full_midblock
)) {
1608 if (unlikely(!amalloc_detach_small(sbc
, m
))) {
/* Bits usable in a run = bitmap width minus header-reserved bits. */
1612 all_free
= BITMAP_BITS
- m
->s
.reserved_bits
;
1614 all_free
= BITMAP_BITS
- reserved_bits(cls
);
1618 i
= minimum(SMALL_BLOCK_TRIES
, sbc
->array_size
);
1620 unsigned test_count
;
1621 union midblock
*test
;
1624 #ifndef AMALLOC_USE_RANDOM_ROVER
1625 test_idx
= ++sbc
->rover
;
1626 if (unlikely(test_idx
>= sbc
->array_size
)) {
1627 test_idx
= sbc
->rover
= 0;
1630 test_idx
= rand_r(&sbc
->rover
) % sbc
->array_size
;
1632 test
= sbc
->array
[test_idx
];
1633 map
= load_relaxed(&test
->s
.atomic_map
);
1636 test_count
= count_bits(map
);
/* Already holding a fully-free candidate: surplus fully-free runs can go. */
1637 if (best_count
== all_free
) {
1638 if (test_count
== all_free
&& test_idx
!= best_idx
&& sbc
->array_size
- 1 != best_idx
) {
1639 /*debug("freeing cached slab: %u, %p", cls, test);*/
1640 (sbc
->array
[test_idx
] = sbc
->array
[--sbc
->array_size
])
1641 #ifdef AMALLOC_EAGER_FREE
1642 ->s
.index
= test_idx
1645 amalloc_free_small(test
);
1646 if (!sbc
->array_size
)
1650 if (test_count
> best_count
) {
1651 best_idx
= test_idx
;
1652 best_count
= test_count
;
1656 if (likely(best_count
)) {
/* Adopt the best cached run: swap-with-last removal from the cache. */
1657 m
= sbc
->array
[best_idx
];
1658 (sbc
->array
[best_idx
] = sbc
->array
[--sbc
->array_size
])
1659 #ifdef AMALLOC_EAGER_FREE
1660 ->s
.index
= best_idx
/* Pull remotely-freed bits into the private map in a single xchg. */
1664 m
->s
.map
= amalloc_atomic_xchg(&m
->s
.atomic_map
, 0);
1665 #ifdef AMALLOC_EAGER_FREE
1666 m
->s
.index
= (size_t)-1;
1670 m
= create_small_run(pt
, cls
);
/* Creation failed: park the sentinel so the fast path keeps failing fast. */
1672 set_small_run(pt
, cls
, &full_midblock
);
1676 set_small_run(pt
, cls
, m
);
/* Claim the lowest free slot and return its object. */
1677 bit
= find_bit(m
->s
.map
);
1678 m
->s
.map
&= ~((bitmap_t
)1 << bit
);
1679 return cast_ptr(char *, m
) + bit
* m
->s
.size
;
/*
 * Cold path of amalloc()/acalloc() for a thread that has no per-thread
 * allocator state yet: create the state, then retry through amalloc()
 * or acalloc() according to "clr".
 * NOTE(review): the failure return after amalloc_per_thread_alloc()
 * and the clr branch (original lines 1685-1686, 1688) are not visible
 * in this fragment.
 */
1682 static attr_noinline
void *amalloc_alloc_pt(size_t size
, bool clr
)
1684 if (unlikely(!amalloc_per_thread_alloc()))
1687 return amalloc(size
);
1689 return acalloc(size
);
/*
 * Fast path for allocations up to DIRECT_LIMIT: take one bit from the
 * current run's private free map.  When the private map is empty,
 * first try to reclaim bits freed by other threads from atomic_map
 * (one atomic xchg); only if that also yields nothing fall through to
 * amalloc_small_empty().  The object lives bit * object-size bytes
 * past the run header.
 */
1692 static attr_always_inline
void *amalloc_small(struct per_thread
*pt
, size_t size
)
1695 unsigned idx
= SIZE_TO_INDEX(size
);
1696 union midblock
*m
= pt
->small_runs
[idx
];
1697 if (likely(m
->s
.map
!= 0)) {
/* Private map empty - steal everything remote frees have accumulated. */
1700 if (load_relaxed(&m
->s
.atomic_map
) != 0) {
1701 m
->s
.map
= amalloc_atomic_xchg(&m
->s
.atomic_map
, 0);
1704 return amalloc_small_empty(pt
, size
);
1706 bit
= find_bit(m
->s
.map
);
1707 m
->s
.map
&= ~((bitmap_t
)1 << bit
);
1708 return cast_ptr(char *, m
) + bit
* m
->s
.size
;
/*
 * Public allocation entry point.  A thread without per-thread state is
 * sent to the cold path (which creates the state and retries); sizes
 * above DIRECT_LIMIT go to the midblock/huge path, everything else to
 * the small-object fast path.
 * NOTE(review): the NULL check on pt (original line 1714) is not
 * visible in this fragment.
 */
1711 void * attr_fastcall
amalloc(size_t size
)
1713 struct per_thread
*pt
= amalloc_per_thread();
1715 return amalloc_alloc_pt(size
, false);
1716 if (unlikely(size
> DIRECT_LIMIT
))
1717 return amalloc_mid_huge(pt
, size
, false);
1718 return amalloc_small(pt
, size
);
/*
 * Zeroing allocation entry point; mirrors amalloc() but requests
 * cleared memory.  The mid/huge paths clear internally via the clr
 * flag; small allocations are memset() here after the fast path.
 * NOTE(review): the NULL checks on pt and ptr (original lines 1725,
 * 1730-1731) are not visible in this fragment.
 */
1721 void * attr_fastcall
acalloc(size_t size
)
1724 struct per_thread
*pt
= amalloc_per_thread();
1726 return amalloc_alloc_pt(size
, true);
1727 if (unlikely(size
> DIRECT_LIMIT
))
1728 return amalloc_mid_huge(pt
, size
, true);
1729 ptr
= amalloc_small(pt
, size
);
1732 return memset(ptr
, 0, size
);
/*
 * Aligned allocation.  For alignments up to min(DIRECT_LIMIT,
 * MIDBLOCK_SIZE) rounding the size up to the alignment is sufficient
 * (small runs start midblock-aligned and size classes are MIN_ALLOC-
 * granular); the rounding is checked for overflow.  Larger alignments
 * go to the mid/huge path.
 * NOTE(review): line 1742 reads "amalloc(size" in this extraction -
 * the rounded size2 is presumably the intended argument; confirm
 * against the full source.
 */
1735 void * attr_fastcall
amemalign(size_t al
, size_t size
)
1738 if (likely(al
<= minimum(DIRECT_LIMIT
, MIDBLOCK_SIZE
))) {
1739 size2
= round_up(size
, al
);
1740 if (unlikely(size2
< size
))
1742 return amalloc(size
);
1744 return amemalign_mid_huge(al
, size
, false);
/*
 * Zeroing variant of amemalign(); same dispatch, but the results are
 * cleared (acalloc for the small path, clr=true for the mid/huge
 * path).
 * NOTE(review): as in amemalign(), line 1754 reads "acalloc(size" in
 * this extraction - the rounded size2 is presumably intended; confirm
 * against the full source.
 */
1747 void * attr_fastcall
acmemalign(size_t al
, size_t size
)
1749 if (likely(al
<= minimum(DIRECT_LIMIT
, MIDBLOCK_SIZE
))) {
1751 size2
= round_up(size
, al
);
1752 if (unlikely(size2
< size
))
1754 return acalloc(size
);
1756 return amemalign_mid_huge(al
, size
, true);
/*
 * Free a huge allocation: remove its entry from the huge tree, then
 * unmap the run using the recorded length.
 * NOTE(review): the release of the huge_entry itself (original line
 * 1764, presumably free(e)) is not visible in this fragment.
 */
1760 static attr_noinline
void afree_huge(void *ptr
)
1762 struct huge_entry
*e
= huge_tree_delete(ptr
);
1763 amalloc_run_free(ptr
, e
->len
);
/*
 * Free a midblock allocation starting at midblock "idx": map[idx]
 * holds the index of the run's last midblock, so map[idx] + 1 - idx is
 * its length; the run is merged back into the arena's free runs.
 * NOTE(review): arena locking around the merge (original lines 1769
 * and 1771) is not visible in this fragment.
 */
1767 static attr_noinline
void afree_mid(struct arena
*a
, unsigned idx
)
1770 arena_free_merge_midblock(a
, idx
, a
->map
[idx
] + 1 - idx
);
/*
 * Public free entry point.  Dispatch on what "ptr" is:
 *  - a pointer equal to its arena base -> huge allocation (branch body
 *    not visible in this fragment),
 *  - a pointer at or before its run header -> midblock allocation,
 *  - otherwise a small object: compute its bitmap bit from the byte
 *    offset via the precomputed reciprocal (cross-checked against a
 *    plain division in debug builds), then either return the bit to
 *    the owning thread's private map or publish it in atomic_map with
 *    amalloc_atomic_or().  With AMALLOC_EAGER_FREE, a run that this
 *    free makes completely free is handed to amalloc_test_free_small().
 * NOTE(review): several brace/else lines and the private-map fast path
 * body are missing from this fragmentary extraction.
 */
1774 void attr_fastcall
afree(void *ptr
)
1777 unsigned attr_unused bit2
;
1780 struct per_thread
*pt
;
1781 struct arena
*a
= addr_to_arena(ptr
);
1782 if (unlikely((void *)a
== ptr
)) {
1786 idx
= midblock_to_idx(a
, ptr
);
1787 m
= idx_to_midblock(a
, a
->map
[idx
]);
1788 if (unlikely((void *)m
>= ptr
)) {
/* bit = offset / object-size, done as a multiply by the reciprocal. */
1792 bit
= ((unsigned)(cast_ptr(char *, ptr
) - cast_ptr(char *, m
)) * m
->s
.reciprocal
) / RECIPROCAL_BASE
;
1793 bit2
= (unsigned)(cast_ptr(char *, ptr
) - cast_ptr(char *, m
)) / m
->s
.size
;
1794 ajla_assert(bit
== bit2
, (file_line
, "afree: reciprocal doesn't match: size %u, reciprocal %u, %u != %u", m
->s
.size
, m
->s
.reciprocal
, bit
, bit2
));
1795 bit_or
= (bitmap_t
)1 << bit
;
/* Freeing thread owns this run? Then the private map can take the bit. */
1796 pt
= amalloc_per_thread();
1797 if (likely(pt
!= NULL
) && likely(pt
->small_runs
[m
->s
.cls
+ 1] == m
)) {
1800 #ifdef AMALLOC_EAGER_FREE
1801 size_t idx
= (size_t)-1;
1802 struct small_block_cache
*sbc
= NULL
;
/* Will this free make the run completely free?  Remember its cache. */
1803 if (unlikely((load_relaxed(&m
->s
.atomic_map
) | bit_or
) == (bitmap_t
)-1 << m
->s
.reserved_bits
)) {
1805 sbc
= &small_block_cache
[m
->s
.cls
];
1808 amalloc_atomic_or(&m
->s
.atomic_map
, bit_or
);
1809 #ifdef AMALLOC_EAGER_FREE
1810 if (unlikely(idx
!= (size_t)-1)) {
1811 amalloc_test_free_small(sbc
, idx
, m
);
/*
 * Generic realloc fallback: allocate a new block and copy
 * min(new_size, old_size) bytes.
 * NOTE(review): the equal-size early return, the allocation-failure
 * check, and the free of the old block (original lines 1821,
 * 1823-1824, 1826-1827) are not visible in this fragment.
 */
1817 static attr_noinline
void *arealloc_malloc(void *ptr
, size_t old_size
, size_t new_size
)
1820 if (unlikely(new_size
== old_size
))
1822 n
= amalloc(new_size
);
1825 memcpy(n
, ptr
, minimum(new_size
, old_size
));
/*
 * Realloc for huge allocations.  The entry is temporarily removed from
 * the huge tree; while the new size still warrants a huge mapping
 * (>= page_size) the run is resized in place via amalloc_run_realloc()
 * and the entry re-inserted (the ptr/len updates on success, original
 * lines 1835-1837, are not visible here).  Smaller sizes re-insert the
 * untouched entry and fall back to allocate-copy-free.
 */
1830 static attr_noinline
void *arealloc_huge(void *ptr
, size_t size
)
1832 struct huge_entry
*e
= huge_tree_delete(ptr
);
1833 if (likely(size
>= page_size
)) {
1834 void *n
= amalloc_run_realloc(ptr
, e
->len
, size
);
1838 huge_tree_insert(e
);
/* Resize failed or result too small to stay huge - restore the entry. */
1842 huge_tree_insert(e
);
1843 return arealloc_malloc(ptr
, e
->len
, size
);
/*
 * Realloc for midblock allocations.  The current run length comes from
 * map[idx] (index of the last midblock).  If the new size still
 * belongs to the midblock range, try to grow/shrink the run in place;
 * on success the original pointer (the run's first midblock) is
 * returned.  Otherwise, or when the in-place attempt fails, fall back
 * to allocate-copy-free with the old byte size reconstructed from the
 * block count.
 * NOTE(review): arena locking around the in-place attempt and the
 * failure branch (original lines 1850, 1852, 1854-1855, 1857) are not
 * visible in this fragment.
 */
1846 static attr_noinline
void *arealloc_mid(struct arena
*a
, unsigned idx
, size_t size
)
1848 unsigned existing_blocks
= a
->map
[idx
] + 1 - idx
;
1849 if (size
> DIRECT_LIMIT
&& size
<= ARENA_SIZE
) {
1851 unsigned new_blocks
= round_up(size
, MIDBLOCK_SIZE
) >> MIDBLOCK_BITS
;
1853 f
= arena_try_realloc_midblock(a
, idx
, existing_blocks
, new_blocks
);
1856 return idx_to_midblock(a
, idx
);
1858 return arealloc_malloc(idx_to_midblock(a
, idx
), existing_blocks
<< MIDBLOCK_BITS
, size
);
/*
 * Public realloc entry point, dispatching like afree(): arena-aligned
 * pointer -> huge path; pointer at/before its run header -> midblock
 * path; otherwise a small object, which is only moved when the new
 * size maps to a different size class.
 * NOTE(review): the "same class, return ptr unchanged" tail (original
 * line 1877) and several brace lines are not visible in this fragment.
 */
1861 void * attr_fastcall
arealloc(void *ptr
, size_t size
)
1865 struct arena
*a
= addr_to_arena(ptr
);
1866 if (unlikely((void *)a
== ptr
)) {
1867 return arealloc_huge(ptr
, size
);
1869 idx
= midblock_to_idx(a
, ptr
);
1870 m
= idx_to_midblock(a
, a
->map
[idx
]);
1871 if (unlikely((void *)m
>= ptr
)) {
1872 return arealloc_mid(a
, idx
, size
);
/* Small object: move only if the size class actually changes. */
1874 if (INDEX_TO_CLASS(SIZE_TO_INDEX(size
)) != m
->s
.cls
) {
1875 return arealloc_malloc(ptr
, m
->s
.size
, size
);
1880 bool attr_fastcall
aptr_is_huge(void *ptr
)
1882 struct arena
*a
= addr_to_arena(ptr
);
1883 return (void *)a
== ptr
;
/*
 * One-time single-threaded initialization.  The first loop is a debug
 * self-test verifying that the 16-bit reciprocal trick (used by
 * afree() to map a byte offset to a bitmap bit) is exact for every
 * size class and every bit index.  Then the global trees and the
 * per-class small-block caches are set up; each cache array is itself
 * allocated with amalloc (DIRECT_LIMIT * 2 bytes).
 * NOTE(review): the declarations of "size"/"i", the early return when
 * the allocator is disabled, and the pointer-compression branch body
 * (original lines 1888-1892, 1906, 1909-1910) are not visible here.
 */
1887 void amalloc_init(void)
1893 for (size
= MIN_ALLOC
; size
<= DIRECT_LIMIT
; size
+= MIN_ALLOC
) {
1894 ushort_efficient_t reciprocal
= div_16(RECIPROCAL_BASE
+ size
- 1, size
);
1895 for (i
= 1; i
< BITMAP_BITS
; i
++) {
1896 unsigned offs
= i
* size
;
1897 unsigned ii
= (offs
* reciprocal
) / RECIPROCAL_BASE
;
1898 if (unlikely(ii
!= i
))
1899 internal(file_line
, "amalloc_init: reciprocal doesn't match: size %u, i %u, ii %u", size
, i
, ii
);
1905 if (unlikely(!amalloc_enabled
))
1907 #ifdef POINTER_COMPRESSION_POSSIBLE
1908 if (pointer_compression_enabled
)
/* Start in single-threaded mode; thread1 is the implicit first thread. */
1911 amalloc_threads_initialized
= false;
1912 amalloc_per_thread_init(&thread1
);
1913 tree_init(&huge_tree
);
1914 tree_init(&arena_tree
);
1915 for (i
= 0; i
< N_CLASSES
; i
++) {
1916 small_block_cache
[i
].array
= amalloc(DIRECT_LIMIT
* 2);
1917 if (unlikely(!small_block_cache
[i
].array
)) {
1918 fatal("amalloc failed");
1920 small_block_cache
[i
].array_size
= 0;
1921 small_block_cache
[i
].allocated_size
= DIRECT_LIMIT
* 2 / sizeof(union midblock
*);
1922 small_block_cache
[i
].rover
= 0;
/*
 * Switch the allocator to multi-threaded mode: create the global tree
 * mutexes, the per-thread TLS key (thread1 becomes the calling
 * thread's state), and the per-class cache mutexes; it is an internal
 * error to call this twice.
 * NOTE(review): the trailing amalloc/arealloc exercise (original lines
 * 1947-1953) appears to be disabled test code - confirm the
 * surrounding #if in the full file.
 */
1926 void amalloc_init_multithreaded(void)
1929 if (unlikely(!amalloc_enabled
))
1931 if (unlikely(amalloc_threads_initialized
))
1932 internal(file_line
, "amalloc_init_multithreaded: amalloc_threads_initialized already set");
1933 mutex_init(&huge_tree_mutex
);
1934 mutex_init(&arena_tree_mutex
);
1935 tls_init(struct per_thread
*, per_thread
);
1936 tls_set(struct per_thread
*, per_thread
, &thread1
);
1937 for (i
= 0; i
< N_CLASSES
; i
++) {
1938 mutex_init(&small_block_cache
[i
].mutex
);
1940 #ifdef POINTER_COMPRESSION_POSSIBLE
1941 if (pointer_compression_enabled
)
1942 mutex_init(&rmap_mutex
);
1944 amalloc_threads_initialized
= true;
1947 void *x
= amalloc(0x2000000);
1949 x
= arealloc(x
, 0x4000000);
1951 x
= arealloc(x
, 0x2000000);
1953 x
= arealloc(x
, 0x4000000);
/*
 * Tear multi-threaded mode back down, destroying the mutexes and the
 * TLS key in roughly the reverse order of
 * amalloc_init_multithreaded(); it is an internal error to call this
 * when multi-threading was never initialized.
 */
1960 void amalloc_done_multithreaded(void)
1963 if (unlikely(!amalloc_enabled
))
1965 if (unlikely(!amalloc_threads_initialized
))
1966 internal(file_line
, "amalloc_done_multithreaded: amalloc_threads_initialized not set");
1967 amalloc_threads_initialized
= false;
1968 #ifdef POINTER_COMPRESSION_POSSIBLE
1969 if (pointer_compression_enabled
)
1970 mutex_done(&rmap_mutex
);
1972 for (i
= 0; i
< N_CLASSES
; i
++) {
1973 mutex_done(&small_block_cache
[i
].mutex
);
1975 tls_done(struct per_thread
*, per_thread
);
1976 mutex_done(&arena_tree_mutex
);
1977 mutex_done(&huge_tree_mutex
);
1980 void amalloc_done(void)
1983 if (unlikely(!amalloc_enabled
))
1985 if (unlikely(amalloc_threads_initialized
))
1986 internal(file_line
, "amalloc_done: amalloc_threads_initialized set");
1987 detach_pt_data(&thread1
);
1988 for (i
= 0; i
< N_CLASSES
; i
++) {
1989 struct small_block_cache
*sbc
= &small_block_cache
[i
];
1990 bitmap_t all_free
= (bitmap_t
)-1 << reserved_bits(i
);
1992 for (j
= 0; j
< sbc
->array_size
; j
++) {
1993 union midblock
*m
= sbc
->array
[j
];
1994 if (unlikely(load_relaxed(&m
->s
.atomic_map
) != all_free
))
1995 internal(file_line
, "amalloc_done: small block memory leak, class %u", i
);
1996 /*debug("end: freeing cached slab: %u, %p", i, m);*/
1997 amalloc_free_small(m
);
2000 sbc
->array
= BAD_POINTER_1
;
2002 if (unlikely(!tree_is_empty(&huge_tree
)))
2003 internal(file_line
, "amalloc_done: huge tree is not empty");
2004 if (unlikely(!tree_is_empty(&arena_tree
)))
2005 internal(file_line
, "amalloc_done: arena tree is not empty");
2010 struct tree_entry
*e
, *f
;
2011 debug("%x", (unsigned)(ARENA_MIDBLOCKS
- ARENA_PREFIX
));
2012 for (e
= tree_first(&arena_tree
); e
; e
= tree_next(e
)) {
2013 struct arena
*a
= get_struct(e
, struct arena
, arena_entry
);
2014 debug("leaked arena %p, %x", a
, a
->max_midblock_run
);
2015 for (f
= tree_first(&a
->midblock_free
); f
; f
= tree_next(f
)) {
2016 union midblock
*m
= get_struct(f
, union midblock
, free_entry
);
2017 unsigned idx
= midblock_to_idx(a
, m
);
2018 unsigned len
= (a
->map
[idx
] & ~MAP_FREE
) + 1 - idx
;
2019 debug("free midblock: %p: %x, %x", m
, idx
, len
);
2020 if (idx
+ len
< ARENA_MIDBLOCKS
) {
2021 union midblock
*m2
= idx_to_midblock(a
, idx
+ len
);
2022 debug("small run(%p): %lx, atomic_map: %lx, size: %u, class %u", m2
, m2
->s
.map
, m2
->s
.atomic_map
, m2
->s
.size
, m2
->s
.cls
);
2028 #ifdef POINTER_COMPRESSION_POSSIBLE
2029 if (pointer_compression_enabled
)