/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
/*#define AMALLOC_EAGER_FREE*/
/*#define AMALLOC_TRIM_MIDBLOCKS*/
/*#define AMALLOC_USE_RANDOM_ROVER*/
#ifdef OS_OS2

#define OBJ_ANY		0x0400

static ULONG dosallocmem_attrib = PAG_READ | PAG_WRITE |
#ifdef CODEGEN_USE_HEAP
	PAG_EXECUTE |
#endif
	OBJ_ANY;

#else

#ifndef CODEGEN_USE_HEAP
#define PROT_HEAP	(PROT_READ | PROT_WRITE)
#else
#define PROT_HEAP	(PROT_READ | PROT_WRITE | PROT_EXEC)
#endif

#endif
#define ARENA_SIZE		((size_t)1 << ARENA_BITS)

#define MIDBLOCK_BITS		9
#define MIDBLOCK_SIZE		((size_t)1 << MIDBLOCK_BITS)

#define SMALL_BLOCK_TRIES	16

#define DIRECT_LIMIT		512
#define N_CLASSES		(DIRECT_LIMIT / MIN_ALLOC)
#define SIZE_TO_INDEX(s)	(((unsigned)(s) + MIN_ALLOC - 1) / MIN_ALLOC)
#define INDEX_TO_CLASS(i)	(!(i) ? 0 : (i) - 1)
#define CLASS_TO_SIZE(c)	(((c) + 1) * MIN_ALLOC)

#define ARENA_MIDBLOCKS		(ARENA_SIZE / MIDBLOCK_SIZE)

#define BITMAP_BITS		EFFICIENT_WORD_SIZE
#define bitmap_t		cat4(uint,BITMAP_BITS,_,t)
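/*
 * Size-class mapping for small allocations: SIZE_TO_INDEX rounds a request up
 * to a multiple of MIN_ALLOC, INDEX_TO_CLASS collapses the index by one, so
 * class c serves requests up to CLASS_TO_SIZE(c) = (c + 1) * MIN_ALLOC bytes,
 * giving N_CLASSES classes up to DIRECT_LIMIT. Larger requests bypass the
 * small-block path and are served from midblocks or huge mappings.
 */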
struct arena {
	struct tree midblock_free;
	struct tree_entry arena_entry;
	ushort_efficient_t max_midblock_run;
	uchar_efficient_t attached;
	ushort_efficient_t map[ARENA_MIDBLOCKS];
};

#define MAP_FREE		0x8000

#define ARENA_PREFIX		(round_up(sizeof(struct arena), MIDBLOCK_SIZE) >> MIDBLOCK_BITS)
#define MIDBLOCK_LIMIT		((ARENA_SIZE - (ARENA_PREFIX << MIDBLOCK_BITS)) / 2)
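/*
 * The map[] array describes runs of midblocks inside an arena: for an
 * allocated run, map[first] holds the index of the last midblock of the run
 * and map[last] holds the index of the first; for a free run the same two
 * entries are additionally tagged with MAP_FREE. This lets both the
 * allocation and the free path find run boundaries in constant time.
 */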
union midblock {
	struct tree_entry free_entry;
	struct {
		bitmap_t map;
		atomic_type bitmap_t atomic_map;
#ifdef AMALLOC_EAGER_FREE
		size_t index;
#endif
		uchar_efficient_t reserved_bits;
		uchar_efficient_t cls;
		ushort_efficient_t size;
		ushort_efficient_t reciprocal;
#define RECIPROCAL_BASE		0x8000
	} s;
};

struct huge_entry {
	void *ptr;
	size_t len;
	struct tree_entry entry;
};

struct per_thread {
	struct arena *arena;
	union midblock *small_runs[N_CLASSES + 1];
	tls_destructor_t destructor;
};
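/*
 * A small-block run is headed by a union midblock: s.map is the bitmap of
 * free slots owned by the thread that holds the run, while s.atomic_map
 * collects bits freed by other threads; s.reciprocal is a fixed-point
 * reciprocal of s.size (scaled by RECIPROCAL_BASE) used to turn a byte
 * offset into a slot index without a division. full_midblock below acts as
 * a sentinel run with no free slots.
 */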
static union midblock full_midblock;

static struct per_thread thread1;

static tls_decl(struct per_thread *, per_thread);

static uchar_efficient_t amalloc_threads_initialized;

static struct tree arena_tree;
static mutex_t arena_tree_mutex;

static struct tree huge_tree;
static mutex_t huge_tree_mutex;

static size_t page_size;

static struct small_block_cache {
	union midblock **array;
	size_t array_size;
	size_t allocated_size;
#ifndef AMALLOC_USE_RANDOM_ROVER
	size_t rover;
#else
	unsigned rover;
#endif
	mutex_t mutex;
} small_block_cache[N_CLASSES];
#if BITMAP_BITS == 16
#define BITMAP_ASM_X86_SUFFIX	"w"
#elif BITMAP_BITS == 32
#define BITMAP_ASM_X86_SUFFIX	"l"
#elif BITMAP_BITS == 64
#define BITMAP_ASM_X86_SUFFIX	"q"
#endif
static attr_always_inline size_t find_bit(bitmap_t bitmap)
{
#if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	__asm__ ("rep; bsf"BITMAP_ASM_X86_SUFFIX" %1, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
	return bitmap;
#endif
#if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_RBIT) && defined(HAVE_ARM_ASSEMBLER_CLZ)
	if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
		__asm__ (ARM_ASM_PREFIX "rbit %0, %1; clz %0, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
		return bitmap;
	}
#endif
#if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
	if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
		__asm__ (".arch ev68; cttz %1, %0" : "=r"(bitmap) : "r"(bitmap));
		return bitmap;
	}
#endif
#if defined(ARCH_RISCV64) && defined(HAVE_GCC_ASSEMBLER) && !defined(__riscv_zbb)
	if (likely(cpu_test_feature(CPU_FEATURE_zbb))) {
		if (BITMAP_BITS == 32) {
			__asm__ ("mv t6, %1; .word 0x601f9f9b; mv %0, t6" : "=r"(bitmap) : "r"(bitmap) : "t6");
			return bitmap;
		}
		if (BITMAP_BITS == 64) {
			__asm__ ("mv t6, %1; .word 0x601f9f93; mv %0, t6" : "=r"(bitmap) : "r"(bitmap) : "t6");
			return bitmap;
		}
	}
#endif
#if defined(ARCH_S390_64) && defined(HAVE_GCC_ASSEMBLER) && !(__ARCH__ >= 7)
	if (likely(cpu_test_feature(CPU_FEATURE_extended_imm))) {
		if (BITMAP_BITS == 32) {
			__asm__("llgfr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap) : "r"(bitmap & -bitmap) : "r0", "r1", "cc");
			return 63 - bitmap;
		}
		if (BITMAP_BITS == 64) {
			__asm__("lgr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap) : "r"(bitmap & -bitmap) : "r0", "r1", "cc");
			return 63 - bitmap;
		}
	}
#endif
#if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
	__asm__ ("popc %1, %0" : "=r"(bitmap) : "r"((bitmap - 1) & ~bitmap));
	return bitmap;
#endif
#if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_CTZ)
	return __builtin_ctz(bitmap);
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_CTZ)
	return __builtin_ctzll(bitmap);
#elif BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_FFS)
	return ffs(bitmap) - 1;
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_FFSLL)
	return ffsll(bitmap) - 1;
#endif
}
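/* find_bit() returns the index of the lowest set bit; the inline-assembly
 * variants above are optional fast paths, with compiler builtins or ffs()
 * as the portable fallback. */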
static attr_always_inline unsigned count_bits(bitmap_t bitmap)
{
#if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86) && defined(HAVE_X86_ASSEMBLER_POPCNT)
	if (likely(cpu_test_feature(CPU_FEATURE_popcnt))) {
		__asm__ ("popcnt"BITMAP_ASM_X86_SUFFIX" %1, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
		return bitmap;
	}
#endif
#if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_VFP)
	if (likely(cpu_test_feature(CPU_FEATURE_neon))) {
		__asm__ (ARM_ASM_PREFIX "			\n\
			vld1.32 d0[0], [ %0 ]			\n\
			vcnt.8 d0, d0				\n\
			vpaddl.u8 d0, d0			\n\
			vpaddl.u16 d0, d0			\n\
			vst1.32 d0[0], [ %0 ]			\n\
		" : : "r"(&bitmap) : "d0", "memory");
		return bitmap;
	}
#endif
#if BITMAP_BITS == 64 && defined(INLINE_ASM_GCC_ARM64)
	if (likely(cpu_test_feature(CPU_FEATURE_neon))) {
		unsigned result;
		__asm__ (ARM_ASM_PREFIX "			\n\
			fmov d0, %1				\n\
			cnt v0.8b, v0.8b			\n\
			uaddlv h0, v0.8b			\n\
			fmov %w0, s0				\n\
		" : "=r"(result) : "r"(bitmap) : "d0");
		return result;
	}
#endif
#if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
	__asm__ ("popc %1, %0" : "=r"(bitmap) : "r"(bitmap));
	return bitmap;
#endif
#if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
	if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
		__asm__ (".arch ev68; ctpop %1, %0" : "=r"(bitmap) : "r"(bitmap));
		return bitmap;
	}
#endif
#if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_POPCOUNT)
	return __builtin_popcount(bitmap);
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_POPCOUNT)
	return __builtin_popcountll(bitmap);
#else
	{
		unsigned ret = 0;
		while (bitmap)
			bitmap &= bitmap - 1, ret++;
		return ret;
	}
#endif
}
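/* count_bits() is a population count with the same structure: optional
 * architecture-specific fast paths, then compiler builtins, then the classic
 * clear-lowest-bit loop. */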
static attr_always_inline uint16_t div_16(uint16_t x, uint16_t y)
{
#if defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_SDIV_UDIV)
	if (likely(cpu_test_feature(CPU_FEATURE_idiv))) {
		unsigned result;
		__asm__ (ARM_ASM_PREFIX "udiv %0, %1, %2" : "=r"(result) : "r"((unsigned)x), "r"((unsigned)y));
		return result;
	}
#endif
	return x / y;
}
static attr_always_inline void atomic_or(atomic_type bitmap_t *bitmap, bitmap_t value)
{
#if defined(HAVE_C11_ATOMICS)
	atomic_fetch_or_explicit(bitmap, value, memory_order_release);
#elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	__asm__ volatile ("lock; or"BITMAP_ASM_X86_SUFFIX" %2, %0":"=m"(*bitmap):"m"(*bitmap),"ir"(value):"cc","memory");
#elif defined(HAVE_SYNC_AND_FETCH)
	__sync_or_and_fetch(bitmap, value);
#else
	if (likely(amalloc_threads_initialized))
		address_lock((void *)bitmap, DEPTH_ARENA);
	*bitmap |= value;
	if (likely(amalloc_threads_initialized))
		address_unlock((void *)bitmap, DEPTH_ARENA);
#endif
}
static attr_always_inline bitmap_t atomic_xchg(atomic_type bitmap_t *bitmap, bitmap_t value)
{
#if defined(HAVE_C11_ATOMICS)
	return atomic_exchange_explicit(bitmap, value, memory_order_acquire);
#elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	__asm__ volatile ("xchg"BITMAP_ASM_X86_SUFFIX" %1, %0":"=m"(*bitmap),"=r"(value):"m"(*bitmap),"1"(value):"memory");
	return value;
#elif defined(HAVE_SYNC_AND_FETCH)
	{
		bitmap_t v;
		do {
			v = *bitmap;
		} while (unlikely(!__sync_bool_compare_and_swap(bitmap, v, value)));
		return v;
	}
#else
	{
		bitmap_t v;
		if (likely(amalloc_threads_initialized))
			address_lock((void *)bitmap, DEPTH_ARENA);
		v = *bitmap;
		*bitmap = value;
		if (likely(amalloc_threads_initialized))
			address_unlock((void *)bitmap, DEPTH_ARENA);
		return v;
	}
#endif
}
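/*
 * atomic_or() and atomic_xchg() implement the cross-thread free protocol:
 * a remote free ORs its bit into atomic_map with release semantics, and the
 * owning thread later grabs all remotely freed bits at once by exchanging
 * atomic_map with 0 (acquire), merging them into its private map.
 */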
static inline struct arena *addr_to_arena(void *addr)
{
	return num_to_ptr(ptr_to_num(addr) & ~((1 << ARENA_BITS) - 1));
}

static inline union midblock *idx_to_midblock(struct arena *a, unsigned midblock)
{
	return cast_ptr(union midblock *, cast_ptr(char *, a) + (midblock * MIDBLOCK_SIZE));
}

static inline unsigned midblock_to_idx(struct arena *a, void *m)
{
	return (ptr_to_num(m) - ptr_to_num(a)) >> MIDBLOCK_BITS;
}
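/* Arenas are allocated with ARENA_SIZE alignment, so the owning arena of any
 * interior pointer is recovered by masking the low ARENA_BITS bits, and a
 * midblock index is just the offset divided by MIDBLOCK_SIZE. */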
static void huge_tree_lock(void);
static void huge_tree_unlock(void);
#ifdef OS_OS2

static struct list pad_list;

struct pad_descriptor {
	struct list pad_entry;
	void *base;
	size_t size;
};

static void amalloc_do_unmap(void *ptr, size_t attr_unused size)
{
	APIRET r;
again:
	r = DosFreeMem(ptr);
	if (r == ERROR_INTERRUPT)
		goto again;
	if (unlikely(r != 0))
		internal(file_line, "amalloc_do_unmap: DosFreeMem(%p) returned error: %lu", ptr, r);
}
static void *do_dosallocmem(size_t size, bool commit)
{
	void *ptr;
	APIRET r;
again:
	r = DosAllocMem(&ptr, size, dosallocmem_attrib | (commit ? PAG_COMMIT : 0));
	if (unlikely(r != 0)) {
		if (r == ERROR_INTERRUPT)
			goto again;
		return NULL;
	}
	return ptr;
}

static bool do_commitmem(void *ptr, size_t size)
{
	APIRET r;
again:
	r = DosSetMem(ptr, size, PAG_READ | PAG_WRITE |
#ifdef CODEGEN_USE_HEAP
		PAG_EXECUTE |
#endif
		PAG_COMMIT);
	if (unlikely(r != 0)) {
		if (r == ERROR_INTERRUPT)
			goto again;
		return false;
	}
	return true;
}
static void *os2_alloc_aligned(size_t size)
{
	void *ptr, *pad;
	size_t offset, aligned_size;

	aligned_size = round_up(size, ARENA_SIZE);
	if (unlikely(!aligned_size))
		return NULL;
again:
	ptr = do_dosallocmem(aligned_size, likely(size == aligned_size));
	if (unlikely(!ptr))
		return NULL;
	offset = ptr_to_num(ptr) & (ARENA_SIZE - 1);
	if (unlikely(offset != 0)) {
		struct pad_descriptor *pd;
		amalloc_do_unmap(ptr, size);
		pad = do_dosallocmem(ARENA_SIZE - offset, false);
		if (unlikely(!pad))
			return NULL;
		pd = malloc(sizeof(struct pad_descriptor));
		if (unlikely(!pd)) {
			amalloc_do_unmap(pad, ARENA_SIZE - offset);
			return NULL;
		}
		pd->base = pad;
		pd->size = ARENA_SIZE - offset;
		list_add(&pad_list, &pd->pad_entry);
		goto again;
	}
	if (unlikely(size != aligned_size)) {
		if (unlikely(!do_commitmem(ptr, size))) {
			amalloc_do_unmap(ptr, aligned_size);
			return NULL;
		}
	}
	return ptr;
}
bool os2_test_for_32bit_tcpip(const char *ptr);

static void amalloc_os_init(void)
{
	if (dosallocmem_attrib & OBJ_ANY) {
		void *ptr;
		ptr = do_dosallocmem(0x1000, true);
		if (ptr) {
			if (ptr_to_num(ptr) < 0x20000000UL) {
				dosallocmem_attrib &= ~OBJ_ANY;
			} else {
				if (!os2_test_for_32bit_tcpip(ptr))
					dosallocmem_attrib &= ~OBJ_ANY;
			}
			amalloc_do_unmap(ptr, 0x1000);
		} else {
			dosallocmem_attrib &= ~OBJ_ANY;
		}
	}
	list_init(&pad_list);
	page_size = 4096;
}
static void amalloc_os_done(void)
{
	while (!list_is_empty(&pad_list)) {
		struct pad_descriptor *pd = get_struct(pad_list.prev, struct pad_descriptor, pad_entry);
		list_del(&pd->pad_entry);
		amalloc_do_unmap(pd->base, pd->size);
		free(pd);
	}
}
#else

static inline void amalloc_do_unmap(void *ptr, size_t size)
{
	os_munmap(ptr, size, false);
}

static void amalloc_os_init(void)
{
	page_size = os_getpagesize();
}

static void amalloc_os_done(void)
{
}

#endif
#ifdef POINTER_COMPRESSION_POSSIBLE

static bool do_enable_mapping(void *ptr, size_t size, bool attr_unused clr)
{
#if defined(OS_CYGWIN) || defined(OS_WIN32)
	ajla_error_t sink;
	if (unlikely(!os_mprotect(ptr, size, PROT_HEAP, &sink)))
		return false;
	if (clr)
		memset(ptr, 0, size);
	return true;
#else
	ajla_error_t sink;
	void *r = os_mmap(ptr, size, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, handle_none, 0, &sink);
	if (unlikely(r == MAP_FAILED))
		return false;
	if (unlikely(r != ptr))
		internal(file_line, "do_enable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r, ptr);
	return true;
#endif
}
static void do_disable_mapping(void *ptr, size_t size)
{
#if defined(OS_CYGWIN) || defined(OS_WIN32)
	ajla_error_t err;
	if (unlikely(!os_mprotect(ptr, size, PROT_NONE, &err))) {
		warning("failed to clear existing mapping: mprotect(%p, %"PRIxMAX") returned error: %s", ptr, (uintmax_t)size, error_decode(err));
		return;
	}
#else
	ajla_error_t err;
	void *r = os_mmap(ptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, handle_none, 0, &err);
	if (unlikely(r == MAP_FAILED)) {
		warning("failed to clear existing mapping: os_mmap(%p, %"PRIxMAX") returned error: %s", ptr, (uintmax_t)size, error_decode(err));
		return;
	}
	if (unlikely(r != ptr))
		internal(file_line, "do_disable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r, ptr);
#endif
}
static struct tree rmap_tree;

static struct reserved_map {
	struct tree_entry free_entry;
	unsigned f;
} *rmap;

static mutex_t rmap_mutex;

#define RESERVED_MAP_ENTRIES	((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE - ARENA_BITS))
static void rmap_lock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_lock(&rmap_mutex);
}

static void rmap_unlock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_unlock(&rmap_mutex);
}
static int rmap_compare(const struct tree_entry *entry, uintptr_t idx2)
{
	struct reserved_map *rmap1 = get_struct(entry, struct reserved_map, free_entry);
	unsigned idx1 = rmap1 - rmap;
	unsigned len1, len2;
	len1 = rmap[idx1].f + 1 - idx1;
	len2 = rmap[idx2].f + 1 - idx2;
	if (len1 != len2)
		return len1 < len2 ? -1 : 1;
	if (idx1 == idx2)
		return 0;
	return idx1 < idx2 ? -1 : 1;
}
static void reserve_free_merge_run(unsigned idx, unsigned len)
{
	struct tree_insert_position ins;
	if (rmap[idx - 1].f) {
		unsigned more = idx - rmap[idx - 1].f;
		idx -= more;
		len += more;
		tree_delete(&rmap[idx].free_entry);
	}
	if (idx + len < RESERVED_MAP_ENTRIES && rmap[idx + len].f) {
		unsigned more = rmap[idx + len].f + 1 - (idx + len);
		tree_delete(&rmap[idx + len].free_entry);
		len += more;
	}
	rmap[idx].f = idx + len - 1;
	rmap[idx + len - 1].f = idx;
	tree_find_for_insert(&rmap_tree, rmap_compare, idx, &ins);
	tree_insert_after_find(&rmap[idx].free_entry, &ins);
}
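/*
 * The reserved map mirrors the arena map[] encoding at arena granularity:
 * rmap[first].f holds the index of the last arena of a free run and
 * rmap[last].f points back to the first, so freeing a run can coalesce with
 * both neighbours before being reinserted into rmap_tree.
 */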
static void reserve_sub_alloc(unsigned idx1, unsigned len1, unsigned idx, unsigned len)
{
	struct reserved_map *rmap1 = &rmap[idx1];
	rmap[idx].f = 0;
	rmap[idx + len - 1].f = 0;
	tree_delete(&rmap1->free_entry);
	if (idx > idx1) {
		reserve_free_merge_run(idx1, idx - idx1);
	}
	if (idx + len < idx1 + len1) {
		reserve_free_merge_run(idx + len, (idx1 + len1) - (idx + len));
	}
}
static int rmap_find(const struct tree_entry *entry, uintptr_t len)
{
	struct reserved_map *rmap1 = get_struct(entry, struct reserved_map, free_entry);
	unsigned idx1 = rmap1 - rmap;
	unsigned len1 = rmap[idx1].f + 1 - idx1;
	if (len1 < len)
		return -1;
	if (len1 > len)
		return 1;
	return 0;
}
static unsigned reserve_alloc_run(unsigned al, unsigned len)
{
	struct tree_entry *ee;
	ee = tree_find_best(&rmap_tree, rmap_find, len + al - 1);
	if (likely(ee != NULL)) {
		struct reserved_map *rmap1 = get_struct(ee, struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		unsigned idx = round_up(idx1, al);
		reserve_sub_alloc(idx1, len1, idx, len);
		return idx;
	}
	return 0;
}
static bool reserve_realloc_run(unsigned start, unsigned old_len, unsigned new_len)
{
	unsigned xlen;
	if (unlikely(old_len >= new_len)) {
		if (unlikely(old_len == new_len))
			return true;
		rmap[start + new_len - 1].f = 0;
		reserve_free_merge_run(start + new_len, old_len - new_len);
		return true;
	}
	if (unlikely(start + old_len == RESERVED_MAP_ENTRIES))
		return false;
	if (!rmap[start + old_len].f)
		return false;
	xlen = rmap[start + old_len].f + 1 - (start + old_len);
	if (xlen < new_len - old_len)
		return false;
	tree_delete(&rmap[start + old_len].free_entry);
	rmap[start + new_len - 1].f = 0;
	if (xlen > new_len - old_len)
		reserve_free_merge_run(start + new_len, xlen - (new_len - old_len));
	return true;
}
static unsigned reserve_last_run(unsigned len)
{
	struct tree_entry *e;
	struct reserved_map *rmap1;
	unsigned idx1, len1;
	unsigned best_idx = 0;
	for (e = tree_first(&rmap_tree); e; e = tree_next(e)) {
		rmap1 = get_struct(e, struct reserved_map, free_entry);
		idx1 = rmap1 - rmap;
		len1 = rmap[idx1].f + 1 - idx1;
		if (likely(len1 >= len)) {
			if (idx1 > best_idx)
				best_idx = idx1;
		}
	}
	if (unlikely(!best_idx))
		return 0;
	idx1 = best_idx;
	len1 = rmap[idx1].f + 1 - idx1;
	reserve_sub_alloc(idx1, len1, idx1 + len1 - len, len);
	return idx1 + len1 - len;
}
bool amalloc_ptrcomp_try_reserve_range(void *ptr, size_t length)
{
	unsigned idx, len;
	struct tree_entry *e;

	if (unlikely(!length))
		return false;

	if (unlikely(ptr_to_num(ptr) & (ARENA_SIZE - 1)))
		return false;
	idx = ptr_to_num(ptr) >> ARENA_BITS;
	len = (length + ARENA_SIZE - 1) >> ARENA_BITS;

	rmap_lock();
	for (e = tree_first(&rmap_tree); e; e = tree_next(e)) {
		struct reserved_map *rmap1 = get_struct(e, struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		if (idx >= idx1 && idx + len <= idx1 + len1) {
			reserve_sub_alloc(idx1, len1, idx, len);
			rmap_unlock();
			return true;
		}
	}
	rmap_unlock();
	return false;
}
static bool try_to_reserve_memory(void *ptr, size_t len)
{
	ajla_error_t sink;
	void *res;
	int extra_flags =
#ifdef MAP_EXCL
		MAP_FIXED | MAP_EXCL |
#endif
		0;

	if (ptr_to_num(ptr) + len > (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE))
		return false;

	res = os_mmap(ptr, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | extra_flags, handle_none, 0, &sink);
	if (unlikely(res == MAP_FAILED))
		return false;

	if (res != ptr) {
		os_munmap(res, len, false);
		return false;
	}
	return true;
}
static void reserve_memory(void)
{
	uintptr_t ptr, step;
	tree_init(&rmap_tree);
	rmap = os_mmap(NULL, RESERVED_MAP_ENTRIES * sizeof(struct reserved_map), PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, NULL);
#if defined(OS_CYGWIN)
	step = ARENA_SIZE * 256;
	ptr = round_up((uintptr_t)1 << 34, step);
#elif defined(__sun__)
	step = ARENA_SIZE;
	ptr = round_up((uintptr_t)1 << 31, step);
#else
	step = ARENA_SIZE;
	ptr = step;
#endif
	while (ptr < (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE)) {
		uintptr_t len, bit;
		if (!try_to_reserve_memory(num_to_ptr(ptr), step)) {
			ptr += step;
			continue;
		}
		os_munmap(num_to_ptr(ptr), step, false);

		len = ((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE)) - ptr;
		if (try_to_reserve_memory(num_to_ptr(ptr), len)) {
			reserve_free_merge_run(ptr >> ARENA_BITS, len >> ARENA_BITS);
			break;
		}

		bit = (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE - 1);
		len = 0;
		for (; bit >= step; bit >>= 1) {
			if (try_to_reserve_memory(num_to_ptr(ptr), len | bit)) {
				os_munmap(num_to_ptr(ptr), len | bit, false);
				len |= bit;
			}
		}

		if (likely(try_to_reserve_memory(num_to_ptr(ptr), len))) {
			reserve_free_merge_run(ptr >> ARENA_BITS, len >> ARENA_BITS);
		} else {
			len = 0;
		}
		ptr += len + step;
	}
	if (unlikely(tree_is_empty(&rmap_tree)))
		fatal("unable to reserve any memory for pointer compression");
}
static void unreserve_memory(void)
{
	while (!tree_is_empty(&rmap_tree)) {
		struct reserved_map *rmap1 = get_struct(tree_any(&rmap_tree), struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		amalloc_do_unmap(num_to_ptr((uintptr_t)idx1 << ARENA_BITS), (size_t)len1 << ARENA_BITS);
		tree_delete(&rmap[idx1].free_entry);
	}
	amalloc_do_unmap(rmap, RESERVED_MAP_ENTRIES * sizeof(struct reserved_map));
}

#endif
void amalloc_run_free(void *ptr, size_t length)
{
	if (unlikely(!length))
		return;
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled) {
		unsigned idx = ptr_to_num(ptr) >> ARENA_BITS;
		size_t l = (length + ARENA_SIZE - 1) >> ARENA_BITS;
		do_disable_mapping(ptr, l << ARENA_BITS);
		rmap_lock();
		reserve_free_merge_run(idx, l);
		rmap_unlock();
		return;
	}
#endif
	amalloc_do_unmap(ptr, length);
}
void *amalloc_run_alloc(size_t al, size_t length, bool attr_unused clr, bool attr_unused saved)
{
	size_t attr_unused extra_length;
	void *ptr;
	void attr_unused *base_address;
	uintptr_t attr_unused nptr, aptr, fptr, eptr;
	ajla_error_t attr_unused sink;

	if (unlikely(al < page_size))
		al = page_size;

	length = round_up(length, page_size);
	if (unlikely(!length))
		return NULL;

#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled) {
		unsigned res;
		size_t l;
		if (unlikely(al < ARENA_SIZE))
			al = ARENA_SIZE;
		al >>= ARENA_BITS;
		l = round_up(length, ARENA_SIZE) >> ARENA_BITS;
		if (unlikely(!l))
			return NULL;
		if (unlikely(l != (unsigned)l))
			return NULL;
		rmap_lock();
		if (unlikely(saved))
			res = reserve_last_run(l);
		else
			res = reserve_alloc_run(al, l);
		rmap_unlock();
		if (unlikely(!res))
			return NULL;
		ptr = num_to_ptr((uintptr_t)res << ARENA_BITS);
		if (unlikely(!do_enable_mapping(ptr, length, clr))) {
			rmap_lock();
			reserve_free_merge_run(res, l);
			rmap_unlock();
			return NULL;
		}
		goto ret_ptr;
	}
#endif

	base_address = NULL;
	if (unlikely(saved)) {
		base_address = num_to_ptr(round_down(ptr_to_num(&saved) / 2, page_size));
	}

#if defined(OS_OS2)
	if (unlikely(al > ARENA_SIZE))
		return NULL;
	ptr = os2_alloc_aligned(length);
#elif defined(MAP_ALIGNED) && !defined(__minix__)
	if (unlikely(al != (bitmap_t)al))
		return NULL;
	ptr = os_mmap(base_address, length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(find_bit(al)), handle_none, 0, &sink);
	if (unlikely(ptr == MAP_FAILED))
		return NULL;
	if (unlikely((ptr_to_num(ptr) & (al - 1)) != 0))
		fatal("os_mmap returned unaligned pointer: %p, required alignment %lx", ptr, (unsigned long)ARENA_SIZE);
#else
	extra_length = length + al - page_size;
	if (unlikely(extra_length < length))
		return NULL;

	ptr = os_mmap(base_address, extra_length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, &sink);

	if (unlikely(ptr == MAP_FAILED))
		return NULL;

	nptr = ptr_to_num(ptr);
	aptr = round_up(nptr, al);
	fptr = aptr + length;
	eptr = nptr + extra_length;
	amalloc_run_free(num_to_ptr(nptr), aptr - nptr);
	amalloc_run_free(num_to_ptr(fptr), eptr - fptr);

	ptr = num_to_ptr(aptr);
#endif

#ifdef POINTER_COMPRESSION_POSSIBLE
ret_ptr:
#endif

#if !defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && defined(MADV_HUGEPAGE)
	if (length == ARENA_SIZE) {
		int madv = MADV_HUGEPAGE;
		int r;
		EINTR_LOOP(r, madvise(ptr, length, madv));
		/*if (unlikely(r == -1)) {
			int er = errno;
			warning("madvise(%d) failed: %d, %s", madv, er, error_decode(error_from_errno(EC_SYSCALL, er)));
		}*/
	}
#endif

	return ptr;
}
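/*
 * When no aligned-mmap facility is available, amalloc_run_alloc()
 * over-allocates by (al - page_size), then trims the unaligned head
 * [nptr, aptr) and the tail [fptr, eptr) with amalloc_run_free(), leaving an
 * al-aligned run of the requested length.
 */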
static void *amalloc_run_realloc(void *ptr, size_t old_length, size_t new_length)
{
	void attr_unused *n;

	old_length = round_up(old_length, page_size);
	new_length = round_up(new_length, page_size);
	if (unlikely(!new_length))
		return NULL;

#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled) {
		bool f;
		size_t old_length_arenas = round_up(old_length, ARENA_SIZE) >> ARENA_BITS;
		size_t new_length_arenas = round_up(new_length, ARENA_SIZE) >> ARENA_BITS;
		if (unlikely(!new_length_arenas))
			return NULL;
		if (unlikely(new_length_arenas != (unsigned)new_length_arenas))
			return NULL;
		rmap_lock();
		f = reserve_realloc_run(ptr_to_num(ptr) >> ARENA_BITS, old_length_arenas, new_length_arenas);
		rmap_unlock();
		/*debug("try realloc: %p, %d", ptr, f);*/
		if (unlikely(!f))
			return NULL;
		if (likely(old_length < new_length)) {
			if (unlikely(!do_enable_mapping(cast_ptr(char *, ptr) + old_length, new_length - old_length, false))) {
				rmap_lock();
				reserve_free_merge_run((ptr_to_num(ptr) >> ARENA_BITS) + old_length_arenas, new_length_arenas - old_length_arenas);
				rmap_unlock();
				return NULL;
			}
		} else if (old_length > new_length) {
			do_disable_mapping(cast_ptr(char *, ptr) + new_length, old_length - new_length);
		}
		return ptr;
	}
#endif

#if defined(OS_OS2) || defined(OS_WIN32)
	if (new_length == old_length)
		return ptr;
	return NULL;
#else
	if (unlikely(new_length <= old_length)) {
		amalloc_run_free(cast_ptr(char *, ptr) + new_length, old_length - new_length);
		return ptr;
	}

#if defined(OS_HAS_MREMAP)
	{
		ajla_error_t sink;
		void *r = os_mremap(ptr, old_length, new_length, 0, NULL, &sink);
		if (r != MAP_FAILED) {
			if (unlikely(r != ptr))
				internal(file_line, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r, ptr);
			return r;
		}
	}
#else
	{
		ajla_error_t sink;
		n = os_mmap(cast_ptr(char *, ptr) + old_length, new_length - old_length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, &sink);
		if (likely(n != MAP_FAILED)) {
			if (n == cast_ptr(char *, ptr) + old_length)
				return ptr;
			amalloc_do_unmap(n, new_length - old_length);
		}
	}
#endif

	n = amalloc_run_alloc(ARENA_SIZE, new_length, false, false);
	if (unlikely(!n))
		return NULL;

#if defined(OS_HAS_MREMAP) && defined(MREMAP_FIXED)
	{
		ajla_error_t sink;
		void *r = os_mremap(ptr, old_length, new_length, MREMAP_MAYMOVE | MREMAP_FIXED, n, &sink);
		if (likely(r != MAP_FAILED)) {
			if (unlikely(r != n))
				internal(file_line, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r, n);
			return n;
		}
	}
#endif

	memcpy(n, ptr, old_length);
	amalloc_run_free(ptr, old_length);
	return n;
#endif
}
static inline void sbc_lock(struct small_block_cache *sbc)
{
	if (likely(amalloc_threads_initialized))
		mutex_lock(&sbc->mutex);
}

static inline void sbc_unlock(struct small_block_cache *sbc)
{
	if (likely(amalloc_threads_initialized))
		mutex_unlock(&sbc->mutex);
}

static inline void arena_lock(struct arena *a)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	address_lock(a, DEPTH_ARENA);
}

static inline void arena_unlock(struct arena *a)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	address_unlock(a, DEPTH_ARENA);
}

static void arena_tree_lock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_lock(&arena_tree_mutex);
}

static void arena_tree_unlock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_unlock(&arena_tree_mutex);
}

static void huge_tree_lock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_lock(&huge_tree_mutex);
}

static void huge_tree_unlock(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return;
	mutex_unlock(&huge_tree_mutex);
}
static int huge_tree_compare(const struct tree_entry *entry, uintptr_t addr)
{
	struct huge_entry *e = get_struct(entry, struct huge_entry, entry);
	if (ptr_to_num(e->ptr) < addr)
		return -1;
	if (ptr_to_num(e->ptr) > addr)
		return 1;
	return 0;
}

static void huge_tree_insert(struct huge_entry *e)
{
	struct tree_insert_position ins;
	struct tree_entry attr_unused *ee;
	huge_tree_lock();
	ee = tree_find_for_insert(&huge_tree, huge_tree_compare, ptr_to_num(e->ptr), &ins);
	ajla_assert_lo(ee == NULL, (file_line, "huge_tree_insert: entry for address %p is already present", e->ptr));
	tree_insert_after_find(&e->entry, &ins);
	huge_tree_unlock();
}

static struct huge_entry *huge_tree_delete(void *ptr)
{
	struct tree_entry *entry;
	huge_tree_lock();
	entry = tree_find(&huge_tree, huge_tree_compare, ptr_to_num(ptr));
	ajla_assert_lo(entry != NULL, (file_line, "huge_tree_delete: entry for address %p not found", ptr));
	tree_delete(entry);
	huge_tree_unlock();
	return get_struct(entry, struct huge_entry, entry);
}
static attr_always_inline struct per_thread *amalloc_per_thread(void)
{
	if (unlikely(!amalloc_threads_initialized))
		return &thread1;
	else
		return tls_get(struct per_thread *, per_thread);
}

static void arena_detach(struct arena *a);
static bool amalloc_detach_small(struct small_block_cache *sbc, union midblock *m);
#ifdef AMALLOC_EAGER_FREE
static void amalloc_test_free_small(struct small_block_cache *sbc, size_t idx, union midblock *m);
#endif
static void set_small_run(struct per_thread *pt, unsigned i, union midblock *m)
{
	if (!i)
		pt->small_runs[0] = m;
	pt->small_runs[i + 1] = m;
}
static void detach_pt_data(struct per_thread *pt)
{
	unsigned i;

	for (i = 0; i < N_CLASSES; i++) {
		union midblock *m = pt->small_runs[i + 1];
		struct small_block_cache *sbc = &small_block_cache[i];
#ifdef AMALLOC_EAGER_FREE
		size_t idx;
#endif
		set_small_run(pt, i, NULL);
		if (m == &full_midblock)
			continue;

		atomic_or(&m->s.atomic_map, m->s.map);
		sbc_lock(sbc);
		if (unlikely(!amalloc_detach_small(sbc, m))) {
			int er = errno;
			fatal("arealloc failed: %d, %s", er, error_decode(error_from_errno(EC_SYSCALL, er)));
		}
#ifdef AMALLOC_EAGER_FREE
		idx = m->s.index;
#endif
		sbc_unlock(sbc);
#ifdef AMALLOC_EAGER_FREE
		amalloc_test_free_small(sbc, idx, m);
#endif
	}

	pt->arena = BAD_POINTER_1;
}
static void per_thread_destructor(tls_destructor_t *destr)
{
	struct per_thread *pt = get_struct(destr, struct per_thread, destructor);
	detach_pt_data(pt);
	free(pt);
}
static void amalloc_per_thread_init(struct per_thread *pt)
{
	unsigned i;
	pt->arena = NULL;
	for (i = 0; i < N_CLASSES; i++)
		set_small_run(pt, i, &full_midblock);
}
static bool amalloc_per_thread_alloc(void)
{
	struct per_thread *pt;
	pt = malloc(sizeof(struct per_thread));
	if (unlikely(!pt))
		return false;
	amalloc_per_thread_init(pt);
	tls_destructor(&pt->destructor, per_thread_destructor);
	tls_set(struct per_thread *, per_thread, pt);
	return true;
}
static int arena_tree_compare(const struct tree_entry *entry, uintptr_t len)
{
	struct arena *a = get_struct(entry, struct arena, arena_entry);
	if (a->max_midblock_run < len)
		return -1;
	return 1;
}
static void arena_insert_locked(struct arena *a)
{
	struct tree_insert_position ins;
	struct tree_entry attr_unused *ee;
	if (a->max_midblock_run == ARENA_MIDBLOCKS - ARENA_PREFIX) {
		/*debug("freeing empty arena %p", a);*/
		amalloc_run_free(a, ARENA_SIZE);
		return;
	}
	ee = tree_find_for_insert(&arena_tree, arena_tree_compare, a->max_midblock_run, &ins);
	ajla_assert(ee == NULL, (file_line, "arena_insert_locked: entry for address %p is already present", a));
	tree_insert_after_find(&a->arena_entry, &ins);
}
static void arena_relink(struct arena *a, unsigned new_run)
{
	if (!a->attached) {
		bool full_free = new_run == ARENA_MIDBLOCKS - ARENA_PREFIX;
		if (new_run >= (unsigned)a->max_midblock_run * 2 || full_free) {
			arena_tree_lock();
			tree_delete(&a->arena_entry);
			a->max_midblock_run = new_run;
			arena_insert_locked(a);
			arena_tree_unlock();
		}
	}
}
static int midblock_compare(const struct tree_entry *entry, uintptr_t idx2)
{
	union midblock *m1 = get_struct(entry, union midblock, free_entry);
	struct arena *a = addr_to_arena(m1);
	unsigned idx1 = midblock_to_idx(a, m1);
	unsigned len1, len2;
	ajla_assert(a->map[idx1] & MAP_FREE, (file_line, "midblock_compare: idx1 is not free"));
	ajla_assert(a->map[idx2] & MAP_FREE, (file_line, "midblock_compare: idx2 is not free"));
	len1 = (a->map[idx1] & ~MAP_FREE) + 1 - idx1;
	len2 = (a->map[idx2] & ~MAP_FREE) + 1 - idx2;
	if (len1 != len2)
		return len1 < len2 ? -1 : 1;
	if (idx1 == idx2)
		return 0;
	return idx1 < idx2 ? -1 : 1;
}
static void arena_free_midblock(struct arena *a, unsigned start, unsigned len)
{
	struct tree_insert_position ins;
	a->map[start] = MAP_FREE | (start + len - 1);
	a->map[start + len - 1] = MAP_FREE | start;
	tree_find_for_insert(&a->midblock_free, midblock_compare, start, &ins);
	tree_insert_after_find(&idx_to_midblock(a, start)->free_entry, &ins);
}
static void arena_free_merge_midblock(struct arena *a, unsigned start, unsigned len)
{
	uintptr_t attr_unused start_ptr, end_ptr;
	if (start && a->map[start - 1] & MAP_FREE) {
		unsigned more = start - (a->map[start - 1] & ~MAP_FREE);
		start -= more;
		len += more;
		tree_delete(&idx_to_midblock(a, start)->free_entry);
	}
	if (start + len < ARENA_MIDBLOCKS && a->map[start + len] & MAP_FREE) {
		unsigned more = (a->map[start + len] & ~MAP_FREE) + 1 - (start + len);
		tree_delete(&idx_to_midblock(a, start + len)->free_entry);
		len += more;
	}
#if defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && ((defined(__linux__) && defined(MADV_DONTNEED)) || (!defined(__linux__) && defined(MADV_FREE)))
	start_ptr = round_up(ptr_to_num(idx_to_midblock(a, start)), page_size);
	end_ptr = round_down(ptr_to_num(idx_to_midblock(a, start + len)), page_size);
	if (start_ptr < end_ptr) {
		int r;
#if defined(__linux__)
		int madv = MADV_DONTNEED;
#else
		int madv = MADV_FREE;
#endif
		EINTR_LOOP(r, madvise(num_to_ptr(start_ptr), end_ptr - start_ptr, madv));
		if (unlikely(r == -1)) {
			int er = errno;
			warning("madvise(%d) failed: %d, %s", madv, er, error_decode(error_from_errno(EC_SYSCALL, er)));
		}
	}
#endif
	arena_free_midblock(a, start, len);
	arena_relink(a, len);
}
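/*
 * arena_free_merge_midblock() coalesces a freed run with MAP_FREE-tagged
 * neighbours, optionally returns the fully covered pages to the kernel with
 * madvise() when AMALLOC_TRIM_MIDBLOCKS is enabled, and then re-links the
 * arena so that a completely free arena can be unmapped by
 * arena_insert_locked().
 */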
static void arena_grab_midblocks(struct arena *a, unsigned idx1, unsigned idx, unsigned len)
{
	unsigned found_end;
	union midblock *m = idx_to_midblock(a, idx1);
	tree_delete(&m->free_entry);
	found_end = (a->map[idx1] & ~MAP_FREE) + 1;
	if (unlikely(idx > idx1))
		arena_free_midblock(a, idx1, idx - idx1);
	if (found_end > idx + len)
		arena_free_midblock(a, idx + len, found_end - (idx + len));
}
static int midblock_find(const struct tree_entry *entry, uintptr_t len)
{
	union midblock *m1 = get_struct(entry, union midblock, free_entry);
	struct arena *a = addr_to_arena(m1);
	unsigned idx1 = midblock_to_idx(a, m1);
	unsigned len1 = (a->map[idx1] & ~MAP_FREE) + 1 - idx1;
	if (len1 < len)
		return -1;
	if (len1 > len)
		return 1;
	return 0;
}
static union midblock *arena_alloc_midblock(struct arena *a, unsigned midblock_alignment, unsigned midblocks)
{
	struct tree_entry *ee;
	ee = tree_find_best(&a->midblock_free, midblock_find, midblocks + midblock_alignment - 1);
	if (likely(ee != NULL)) {
		union midblock *m = get_struct(ee, union midblock, free_entry);
		unsigned best = midblock_to_idx(a, m);
		unsigned best_aligned = round_up(best, midblock_alignment);
		arena_grab_midblocks(a, best, best_aligned, midblocks);
		a->map[best_aligned] = best_aligned + midblocks - 1;
		a->map[best_aligned + midblocks - 1] = best_aligned;
		return idx_to_midblock(a, best_aligned);
	}
	return NULL;
}
static bool arena_try_realloc_midblock(struct arena *a, unsigned base, unsigned orig_len, unsigned new_len)
{
	unsigned free_len;
	if (unlikely(new_len <= orig_len)) {
		if (new_len == orig_len)
			return true;
		a->map[base] = base + new_len - 1;
		a->map[base + new_len - 1] = base;
		arena_free_merge_midblock(a, base + new_len, orig_len - new_len);
	} else {
		if (unlikely(base + new_len > ARENA_MIDBLOCKS))
			return false;
		if (!(a->map[base + orig_len] & MAP_FREE))
			return false;
		free_len = (a->map[base + orig_len] & ~MAP_FREE) + 1 - (base + orig_len);
		if (free_len < new_len - orig_len)
			return false;
		arena_grab_midblocks(a, base + orig_len, base + orig_len, new_len - orig_len);
		a->map[base] = base + new_len - 1;
		a->map[base + new_len - 1] = base;
	}
	return true;
}
static struct arena *arena_alloc(void)
{
	struct arena *a;
	a = amalloc_run_alloc(ARENA_SIZE, ARENA_SIZE, false, false);
	if (unlikely(!a))
		return NULL;
	tree_init(&a->midblock_free);
	a->map[0] = ARENA_PREFIX - 1;
	a->map[ARENA_PREFIX - 1] = 0;
	a->attached = true;
	arena_free_midblock(a, ARENA_PREFIX, ARENA_MIDBLOCKS - ARENA_PREFIX);
	/*debug("allocating arena %p", a);*/
	return a;
}
static unsigned arena_compute_max_run(struct arena *a)
{
	struct tree_entry *e = tree_last(&a->midblock_free);
	if (!e)
		return 0;
	{
		union midblock *m = get_struct(e, union midblock, free_entry);
		unsigned idx = midblock_to_idx(a, m);
		return (a->map[idx] & ~MAP_FREE) + 1 - idx;
	}
}
static void arena_detach(struct arena *a)
{
	a->max_midblock_run = arena_compute_max_run(a);
	arena_tree_lock();
	a->attached = false;
	arena_insert_locked(a);
	arena_tree_unlock();
}
static int arena_tree_find(const struct tree_entry *entry, uintptr_t len)
{
	struct arena *a = get_struct(entry, struct arena, arena_entry);
	if (a->max_midblock_run < len)
		return -1;
	if (a->max_midblock_run > len)
		return 1;
	return 0;
}
static struct arena *arena_attach(unsigned midblocks)
{
	struct arena *a;
	struct tree_entry *ee;
	arena_tree_lock();
	ee = tree_find_best(&arena_tree, arena_tree_find, midblocks);
	if (ee) {
		a = get_struct(ee, struct arena, arena_entry);
		a->attached = true;
		tree_delete(&a->arena_entry);
		arena_tree_unlock();
		return a;
	}
	arena_tree_unlock();
	return arena_alloc();
}
static void *amalloc_mid(struct per_thread *pt, unsigned midblock_alignment, unsigned midblocks)
{
	struct arena *a = pt->arena;
	bool looped = false;
	if (likely(a != NULL)) {
		union midblock *m;
try_again:
		arena_lock(a);
		m = arena_alloc_midblock(a, midblock_alignment, midblocks);
		if (likely(m != NULL)) {
			arena_unlock(a);
			return m;
		}
		arena_detach(a);
		arena_unlock(a);
	}
	if (likely(!looped))
		a = arena_attach(midblocks + midblock_alignment - 1);
	else
		a = arena_alloc();
	if (unlikely(!a))
		return NULL;
	pt->arena = a;
	looped = true;
	goto try_again;
}
static void *amalloc_huge(size_t al, size_t size, bool clr)
{
	struct huge_entry *e;
	void *ptr = amalloc_run_alloc(al, size, clr, false);
	if (unlikely(!ptr))
		return NULL;
	e = malloc(sizeof(struct huge_entry));
	if (unlikely(!e)) {
		amalloc_run_free(ptr, size);
		return NULL;
	}
	e->ptr = ptr;
	e->len = size;
	huge_tree_insert(e);
	return ptr;
}
static attr_noinline void *amalloc_mid_huge(struct per_thread *pt, size_t size, bool clr)
{
	if (unlikely(size > MIDBLOCK_LIMIT)) {
		return amalloc_huge(ARENA_SIZE, size, clr);
	} else {
		unsigned midblocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
		void *ptr = amalloc_mid(pt, 1, midblocks);
		if (likely(ptr != NULL) && clr)
			memset(ptr, 0, size);
		return ptr;
	}
}
static attr_noinline void *amemalign_mid_huge(size_t al, size_t size, bool clr)
{
	if (size + al > MIDBLOCK_LIMIT) {
		if (al < ARENA_SIZE)
			al = ARENA_SIZE;
		return amalloc_huge(al, size, clr);
	} else {
		unsigned midblocks, midblock_alignment;
		void *ptr;
		struct per_thread *pt = amalloc_per_thread();
		if (unlikely(!pt)) {
			if (unlikely(!amalloc_per_thread_alloc()))
				return NULL;
			pt = amalloc_per_thread();
		}
		midblocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
		midblock_alignment = round_up(al, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
		ptr = amalloc_mid(pt, midblock_alignment, midblocks);
		if (likely(ptr != NULL) && clr)
			memset(ptr, 0, size);
		return ptr;
	}
}
static unsigned reserved_bits(unsigned cls)
{
	size_t size = CLASS_TO_SIZE(cls);
	unsigned reserved_bits = div_16(sizeof(full_midblock.s) + size - 1, size);
	return reserved_bits;
}
static union midblock *create_small_run(struct per_thread *pt, unsigned cls)
{
	struct arena *a;
	unsigned idx, i, res;
	size_t size = CLASS_TO_SIZE(cls);
	unsigned midblocks = (size * BITMAP_BITS + MIDBLOCK_SIZE - 1) >> MIDBLOCK_BITS;
	union midblock *m = amalloc_mid(pt, 1, midblocks);
	if (unlikely(!m))
		return NULL;
	a = addr_to_arena(m);
	idx = midblock_to_idx(a, m);
	for (i = 0; i < midblocks; i++) {
		a->map[idx + i] = idx;
	}
	res = reserved_bits(cls);
	m->s.map = (bitmap_t)-1 << res;
	store_relaxed(&m->s.atomic_map, 0);
#ifdef AMALLOC_EAGER_FREE
	m->s.index = (size_t)-1;
#endif
	m->s.reserved_bits = res;
	m->s.cls = cls;
	m->s.size = size;
	m->s.reciprocal = div_16(RECIPROCAL_BASE + size - 1, size);
	return m;
}
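/*
 * reserved_bits() is the number of slots at the start of a small run that
 * overlap the run header (the s member of union midblock); create_small_run()
 * therefore initializes s.map to (bitmap_t)-1 << reserved bits so those slots
 * are never handed out.
 */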
static void amalloc_free_small(union midblock *m)
{
	struct arena *a = addr_to_arena(m);
	unsigned idx = midblock_to_idx(a, m);
	unsigned midblocks = (m->s.size * BITMAP_BITS + MIDBLOCK_SIZE - 1) >> MIDBLOCK_BITS;
	/*debug("midblock (%d): %lx / %lx", m->s.cls, m->s.map, m->s.atomic_map);*/
	arena_lock(a);
	arena_free_merge_midblock(a, idx, midblocks);
	arena_unlock(a);
}
#ifdef AMALLOC_EAGER_FREE
static attr_noinline void amalloc_test_free_small(struct small_block_cache *sbc, size_t idx, union midblock *m)
{
	sbc_lock(sbc);
	if (likely(idx < sbc->array_size) && likely(sbc->array[idx] == m) && likely(load_relaxed(&m->s.atomic_map) == (bitmap_t)-1 << m->s.reserved_bits)) {
		(sbc->array[idx] = sbc->array[--sbc->array_size])->s.index = idx;
	} else {
		m = NULL;
	}
	sbc_unlock(sbc);
	if (likely(m != NULL)) {
		amalloc_free_small(m);
		/*debug("freed small");*/
	}
}
#endif
static bool amalloc_detach_small(struct small_block_cache *sbc, union midblock *m)
{
	size_t index;
	if (unlikely(sbc->array_size == sbc->allocated_size)) {
		union midblock **a;
		size_t s = sbc->allocated_size * 2;
		if (unlikely(s > (size_t)-1 / sizeof(union midblock *)))
			return false;
		a = arealloc(sbc->array, s * sizeof(union midblock *));
		if (unlikely(!a))
			return false;
		sbc->array = a;
		sbc->allocated_size = s;
		/*debug("amalloc_detach_small: %ld - %ld - %p", sbc - small_block_cache, s, a);*/
	}
	index = sbc->array_size++;
#ifdef AMALLOC_EAGER_FREE
	m->s.index = index;
#endif
	sbc->array[index] = m;
	return true;
}
static attr_noinline void *amalloc_small_empty(struct per_thread *pt, size_t size)
{
	unsigned idx = SIZE_TO_INDEX(size);
	unsigned cls = INDEX_TO_CLASS(idx);
	unsigned i, best_idx, best_count, bit, all_free;
	union midblock *m = pt->small_runs[cls + 1];
	struct small_block_cache *sbc = &small_block_cache[cls];

	sbc_lock(sbc);
	if (likely(m != &full_midblock)) {
		if (unlikely(!amalloc_detach_small(sbc, m))) {
			int er = errno;
			fatal("arealloc failed: %d, %s", er, error_decode(error_from_errno(EC_SYSCALL, er)));
		}
		all_free = BITMAP_BITS - m->s.reserved_bits;
	} else {
		all_free = BITMAP_BITS - reserved_bits(cls);
	}

	best_idx = 0;
	best_count = 0;
	i = minimum(SMALL_BLOCK_TRIES, sbc->array_size);
	while (i--) {
		unsigned test_idx;
		unsigned test_count;
		union midblock *test;
		bitmap_t map;
#ifndef AMALLOC_USE_RANDOM_ROVER
		test_idx = ++sbc->rover;
		if (unlikely(test_idx >= sbc->array_size)) {
			test_idx = sbc->rover = 0;
		}
#else
		test_idx = rand_r(&sbc->rover) % sbc->array_size;
#endif
		test = sbc->array[test_idx];
		map = load_relaxed(&test->s.atomic_map);
		if (!map)
			continue;
		test_count = count_bits(map);
		if (best_count == all_free) {
			if (test_count == all_free && test_idx != best_idx && sbc->array_size - 1 != best_idx) {
				/*debug("freeing cached slab: %u, %p", cls, test);*/
				(sbc->array[test_idx] = sbc->array[--sbc->array_size])
#ifdef AMALLOC_EAGER_FREE
					->s.index = test_idx
#endif
					;
				amalloc_free_small(test);
				if (!sbc->array_size)
					break;
			}
		} else {
			if (test_count > best_count) {
				best_idx = test_idx;
				best_count = test_count;
			}
		}
	}
	if (likely(best_count)) {
		m = sbc->array[best_idx];
		(sbc->array[best_idx] = sbc->array[--sbc->array_size])
#ifdef AMALLOC_EAGER_FREE
			->s.index = best_idx
#endif
			;
		sbc_unlock(sbc);
		m->s.map = atomic_xchg(&m->s.atomic_map, 0);
#ifdef AMALLOC_EAGER_FREE
		m->s.index = (size_t)-1;
#endif
	} else {
		sbc_unlock(sbc);
		m = create_small_run(pt, cls);
		if (unlikely(!m)) {
			set_small_run(pt, cls, &full_midblock);
			return NULL;
		}
	}
	set_small_run(pt, cls, m);
	bit = find_bit(m->s.map);
	m->s.map &= ~((bitmap_t)1 << bit);
	return cast_ptr(char *, m) + bit * m->s.size;
}
static attr_noinline void *amalloc_alloc_pt(size_t size, bool clr)
{
	if (unlikely(!amalloc_per_thread_alloc()))
		return NULL;
	if (likely(!clr))
		return amalloc(size);
	else
		return acalloc(size);
}
static attr_always_inline void *amalloc_small(struct per_thread *pt, size_t size)
{
	unsigned bit;
	unsigned idx = SIZE_TO_INDEX(size);
	union midblock *m = pt->small_runs[idx];
	if (likely(m->s.map != 0)) {
		goto found_bit;
	}
	if (load_relaxed(&m->s.atomic_map) != 0) {
		m->s.map = atomic_xchg(&m->s.atomic_map, 0);
		goto found_bit;
	}
	return amalloc_small_empty(pt, size);
found_bit:
	bit = find_bit(m->s.map);
	m->s.map &= ~((bitmap_t)1 << bit);
	return cast_ptr(char *, m) + bit * m->s.size;
}
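/*
 * Fast path for small allocations: take the lowest set bit of the private
 * map; if it is empty, pull in any remotely freed bits from atomic_map with
 * one exchange; only when both are empty fall back to amalloc_small_empty(),
 * which picks or creates another run.
 */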
void * attr_fastcall amalloc(size_t size)
{
	struct per_thread *pt = amalloc_per_thread();
	if (unlikely(!pt))
		return amalloc_alloc_pt(size, false);
	if (unlikely(size > DIRECT_LIMIT))
		return amalloc_mid_huge(pt, size, false);
	return amalloc_small(pt, size);
}
void * attr_fastcall acalloc(size_t size)
{
	void *ptr;
	struct per_thread *pt = amalloc_per_thread();
	if (unlikely(!pt))
		return amalloc_alloc_pt(size, true);
	if (unlikely(size > DIRECT_LIMIT))
		return amalloc_mid_huge(pt, size, true);
	ptr = amalloc_small(pt, size);
	if (unlikely(!ptr))
		return NULL;
	return memset(ptr, 0, size);
}
void * attr_fastcall amemalign(size_t al, size_t size)
{
	size_t size2;
	if (likely(al <= minimum(DIRECT_LIMIT, MIDBLOCK_SIZE))) {
		size2 = round_up(size, al);
		if (unlikely(size2 < size))
			return NULL;
		return amalloc(size2);
	}
	return amemalign_mid_huge(al, size, false);
}
void * attr_fastcall acmemalign(size_t al, size_t size)
{
	size_t size2;
	if (likely(al <= minimum(DIRECT_LIMIT, MIDBLOCK_SIZE))) {
		size2 = round_up(size, al);
		if (unlikely(size2 < size))
			return NULL;
		return acalloc(size2);
	}
	return amemalign_mid_huge(al, size, true);
}
static attr_noinline void afree_huge(void *ptr)
{
	struct huge_entry *e = huge_tree_delete(ptr);
	amalloc_run_free(ptr, e->len);
	free(e);
}
static attr_noinline void afree_mid(struct arena *a, unsigned idx)
{
	arena_lock(a);
	arena_free_merge_midblock(a, idx, a->map[idx] + 1 - idx);
	arena_unlock(a);
}
void attr_fastcall afree(void *ptr)
{
	unsigned idx, bit;
	unsigned attr_unused bit2;
	bitmap_t bit_or;
	union midblock *m;
	struct per_thread *pt;
	struct arena *a = addr_to_arena(ptr);
	if (unlikely((void *)a == ptr)) {
		afree_huge(ptr);
		return;
	}
	idx = midblock_to_idx(a, ptr);
	m = idx_to_midblock(a, a->map[idx]);
	if (unlikely((void *)m >= ptr)) {
		afree_mid(a, idx);
		return;
	}
	bit = ((unsigned)(cast_ptr(char *, ptr) - cast_ptr(char *, m)) * m->s.reciprocal) / RECIPROCAL_BASE;
	bit2 = (unsigned)(cast_ptr(char *, ptr) - cast_ptr(char *, m)) / m->s.size;
	ajla_assert(bit == bit2, (file_line, "afree: reciprocal doesn't match: size %u, reciprocal %u, %u != %u", m->s.size, m->s.reciprocal, bit, bit2));
	bit_or = (bitmap_t)1 << bit;
	pt = amalloc_per_thread();
	if (likely(pt != NULL) && likely(pt->small_runs[m->s.cls + 1] == m)) {
		m->s.map |= bit_or;
	} else {
#ifdef AMALLOC_EAGER_FREE
		size_t idx = (size_t)-1;
		struct small_block_cache *sbc = NULL;
		if (unlikely((load_relaxed(&m->s.atomic_map) | bit_or) == (bitmap_t)-1 << m->s.reserved_bits)) {
			idx = m->s.index;
			sbc = &small_block_cache[m->s.cls];
		}
#endif
		atomic_or(&m->s.atomic_map, bit_or);
#ifdef AMALLOC_EAGER_FREE
		if (unlikely(idx != (size_t)-1)) {
			amalloc_test_free_small(sbc, idx, m);
		}
#endif
	}
}
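/*
 * afree() classifies a pointer without any per-allocation header: a pointer
 * equal to its arena base is a huge allocation, a pointer whose run header is
 * not below it is a midblock allocation, and anything else is a slot inside
 * the small run headed by that midblock; the slot index is computed with the
 * precomputed reciprocal and cross-checked against a plain division by
 * ajla_assert.
 */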
static attr_noinline void *arealloc_malloc(void *ptr, size_t old_size, size_t new_size)
{
	void *n;
	if (unlikely(new_size == old_size))
		return ptr;
	n = amalloc(new_size);
	if (unlikely(!n))
		return NULL;
	memcpy(n, ptr, minimum(new_size, old_size));
	afree(ptr);
	return n;
}
static attr_noinline void *arealloc_huge(void *ptr, size_t size)
{
	struct huge_entry *e = huge_tree_delete(ptr);
	if (likely(size >= page_size)) {
		void *n = amalloc_run_realloc(ptr, e->len, size);
		if (n) {
			e->ptr = n;
			e->len = size;
			huge_tree_insert(e);
			return n;
		}
	}
	huge_tree_insert(e);
	return arealloc_malloc(ptr, e->len, size);
}
static attr_noinline void *arealloc_mid(struct arena *a, unsigned idx, size_t size)
{
	unsigned existing_blocks = a->map[idx] + 1 - idx;
	if (size > DIRECT_LIMIT && size <= ARENA_SIZE) {
		bool f;
		unsigned new_blocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
		arena_lock(a);
		f = arena_try_realloc_midblock(a, idx, existing_blocks, new_blocks);
		arena_unlock(a);
		if (f)
			return idx_to_midblock(a, idx);
	}
	return arealloc_malloc(idx_to_midblock(a, idx), existing_blocks << MIDBLOCK_BITS, size);
}
void * attr_fastcall arealloc(void *ptr, size_t size)
{
	unsigned idx;
	union midblock *m;
	struct arena *a = addr_to_arena(ptr);
	if (unlikely((void *)a == ptr)) {
		return arealloc_huge(ptr, size);
	}
	idx = midblock_to_idx(a, ptr);
	m = idx_to_midblock(a, a->map[idx]);
	if (unlikely((void *)m >= ptr)) {
		return arealloc_mid(a, idx, size);
	}
	if (INDEX_TO_CLASS(SIZE_TO_INDEX(size)) != m->s.cls) {
		return arealloc_malloc(ptr, m->s.size, size);
	}
	return ptr;
}
bool attr_fastcall aptr_is_huge(void *ptr)
{
	struct arena *a = addr_to_arena(ptr);
	return (void *)a == ptr;
}
void amalloc_init(void)
{
	unsigned i;
	unsigned size;
	for (size = MIN_ALLOC; size <= DIRECT_LIMIT; size += MIN_ALLOC) {
		ushort_efficient_t reciprocal = div_16(RECIPROCAL_BASE + size - 1, size);
		for (i = 1; i < BITMAP_BITS; i++) {
			unsigned offs = i * size;
			unsigned ii = (offs * reciprocal) / RECIPROCAL_BASE;
			if (unlikely(ii != i))
				internal(file_line, "amalloc_init: reciprocal doesn't match: size %u, i %u, ii %u", size, i, ii);
		}
	}
	if (unlikely(!amalloc_enabled))
		return;
	amalloc_os_init();
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled)
		reserve_memory();
#endif
	amalloc_threads_initialized = false;
	amalloc_per_thread_init(&thread1);
	tree_init(&huge_tree);
	tree_init(&arena_tree);
	for (i = 0; i < N_CLASSES; i++) {
		small_block_cache[i].array = amalloc(DIRECT_LIMIT * 2);
		if (unlikely(!small_block_cache[i].array)) {
			fatal("amalloc failed");
		}
		small_block_cache[i].array_size = 0;
		small_block_cache[i].allocated_size = DIRECT_LIMIT * 2 / sizeof(union midblock *);
		small_block_cache[i].rover = 0;
	}
}
void amalloc_init_multithreaded(void)
{
	unsigned i;
	if (unlikely(!amalloc_enabled))
		return;
	if (unlikely(amalloc_threads_initialized))
		internal(file_line, "amalloc_init_multithreaded: amalloc_threads_initialized already set");
	mutex_init(&huge_tree_mutex);
	mutex_init(&arena_tree_mutex);
	tls_init(struct per_thread *, per_thread);
	tls_set(struct per_thread *, per_thread, &thread1);
	for (i = 0; i < N_CLASSES; i++) {
		mutex_init(&small_block_cache[i].mutex);
	}
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled)
		mutex_init(&rmap_mutex);
#endif
	amalloc_threads_initialized = true;
#if 0
	{
		void *x = amalloc(0x2000000);
		x = arealloc(x, 0x4000000);
		x = arealloc(x, 0x2000000);
		x = arealloc(x, 0x4000000);
		afree(x);
	}
#endif
}
void amalloc_done_multithreaded(void)
{
	unsigned i;
	if (unlikely(!amalloc_enabled))
		return;
	if (unlikely(!amalloc_threads_initialized))
		internal(file_line, "amalloc_done_multithreaded: amalloc_threads_initialized not set");
	amalloc_threads_initialized = false;
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled)
		mutex_done(&rmap_mutex);
#endif
	for (i = 0; i < N_CLASSES; i++) {
		mutex_done(&small_block_cache[i].mutex);
	}
	tls_done(struct per_thread *, per_thread);
	mutex_done(&arena_tree_mutex);
	mutex_done(&huge_tree_mutex);
}
void amalloc_done(void)
{
	unsigned i;
	if (unlikely(!amalloc_enabled))
		return;
	if (unlikely(amalloc_threads_initialized))
		internal(file_line, "amalloc_done: amalloc_threads_initialized set");
	detach_pt_data(&thread1);
	for (i = 0; i < N_CLASSES; i++) {
		struct small_block_cache *sbc = &small_block_cache[i];
		bitmap_t all_free = (bitmap_t)-1 << reserved_bits(i);
		size_t j;
		for (j = 0; j < sbc->array_size; j++) {
			union midblock *m = sbc->array[j];
			if (unlikely(load_relaxed(&m->s.atomic_map) != all_free))
				internal(file_line, "amalloc_done: small block memory leak, class %u", i);
			/*debug("end: freeing cached slab: %u, %p", i, m);*/
			amalloc_free_small(m);
		}
		afree(sbc->array);
		sbc->array = BAD_POINTER_1;
	}
	if (unlikely(!tree_is_empty(&huge_tree)))
		internal(file_line, "amalloc_done: huge tree is not empty");
	if (unlikely(!tree_is_empty(&arena_tree)))
		internal(file_line, "amalloc_done: arena tree is not empty");
#if 0
	{
		struct tree_entry *e, *f;
		debug("%x", (unsigned)(ARENA_MIDBLOCKS - ARENA_PREFIX));
		for (e = tree_first(&arena_tree); e; e = tree_next(e)) {
			struct arena *a = get_struct(e, struct arena, arena_entry);
			debug("leaked arena %p, %x", a, a->max_midblock_run);
			for (f = tree_first(&a->midblock_free); f; f = tree_next(f)) {
				union midblock *m = get_struct(f, union midblock, free_entry);
				unsigned idx = midblock_to_idx(a, m);
				unsigned len = (a->map[idx] & ~MAP_FREE) + 1 - idx;
				debug("free midblock: %p: %x, %x", m, idx, len);
				if (idx + len < ARENA_MIDBLOCKS) {
					union midblock *m2 = idx_to_midblock(a, idx + len);
					debug("small run(%p): %lx, atomic_map: %lx, size: %u, class %u", m2, m2->s.map, m2->s.atomic_map, m2->s.size, m2->s.cls);
				}
			}
		}
	}
#endif
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled)
		unreserve_memory();
#endif
	amalloc_os_done();
}