ssa: move optimizations from P_BinaryOp to P_BinaryConstOp
[ajla.git] / amalloc.c
blobe2e5c0dcdda7cba909d407f6ad1bcb79a496c0b6
1 /*
2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
9 * version.
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 #include "ajla.h"
21 #include "tree.h"
22 #include "thread.h"
23 #include "addrlock.h"
24 #include "mem_al.h"
25 #include "os.h"
27 #include "amalloc.h"
29 #ifdef USE_AMALLOC
/* Optional build-time tuning knobs (all disabled by default). */
/*#define AMALLOC_EAGER_FREE*/
/*#define AMALLOC_TRIM_MIDBLOCKS*/
/*#define AMALLOC_USE_RANDOM_ROVER*/

#if defined(OS_OS2)
#ifndef OBJ_ANY
#define OBJ_ANY 0x0400
#endif
/* DosAllocMem attributes.  OBJ_ANY requests "high" (>512MB) memory; it is
 * cleared at runtime if the system or its TCP/IP stack cannot handle such
 * addresses (see amalloc_os_init). */
static ULONG dosallocmem_attrib = PAG_READ | PAG_WRITE |
#ifdef CODEGEN_USE_HEAP
	PAG_EXECUTE |
#endif
	OBJ_ANY;
#else
/* Heap page protection: executable only when generated code lives on the heap. */
#ifndef CODEGEN_USE_HEAP
#define PROT_HEAP (PROT_READ | PROT_WRITE)
#else
#define PROT_HEAP (PROT_READ | PROT_WRITE | PROT_EXEC)
#endif
#endif
/* Arenas are ARENA_SIZE-aligned 2MB regions subdivided into 512-byte midblocks. */
#define ARENA_BITS 21
#define ARENA_SIZE ((size_t)1 << ARENA_BITS)

#define MIDBLOCK_BITS 9
#define MIDBLOCK_SIZE ((size_t)1 << MIDBLOCK_BITS)

/* How many cached small-block runs to probe before giving up and allocating. */
#define SMALL_BLOCK_TRIES 16

/* Small allocations are rounded to MIN_ALLOC granules; sizes up to
 * DIRECT_LIMIT are served from per-class runs (N_CLASSES size classes).
 * SIZE_TO_INDEX maps a byte size to a slot index; INDEX_TO_CLASS maps that
 * to a class (index 0, i.e. size 0, shares class 0). */
#define MIN_ALLOC 16
#define DIRECT_LIMIT 512
#define N_CLASSES (DIRECT_LIMIT / MIN_ALLOC)
#define SIZE_TO_INDEX(s) (((unsigned)(s) + MIN_ALLOC - 1) / MIN_ALLOC)
#define INDEX_TO_CLASS(i) (!(i) ? 0 : (i) - 1)
#define CLASS_TO_SIZE(c) (((c) + 1) * MIN_ALLOC)

#define ARENA_MIDBLOCKS (ARENA_SIZE / MIDBLOCK_SIZE)

/* One allocation-bitmap word; width matches the machine's efficient word size. */
#define BITMAP_BITS EFFICIENT_WORD_SIZE
#define bitmap_t cat4(uint,BITMAP_BITS,_,t)
/* Header stored at the start of every arena.  map[] tracks midblock runs;
 * entries presumably carry the MAP_FREE tag for free runs — confirm against
 * the run-manipulation code later in the file. */
struct arena {
	struct tree midblock_free;		/* free midblock runs within this arena */
	struct tree_entry arena_entry;		/* links the arena into arena_tree */
	ushort_efficient_t max_midblock_run;	/* cached longest free run */
	uchar_efficient_t attached;		/* nonzero while owned by a thread */
	ushort_efficient_t map[ARENA_MIDBLOCKS];
};

#define MAP_FREE 0x8000

/* Midblocks consumed by this header, and the largest request still served
 * from inside an arena. */
#define ARENA_PREFIX (round_up(sizeof(struct arena), MIDBLOCK_SIZE) >> MIDBLOCK_BITS)
#define MIDBLOCK_LIMIT ((ARENA_SIZE - (ARENA_PREFIX << MIDBLOCK_BITS)) / 2)
/* A midblock is either a node in an arena's free tree or the header of a
 * run of small blocks. */
union midblock {
	struct tree_entry free_entry;
	struct {
		bitmap_t map;			/* free slots owned by the attached thread */
		atomic_type bitmap_t atomic_map;	/* slots freed concurrently by other threads */
#ifdef AMALLOC_EAGER_FREE
		size_t index;			/* position in the class's small_block_cache array */
#endif
		uchar_efficient_t reserved_bits;	/* header slots never handed out */
		uchar_efficient_t cls;		/* size class of this run */
		ushort_efficient_t size;	/* slot size in bytes */
		ushort_efficient_t reciprocal;	/* presumably fixed-point 1/size (scaled by RECIPROCAL_BASE) — confirm at use site */
#define RECIPROCAL_BASE 0x8000
	} s;
};
/* Bookkeeping for an allocation too large for an arena; kept in huge_tree
 * keyed by address. */
struct huge_entry {
	void *ptr;
	size_t len;
	struct tree_entry entry;
};
/* Per-thread allocator state: the attached arena and one active run per
 * size class.  small_runs has N_CLASSES + 1 entries because slot 0 aliases
 * class 0 (see set_small_run). */
struct per_thread {
	struct arena *arena;
	union midblock *small_runs[N_CLASSES + 1];
	tls_destructor_t destructor;
};
/* Sentinel run with no free slots; threads start pointed at it. */
static union midblock full_midblock;

/* State for the main thread, used before TLS is available. */
static struct per_thread thread1;

static tls_decl(struct per_thread *, per_thread);

/* Set once threading starts; before that, all the lock helpers are no-ops. */
static uchar_efficient_t amalloc_threads_initialized;

static struct tree arena_tree;		/* arenas that still have free midblocks */
static mutex_t arena_tree_mutex;

static struct tree huge_tree;		/* live huge allocations, keyed by address */
static mutex_t huge_tree_mutex;

static size_t page_size;

/* Per-class cache of detached small runs; rover is the search cursor used
 * when probing the array for a run with free slots. */
static struct small_block_cache {
	union midblock **array;
	size_t array_size;
	size_t allocated_size;
#ifndef AMALLOC_USE_RANDOM_ROVER
	size_t rover;
#else
	unsigned rover;
#endif
	mutex_t mutex;
} small_block_cache[N_CLASSES];
/* Operand-size suffix for x86 instructions operating on bitmap_t
 * (bsf/popcnt/or/xchg below). */
#if BITMAP_BITS == 16
#define BITMAP_ASM_X86_SUFFIX "w"
#elif BITMAP_BITS == 32
#define BITMAP_ASM_X86_SUFFIX "l"
#elif BITMAP_BITS == 64
#define BITMAP_ASM_X86_SUFFIX "q"
#endif
/* Return the index of the lowest set bit of bitmap.
 * Callers must pass a non-zero value: the x86 bsf path and the generic
 * shift loop do not terminate meaningfully for 0.
 * Hand-picked instructions per architecture, then compiler builtins,
 * ffs(), and finally a portable loop. */
static attr_always_inline size_t find_bit(bitmap_t bitmap)
{
#if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	/* "rep; bsf" executes as tzcnt on CPUs that have it, plain bsf otherwise */
	__asm__ ("rep; bsf"BITMAP_ASM_X86_SUFFIX" %1, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
	return bitmap;
#endif
#if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_RBIT) && defined(HAVE_ARM_ASSEMBLER_CLZ)
	if (likely(cpu_test_feature(CPU_FEATURE_armv6t2))) {
		/* bit-reverse, then count leading zeros */
		__asm__ (ARM_ASM_PREFIX "rbit %0, %1; clz %0, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
		return bitmap;
	}
#endif
#if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
	if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
		__asm__ (".arch ev68; cttz %1, %0" : "=r"(bitmap) : "r"(bitmap));
		return bitmap;
	}
#endif
#if defined(ARCH_RISCV64) && defined(HAVE_GCC_ASSEMBLER) && !defined(__riscv_zbb)
	if (likely(cpu_test_feature(CPU_FEATURE_zbb))) {
		/* hand-encoded Zbb ctzw/ctz for toolchains built without Zbb */
		if (BITMAP_BITS == 32) {
			__asm__ ("mv t6, %1; .word 0x601f9f9b; mv %0, t6" : "=r"(bitmap) : "r"(bitmap) : "t6");
			return bitmap;
		}
		if (BITMAP_BITS == 64) {
			__asm__ ("mv t6, %1; .word 0x601f9f93; mv %0, t6" : "=r"(bitmap) : "r"(bitmap) : "t6");
			return bitmap;
		}
	}
#endif
#if defined(ARCH_S390_64) && defined(HAVE_GCC_ASSEMBLER) && !(__ARCH__ >= 7)
	if (likely(cpu_test_feature(CPU_FEATURE_extended_imm))) {
		/* hand-encoded flogr on the isolated lowest bit: result is 63 - clz */
		if (BITMAP_BITS == 32) {
			__asm__("llgfr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap) : "r"(bitmap & -bitmap) : "r0", "r1", "cc");
			return 63 - bitmap;
		}
		if (BITMAP_BITS == 64) {
			__asm__("lgr %%r1, %1; .long 0xb9830001; lgr %0, %%r0" : "=r"(bitmap) : "r"(bitmap & -bitmap) : "r0", "r1", "cc");
			return 63 - bitmap;
		}
	}
#endif
#if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
	/* popcount of the mask of bits strictly below the lowest set bit */
	__asm__ ("popc %1, %0" : "=r"(bitmap) : "r"((bitmap - 1) & ~bitmap));
	return bitmap;
#endif
#if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_CTZ)
	return __builtin_ctz(bitmap);
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_CTZ)
	return __builtin_ctzll(bitmap);
#elif BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_FFS)
	return ffs(bitmap) - 1;
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_FFSLL)
	return ffsll(bitmap) - 1;
#else
	{
		/* portable fallback: shift until the low bit is set */
		unsigned ret = 0;
		while (1) {
			if (bitmap & 1)
				break;
			ret++;
			bitmap >>= 1;
		}
		return ret;
	}
#endif
}
/* Population count of bitmap, with per-architecture fast paths and a
 * Kernighan bit-clearing loop as the portable fallback. */
static attr_always_inline unsigned count_bits(bitmap_t bitmap)
{
#if defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86) && defined(HAVE_X86_ASSEMBLER_POPCNT)
	if (likely(cpu_test_feature(CPU_FEATURE_popcnt))) {
		__asm__ ("popcnt"BITMAP_ASM_X86_SUFFIX" %1, %0" : "=r"(bitmap) : "r"(bitmap) : "cc");
		return bitmap;
	}
#endif
#if BITMAP_BITS == 32 && defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_VFP)
	if (likely(cpu_test_feature(CPU_FEATURE_neon))) {
		/* NEON: per-byte popcount, then pairwise widening adds */
		__asm__ (ARM_ASM_PREFIX " \n\
			vld1.32 d0[0], [ %0 ] \n\
			vcnt.8 d0, d0 \n\
			vpaddl.u8 d0, d0 \n\
			vpaddl.u16 d0, d0 \n\
			vst1.32 d0[0], [ %0 ] \n\
		" : : "r"(&bitmap) : "d0", "memory");
		return bitmap;
	}
#endif
#if BITMAP_BITS == 64 && defined(INLINE_ASM_GCC_ARM64)
	if (likely(cpu_test_feature(CPU_FEATURE_neon))) {
		unsigned result;
		/* per-byte popcount, then horizontal add across the vector */
		__asm__ (ARM_ASM_PREFIX " \n\
			fmov d0, %1 \n\
			cnt v0.8b, v0.8b \n\
			uaddlv h0, v0.8b \n\
			fmov %w0, s0 \n\
		" : "=r"(result) : "r"(bitmap) : "d0");
		return result;
	}
#endif
#if defined(ARCH_SPARC64) && defined(HAVE_GCC_ASSEMBLER)
	__asm__ ("popc %1, %0" : "=r"(bitmap) : "r"(bitmap));
	return bitmap;
#endif
#if defined(ARCH_ALPHA) && defined(HAVE_GCC_ASSEMBLER)
	if (likely(cpu_test_feature(CPU_FEATURE_cix))) {
		__asm__ (".arch ev68; ctpop %1, %0" : "=r"(bitmap) : "r"(bitmap));
		return bitmap;
	}
#endif
#if BITMAP_BITS == 32 && SIZEOF_UNSIGNED == 4 && defined(HAVE_BUILTIN_POPCOUNT)
	return __builtin_popcount(bitmap);
#elif BITMAP_BITS == 64 && SIZEOF_UNSIGNED_LONG_LONG == 8 && defined(HAVE_BUILTIN_POPCOUNT)
	return __builtin_popcountll(bitmap);
#else
	{
		/* Kernighan: each iteration clears the lowest set bit */
		unsigned ret = 0;
		while (bitmap)
			bitmap &= bitmap - 1, ret++;
		return ret;
	}
#endif
}
/* 16-bit unsigned division.  Uses the ARM udiv instruction when the CPU
 * has it (avoiding a library division routine), plain '/' otherwise.
 * NOTE(review): no guard against y == 0 — callers presumably never pass 0;
 * verify at call sites. */
static attr_always_inline uint16_t div_16(uint16_t x, uint16_t y)
{
#if defined(INLINE_ASM_GCC_ARM) && defined(HAVE_ARM_ASSEMBLER_SDIV_UDIV)
	if (likely(cpu_test_feature(CPU_FEATURE_idiv))) {
		uint16_t result;
		__asm__ (ARM_ASM_PREFIX "udiv %0, %1, %2" : "=r"(result) : "r"((unsigned)x), "r"((unsigned)y));
		return result;
	}
#endif
	return x / y;
}
/* Atomically OR value into *bitmap (release ordering where expressible).
 * Falls back to C11 atomics, x86 lock-prefixed or, GCC __sync builtins,
 * and finally the address-lock table (which is only needed once threads
 * are running). */
static attr_always_inline void amalloc_atomic_or(atomic_type bitmap_t *bitmap, bitmap_t value)
{
#if defined(HAVE_C11_ATOMICS)
	atomic_fetch_or_explicit(bitmap, value, memory_order_release);
#elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	__asm__ volatile ("lock; or"BITMAP_ASM_X86_SUFFIX" %2, %0":"=m"(*bitmap):"m"(*bitmap),"ir"(value):"cc","memory");
#elif defined(HAVE_SYNC_AND_FETCH)
	__sync_or_and_fetch(bitmap, value);
#else
	if (likely(amalloc_threads_initialized))
		address_lock((void *)bitmap, DEPTH_ARENA);
	*bitmap |= value;
	if (likely(amalloc_threads_initialized))
		address_unlock((void *)bitmap, DEPTH_ARENA);
#endif
}
/* Atomically swap *bitmap with value and return the previous contents
 * (acquire ordering where expressible).  Same fallback ladder as
 * amalloc_atomic_or; the __sync variant emulates xchg with a CAS loop. */
static attr_always_inline bitmap_t amalloc_atomic_xchg(atomic_type bitmap_t *bitmap, bitmap_t value)
{
#if defined(HAVE_C11_ATOMICS)
	return atomic_exchange_explicit(bitmap, value, memory_order_acquire);
#elif defined(BITMAP_ASM_X86_SUFFIX) && defined(INLINE_ASM_GCC_X86)
	/* x86 xchg with a memory operand is implicitly locked */
	__asm__ volatile ("xchg"BITMAP_ASM_X86_SUFFIX" %1, %0":"=m"(*bitmap),"=r"(value):"m"(*bitmap),"1"(value):"memory");
	return value;
#elif defined(HAVE_SYNC_AND_FETCH)
	bitmap_t v;
	do {
		v = *bitmap;
	} while (unlikely(!__sync_bool_compare_and_swap(bitmap, v, value)));
	return v;
#else
	bitmap_t v;
	if (likely(amalloc_threads_initialized))
		address_lock((void *)bitmap, DEPTH_ARENA);
	v = *bitmap;
	*bitmap = value;
	if (likely(amalloc_threads_initialized))
		address_unlock((void *)bitmap, DEPTH_ARENA);
	return v;
#endif
}
329 static inline struct arena *addr_to_arena(void *addr)
331 return num_to_ptr(ptr_to_num(addr) & ~((1 << ARENA_BITS) - 1));
334 static inline union midblock *idx_to_midblock(struct arena *a, unsigned midblock)
336 return cast_ptr(union midblock *, cast_ptr(char *, a) + (midblock * MIDBLOCK_SIZE));
339 static inline unsigned midblock_to_idx(struct arena *a, void *m)
341 return (ptr_to_num(m) - ptr_to_num(a)) >> MIDBLOCK_BITS;
345 static void huge_tree_lock(void);
346 static void huge_tree_unlock(void);
348 #ifdef OS_OS2
/* OS/2 cannot request aligned allocations, so misaligned results are
 * parked as "pads" until process exit (see os2_alloc_aligned). */
static struct list pad_list;

struct pad_descriptor {
	struct list pad_entry;
	void *base;
	size_t size;
};
/* Release a DosAllocMem region.  size is unused: DosFreeMem frees whole
 * objects.  Retries on ERROR_INTERRUPT; any other failure is fatal. */
static void amalloc_do_unmap(void *ptr, size_t attr_unused size)
{
	APIRET r;
again:
	r = DosFreeMem(ptr);
	if (unlikely(r)) {
		if (r == ERROR_INTERRUPT)
			goto again;
		internal(file_line, "amalloc_do_unmap: DosFreeMem(%p) returned error: %lu", ptr, r);
	}
}
/* Allocate size bytes with DosAllocMem; backing pages are committed only
 * when commit is set.  Retries on ERROR_INTERRUPT; returns NULL on any
 * other failure. */
static void *do_dosallocmem(size_t size, bool commit)
{
	APIRET r;
	void *ptr;
again:
	r = DosAllocMem(&ptr, size, dosallocmem_attrib | (commit ? PAG_COMMIT : 0));
	if (unlikely(r)) {
		if (r == ERROR_INTERRUPT)
			goto again;
		return NULL;
	}
	return ptr;
}
/* Commit [ptr, ptr+size) within a previously reserved region, using the
 * same protection flags as the allocator's other mappings. */
static bool do_commitmem(void *ptr, size_t size)
{
	APIRET r;
again:
	r = DosSetMem(ptr, size, PAG_READ | PAG_WRITE |
#ifdef CODEGEN_USE_HEAP
		PAG_EXECUTE |
#endif
		PAG_COMMIT);
	if (unlikely(r)) {
		if (r == ERROR_INTERRUPT)
			goto again;
		return false;
	}
	return true;
}
/* Allocate size bytes aligned to ARENA_SIZE on OS/2, which has no aligned
 * allocation primitive.  Strategy: allocate; if the result is misaligned,
 * permanently park a "pad" filling the gap up to the next arena boundary
 * and retry so the next allocation lands aligned.  Serialized under
 * huge_tree_lock so concurrent retries don't interleave.
 * NOTE(review): the misaligned block is unmapped with 'size' rather than
 * 'aligned_size' — harmless here because OS/2 amalloc_do_unmap ignores the
 * size argument entirely. */
static void *os2_alloc_aligned(size_t size)
{
	void *ptr, *pad;
	size_t offset, aligned_size;

	huge_tree_lock();

	aligned_size = round_up(size, ARENA_SIZE);
	if (unlikely(!aligned_size))
		goto ret_null;
try_again:
	/* commit eagerly only when no trailing partial commit will be needed */
	ptr = do_dosallocmem(aligned_size, likely(size == aligned_size));
	if (unlikely(!ptr))
		goto ret_null;
	offset = ptr_to_num(ptr) & (ARENA_SIZE - 1);
	if (unlikely(offset != 0)) {
		struct pad_descriptor *pd;
		amalloc_do_unmap(ptr, size);
		pad = do_dosallocmem(ARENA_SIZE - offset, false);
		if (unlikely(!pad))
			goto ret_null;
		pd = malloc(sizeof(struct pad_descriptor));
		if (unlikely(!pd)) {
			amalloc_do_unmap(pad, ARENA_SIZE - offset);
			goto ret_null;
		}
		pd->base = pad;
		pd->size = ARENA_SIZE - offset;
		list_add(&pad_list, &pd->pad_entry);
		goto try_again;
	}
	if (unlikely(size != aligned_size)) {
		if (unlikely(!do_commitmem(ptr, size))) {
			amalloc_do_unmap(ptr, aligned_size);
			goto ret_null;
		}
	}

	huge_tree_unlock();

	return ptr;

ret_null:
	huge_tree_unlock();

	return NULL;
}
bool os2_test_for_32bit_tcpip(const char *ptr);

/* Probe whether high (>512MB, OBJ_ANY) memory is usable: a trial
 * allocation must actually land high and be acceptable to the 32-bit
 * TCP/IP stack; otherwise OBJ_ANY is dropped from the attribute mask. */
static void amalloc_os_init(void)
{
	if (dosallocmem_attrib & OBJ_ANY) {
		void *ptr;
		ptr = do_dosallocmem(0x1000, true);
		if (ptr) {
			if (ptr_to_num(ptr) < 0x20000000UL) {
				/* the system gave us low memory anyway */
				dosallocmem_attrib &= ~OBJ_ANY;
			} else {
				if (!os2_test_for_32bit_tcpip(ptr))
					dosallocmem_attrib &= ~OBJ_ANY;
			}
			amalloc_do_unmap(ptr, 0x1000);
		} else {
			dosallocmem_attrib &= ~OBJ_ANY;
		}
	}
	list_init(&pad_list);
	page_size = 4096;
}
/* Free all alignment pads accumulated by os2_alloc_aligned. */
static void amalloc_os_done(void)
{
	while (!list_is_empty(&pad_list)) {
		struct pad_descriptor *pd = get_struct(pad_list.prev, struct pad_descriptor, pad_entry);
		list_del(&pd->pad_entry);
		amalloc_do_unmap(pd->base, pd->size);
		free(pd);
	}
}
#else

/* Generic (mmap-based) implementations of the OS hooks. */
static inline void amalloc_do_unmap(void *ptr, size_t size)
{
	os_munmap(ptr, size, false);
}

static void amalloc_os_init(void)
{
	page_size = os_getpagesize();
}

static void amalloc_os_done(void)
{
}

#endif
500 #ifdef POINTER_COMPRESSION_POSSIBLE
502 static bool do_enable_mapping(void *ptr, size_t size, bool attr_unused clr)
504 #if defined(OS_CYGWIN) || defined(OS_WIN32)
505 ajla_error_t sink;
506 if (unlikely(!os_mprotect(ptr, size, PROT_HEAP, &sink)))
507 return false;
508 if (unlikely(clr))
509 memset(ptr, 0, size);
510 return true;
511 #else
512 ajla_error_t sink;
513 void *r = os_mmap(ptr, size, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, handle_none, 0, &sink);
514 if (likely(r == MAP_FAILED))
515 return false;
516 if (unlikely(r != ptr))
517 internal(file_line, "do_enable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r, ptr);
518 return true;
519 #endif
/* Return a range to reserved-but-inaccessible state: PROT_NONE mprotect on
 * Cygwin/NT, a PROT_NONE MAP_NORESERVE remap elsewhere (which drops the
 * backing pages).  Failure is only a warning — the range merely stays
 * committed. */
static void do_disable_mapping(void *ptr, size_t size)
{
#if defined(OS_CYGWIN) || defined(OS_WIN32)
	ajla_error_t err;
	if (unlikely(!os_mprotect(ptr, size, PROT_NONE, &err))) {
		warning("failed to clear existing mapping: mprotect(%p, %"PRIxMAX") returned error: %s", ptr, (uintmax_t)size, error_decode(err));
		return;
	}
#else
	ajla_error_t err;
	void *r = os_mmap(ptr, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, handle_none, 0, &err);
	if (unlikely(r == MAP_FAILED)) {
		warning("failed to clear existing mapping: os_mmap(%p, %"PRIxMAX") returned error: %s", ptr, (uintmax_t)size, error_decode(err));
		return;
	}
	if (unlikely(r != ptr))
		internal(file_line, "do_disable_mapping: os_mmap(MAP_FIXED) returned different pointer: %p != %p", r, ptr);
#endif
}
/* Pointer compression: the compressible address space is divided into
 * arena-sized slots tracked by rmap[].  For a free run, f of the first
 * slot holds the index of the last slot and vice versa (see
 * reserve_free_merge_run); f == 0 means the slot is allocated. */
static struct tree rmap_tree;		/* free runs, ordered by rmap_compare */

static struct reserved_map {
	struct tree_entry free_entry;
	unsigned f;
} *rmap;

static mutex_t rmap_mutex;

#define RESERVED_MAP_ENTRIES ((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE - ARENA_BITS))
553 static void rmap_lock(void)
555 if (unlikely(!amalloc_threads_initialized))
556 return;
557 mutex_lock(&rmap_mutex);
560 static void rmap_unlock(void)
562 if (unlikely(!amalloc_threads_initialized))
563 return;
564 mutex_unlock(&rmap_mutex);
/* Tree ordering for free runs: primarily by run length, ties broken by
 * start index.  Never returns 0, so every run occupies a distinct tree
 * position even when lengths collide. */
static int rmap_compare(const struct tree_entry *entry, uintptr_t idx2)
{
	struct reserved_map *rmap1 = get_struct(entry, struct reserved_map, free_entry);
	unsigned idx1 = rmap1 - rmap;
	unsigned len1, len2;
	/* f of a free run's first slot is the index of its last slot */
	len1 = rmap[idx1].f + 1 - idx1;
	len2 = rmap[idx2].f + 1 - idx2;
	if (len1 != len2)
		return len1 < len2 ? -1 : 1;
	if (idx1 < idx2)
		return -1;
	return 1;
}
/* Return the run [idx, idx+len) to the free pool, coalescing with adjacent
 * free runs.  The endpoint slots of each free run store each other's index
 * in f, which makes neighbour detection O(1).  Caller holds rmap_lock.
 * NOTE(review): reads rmap[idx - 1], so slot 0 must never be freed — the
 * allocators use index 0 as their failure value, so it is presumably never
 * handed out; verify. */
static void reserve_free_merge_run(unsigned idx, unsigned len)
{
	struct tree_insert_position ins;
	if (rmap[idx - 1].f) {
		/* predecessor is free: its last slot's f is its start index */
		unsigned more = idx - rmap[idx - 1].f;
		idx -= more;
		len += more;
		tree_delete(&rmap[idx].free_entry);
	}
	if (idx + len < RESERVED_MAP_ENTRIES && rmap[idx + len].f) {
		/* successor is free: its first slot's f is its last index */
		unsigned more = rmap[idx + len].f + 1 - (idx + len);
		tree_delete(&rmap[idx + len].free_entry);
		len += more;
	}
	rmap[idx].f = idx + len - 1;
	rmap[idx + len - 1].f = idx;
	tree_find_for_insert(&rmap_tree, rmap_compare, idx, &ins);
	tree_insert_after_find(&rmap[idx].free_entry, &ins);
}
/* Carve the allocation [idx, idx+len) out of the free run [idx1, idx1+len1):
 * mark the allocated endpoints (f = 0), unlink the whole run, and re-free
 * any remainder on either side.  Caller holds rmap_lock. */
static void reserve_sub_alloc(unsigned idx1, unsigned len1, unsigned idx, unsigned len)
{
	struct reserved_map *rmap1 = &rmap[idx1];
	rmap[idx].f = 0;
	rmap[idx + len - 1].f = 0;
	tree_delete(&rmap1->free_entry);
	if (idx > idx1) {
		reserve_free_merge_run(idx1, idx - idx1);
	}
	if (idx + len < idx1 + len1) {
		reserve_free_merge_run(idx + len, (idx1 + len1) - (idx + len));
	}
}
615 static int rmap_find(const struct tree_entry *entry, uintptr_t len)
617 struct reserved_map *rmap1 = get_struct(entry, struct reserved_map, free_entry);
618 unsigned idx1 = rmap1 - rmap;
619 unsigned len1 = rmap[idx1].f + 1 - idx1;
620 if (len1 < len)
621 return -1;
622 if (len1 > len)
623 return 1;
624 return 0;
/* Allocate a run of len slots aligned to al slots.  The search asks for
 * len + al - 1 slots so that rounding the start up to the alignment always
 * still leaves room for len.  Returns the slot index, or 0 on failure.
 * Caller holds rmap_lock. */
static unsigned reserve_alloc_run(unsigned al, unsigned len)
{
	struct tree_entry *ee;
	ee = tree_find_best(&rmap_tree, rmap_find, len + al - 1);
	if (likely(ee != NULL)) {
		struct reserved_map *rmap1 = get_struct(ee, struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		unsigned idx = round_up(idx1, al);
		reserve_sub_alloc(idx1, len1, idx, len);
		return idx;
	}
	return 0;
}
/* Resize the allocated run at start from old_len to new_len slots in
 * place.  Shrinking always succeeds (the tail is freed); growing succeeds
 * only when the run immediately after is free and large enough.
 * Caller holds rmap_lock. */
static bool reserve_realloc_run(unsigned start, unsigned old_len, unsigned new_len)
{
	unsigned xlen;
	if (unlikely(old_len >= new_len)) {
		if (unlikely(old_len == new_len))
			return true;
		/* mark the new end, free the tail */
		rmap[start + new_len - 1].f = 0;
		reserve_free_merge_run(start + new_len, old_len - new_len);
		return true;
	}
	if (unlikely(start + old_len == RESERVED_MAP_ENTRIES))
		return false;
	if (!rmap[start + old_len].f)
		return false;
	xlen = rmap[start + old_len].f + 1 - (start + old_len);
	if (xlen < new_len - old_len)
		return false;
	tree_delete(&rmap[start + old_len].free_entry);
	rmap[start + new_len - 1].f = 0;
	if (xlen > new_len - old_len)
		reserve_free_merge_run(start + new_len, xlen - (new_len - old_len));
	return true;
}
/* Allocate len slots from the tail of the highest-addressed free run that
 * can hold them (linear scan over all free runs).  Returns the slot index,
 * or 0 on failure.  Caller holds rmap_lock. */
static unsigned reserve_last_run(unsigned len)
{
	struct tree_entry *e;
	struct reserved_map *rmap1;
	unsigned idx1, len1;
	unsigned best_idx = 0;
	for (e = tree_first(&rmap_tree); e; e = tree_next(e)) {
		rmap1 = get_struct(e, struct reserved_map, free_entry);
		idx1 = rmap1 - rmap;
		len1 = rmap[idx1].f + 1 - idx1;
		if (likely(len1 >= len)) {
			if (idx1 > best_idx)
				best_idx = idx1;
		}
	}
	if (unlikely(!best_idx))
		return 0;
	idx1 = best_idx;
	rmap1 = &rmap[idx1];
	len1 = rmap[idx1].f + 1 - idx1;
	/* take the run's tail so lower addresses stay available */
	reserve_sub_alloc(idx1, len1, idx1 + len1 - len, len);
	return idx1 + len1 - len;
}
/* Claim the specific arena-aligned range [ptr, ptr+length) from the
 * reserved pool, if it lies entirely inside a single free run.  Lets
 * callers pin exact addresses.  Returns false if unavailable. */
bool amalloc_ptrcomp_try_reserve_range(void *ptr, size_t length)
{
	unsigned idx, len;
	struct tree_entry *e;

	if (unlikely(!length))
		return true;

	if (unlikely(ptr_to_num(ptr) & (ARENA_SIZE - 1)))
		return false;
	idx = ptr_to_num(ptr) >> ARENA_BITS;
	len = (length + ARENA_SIZE - 1) >> ARENA_BITS;

	rmap_lock();

	for (e = tree_first(&rmap_tree); e; e = tree_next(e)) {
		struct reserved_map *rmap1 = get_struct(e, struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		if (idx >= idx1 && idx + len <= idx1 + len1) {
			reserve_sub_alloc(idx1, len1, idx, len);
			rmap_unlock();
			return true;
		}
	}

	rmap_unlock();

	return false;
}
/* Try to reserve (PROT_NONE, no backing) [ptr, ptr+len) below the
 * pointer-compression limit.  With MAP_EXCL the kernel enforces that the
 * range is unoccupied; otherwise ptr is only a hint and we give the
 * mapping back if the kernel placed it elsewhere. */
static bool try_to_reserve_memory(void *ptr, size_t len)
{
	ajla_error_t sink;
	void *res;
	int extra_flags =
#ifdef MAP_EXCL
		MAP_FIXED | MAP_EXCL |
#endif
		0;

	if (ptr_to_num(ptr) + len > (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE))
		return false;

	res = os_mmap(ptr, len, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | extra_flags, handle_none, 0, &sink);
	if (unlikely(res == MAP_FAILED))
		return false;

	if (res != ptr) {
		os_munmap(res, len, false);
		return false;
	}

	return true;
}
/* At startup, reserve as much of the low 2^(32+shift) address space as
 * possible for pointer compression, registering every reserved run in the
 * rmap pool.  Scans with a platform-tuned step; at each reservable point
 * it first tries to grab everything up to the limit, then binary-searches
 * the largest reservable length.
 * NOTE(review): the rmap table mmap passes a NULL error pointer —
 * presumably os_mmap aborts internally on failure in that mode; verify. */
static void reserve_memory(void)
{
	uintptr_t ptr, step;
	tree_init(&rmap_tree);
	rmap = os_mmap(NULL, RESERVED_MAP_ENTRIES * sizeof(struct reserved_map), PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, NULL);
#if defined(OS_CYGWIN)
	step = ARENA_SIZE * 256;
	ptr = round_up((uintptr_t)1 << 34, step);
#elif defined(__sun__)
	step = ARENA_SIZE;
	ptr = round_up((uintptr_t)1 << 31, step);
#else
	step = ARENA_SIZE;
	ptr = ARENA_SIZE;
#endif
	while (ptr < (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE)) {
		size_t len, bit;

		/* skip ahead until one probe-sized reservation succeeds */
		if (!try_to_reserve_memory(num_to_ptr(ptr), step)) {
			ptr += step;
			continue;
		}
		os_munmap(num_to_ptr(ptr), step, false);

		/* best case: everything from here to the limit is free */
		len = ((uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE)) - ptr;
		if (try_to_reserve_memory(num_to_ptr(ptr), len)) {
			reserve_free_merge_run(ptr >> ARENA_BITS, len >> ARENA_BITS);
			break;
		}

		/* otherwise binary-search the largest reservable length */
		len = 0;
		bit = (uintptr_t)1 << (32 + POINTER_COMPRESSION_POSSIBLE - 1);

		for (; bit >= step; bit >>= 1) {
			if (try_to_reserve_memory(num_to_ptr(ptr), len | bit)) {
				os_munmap(num_to_ptr(ptr), len | bit, false);
				len |= bit;
			}
		}

		if (len) {
			/* re-reserve the found length for keeps */
			if (likely(try_to_reserve_memory(num_to_ptr(ptr), len))) {
				reserve_free_merge_run(ptr >> ARENA_BITS, len >> ARENA_BITS);
			} else {
				ptr += step;
				continue;
			}
		}

		ptr += len + step;
	}
	if (unlikely(tree_is_empty(&rmap_tree)))
		fatal("unable to reserve any memory for pointer compression");
}
/* Tear down pointer-compression state: unmap every remaining free run,
 * then the rmap table itself.  Only free runs are unmapped, so any range
 * still allocated at this point is left mapped. */
static void unreserve_memory(void)
{
	while (!tree_is_empty(&rmap_tree)) {
		struct reserved_map *rmap1 = get_struct(tree_any(&rmap_tree), struct reserved_map, free_entry);
		unsigned idx1 = rmap1 - rmap;
		unsigned len1 = rmap[idx1].f + 1 - idx1;
		amalloc_do_unmap(num_to_ptr((uintptr_t)idx1 << ARENA_BITS), (size_t)len1 << ARENA_BITS);
		tree_delete(&rmap[idx1].free_entry);
	}
	amalloc_do_unmap(rmap, RESERVED_MAP_ENTRIES * sizeof(struct reserved_map));
}
813 #endif
/* Free a run obtained from amalloc_run_alloc.  Under pointer compression
 * the backing pages are disabled and the arena slots are returned to the
 * rmap pool; otherwise the mapping is simply removed. */
void amalloc_run_free(void *ptr, size_t length)
{
	if (unlikely(!length))
		return;
#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled) {
		unsigned idx = ptr_to_num(ptr) >> ARENA_BITS;
		size_t l = (length + ARENA_SIZE - 1) >> ARENA_BITS;
		do_disable_mapping(ptr, l << ARENA_BITS);
		rmap_lock();
		reserve_free_merge_run(idx, l);
		rmap_unlock();
		return;
	}
#endif
	amalloc_do_unmap(ptr, length);
}
834 void *amalloc_run_alloc(size_t al, size_t length, bool attr_unused clr, bool attr_unused saved)
836 size_t attr_unused extra_length;
837 void *ptr;
838 void attr_unused *base_address;
839 uintptr_t attr_unused nptr, aptr, fptr, eptr;
840 #if !defined(OS_OS2)
841 ajla_error_t sink;
842 #endif
844 if (unlikely(al < page_size))
845 al = page_size;
847 length = round_up(length, page_size);
848 if (unlikely(!length))
849 return NULL;
851 #ifdef POINTER_COMPRESSION_POSSIBLE
852 if (pointer_compression_enabled) {
853 size_t l;
854 unsigned res;
855 if (unlikely(al < ARENA_SIZE))
856 al = ARENA_SIZE;
857 al >>= ARENA_BITS;
858 l = round_up(length, ARENA_SIZE) >> ARENA_BITS;
859 if (unlikely(!l))
860 return NULL;
861 if (unlikely(l != (unsigned)l))
862 return NULL;
863 rmap_lock();
864 if (unlikely(saved))
865 res = reserve_last_run(l);
866 else
867 res = reserve_alloc_run(al, l);
868 rmap_unlock();
869 if (unlikely(!res))
870 return NULL;
871 ptr = num_to_ptr((uintptr_t)res << ARENA_BITS);
872 if (unlikely(!do_enable_mapping(ptr, length, clr))) {
873 rmap_lock();
874 reserve_free_merge_run(res, l);
875 rmap_unlock();
876 return NULL;
878 goto madvise_ret;
880 #endif
882 base_address = NULL;
883 if (unlikely(saved)) {
884 base_address = num_to_ptr(round_down(ptr_to_num(&saved) / 2, page_size));
887 #if defined(OS_OS2)
888 if (unlikely(al > ARENA_SIZE))
889 return NULL;
890 ptr = os2_alloc_aligned(length);
891 #elif defined(MAP_ALIGNED) && !defined(__minix__)
892 if (unlikely(al != (bitmap_t)al))
893 return NULL;
894 ptr = os_mmap(base_address, length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(find_bit(al)), handle_none, 0, &sink);
895 if (unlikely(ptr == MAP_FAILED))
896 return NULL;
897 if (unlikely((ptr_to_num(ptr) & (al - 1)) != 0))
898 fatal("os_mmap returned unaligned pointer: %p, required alignment %lx", ptr, (unsigned long)ARENA_SIZE);
899 #else
900 extra_length = length + al - page_size;
901 if (unlikely(extra_length < length))
902 return NULL;
904 ptr = os_mmap(base_address, extra_length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, &sink);
906 if (unlikely(ptr == MAP_FAILED))
907 return NULL;
909 nptr = ptr_to_num(ptr);
910 aptr = round_up(nptr, al);
911 fptr = aptr + length;
912 eptr = nptr + extra_length;
913 amalloc_run_free(num_to_ptr(nptr), aptr - nptr);
914 amalloc_run_free(num_to_ptr(fptr), eptr - fptr);
916 ptr = num_to_ptr(aptr);
917 #endif
919 #ifdef POINTER_COMPRESSION_POSSIBLE
920 madvise_ret:
921 #endif
923 #if !defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && defined(MADV_HUGEPAGE)
924 if (length == ARENA_SIZE) {
925 int madv = MADV_HUGEPAGE;
926 int r;
927 EINTR_LOOP(r, madvise(ptr, length, madv));
928 /*if (unlikely(r == -1)) {
929 int er = errno;
930 warning("madvise(%d) failed: %d, %s", madv, er, error_decode(error_from_errno(EC_SYSCALL, er)));
933 #endif
935 return ptr;
/* Resize a run, preferring in-place strategies before falling back to
 * allocate-copy-free.  Order tried: rmap-based extension under pointer
 * compression; trimming for shrinks; mremap; mapping the extension right
 * after the block; new allocation plus memcpy (optionally moved into place
 * with MREMAP_FIXED).  Returns NULL with the old block intact on failure. */
static void *amalloc_run_realloc(void *ptr, size_t old_length, size_t new_length)
{
	void attr_unused *n;

	old_length = round_up(old_length, page_size);
	new_length = round_up(new_length, page_size);
	if (unlikely(!new_length))
		return NULL;

#ifdef POINTER_COMPRESSION_POSSIBLE
	if (pointer_compression_enabled) {
		bool f;
		size_t old_length_arenas = round_up(old_length, ARENA_SIZE) >> ARENA_BITS;
		size_t new_length_arenas = round_up(new_length, ARENA_SIZE) >> ARENA_BITS;
		if (unlikely(!new_length_arenas))
			return NULL;
		if (unlikely(new_length_arenas != (unsigned)new_length_arenas))
			return NULL;
		rmap_lock();
		f = reserve_realloc_run(ptr_to_num(ptr) >> ARENA_BITS, old_length_arenas, new_length_arenas);
		rmap_unlock();
		/*debug("try realloc: %p, %d", ptr, f);*/
		if (f) {
			if (likely(old_length < new_length)) {
				if (unlikely(!do_enable_mapping(cast_ptr(char *, ptr) + old_length, new_length - old_length, false))) {
					/* enabling failed: undo the rmap extension */
					rmap_lock();
					reserve_free_merge_run((ptr_to_num(ptr) >> ARENA_BITS) + old_length_arenas, new_length_arenas - old_length_arenas);
					rmap_unlock();
					return NULL;
				}
			} else if (old_length > new_length) {
				do_disable_mapping(cast_ptr(char *, ptr) + new_length, old_length - new_length);
			}
			return ptr;
		}
		return NULL;
	}
#endif

#if defined(OS_OS2) || defined(OS_WIN32)
	/* these systems cannot unmap or remap part of an allocation */
	if (new_length == old_length)
		return ptr;
	return NULL;
#else
	if (unlikely(new_length <= old_length)) {
		amalloc_run_free(cast_ptr(char *, ptr) + new_length, old_length - new_length);
		return ptr;
	}

#if defined(OS_HAS_MREMAP)
	{
		ajla_error_t sink;
		void *r = os_mremap(ptr, old_length, new_length, 0, NULL, &sink);
		if (r != MAP_FAILED) {
			if (unlikely(r != ptr))
				internal(file_line, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r, ptr);
			return ptr;
		}
	}
#else
	{
		/* try to map the extension directly after the block */
		ajla_error_t sink;
		n = os_mmap(cast_ptr(char *, ptr) + old_length, new_length - old_length, PROT_HEAP, MAP_PRIVATE | MAP_ANONYMOUS, handle_none, 0, &sink);
		if (likely(n != MAP_FAILED)) {
			if (n == cast_ptr(char *, ptr) + old_length)
				return ptr;
			/* kernel placed it elsewhere: give it back */
			amalloc_do_unmap(n, new_length - old_length);
		}
	}
#endif

	n = amalloc_run_alloc(ARENA_SIZE, new_length, false, false);
	if (unlikely(!n))
		return NULL;

#if defined(OS_HAS_MREMAP) && defined(MREMAP_FIXED)
	{
		/* move the old pages into the new reservation without copying */
		ajla_error_t sink;
		void *r = os_mremap(ptr, old_length, new_length, MREMAP_MAYMOVE | MREMAP_FIXED, n, &sink);
		if (likely(r != MAP_FAILED)) {
			if (unlikely(r != n))
				internal(file_line, "amalloc_run_realloc: os_mremap returned different pointer: %p != %p", r, n);
			return n;
		}
	}
#endif

	memcpy(n, ptr, old_length);
	amalloc_run_free(ptr, old_length);
	return n;
#endif
}
1032 static inline void sbc_lock(struct small_block_cache *sbc)
1034 if (likely(amalloc_threads_initialized))
1035 mutex_lock(&sbc->mutex);
1038 static inline void sbc_unlock(struct small_block_cache *sbc)
1040 if (likely(amalloc_threads_initialized))
1041 mutex_unlock(&sbc->mutex);
1044 static inline void arena_lock(struct arena *a)
1046 if (unlikely(!amalloc_threads_initialized))
1047 return;
1048 address_lock(a, DEPTH_ARENA);
1051 static inline void arena_unlock(struct arena *a)
1053 if (unlikely(!amalloc_threads_initialized))
1054 return;
1055 address_unlock(a, DEPTH_ARENA);
1058 static void arena_tree_lock(void)
1060 if (unlikely(!amalloc_threads_initialized))
1061 return;
1062 mutex_lock(&arena_tree_mutex);
1065 static void arena_tree_unlock(void)
1067 if (unlikely(!amalloc_threads_initialized))
1068 return;
1069 mutex_unlock(&arena_tree_mutex);
1072 static void huge_tree_lock(void)
1074 if (unlikely(!amalloc_threads_initialized))
1075 return;
1076 mutex_lock(&huge_tree_mutex);
1079 static void huge_tree_unlock(void)
1081 if (unlikely(!amalloc_threads_initialized))
1082 return;
1083 mutex_unlock(&huge_tree_mutex);
1086 static int huge_tree_compare(const struct tree_entry *entry, uintptr_t addr)
1088 struct huge_entry *e = get_struct(entry, struct huge_entry, entry);
1089 if (ptr_to_num(e->ptr) < addr)
1090 return -1;
1091 if (ptr_to_num(e->ptr) > addr)
1092 return 1;
1093 return 0;
/* Register a huge allocation in huge_tree; aborts if the address is
 * already tracked (that would indicate allocator corruption). */
static void huge_tree_insert(struct huge_entry *e)
{
	struct tree_insert_position ins;
	struct tree_entry attr_unused *ee;
	huge_tree_lock();
	ee = tree_find_for_insert(&huge_tree, huge_tree_compare, ptr_to_num(e->ptr), &ins);
	ajla_assert_lo(ee == NULL, (file_line, "huge_tree_insert: entry for address %p is already present", e->ptr));
	tree_insert_after_find(&e->entry, &ins);
	huge_tree_unlock();
}
/* Unregister and return the huge entry for ptr; aborts if not present.
 * Ownership of the returned entry passes to the caller. */
static struct huge_entry *huge_tree_delete(void *ptr)
{
	struct tree_entry *entry;
	huge_tree_lock();
	entry = tree_find(&huge_tree, huge_tree_compare, ptr_to_num(ptr));
	ajla_assert_lo(entry != NULL, (file_line, "huge_tree_delete: entry for address %p not found", ptr));
	tree_delete(entry);
	huge_tree_unlock();
	return get_struct(entry, struct huge_entry, entry);
}
1119 static attr_always_inline struct per_thread *amalloc_per_thread(void)
1121 if (unlikely(!amalloc_threads_initialized))
1122 return &thread1;
1123 else
1124 return tls_get(struct per_thread *, per_thread);
1127 static void arena_detach(struct arena *a);
1128 static bool amalloc_detach_small(struct small_block_cache *sbc, union midblock *m);
1129 #ifdef AMALLOC_EAGER_FREE
1130 static void amalloc_test_free_small(struct small_block_cache *sbc, size_t idx, union midblock *m);
1131 #endif
/* Install run m as the active run for size class i.  Slot 0 mirrors class
 * 0 (small_runs has N_CLASSES + 1 entries), presumably so a size-0 request
 * can be served through slot 0 without index adjustment — confirm at the
 * allocation fast path. */
static void set_small_run(struct per_thread *pt, unsigned i, union midblock *m)
{
	if (!i)
		pt->small_runs[0] = m;
	pt->small_runs[i + 1] = m;
}
/* Release a thread's allocator state: publish each active small run back
 * to its class's global cache and detach the thread's arena.  The
 * thread-owned free bits are merged into atomic_map first so the bitmap is
 * consistent once the run becomes shared. */
static void detach_pt_data(struct per_thread *pt)
{
	unsigned i;
	struct arena *a;
	for (i = 0; i < N_CLASSES; i++) {
		union midblock *m = pt->small_runs[i + 1];
		struct small_block_cache *sbc = &small_block_cache[i];
#ifdef AMALLOC_EAGER_FREE
		size_t idx;
#endif
		set_small_run(pt, i, NULL);
		if (m == &full_midblock)
			continue;
		sbc_lock(sbc);
		/* hand the locally owned free slots over to the shared map */
		amalloc_atomic_or(&m->s.atomic_map, m->s.map);
		m->s.map = 0;
		if (unlikely(!amalloc_detach_small(sbc, m))) {
			int er = errno;
			fatal("arealloc failed: %d, %s", er, error_decode(error_from_errno(EC_SYSCALL, er)));
		}
#ifdef AMALLOC_EAGER_FREE
		/* read the cache index under the lock; tested after unlock */
		idx = m->s.index;
#endif
		sbc_unlock(sbc);
#ifdef AMALLOC_EAGER_FREE
		amalloc_test_free_small(sbc, idx, m);
#endif
	}
	a = pt->arena;
	pt->arena = BAD_POINTER_1;
	if (a) {
		arena_lock(a);
		arena_detach(a);
		arena_unlock(a);
	}
}
1177 static void per_thread_destructor(tls_destructor_t *destr)
1179 struct per_thread *pt = get_struct(destr, struct per_thread, destructor);
1180 detach_pt_data(pt);
1181 free(pt);
1184 static void amalloc_per_thread_init(struct per_thread *pt)
1186 unsigned i;
1187 pt->arena = NULL;
1188 for (i = 0; i < N_CLASSES; i++)
1189 set_small_run(pt, i, &full_midblock);
1192 static bool amalloc_per_thread_alloc(void)
1194 struct per_thread *pt;
1195 pt = malloc(sizeof(struct per_thread));
1196 if (unlikely(!pt))
1197 return false;
1198 amalloc_per_thread_init(pt);
1199 tls_destructor(&pt->destructor, per_thread_destructor);
1200 tls_set(struct per_thread *, per_thread, pt);
1201 return true;
1204 static int arena_tree_compare(const struct tree_entry *entry, uintptr_t len)
1206 struct arena *a = get_struct(entry, struct arena, arena_entry);
1207 if (a->max_midblock_run < len)
1208 return -1;
1209 return 1;
1212 static void arena_insert_locked(struct arena *a)
1214 struct tree_insert_position ins;
1215 struct tree_entry attr_unused *ee;
1216 if (a->max_midblock_run == ARENA_MIDBLOCKS - ARENA_PREFIX) {
1217 /*debug("freeing empty arena %p", a);*/
1218 amalloc_run_free(a, ARENA_SIZE);
1219 return;
1221 ee = tree_find_for_insert(&arena_tree, arena_tree_compare, a->max_midblock_run, &ins);
1222 ajla_assert(ee == NULL, (file_line, "arena_insert_locked: entry for address %p is already present", a));
1223 tree_insert_after_find(&a->arena_entry, &ins);
/* After midblocks were freed in a detached arena, re-key the arena in the
 * free-arena tree if its largest free run grew substantially (or the
 * arena became completely empty, in which case insertion frees it). */
1226 static void arena_relink(struct arena *a, unsigned new_run)
1228 if (!a->attached) {
1229 bool full_free = new_run == ARENA_MIDBLOCKS - ARENA_PREFIX;
/* only bother re-inserting when the run at least doubled - avoids
 * hammering the tree lock for every small free */
1230 if (new_run >= (unsigned)a->max_midblock_run * 2 || full_free) {
1231 arena_tree_lock();
/* re-check under the lock: another thread may have attached the arena
 * between the unlocked test and here */
1232 if (!a->attached) {
1233 tree_delete(&a->arena_entry);
1234 a->max_midblock_run = new_run;
1235 arena_insert_locked(a);
1237 arena_tree_unlock();
1242 static int midblock_compare(const struct tree_entry *entry, uintptr_t idx2)
1244 union midblock *m1 = get_struct(entry, union midblock, free_entry);
1245 struct arena *a = addr_to_arena(m1);
1246 unsigned idx1 = midblock_to_idx(a, m1);
1247 unsigned len1, len2;
1248 ajla_assert(a->map[idx1] & MAP_FREE, (file_line, "midblock_compare: idx1 is not free"));
1249 ajla_assert(a->map[idx2] & MAP_FREE, (file_line, "midblock_compare: idx2 is not free"));
1250 len1 = (a->map[idx1] & ~MAP_FREE) + 1 - idx1;
1251 len2 = (a->map[idx2] & ~MAP_FREE) + 1 - idx2;
1252 if (len1 != len2)
1253 return len1 < len2 ? -1 : 1;
1254 if (idx1 < idx2)
1255 return -1;
1256 return 1;
1259 static void arena_free_midblock(struct arena *a, unsigned start, unsigned len)
1261 struct tree_insert_position ins;
1262 a->map[start] = MAP_FREE | (start + len - 1);
1263 a->map[start + len - 1] = MAP_FREE | start;
1264 tree_find_for_insert(&a->midblock_free, midblock_compare, start, &ins);
1265 tree_insert_after_find(&idx_to_midblock(a, start)->free_entry, &ins);
/* Free midblocks [start, start+len), coalescing with free neighbours,
 * optionally returning whole pages to the OS, and re-linking the arena.
 * Caller holds the arena lock. */
1268 static void arena_free_merge_midblock(struct arena *a, unsigned start, unsigned len)
1270 uintptr_t attr_unused start_ptr, end_ptr;
/* merge with a preceding free run */
1271 if (start && a->map[start - 1] & MAP_FREE) {
1272 unsigned more = start - (a->map[start - 1] & ~MAP_FREE);
1273 start -= more;
1274 len += more;
1275 tree_delete(&idx_to_midblock(a, start)->free_entry);
/* merge with a following free run */
1277 if (start + len < ARENA_MIDBLOCKS && a->map[start + len] & MAP_FREE) {
1278 unsigned more = (a->map[start + len] & ~MAP_FREE) + 1 - (start + len);
1279 tree_delete(&idx_to_midblock(a, start + len)->free_entry);
1280 len += more;
1282 #if defined(AMALLOC_TRIM_MIDBLOCKS) && defined(HAVE_MADVISE) && ((defined(__linux__) && defined(MADV_DONTNEED)) || (!defined(__linux__) && defined(MADV_FREE)))
/* give fully-covered pages of the merged run back to the kernel */
1283 start_ptr = round_up(ptr_to_num(idx_to_midblock(a, start)), page_size);
1284 end_ptr = round_down(ptr_to_num(idx_to_midblock(a, start + len)), page_size);
1285 if (start_ptr < end_ptr) {
1286 #ifdef __linux__
/* on Linux MADV_DONTNEED discards the pages immediately */
1287 int madv = MADV_DONTNEED;
1288 #else
1289 int madv = MADV_FREE;
1290 #endif
1291 int r;
1292 EINTR_LOOP(r, madvise(num_to_ptr(start_ptr), end_ptr - start_ptr, madv));
1293 if (unlikely(r == -1)) {
1294 int er = errno;
1295 warning("madvise(%d) failed: %d, %s", madv, er, error_decode(error_from_errno(EC_SYSCALL, er)));
1298 #endif
1299 arena_free_midblock(a, start, len);
1300 arena_relink(a, len);
1303 static void arena_grab_midblocks(struct arena *a, unsigned idx1, unsigned idx, unsigned len)
1305 unsigned found_end;
1306 union midblock *m = idx_to_midblock(a, idx1);
1307 tree_delete(&m->free_entry);
1308 found_end = (a->map[idx1] & ~MAP_FREE) + 1;
1309 if (unlikely(idx > idx1))
1310 arena_free_midblock(a, idx1, idx - idx1);
1311 if (found_end > idx + len)
1312 arena_free_midblock(a, idx + len, found_end - (idx + len));
1315 static int midblock_find(const struct tree_entry *entry, uintptr_t len)
1317 union midblock *m1 = get_struct(entry, union midblock, free_entry);
1318 struct arena *a = addr_to_arena(m1);
1319 unsigned idx1 = midblock_to_idx(a, m1);
1320 unsigned len1 = (a->map[idx1] & ~MAP_FREE) + 1 - idx1;
1321 if (len1 < len)
1322 return -1;
1323 if (len1 > len)
1324 return 1;
1325 return 0;
1328 static union midblock *arena_alloc_midblock(struct arena *a, unsigned midblock_alignment, unsigned midblocks)
1330 struct tree_entry *ee;
1331 ee = tree_find_best(&a->midblock_free, midblock_find, midblocks + midblock_alignment - 1);
1332 if (likely(ee != NULL)) {
1333 union midblock *m = get_struct(ee, union midblock, free_entry);
1334 unsigned best = midblock_to_idx(a, m);
1335 unsigned best_aligned = round_up(best, midblock_alignment);
1336 arena_grab_midblocks(a, best, best_aligned, midblocks);
1337 a->map[best_aligned] = best_aligned + midblocks - 1;
1338 a->map[best_aligned + midblocks - 1] = best_aligned;
1339 return idx_to_midblock(a, best_aligned);
1341 return NULL;
1344 static bool arena_try_realloc_midblock(struct arena *a, unsigned base, unsigned orig_len, unsigned new_len)
1346 unsigned free_len;
1347 if (unlikely(new_len <= orig_len)) {
1348 if (new_len == orig_len)
1349 return true;
1350 a->map[base] = base + new_len - 1;
1351 a->map[base + new_len - 1] = base;
1352 arena_free_merge_midblock(a, base + new_len, orig_len - new_len);
1353 } else {
1354 if (unlikely(base + new_len > ARENA_MIDBLOCKS))
1355 return false;
1356 if (!(a->map[base + orig_len] & MAP_FREE))
1357 return false;
1358 free_len = (a->map[base + orig_len] & ~MAP_FREE) + 1 - (base + orig_len);
1359 if (free_len < new_len - orig_len)
1360 return false;
1361 arena_grab_midblocks(a, base + orig_len, base + orig_len, new_len - orig_len);
1362 a->map[base] = base + new_len - 1;
1363 a->map[base + new_len - 1] = base;
1365 return true;
1368 static struct arena *arena_alloc(void)
1370 struct arena *a;
1371 a = amalloc_run_alloc(ARENA_SIZE, ARENA_SIZE, false, false);
1372 if (unlikely(!a))
1373 return NULL;
1374 tree_init(&a->midblock_free);
1375 a->map[0] = ARENA_PREFIX - 1;
1376 a->map[ARENA_PREFIX - 1] = 0;
1377 a->attached = true;
1378 arena_free_midblock(a, ARENA_PREFIX, ARENA_MIDBLOCKS - ARENA_PREFIX);
1379 /*debug("allocating arena %p", a);*/
1380 return a;
1384 static unsigned arena_compute_max_run(struct arena *a)
1386 struct tree_entry *e = tree_last(&a->midblock_free);
1387 if (!e) {
1388 return 0;
1389 } else {
1390 union midblock *m = get_struct(e, union midblock, free_entry);
1391 unsigned idx = midblock_to_idx(a, m);
1392 return (a->map[idx] & ~MAP_FREE) + 1 - idx;
/* Detach an arena from its owning thread and publish it in the global
 * free-arena tree so other threads may adopt it.  Caller holds the
 * arena lock. */
1396 static void arena_detach(struct arena *a)
1398 a->max_midblock_run = arena_compute_max_run(a);
1399 arena_tree_lock();
1400 a->attached = false;
1401 arena_insert_locked(a);
1402 arena_tree_unlock();
1405 static int arena_tree_find(const struct tree_entry *entry, uintptr_t len)
1407 struct arena *a = get_struct(entry, struct arena, arena_entry);
1408 if (a->max_midblock_run < len)
1409 return -1;
1410 if (a->max_midblock_run > len)
1411 return 1;
1412 return 0;
1415 static struct arena *arena_attach(unsigned midblocks)
1417 struct arena *a;
1418 struct tree_entry *ee;
1419 arena_tree_lock();
1420 ee = tree_find_best(&arena_tree, arena_tree_find, midblocks);
1421 if (ee) {
1422 a = get_struct(ee, struct arena, arena_entry);
1423 a->attached = true;
1424 tree_delete(&a->arena_entry);
1425 arena_tree_unlock();
1426 return a;
1428 arena_tree_unlock();
1429 return arena_alloc();
/* Allocate an aligned run of midblocks for the calling thread.  Tries the
 * thread's current arena, then a best-fit detached arena, then a brand
 * new arena; gives up (returns NULL) only when arena creation fails. */
1432 static void *amalloc_mid(struct per_thread *pt, unsigned midblock_alignment, unsigned midblocks)
1434 struct arena *a = pt->arena;
/* 'looped' distinguishes the first retry (best-fit attach) from the
 * second (fresh arena) */
1435 bool looped = false;
1436 if (likely(a != NULL)) {
1437 union midblock *m;
1438 do_alloc:
1439 arena_lock(a);
1440 m = arena_alloc_midblock(a, midblock_alignment, midblocks);
1441 if (likely(m != NULL)) {
1442 arena_unlock(a);
1443 return m;
/* this arena can't satisfy the request - release it for other threads */
1445 arena_detach(a);
1446 arena_unlock(a);
1447 pt->arena = NULL;
1449 if (likely(!looped))
1450 a = arena_attach(midblocks + midblock_alignment - 1);
1451 else
1452 a = arena_alloc();
1453 if (unlikely(!a))
1454 return NULL;
1455 pt->arena = a;
1456 looped = true;
1457 goto do_alloc;
1460 static void *amalloc_huge(size_t al, size_t size, bool clr)
1462 struct huge_entry *e;
1463 void *ptr = amalloc_run_alloc(al, size, clr, false);
1464 if (unlikely(!ptr))
1465 return NULL;
1466 e = malloc(sizeof(struct huge_entry));
1467 if (unlikely(!e)) {
1468 amalloc_run_free(ptr, size);
1469 return NULL;
1471 e->ptr = ptr;
1472 e->len = size;
1473 huge_tree_insert(e);
1474 return ptr;
1477 static attr_noinline void *amalloc_mid_huge(struct per_thread *pt, size_t size, bool clr)
1479 if (unlikely(size > MIDBLOCK_LIMIT)) {
1480 return amalloc_huge(ARENA_SIZE, size, clr);
1481 } else {
1482 unsigned midblocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
1483 void *ptr = amalloc_mid(pt, 1, midblocks);
1484 if (likely(ptr != NULL) && clr)
1485 memset(ptr, 0, size);
1486 return ptr;
1490 static attr_noinline void *amemalign_mid_huge(size_t al, size_t size, bool clr)
1492 if (size + al > MIDBLOCK_LIMIT) {
1493 if (al < ARENA_SIZE)
1494 al = ARENA_SIZE;
1495 return amalloc_huge(al, size, clr);
1496 } else {
1497 unsigned midblocks, midblock_alignment;
1498 void *ptr;
1499 struct per_thread *pt = amalloc_per_thread();
1500 if (unlikely(!pt)) {
1501 if (unlikely(!amalloc_per_thread_alloc()))
1502 return NULL;
1503 pt = amalloc_per_thread();
1505 midblocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
1506 midblock_alignment = round_up(al, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
1507 ptr = amalloc_mid(pt, midblock_alignment, midblocks);
1508 if (likely(ptr != NULL) && clr)
1509 memset(ptr, 0, size);
1510 return ptr;
1514 static unsigned reserved_bits(unsigned cls)
1516 size_t size = CLASS_TO_SIZE(cls);
1517 unsigned reserved_bits = div_16(sizeof(full_midblock.s) + size - 1, size);
1518 return reserved_bits;
/* Allocate and initialize a fresh small-block run for class cls: enough
 * midblocks to hold BITMAP_BITS blocks, with all map entries pointing at
 * the run header.  Returns NULL when midblock allocation fails. */
1521 static union midblock *create_small_run(struct per_thread *pt, unsigned cls)
1523 struct arena *a;
1524 unsigned idx, i, res;
1525 size_t size = CLASS_TO_SIZE(cls);
1526 unsigned midblocks = (size * BITMAP_BITS + MIDBLOCK_SIZE - 1) >> MIDBLOCK_BITS;
1527 union midblock *m = amalloc_mid(pt, 1, midblocks);
1528 if (unlikely(!m))
1529 return NULL;
1530 a = addr_to_arena(m);
1531 idx = midblock_to_idx(a, m);
/* every midblock of the run maps back to the run's first index, so
 * afree() can find the header from any interior pointer */
1532 for (i = 0; i < midblocks; i++) {
1533 a->map[idx + i] = idx;
1535 res = reserved_bits(cls);
/* private free map: all blocks free except those holding the header */
1536 m->s.map = (bitmap_t)-1 << res;
1537 store_relaxed(&m->s.atomic_map, 0);
1538 #ifdef AMALLOC_EAGER_FREE
1539 m->s.index = (size_t)-1;
1540 #endif
1541 m->s.reserved_bits = res;
1542 m->s.cls = cls;
1543 m->s.size = size;
/* precomputed reciprocal lets afree() divide by size via multiply */
1544 m->s.reciprocal = div_16(RECIPROCAL_BASE + size - 1, size);
1545 return m;
/* Return a completely free small run's midblocks to its arena. */
1548 static void amalloc_free_small(union midblock *m)
1550 struct arena *a = addr_to_arena(m);
1551 unsigned idx = midblock_to_idx(a, m);
/* same size computation as in create_small_run */
1552 unsigned midblocks = (m->s.size * BITMAP_BITS + MIDBLOCK_SIZE - 1) >> MIDBLOCK_BITS;
1553 /*debug("midblock (%d): %lx / %lx", m->s.cls, m->s.map, m->s.atomic_map);*/
1554 arena_lock(a);
1555 arena_free_merge_midblock(a, idx, midblocks);
1556 arena_unlock(a);
1559 #ifdef AMALLOC_EAGER_FREE
/* If the cached run m still sits at slot idx and every block is free,
 * remove it from the cache and release its midblocks.  The re-check under
 * the lock guards against the slot having been reused concurrently. */
1560 static attr_noinline void amalloc_test_free_small(struct small_block_cache *sbc, size_t idx, union midblock *m)
1562 sbc_lock(sbc);
1563 if (likely(idx < sbc->array_size) && likely(sbc->array[idx] == m) && likely(load_relaxed(&m->s.atomic_map) == (bitmap_t)-1 << m->s.reserved_bits)) {
/* swap-remove: the moved entry's cached index must be updated */
1564 (sbc->array[idx] = sbc->array[--sbc->array_size])->s.index = idx;
1565 } else {
/* not removable - signal "do not free" to the code below */
1566 m = NULL;
1568 sbc_unlock(sbc);
1569 if (likely(m != NULL)) {
1570 amalloc_free_small(m);
1571 /*debug("freed small");*/
1574 #endif
/* Append run m to the per-class cache, growing the cache array (by
 * doubling) when full.  Returns false when the array cannot grow.
 * Caller holds the cache lock. */
1576 static bool amalloc_detach_small(struct small_block_cache *sbc, union midblock *m)
1578 size_t index;
1579 if (unlikely(sbc->array_size == sbc->allocated_size)) {
1580 union midblock **a;
1581 size_t s = sbc->allocated_size * 2;
/* overflow check before computing the byte size */
1582 if (unlikely(s > (size_t)-1 / sizeof(union midblock *)))
1583 return false;
1584 a = arealloc(sbc->array, s * sizeof(union midblock *));
1585 if (unlikely(!a))
1586 return false;
1587 sbc->array = a;
1588 sbc->allocated_size = s;
1589 /*debug("amalloc_detach_small: %ld - %ld - %p", sbc - small_block_cache, s, a);*/
1591 index = sbc->array_size++;
1592 #ifdef AMALLOC_EAGER_FREE
/* the run remembers its slot so eager freeing can find it later */
1593 m->s.index = index;
1594 #endif
1595 sbc->array[index] = m;
1596 return true;
1599 static attr_noinline void *amalloc_small_empty(struct per_thread *pt, size_t size)
1601 unsigned idx = SIZE_TO_INDEX(size);
1602 unsigned cls = INDEX_TO_CLASS(idx);
1603 unsigned i, best_idx, best_count, bit, all_free;
1604 union midblock *m = pt->small_runs[cls + 1];
1605 struct small_block_cache *sbc = &small_block_cache[cls];
1606 sbc_lock(sbc);
1607 if (likely(m != &full_midblock)) {
1608 if (unlikely(!amalloc_detach_small(sbc, m))) {
1609 sbc_unlock(sbc);
1610 return false;
1612 all_free = BITMAP_BITS - m->s.reserved_bits;
1613 } else {
1614 all_free = BITMAP_BITS - reserved_bits(cls);
1616 best_idx = 0;
1617 best_count = 0;
1618 i = minimum(SMALL_BLOCK_TRIES, sbc->array_size);
1619 while (i--) {
1620 unsigned test_count;
1621 union midblock *test;
1622 bitmap_t map;
1623 size_t test_idx;
1624 #ifndef AMALLOC_USE_RANDOM_ROVER
1625 test_idx = ++sbc->rover;
1626 if (unlikely(test_idx >= sbc->array_size)) {
1627 test_idx = sbc->rover = 0;
1629 #else
1630 test_idx = rand_r(&sbc->rover) % sbc->array_size;
1631 #endif
1632 test = sbc->array[test_idx];
1633 map = load_relaxed(&test->s.atomic_map);
1634 if (!map)
1635 continue;
1636 test_count = count_bits(map);
1637 if (best_count == all_free) {
1638 if (test_count == all_free && test_idx != best_idx && sbc->array_size - 1 != best_idx) {
1639 /*debug("freeing cached slab: %u, %p", cls, test);*/
1640 (sbc->array[test_idx] = sbc->array[--sbc->array_size])
1641 #ifdef AMALLOC_EAGER_FREE
1642 ->s.index = test_idx
1643 #endif
1645 amalloc_free_small(test);
1646 if (!sbc->array_size)
1647 break;
1649 } else {
1650 if (test_count > best_count) {
1651 best_idx = test_idx;
1652 best_count = test_count;
1656 if (likely(best_count)) {
1657 m = sbc->array[best_idx];
1658 (sbc->array[best_idx] = sbc->array[--sbc->array_size])
1659 #ifdef AMALLOC_EAGER_FREE
1660 ->s.index = best_idx
1661 #endif
1663 sbc_unlock(sbc);
1664 m->s.map = amalloc_atomic_xchg(&m->s.atomic_map, 0);
1665 #ifdef AMALLOC_EAGER_FREE
1666 m->s.index = (size_t)-1;
1667 #endif
1668 } else {
1669 sbc_unlock(sbc);
1670 m = create_small_run(pt, cls);
1671 if (unlikely(!m)) {
1672 set_small_run(pt, cls, &full_midblock);
1673 return NULL;
1676 set_small_run(pt, cls, m);
1677 bit = find_bit(m->s.map);
1678 m->s.map &= ~((bitmap_t)1 << bit);
1679 return cast_ptr(char *, m) + bit * m->s.size;
1682 static attr_noinline void *amalloc_alloc_pt(size_t size, bool clr)
1684 if (unlikely(!amalloc_per_thread_alloc()))
1685 return NULL;
1686 if (likely(!clr))
1687 return amalloc(size);
1688 else
1689 return acalloc(size);
/* Fast-path small allocation: take a bit from the thread's current run,
 * pulling in remotely-freed blocks (atomic_map) before falling back to
 * the slow refill path. */
1692 static attr_always_inline void *amalloc_small(struct per_thread *pt, size_t size)
1694 size_t bit;
1695 unsigned idx = SIZE_TO_INDEX(size);
1696 union midblock *m = pt->small_runs[idx];
1697 if (likely(m->s.map != 0)) {
1698 goto found_bit;
/* no private free blocks - claim blocks freed by other threads */
1700 if (load_relaxed(&m->s.atomic_map) != 0) {
1701 m->s.map = amalloc_atomic_xchg(&m->s.atomic_map, 0);
1702 goto found_bit;
1704 return amalloc_small_empty(pt, size);
1705 found_bit:
1706 bit = find_bit(m->s.map);
1707 m->s.map &= ~((bitmap_t)1 << bit);
1708 return cast_ptr(char *, m) + bit * m->s.size;
1711 void * attr_fastcall amalloc(size_t size)
1713 struct per_thread *pt = amalloc_per_thread();
1714 if (unlikely(!pt))
1715 return amalloc_alloc_pt(size, false);
1716 if (unlikely(size > DIRECT_LIMIT))
1717 return amalloc_mid_huge(pt, size, false);
1718 return amalloc_small(pt, size);
1721 void * attr_fastcall acalloc(size_t size)
1723 void *ptr;
1724 struct per_thread *pt = amalloc_per_thread();
1725 if (unlikely(!pt))
1726 return amalloc_alloc_pt(size, true);
1727 if (unlikely(size > DIRECT_LIMIT))
1728 return amalloc_mid_huge(pt, size, true);
1729 ptr = amalloc_small(pt, size);
1730 if (unlikely(!ptr))
1731 return NULL;
1732 return memset(ptr, 0, size);
1735 void * attr_fastcall amemalign(size_t al, size_t size)
1737 size_t size2;
1738 if (likely(al <= minimum(DIRECT_LIMIT, MIDBLOCK_SIZE))) {
1739 size2 = round_up(size, al);
1740 if (unlikely(size2 < size))
1741 return NULL;
1742 return amalloc(size);
1744 return amemalign_mid_huge(al, size, false);
1747 void * attr_fastcall acmemalign(size_t al, size_t size)
1749 if (likely(al <= minimum(DIRECT_LIMIT, MIDBLOCK_SIZE))) {
1750 size_t size2;
1751 size2 = round_up(size, al);
1752 if (unlikely(size2 < size))
1753 return NULL;
1754 return acalloc(size);
1756 return amemalign_mid_huge(al, size, true);
/* Free a huge allocation: drop its tree record and unmap it. */
1760 static attr_noinline void afree_huge(void *ptr)
1762 struct huge_entry *e = huge_tree_delete(ptr);
1763 amalloc_run_free(ptr, e->len);
1764 free(e);
/* Free a midblock allocation; a->map[idx] holds the run's last index. */
1767 static attr_noinline void afree_mid(struct arena *a, unsigned idx)
1769 arena_lock(a);
1770 arena_free_merge_midblock(a, idx, a->map[idx] + 1 - idx);
1771 arena_unlock(a);
/* Free a pointer returned by amalloc/acalloc/amemalign.  Classifies it as
 * huge (arena-aligned), midblock (maps to the run header itself), or a
 * small block inside a run. */
1774 void attr_fastcall afree(void *ptr)
1776 unsigned idx, bit;
1777 unsigned attr_unused bit2;
1778 bitmap_t bit_or;
1779 union midblock *m;
1780 struct per_thread *pt;
1781 struct arena *a = addr_to_arena(ptr);
/* huge allocations are arena-aligned, so they equal their arena base */
1782 if (unlikely((void *)a == ptr)) {
1783 afree_huge(ptr);
1784 return;
1786 idx = midblock_to_idx(a, ptr);
1787 m = idx_to_midblock(a, a->map[idx]);
/* a small-block pointer lies strictly past its run header; otherwise it
 * is a midblock allocation */
1788 if (unlikely((void *)m >= ptr)) {
1789 afree_mid(a, idx);
1790 return;
/* block index via precomputed reciprocal (multiply instead of divide);
 * the assert cross-checks it against real division in debug builds */
1792 bit = ((unsigned)(cast_ptr(char *, ptr) - cast_ptr(char *, m)) * m->s.reciprocal) / RECIPROCAL_BASE;
1793 bit2 = (unsigned)(cast_ptr(char *, ptr) - cast_ptr(char *, m)) / m->s.size;
1794 ajla_assert(bit == bit2, (file_line, "afree: reciprocal doesn't match: size %u, reciprocal %u, %u != %u", m->s.size, m->s.reciprocal, bit, bit2));
1795 bit_or = (bitmap_t)1 << bit;
1796 pt = amalloc_per_thread();
/* freeing into the thread's own active run needs no atomics */
1797 if (likely(pt != NULL) && likely(pt->small_runs[m->s.cls + 1] == m)) {
1798 m->s.map |= bit_or;
1799 } else {
1800 #ifdef AMALLOC_EAGER_FREE
1801 size_t idx = (size_t)-1;
1802 struct small_block_cache *sbc = NULL;
/* detect whether this free makes the run completely free */
1803 if (unlikely((load_relaxed(&m->s.atomic_map) | bit_or) == (bitmap_t)-1 << m->s.reserved_bits)) {
1804 idx = m->s.index;
1805 sbc = &small_block_cache[m->s.cls];
1807 #endif
1808 amalloc_atomic_or(&m->s.atomic_map, bit_or);
1809 #ifdef AMALLOC_EAGER_FREE
1810 if (unlikely(idx != (size_t)-1)) {
1811 amalloc_test_free_small(sbc, idx, m);
1813 #endif
1817 static attr_noinline void *arealloc_malloc(void *ptr, size_t old_size, size_t new_size)
1819 void *n;
1820 if (unlikely(new_size == old_size))
1821 return ptr;
1822 n = amalloc(new_size);
1823 if (unlikely(!n))
1824 return NULL;
1825 memcpy(n, ptr, minimum(new_size, old_size));
1826 afree(ptr);
1827 return n;
/* Realloc of a huge allocation: try to remap in place; on failure (or a
 * shrink below page size) re-register the old entry and fall back to
 * allocate-copy-free. */
1830 static attr_noinline void *arealloc_huge(void *ptr, size_t size)
1832 struct huge_entry *e = huge_tree_delete(ptr);
1833 if (likely(size >= page_size)) {
1834 void *n = amalloc_run_realloc(ptr, e->len, size);
1835 if (n) {
1836 e->len = size;
1837 e->ptr = n;
1838 huge_tree_insert(e);
1839 return n;
/* remap failed or target is sub-page: restore the entry, then copy */
1842 huge_tree_insert(e);
1843 return arealloc_malloc(ptr, e->len, size);
1846 static attr_noinline void *arealloc_mid(struct arena *a, unsigned idx, size_t size)
1848 unsigned existing_blocks = a->map[idx] + 1 - idx;
1849 if (size > DIRECT_LIMIT && size <= ARENA_SIZE) {
1850 bool f;
1851 unsigned new_blocks = round_up(size, MIDBLOCK_SIZE) >> MIDBLOCK_BITS;
1852 arena_lock(a);
1853 f = arena_try_realloc_midblock(a, idx, existing_blocks, new_blocks);
1854 arena_unlock(a);
1855 if (f)
1856 return idx_to_midblock(a, idx);
1858 return arealloc_malloc(idx_to_midblock(a, idx), existing_blocks << MIDBLOCK_BITS, size);
/* Resize an allocation, preserving contents.  Uses the same pointer
 * classification as afree(): huge, midblock, or small block. */
1861 void * attr_fastcall arealloc(void *ptr, size_t size)
1863 unsigned idx;
1864 union midblock *m;
1865 struct arena *a = addr_to_arena(ptr);
1866 if (unlikely((void *)a == ptr)) {
1867 return arealloc_huge(ptr, size);
1869 idx = midblock_to_idx(a, ptr);
1870 m = idx_to_midblock(a, a->map[idx]);
1871 if (unlikely((void *)m >= ptr)) {
1872 return arealloc_mid(a, idx, size);
/* a small block can be reused in place as long as the new size maps to
 * the same size class */
1874 if (INDEX_TO_CLASS(SIZE_TO_INDEX(size)) != m->s.cls) {
1875 return arealloc_malloc(ptr, m->s.size, size);
1877 return ptr;
1880 bool attr_fastcall aptr_is_huge(void *ptr)
1882 struct arena *a = addr_to_arena(ptr);
1883 return (void *)a == ptr;
/* One-time allocator initialization: verify the reciprocal-division trick
 * (debug builds), set up the OS layer, and create the per-class caches. */
1887 void amalloc_init(void)
1889 unsigned i;
1890 #if defined(DEBUG)
/* verify that multiply-by-reciprocal equals division for every class
 * size and every possible block index */
1892 unsigned size;
1893 for (size = MIN_ALLOC; size <= DIRECT_LIMIT; size += MIN_ALLOC) {
1894 ushort_efficient_t reciprocal = div_16(RECIPROCAL_BASE + size - 1, size);
1895 for (i = 1; i < BITMAP_BITS; i++) {
1896 unsigned offs = i * size;
1897 unsigned ii = (offs * reciprocal) / RECIPROCAL_BASE;
1898 if (unlikely(ii != i))
1899 internal(file_line, "amalloc_init: reciprocal doesn't match: size %u, i %u, ii %u", size, i, ii);
1903 #endif
1904 amalloc_os_init();
1905 if (unlikely(!amalloc_enabled))
1906 return;
1907 #ifdef POINTER_COMPRESSION_POSSIBLE
1908 if (pointer_compression_enabled)
1909 reserve_memory();
1910 #endif
1911 amalloc_threads_initialized = false;
1912 amalloc_per_thread_init(&thread1);
1913 tree_init(&huge_tree);
1914 tree_init(&arena_tree);
/* the cache arrays are themselves allocated with amalloc */
1915 for (i = 0; i < N_CLASSES; i++) {
1916 small_block_cache[i].array = amalloc(DIRECT_LIMIT * 2);
1917 if (unlikely(!small_block_cache[i].array)) {
1918 fatal("amalloc failed");
1920 small_block_cache[i].array_size = 0;
1921 small_block_cache[i].allocated_size = DIRECT_LIMIT * 2 / sizeof(union midblock *);
1922 small_block_cache[i].rover = 0;
/* Switch the allocator to multithreaded mode: create all locks and TLS,
 * and adopt thread1's state as the main thread's TLS value. */
1926 void amalloc_init_multithreaded(void)
1928 unsigned i;
1929 if (unlikely(!amalloc_enabled))
1930 return;
1931 if (unlikely(amalloc_threads_initialized))
1932 internal(file_line, "amalloc_init_multithreaded: amalloc_threads_initialized already set");
1933 mutex_init(&huge_tree_mutex);
1934 mutex_init(&arena_tree_mutex);
1935 tls_init(struct per_thread *, per_thread);
1936 tls_set(struct per_thread *, per_thread, &thread1);
1937 for (i = 0; i < N_CLASSES; i++) {
1938 mutex_init(&small_block_cache[i].mutex);
1940 #ifdef POINTER_COMPRESSION_POSSIBLE
1941 if (pointer_compression_enabled)
1942 mutex_init(&rmap_mutex);
1943 #endif
/* set the flag last so lock-free paths stay consistent until ready */
1944 amalloc_threads_initialized = true;
1945 #if 0
/* manual smoke test of the midblock/huge realloc paths */
1947 void *x = amalloc(0x2000000);
1948 debug("x: %p", x);
1949 x = arealloc(x, 0x4000000);
1950 debug("x: %p", x);
1951 x = arealloc(x, 0x2000000);
1952 debug("x: %p", x);
1953 x = arealloc(x, 0x4000000);
1954 debug("x: %p", x);
1955 afree(x);
1957 #endif
/* Leave multithreaded mode: destroy all locks and TLS created by
 * amalloc_init_multithreaded (reverse order). */
1960 void amalloc_done_multithreaded(void)
1962 unsigned i;
1963 if (unlikely(!amalloc_enabled))
1964 return;
1965 if (unlikely(!amalloc_threads_initialized))
1966 internal(file_line, "amalloc_done_multithreaded: amalloc_threads_initialized not set");
1967 amalloc_threads_initialized = false;
1968 #ifdef POINTER_COMPRESSION_POSSIBLE
1969 if (pointer_compression_enabled)
1970 mutex_done(&rmap_mutex);
1971 #endif
1972 for (i = 0; i < N_CLASSES; i++) {
1973 mutex_done(&small_block_cache[i].mutex);
1975 tls_done(struct per_thread *, per_thread);
1976 mutex_done(&arena_tree_mutex);
1977 mutex_done(&huge_tree_mutex);
/* Final allocator teardown: release the main thread's state, drain the
 * per-class caches (detecting leaks), check the trees are empty, and
 * shut down the OS layer. */
1980 void amalloc_done(void)
1982 unsigned i;
1983 if (unlikely(!amalloc_enabled))
1984 goto os_done;
1985 if (unlikely(amalloc_threads_initialized))
1986 internal(file_line, "amalloc_done: amalloc_threads_initialized set");
1987 detach_pt_data(&thread1);
1988 for (i = 0; i < N_CLASSES; i++) {
1989 struct small_block_cache *sbc = &small_block_cache[i];
/* a fully free run has every non-reserved bit set in atomic_map */
1990 bitmap_t all_free = (bitmap_t)-1 << reserved_bits(i);
1991 size_t j;
1992 for (j = 0; j < sbc->array_size; j++) {
1993 union midblock *m = sbc->array[j];
1994 if (unlikely(load_relaxed(&m->s.atomic_map) != all_free))
1995 internal(file_line, "amalloc_done: small block memory leak, class %u", i);
1996 /*debug("end: freeing cached slab: %u, %p", i, m);*/
1997 amalloc_free_small(m);
1999 afree(sbc->array);
2000 sbc->array = BAD_POINTER_1;
2002 if (unlikely(!tree_is_empty(&huge_tree)))
2003 internal(file_line, "amalloc_done: huge tree is not empty");
2004 if (unlikely(!tree_is_empty(&arena_tree)))
2005 internal(file_line, "amalloc_done: arena tree is not empty");
2006 os_done:
2007 amalloc_os_done();
2008 #if 0
/* leak-dump code kept for debugging: walk leaked arenas and their runs */
2010 struct tree_entry *e, *f;
2011 debug("%x", (unsigned)(ARENA_MIDBLOCKS - ARENA_PREFIX));
2012 for (e = tree_first(&arena_tree); e; e = tree_next(e)) {
2013 struct arena *a = get_struct(e, struct arena, arena_entry);
2014 debug("leaked arena %p, %x", a, a->max_midblock_run);
2015 for (f = tree_first(&a->midblock_free); f; f = tree_next(f)) {
2016 union midblock *m = get_struct(f, union midblock, free_entry);
2017 unsigned idx = midblock_to_idx(a, m);
2018 unsigned len = (a->map[idx] & ~MAP_FREE) + 1 - idx;
2019 debug("free midblock: %p: %x, %x", m, idx, len);
2020 if (idx + len < ARENA_MIDBLOCKS) {
2021 union midblock *m2 = idx_to_midblock(a, idx + len);
2022 debug("small run(%p): %lx, atomic_map: %lx, size: %u, class %u", m2, m2->s.map, m2->s.atomic_map, m2->s.size, m2->s.cls);
2027 #endif
2028 #ifdef POINTER_COMPRESSION_POSSIBLE
2029 if (pointer_compression_enabled)
2030 unreserve_memory();
2031 #endif
2034 #endif