codegen: use gen_frame_store for bswap and brev
[ajla.git] / mem_al.c
blob 48fa2a0d38606bccce480eef494ed9f33d195681
1 /*
2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
9 * version.
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
17 */
19 #include "ajla.h"
21 #include "list.h"
22 #include "thread.h"
23 #include "str.h"
24 #include "refcount.h"
25 #include "addrlock.h"
26 #include "amalloc.h"
28 #include "mem_al.h"
30 #ifdef HAVE_SYS_RESOURCE_H
31 #include <sys/resource.h>
32 #endif
34 #ifdef HAVE_MALLOC_H
35 #include <malloc.h>
36 #endif
38 #if defined(POINTER_COMPRESSION_POSSIBLE)
39 uchar_efficient_t pointer_compression_enabled = 0;
40 #endif
42 #if defined(USE_AMALLOC)
43 uchar_efficient_t amalloc_enabled = 1;
44 #endif
46 /*#define TEST_OOM*/
48 #if defined(TEST_OOM)
49 #define alloc_should_fail(mayfail) true
50 #elif 1
51 #define alloc_should_fail(mayfail) false
52 #else
53 static bool alloc_should_fail(ajla_error_t *mayfail)
54 {
55 static int count = 0;
56 if (!mayfail)
57 return false;
58 count++;
59 /*debug("%d", count);*/
60 if (!(rand() & 0xffff)) {
61 debug("failing allocation");
62 return true;
63 }
64 return false;
65 }
66 #endif
68 #if defined(HAVE_MALLOC) && HAVE_MALLOC
69 #define heap_notzero(x) (x)
70 #else
71 #define heap_notzero(x) (likely((x) != 0) ? (x) : 1)
72 #endif
74 #define heap_malloc(x) (likely(amalloc_enabled) ? amalloc(x) : malloc(heap_notzero(x)))
75 #define heap_calloc(x) (likely(amalloc_enabled) ? acalloc(x) : calloc(1, heap_notzero(x)))
76 #define heap_realloc(x, y) (likely(amalloc_enabled) ? arealloc(x, y) : realloc(x, y))
77 #define heap_free(x) (likely(amalloc_enabled) ? afree(x) : free(cast_cpp(void *, x)))
78 #define heap_memalign(al, sz) (likely(amalloc_enabled) ? amemalign(al, sz) : do_memalign(al, sz))
79 #define heap_cmemalign(al, sz) (likely(amalloc_enabled) ? acmemalign(al, sz) : zmem(do_memalign(al, sz), sz))
80 #define heap_free_aligned(x) (likely(amalloc_enabled) ? afree(x) : do_free_aligned(x))
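/* The heap_*() macros form the low-level allocation layer: when the amalloc
   allocator is enabled they route to amalloc/acalloc/arealloc/afree (and the
   amemalign/acmemalign variants), otherwise they fall back to the system
   malloc/calloc/realloc/free and the do_memalign helpers defined below.
   heap_notzero() bumps a zero size to 1 in the !HAVE_MALLOC case, where
   malloc(0) may legitimately return NULL, so a successful zero-byte
   allocation is never mistaken for failure. */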
82 static void *zmem(void *ptr, size_t size)
83 {
84 if (likely(ptr != NULL))
85 return memset(ptr, 0, size);
86 return ptr;
87 }
89 #if !defined(UNUSUAL_NO_MEMALIGN) && defined(HAVE_MEMALIGN) && defined(__DJGPP__)
90 /* DJGPP has swapped arguments */
91 static inline void *do_memalign(size_t al, size_t sz)
92 {
93 return memalign(heap_notzero(sz), al);
94 }
95 #define do_free_aligned free
96 #elif !defined(UNUSUAL_NO_MEMALIGN) && defined(HAVE_MEMALIGN) && !defined(__sun__)
97 #define do_memalign memalign
98 #define do_free_aligned free
99 #elif !defined(UNUSUAL_NO_MEMALIGN) && defined(HAVE_POSIX_MEMALIGN)
100 static inline void *do_memalign(size_t align, size_t sz)
101 {
102 void *ptr = NULL; /* avoid warning */
103 sz = heap_notzero(sz);
104 if (unlikely(align < sizeof(void *)))
105 align = sizeof(void *);
106 if (unlikely(posix_memalign(&ptr, align, sz)))
107 return NULL;
108 return ptr;
109 }
110 #define do_free_aligned free
111 #elif !defined(UNUSUAL_NO_MEMALIGN) && defined(HAVE_ALIGNED_ALLOC)
112 static inline void *do_memalign(size_t align, size_t sz)
113 {
114 size_t rsz;
115 sz = heap_notzero(sz);
116 rsz = round_up(sz, align);
117 if (unlikely(rsz < sz))
118 return NULL;
119 return aligned_alloc(align, rsz);
120 }
121 #define do_free_aligned free
122 #else
123 typedef size_t align_bytes_t;
124 static inline void *do_memalign(size_t align, size_t sz)
125 {
126 size_t extra, sz2;
127 void *p, *p2;
128 if (align < HEAP_ALIGN)
129 align = HEAP_ALIGN;
130 extra = align - 1 + sizeof(align_bytes_t);
131 /*debug("align: %x, %x", sz, align);*/
132 if (unlikely(extra != (align_bytes_t)extra))
133 internal(file_line, "do_memalign: too big alignment %"PRIuMAX"", (uintmax_t)align);
134 sz2 = sz + extra;
135 if (unlikely(sz2 < sz))
136 return NULL;
137 p = heap_malloc(sz2);
138 if (unlikely(!p))
139 return NULL;
140 p2 = cast_ptr(char *, p) + sizeof(align_bytes_t);
141 p2 = num_to_ptr(round_up(ptr_to_num(p2), align));
142 (cast_ptr(align_bytes_t *, p2))[-1] = (align_bytes_t)(cast_ptr(char *, p2) - cast_ptr(char *, p));
143 return p2;
144 }
145 static inline void do_free_aligned(void *p)
146 {
147 align_bytes_t a = (cast_ptr(align_bytes_t *, p))[-1];
148 heap_free(cast_ptr(char *, p) - a);
149 }
150 #endif
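/* Summary of the do_memalign() selection above: memalign() is used where it
   exists (with swapped arguments on DJGPP), then posix_memalign() with the
   alignment raised to at least sizeof(void *), then aligned_alloc() with the
   size rounded up to a multiple of the alignment.  The final fallback
   over-allocates by align - 1 + sizeof(align_bytes_t), rounds the returned
   pointer up to the requested alignment and stores the offset in the
   align_bytes_t word just before it, so that do_free_aligned() can recover
   the pointer originally returned by heap_malloc(). */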
152 static void attr_cold *oom_calloc(size_t size, ajla_error_t *mayfail, position_t position)
153 {
154 if (mayfail == MEM_DONT_TRY_TO_FREE)
155 return NULL;
156 while (mem_trim_cache()) {
157 void *p = heap_calloc(size);
158 if (p)
159 return p;
160 }
161 fatal_mayfail(error_ajla(EC_ASYNC, AJLA_ERROR_OUT_OF_MEMORY), mayfail, "out of memory for malloc, %"PRIuMAX" bytes at %s", (uintmax_t)size, position_string(position));
162 return NULL;
163 }
165 static void attr_cold *oom_cmemalign(size_t size, size_t alignment, ajla_error_t *mayfail, position_t position)
166 {
167 if (mayfail == MEM_DONT_TRY_TO_FREE)
168 return NULL;
169 while (mem_trim_cache()) {
170 void *p = heap_cmemalign(alignment, size);
171 if (p)
172 return p;
173 }
174 fatal_mayfail(error_ajla(EC_ASYNC, AJLA_ERROR_OUT_OF_MEMORY), mayfail, "out of memory for memalign, %"PRIuMAX" bytes, alignment %"PRIuMAX" at %s", (uintmax_t)size, (uintmax_t)alignment, position_string(position));
175 return NULL;
176 }
178 static void attr_cold *oom_realloc(void attr_unused *ptr, size_t size, ajla_error_t *mayfail, position_t position)
179 {
180 if (mayfail == MEM_DONT_TRY_TO_FREE)
181 return NULL;
182 while (mem_trim_cache()) {
183 void *p = heap_realloc(ptr, size);
184 if (p)
185 return p;
186 }
187 fatal_mayfail(error_ajla(EC_ASYNC, AJLA_ERROR_OUT_OF_MEMORY), mayfail, "out of memory for realloc, %"PRIuMAX" bytes at %s", (uintmax_t)size, position_string(position));
188 return NULL;
189 }
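/* The oom_*() helpers are the slow path taken when a heap_*() call fails:
   unless the caller passed MEM_DONT_TRY_TO_FREE, they retry the allocation
   for as long as mem_trim_cache() reports that it released something.
   mem_trim_cache() is currently a stub that always returns false (see the
   TODO further down), so in practice a failed allocation is reported through
   fatal_mayfail() right away and NULL is returned. */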
191 #define MEMORY_DEBUG_MAGIC 1
192 #define MEMORY_DEBUG_REDZONE 2
193 #define MEMORY_DEBUG_FILL 4
194 #define MEMORY_DEBUG_TRACK_BLOCKS 8
195 #define MEMORY_DEBUG_HISTOGRAM 16
197 #ifdef DEBUG_MEMORY_POSSIBLE
199 static int memory_debug = 0;
201 #define USE_RED_ZONE (likely(memory_debug & MEMORY_DEBUG_REDZONE))
202 #define USE_FILL (likely(memory_debug & MEMORY_DEBUG_FILL))
203 #define USE_LIST (likely(memory_debug & MEMORY_DEBUG_TRACK_BLOCKS))
204 #define USE_HISTOGRAM (unlikely(memory_debug & MEMORY_DEBUG_HISTOGRAM))
206 #define RED_ZONE 'R'
208 struct histogram_entry {
209 uintmax_t cnt;
210 position_t position;
211 };
213 struct per_thread {
214 struct list block_list;
215 uintptr_t bytes;
216 uintptr_t blocks;
217 struct histogram_entry *histogram;
218 size_t histogram_size;
219 mutex_t mutex;
220 #ifndef THREAD_NONE
221 struct list free_list;
222 struct list used_list;
223 tls_destructor_t destructor;
224 #endif
225 };
227 #define ALLOC_MAGIC 0xa110c
228 #define ALLOC_MAGIC_ALIGNED 0xa11167
229 #define ALLOC_MAGIC_FREE 0xf4ee
230 #define ALLOC_MAGIC_REALLOC 0x4ea110c
232 typedef uint32_t ah_magic_t;
234 struct alloc_header {
235 #ifndef THREAD_NONE
236 struct per_thread *per_thread;
237 #endif
238 struct list entry;
239 position_t position;
240 size_t size;
241 size_t padding;
242 ah_magic_t magic_[1];
243 };
245 #define AH_SIZE round_up(sizeof(struct alloc_header), HEAP_ALIGN)
246 #define AH_DATA(ah) (cast_ptr(unsigned char *, ah) + AH_SIZE)
247 #define AH_MAGIC(ah) (cast_ptr(ah_magic_t *, AH_DATA(ah))[-1])
248 #define AH_RED_ZONE(ah) (AH_DATA(ah)[ah->size])
249 #define AH_MALLOC_BLOCK(ah) (cast_ptr(unsigned char *, (ah)) - (ah)->padding)
250 #define AH_FROM_PTR(ptr) cast_ptr(struct alloc_header *, (cast_ptr(unsigned char *, ptr) - AH_SIZE))
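/* Layout of a block allocated with memory debugging enabled:
 *
 *   +-- pointer returned by heap_malloc()
 *   v
 *   [ padding ][ struct alloc_header ... magic ][ user data ][ red zone ]
 *                                               ^
 *                                               +-- pointer returned to the caller (AH_DATA)
 *
 * AH_SIZE is the header size rounded up to HEAP_ALIGN; the magic word is the
 * last ah_magic_t of that header area, immediately before the user data.
 * AH_RED_ZONE is the single guard byte right after the user data, and
 * AH_MALLOC_BLOCK backs up over ah->padding to recover the address that was
 * actually passed to heap_malloc(), so it can be handed back to heap_free(). */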
252 static struct per_thread thread1;
253 static bool memory_threads_initialized;
255 tls_decl(unsigned char, memory_fill);
257 static unsigned char get_memory_fill(void)
259 if (!memory_threads_initialized) {
260 static unsigned char memory_fill_preinit = 0;
261 return (unsigned char)++memory_fill_preinit;
262 } else {
263 unsigned char mf = tls_get(unsigned char, memory_fill);
264 mf++;
265 tls_set(unsigned char, memory_fill, mf);
266 return mf;
270 static inline void mem_per_thread_init(struct per_thread *pt)
272 list_init(&pt->block_list);
273 pt->bytes = 0;
274 pt->blocks = 0;
275 if (USE_HISTOGRAM) {
276 pt->histogram_size = 2;
277 pt->histogram = heap_calloc(pt->histogram_size * sizeof(struct histogram_entry));
281 static void increment_histogram(struct per_thread *pt, size_t sz, uintmax_t count, position_t position)
283 size_t old_count, new_count;
284 #if 0
285 sz = round_up(sz, 16);
286 #endif
287 if (unlikely(sz >= pt->histogram_size)) {
288 size_t new_size, i;
289 struct histogram_entry *new_histogram;
290 new_size = pt->histogram_size;
291 do {
292 new_size = new_size * 2 - 1;
293 if (unlikely(new_size > (size_t)-1 / sizeof(struct histogram_entry)))
294 return;
295 } while (sz >= new_size);
296 new_histogram = heap_calloc(new_size * sizeof(struct histogram_entry));
297 if (unlikely(!new_histogram))
298 return;
299 for (i = 0; i < pt->histogram_size; i++) {
300 if (unlikely(pt->histogram[i].cnt != 0))
301 new_histogram[i] = pt->histogram[i];
303 heap_free(pt->histogram);
304 pt->histogram = new_histogram;
305 pt->histogram_size = new_size;
307 old_count = pt->histogram[sz].cnt;
308 new_count = old_count + count;
309 if (unlikely(new_count < count))
310 new_count = -1;
311 pt->histogram[sz].cnt = new_count;
312 if ((new_count ^ old_count) >= old_count)
313 pt->histogram[sz].position = position;
316 #ifndef THREAD_NONE
318 static tls_decl(struct per_thread *, mem_per_thread);
320 static void mem_per_thread_free(struct per_thread *pt)
322 thread1.bytes += pt->bytes;
323 thread1.blocks += pt->blocks;
324 if (unlikely(thread1.bytes < pt->bytes) || unlikely(thread1.blocks < pt->blocks))
325 internal(file_line, "mem_per_thread_free: memory counters underflow: %"PRIuMAX", %"PRIuMAX" < %"PRIuMAX", %"PRIuMAX"", (uintmax_t)thread1.bytes, (uintmax_t)thread1.blocks, (uintmax_t)pt->bytes, (uintmax_t)pt->blocks);
326 if (USE_HISTOGRAM) {
327 size_t i;
328 for (i = 0; i < pt->histogram_size; i++)
329 if (unlikely(pt->histogram[i].cnt != 0))
330 increment_histogram(&thread1, i, pt->histogram[i].cnt, pt->histogram[i].position);
331 heap_free(pt->histogram);
333 while (!list_is_empty(&pt->block_list)) {
334 struct alloc_header *ah = get_struct(pt->block_list.prev, struct alloc_header, entry);
335 if (unlikely(ah->per_thread != pt))
336 internal(file_line, "mem_per_thread_free: block is on wrong list: %p != %p (block allocated at %s)", ah->per_thread, pt, position_string(ah->position));
337 ah->per_thread = &thread1;
338 list_del(&ah->entry);
339 list_add(&thread1.block_list, &ah->entry);
341 mutex_done(&pt->mutex);
342 mem_free_aligned(pt);
345 #ifndef THREAD_NONE
346 static void mem_per_thread_destructor(tls_destructor_t *destr)
348 struct per_thread *pt = get_struct(destr, struct per_thread, destructor);
349 tls_set(struct per_thread *, mem_per_thread, &thread1);
351 ajla_assert_lo(memory_threads_initialized, (file_line, "mem_per_thread_destructor called when threads are not initialized"));
353 mutex_lock(&thread1.mutex);
354 list_del(&pt->used_list);
355 list_add(&thread1.free_list, &pt->free_list);
356 mutex_unlock(&thread1.mutex);
358 #endif
360 static attr_noinline struct per_thread *mem_per_thread_alloc(void)
362 struct per_thread *pt;
363 ajla_error_t sink;
364 tls_set(struct per_thread *, mem_per_thread, &thread1);
365 mutex_lock(&thread1.mutex);
366 if (!list_is_empty(&thread1.free_list)) {
367 pt = get_struct(thread1.free_list.prev, struct per_thread, free_list);
368 list_del(&pt->free_list);
369 mutex_unlock(&thread1.mutex);
370 goto have_pt;
372 mutex_unlock(&thread1.mutex);
373 pt = mem_align_mayfail(struct per_thread *, round_up(sizeof(struct per_thread), SMP_ALIAS_ALIGNMENT), SMP_ALIAS_ALIGNMENT, &sink);
374 if (likely(pt != NULL)) {
375 mutex_init(&pt->mutex);
376 mem_per_thread_init(pt);
377 have_pt:
378 mutex_lock(&thread1.mutex);
379 list_add(&thread1.used_list, &pt->used_list);
380 mutex_unlock(&thread1.mutex);
381 tls_set(struct per_thread *, mem_per_thread, pt);
382 tls_destructor(&pt->destructor, mem_per_thread_destructor);
383 } else {
384 tls_set(struct per_thread *, mem_per_thread, NULL);
385 pt = &thread1;
387 return pt;
390 static struct per_thread *mem_current_thread(void)
392 struct per_thread *pt;
393 if (unlikely(!memory_threads_initialized)) {
394 pt = &thread1;
395 } else {
396 pt = tls_get(struct per_thread *, mem_per_thread);
397 if (unlikely(!pt)) {
398 pt = mem_per_thread_alloc();
401 return pt;
404 #endif
406 static struct per_thread *mem_mutex_lock(struct alloc_header attr_unused *ah)
408 struct per_thread *pt;
409 #ifndef THREAD_NONE
410 pt = ah->per_thread;
411 #else
412 pt = &thread1;
413 #endif
414 if (likely(memory_threads_initialized)) {
415 mutex_lock(&pt->mutex);
416 #ifndef THREAD_NONE
417 ajla_assert(pt == ah->per_thread, (file_line, "mem_mutex_lock: per_thread changed: %p != %p", pt, ah->per_thread));
418 #endif
420 return pt;
423 static void mem_mutex_unlock(struct per_thread *pt)
425 if (likely(memory_threads_initialized)) {
426 mutex_unlock(&pt->mutex);
430 #define VFY_UNALIGNED 1
431 #define VFY_ALIGNED 2
432 #define VFY_ANY 3
434 #ifdef POINTER_IGNORE_START
435 #define verify_no_tag(ah, position, fn) \
436 do { \
437 if (unlikely((ptr_to_num(ah) & POINTER_IGNORE_MASK) != 0))\
438 internal(position_string(position), "%s: pointer is tagged: %p", fn, AH_DATA(ah));\
439 } while (0)
440 #else
441 #define verify_no_tag(ah, position, fn) \
442 do { \
443 } while (0)
444 #endif
446 #define verify_block(ah, aligned, position, fn) \
447 do { \
448 verify_no_tag(ah, position, fn); \
449 if (!( \
450 ((aligned) & VFY_UNALIGNED && likely(AH_MAGIC(ah) == ALLOC_MAGIC)) ||\
451 ((aligned) & VFY_ALIGNED && likely(AH_MAGIC(ah) == ALLOC_MAGIC_ALIGNED))\
452 )) \
453 internal(position_string(position), "%s: magic doesn't match: %08lx", fn, (unsigned long)AH_MAGIC(ah));\
454 if (USE_RED_ZONE && unlikely(AH_RED_ZONE(ah) != RED_ZONE)) \
455 internal(position_string(position), "%s: red zone damaged: %02x (block allocated at %s)", fn, AH_RED_ZONE(ah), position_string(ah->position));\
456 } while (0)
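/* verify_block() is the consistency check applied to every pointer handed
   back to this module: it rejects tagged pointers (when POINTER_IGNORE_START
   is defined), requires the magic word to match the flavour of the
   allocation (ALLOC_MAGIC for plain blocks, ALLOC_MAGIC_ALIGNED for blocks
   from the aligned allocators) and, when red-zone debugging is enabled,
   checks that the guard byte after the user data still contains RED_ZONE.
   A damaged red zone is reported together with the position where the block
   was originally allocated. */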
458 static size_t get_needed_size(size_t size, size_t extra)
460 size_t needed_size = size + AH_SIZE + USE_RED_ZONE;
461 if (unlikely(needed_size < size))
462 fatal("allocation size overflow");
463 needed_size += extra;
464 if (unlikely(needed_size < extra))
465 fatal("allocation size overflow");
466 return needed_size;
469 static attr_noinline void *debug_mem_alloc(size_t size, size_t alignment, bool aligned, bool clear, ajla_error_t *mayfail, position_t position)
471 unsigned char *result;
472 size_t padding;
473 struct alloc_header *ah;
474 size_t needed_size;
475 if (unlikely(!is_power_of_2(alignment)))
476 internal(position_string(position), "debug_mem_alloc: invalid alignment %"PRIuMAX", size %"PRIuMAX"", (uintmax_t)alignment, (uintmax_t)size);
477 needed_size = get_needed_size(size, alignment - 1);
478 result = cast_cpp(unsigned char *, alloc_should_fail(mayfail) ? NULL : !clear ? heap_malloc(needed_size) : heap_calloc(needed_size));
479 if (unlikely(!result)) {
480 result = cast_cpp(unsigned char *, oom_calloc(needed_size, mayfail, position));
481 if (!result)
482 return NULL;
484 padding = -(size_t)ptr_to_num(result + AH_SIZE) & (alignment - 1);
485 ah = cast_ptr(struct alloc_header *, result + padding);
486 ah->padding = padding;
487 ah->position = position;
488 ah->size = size;
489 if (USE_FILL && !clear)
490 (void)memset(AH_DATA(ah), get_memory_fill(), size);
491 AH_MAGIC(ah) = !aligned ? ALLOC_MAGIC : ALLOC_MAGIC_ALIGNED;
492 if (USE_RED_ZONE)
493 AH_RED_ZONE(ah) = RED_ZONE;
494 #ifndef THREAD_NONE
495 ah->per_thread = mem_current_thread();
496 #endif
497 if (USE_LIST | USE_HISTOGRAM) {
498 struct per_thread *pt;
499 pt = mem_mutex_lock(ah);
500 if (USE_LIST) {
501 list_add(&pt->block_list, &ah->entry);
502 if (unlikely(pt->bytes + ah->size < pt->bytes) || unlikely(!(pt->blocks + 1)))
503 internal(file_line, "debug_mem_alloc: memory counters overflow: %"PRIuMAX", %"PRIuMAX", %"PRIuMAX"", (uintmax_t)pt->bytes, (uintmax_t)ah->size, (uintmax_t)pt->blocks);
504 pt->bytes += ah->size;
505 pt->blocks++;
506 /*debug("size: %lu, amount: %lu, blocks: %lu", ah->size, memory_amount, memory_blocks);*/
508 if (USE_HISTOGRAM)
509 increment_histogram(pt, ah->size, 1, ah->position);
510 mem_mutex_unlock(pt);
512 return AH_DATA(ah);
515 static attr_noinline void *debug_mem_realloc(void *ptr, size_t size, ajla_error_t *mayfail, position_t position)
517 size_t needed_size, padding;
518 unsigned char *result;
519 struct alloc_header *new_ah;
520 struct alloc_header *ah;
521 struct per_thread *pt;
523 if (unlikely(!ptr))
524 internal(position_string(position), "debug_mem_realloc(NULL, %"PRIuMAX")", (uintmax_t)size);
526 ah = AH_FROM_PTR(ptr);
527 verify_block(ah, VFY_UNALIGNED, position, "debug_mem_realloc");
528 if (USE_FILL && size < ah->size)
529 (void)memset(AH_DATA(ah) + size, get_memory_fill(), ah->size - size);
530 pt = mem_mutex_lock(ah);
531 AH_MAGIC(ah) = ALLOC_MAGIC_REALLOC;
532 padding = ah->padding;
533 needed_size = get_needed_size(size, padding);
534 result = cast_cpp(unsigned char *, alloc_should_fail(mayfail) ? NULL : heap_realloc(AH_MALLOC_BLOCK(ah), needed_size));
535 if (unlikely(!result)) {
536 AH_MAGIC(ah) = ALLOC_MAGIC;
537 mem_mutex_unlock(pt);
538 result = cast_cpp(unsigned char *, oom_calloc(needed_size, mayfail, position));
539 if (!result) {
540 if (size <= ah->size) {
541 ah->size = size;
542 if (USE_RED_ZONE)
543 AH_RED_ZONE(ah) = RED_ZONE;
544 return ptr;
546 return NULL;
548 pt = mem_mutex_lock(ah);
549 (void)memcpy(result + padding, ah, minimum(size, ah->size) + AH_SIZE);
550 AH_MAGIC(ah) = ALLOC_MAGIC_REALLOC;
551 heap_free(ah);
553 new_ah = cast_ptr(struct alloc_header *, result + padding);
554 AH_MAGIC(new_ah) = ALLOC_MAGIC;
555 if (USE_LIST) {
556 new_ah->entry.next->prev = &new_ah->entry;
557 new_ah->entry.prev->next = &new_ah->entry;
558 if (unlikely(pt->bytes < new_ah->size) || unlikely(!pt->blocks))
559 internal(file_line, "debug_mem_realloc: memory counters underflow: %"PRIuMAX", %"PRIuMAX", %"PRIuMAX"", (uintmax_t)pt->bytes, (uintmax_t)new_ah->size, (uintmax_t)pt->blocks);
560 pt->bytes -= new_ah->size;
561 if (unlikely(pt->bytes + size < pt->bytes))
562 internal(file_line, "debug_mem_realloc: memory counters overflow: %"PRIuMAX", %"PRIuMAX", %"PRIuMAX"", (uintmax_t)pt->bytes, (uintmax_t)size, (uintmax_t)pt->blocks);
563 pt->bytes += size;
565 new_ah->size = size;
566 if (USE_RED_ZONE)
567 AH_RED_ZONE(new_ah) = RED_ZONE;
568 if (USE_HISTOGRAM)
569 increment_histogram(pt, size, 1, new_ah->position);
570 mem_mutex_unlock(pt);
571 return AH_DATA(new_ah);
574 static attr_noinline void debug_mem_free(const void *ptr, unsigned vfy, position_t position)
576 struct alloc_header *ah;
578 if (unlikely(!ptr))
579 internal(position_string(position), "debug_mem_free(NULL)");
581 ah = AH_FROM_PTR(ptr);
582 verify_block(ah, vfy, position, "debug_mem_free");
583 if (USE_FILL && (!amalloc_enabled || !aptr_is_huge(AH_MALLOC_BLOCK(ah)))) {
584 unsigned char mf = get_memory_fill();
585 unsigned char *zero_p = AH_DATA(ah);
586 size_t zero_size = ah->size;
587 if (zero_size > sizeof(refcount_t) && (int8_t)mf >= -0x70) {
588 zero_p += sizeof(refcount_t);
589 zero_size -= sizeof(refcount_t);
590 #ifndef DEBUG_REFCOUNTS
591 refcount_init((refcount_t *)AH_DATA(ah));
592 #endif
594 (void)memset(zero_p, mf, zero_size);
596 if (USE_LIST | USE_HISTOGRAM) {
597 struct per_thread *pt;
598 pt = mem_mutex_lock(ah);
599 if (USE_LIST) {
600 list_del(&ah->entry);
601 if (unlikely(pt->bytes < ah->size) || unlikely(!pt->blocks))
602 internal(file_line, "debug_mem_free: memory counters underflow: %"PRIuMAX", %"PRIuMAX", %"PRIuMAX"", (uintmax_t)pt->bytes, (uintmax_t)ah->size, (uintmax_t)pt->blocks);
603 pt->bytes -= ah->size;
604 pt->blocks--;
606 mem_mutex_unlock(pt);
608 AH_MAGIC(ah) = ALLOC_MAGIC_FREE;
609 heap_free(AH_MALLOC_BLOCK(ah));
612 /* this should not be called concurrently */
613 static attr_noinline void debug_mem_set_position(const void *ptr, position_t position)
615 struct alloc_header *ah;
617 if (unlikely(!ptr))
618 internal(position_string(position), "debug_mem_set_position(NULL)");
620 ah = AH_FROM_PTR(ptr);
621 verify_block(ah, VFY_ANY, position, "debug_mem_set_position");
623 ah->position = position;
626 static attr_noinline const char *debug_mem_get_position(const void *ptr, position_t position)
628 struct alloc_header *ah;
630 if (unlikely(!ptr))
631 internal(position_string(position), "debug_mem_get_position(NULL)");
633 ah = AH_FROM_PTR(ptr);
634 verify_block(ah, VFY_ANY, position, "debug_mem_get_position");
636 return position_string(ah->position);
639 static attr_noinline void debug_mem_verify(const void *ptr, position_t position)
641 struct alloc_header *ah;
642 ah = AH_FROM_PTR(ptr);
643 verify_block(ah, VFY_UNALIGNED, position, "debug_mem_verify");
646 static attr_noinline void debug_mem_verify_aligned(const void *ptr, position_t position)
648 struct alloc_header *ah;
649 ah = AH_FROM_PTR(ptr);
650 verify_block(ah, VFY_ALIGNED, position, "debug_mem_verify_aligned");
653 #endif
655 #define verify_size \
656 do { \
657 if (sizeof(ptrdiff_t) < 8 && \
658 (unlikely(size != (size_t)(ptrdiff_t)size) || \
659 unlikely((ptrdiff_t)size < 0))) { \
660 fatal_mayfail(error_ajla(EC_ASYNC, AJLA_ERROR_SIZE_OVERFLOW), mayfail, "allocation size overflow: %"PRIuMAX" bytes", (uintmax_t)size);\
661 return NULL; \
662 } \
663 } while (0)
665 void * attr_hot_fastcall mem_alloc_position(size_t size, ajla_error_t *mayfail argument_position)
667 void *new_ptr;
668 if (likely(mayfail != MEM_DONT_TRY_TO_FREE))
669 address_lock_verify();
670 verify_size;
671 #ifdef DEBUG_MEMORY_POSSIBLE
672 if (unlikely(memory_debug))
673 return debug_mem_alloc(size, 1, false, false, mayfail, position_arg);
674 #endif
675 new_ptr = alloc_should_fail(mayfail) ? NULL : heap_malloc(size);
676 if (unlikely(!new_ptr)) {
677 new_ptr = oom_calloc(size, mayfail, position_arg);
679 return new_ptr;
682 void * attr_hot_fastcall mem_calloc_position(size_t size, ajla_error_t *mayfail argument_position)
684 void *new_ptr;
685 if (likely(mayfail != MEM_DONT_TRY_TO_FREE))
686 address_lock_verify();
687 verify_size;
688 #ifdef DEBUG_MEMORY_POSSIBLE
689 if (unlikely(memory_debug))
690 return debug_mem_alloc(size, 1, false, true, mayfail, position_arg);
691 #endif
692 new_ptr = alloc_should_fail(mayfail) ? NULL : heap_calloc(size);
693 if (!new_ptr) {
694 new_ptr = oom_calloc(size, mayfail, position_arg);
696 return new_ptr;
700 void * attr_hot_fastcall mem_align_position(size_t size, size_t alignment, ajla_error_t *mayfail argument_position)
702 void *new_ptr;
703 if (likely(mayfail != MEM_DONT_TRY_TO_FREE))
704 address_lock_verify();
705 verify_size;
706 #ifdef DEBUG_MEMORY_POSSIBLE
707 if (unlikely(memory_debug))
708 return debug_mem_alloc(size, alignment, true, false, mayfail, position_arg);
709 #endif
710 new_ptr = alloc_should_fail(mayfail) ? NULL : heap_memalign(alignment, size);
711 if (unlikely(!new_ptr)) {
712 new_ptr = oom_cmemalign(size, alignment, mayfail, position_arg);
714 return new_ptr;
717 void * attr_hot_fastcall mem_calign_position(size_t size, size_t alignment, ajla_error_t *mayfail argument_position)
719 void *new_ptr;
720 if (likely(mayfail != MEM_DONT_TRY_TO_FREE))
721 address_lock_verify();
722 verify_size;
723 #ifdef DEBUG_MEMORY_POSSIBLE
724 if (unlikely(memory_debug))
725 return debug_mem_alloc(size, alignment, true, true, mayfail, position_arg);
726 #endif
727 new_ptr = alloc_should_fail(mayfail) ? NULL : heap_cmemalign(alignment, size);
728 if (unlikely(!new_ptr)) {
729 new_ptr = oom_cmemalign(size, alignment, mayfail, position_arg);
731 return new_ptr;
734 void * attr_hot_fastcall mem_realloc_position(void *ptr, size_t size, ajla_error_t *mayfail argument_position)
736 void *new_ptr;
737 if (likely(mayfail != MEM_DONT_TRY_TO_FREE))
738 address_lock_verify();
739 verify_size;
740 #ifdef DEBUG_MEMORY_POSSIBLE
741 if (unlikely(memory_debug))
742 return debug_mem_realloc(ptr, size, mayfail, position_arg);
743 #endif
744 if (unlikely(!size)) {
745 new_ptr = mem_alloc_position(0, mayfail pass_position);
746 if (likely(new_ptr != NULL))
747 mem_free_position(ptr pass_position);
748 return new_ptr;
750 new_ptr = alloc_should_fail(mayfail) ? NULL : heap_realloc(ptr, size);
751 if (!new_ptr) {
752 new_ptr = oom_realloc(ptr, size, mayfail, position_arg);
754 return new_ptr;
757 void attr_hot_fastcall mem_free_position(const void *ptr argument_position)
759 #ifdef DEBUG_MEMORY_POSSIBLE
760 if (unlikely(memory_debug)) {
761 debug_mem_free(ptr, VFY_UNALIGNED, position_arg);
762 return;
764 #endif
765 heap_free((void *)ptr);
768 void attr_hot_fastcall mem_free_aligned_position(const void *ptr argument_position)
770 #ifdef DEBUG_MEMORY_POSSIBLE
771 if (unlikely(memory_debug)) {
772 debug_mem_free(ptr, VFY_ALIGNED, position_arg);
773 return;
775 #endif
776 heap_free_aligned((void *)ptr);
777 }
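/* The allocation entry points above (mem_alloc/mem_calloc/mem_align/
   mem_calign/mem_realloc/mem_free and the *_aligned variants) all follow the
   same pattern: with DEBUG_MEMORY_POSSIBLE compiled in and memory_debug
   nonzero they divert to the debug_mem_*() implementations, otherwise they
   call the heap_*() macros directly, with the allocating variants falling
   back to the oom_*() retry helpers when the first attempt returns NULL. */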
779 #ifdef DEBUG_MEMORY_POSSIBLE
780 void attr_fastcall mem_set_position(const void *ptr argument_position)
782 if (unlikely(memory_debug)) {
783 debug_mem_set_position(ptr, position_arg);
786 const char * attr_fastcall mem_get_position(const void *ptr argument_position)
788 if (unlikely(memory_debug)) {
789 return debug_mem_get_position(ptr, position_arg);
791 return "unknown position";
793 void attr_fastcall mem_verify_position(const void *ptr argument_position)
795 if (unlikely(memory_debug)) {
796 debug_mem_verify(ptr, position_arg);
799 void attr_fastcall mem_verify_aligned_position(const void *ptr argument_position)
801 if (unlikely(memory_debug)) {
802 debug_mem_verify_aligned(ptr, position_arg);
805 #endif
807 bool attr_cold mem_trim_cache(void)
809 /* !!! TODO */
810 return false;
813 #ifdef DEBUG_MEMORY_POSSIBLE
814 static mutex_t mem_report_mutex;
816 struct memory_entry {
817 position_t position;
818 size_t size;
819 uintptr_t cumulative_size;
820 uintptr_t n_blocks;
823 static bool attr_cold add_memory_entry(struct memory_entry **me, size_t *me_l, struct alloc_header *ah)
825 if (unlikely(!(*me_l & (*me_l - 1)))) {
826 struct memory_entry *m;
827 size_t ns = !*me_l ? 1 : *me_l * 2;
828 if (unlikely(!ns) || ns > (size_t)-1 / sizeof(struct memory_entry))
829 return false;
830 m = heap_realloc(*me, ns * sizeof(struct memory_entry));
831 if (unlikely(!m))
832 return false;
833 *me = m;
835 (*me)[*me_l].position = ah->position;
836 (*me)[*me_l].size = ah->size;
837 (*me)[*me_l].cumulative_size = ah->size;
838 (*me)[*me_l].n_blocks = 1;
839 (*me_l)++;
840 return true;
843 static bool attr_cold add_memory_entries(struct memory_entry **me, size_t *me_l, struct per_thread *pt)
845 struct list *l;
846 list_for_each(l, &pt->block_list) {
847 struct alloc_header *ah = get_struct(l, struct alloc_header, entry);
848 if (unlikely(!add_memory_entry(me, me_l, ah)))
849 return false;
851 return true;
854 static int attr_cold mem_compare_file_line(const void *me1_, const void *me2_)
856 const struct memory_entry *me1 = me1_;
857 const struct memory_entry *me2 = me2_;
858 const char *p1 = position_string_alloc(me1->position);
859 const char *p2 = position_string_alloc(me2->position);
860 #ifdef HAVE_STRVERSCMP
861 int c = strverscmp(p1, p2);
862 #else
863 int c = strcmp(p1, p2);
864 #endif
865 position_string_free(p1);
866 position_string_free(p2);
867 return c;
870 static int attr_cold mem_compare_cumulative_size(const void *me1_, const void *me2_)
872 const struct memory_entry *me1 = me1_;
873 const struct memory_entry *me2 = me2_;
874 if (me1->cumulative_size < me2->cumulative_size)
875 return 1;
876 if (me1->cumulative_size > me2->cumulative_size)
877 return -1;
878 if (me1->n_blocks < me2->n_blocks)
879 return 1;
880 if (me1->n_blocks > me2->n_blocks)
881 return -1;
882 return mem_compare_file_line(me1, me2);
885 void attr_cold mem_report_usage(int mode, const char *string)
887 struct memory_entry *me;
888 size_t me_l, me_l2, mr;
889 uintptr_t total;
890 bool ok;
891 #ifndef THREAD_NONE
892 struct list *l;
893 #endif
894 size_t max_ps, max_digits;
896 if (!USE_LIST) {
897 warning("memory list not available, use --debug=leak");
898 return;
901 if (memory_threads_initialized) mutex_lock(&mem_report_mutex);
903 me_l = 0;
904 me = heap_malloc(1);
905 if (!me)
906 goto oom;
908 if (memory_threads_initialized) mutex_lock(&thread1.mutex);
909 ok = add_memory_entries(&me, &me_l, &thread1);
910 #ifndef THREAD_NONE
911 list_for_each(l, &thread1.used_list) {
912 struct per_thread *pt = get_struct(l, struct per_thread, used_list);
913 if (memory_threads_initialized) mutex_lock(&pt->mutex);
914 if (ok) ok = add_memory_entries(&me, &me_l, pt);
915 if (memory_threads_initialized) mutex_unlock(&pt->mutex);
917 list_for_each(l, &thread1.free_list) {
918 struct per_thread *pt = get_struct(l, struct per_thread, free_list);
919 if (memory_threads_initialized) mutex_lock(&pt->mutex);
920 if (ok) ok = add_memory_entries(&me, &me_l, pt);
921 if (memory_threads_initialized) mutex_unlock(&pt->mutex);
923 #endif
924 if (memory_threads_initialized) mutex_unlock(&thread1.mutex);
925 if (unlikely(!ok))
926 goto oom;
928 total = 0;
929 for (mr = 0; mr < me_l; mr++)
930 total += me[mr].cumulative_size;
932 debug("allocated memory%s%s: %"PRIuMAX" / %"PRIuMAX" = %"PRIuMAX"", *string ? " at " : "", string, (uintmax_t)total, (uintmax_t)me_l, (uintmax_t)(total / (me_l ? me_l : 1)));
934 if (mode == MR_SUMMARY) {
935 goto free_ret;
936 } else if (mode == MR_MOST_ALLOCATED) {
937 qsort(me, me_l, sizeof(struct memory_entry), mem_compare_file_line);
938 me_l2 = 0;
939 for (mr = 0; mr < me_l; mr++) {
940 me[me_l2] = me[mr];
941 while (mr + 1 < me_l && !mem_compare_file_line(&me[mr], &me[mr + 1])) {
942 mr++;
943 me[me_l2].cumulative_size += me[mr].size;
944 me[me_l2].n_blocks++;
946 me_l2++;
948 } else if (mode == MR_LARGEST_BLOCKS) {
949 me_l2 = me_l;
950 } else {
951 internal(file_line, "mem_report_usage: invalid mode %d", mode);
953 qsort(me, me_l2, sizeof(struct memory_entry), mem_compare_cumulative_size);
955 max_ps = 0;
956 for (mr = 0; mr < me_l2; mr++) {
957 const char *ps = position_string_alloc(me[mr].position);
958 size_t psl = strlen(ps);
959 position_string_free(ps);
960 if (psl > max_ps)
961 max_ps = psl;
963 if (me_l2) {
964 char *max_str = str_from_unsigned(me[0].cumulative_size, 10);
965 max_digits = strlen(max_str);
966 mem_free(max_str);
967 } else {
968 max_digits = 0;
971 for (mr = 0; mr < me_l2; mr++) {
972 const char *ps;
973 char *s;
974 size_t sl, psl;
975 str_init(&s, &sl);
976 ps = position_string_alloc(me[mr].position);
977 str_add_string(&s, &sl, ps);
978 position_string_free(ps);
979 ps = str_from_unsigned(me[mr].cumulative_size, 10);
980 psl = strlen(ps);
981 while (sl < max_ps + 1 + (max_digits - psl))
982 str_add_char(&s, &sl, ' ');
983 str_add_string(&s, &sl, ps);
984 mem_free(ps);
985 if (mode == MR_MOST_ALLOCATED) {
986 str_add_bytes(&s, &sl, " / ", 3);
987 str_add_unsigned(&s, &sl, me[mr].n_blocks, 10);
988 str_add_bytes(&s, &sl, " = ", 3);
989 str_add_unsigned(&s, &sl, me[mr].cumulative_size / me[mr].n_blocks, 10);
990 } else if (mode == MR_LARGEST_BLOCKS) {
991 size_t mq;
992 for (mq = mr + 1; mq < me_l2; mq++) {
993 if (mem_compare_file_line(&me[mr], &me[mq]))
994 break;
995 if (me[mr].cumulative_size != me[mq].cumulative_size)
996 break;
998 if (mq > mr + 1) {
999 str_add_bytes(&s, &sl, " x ", 3);
1000 str_add_unsigned(&s, &sl, mq - mr, 10);
1002 mr = mq - 1;
1004 str_finish(&s, &sl);
1005 debug("%s", s);
1006 mem_free(s);
1009 free_ret:
1010 if (memory_threads_initialized) mutex_unlock(&mem_report_mutex);
1011 heap_free(me);
1012 return;
1014 oom:
1015 if (memory_threads_initialized) mutex_unlock(&mem_report_mutex);
1016 if (me) heap_free(me);
1017 warning("out of memory for memory list, allocated size %"PRIuMAX"", (uintmax_t)me_l);
1019 #endif
1021 #ifdef DEBUG_MEMORY_POSSIBLE
1022 static attr_noreturn attr_cold mem_dump_leaks(void)
1024 struct list leaked_list;
1025 struct list *lv;
1026 char *s;
1027 size_t sl;
1028 const char *head = "memory leak: ";
1029 size_t strlen_head = strlen(head);
1030 const char *first_pos = file_line;
1031 uintmax_t n_blocks = 0;
1032 uintmax_t n_bytes = 0;
1034 list_take(&leaked_list, &thread1.block_list);
1035 str_init(&s, &sl);
1037 list_for_each_back(lv, &leaked_list) {
1038 struct alloc_header *ah;
1039 const char *pos_str;
1040 char *t;
1041 size_t tl;
1043 ah = get_struct(lv, struct alloc_header, entry);
1044 pos_str = position_string(ah->position);
1046 str_init(&t, &tl);
1047 str_add_unsigned(&t, &tl, ptr_to_num((char *)ah + AH_SIZE), 16);
1048 str_add_string(&t, &tl, ":");
1049 str_add_unsigned(&t, &tl, ah->size, 10);
1050 str_add_string(&t, &tl, " @ ");
1051 str_add_string(&t, &tl, pos_str);
1052 str_finish(&t, &tl);
1054 if (sl && strlen_head + sl + 2 + tl > 174 - 15) {
1055 str_finish(&s, &sl);
1056 debug("memory leak: %s", s);
1057 mem_free(s);
1058 str_init(&s, &sl);
1061 if (sl) str_add_string(&s, &sl, ", ");
1062 else first_pos = pos_str;
1063 str_add_string(&s, &sl, t);
1064 mem_free(t);
1066 n_blocks++;
1067 n_bytes += ah->size;
1070 str_finish(&s, &sl);
1072 internal(first_pos, "memory leak (%"PRIuMAX" blocks, %"PRIuMAX" bytes): %s", n_blocks, n_bytes, s);
1074 #endif
1076 bool mem_enable_debugging_option(const char *option, size_t l)
1078 #ifndef DEBUG_MEMORY_POSSIBLE
1079 int memory_debug = 0;
1080 #endif
1081 if (!option)
1082 memory_debug |= MEMORY_DEBUG_MAGIC | MEMORY_DEBUG_REDZONE | MEMORY_DEBUG_FILL | MEMORY_DEBUG_TRACK_BLOCKS;
1083 else if (l == 5 && !strncmp(option, "magic", l))
1084 memory_debug |= MEMORY_DEBUG_MAGIC;
1085 else if (l == 7 && !strncmp(option, "redzone", l))
1086 memory_debug |= MEMORY_DEBUG_REDZONE;
1087 else if (l == 4 && !strncmp(option, "fill", l))
1088 memory_debug |= MEMORY_DEBUG_FILL;
1089 else if (l == 4 && !strncmp(option, "leak", l))
1090 memory_debug |= MEMORY_DEBUG_TRACK_BLOCKS;
1091 else if (l == 6 && !strncmp(option, "memory", l))
1092 memory_debug |= MEMORY_DEBUG_MAGIC | MEMORY_DEBUG_REDZONE | MEMORY_DEBUG_FILL | MEMORY_DEBUG_TRACK_BLOCKS;
1093 else
1094 return false;
1095 return true;
1096 }
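/* Mapping of the memory debugging options parsed above: no argument or
   "memory" enables magic checking, red zones, memory filling and block
   tracking together; "magic", "redzone", "fill" and "leak" each enable the
   corresponding single feature, "leak" being the block tracking required by
   mem_report_usage() (the code suggests --debug=leak on the command line)
   and by the leak dump in mem_done(). */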
1098 bool mem_al_enable_profile(const char *option, size_t l)
1100 #ifndef DEBUG_MEMORY_POSSIBLE
1101 int memory_debug = 0;
1102 #endif
1103 if (!option)
1104 memory_debug |= MEMORY_DEBUG_HISTOGRAM;
1105 else if (l == 6 && !strncmp(option, "memory", l))
1106 memory_debug |= MEMORY_DEBUG_HISTOGRAM;
1107 else
1108 return false;
1109 return true;
1112 void mem_al_set_ptrcomp(const char attr_unused *str)
1114 #ifdef POINTER_COMPRESSION_POSSIBLE
1115 pointer_compression_enabled = 1;
1116 #endif
1119 void mem_al_set_system_malloc(const char attr_unused *str)
1121 #ifdef USE_AMALLOC
1122 amalloc_enabled = 0;
1123 #endif
1126 void mem_init(void)
1128 #if defined(POINTER_COMPRESSION_POSSIBLE) && defined(USE_AMALLOC)
1129 if (pointer_compression_enabled && !amalloc_enabled)
1130 fatal("The options --ptrcomp and --system-malloc are not compatible");
1131 #endif
1132 #ifdef DEBUG_MEMORY_POSSIBLE
1133 if (USE_LIST | USE_HISTOGRAM) {
1134 mem_per_thread_init(&thread1);
1136 memory_threads_initialized = false;
1137 /*if (memory_debug & MEMORY_DEBUG_REDZONE && dl_sym("EF_Abort", NULL)) {
1138 debug("Electric Fence detected, disabling red zone");
1139 memory_debug &= ~MEMORY_DEBUG_REDZONE;
1140 }*/
1141 #ifndef THREAD_NONE
1142 list_init(&thread1.used_list);
1143 #endif
1144 #endif
1147 void mem_init_multithreaded(void)
1149 #ifdef DEBUG_MEMORY_POSSIBLE
1150 if (unlikely(memory_threads_initialized))
1151 internal(file_line, "mem_init_multithreaded: memory_threads_initialized already set");
1152 if (USE_LIST | USE_HISTOGRAM) {
1153 mutex_init(&thread1.mutex);
1154 mutex_init(&mem_report_mutex);
1155 tls_init(unsigned char, memory_fill);
1156 #ifndef THREAD_NONE
1157 list_init(&thread1.free_list);
1158 tls_init(struct per_thread *, mem_per_thread);
1159 tls_set(struct per_thread *, mem_per_thread, &thread1);
1160 #endif
1161 memory_threads_initialized = true;
1163 #endif
1166 void mem_done_multithreaded(void)
1168 #ifdef DEBUG_MEMORY_POSSIBLE
1169 if (unlikely(!!memory_threads_initialized != !!(USE_LIST | USE_HISTOGRAM)))
1170 internal(file_line, "mem_done_multithreaded: memory_threads_initialized %sset", memory_threads_initialized ? "" : "not ");
1171 if (USE_LIST | USE_HISTOGRAM) {
1172 memory_threads_initialized = false;
1173 #ifndef THREAD_NONE
1174 tls_done(struct per_thread *, mem_per_thread);
1175 while (!list_is_empty(&thread1.free_list)) {
1176 struct per_thread *pt = get_struct(thread1.free_list.next, struct per_thread, free_list);
1177 list_del(&pt->free_list);
1178 mem_per_thread_free(pt);
1179 /* { static unsigned x = 0; debug("freeing per_thread: %u", ++x); } */
1181 if (!list_is_empty(&thread1.used_list)) {
1182 internal(file_line, "mem_done_multithreaded: used_list is not empty");
1184 #endif
1185 tls_done(unsigned char, memory_fill);
1186 mutex_done(&mem_report_mutex);
1187 mutex_done(&thread1.mutex);
1189 #endif
1192 void mem_done(void)
1194 #ifdef DEBUG_MEMORY_POSSIBLE
1195 if (unlikely(memory_threads_initialized))
1196 internal(file_line, "mem_done: memory_threads_initialized set");
1197 if (USE_LIST) {
1198 if (unlikely(!list_is_empty(&thread1.block_list)))
1199 mem_dump_leaks();
1200 if (unlikely(thread1.bytes != 0) || unlikely(thread1.blocks != 0))
1201 internal(file_line, "mem_done: memory counters leaked: %"PRIuMAX", %"PRIuMAX"", (uintmax_t)thread1.bytes, (uintmax_t)thread1.blocks);
1203 if (USE_HISTOGRAM) {
1204 size_t i;
1205 for (i = 0; i < thread1.histogram_size; i++)
1206 if (unlikely(thread1.histogram[i].cnt != 0))
1207 debug("%"PRIuMAX"(%"PRIxMAX") : %"PRIuMAX"\t\t%s", (uintmax_t)i, (uintmax_t)i, thread1.histogram[i].cnt, position_string(thread1.histogram[i].position));
1208 heap_free(thread1.histogram);
1209 thread1.histogram = NULL;
1210 }
1211 #endif
1212 }