/*
 * kmp_alloc.cpp -- private/shared dynamic memory allocation and management
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"
17 // Disable bget when it is not used
18 #if KMP_USE_BGET
20 /* Thread private buffer management code */
22 typedef int (*bget_compact_t)(size_t, int);
23 typedef void *(*bget_acquire_t)(size_t);
24 typedef void (*bget_release_t)(void *);
26 /* NOTE: bufsize must be a signed datatype */
28 #if KMP_OS_WINDOWS
29 #if KMP_ARCH_X86 || KMP_ARCH_ARM
30 typedef kmp_int32 bufsize;
31 #else
32 typedef kmp_int64 bufsize;
33 #endif
34 #else
35 typedef ssize_t bufsize;
36 #endif // KMP_OS_WINDOWS
38 /* The three modes of operation are, fifo search, lifo search, and best-fit */
40 typedef enum bget_mode {
41 bget_mode_fifo = 0,
42 bget_mode_lifo = 1,
43 bget_mode_best = 2
44 } bget_mode_t;
46 static void bpool(kmp_info_t *th, void *buffer, bufsize len);
47 static void *bget(kmp_info_t *th, bufsize size);
48 static void *bgetz(kmp_info_t *th, bufsize size);
49 static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
50 static void brel(kmp_info_t *th, void *buf);
51 static void bectl(kmp_info_t *th, bget_compact_t compact,
52 bget_acquire_t acquire, bget_release_t release,
53 bufsize pool_incr);
55 /* BGET CONFIGURATION */
56 /* Buffer allocation size quantum: all buffers allocated are a
57 multiple of this size. This MUST be a power of two. */
59 /* On IA-32 architecture with Linux* OS, malloc() does not
60 ensure 16 byte alignment */
62 #if KMP_ARCH_X86 || !KMP_HAVE_QUAD
64 #define SizeQuant 8
65 #define AlignType double
67 #else
69 #define SizeQuant 16
70 #define AlignType _Quad
72 #endif
74 // Define this symbol to enable the bstats() function which calculates the
75 // total free space in the buffer pool, the largest available buffer, and the
76 // total space currently allocated.
77 #define BufStats 1
79 #ifdef KMP_DEBUG
81 // Define this symbol to enable the bpoold() function which dumps the buffers
82 // in a buffer pool.
83 #define BufDump 1
85 // Define this symbol to enable the bpoolv() function for validating a buffer
86 // pool.
87 #define BufValid 1
89 // Define this symbol to enable the bufdump() function which allows dumping the
90 // contents of an allocated or free buffer.
91 #define DumpData 1
93 #ifdef NOT_USED_NOW
95 // Wipe free buffers to a guaranteed pattern of garbage to trip up miscreants
96 // who attempt to use pointers into released buffers.
97 #define FreeWipe 1
99 // Use a best fit algorithm when searching for space for an allocation request.
100 // This uses memory more efficiently, but allocation will be much slower.
101 #define BestFit 1
103 #endif /* NOT_USED_NOW */
104 #endif /* KMP_DEBUG */
106 static bufsize bget_bin_size[] = {
108 // 1 << 6, /* .5 Cache line */
109 1 << 7, /* 1 Cache line, new */
110 1 << 8, /* 2 Cache lines */
111 1 << 9, /* 4 Cache lines, new */
112 1 << 10, /* 8 Cache lines */
113 1 << 11, /* 16 Cache lines, new */
114 1 << 12, 1 << 13, /* new */
115 1 << 14, 1 << 15, /* new */
116 1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /* 1MB */
117 1 << 21, /* 2MB */
118 1 << 22, /* 4MB */
119 1 << 23, /* 8MB */
120 1 << 24, /* 16MB */
121 1 << 25, /* 32MB */
124 #define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))
126 struct bfhead;
128 // Declare the interface, including the requested buffer size type, bufsize.
130 /* Queue links */
131 typedef struct qlinks {
132 struct bfhead *flink; /* Forward link */
133 struct bfhead *blink; /* Backward link */
134 } qlinks_t;
136 /* Header in allocated and free buffers */
137 typedef struct bhead2 {
138 kmp_info_t *bthr; /* The thread which owns the buffer pool */
139 bufsize prevfree; /* Relative link back to previous free buffer in memory or
140 0 if previous buffer is allocated. */
141 bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
142 } bhead2_t;
144 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
145 typedef union bhead {
146 KMP_ALIGN(SizeQuant)
147 AlignType b_align;
148 char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
149 bhead2_t bb;
150 } bhead_t;
151 #define BH(p) ((bhead_t *)(p))
153 /* Header in directly allocated buffers (by acqfcn) */
154 typedef struct bdhead {
155 bufsize tsize; /* Total size, including overhead */
156 bhead_t bh; /* Common header */
157 } bdhead_t;
158 #define BDH(p) ((bdhead_t *)(p))
160 /* Header in free buffers */
161 typedef struct bfhead {
162 bhead_t bh; /* Common allocated/free header */
163 qlinks_t ql; /* Links on free list */
164 } bfhead_t;
165 #define BFH(p) ((bfhead_t *)(p))
167 typedef struct thr_data {
168 bfhead_t freelist[MAX_BGET_BINS];
169 #if BufStats
170 size_t totalloc; /* Total space currently allocated */
171 long numget, numrel; /* Number of bget() and brel() calls */
172 long numpblk; /* Number of pool blocks */
173 long numpget, numprel; /* Number of block gets and rels */
174 long numdget, numdrel; /* Number of direct gets and rels */
175 #endif /* BufStats */
177 /* Automatic expansion block management functions */
178 bget_compact_t compfcn;
179 bget_acquire_t acqfcn;
180 bget_release_t relfcn;
182 bget_mode_t mode; /* what allocation mode to use? */
184 bufsize exp_incr; /* Expansion block size */
185 bufsize pool_len; /* 0: no bpool calls have been made
186 -1: not all pool blocks are the same size
187 >0: (common) block size for all bpool calls made so far
189 bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
190 } thr_data_t;
192 /* Minimum allocation quantum: */
193 #define QLSize (sizeof(qlinks_t))
194 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
195 #define MaxSize \
196 (bufsize)( \
197 ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))
198 // Maximum for the requested size.
200 /* End sentinel: value placed in bsize field of dummy block delimiting
201 end of pool block. The most negative number which will fit in a
202 bufsize, defined in a way that the compiler will accept. */
204 #define ESent \
205 ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))
207 /* Thread Data management routines */
208 static int bget_get_bin(bufsize size) {
209 // binary chop bins
210 int lo = 0, hi = MAX_BGET_BINS - 1;
212 KMP_DEBUG_ASSERT(size > 0);
214 while ((hi - lo) > 1) {
215 int mid = (lo + hi) >> 1;
216 if (size < bget_bin_size[mid])
217 hi = mid - 1;
218 else
219 lo = mid;
222 KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));
224 return lo;
227 static void set_thr_data(kmp_info_t *th) {
228 int i;
229 thr_data_t *data;
231 data = (thr_data_t *)((!th->th.th_local.bget_data)
232 ? __kmp_allocate(sizeof(*data))
233 : th->th.th_local.bget_data);
235 memset(data, '\0', sizeof(*data));
237 for (i = 0; i < MAX_BGET_BINS; ++i) {
238 data->freelist[i].ql.flink = &data->freelist[i];
239 data->freelist[i].ql.blink = &data->freelist[i];
242 th->th.th_local.bget_data = data;
243 th->th.th_local.bget_list = 0;
244 #if !USE_CMP_XCHG_FOR_BGET
245 #ifdef USE_QUEUING_LOCK_FOR_BGET
246 __kmp_init_lock(&th->th.th_local.bget_lock);
247 #else
248 __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
249 #endif /* USE_LOCK_FOR_BGET */
250 #endif /* ! USE_CMP_XCHG_FOR_BGET */
253 static thr_data_t *get_thr_data(kmp_info_t *th) {
254 thr_data_t *data;
256 data = (thr_data_t *)th->th.th_local.bget_data;
258 KMP_DEBUG_ASSERT(data != 0);
260 return data;
263 /* Walk the free list and release the enqueued buffers */
264 static void __kmp_bget_dequeue(kmp_info_t *th) {
265 void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
267 if (p != 0) {
268 #if USE_CMP_XCHG_FOR_BGET
270 volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
271 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
272 CCAST(void *, old_value), nullptr)) {
273 KMP_CPU_PAUSE();
274 old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
276 p = CCAST(void *, old_value);
278 #else /* ! USE_CMP_XCHG_FOR_BGET */
279 #ifdef USE_QUEUING_LOCK_FOR_BGET
280 __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
281 #else
282 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
283 #endif /* USE_QUEUING_LOCK_FOR_BGET */
285 p = (void *)th->th.th_local.bget_list;
286 th->th.th_local.bget_list = 0;
288 #ifdef USE_QUEUING_LOCK_FOR_BGET
289 __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
290 #else
291 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
292 #endif
293 #endif /* USE_CMP_XCHG_FOR_BGET */
295 /* Check again to make sure the list is not empty */
296 while (p != 0) {
297 void *buf = p;
298 bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));
300 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
301 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
302 (kmp_uintptr_t)th); // clear possible mark
303 KMP_DEBUG_ASSERT(b->ql.blink == 0);
305 p = (void *)b->ql.flink;
307 brel(th, buf);
312 /* Chain together the free buffers by using the thread owner field */
313 static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
314 #ifdef USE_QUEUING_LOCK_FOR_BGET
316 kmp_int32 rel_gtid
317 #endif
319 bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));
321 KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
322 KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
323 (kmp_uintptr_t)th); // clear possible mark
325 b->ql.blink = 0;
327 KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
328 __kmp_gtid_from_thread(th)));
330 #if USE_CMP_XCHG_FOR_BGET
332 volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
333 /* the next pointer must be set before setting bget_list to buf to avoid
334 exposing a broken list to other threads, even for an instant. */
335 b->ql.flink = BFH(CCAST(void *, old_value));
337 while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
338 CCAST(void *, old_value), buf)) {
339 KMP_CPU_PAUSE();
340 old_value = TCR_PTR(th->th.th_local.bget_list);
341 /* the next pointer must be set before setting bget_list to buf to avoid
342 exposing a broken list to other threads, even for an instant. */
343 b->ql.flink = BFH(CCAST(void *, old_value));
346 #else /* ! USE_CMP_XCHG_FOR_BGET */
347 #ifdef USE_QUEUING_LOCK_FOR_BGET
348 __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
349 #else
350 __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
351 #endif
353 b->ql.flink = BFH(th->th.th_local.bget_list);
354 th->th.th_local.bget_list = (void *)buf;
356 #ifdef USE_QUEUING_LOCK_FOR_BGET
357 __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
358 #else
359 __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
360 #endif
361 #endif /* USE_CMP_XCHG_FOR_BGET */
364 /* insert buffer back onto a new freelist */
365 static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
366 int bin;
368 KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
369 KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);
371 bin = bget_get_bin(b->bh.bb.bsize);
373 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
374 &thr->freelist[bin]);
375 KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
376 &thr->freelist[bin]);
378 b->ql.flink = &thr->freelist[bin];
379 b->ql.blink = thr->freelist[bin].ql.blink;
381 thr->freelist[bin].ql.blink = b;
382 b->ql.blink->ql.flink = b;
385 /* unlink the buffer from the old freelist */
386 static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
387 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
388 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
390 b->ql.blink->ql.flink = b->ql.flink;
391 b->ql.flink->ql.blink = b->ql.blink;
394 /* GET STATS -- check info on free list */
395 static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
396 thr_data_t *thr = get_thr_data(th);
397 int bin;
399 *total_free = *max_free = 0;
401 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
402 bfhead_t *b, *best;
404 best = &thr->freelist[bin];
405 b = best->ql.flink;
407 while (b != &thr->freelist[bin]) {
408 *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
409 if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
410 best = b;
412 /* Link to next buffer */
413 b = b->ql.flink;
416 if (*max_free < best->bh.bb.bsize)
417 *max_free = best->bh.bb.bsize;
420 if (*max_free > (bufsize)sizeof(bhead_t))
421 *max_free -= sizeof(bhead_t);
424 /* BGET -- Allocate a buffer. */
425 static void *bget(kmp_info_t *th, bufsize requested_size) {
426 thr_data_t *thr = get_thr_data(th);
427 bufsize size = requested_size;
428 bfhead_t *b;
429 void *buf;
430 int compactseq = 0;
431 int use_blink = 0;
432 /* For BestFit */
433 bfhead_t *best;
435 if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
436 return NULL;
439 __kmp_bget_dequeue(th); /* Release any queued buffers */
441 if (size < (bufsize)SizeQ) { // Need at least room for the queue links.
442 size = SizeQ;
444 #if defined(SizeQuant) && (SizeQuant > 1)
445 size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
446 #endif
448 size += sizeof(bhead_t); // Add overhead in allocated buffer to size required.
449 KMP_DEBUG_ASSERT(size >= 0);
450 KMP_DEBUG_ASSERT(size % SizeQuant == 0);
452 use_blink = (thr->mode == bget_mode_lifo);
454 /* If a compact function was provided in the call to bectl(), wrap
455 a loop around the allocation process to allow compaction to
456 intervene in case we don't find a suitable buffer in the chain. */
458 for (;;) {
459 int bin;
461 for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
462 /* Link to next buffer */
463 b = (use_blink ? thr->freelist[bin].ql.blink
464 : thr->freelist[bin].ql.flink);
466 if (thr->mode == bget_mode_best) {
467 best = &thr->freelist[bin];
469 /* Scan the free list searching for the first buffer big enough
470 to hold the requested size buffer. */
471 while (b != &thr->freelist[bin]) {
472 if (b->bh.bb.bsize >= (bufsize)size) {
473 if ((best == &thr->freelist[bin]) ||
474 (b->bh.bb.bsize < best->bh.bb.bsize)) {
475 best = b;
479 /* Link to next buffer */
480 b = (use_blink ? b->ql.blink : b->ql.flink);
482 b = best;
485 while (b != &thr->freelist[bin]) {
486 if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {
488 // Buffer is big enough to satisfy the request. Allocate it to the
489 // caller. We must decide whether the buffer is large enough to split
490 // into the part given to the caller and a free buffer that remains
491 // on the free list, or whether the entire buffer should be removed
492 // from the free list and given to the caller in its entirety. We
493 // only split the buffer if enough room remains for a header plus the
494 // minimum quantum of allocation.
495 if ((b->bh.bb.bsize - (bufsize)size) >
496 (bufsize)(SizeQ + (sizeof(bhead_t)))) {
497 bhead_t *ba, *bn;
499 ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
500 bn = BH(((char *)ba) + size);
502 KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
504 /* Subtract size from length of free block. */
505 b->bh.bb.bsize -= (bufsize)size;
507 /* Link allocated buffer to the previous free buffer. */
508 ba->bb.prevfree = b->bh.bb.bsize;
510 /* Plug negative size into user buffer. */
511 ba->bb.bsize = -size;
513 /* Mark this buffer as owned by this thread. */
514 TCW_PTR(ba->bb.bthr,
515 th); // not an allocated address (do not mark it)
516 /* Mark buffer after this one not preceded by free block. */
517 bn->bb.prevfree = 0;
519 // unlink buffer from old freelist, and reinsert into new freelist
520 __kmp_bget_remove_from_freelist(b);
521 __kmp_bget_insert_into_freelist(thr, b);
522 #if BufStats
523 thr->totalloc += (size_t)size;
524 thr->numget++; /* Increment number of bget() calls */
525 #endif
526 buf = (void *)((((char *)ba) + sizeof(bhead_t)));
527 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
528 return buf;
529 } else {
530 bhead_t *ba;
532 ba = BH(((char *)b) + b->bh.bb.bsize);
534 KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
536 /* The buffer isn't big enough to split. Give the whole
537 shebang to the caller and remove it from the free list. */
539 __kmp_bget_remove_from_freelist(b);
540 #if BufStats
541 thr->totalloc += (size_t)b->bh.bb.bsize;
542 thr->numget++; /* Increment number of bget() calls */
543 #endif
544 /* Negate size to mark buffer allocated. */
545 b->bh.bb.bsize = -(b->bh.bb.bsize);
547 /* Mark this buffer as owned by this thread. */
548 TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark)
549 /* Zero the back pointer in the next buffer in memory
550 to indicate that this buffer is allocated. */
551 ba->bb.prevfree = 0;
553 /* Give user buffer starting at queue links. */
554 buf = (void *)&(b->ql);
555 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
556 return buf;
560 /* Link to next buffer */
561 b = (use_blink ? b->ql.blink : b->ql.flink);
565 /* We failed to find a buffer. If there's a compact function defined,
566 notify it of the size requested. If it returns TRUE, try the allocation
567 again. */
569 if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
570 break;
574 /* No buffer available with requested size free. */
576 /* Don't give up yet -- look in the reserve supply. */
577 if (thr->acqfcn != 0) {
578 if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
579 /* Request is too large to fit in a single expansion block.
580 Try to satisfy it by a direct buffer acquisition. */
581 bdhead_t *bdh;
583 size += sizeof(bdhead_t) - sizeof(bhead_t);
585 KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));
587 /* richryan */
588 bdh = BDH((*thr->acqfcn)((bufsize)size));
589 if (bdh != NULL) {
591 // Mark the buffer special by setting size field of its header to zero.
592 bdh->bh.bb.bsize = 0;
594 /* Mark this buffer as owned by this thread. */
595 TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
596 // because direct buffer never goes to free list
597 bdh->bh.bb.prevfree = 0;
598 bdh->tsize = size;
599 #if BufStats
600 thr->totalloc += (size_t)size;
601 thr->numget++; /* Increment number of bget() calls */
602 thr->numdget++; /* Direct bget() call count */
603 #endif
604 buf = (void *)(bdh + 1);
605 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
606 return buf;
609 } else {
611 /* Try to obtain a new expansion block */
612 void *newpool;
614 KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));
616 /* richryan */
617 newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
618 KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
619 if (newpool != NULL) {
620 bpool(th, newpool, thr->exp_incr);
621 buf = bget(
622 th, requested_size); /* This can't, I say, can't get into a loop. */
623 return buf;
628 /* Still no buffer available */
630 return NULL;
633 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
634 the entire contents of the buffer to zero, not just the
635 region requested by the caller. */
637 static void *bgetz(kmp_info_t *th, bufsize size) {
638 char *buf = (char *)bget(th, size);
640 if (buf != NULL) {
641 bhead_t *b;
642 bufsize rsize;
644 b = BH(buf - sizeof(bhead_t));
645 rsize = -(b->bb.bsize);
646 if (rsize == 0) {
647 bdhead_t *bd;
649 bd = BDH(buf - sizeof(bdhead_t));
650 rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
651 } else {
652 rsize -= sizeof(bhead_t);
655 KMP_DEBUG_ASSERT(rsize >= size);
657 (void)memset(buf, 0, (bufsize)rsize);
659 return ((void *)buf);
662 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
663 simply in terms of brel() and bget(). It could be
664 enhanced to allow the buffer to grow into adjacent free
665 blocks and to avoid moving data unnecessarily. */
667 static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
668 void *nbuf;
669 bufsize osize; /* Old size of buffer */
670 bhead_t *b;
672 nbuf = bget(th, size);
673 if (nbuf == NULL) { /* Acquire new buffer */
674 return NULL;
676 if (buf == NULL) {
677 return nbuf;
679 b = BH(((char *)buf) - sizeof(bhead_t));
680 osize = -b->bb.bsize;
681 if (osize == 0) {
682 /* Buffer acquired directly through acqfcn. */
683 bdhead_t *bd;
685 bd = BDH(((char *)buf) - sizeof(bdhead_t));
686 osize = bd->tsize - (bufsize)sizeof(bdhead_t);
687 } else {
688 osize -= sizeof(bhead_t);
691 KMP_DEBUG_ASSERT(osize > 0);
693 (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
694 (size_t)((size < osize) ? size : osize));
695 brel(th, buf);
697 return nbuf;
700 /* BREL -- Release a buffer. */
701 static void brel(kmp_info_t *th, void *buf) {
702 thr_data_t *thr = get_thr_data(th);
703 bfhead_t *b, *bn;
704 kmp_info_t *bth;
706 KMP_DEBUG_ASSERT(buf != NULL);
707 KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
709 b = BFH(((char *)buf) - sizeof(bhead_t));
711 if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
712 bdhead_t *bdh;
714 bdh = BDH(((char *)buf) - sizeof(bdhead_t));
715 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
716 #if BufStats
717 thr->totalloc -= (size_t)bdh->tsize;
718 thr->numdrel++; /* Number of direct releases */
719 thr->numrel++; /* Increment number of brel() calls */
720 #endif /* BufStats */
721 #ifdef FreeWipe
722 (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));
723 #endif /* FreeWipe */
725 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));
727 KMP_DEBUG_ASSERT(thr->relfcn != 0);
728 (*thr->relfcn)((void *)bdh); /* Release it directly. */
729 return;
732 bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
733 ~1); // clear possible mark before comparison
734 if (bth != th) {
735 /* Add this buffer to be released by the owning thread later */
736 __kmp_bget_enqueue(bth, buf
737 #ifdef USE_QUEUING_LOCK_FOR_BGET
739 __kmp_gtid_from_thread(th)
740 #endif
742 return;
745 /* Buffer size must be negative, indicating that the buffer is allocated. */
746 if (b->bh.bb.bsize >= 0) {
747 bn = NULL;
749 KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
751 /* Back pointer in next buffer must be zero, indicating the same thing: */
753 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);
755 #if BufStats
756 thr->numrel++; /* Increment number of brel() calls */
757 thr->totalloc += (size_t)b->bh.bb.bsize;
758 #endif
760 /* If the back link is nonzero, the previous buffer is free. */
762 if (b->bh.bb.prevfree != 0) {
763 /* The previous buffer is free. Consolidate this buffer with it by adding
764 the length of this buffer to the previous free buffer. Note that we
765 subtract the size in the buffer being released, since it's negative to
766 indicate that the buffer is allocated. */
767 bufsize size = b->bh.bb.bsize;
769 /* Make the previous buffer the one we're working on. */
770 KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
771 b->bh.bb.prevfree);
772 b = BFH(((char *)b) - b->bh.bb.prevfree);
773 b->bh.bb.bsize -= size;
775 /* unlink the buffer from the old freelist */
776 __kmp_bget_remove_from_freelist(b);
777 } else {
778 /* The previous buffer isn't allocated. Mark this buffer size as positive
779 (i.e. free) and fall through to place the buffer on the free list as an
780 isolated free block. */
781 b->bh.bb.bsize = -b->bh.bb.bsize;
784 /* insert buffer back onto a new freelist */
785 __kmp_bget_insert_into_freelist(thr, b);
787 /* Now we look at the next buffer in memory, located by advancing from
788 the start of this buffer by its size, to see if that buffer is
789 free. If it is, we combine this buffer with the next one in
790 memory, dechaining the second buffer from the free list. */
791 bn = BFH(((char *)b) + b->bh.bb.bsize);
792 if (bn->bh.bb.bsize > 0) {
794 /* The buffer is free. Remove it from the free list and add
795 its size to that of our buffer. */
796 KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
797 bn->bh.bb.bsize);
799 __kmp_bget_remove_from_freelist(bn);
801 b->bh.bb.bsize += bn->bh.bb.bsize;
803 /* unlink the buffer from the old freelist, and reinsert it into the new
804 * freelist */
805 __kmp_bget_remove_from_freelist(b);
806 __kmp_bget_insert_into_freelist(thr, b);
808 /* Finally, advance to the buffer that follows the newly
809 consolidated free block. We must set its backpointer to the
810 head of the consolidated free block. We know the next block
811 must be an allocated block because the process of recombination
812 guarantees that two free blocks will never be contiguous in
813 memory. */
814 bn = BFH(((char *)b) + b->bh.bb.bsize);
816 #ifdef FreeWipe
817 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
818 (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));
819 #endif
820 KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
822 /* The next buffer is allocated. Set the backpointer in it to point
823 to this buffer; the previous free buffer in memory. */
825 bn->bh.bb.prevfree = b->bh.bb.bsize;
827 /* If a block-release function is defined, and this free buffer
828 constitutes the entire block, release it. Note that pool_len
829 is defined in such a way that the test will fail unless all
830 pool blocks are the same size. */
831 if (thr->relfcn != 0 &&
832 b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
833 #if BufStats
834 if (thr->numpblk !=
835 1) { /* Do not release the last buffer until finalization time */
836 #endif
838 KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
839 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
840 KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
841 b->bh.bb.bsize);
843 /* Unlink the buffer from the free list */
844 __kmp_bget_remove_from_freelist(b);
846 KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));
848 (*thr->relfcn)(b);
849 #if BufStats
850 thr->numprel++; /* Nr of expansion block releases */
851 thr->numpblk--; /* Total number of blocks */
852 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
854 // avoid leaving stale last_pool pointer around if it is being dealloced
855 if (thr->last_pool == b)
856 thr->last_pool = 0;
857 } else {
858 thr->last_pool = b;
860 #endif /* BufStats */
864 /* BECTL -- Establish automatic pool expansion control */
865 static void bectl(kmp_info_t *th, bget_compact_t compact,
866 bget_acquire_t acquire, bget_release_t release,
867 bufsize pool_incr) {
868 thr_data_t *thr = get_thr_data(th);
870 thr->compfcn = compact;
871 thr->acqfcn = acquire;
872 thr->relfcn = release;
873 thr->exp_incr = pool_incr;
876 /* BPOOL -- Add a region of memory to the buffer pool. */
877 static void bpool(kmp_info_t *th, void *buf, bufsize len) {
878 /* int bin = 0; */
879 thr_data_t *thr = get_thr_data(th);
880 bfhead_t *b = BFH(buf);
881 bhead_t *bn;
883 __kmp_bget_dequeue(th); /* Release any queued buffers */
885 #ifdef SizeQuant
886 len &= ~((bufsize)(SizeQuant - 1));
887 #endif
888 if (thr->pool_len == 0) {
889 thr->pool_len = len;
890 } else if (len != thr->pool_len) {
891 thr->pool_len = -1;
893 #if BufStats
894 thr->numpget++; /* Number of block acquisitions */
895 thr->numpblk++; /* Number of blocks total */
896 KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
897 #endif /* BufStats */
899 /* Since the block is initially occupied by a single free buffer,
900 it had better not be (much) larger than the largest buffer
901 whose size we can store in bhead.bb.bsize. */
902 KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));
904 /* Clear the backpointer at the start of the block to indicate that
905 there is no free block prior to this one. That blocks
906 recombination when the first block in memory is released. */
907 b->bh.bb.prevfree = 0;
909 /* Create a dummy allocated buffer at the end of the pool. This dummy
910 buffer is seen when a buffer at the end of the pool is released and
911 blocks recombination of the last buffer with the dummy buffer at
912 the end. The length in the dummy buffer is set to the largest
913 negative number to denote the end of the pool for diagnostic
914 routines (this specific value is not counted on by the actual
915 allocation and release functions). */
916 len -= sizeof(bhead_t);
917 b->bh.bb.bsize = (bufsize)len;
918 /* Set the owner of this buffer */
919 TCW_PTR(b->bh.bb.bthr,
920 (kmp_info_t *)((kmp_uintptr_t)th |
921 1)); // mark the buffer as allocated address
923 /* Chain the new block to the free list. */
924 __kmp_bget_insert_into_freelist(thr, b);
926 #ifdef FreeWipe
927 (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
928 (size_t)(len - sizeof(bfhead_t)));
929 #endif
930 bn = BH(((char *)b) + len);
931 bn->bb.prevfree = (bufsize)len;
932 /* Definition of ESent assumes two's complement! */
933 KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));
935 bn->bb.bsize = ESent;
938 /* BFREED -- Dump the free lists for this thread. */
939 static void bfreed(kmp_info_t *th) {
940 int bin = 0, count = 0;
941 int gtid = __kmp_gtid_from_thread(th);
942 thr_data_t *thr = get_thr_data(th);
944 #if BufStats
945 __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
946 " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
947 " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
948 " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
949 " drel=%" KMP_INT64_SPEC "\n",
950 gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
951 (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
952 (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
953 (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);
954 #endif
956 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
957 bfhead_t *b;
959 for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
960 b = b->ql.flink) {
961 bufsize bs = b->bh.bb.bsize;
963 KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
964 KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
965 KMP_DEBUG_ASSERT(bs > 0);
967 count += 1;
969 __kmp_printf_no_lock(
970 "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
971 (long)bs);
972 #ifdef FreeWipe
974 char *lerr = ((char *)b) + sizeof(bfhead_t);
975 if ((bs > sizeof(bfhead_t)) &&
976 ((*lerr != 0x55) ||
977 (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
978 0))) {
979 __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
980 "free block have been overstored.)\n",
981 gtid);
984 #endif
988 if (count == 0)
989 __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
992 void __kmp_initialize_bget(kmp_info_t *th) {
993 KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));
995 set_thr_data(th);
997 bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
998 (bufsize)__kmp_malloc_pool_incr);
// Tear down the thread-private bget state for 'th': release the last
// expansion pool back to the pool-release callback (when statistics allow
// us to prove the free buffer spans the whole pool block) and free the
// per-thread descriptor itself.
void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

#if BufStats
  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a way
     that the test will fail unless all pool blocks are the same size. */

  // Deallocate the last pool if one exists because we no longer do it in brel()
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    // Sanity-check the block layout: no preceding free block, and the
    // end-sentinel header immediately follows this buffer.
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }
#endif /* BufStats */

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}
1043 void kmpc_set_poolsize(size_t size) {
1044 bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
1045 (bget_release_t)free, (bufsize)size);
1048 size_t kmpc_get_poolsize(void) {
1049 thr_data_t *p;
1051 p = get_thr_data(__kmp_get_thread());
1053 return p->exp_incr;
1056 void kmpc_set_poolmode(int mode) {
1057 thr_data_t *p;
1059 if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
1060 mode == bget_mode_best) {
1061 p = get_thr_data(__kmp_get_thread());
1062 p->mode = (bget_mode_t)mode;
1066 int kmpc_get_poolmode(void) {
1067 thr_data_t *p;
1069 p = get_thr_data(__kmp_get_thread());
1071 return p->mode;
1074 void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
1075 kmp_info_t *th = __kmp_get_thread();
1076 bufsize a, b;
1078 __kmp_bget_dequeue(th); /* Release any queued buffers */
1080 bcheck(th, &a, &b);
1082 *maxmem = a;
1083 *allmem = b;
1086 void kmpc_poolprint(void) {
1087 kmp_info_t *th = __kmp_get_thread();
1089 __kmp_bget_dequeue(th); /* Release any queued buffers */
1091 bfreed(th);
1094 #endif // #if KMP_USE_BGET
1096 void *kmpc_malloc(size_t size) {
1097 void *ptr;
1098 ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
1099 if (ptr != NULL) {
1100 // save allocated pointer just before one returned to user
1101 *(void **)ptr = ptr;
1102 ptr = (void **)ptr + 1;
1104 return ptr;
1107 #define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
1109 void *kmpc_aligned_malloc(size_t size, size_t alignment) {
1110 void *ptr;
1111 void *ptr_allocated;
1112 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
1113 if (!IS_POWER_OF_TWO(alignment)) {
1114 // AC: do we need to issue a warning here?
1115 errno = EINVAL;
1116 return NULL;
1118 size = size + sizeof(void *) + alignment;
1119 ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
1120 if (ptr_allocated != NULL) {
1121 // save allocated pointer just before one returned to user
1122 ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
1123 ~(alignment - 1));
1124 *((void **)ptr - 1) = ptr_allocated;
1125 } else {
1126 ptr = NULL;
1128 return ptr;
1131 void *kmpc_calloc(size_t nelem, size_t elsize) {
1132 void *ptr;
1133 ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
1134 if (ptr != NULL) {
1135 // save allocated pointer just before one returned to user
1136 *(void **)ptr = ptr;
1137 ptr = (void **)ptr + 1;
1139 return ptr;
// realloc() analogue for blocks obtained from kmpc_malloc()/kmpc_calloc().
// NULL ptr behaves like malloc; size 0 behaves like free; otherwise the
// saved allocation address (stored one pointer before 'ptr') is passed to
// bgetr() and the bookkeeping slot is rewritten in the (possibly moved) block.
void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;
  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before.
    // So it should be safe to call __kmp_get_thread(), not
    // __kmp_entry_thread().
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    // Grow/shrink in place or move; bgetr preserves the contents.
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}
1171 // NOTE: the library must have already been initialized by a previous allocate
1172 void kmpc_free(void *ptr) {
1173 if (!__kmp_init_serial) {
1174 return;
1176 if (ptr != NULL) {
1177 kmp_info_t *th = __kmp_get_thread();
1178 __kmp_bget_dequeue(th); /* Release any queued buffers */
1179 // extract allocated pointer and free it
1180 KMP_ASSERT(*((void **)ptr - 1));
1181 brel(th, *((void **)ptr - 1));
1185 void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
1186 void *ptr;
1187 KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
1188 (int)size KMP_SRC_LOC_PARM));
1189 ptr = bget(th, (bufsize)size);
1190 KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
1191 return ptr;
1194 void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
1195 size_t elsize KMP_SRC_LOC_DECL) {
1196 void *ptr;
1197 KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
1198 (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
1199 ptr = bgetz(th, (bufsize)(nelem * elsize));
1200 KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
1201 return ptr;
1204 void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
1205 size_t size KMP_SRC_LOC_DECL) {
1206 KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
1207 ptr, (int)size KMP_SRC_LOC_PARM));
1208 ptr = bgetr(th, ptr, (bufsize)size);
1209 KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
1210 return ptr;
// Internal per-thread free: drain any deferred releases queued for this
// thread, then return 'ptr' to th's bget pool. NULL is a no-op.
void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}
1223 /* OMP 5.0 Memory Management support */
/* OMP 5.0 Memory Management support */
// memkind shared-library name used by __kmp_init_memkind() and the handle
// returned by dlopen() (NULL when memkind is unavailable).
static const char *kmp_mk_lib_name;
static void *h_memkind;
/* memkind experimental API: */
// memkind_alloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use (each resolved via dlsym; NULL when the kind is
// absent or reported unavailable by chk_kind())
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
// Target (offload) memory entry points, resolved in __kmp_init_target_mem()
// via KMP_DLSYM; NULL when the offload runtime does not provide them.
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
static void *(*kmp_target_unlock_mem)(void *ptr, int device);
static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
// Set by __kmp_init_target_mem() iff all six alloc/free entry points above
// resolved (lock/unlock are optional and checked for NULL at each use).
static bool __kmp_target_mem_available;
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  (MS == llvm_omp_target_host_mem_space ||                                     \
   MS == llvm_omp_target_shared_mem_space ||                                   \
   MS == llvm_omp_target_device_mem_space)
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  (MA == llvm_omp_target_host_mem_alloc ||                                     \
   MA == llvm_omp_target_shared_mem_alloc ||                                   \
   MA == llvm_omp_target_device_mem_alloc)
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
// Validate a memkind kind pointer obtained from dlsym: if the symbol exists
// but memkind_check_available() reports a problem, reset it to NULL so the
// rest of the code treats the kind as absent.
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // symbol found
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif
// Try to dynamically load libmemkind.so and resolve the entry points and
// kind objects this file uses. On success sets __kmp_memkind_available and
// returns; on any failure (or on unsupported platforms) resets all handles
// and kind pointers to NULL so callers fall back to the internal allocator.
void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  // use of statically linked memkind is problematic, as it depends on libnuma
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    // Require the core API plus a usable default kind before enabling memkind;
    // optional kinds below are individually validated with chk_kind().
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
      chk_kind(&mk_dax_kmem);
      mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
      chk_kind(&mk_dax_kmem_all);
      mk_dax_kmem_preferred =
          (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
      chk_kind(&mk_dax_kmem_preferred);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
  }
#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
  kmp_mk_lib_name = "";
#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB)
  // Failure path: make all memkind state consistently NULL.
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}
// Unload the memkind library (if it was loaded) and clear every cached
// function pointer and kind pointer set by __kmp_init_memkind().
void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}
// Resolve the target-offload memory entry points via KMP_DLSYM and compute
// __kmp_target_mem_available. The *(void **)(&fn) = ... form stores the
// looked-up address without an explicit function-pointer cast (dlsym idiom).
void __kmp_init_target_mem() {
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");
  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
  *(void **)(&kmp_target_free_shared) =
      KMP_DLSYM("llvm_omp_target_free_shared");
  *(void **)(&kmp_target_free_device) =
      KMP_DLSYM("llvm_omp_target_free_device");
  // Target memory counts as available only if all alloc/free pairs resolved.
  __kmp_target_mem_available =
      kmp_target_alloc_host && kmp_target_alloc_shared &&
      kmp_target_alloc_device && kmp_target_free_host &&
      kmp_target_free_shared && kmp_target_free_device;
  // lock/pin and unlock/unpin target calls (optional; not part of the
  // availability check — users null-check them before calling)
  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
}
// Create an OpenMP allocator for memory space 'ms' configured by 'ntraits'
// traits. Returns the new allocator handle, or omp_null_allocator when the
// requested memory space cannot be served (e.g. HBW without memkind, target
// space without an offload runtime). The returned kmp_allocator_t is freed by
// __kmpc_destroy_allocator().
omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  // OpenMP 5.0 only allows predefined memspaces
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms; // not used currently
  // Copy recognized traits into the allocator record.
  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
      break; // accepted but currently ignored
    case omp_atk_pinned:
      al->pinned = true;
      break;
    case omp_atk_alignment:
      __kmp_type_convert(traits[i].value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      al->memkind = RCAST(void **, traits[i].value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }
  // Normalize the fallback configuration (al was zero-initialized, so
  // al->fb == 0 means no fallback trait was given).
  if (al->fb == 0) {
    // set default allocator
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Let's use memkind library if available
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        // AC: do not try to use MEMKIND_HBW for now, because memkind library
        // cannot reliably detect exhaustion of HBW memory.
        // It could be possible using hbw_verify_memory_region() but memkind
        // manual says: "Using this function in production code may result in
        // serious performance penalty".
        al->memkind = mk_hbw_preferred;
      } else {
        // HBW is requested but not available --> return NULL allocator
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        // All pmem nodes are visited
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        // Only closest pmem node is visited
        al->memkind = mk_dax_kmem;
      } else {
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
    __kmp_free(al);
    return omp_null_allocator;
  } else {
    if (ms == omp_high_bw_mem_space) {
      // cannot detect HBW memory presence without memkind library
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}
// Destroy an allocator created by __kmpc_init_allocator. Only user-defined
// allocators (handles above kmp_max_mem_alloc) own heap storage; predefined
// handles are small enum-like values and must not be freed.
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator > kmp_max_mem_alloc)
    __kmp_free(allocator);
}
1483 void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
1484 if (allocator == omp_null_allocator)
1485 allocator = omp_default_mem_alloc;
1486 __kmp_threads[gtid]->th.th_def_allocator = allocator;
// Return the def-allocator ICV of thread 'gtid'.
omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  return __kmp_threads[gtid]->th.th_def_allocator;
}
// Descriptor stored immediately before the aligned pointer handed to the
// user by __kmp_alloc(); read back by ___kmpc_free()/__kmp_realloc().
typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by allocator
  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
  size_t size_orig; // Original size requested
  void *ptr_align; // Pointer to aligned memory, returned
  kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // align to pointer size by default
1502 // external interfaces are wrappers over internal implementation
1503 void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
1504 KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
1505 void *ptr = __kmp_alloc(gtid, 0, size, allocator);
1506 KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
1507 return ptr;
1510 void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
1511 omp_allocator_handle_t allocator) {
1512 KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
1513 (int)size, allocator));
1514 void *ptr = __kmp_alloc(gtid, algn, size, allocator);
1515 KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
1516 return ptr;
1519 void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
1520 omp_allocator_handle_t allocator) {
1521 KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
1522 (int)size, allocator));
1523 void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
1524 KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
1525 return ptr;
1528 void *__kmpc_realloc(int gtid, void *ptr, size_t size,
1529 omp_allocator_handle_t allocator,
1530 omp_allocator_handle_t free_allocator) {
1531 KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
1532 allocator, free_allocator));
1533 void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
1534 KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
1535 return nptr;
1538 void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
1539 KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
1540 ___kmpc_free(gtid, ptr, allocator);
1541 KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
1542 return;
// internal implementation, called from inside the library
// Allocate 'size' bytes with at least 'algn' alignment from 'allocator'.
// Target-memory allocators return the raw pointer directly; all other paths
// over-allocate (size + descriptor + alignment), write a kmp_mem_desc_t just
// before the aligned address, and return the aligned address. Pool-size
// accounting and the omp_atv_* fallback chain are honored for custom
// allocators. Returns NULL on failure (or for size == 0).
void *__kmp_alloc(int gtid, size_t algn, size_t size,
                  omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (size == 0)
    return NULL;
  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;
  kmp_int32 default_device =
      __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;

  al = RCAST(kmp_allocator_t *, allocator);

  int sz_desc = sizeof(kmp_mem_desc_t);
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr; // address returned by allocator
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  size_t align = alignment; // default alignment
  // Only custom allocators (handles above the predefined range) carry traits.
  if (allocator > kmp_max_mem_alloc && al->alignment > align)
    align = al->alignment; // alignment required by allocator trait
  if (align < algn)
    align = algn; // max of allocator trait, parameter and sizeof(void*)
  desc.size_orig = size;
  desc.size_a = size + sz_desc + align;
  bool is_pinned = false;
  if (allocator > kmp_max_mem_alloc)
    is_pinned = al->pinned;

  // Use default allocator if libmemkind is not available
  int use_default_allocator = (__kmp_memkind_available) ? false : true;

  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
    // Use size input directly as the memory may not be accessible on host.
    // Use default device for now.
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (allocator == llvm_omp_target_host_mem_alloc)
        ptr = kmp_target_alloc_host(size, device);
      else if (allocator == llvm_omp_target_shared_mem_alloc)
        ptr = kmp_target_alloc_shared(size, device);
      else // allocator == llvm_omp_target_device_mem_alloc
        ptr = kmp_target_alloc_device(size, device);
      return ptr; // no descriptor is written for target memory
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  // Custom allocator built over a target memory space: same direct path.
  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (al->memspace == llvm_omp_target_host_mem_space)
        ptr = kmp_target_alloc_host(size, device);
      else if (al->memspace == llvm_omp_target_shared_mem_space)
        ptr = kmp_target_alloc_shared(size, device);
      else // al->memspace == llvm_omp_target_device_mem_space
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (__kmp_memkind_available) {
    if (allocator < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
      } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
      } else {
        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
      }
    } else if (al->pool_size > 0) {
      // custom allocator with pool size requested
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
      if (used + desc.size_a > al->pool_size) {
        // not enough space, need to go fallback path
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr; // fallback allocator already wrote its own descriptor
        } // else ptr == NULL;
      } else {
        // pool has enough space
        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
            if (is_pinned && kmp_target_lock_mem)
              kmp_target_lock_mem(ptr, size, default_device);
            return ptr;
          }
        }
      }
    } else {
      // custom allocator, pool size not requested
      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
      if (ptr == NULL) {
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        }
      }
    }
  } else if (allocator < kmp_max_mem_alloc) {
    // pre-defined allocator
    if (allocator == omp_high_bw_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
    } else if (allocator == omp_large_cap_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
    } else if (allocator == omp_const_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
    } else if (allocator == omp_low_lat_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
    } else if (allocator == omp_cgroup_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
    } else if (allocator == omp_pteam_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
    } else if (allocator == omp_thread_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
    } else { // default allocator requested
      use_default_allocator = true;
    }
    if (use_default_allocator) {
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      use_default_allocator = false;
    }
  } else if (al->pool_size > 0) {
    // custom allocator with pool size requested
    kmp_uint64 used =
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
    if (used + desc.size_a > al->pool_size) {
      // not enough space, need to go fallback path
      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      if (al->fb == omp_atv_default_mem_fb) {
        al = (kmp_allocator_t *)omp_default_mem_alloc;
        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      } else if (al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } else if (al->fb == omp_atv_allocator_fb) {
        KMP_ASSERT(al != al->fb_data);
        al = al->fb_data;
        ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
        if (is_pinned && kmp_target_lock_mem)
          kmp_target_lock_mem(ptr, size, default_device);
        return ptr;
      } // else ptr == NULL;
    } else {
      // pool has enough space
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } // no sense to look for another fallback because of same internal alloc
    }
  } else {
    // custom allocator, pool size not requested
    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0); // abort fallback requested
    } // no sense to look for another fallback because of same internal alloc
  }
  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
  if (ptr == NULL)
    return NULL;

  if (is_pinned && kmp_target_lock_mem)
    kmp_target_lock_mem(ptr, desc.size_a, default_device);

  // Round up to the chosen alignment and place the descriptor immediately
  // before the aligned address so ___kmpc_free() can recover it.
  addr = (kmp_uintptr_t)ptr;
  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
  addr_descr = addr_align - sz_desc;

  desc.ptr_alloc = ptr;
  desc.ptr_align = (void *)addr_align;
  desc.allocator = al;
  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
  KMP_MB();

  return desc.ptr_align;
}
1758 void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
1759 omp_allocator_handle_t allocator) {
1760 void *ptr = NULL;
1761 kmp_allocator_t *al;
1762 KMP_DEBUG_ASSERT(__kmp_init_serial);
1764 if (allocator == omp_null_allocator)
1765 allocator = __kmp_threads[gtid]->th.th_def_allocator;
1767 al = RCAST(kmp_allocator_t *, allocator);
1769 if (nmemb == 0 || size == 0)
1770 return ptr;
1772 if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
1773 if (al->fb == omp_atv_abort_fb) {
1774 KMP_ASSERT(0);
1776 return ptr;
1779 ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);
1781 if (ptr) {
1782 memset(ptr, 0x00, nmemb * size);
1784 return ptr;
// Internal realloc: allocate a new block from 'allocator', copy
// min(size, old original size) bytes using the descriptor stored before
// 'ptr', then free the old block via 'free_allocator'. size == 0 frees only;
// if the new allocation fails the old block is left untouched and NULL is
// returned.
void *__kmp_realloc(int gtid, void *ptr, size_t size,
                    omp_allocator_handle_t allocator,
                    omp_allocator_handle_t free_allocator) {
  void *nptr = NULL;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (size == 0) {
    if (ptr != NULL)
      ___kmpc_free(gtid, ptr, free_allocator);
    return nptr;
  }

  nptr = __kmp_alloc(gtid, 0, size, allocator);

  if (nptr != NULL && ptr != NULL) {
    kmp_mem_desc_t desc;
    kmp_uintptr_t addr_align; // address to return to caller
    kmp_uintptr_t addr_descr; // address of memory block descriptor

    // The descriptor sits immediately before the aligned user pointer
    // (written by __kmp_alloc).
    addr_align = (kmp_uintptr_t)ptr;
    addr_descr = addr_align - sizeof(kmp_mem_desc_t);
    desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

    KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
    KMP_DEBUG_ASSERT(desc.size_orig > 0);
    KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
    KMP_MEMCPY((char *)nptr, (char *)ptr,
               (size_t)((size < desc.size_orig) ? size : desc.size_orig));
  }

  // Release the old block only after a successful new allocation.
  if (nptr != NULL) {
    ___kmpc_free(gtid, ptr, free_allocator);
  }

  return nptr;
}
// Internal free for blocks produced by __kmp_alloc. Target-memory blocks are
// returned straight to the offload runtime (they carry no descriptor);
// everything else recovers the kmp_mem_desc_t stored before 'ptr' to find
// the real allocation address and the owning allocator, unpins pinned
// memory, rolls back pool accounting, and releases via memkind or the
// internal thread allocator. NULL ptr is a no-op.
void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  // Target-memory blocks were returned raw by __kmp_alloc: free them directly.
  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
                                     (allocator > kmp_max_mem_alloc &&
                                      KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    if (allocator == llvm_omp_target_host_mem_alloc) {
      kmp_target_free_host(ptr, device);
    } else if (allocator == llvm_omp_target_shared_mem_alloc) {
      kmp_target_free_shared(ptr, device);
    } else if (allocator == llvm_omp_target_device_mem_alloc) {
      kmp_target_free_device(ptr, device);
    }
    return;
  }

  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  if (allocator) {
    // A fallback allocation may have been recorded under al->fb_data.
    KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  }
  // Trust the descriptor: the block is freed by the allocator that made it.
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al; // cast to void* for comparisons
  KMP_DEBUG_ASSERT(al);

  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    kmp_target_unlock_mem(desc.ptr_alloc, device);
  }

  if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        (void)used; // to suppress compiler warning
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      (void)used; // to suppress compiler warning
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
}
1897 /* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes
1898 memory leaks, but it may be useful for debugging memory corruptions, used
1899 freed pointers, etc. */
1900 /* #define LEAK_MEMORY */
// Descriptor written by ___kmp_allocate_align() just before the aligned
// pointer it returns; __kmp_free() reads it back to recover ptr_allocated.
struct kmp_mem_descr { // Memory block descriptor.
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
1909 /* Allocate memory on requested boundary, fill allocated memory with 0x00.
1910 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1911 error. Must use __kmp_free when freeing memory allocated by this routine! */
1912 static void *___kmp_allocate_align(size_t size,
1913 size_t alignment KMP_SRC_LOC_DECL) {
1914 /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than
1915 requested to return properly aligned pointer. Original pointer returned
1916 by malloc() and size of allocated block is saved in descriptor just
1917 before the aligned pointer. This information used by __kmp_free() -- it
1918 has to pass to free() original pointer, not aligned one.
1920 +---------+------------+-----------------------------------+---------+
1921 | padding | descriptor | aligned block | padding |
1922 +---------+------------+-----------------------------------+---------+
1925 | +- Aligned pointer returned to caller
1926 +- Pointer returned by malloc()
1928 Aligned block is filled with zeros, paddings are filled with 0xEF. */
1930 kmp_mem_descr_t descr;
1931 kmp_uintptr_t addr_allocated; // Address returned by malloc().
1932 kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1933 kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1935 KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1936 (int)size, (int)alignment KMP_SRC_LOC_PARM));
1938 KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too
1939 KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
1940 // Make sure kmp_uintptr_t is enough to store addresses.
1942 descr.size_aligned = size;
1943 descr.size_allocated =
1944 descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;
1946 #if KMP_DEBUG
1947 descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
1948 #else
1949 descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
1950 #endif
1951 KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated,
1952 descr.ptr_allocated));
1953 if (descr.ptr_allocated == NULL) {
1954 KMP_FATAL(OutOfHeapMemory);
1957 addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
1958 addr_aligned =
1959 (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
1960 addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);
1962 descr.ptr_aligned = (void *)addr_aligned;
1964 KE_TRACE(26, (" ___kmp_allocate_align: "
1965 "ptr_allocated=%p, size_allocated=%d, "
1966 "ptr_aligned=%p, size_aligned=%d\n",
1967 descr.ptr_allocated, (int)descr.size_allocated,
1968 descr.ptr_aligned, (int)descr.size_aligned));
1970 KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
1971 KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
1972 KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
1973 addr_allocated + descr.size_allocated);
1974 KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
1975 #ifdef KMP_DEBUG
1976 memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
1977 // Fill allocated memory block with 0xEF.
1978 #endif
1979 memset(descr.ptr_aligned, 0x00, descr.size_aligned);
1980 // Fill the aligned memory block (which is intended for using by caller) with
1981 // 0x00. Do not
1982 // put this filling under KMP_DEBUG condition! Many callers expect zeroed
1983 // memory. (Padding
1984 // bytes remain filled with 0xEF in debugging library.)
1985 *((kmp_mem_descr_t *)addr_descr) = descr;
1987 KMP_MB();
1989 KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
1990 return descr.ptr_aligned;
1991 } // func ___kmp_allocate_align
1993 /* Allocate memory on cache line boundary, fill allocated memory with 0x00.
1994 Do not call this func directly! Use __kmp_allocate macro instead.
1995 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
1996 error. Must use __kmp_free when freeing memory allocated by this routine! */
1997 void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
1998 void *ptr;
1999 KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
2000 (int)size KMP_SRC_LOC_PARM));
2001 ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
2002 KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
2003 return ptr;
2004 } // func ___kmp_allocate
2006 /* Allocate memory on page boundary, fill allocated memory with 0x00.
2007 Does not call this func directly! Use __kmp_page_allocate macro instead.
2008 NULL is NEVER returned, __kmp_abort() is called in case of memory allocation
2009 error. Must use __kmp_free when freeing memory allocated by this routine! */
2010 void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
2011 int page_size = 8 * 1024;
2012 void *ptr;
2014 KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
2015 (int)size KMP_SRC_LOC_PARM));
2016 ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
2017 KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
2018 return ptr;
2019 } // ___kmp_page_allocate
/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before call to free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr; // Copy of the descriptor stored just before ptr.
#if KMP_DEBUG
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
#endif
  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  // The allocator placed a kmp_mem_descr_t immediately before the aligned
  // pointer it handed out; read it back to recover the original malloc()
  // pointer and the block sizes.
  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, (" __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));
#if KMP_DEBUG
  // Sanity-check descriptor consistency before touching the block.
  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
  // Fill memory block with 0xEF, it helps catch using freed memory.
#endif

#ifndef LEAK_MEMORY
  KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
} // func ___kmp_free
2065 #if USE_FAST_MEMORY == 3
2066 // Allocate fast memory by first scanning the thread's free lists
2067 // If a chunk the right size exists, grab it off the free list.
2068 // Otherwise allocate normally using kmp_thread_malloc.
2070 // AC: How to choose the limit? Just get 16 for now...
2071 #define KMP_FREE_LIST_LIMIT 16
2073 // Always use 128 bytes for determining buckets for caching memory blocks
2074 #define DCACHE_LINE 128
2076 void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
2077 void *ptr;
2078 size_t num_lines, idx;
2079 int index;
2080 void *alloc_ptr;
2081 size_t alloc_size;
2082 kmp_mem_descr_t *descr;
2084 KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
2085 __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));
2087 num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
2088 idx = num_lines - 1;
2089 KMP_DEBUG_ASSERT(idx >= 0);
2090 if (idx < 2) {
2091 index = 0; // idx is [ 0, 1 ], use first free list
2092 num_lines = 2; // 1, 2 cache lines or less than cache line
2093 } else if ((idx >>= 2) == 0) {
2094 index = 1; // idx is [ 2, 3 ], use second free list
2095 num_lines = 4; // 3, 4 cache lines
2096 } else if ((idx >>= 2) == 0) {
2097 index = 2; // idx is [ 4, 15 ], use third free list
2098 num_lines = 16; // 5, 6, ..., 16 cache lines
2099 } else if ((idx >>= 2) == 0) {
2100 index = 3; // idx is [ 16, 63 ], use fourth free list
2101 num_lines = 64; // 17, 18, ..., 64 cache lines
2102 } else {
2103 goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
2106 ptr = this_thr->th.th_free_lists[index].th_free_list_self;
2107 if (ptr != NULL) {
2108 // pop the head of no-sync free list
2109 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2110 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2111 sizeof(kmp_mem_descr_t)))
2112 ->ptr_aligned);
2113 goto end;
2115 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2116 if (ptr != NULL) {
2117 // no-sync free list is empty, use sync free list (filled in by other
2118 // threads only)
2119 // pop the head of the sync free list, push NULL instead
2120 while (!KMP_COMPARE_AND_STORE_PTR(
2121 &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
2122 KMP_CPU_PAUSE();
2123 ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
2125 // push the rest of chain into no-sync free list (can be NULL if there was
2126 // the only block)
2127 this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
2128 KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
2129 sizeof(kmp_mem_descr_t)))
2130 ->ptr_aligned);
2131 goto end;
2134 alloc_call:
2135 // haven't found block in the free lists, thus allocate it
2136 size = num_lines * DCACHE_LINE;
2138 alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
2139 KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
2140 "alloc_size %d\n",
2141 __kmp_gtid_from_thread(this_thr), alloc_size));
2142 alloc_ptr = bget(this_thr, (bufsize)alloc_size);
2144 // align ptr to DCACHE_LINE
2145 ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
2146 DCACHE_LINE) &
2147 ~(DCACHE_LINE - 1));
2148 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2150 descr->ptr_allocated = alloc_ptr; // remember allocated pointer
2151 // we don't need size_allocated
2152 descr->ptr_aligned = (void *)this_thr; // remember allocating thread
2153 // (it is already saved in bget buffer,
2154 // but we may want to use another allocator in future)
2155 descr->size_aligned = size;
2157 end:
2158 KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
2159 __kmp_gtid_from_thread(this_thr), ptr));
2160 return ptr;
2161 } // func __kmp_fast_allocate
2163 // Free fast memory and place it on the thread's free list if it is of
2164 // the correct size.
2165 void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
2166 kmp_mem_descr_t *descr;
2167 kmp_info_t *alloc_thr;
2168 size_t size;
2169 size_t idx;
2170 int index;
2172 KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
2173 __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
2174 KMP_ASSERT(ptr != NULL);
2176 descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));
2178 KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
2179 (int)descr->size_aligned));
2181 size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
2183 idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
2184 if (idx == size) {
2185 index = 0; // 2 cache lines
2186 } else if ((idx <<= 1) == size) {
2187 index = 1; // 4 cache lines
2188 } else if ((idx <<= 2) == size) {
2189 index = 2; // 16 cache lines
2190 } else if ((idx <<= 2) == size) {
2191 index = 3; // 64 cache lines
2192 } else {
2193 KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
2194 goto free_call; // 65 or more cache lines ( > 8KB )
2197 alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
2198 if (alloc_thr == this_thr) {
2199 // push block to self no-sync free list, linking previous head (LIFO)
2200 *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
2201 this_thr->th.th_free_lists[index].th_free_list_self = ptr;
2202 } else {
2203 void *head = this_thr->th.th_free_lists[index].th_free_list_other;
2204 if (head == NULL) {
2205 // Create new free list
2206 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2207 *((void **)ptr) = NULL; // mark the tail of the list
2208 descr->size_allocated = (size_t)1; // head of the list keeps its length
2209 } else {
2210 // need to check existed "other" list's owner thread and size of queue
2211 kmp_mem_descr_t *dsc =
2212 (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
2213 // allocating thread, same for all queue nodes
2214 kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
2215 size_t q_sz =
2216 dsc->size_allocated + 1; // new size in case we add current task
2217 if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
2218 // we can add current task to "other" list, no sync needed
2219 *((void **)ptr) = head;
2220 descr->size_allocated = q_sz;
2221 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2222 } else {
2223 // either queue blocks owner is changing or size limit exceeded
2224 // return old queue to allocating thread (q_th) synchronously,
2225 // and start new list for alloc_thr's tasks
2226 void *old_ptr;
2227 void *tail = head;
2228 void *next = *((void **)head);
2229 while (next != NULL) {
2230 KMP_DEBUG_ASSERT(
2231 // queue size should decrease by 1 each step through the list
2232 ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
2233 ->size_allocated +
2234 1 ==
2235 ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
2236 ->size_allocated);
2237 tail = next; // remember tail node
2238 next = *((void **)next);
2240 KMP_DEBUG_ASSERT(q_th != NULL);
2241 // push block to owner's sync free list
2242 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2243 /* the next pointer must be set before setting free_list to ptr to avoid
2244 exposing a broken list to other threads, even for an instant. */
2245 *((void **)tail) = old_ptr;
2247 while (!KMP_COMPARE_AND_STORE_PTR(
2248 &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
2249 KMP_CPU_PAUSE();
2250 old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
2251 *((void **)tail) = old_ptr;
2254 // start new list of not-selt tasks
2255 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2256 *((void **)ptr) = NULL;
2257 descr->size_allocated = (size_t)1; // head of queue keeps its length
2261 goto end;
2263 free_call:
2264 KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2265 __kmp_gtid_from_thread(this_thr), size));
2266 __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
2267 brel(this_thr, descr->ptr_allocated);
2269 end:
2270 KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
2272 } // func __kmp_fast_free
2274 // Initialize the thread free lists related to fast memory
2275 // Only do this when a thread is initially created.
2276 void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
2277 KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));
2279 memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
2282 // Free the memory in the thread free lists related to fast memory
2283 // Only do this when a thread is being reaped (destroyed).
2284 void __kmp_free_fast_memory(kmp_info_t *th) {
2285 // Suppose we use BGET underlying allocator, walk through its structures...
2286 int bin;
2287 thr_data_t *thr = get_thr_data(th);
2288 void **lst = NULL;
2290 KE_TRACE(
2291 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));
2293 __kmp_bget_dequeue(th); // Release any queued buffers
2295 // Dig through free lists and extract all allocated blocks
2296 for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
2297 bfhead_t *b = thr->freelist[bin].ql.flink;
2298 while (b != &thr->freelist[bin]) {
2299 if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address
2300 *((void **)b) =
2301 lst; // link the list (override bthr, but keep flink yet)
2302 lst = (void **)b; // push b into lst
2304 b = b->ql.flink; // get next buffer
2307 while (lst != NULL) {
2308 void *next = *lst;
2309 KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2310 lst, next, th, __kmp_gtid_from_thread(th)));
2311 (*thr->relfcn)(lst);
2312 #if BufStats
2313 // count blocks to prevent problems in __kmp_finalize_bget()
2314 thr->numprel++; /* Nr of expansion block releases */
2315 thr->numpblk--; /* Total number of blocks */
2316 #endif
2317 lst = (void **)next;
2320 KE_TRACE(
2321 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
2324 #endif // USE_FAST_MEMORY