#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wrapper_malloc.h"

typedef int (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void (*bget_release_t)(void *);

/* NOTE: bufsize must be the same size as a pointer */
#if KMP_ARCH_X86 || KMP_ARCH_ARM
typedef kmp_int32 bufsize;
#elif KMP_ARCH_X86_64
typedef kmp_int64 bufsize;
#else
typedef ssize_t bufsize;
#endif

/* The three modes of operation are fifo search, lifo search, and best-fit. */
typedef enum bget_mode {
  bget_mode_fifo = 0,
  bget_mode_lifo = 1,
  bget_mode_best = 2
} bget_mode_t;

static void bpool(kmp_info_t *th, void *buffer, bufsize len);
static void *bget(kmp_info_t *th, bufsize size);
static void *bgetz(kmp_info_t *th, bufsize size);
static void *bgetr(kmp_info_t *th, void *buffer, bufsize newsize);
static void brel(kmp_info_t *th, void *buf);
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr);

/* On IA-32 architecture with Linux* OS, malloc() does not
   ensure 16 byte alignment */
#if KMP_ARCH_X86 || !KMP_HAVE_QUAD

#define SizeQuant 8
#define AlignType double

#else

#define SizeQuant 16
#define AlignType _Quad

#endif

/* Bin sizes for the segregated free lists: each free buffer is kept on the
   list of the largest bin whose size does not exceed the buffer. */
static bufsize bget_bin_size[] = {
    0,
    1 << 7, /* 1 cache line */
    1 << 8,  1 << 9,  1 << 10, 1 << 11, 1 << 12, 1 << 13,
    1 << 14, 1 << 15,
    1 << 16, 1 << 17, 1 << 18, 1 << 19, 1 << 20, /*  1MB */
    1 << 21, /*  2MB */
    1 << 22, /*  4MB */
    1 << 23, /*  8MB */
    1 << 24, /* 16MB */
    1 << 25, /* 32MB */
};

#define MAX_BGET_BINS (int)(sizeof(bget_bin_size) / sizeof(bufsize))

/* Queue links */
typedef struct qlinks {
  struct bfhead *flink; /* Forward link */
  struct bfhead *blink; /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */
typedef struct bhead2 {
  kmp_info_t *bthr; /* The thread which owns the buffer pool */
  bufsize prevfree; /* Relative link back to previous free buffer in memory or
                       0 if previous buffer is allocated. */
  bufsize bsize; /* Buffer size: positive if free, negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */
typedef union bhead {
  KMP_ALIGN(SizeQuant)
  AlignType b_align;
  char b_pad[sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant))];
  bhead2_t bb;
} bhead_t;

#define BH(p) ((bhead_t *)(p))

/* Header in directly allocated buffers (by acqfcn) */
typedef struct bdhead {
  bufsize tsize; /* Total size, including overhead */
  bhead_t bh; /* Common header */
} bdhead_t;

#define BDH(p) ((bdhead_t *)(p))

/* Header in free buffers */
typedef struct bfhead {
  bhead_t bh; /* Common allocated/free header */
  qlinks_t ql; /* Links on free list */
} bfhead_t;

#define BFH(p) ((bfhead_t *)(p))

typedef struct thr_data {
  bfhead_t freelist[MAX_BGET_BINS];
  size_t totalloc; /* Total space currently allocated */
  long numget, numrel; /* Number of bget() and brel() calls */
  long numpblk; /* Number of pool blocks */
  long numpget, numprel; /* Number of block gets and rels */
  long numdget, numdrel; /* Number of direct gets and rels */
  /* Automatic expansion block management functions */
  bget_compact_t compfcn;
  bget_acquire_t acqfcn;
  bget_release_t relfcn;
  bget_mode_t mode; /* What allocation mode to use? */
  bufsize exp_incr; /* Expansion block size */
  bufsize pool_len; /* 0: no bpool calls have been made
                       -1: not all pool blocks are the same size
                       >0: (common) block size for all bpool calls made so
                           far */
  bfhead_t *last_pool; /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

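/* Illustrative sketch (not part of the allocator): how a pool block looks in
   memory once buffers have been carved out of it. Sizes live in the headers
   themselves; a positive bsize marks a free buffer, a negative bsize an
   allocated one, and prevfree links an allocated buffer back to a free
   predecessor:

     [bhead_t | free buffer ... ][bhead_t | allocated buffer ... ][sentinel]
       bsize > 0, on a free list   bsize < 0, prevfree = size of prev free
*/
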
/* Minimum allocation quantum: */
#define QLSize (sizeof(qlinks_t))
#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
// Maximum for the requested size.
#define MaxSize                                                                \
  (bufsize)(                                                                   \
      ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1)))

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block. The most negative number which will fit in a
   bufsize, defined in a way that the compiler will accept. */
#define ESent                                                                  \
  ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2))

/* Thread Data management routines */
static int bget_get_bin(bufsize size) {
  // binary chop bins
  int lo = 0, hi = MAX_BGET_BINS - 1;

  KMP_DEBUG_ASSERT(size > 0);

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    if (size < bget_bin_size[mid])
      hi = mid - 1;
    else
      lo = mid;
  }

  KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS));

  return lo;
}

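/* Minimal usage sketch for the bin lookup above (assumes the bget_bin_size
   table defined earlier in this file). The returned bin always satisfies
   bget_bin_size[bin] <= size, so a request of 3000 bytes lands in a bin of
   at most 2048 (1 << 11):

     int bin = bget_get_bin((bufsize)3000);
     KMP_DEBUG_ASSERT(bget_bin_size[bin] <= 3000);
*/
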
static void set_thr_data(kmp_info_t *th) {
  int i;
  thr_data_t *data;

  data = (thr_data_t *)((!th->th.th_local.bget_data)
                            ? __kmp_allocate(sizeof(*data))
                            : th->th.th_local.bget_data);

  memset(data, '\0', sizeof(*data));
  for (i = 0; i < MAX_BGET_BINS; ++i) {
    data->freelist[i].ql.flink = &data->freelist[i];
    data->freelist[i].ql.blink = &data->freelist[i];
  }

  th->th.th_local.bget_data = data;
  th->th.th_local.bget_list = 0;
#if !USE_CMP_XCHG_FOR_BGET
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_init_lock(&th->th.th_local.bget_lock);
#else
  __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */
#endif /* ! USE_CMP_XCHG_FOR_BGET */
}

static thr_data_t *get_thr_data(kmp_info_t *th) {
  thr_data_t *data;

  data = (thr_data_t *)th->th.th_local.bget_data;

  KMP_DEBUG_ASSERT(data != 0);

  return data;
}

/* Walk the thread's free list and deallocate any buffers queued for freeing */
static void __kmp_bget_dequeue(kmp_info_t *th) {
  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

  if (p != 0) {
#if USE_CMP_XCHG_FOR_BGET
    {
      volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                        CCAST(void *, old_value), nullptr)) {
        KMP_CPU_PAUSE();
        old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
      }
      p = CCAST(void *, old_value);
    }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif /* USE_QUEUING_LOCK_FOR_BGET */

    p = (void *)th->th.th_local.bget_list;
    th->th.th_local.bget_list = 0;

#ifdef USE_QUEUING_LOCK_FOR_BGET
    __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th));
#else
    __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */

    /* Check again to make sure the list is not empty */
    while (p != 0) {
      void *buf = p;
      bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t));

      KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
      KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                       (kmp_uintptr_t)th); // clear possible mark
      KMP_DEBUG_ASSERT(b->ql.blink == 0);

      p = (void *)b->ql.flink;

      brel(th, buf);
    }
  }
}

/* Chain together the free buffers by using the thread owner field */
static void __kmp_bget_enqueue(kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                               ,
                               kmp_int32 rel_gtid
#endif
) {
  bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t));

  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
  KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) ==
                   (kmp_uintptr_t)th); // clear possible mark

  b->ql.blink = 0;

  KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n",
                __kmp_gtid_from_thread(th)));

#if USE_CMP_XCHG_FOR_BGET
  {
    volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
    /* the next pointer must be set before setting bget_list to buf to avoid
       exposing a broken list to other threads, even for an instant. */
    b->ql.flink = BFH(CCAST(void *, old_value));

    while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list,
                                      CCAST(void *, old_value), buf)) {
      KMP_CPU_PAUSE();
      old_value = TCR_PTR(th->th.th_local.bget_list);
      /* re-link before retrying, for the same reason as above */
      b->ql.flink = BFH(CCAST(void *, old_value));
    }
  }
#else /* ! USE_CMP_XCHG_FOR_BGET */
#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock);
#endif

  b->ql.flink = BFH(th->th.th_local.bget_list);
  th->th.th_local.bget_list = (void *)buf;

#ifdef USE_QUEUING_LOCK_FOR_BGET
  __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid);
#else
  __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock);
#endif
#endif /* USE_CMP_XCHG_FOR_BGET */
}

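/* A generic sketch of the lock-free push used above (illustrative only; the
   names push/compare_and_swap are hypothetical stand-ins for the KMP_*
   primitives): the node's next pointer is (re)written before every CAS
   attempt, so other threads never observe a broken list.

     void push(void *volatile *list, void **node) {
       void *old_head = *list;                     // read current head
       *node = old_head;                           // node->next = head
       while (!compare_and_swap(list, old_head, node)) {
         old_head = *list;                         // lost the race: re-read
         *node = old_head;                         // re-link before retrying
       }
     }
*/
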
/* insert buffer back onto a new freelist */
static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) {
  int bin;

  KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0);
  KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0);

  bin = bget_get_bin(b->bh.bb.bsize);

  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink ==
                   &thr->freelist[bin]);
  KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink ==
                   &thr->freelist[bin]);

  b->ql.flink = &thr->freelist[bin];
  b->ql.blink = thr->freelist[bin].ql.blink;

  thr->freelist[bin].ql.blink = b;
  b->ql.blink->ql.flink = b;
}

/* unlink the buffer from the old freelist */
static void __kmp_bget_remove_from_freelist(bfhead_t *b) {
  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);

  b->ql.blink->ql.flink = b->ql.flink;
  b->ql.flink->ql.blink = b->ql.blink;
}

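/* The free lists are circular and doubly linked, with the bin header acting
   as a sentinel, so insertion and removal never need NULL checks. An empty
   bin is one whose header links to itself:

     thr->freelist[bin].ql.flink == &thr->freelist[bin]   // empty bin
     thr->freelist[bin].ql.blink == &thr->freelist[bin]
*/
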
/* BCHECK -- report the largest free block and the total free space. */
static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) {
  thr_data_t *thr = get_thr_data(th);
  int bin;

  *total_free = *max_free = 0;

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b, *best;

    best = &thr->freelist[bin];
    b = best->ql.flink;

    while (b != &thr->freelist[bin]) {
      *total_free += (b->bh.bb.bsize - sizeof(bhead_t));
      if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize))
        best = b;

      /* Link to next buffer */
      b = b->ql.flink;
    }

    if (*max_free < best->bh.bb.bsize)
      *max_free = best->bh.bb.bsize;
  }

  if (*max_free > (bufsize)sizeof(bhead_t))
    *max_free -= sizeof(bhead_t);
}

/*  BGET  --  Allocate a buffer.  */
static void *bget(kmp_info_t *th, bufsize requested_size) {
  thr_data_t *thr = get_thr_data(th);
  bufsize size = requested_size;
  bfhead_t *b;
  void *buf;
  int compactseq = 0;
  int use_blink = 0;
  /* For BestFit */
  bfhead_t *best;

  if (size < 0 || size + sizeof(bhead_t) > MaxSize) {
    return NULL;
  }

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  if (size < (bufsize)SizeQ) { /* Need at least room for the queue links. */
    size = SizeQ;
  }
#if defined(SizeQuant) && (SizeQuant > 1)
  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
#endif

  size += sizeof(bhead_t); /* Add overhead in allocated buffer to size
                              required. */
  KMP_DEBUG_ASSERT(size >= 0);
  KMP_DEBUG_ASSERT(size % SizeQuant == 0);

  use_blink = (thr->mode == bget_mode_lifo);

  /* If a compact function was provided in the call to bectl(), wrap
     a loop around the allocation process to allow compaction to
     intervene in case we don't find a suitable buffer in the chain. */
  for (;;) {
    int bin;

    for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) {
      /* Link to next buffer */
      b = (use_blink ? thr->freelist[bin].ql.blink
                     : thr->freelist[bin].ql.flink);

      if (thr->mode == bget_mode_best) {
        best = &thr->freelist[bin];

        /* Scan the free list searching for the smallest buffer big enough
           to hold the requested size buffer. */
        while (b != &thr->freelist[bin]) {
          if (b->bh.bb.bsize >= (bufsize)size) {
            if ((best == &thr->freelist[bin]) ||
                (b->bh.bb.bsize < best->bh.bb.bsize)) {
              best = b;
            }
          }

          /* Link to next buffer */
          b = (use_blink ? b->ql.blink : b->ql.flink);
        }
        b = best;
      }

      while (b != &thr->freelist[bin]) {
        if ((bufsize)b->bh.bb.bsize >= (bufsize)size) {

          /* Buffer is big enough to satisfy the request. Allocate it to the
             caller. We must decide whether the buffer is large enough to
             split into the part given to the caller and a free buffer that
             remains on the free list, or whether the entire buffer should be
             removed from the free list and given to the caller in its
             entirety. We only split the buffer if enough room remains for a
             header plus the minimum quantum of allocation. */
          if ((b->bh.bb.bsize - (bufsize)size) >
              (bufsize)(SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size));
            bn = BH(((char *)ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize)size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address, do not mark

            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            /* unlink buffer from old freelist, and reinsert into new
               freelist */
            __kmp_bget_remove_from_freelist(b);
            __kmp_bget_insert_into_freelist(thr, b);

            thr->totalloc += (size_t)size;
            thr->numget++; /* Increment number of bget() calls */

            buf = (void *)((((char *)ba) + sizeof(bhead_t)));
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          } else {
            bhead_t *ba;

            ba = BH(((char *)b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split. Give the whole
               shebang to the caller and remove it from the free list. */
            __kmp_bget_remove_from_freelist(b);

            thr->totalloc += (size_t)b->bh.bb.bsize;
            thr->numget++; /* Increment number of bget() calls */

            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th); // not an allocated address, do not mark

            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf = (void *)&(b->ql);
            KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
            return buf;
          }
        }

        /* Link to next buffer */
        b = (use_blink ? b->ql.blink : b->ql.flink);
      }
    }

    /* We failed to find a buffer. If there's a compact function defined,
       notify it of the size requested. If it returns TRUE, try the
       allocation again. */
    if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
      break;
    }
  }

  /* No buffer available with requested size free. */

  /* Don't give up yet -- look in the reserve supply. */
  if (thr->acqfcn != 0) {
    if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) {
      /* Request is too large to fit in a single expansion block.
         Try to satisfy it by a direct buffer acquisition. */
      bdhead_t *bdh;

      size += sizeof(bdhead_t) - sizeof(bhead_t);

      KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size));

      bdh = BDH((*thr->acqfcn)((bufsize)size));
      if (bdh != NULL) {

        /* Mark the buffer special by setting the size field of its header
           to zero. */
        bdh->bh.bb.bsize = 0;

        /* Mark this buffer as owned by this thread. */
        TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
        // because direct buffer never goes to free list
        bdh->bh.bb.prevfree = 0;
        bdh->tsize = size;

        thr->totalloc += (size_t)size;
        thr->numget++; /* Increment number of bget() calls */
        thr->numdget++; /* Direct bget() call count */

        buf = (void *)(bdh + 1);
        KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);
        return buf;
      } else {
        return NULL;
      }
    } else {
      /* Try to obtain a new expansion block. */
      void *newpool;

      KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr));

      newpool = (*thr->acqfcn)((bufsize)thr->exp_incr);
      KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0);
      if (newpool != NULL) {
        bpool(th, newpool, thr->exp_incr);
        buf = bget(th, requested_size); /* This can't, I say, can't get into
                                           a loop. */
        return buf;
      }
    }
  }

  /* Still no buffer available. */
  return NULL;
}

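/* Worked example of the split decision above (numbers are illustrative):
   with SizeQuant == 8, a request of 41 bytes is rounded up to 48 and then
   grows by sizeof(bhead_t) of overhead. If the chosen free buffer is 1024
   bytes and the remainder (1024 - size) exceeds SizeQ + sizeof(bhead_t),
   the tail of the buffer is handed to the caller and the head stays on a
   free list with its bsize reduced; otherwise the whole 1024-byte buffer is
   allocated. */
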
/*  BGETZ  --  Allocate a buffer and clear its contents to zero. We clear
               the entire contents of the buffer to zero, not just the
               region requested by the caller. */
static void *bgetz(kmp_info_t *th, bufsize size) {
  char *buf = (char *)bget(th, size);

  if (buf != NULL) {
    bhead_t *b;
    bufsize rsize;

    b = BH(buf - sizeof(bhead_t));
    rsize = -(b->bb.bsize);
    if (rsize == 0) {
      bdhead_t *bd;

      bd = BDH(buf - sizeof(bdhead_t));
      rsize = bd->tsize - (bufsize)sizeof(bdhead_t);
    } else {
      rsize -= sizeof(bhead_t);
    }

    KMP_DEBUG_ASSERT(rsize >= size);

    (void)memset(buf, 0, (bufsize)rsize);
  }
  return ((void *)buf);
}

/*  BGETR  --  Reallocate a buffer. This is a minimal implementation,
               simply in terms of brel() and bget(). */
static void *bgetr(kmp_info_t *th, void *buf, bufsize size) {
  void *nbuf;
  bufsize osize; /* Old size of buffer */
  bhead_t *b;

  nbuf = bget(th, size);
  if (nbuf == NULL) { /* Acquire new buffer */
    return NULL;
  }
  if (buf == NULL) {
    return nbuf;
  }

  b = BH(((char *)buf) - sizeof(bhead_t));
  osize = -b->bb.bsize;
  if (osize == 0) {
    /* Buffer acquired directly through acqfcn. */
    bdhead_t *bd;

    bd = BDH(((char *)buf) - sizeof(bdhead_t));
    osize = bd->tsize - (bufsize)sizeof(bdhead_t);
  } else {
    osize -= sizeof(bhead_t);
  }

  KMP_DEBUG_ASSERT(osize > 0);

  (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */
                   (size_t)((size < osize) ? size : osize));
  brel(th, buf);

  return nbuf;
}

/*  BREL  --  Release a buffer.  */
static void brel(kmp_info_t *th, void *buf) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b, *bn;
  kmp_info_t *bth;

  KMP_DEBUG_ASSERT(buf != NULL);
  KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0);

  b = BFH(((char *)buf) - sizeof(bhead_t));

  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
    bdhead_t *bdh;

    bdh = BDH(((char *)buf) - sizeof(bdhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);

    thr->totalloc -= (size_t)bdh->tsize;
    thr->numdrel++; /* Number of direct releases */
    thr->numrel++; /* Increment number of brel() calls */

    (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t)));

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh));

    KMP_DEBUG_ASSERT(thr->relfcn != 0);
    (*thr->relfcn)((void *)bdh); /* Release it directly. */
    return;
  }

  bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) &
                       ~1); // clear possible mark before comparison
  if (bth != th) {
    /* Add this buffer to be released by the owning thread later */
    __kmp_bget_enqueue(bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                       ,
                       __kmp_gtid_from_thread(th)
#endif
    );
    return;
  }

  /* Buffer size must be negative, indicating that the buffer is allocated. */
  if (b->bh.bb.bsize >= 0) {
    bn = NULL;
  }
  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

  /* Back pointer in next buffer must be zero, indicating the same thing: */
  KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0);

  thr->numrel++; /* Increment number of brel() calls */
  thr->totalloc += (size_t)b->bh.bb.bsize;

  /* If the back link is nonzero, the previous buffer is free. */
  if (b->bh.bb.prevfree != 0) {
    /* The previous buffer is free. Consolidate this buffer with it by adding
       the length of this buffer to the previous free buffer. Note that we
       subtract the size in the buffer being released, since it's negative to
       indicate that the buffer is allocated. */
    bufsize size = b->bh.bb.bsize;

    /* Make the previous buffer the one we're working on. */
    KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize ==
                     b->bh.bb.prevfree);
    b = BFH(((char *)b) - b->bh.bb.prevfree);
    b->bh.bb.bsize -= size;

    /* Unlink the buffer from the old freelist. */
    __kmp_bget_remove_from_freelist(b);
  } else {
    /* The previous buffer is allocated. Mark this buffer size as positive
       (i.e. free) and fall through to place the buffer on the free list as
       an item of its own. */
    b->bh.bb.bsize = -b->bh.bb.bsize;
  }

  /* Insert this buffer into the new freelist. */
  __kmp_bget_insert_into_freelist(thr, b);

  /* Now we look at the next buffer in memory, located by advancing from
     the start of this buffer by its size, to see if that buffer is free.
     If it is, we combine this buffer with the next one in memory,
     dechaining the second buffer from the free list. */
  bn = BFH(((char *)b) + b->bh.bb.bsize);
  if (bn->bh.bb.bsize > 0) {

    /* The buffer is free. Remove it from the free list and add
       its size to that of our buffer. */
    KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree ==
                     bn->bh.bb.bsize);

    __kmp_bget_remove_from_freelist(bn);

    b->bh.bb.bsize += bn->bh.bb.bsize;

    /* Unlink the buffer from the old freelist, and reinsert it into the new
       freelist. */
    __kmp_bget_remove_from_freelist(b);
    __kmp_bget_insert_into_freelist(thr, b);

    /* Finally, advance to the buffer that follows the newly consolidated
       free block. We must set its backpointer to the head of the
       consolidated free block. We know the next block must be an allocated
       block because the process of recombination guarantees that two free
       blocks will never be contiguous in memory. */
    bn = BFH(((char *)b) + b->bh.bb.bsize);
  }

  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(b->bh.bb.bsize - sizeof(bfhead_t)));

  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

  /* The next buffer is allocated. Set the backpointer in it to point
     to this buffer; the previous free buffer in memory. */
  bn->bh.bb.prevfree = b->bh.bb.bsize;

  /* If a block-release function is defined, and this free buffer constitutes
     the entire block, release it. Note that pool_len is defined in such a
     way that the test will fail unless all pool blocks are the same size. */
  if (thr->relfcn != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    if (thr->numpblk != 1) {
      /* Do not release the last buffer until finalization time. */
      KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
      KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                       b->bh.bb.bsize);

      /* Unlink the buffer from the free list. */
      __kmp_bget_remove_from_freelist(b);

      KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

      (*thr->relfcn)(b);
      thr->numprel++; /* Nr of expansion block releases */
      thr->numpblk--; /* Total number of blocks */
      KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

      /* Avoid leaving a stale last_pool pointer around if it is being
         deallocated. */
      if (thr->last_pool == b)
        thr->last_pool = 0;
    } else {
      thr->last_pool = b;
    }
  }
}

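/* Coalescing sketch for brel() (illustrative sizes): releasing a 256-byte
   buffer whose predecessor is a 512-byte free buffer first folds it into the
   predecessor (512 - (-256) = 768); then, if the next buffer in memory is
   also free, its size is added as well and it is unlinked. The invariant is
   that two free buffers are never adjacent in memory. */
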
/*  BECTL  --  Establish automatic pool expansion control.  */
static void bectl(kmp_info_t *th, bget_compact_t compact,
                  bget_acquire_t acquire, bget_release_t release,
                  bufsize pool_incr) {
  thr_data_t *thr = get_thr_data(th);

  thr->compfcn = compact;
  thr->acqfcn = acquire;
  thr->relfcn = release;
  thr->exp_incr = pool_incr;
}

/*  BPOOL  --  Add a region of memory to the buffer pool.  */
static void bpool(kmp_info_t *th, void *buf, bufsize len) {
  thr_data_t *thr = get_thr_data(th);
  bfhead_t *b = BFH(buf);
  bhead_t *bn;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

#ifdef SizeQuant
  len &= ~((bufsize)(SizeQuant - 1));
#endif
  if (thr->pool_len == 0) {
    thr->pool_len = len;
  } else if (len != thr->pool_len) {
    thr->pool_len = -1;
  }

  thr->numpget++; /* Number of block acquisitions */
  thr->numpblk++; /* Number of blocks total */
  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

  /* Since the block is initially occupied by a single free buffer,
     it had better not be (much) larger than the largest buffer
     whose size we can store in bhead.bb.bsize. */
  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1));

  /* Clear the backpointer at the start of the block to indicate that
     there is no free block prior to this one. That blocks
     recombination when the first block in memory is released. */
  b->bh.bb.prevfree = 0;

  /* Create a dummy allocated buffer at the end of the pool. This dummy
     buffer is seen when a buffer at the end of the pool is released and
     blocks recombination of the last buffer with the dummy buffer at
     the end. The length in the dummy buffer is set to the largest
     negative number to denote the end of the pool for diagnostic
     routines (this specific value is not counted on by the actual
     allocation and release functions). */
  len -= sizeof(bhead_t);
  b->bh.bb.bsize = (bufsize)len;
  /* Set the owner of this buffer */
  TCW_PTR(b->bh.bb.bthr,
          (kmp_info_t *)((kmp_uintptr_t)th |
                         1)); // mark the buffer as allocated address

  /* Chain the new block to the free list. */
  __kmp_bget_insert_into_freelist(thr, b);

  (void)memset(((char *)b) + sizeof(bfhead_t), 0x55,
               (size_t)(len - sizeof(bfhead_t)));

  bn = BH(((char *)b) + len);
  bn->bb.prevfree = (bufsize)len;
  /* Definition of ESent assumes two's complement! */
  KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0));

  bn->bb.bsize = ESent;
}

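/* Pool layout established by bpool() (illustrative): a len-byte region
   becomes one free buffer followed by a dummy allocated header whose bsize
   is the ESent sentinel, which stops coalescing from running off the end:

     |<--------------------- len --------------------->|
     [bhead_t | free space ............. ][bhead_t ESent]
       bsize = len - sizeof(bhead_t)        prevfree = len - sizeof(bhead_t)
*/
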
/*  BFREED  --  Dump the free lists for this thread.  */
static void bfreed(kmp_info_t *th) {
  int bin = 0, count = 0;
  int gtid = __kmp_gtid_from_thread(th);
  thr_data_t *thr = get_thr_data(th);

  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC
                       " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC
                       " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC
                       " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC
                       " drel=%" KMP_INT64_SPEC "\n",
                       gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget,
                       (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk,
                       (kmp_int64)thr->numpget, (kmp_int64)thr->numprel,
                       (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel);

  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b;

    for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin];
         b = b->ql.flink) {
      bufsize bs = b->bh.bb.bsize;

      KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
      KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
      KMP_DEBUG_ASSERT(bs > 0);

      count += 1;

      __kmp_printf_no_lock(
          "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b,
          (long)bs);
      {
        char *lerr = ((char *)b) + sizeof(bfhead_t);
        if ((bs > sizeof(bfhead_t)) &&
            ((*lerr != 0x55) ||
             (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) !=
              0))) {
          __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above "
                               "free block have been overstored.)\n",
                               gtid);
        }
      }
    }
  }

  if (count == 0)
    __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid);
}

void __kmp_initialize_bget(kmp_info_t *th) {
  KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0));

  set_thr_data(th);

  bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free,
        (bufsize)__kmp_malloc_pool_incr);
}

void __kmp_finalize_bget(kmp_info_t *th) {
  thr_data_t *thr;
  bfhead_t *b;

  KMP_DEBUG_ASSERT(th != 0);

  thr = (thr_data_t *)th->th.th_local.bget_data;
  KMP_DEBUG_ASSERT(thr != NULL);
  b = thr->last_pool;

  /* Deallocate the last pool if one exists, because we might not return to
     the pool later (its release at brel() time was delayed). */
  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
      b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) {
    KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent);
    KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree ==
                     b->bh.bb.bsize);

    /* Unlink the buffer from the free list. */
    __kmp_bget_remove_from_freelist(b);

    KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b));

    (*thr->relfcn)(b);
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
  }

  /* Deallocate bget_data */
  if (th->th.th_local.bget_data != NULL) {
    __kmp_free(th->th.th_local.bget_data);
    th->th.th_local.bget_data = NULL;
  }
}

void kmpc_set_poolsize(size_t size) {
  bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc,
        (bget_release_t)free, (bufsize)size);
}

size_t kmpc_get_poolsize(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->exp_incr;
}

void kmpc_set_poolmode(int mode) {
  thr_data_t *p;

  if (mode == bget_mode_fifo || mode == bget_mode_lifo ||
      mode == bget_mode_best) {
    p = get_thr_data(__kmp_get_thread());
    p->mode = (bget_mode_t)mode;
  }
}

int kmpc_get_poolmode(void) {
  thr_data_t *p;

  p = get_thr_data(__kmp_get_thread());

  return p->mode;
}

void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) {
  kmp_info_t *th = __kmp_get_thread();
  bufsize a, b;

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bcheck(th, &a, &b);

  *maxmem = a;
  *allmem = b;
}

void kmpc_poolprint(void) {
  kmp_info_t *th = __kmp_get_thread();

  __kmp_bget_dequeue(th); /* Release any queued buffers */

  bfreed(th);
}

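/* Usage sketch for the pool control entry points above (hedged; assumes the
   calling thread is already registered with the runtime):

     kmpc_set_poolsize(1024 * 1024);      // expansion block size
     kmpc_set_poolmode(bget_mode_best);   // best-fit search
     size_t maxmem, allmem;
     kmpc_get_poolstat(&maxmem, &allmem); // largest free block, total free
     kmpc_poolprint();                    // dump free lists
*/
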
void *kmpc_malloc(size_t size) {
  void *ptr;
  ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)

void *kmpc_aligned_malloc(size_t size, size_t alignment) {
  void *ptr;
  void *ptr_allocated;
  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big
  if (!IS_POWER_OF_TWO(alignment)) {
    // AC: do we need to issue a warning here?
    errno = EINVAL;
    return NULL;
  }
  size = size + sizeof(void *) + alignment;
  ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size);
  if (ptr_allocated != NULL) {
    // save allocated pointer just before one returned to user
    ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) &
                   ~(alignment - 1));
    *((void **)ptr - 1) = ptr_allocated;
  } else {
    ptr = NULL;
  }
  return ptr;
}

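/* Alignment arithmetic sketch (illustrative numbers): with alignment == 64
   and a raw pointer of 0x1008, the user pointer is
   (0x1008 + sizeof(void *) + 64) & ~63 == 0x1040, which leaves at least one
   word below it where the raw pointer is stashed so kmpc_free() can recover
   it. */
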
void *kmpc_calloc(size_t nelem, size_t elsize) {
  void *ptr;
  ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr)));
  if (ptr != NULL) {
    // save allocated pointer just before one returned to user
    *(void **)ptr = ptr;
    ptr = (void **)ptr + 1;
  }
  return ptr;
}

void *kmpc_realloc(void *ptr, size_t size) {
  void *result = NULL;

  if (ptr == NULL) {
    // If pointer is NULL, realloc behaves like malloc.
    result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr)));
    // save allocated pointer just before one returned to user
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  } else if (size == 0) {
    // If size is 0, realloc behaves like free.
    // The thread must be registered by the call to kmpc_malloc() or
    // kmpc_calloc() before, so this call is safe.
    KMP_ASSERT(*((void **)ptr - 1));
    brel(__kmp_get_thread(), *((void **)ptr - 1));
  } else {
    result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1),
                   (bufsize)(size + sizeof(ptr)));
    if (result != NULL) {
      *(void **)result = result;
      result = (void **)result + 1;
    }
  }
  return result;
}

void kmpc_free(void *ptr) {
  if (!__kmp_init_serial) {
    return;
  }
  if (ptr != NULL) {
    kmp_info_t *th = __kmp_get_thread();
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    // extract allocated pointer and free it
    KMP_ASSERT(*((void **)ptr - 1));
    brel(th, *((void **)ptr - 1));
  }
}

void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th,
                (int)size KMP_SRC_LOC_PARM));
  ptr = bget(th, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,
                           size_t elsize KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th,
                (int)nelem, (int)elsize KMP_SRC_LOC_PARM));
  ptr = bgetz(th, (bufsize)(nelem * elsize));
  KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr));
  return ptr;
}

void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,
                            size_t size KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", th,
                ptr, (int)size KMP_SRC_LOC_PARM));
  ptr = bgetr(th, ptr, (bufsize)size);
  KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr));
  return ptr;
}

void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) {
  KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th,
                ptr KMP_SRC_LOC_PARM));
  if (ptr != NULL) {
    __kmp_bget_dequeue(th); /* Release any queued buffers */
    brel(th, ptr);
  }
  KE_TRACE(30, ("<- __kmp_thread_free()\n"));
}

/* OMP 5.0 Memory Management support */
static const char *kmp_mk_lib_name;
static void *h_memkind;
/* memkind experimental API: */
// memkind_malloc
static void *(*kmp_mk_alloc)(void *k, size_t sz);
// memkind_free
static void (*kmp_mk_free)(void *kind, void *ptr);
// memkind_check_available
static int (*kmp_mk_check)(void *kind);
// kinds we are going to use
static void **mk_default;
static void **mk_interleave;
static void **mk_hbw;
static void **mk_hbw_interleave;
static void **mk_hbw_preferred;
static void **mk_hugetlb;
static void **mk_hbw_hugetlb;
static void **mk_hbw_preferred_hugetlb;
static void **mk_dax_kmem;
static void **mk_dax_kmem_all;
static void **mk_dax_kmem_preferred;
static void *(*kmp_target_alloc_host)(size_t size, int device);
static void *(*kmp_target_alloc_shared)(size_t size, int device);
static void *(*kmp_target_alloc_device)(size_t size, int device);
static void *(*kmp_target_lock_mem)(void *ptr, size_t size, int device);
static void *(*kmp_target_unlock_mem)(void *ptr, int device);
static void *(*kmp_target_free_host)(void *ptr, int device);
static void *(*kmp_target_free_shared)(void *ptr, int device);
static void *(*kmp_target_free_device)(void *ptr, int device);
static bool __kmp_target_mem_available;
#define KMP_IS_TARGET_MEM_SPACE(MS)                                            \
  (MS == llvm_omp_target_host_mem_space ||                                     \
   MS == llvm_omp_target_shared_mem_space ||                                   \
   MS == llvm_omp_target_device_mem_space)
#define KMP_IS_TARGET_MEM_ALLOC(MA)                                            \
  (MA == llvm_omp_target_host_mem_alloc ||                                     \
   MA == llvm_omp_target_shared_mem_alloc ||                                   \
   MA == llvm_omp_target_device_mem_alloc)

#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
static inline void chk_kind(void ***pkind) {
  KMP_DEBUG_ASSERT(pkind);
  if (*pkind) // symbol found
    if (kmp_mk_check(**pkind)) // kind not available or error
      *pkind = NULL;
}
#endif

void __kmp_init_memkind() {
// as of 2018-07-31 memkind does not support Windows*, exclude it for now
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN
  // use of statically linked memkind is problematic, as it depends on libnuma
  kmp_mk_lib_name = "libmemkind.so";
  h_memkind = dlopen(kmp_mk_lib_name, RTLD_LAZY);
  if (h_memkind) {
    kmp_mk_check = (int (*)(void *))dlsym(h_memkind, "memkind_check_available");
    kmp_mk_alloc =
        (void *(*)(void *, size_t))dlsym(h_memkind, "memkind_malloc");
    kmp_mk_free = (void (*)(void *, void *))dlsym(h_memkind, "memkind_free");
    mk_default = (void **)dlsym(h_memkind, "MEMKIND_DEFAULT");
    if (kmp_mk_check && kmp_mk_alloc && kmp_mk_free && mk_default &&
        !kmp_mk_check(*mk_default)) {
      __kmp_memkind_available = 1;
      mk_interleave = (void **)dlsym(h_memkind, "MEMKIND_INTERLEAVE");
      chk_kind(&mk_interleave);
      mk_hbw = (void **)dlsym(h_memkind, "MEMKIND_HBW");
      chk_kind(&mk_hbw);
      mk_hbw_interleave = (void **)dlsym(h_memkind, "MEMKIND_HBW_INTERLEAVE");
      chk_kind(&mk_hbw_interleave);
      mk_hbw_preferred = (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED");
      chk_kind(&mk_hbw_preferred);
      mk_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HUGETLB");
      chk_kind(&mk_hugetlb);
      mk_hbw_hugetlb = (void **)dlsym(h_memkind, "MEMKIND_HBW_HUGETLB");
      chk_kind(&mk_hbw_hugetlb);
      mk_hbw_preferred_hugetlb =
          (void **)dlsym(h_memkind, "MEMKIND_HBW_PREFERRED_HUGETLB");
      chk_kind(&mk_hbw_preferred_hugetlb);
      mk_dax_kmem = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM");
      chk_kind(&mk_dax_kmem);
      mk_dax_kmem_all = (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_ALL");
      chk_kind(&mk_dax_kmem_all);
      mk_dax_kmem_preferred =
          (void **)dlsym(h_memkind, "MEMKIND_DAX_KMEM_PREFERRED");
      chk_kind(&mk_dax_kmem_preferred);
      KE_TRACE(25, ("__kmp_init_memkind: memkind library initialized\n"));
      return; // success
    }
    dlclose(h_memkind); // failure
  }
#else // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  kmp_mk_lib_name = "";
#endif // !(KMP_OS_UNIX && KMP_DYNAMIC_LIB && !KMP_OS_DARWIN)
  h_memkind = NULL;
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
}

void __kmp_fini_memkind() {
#if KMP_OS_UNIX && KMP_DYNAMIC_LIB
  if (__kmp_memkind_available)
    KE_TRACE(25, ("__kmp_fini_memkind: finalize memkind library\n"));
  if (h_memkind) {
    dlclose(h_memkind);
    h_memkind = NULL;
  }
  kmp_mk_check = NULL;
  kmp_mk_alloc = NULL;
  kmp_mk_free = NULL;
  mk_default = NULL;
  mk_interleave = NULL;
  mk_hbw = NULL;
  mk_hbw_interleave = NULL;
  mk_hbw_preferred = NULL;
  mk_hugetlb = NULL;
  mk_hbw_hugetlb = NULL;
  mk_hbw_preferred_hugetlb = NULL;
  mk_dax_kmem = NULL;
  mk_dax_kmem_all = NULL;
  mk_dax_kmem_preferred = NULL;
#endif
}

void __kmp_init_target_mem() {
  *(void **)(&kmp_target_alloc_host) = KMP_DLSYM("llvm_omp_target_alloc_host");
  *(void **)(&kmp_target_alloc_shared) =
      KMP_DLSYM("llvm_omp_target_alloc_shared");
  *(void **)(&kmp_target_alloc_device) =
      KMP_DLSYM("llvm_omp_target_alloc_device");
  *(void **)(&kmp_target_free_host) = KMP_DLSYM("llvm_omp_target_free_host");
  *(void **)(&kmp_target_free_shared) =
      KMP_DLSYM("llvm_omp_target_free_shared");
  *(void **)(&kmp_target_free_device) =
      KMP_DLSYM("llvm_omp_target_free_device");
  __kmp_target_mem_available =
      kmp_target_alloc_host && kmp_target_alloc_shared &&
      kmp_target_alloc_device && kmp_target_free_host &&
      kmp_target_free_shared && kmp_target_free_device;
  // lock/pin and unlock/unpin target calls
  *(void **)(&kmp_target_lock_mem) = KMP_DLSYM("llvm_omp_target_lock_mem");
  *(void **)(&kmp_target_unlock_mem) = KMP_DLSYM("llvm_omp_target_unlock_mem");
}

omp_allocator_handle_t __kmpc_init_allocator(int gtid, omp_memspace_handle_t ms,
                                             int ntraits,
                                             omp_alloctrait_t traits[]) {
  // OpenMP 5.0 only uses predefined memory spaces: default, large_cap, const,
  // high_bw, low_lat.
  KMP_DEBUG_ASSERT(ms == omp_default_mem_space || ms == omp_low_lat_mem_space ||
                   ms == omp_large_cap_mem_space || ms == omp_const_mem_space ||
                   ms == omp_high_bw_mem_space || KMP_IS_TARGET_MEM_SPACE(ms));
  kmp_allocator_t *al;
  int i;
  al = (kmp_allocator_t *)__kmp_allocate(sizeof(kmp_allocator_t)); // zeroed
  al->memspace = ms; // not used currently
  for (i = 0; i < ntraits; ++i) {
    switch (traits[i].key) {
    case omp_atk_sync_hint:
    case omp_atk_access:
      break;
    case omp_atk_pinned:
      al->pinned = true;
      break;
    case omp_atk_alignment:
      __kmp_type_convert(traits[i].value, &(al->alignment));
      KMP_ASSERT(IS_POWER_OF_TWO(al->alignment));
      break;
    case omp_atk_pool_size:
      al->pool_size = traits[i].value;
      break;
    case omp_atk_fallback:
      al->fb = (omp_alloctrait_value_t)traits[i].value;
      KMP_DEBUG_ASSERT(
          al->fb == omp_atv_default_mem_fb || al->fb == omp_atv_null_fb ||
          al->fb == omp_atv_abort_fb || al->fb == omp_atv_allocator_fb);
      break;
    case omp_atk_fb_data:
      al->fb_data = RCAST(kmp_allocator_t *, traits[i].value);
      break;
    case omp_atk_partition:
      al->memkind = RCAST(void **, traits[i].value);
      break;
    default:
      KMP_ASSERT2(0, "Unexpected allocator trait");
    }
  }
  if (al->fb == 0) {
    // set default allocator
    al->fb = omp_atv_default_mem_fb;
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  } else if (al->fb == omp_atv_allocator_fb) {
    KMP_ASSERT(al->fb_data != NULL);
  } else if (al->fb == omp_atv_default_mem_fb) {
    al->fb_data = (kmp_allocator_t *)omp_default_mem_alloc;
  }
  if (__kmp_memkind_available) {
    // Let's use memkind library if available
    if (ms == omp_high_bw_mem_space) {
      if (al->memkind == (void *)omp_atv_interleaved && mk_hbw_interleave) {
        al->memkind = mk_hbw_interleave;
      } else if (mk_hbw_preferred) {
        // AC: do not try to use MEMKIND_HBW for now, because memkind library
        // cannot reliably detect exhaustion of HBW memory
        al->memkind = mk_hbw_preferred;
      } else {
        // High bandwidth memory is not available (cannot detect).
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else if (ms == omp_large_cap_mem_space) {
      if (mk_dax_kmem_all) {
        // All pmem nodes are visited
        al->memkind = mk_dax_kmem_all;
      } else if (mk_dax_kmem) {
        // Only the closest pmem node is visited
        al->memkind = mk_dax_kmem;
      } else {
        __kmp_free(al);
        return omp_null_allocator;
      }
    } else {
      if (al->memkind == (void *)omp_atv_interleaved && mk_interleave) {
        al->memkind = mk_interleave;
      } else {
        al->memkind = mk_default;
      }
    }
  } else if (KMP_IS_TARGET_MEM_SPACE(ms) && !__kmp_target_mem_available) {
    __kmp_free(al);
    return omp_null_allocator;
  } else {
    if (ms == omp_high_bw_mem_space) {
      // cannot detect HBW memory presence without memkind library
      __kmp_free(al);
      return omp_null_allocator;
    }
  }
  return (omp_allocator_handle_t)al;
}

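/* A sketch of how user code reaches this routine through the standard
   OpenMP 5.0 API (hedged example; trait values are illustrative):

     omp_alloctrait_t traits[] = {
         {omp_atk_pool_size, 4 * 1024 * 1024}, // 4MB pool
         {omp_atk_fallback, omp_atv_null_fb}}; // return NULL when exhausted
     omp_allocator_handle_t al =
         omp_init_allocator(omp_default_mem_space, 2, traits);
     void *p = omp_alloc(1024, al);
     omp_free(p, al);
     omp_destroy_allocator(al);
*/
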
void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator > kmp_max_mem_alloc)
    __kmp_free(allocator);
}

void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t allocator) {
  if (allocator == omp_null_allocator)
    allocator = omp_default_mem_alloc;
  __kmp_threads[gtid]->th.th_def_allocator = allocator;
}

omp_allocator_handle_t __kmpc_get_default_allocator(int gtid) {
  return __kmp_threads[gtid]->th.th_def_allocator;
}

typedef struct kmp_mem_desc { // Memory block descriptor
  void *ptr_alloc; // Pointer returned by allocator
  size_t size_a; // Size of allocated memory block (initial+descriptor+align)
  size_t size_orig; // Original size requested
  void *ptr_align; // Pointer to aligned memory, returned to user
  kmp_allocator_t *allocator; // allocator
} kmp_mem_desc_t;
static int alignment = sizeof(void *); // align to pointer size by default

void *__kmpc_alloc(int gtid, size_t size, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_alloc: T#%d (%d, %p)\n", gtid, (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, 0, size, allocator);
  KE_TRACE(25, ("__kmpc_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_aligned_alloc(int gtid, size_t algn, size_t size,
                           omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_aligned_alloc: T#%d (%d, %d, %p)\n", gtid, (int)algn,
                (int)size, allocator));
  void *ptr = __kmp_alloc(gtid, algn, size, allocator);
  KE_TRACE(25, ("__kmpc_aligned_alloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_calloc(int gtid, size_t nmemb, size_t size,
                    omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_calloc: T#%d (%d, %d, %p)\n", gtid, (int)nmemb,
                (int)size, allocator));
  void *ptr = __kmp_calloc(gtid, 0, nmemb, size, allocator);
  KE_TRACE(25, ("__kmpc_calloc returns %p, T#%d\n", ptr, gtid));
  return ptr;
}

void *__kmpc_realloc(int gtid, void *ptr, size_t size,
                     omp_allocator_handle_t allocator,
                     omp_allocator_handle_t free_allocator) {
  KE_TRACE(25, ("__kmpc_realloc: T#%d (%p, %d, %p, %p)\n", gtid, ptr, (int)size,
                allocator, free_allocator));
  void *nptr = __kmp_realloc(gtid, ptr, size, allocator, free_allocator);
  KE_TRACE(25, ("__kmpc_realloc returns %p, T#%d\n", nptr, gtid));
  return nptr;
}

void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  KE_TRACE(25, ("__kmpc_free: T#%d free(%p,%p)\n", gtid, ptr, allocator));
  ___kmpc_free(gtid, ptr, allocator);
  KE_TRACE(10, ("__kmpc_free: T#%d freed %p (%p)\n", gtid, ptr, allocator));
}

void *__kmp_alloc(int gtid, size_t algn, size_t size,
                  omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (size == 0)
    return NULL;
  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;
  kmp_int32 default_device =
      __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;

  al = RCAST(kmp_allocator_t *, allocator);

  int sz_desc = sizeof(kmp_mem_desc_t);
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr; // address returned by allocator
  kmp_uintptr_t addr_align; // address to return to caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  size_t align = alignment; // default alignment
  if (allocator > kmp_max_mem_alloc && al->alignment > align)
    align = al->alignment; // alignment required by allocator trait
  if (align < algn)
    align = algn; // max of allocator trait, parameter and sizeof(void *)
  desc.size_orig = size;
  desc.size_a = size + sz_desc + align;
  bool is_pinned = false;
  if (allocator > kmp_max_mem_alloc)
    is_pinned = al->pinned;

  // Use default allocator if libmemkind is not available
  int use_default_allocator = (__kmp_memkind_available) ? false : true;

  if (KMP_IS_TARGET_MEM_ALLOC(allocator)) {
    // Use size input directly as the memory may not be accessible on host.
    // Use default device for now.
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (allocator == llvm_omp_target_host_mem_alloc)
        ptr = kmp_target_alloc_host(size, device);
      else if (allocator == llvm_omp_target_shared_mem_alloc)
        ptr = kmp_target_alloc_shared(size, device);
      else // allocator == llvm_omp_target_device_mem_alloc
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (allocator >= kmp_max_mem_alloc && KMP_IS_TARGET_MEM_SPACE(al->memspace)) {
    if (__kmp_target_mem_available) {
      kmp_int32 device =
          __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
      if (al->memspace == llvm_omp_target_host_mem_space)
        ptr = kmp_target_alloc_host(size, device);
      else if (al->memspace == llvm_omp_target_shared_mem_space)
        ptr = kmp_target_alloc_shared(size, device);
      else // al->memspace == llvm_omp_target_device_mem_space
        ptr = kmp_target_alloc_device(size, device);
      return ptr;
    } else {
      KMP_INFORM(TargetMemNotAvailable);
    }
  }

  if (__kmp_memkind_available) {
    if (allocator < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (allocator == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        ptr = kmp_mk_alloc(*mk_hbw_preferred, desc.size_a);
      } else if (allocator == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        ptr = kmp_mk_alloc(*mk_dax_kmem_all, desc.size_a);
      } else {
        ptr = kmp_mk_alloc(*mk_default, desc.size_a);
      }
    } else if (al->pool_size > 0) {
      // custom allocator with pool size requested
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
      if (used + desc.size_a > al->pool_size) {
        // not enough space, need to go to the fallback path
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        } // else ptr == NULL
      } else {
        // pool has enough space
        ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
        if (ptr == NULL) {
          if (al->fb == omp_atv_default_mem_fb) {
            al = (kmp_allocator_t *)omp_default_mem_alloc;
            ptr = kmp_mk_alloc(*mk_default, desc.size_a);
          } else if (al->fb == omp_atv_abort_fb) {
            KMP_ASSERT(0); // abort fallback requested
          } else if (al->fb == omp_atv_allocator_fb) {
            KMP_ASSERT(al != al->fb_data);
            al = al->fb_data;
            ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
            if (is_pinned && kmp_target_lock_mem)
              kmp_target_lock_mem(ptr, size, default_device);
            return ptr;
          }
        }
      }
    } else {
      // custom allocator, pool size not requested
      ptr = kmp_mk_alloc(*al->memkind, desc.size_a);
      if (ptr == NULL) {
        if (al->fb == omp_atv_default_mem_fb) {
          al = (kmp_allocator_t *)omp_default_mem_alloc;
          ptr = kmp_mk_alloc(*mk_default, desc.size_a);
        } else if (al->fb == omp_atv_abort_fb) {
          KMP_ASSERT(0); // abort fallback requested
        } else if (al->fb == omp_atv_allocator_fb) {
          KMP_ASSERT(al != al->fb_data);
          al = al->fb_data;
          ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
          if (is_pinned && kmp_target_lock_mem)
            kmp_target_lock_mem(ptr, size, default_device);
          return ptr;
        }
      }
    }
  } else if (allocator < kmp_max_mem_alloc) {
    // pre-defined allocator
    if (allocator == omp_high_bw_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_high_bw_mem_alloc");
    } else if (allocator == omp_large_cap_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_large_cap_mem_alloc");
    } else if (allocator == omp_const_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_const_mem_alloc");
    } else if (allocator == omp_low_lat_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_low_lat_mem_alloc");
    } else if (allocator == omp_cgroup_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_cgroup_mem_alloc");
    } else if (allocator == omp_pteam_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_pteam_mem_alloc");
    } else if (allocator == omp_thread_mem_alloc) {
      KMP_WARNING(OmpNoAllocator, "omp_thread_mem_alloc");
    } // default allocator requested -- no warning

    use_default_allocator = true;
    if (use_default_allocator) {
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      use_default_allocator = false;
    }
  } else if (al->pool_size > 0) {
    // custom allocator with pool size requested
    kmp_uint64 used =
        KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, desc.size_a);
    if (used + desc.size_a > al->pool_size) {
      // not enough space, need to go to the fallback path
      KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      if (al->fb == omp_atv_default_mem_fb) {
        al = (kmp_allocator_t *)omp_default_mem_alloc;
        ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      } else if (al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } else if (al->fb == omp_atv_allocator_fb) {
        KMP_ASSERT(al != al->fb_data);
        al = al->fb_data;
        ptr = __kmp_alloc(gtid, algn, size, (omp_allocator_handle_t)al);
        if (is_pinned && kmp_target_lock_mem)
          kmp_target_lock_mem(ptr, size, default_device);
        return ptr;
      } // else ptr == NULL
    } else {
      // pool has enough space
      ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
      if (ptr == NULL && al->fb == omp_atv_abort_fb) {
        KMP_ASSERT(0); // abort fallback requested
      } // no sense looking for another fallback: same internal allocation
    }
  } else {
    // custom allocator, pool size not requested
    ptr = __kmp_thread_malloc(__kmp_thread_from_gtid(gtid), desc.size_a);
    if (ptr == NULL && al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0); // abort fallback requested
    } // no sense looking for another fallback: same internal allocation
  }
  KE_TRACE(10, ("__kmp_alloc: T#%d %p=alloc(%d)\n", gtid, ptr, desc.size_a));
  if (ptr == NULL)
    return NULL;

  if (is_pinned && kmp_target_lock_mem)
    kmp_target_lock_mem(ptr, desc.size_a, default_device);

  addr = (kmp_uintptr_t)ptr;
  addr_align = (addr + sz_desc + align - 1) & ~(align - 1);
  addr_descr = addr_align - sz_desc;

  desc.ptr_alloc = ptr;
  desc.ptr_align = (void *)addr_align;
  desc.allocator = al;
  *((kmp_mem_desc_t *)addr_descr) = desc; // save descriptor contents
  KMP_MB();

  return desc.ptr_align;
}

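/* Descriptor placement sketch for __kmp_alloc() (illustrative numbers):
   assuming sizeof(kmp_mem_desc_t) == 40 on a 64-bit build and align == 64,
   a raw pointer of 0x1000 gives

     addr_align = (0x1000 + 40 + 63) & ~63 == 0x1040
     addr_descr = 0x1040 - 40           == 0x1018

   so the descriptor sits immediately below the pointer handed to the user,
   where ___kmpc_free() reads it back. */
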
void *__kmp_calloc(int gtid, size_t algn, size_t nmemb, size_t size,
                   omp_allocator_handle_t allocator) {
  void *ptr = NULL;
  kmp_allocator_t *al;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (allocator == omp_null_allocator)
    allocator = __kmp_threads[gtid]->th.th_def_allocator;

  al = RCAST(kmp_allocator_t *, allocator);

  if (nmemb == 0 || size == 0)
    return ptr;

  if ((SIZE_MAX - sizeof(kmp_mem_desc_t)) / size < nmemb) {
    if (al->fb == omp_atv_abort_fb) {
      KMP_ASSERT(0);
    }
    return ptr;
  }

  ptr = __kmp_alloc(gtid, algn, nmemb * size, allocator);

  if (ptr) {
    memset(ptr, 0x00, nmemb * size);
  }
  return ptr;
}

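/* Overflow-check sketch for the calloc path above: the allocation would be
   nmemb * size + sizeof(kmp_mem_desc_t) bytes, so the guard rejects any
   nmemb with nmemb > (SIZE_MAX - sizeof(kmp_mem_desc_t)) / size; e.g. on a
   64-bit build, nmemb == SIZE_MAX / 2 with size == 4 fails the check rather
   than wrapping around. */
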
void *__kmp_realloc(int gtid, void *ptr, size_t size,
                    omp_allocator_handle_t allocator,
                    omp_allocator_handle_t free_allocator) {
  void *nptr = NULL;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (size == 0) {
    if (ptr != NULL)
      ___kmpc_free(gtid, ptr, free_allocator);
    return nptr;
  }

  nptr = __kmp_alloc(gtid, 0, size, allocator);

  if (nptr != NULL && ptr != NULL) {
    kmp_mem_desc_t desc;
    kmp_uintptr_t addr_align; // address to return to caller
    kmp_uintptr_t addr_descr; // address of memory block descriptor

    addr_align = (kmp_uintptr_t)ptr;
    addr_descr = addr_align - sizeof(kmp_mem_desc_t);
    desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor saved earlier

    KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
    KMP_DEBUG_ASSERT(desc.size_orig > 0);
    KMP_DEBUG_ASSERT(desc.size_orig < desc.size_a);
    KMP_MEMCPY((char *)nptr, (char *)ptr,
               (size_t)((size < desc.size_orig) ? size : desc.size_orig));
  }

  if (nptr != NULL) {
    ___kmpc_free(gtid, ptr, free_allocator);
  }

  return nptr;
}

void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t allocator) {
  if (ptr == NULL)
    return;

  kmp_allocator_t *al;
  omp_allocator_handle_t oal;
  al = RCAST(kmp_allocator_t *, CCAST(omp_allocator_handle_t, allocator));
  kmp_mem_desc_t desc;
  kmp_uintptr_t addr_align; // address passed by caller
  kmp_uintptr_t addr_descr; // address of memory block descriptor
  if (__kmp_target_mem_available && (KMP_IS_TARGET_MEM_ALLOC(allocator) ||
                                     (allocator > kmp_max_mem_alloc &&
                                      KMP_IS_TARGET_MEM_SPACE(al->memspace)))) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    if (allocator == llvm_omp_target_host_mem_alloc) {
      kmp_target_free_host(ptr, device);
    } else if (allocator == llvm_omp_target_shared_mem_alloc) {
      kmp_target_free_shared(ptr, device);
    } else if (allocator == llvm_omp_target_device_mem_alloc) {
      kmp_target_free_device(ptr, device);
    }
    return;
  }

  addr_align = (kmp_uintptr_t)ptr;
  addr_descr = addr_align - sizeof(kmp_mem_desc_t);
  desc = *((kmp_mem_desc_t *)addr_descr); // read descriptor saved earlier

  KMP_DEBUG_ASSERT(desc.ptr_align == ptr);
  KMP_DEBUG_ASSERT(desc.allocator == al || desc.allocator == al->fb_data);
  al = desc.allocator;
  oal = (omp_allocator_handle_t)al; // cast to compare with pre-defined values
  KMP_DEBUG_ASSERT(al);

  if (allocator > kmp_max_mem_alloc && kmp_target_unlock_mem && al->pinned) {
    kmp_int32 device =
        __kmp_threads[gtid]->th.th_current_task->td_icvs.default_device;
    kmp_target_unlock_mem(desc.ptr_alloc, device);
  }

  if (__kmp_memkind_available) {
    if (oal < kmp_max_mem_alloc) {
      // pre-defined allocator
      if (oal == omp_high_bw_mem_alloc && mk_hbw_preferred) {
        kmp_mk_free(*mk_hbw_preferred, desc.ptr_alloc);
      } else if (oal == omp_large_cap_mem_alloc && mk_dax_kmem_all) {
        kmp_mk_free(*mk_dax_kmem_all, desc.ptr_alloc);
      } else {
        kmp_mk_free(*mk_default, desc.ptr_alloc);
      }
    } else {
      if (al->pool_size > 0) { // custom allocator with pool size requested
        kmp_uint64 used =
            KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
        (void)used; // to suppress compiler warning
        KMP_DEBUG_ASSERT(used >= desc.size_a);
      }
      kmp_mk_free(*al->memkind, desc.ptr_alloc);
    }
  } else {
    if (oal > kmp_max_mem_alloc && al->pool_size > 0) {
      kmp_uint64 used =
          KMP_TEST_THEN_ADD64((kmp_int64 *)&al->pool_used, -desc.size_a);
      (void)used; // to suppress compiler warning
      KMP_DEBUG_ASSERT(used >= desc.size_a);
    }
    __kmp_thread_free(__kmp_thread_from_gtid(gtid), desc.ptr_alloc);
  }
}

struct kmp_mem_descr { // Memory block descriptor
  void *ptr_allocated; // Pointer returned by malloc(), subject for free().
  size_t size_allocated; // Size of allocated memory block.
  void *ptr_aligned; // Pointer to aligned memory, to be used by client code.
  size_t size_aligned; // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;

/* Allocate memory on the requested boundary, fill allocated memory with 0x00.
   NULL is NEVER returned, __kmp_abort() is called in case of memory
   allocation error. Must use __kmp_free when freeing memory allocated by
   this routine! */
static void *___kmp_allocate_align(size_t size,
                                   size_t alignment KMP_SRC_LOC_DECL) {
  /* __kmp_allocate() allocates (by call to malloc()) a bigger memory block
     than requested to return a properly aligned pointer. The original pointer
     returned by malloc() and the size of the allocated block are saved in a
     descriptor just before the aligned pointer. This information is used by
     __kmp_free() -- it has to pass to free() the original pointer, not the
     aligned one.

       +---------+------------+--------------------------------+---------+
       | padding | descriptor |         aligned block          | padding |
       +---------+------------+--------------------------------+---------+
       ^                      ^
       |                      +- aligned pointer returned to caller
       +- pointer returned by malloc()

     The aligned block is filled with zeros; paddings are filled with 0xEF. */

  kmp_mem_descr_t descr;
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
  kmp_uintptr_t addr_descr; // Address of memory block descriptor.

  KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
                (int)size, (int)alignment KMP_SRC_LOC_PARM));

  KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big.
  KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t));
  // Make sure kmp_uintptr_t is enough to store addresses.

  descr.size_aligned = size;
  descr.size_allocated =
      descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment;

#if KMP_DEBUG
  descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_);
#else
  descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM);
#endif
  KE_TRACE(10, ("   malloc( %d ) returned %p\n", (int)descr.size_allocated,
                descr.ptr_allocated));
  if (descr.ptr_allocated == NULL) {
    KMP_FATAL(OutOfHeapMemory);
  }

  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned =
      (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1);
  addr_descr = addr_aligned - sizeof(kmp_mem_descr_t);

  descr.ptr_aligned = (void *)addr_aligned;

  KE_TRACE(26, ("   ___kmp_allocate_align: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));

  KMP_DEBUG_ASSERT(addr_allocated <= addr_descr);
  KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned % alignment == 0);
#ifdef KMP_DEBUG
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill allocated memory block with 0xEF.
#endif
  memset(descr.ptr_aligned, 0x00, descr.size_aligned);
  // Fill the aligned memory block (which is intended for use by the caller)
  // with 0x00. Do not put this filling under KMP_DEBUG condition! Many
  // callers expect zeroed memory.

  *((kmp_mem_descr_t *)addr_descr) = descr;

  KMP_MB();

  KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned));
  return descr.ptr_aligned;
}

/* Allocate memory on cache line boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory
   allocation error. Must use __kmp_free when freeing memory allocated by
   this routine! */
void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr));
  return ptr;
}

/* Allocate memory on page boundary, fill allocated memory with 0x00.
   Do not call this func directly! Use __kmp_page_allocate macro instead.
   NULL is NEVER returned, __kmp_abort() is called in case of memory
   allocation error. Must use __kmp_free when freeing memory allocated by
   this routine! */
void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) {
  int page_size = 8 * 1024;
  void *ptr;

  KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n",
                (int)size KMP_SRC_LOC_PARM));
  ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM);
  KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr));
  return ptr;
}

/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
   In debug mode, fill the memory block with 0xEF before calling free(). */
void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t descr;
#if KMP_DEBUG
  kmp_uintptr_t addr_allocated; // Address returned by malloc().
  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
#endif
  KE_TRACE(25,
           ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_free: "
                "ptr_allocated=%p, size_allocated=%d, "
                "ptr_aligned=%p, size_aligned=%d\n",
                descr.ptr_allocated, (int)descr.size_allocated,
                descr.ptr_aligned, (int)descr.size_aligned));
#if KMP_DEBUG
  addr_allocated = (kmp_uintptr_t)descr.ptr_allocated;
  addr_aligned = (kmp_uintptr_t)descr.ptr_aligned;
  KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0);
  KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr);
  KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned);
  KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated);
  KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <=
                   addr_allocated + descr.size_allocated);
  memset(descr.ptr_allocated, 0xEF, descr.size_allocated);
// Fill memory block with 0xEF; it helps catch using freed memory.
#endif

  KE_TRACE(10, ("   free( %p )\n", descr.ptr_allocated));
#ifdef KMP_DEBUG
  _free_src_loc(descr.ptr_allocated, _file_, _line_);
#else
  free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM);
#endif
  KMP_MB();
  KE_TRACE(25, ("<- __kmp_free() returns\n"));
}

#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists.
// If a chunk of the right size exists, grab it off the free list;
// otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
#define KMP_FREE_LIST_LIMIT 16

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE 128

void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) {
  void *ptr;
  size_t num_lines, idx;
  int index;
  void *alloc_ptr;
  size_t alloc_size;
  kmp_mem_descr_t *descr;

  KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM));

  num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE;
  idx = num_lines - 1;
  KMP_DEBUG_ASSERT(idx >= 0);
  if (idx < 2) {
    index = 0; // idx is [ 0, 1 ], use first free list
    num_lines = 2; // 1, 2 cache lines or less than cache line
  } else if ((idx >>= 2) == 0) {
    index = 1; // idx is [ 2, 3 ], use second free list
    num_lines = 4; // 3, 4 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 2; // idx is [ 4, 15 ], use third free list
    num_lines = 16; // 5, 6, ..., 16 cache lines
  } else if ((idx >>= 2) == 0) {
    index = 3; // idx is [ 16, 63 ], use fourth free list
    num_lines = 64; // 17, 18, ..., 64 cache lines
  } else {
    goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
  }

  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
  if (ptr != NULL) {
    // pop the head of the no-sync free list
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }
  ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
  if (ptr != NULL) {
    // no-sync free list is empty, use sync free list (filled in by other
    // threads only); pop the head of the sync free list, push NULL instead
    while (!KMP_COMPARE_AND_STORE_PTR(
        &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, nullptr)) {
      KMP_CPU_PAUSE();
      ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync);
    }
    // push the rest of the chain into the no-sync free list (can be NULL if
    // there was the only block)
    this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
    KMP_DEBUG_ASSERT(this_thr == ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr -
                                                      sizeof(kmp_mem_descr_t)))
                                     ->ptr_aligned);
    goto end;
  }

alloc_call:
  // haven't found a block in the free lists, thus allocate it
  size = num_lines * DCACHE_LINE;

  alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE;
  KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with "
                "alloc_size %d\n",
                __kmp_gtid_from_thread(this_thr), alloc_size));
  alloc_ptr = bget(this_thr, (bufsize)alloc_size);

  // align ptr to DCACHE_LINE
  ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) +
                  DCACHE_LINE) &
                 ~(DCACHE_LINE - 1));
  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
  // we don't need size_allocated
  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
  // (it is already saved in the bget buffer,
  // but we may want to use another allocator in future)
  descr->size_aligned = size;

end:
  KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n",
                __kmp_gtid_from_thread(this_thr), ptr));
  return ptr;
}

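/* Size-class sketch for the fast path above (DCACHE_LINE == 128): a 300-byte
   request needs ceil(300 / 128) == 3 cache lines, so idx == 2 selects
   index 1 and the block is rounded up to num_lines == 4, i.e. 512 bytes.
   Requests over 64 cache lines (8KB) skip the free lists entirely. */
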
// Free fast memory and place it on the thread's free list if it is of
// the correct size.
void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) {
  kmp_mem_descr_t *descr;
  kmp_info_t *alloc_thr;
  size_t size;
  size_t idx;
  int index;

  KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
                __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM));
  KMP_ASSERT(ptr != NULL);

  descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t));

  KE_TRACE(26, ("   __kmp_fast_free: size_aligned=%d\n",
                (int)descr->size_aligned));

  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines

  idx = DCACHE_LINE * 2; // 2 cache lines is the minimal size of a block
  if (idx == size) {
    index = 0; // 2 cache lines
  } else if ((idx <<= 1) == size) {
    index = 1; // 4 cache lines
  } else if ((idx <<= 2) == size) {
    index = 2; // 16 cache lines
  } else if ((idx <<= 2) == size) {
    index = 3; // 64 cache lines
  } else {
    KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64);
    goto free_call; // 65 or more cache lines ( > 8KB )
  }

  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
  if (alloc_thr == this_thr) {
    // push block to self no-sync free list, linking previous head (LIFO)
    *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
    this_thr->th.th_free_lists[index].th_free_list_self = ptr;
  } else {
    void *head = this_thr->th.th_free_lists[index].th_free_list_other;
    if (head == NULL) {
      // Create a new free list
      this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      *((void **)ptr) = NULL; // mark the tail of the list
      descr->size_allocated = (size_t)1; // head of the list keeps its length
    } else {
      // need to check the existing "other" list's owner thread and queue size
      kmp_mem_descr_t *dsc =
          (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t));
      // allocating thread, same for all queue nodes
      kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned);
      size_t q_sz =
          dsc->size_allocated + 1; // new size in case we add the current block
      if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) {
        // we can add the current block to the "other" list, no sync needed
        *((void **)ptr) = head;
        descr->size_allocated = q_sz;
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
      } else {
        // either the queue blocks' owner is changing or the size limit was
        // exceeded: return the old queue to the owning thread (q_th)
        // synchronously, and start a new list for this block
        void *old_ptr;
        void *tail = head;
        void *next = *((void **)head);
        while (next != NULL) {
          KMP_DEBUG_ASSERT(
              // queue size should decrease by 1 each step through the list
              ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t)))
                      ->size_allocated +
                  1 ==
              ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t)))
                  ->size_allocated);
          tail = next; // remember the tail node
          next = *((void **)next);
        }
        KMP_DEBUG_ASSERT(q_th != NULL);
        // push block to owner's sync free list
        old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
        /* the next pointer must be set before setting free_list to ptr to
           avoid exposing a broken list to other threads, even for an
           instant. */
        *((void **)tail) = old_ptr;

        while (!KMP_COMPARE_AND_STORE_PTR(
            &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) {
          KMP_CPU_PAUSE();
          old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync);
          *((void **)tail) = old_ptr;
        }

        // start a new list of not-self blocks
        this_thr->th.th_free_lists[index].th_free_list_other = ptr;
        *((void **)ptr) = NULL;
        descr->size_allocated = (size_t)1; // head of the list keeps its length
      }
    }
  }
  goto end;

free_call:
  KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
                __kmp_gtid_from_thread(this_thr), size));
  __kmp_bget_dequeue(this_thr); /* Release any queued buffers */
  brel(this_thr, descr->ptr_allocated);

end:
  KE_TRACE(25, ("<- __kmp_fast_free() returns\n"));
}

// Initialize the thread free lists related to fast memory.
// Only do this when a thread is initially created.
void __kmp_initialize_fast_memory(kmp_info_t *this_thr) {
  KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr));

  memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t));
}

// Free the memory in the thread free lists related to fast memory.
// Only do this when a thread is being reaped (destroyed).
void __kmp_free_fast_memory(kmp_info_t *th) {
  // For the BGET underlying allocator, walk through its structures to find
  // and release the pool blocks.
  int bin;
  thr_data_t *thr = get_thr_data(th);
  void **lst = NULL;

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th)));

  __kmp_bget_dequeue(th); // Release any queued buffers

  // Dig through the bin free lists and collect the pool blocks
  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
    bfhead_t *b = thr->freelist[bin].ql.flink;
    while (b != &thr->freelist[bin]) {
      if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is a pool block
        *((void **)b) = lst; // link the list (override bthr, keep flink yet)
        lst = (void **)b; // push b into lst
      }
      b = b->ql.flink; // get next buffer
    }
  }
  while (lst != NULL) {
    void *next = *lst;
    KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                  lst, next, th, __kmp_gtid_from_thread(th)));
    (*thr->relfcn)(lst);
    // count blocks to prevent problems in __kmp_finalize_bget()
    thr->numprel++; /* Nr of expansion block releases */
    thr->numpblk--; /* Total number of blocks */
    lst = (void **)next;
  }

  KE_TRACE(
      5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th)));
}
#endif // USE_FAST_MEMORY