//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"
#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
// C11 aligned_alloc requires the size to be a multiple of the alignment,
// so round the request up first.
#define KMP_ALIGN_UP(val, alignment)                                           \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment)                                  \
  aligned_alloc(alignment, KMP_ALIGN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
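// Worked example (editor's illustration): the integer round-up above maps a
// request of 100 bytes at 64-byte alignment to the next multiple of 64:
//   KMP_ALIGN_UP(100, 64) == (100 + 63) / 64 * 64 == 128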
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = NULL;
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    // Allocation failed; release anything that was handed back and signal
    // failure to the caller with NULL.
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
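// Usage sketch (editor's illustration, not part of the runtime API): whichever
// branch is selected above, the two macros must be used as a matched pair,
// since mixing, e.g., _mm_malloc with plain free() is undefined behavior:
//   void *p = KMP_ALIGNED_ALLOCATE(128, 4 * CACHE_LINE);
//   if (p) { /* ... use the four-cache-line-aligned block ... */ }
//   KMP_ALIGNED_FREE(p);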
// Use four cache lines: MLC tends to prefetch the next or previous cache line
// creating a possible fake conflict between cores, so this is the only way to
// guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0
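// Editor's note (illustrative): KMP_ALIGN(4 * CACHE_LINE) expands to the
// compiler's alignment attribute, roughly
//   __attribute__((aligned(4 * CACHE_LINE)))  // GCC/Clang flavor
// so each member marked with it starts on its own four-cache-line boundary;
// the single-member wrapper structs below supply the trailing padding.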
class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };
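  // Editor's note (sketch): since a struct's size is rounded up to a multiple
  // of its alignment, each wrapper above occupies whole groups of four cache
  // lines, so consecutive elements of the flags/go/iter/sleep arrays below
  // can never share a prefetch-paired line:
  //   static_assert(sizeof(go_s) % (4 * CACHE_LINE) == 0, "one slot per group");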
  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);
public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;
  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  // set if threads_per_go is fixed and should not be recomputed
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs;
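  // Editor's illustration (hypothetical numbers, not computed by this header):
  // when the counts divide evenly, the fields above are related by
  //   threads_per_group == threads_per_go * gos_per_group
  //   num_gos           == num_groups * gos_per_group
  // e.g. 128 threads with threads_per_go == 16 could use num_gos == 8 go
  // signals split into num_groups == 2 groups of gos_per_group == 4.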
  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }
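  // Usage sketch (editor's illustration; the real call sites live elsewhere
  // in the runtime):
  //   distributedBarrier *b = distributedBarrier::allocate(nproc);
  //   /* ... threads synchronize through the barrier ... */
  //   distributedBarrier::deallocate(b);
  // allocate() must be paired with deallocate() rather than delete, because
  // the storage comes from KMP_ALIGNED_ALLOCATE and the destructor is deleted.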
  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};
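// Growth sketch (editor's illustration): when a team grows past the arrays'
// current capacity, callers are expected to check capacity first, e.g.
//   if (b->need_resize(new_nthr)) { /* arrays must grow */ }
//   b->update_num_threads(new_nthr); // re-runs init() for the new count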
#endif // KMP_BARRIER_H