2 //===--------------------------- barrier ----------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
19 template<class CompletionFunction = see below>
23 using arrival_token = see below;
25 static constexpr ptrdiff_t max() noexcept;
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
37 void arrive_and_wait();
38 void arrive_and_drop();
41 CompletionFunction completion; // exposition only
48 #include <__availability>
51 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
55 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
56 #pragma GCC system_header
59 #ifdef _LIBCPP_HAS_NO_THREADS
60 # error <barrier> is not supported on this single threaded system
64 #include <__undef_macros>
66 #if _LIBCPP_STD_VER >= 14
68 _LIBCPP_BEGIN_NAMESPACE_STD
// Callable type used as the default `_CompletionF` template argument of
// `barrier`. Its call operator takes no arguments and is declared noexcept.
// `__barrier_base` has a dedicated specialization for this type.
70 struct __empty_completion
72 inline _LIBCPP_INLINE_VISIBILITY
73 void operator()() noexcept
78 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
82 The default implementation of __barrier_base is a classic tree barrier.
84 It looks different from literature pseudocode for two main reasons:
85 1. Threads that call into std::barrier functions do not provide indices,
86 so a numbering step is added before the actual barrier algorithm,
87 appearing as an N+1 round to the N rounds of the tree barrier.
88 2. A great deal of attention has been paid to avoid cache line thrashing
89 by flattening the tree structure into cache-line sized arrays, that
90 are indexed in an efficient way.
// Integer type holding the barrier phase. The tree-barrier __barrier_base
// stores it atomically and also uses it as its arrival_token.
94 using __barrier_phase_t = uint8_t;
// Opaque algorithm state: only declared here, never defined in this header.
// It is manipulated exclusively through the three functions below.
96 class __barrier_algorithm_base;
// Creates the algorithm state. `__expected` is taken by non-const reference.
// NOTE(review): presumably so the callee can adjust the count — confirm
// against the out-of-line definition.
98 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
99 __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
// Registers one arrival for the phase identified by `__old_phase`.
// NOTE(review): the caller treats a `true` result as "advance the phase";
// presumably it means this arrival was the last one — confirm.
101 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
102 bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
103 __barrier_phase_t __old_phase);
// Releases a state object obtained from __construct_barrier_algorithm_base.
// Used as the unique_ptr deleter in the tree-barrier __barrier_base.
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106 void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
// __barrier_base variant that delegates arrival bookkeeping to the opaque
// __barrier_algorithm_base object. The current phase lives in an atomic
// __barrier_phase_t, which is also the arrival token type.
108 template<class _CompletionF>
109 class __barrier_base {
// Expected arrival count; passed by reference to
// __construct_barrier_algorithm_base and later updated in arrive().
110 ptrdiff_t __expected;
// Owning pointer to the algorithm state; the deleter is
// __destroy_barrier_algorithm_base (see the constructor).
111 unique_ptr<__barrier_algorithm_base,
112 void (*)(__barrier_algorithm_base*)> __base;
// Pending change to __expected: decremented by arrive_and_drop() and folded
// into __expected (then reset to 0) inside arrive().
113 __atomic_base<ptrdiff_t> __expected_adjustment;
// User-supplied completion function object.
114 _CompletionF __completion;
// Current phase; written with release ordering in arrive() and read with
// acquire ordering in wait().
115 __atomic_base<__barrier_phase_t> __phase;
118 using arrival_token = __barrier_phase_t;
// Largest participant count this variant accepts: the full ptrdiff_t range.
120 static constexpr ptrdiff_t max() noexcept {
121 return numeric_limits<ptrdiff_t>::max();
// Builds the algorithm state for `__expected` participants and starts at
// phase 0 with no pending adjustment.
// NOTE(review): `move` is unqualified here whereas `barrier`'s constructor
// uses `_VSTD::move` — consider qualifying it for consistency.
124 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
125 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
126 : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
127 &__destroy_barrier_algorithm_base),
128 __expected_adjustment(0), __completion(move(__completion)), __phase(0)
// Registers `update` arrivals for the current phase.
131 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132 arrival_token arrive(ptrdiff_t update)
// Snapshot of the phase these arrivals belong to.
134 auto const __old_phase = __phase.load(memory_order_relaxed);
// One call into the out-of-line algorithm per unit of `update`.
135 for(; update; --update)
136 if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
// Apply and clear any drops recorded by arrive_and_drop(), then publish the
// new phase (old + 2) with release ordering and wake any waiters.
138 __expected += __expected_adjustment.load(memory_order_relaxed);
139 __expected_adjustment.store(0, memory_order_relaxed);
140 __phase.store(__old_phase + 2, memory_order_release);
141 __phase.notify_all();
// Blocks until the stored phase differs from the token `__old_phase`.
145 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
146 void wait(arrival_token&& __old_phase) const
// Predicate polled with backoff; the acquire load pairs with the release
// store to __phase in arrive().
148 auto const __test_fn = [this, __old_phase]() -> bool {
149 return __phase.load(memory_order_acquire) != __old_phase;
151 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
// Records that one fewer arrival is expected from now on.
153 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
154 void arrive_and_drop()
// Only the pending adjustment is touched here; arrive() folds it into
// __expected later.
156 __expected_adjustment.fetch_sub(1, memory_order_relaxed);
165 The alternative implementation of __barrier_base is a central barrier.
167 Two versions of this algorithm are provided:
168 1. A fairly straightforward implementation of the literature for the
169 general case where the completion function is not empty.
170 2. An optimized implementation that exploits 2's complement arithmetic
171 and well-defined overflow in atomic arithmetic, to handle the phase
// __barrier_base variant built on a single shared countdown (`__arrived`)
// and a boolean phase flag that doubles as the arrival token.
176 template<class _CompletionF>
177 class __barrier_base {
// Arrival count to reload into __arrived for the next phase; decremented by
// arrive_and_drop().
179 __atomic_base<ptrdiff_t> __expected;
// Countdown of arrivals still outstanding in the current phase.
180 __atomic_base<ptrdiff_t> __arrived;
// User-supplied completion function object.
181 _CompletionF __completion;
// Phase flag: toggled in arrive(), waited on in wait().
182 __atomic_base<bool> __phase;
184 using arrival_token = bool;
// Largest participant count this variant accepts: the full ptrdiff_t range.
186 static constexpr ptrdiff_t max() noexcept {
187 return numeric_limits<ptrdiff_t>::max();
// Starts with the countdown equal to the expected count and phase == false.
// NOTE(review): `move` is unqualified here whereas `barrier`'s constructor
// uses `_VSTD::move` — consider qualifying it for consistency.
190 _LIBCPP_INLINE_VISIBILITY
191 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
192 : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
// Registers `update` arrivals for the current phase.
195 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
196 arrival_token arrive(ptrdiff_t update)
198 auto const __old_phase = __phase.load(memory_order_relaxed);
// fetch_sub returns the previous value, so subtracting `update` again gives
// the countdown value after this arrival.
199 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
200 auto const new_expected = __expected.load(memory_order_relaxed);
// Phase hand-over: reload the countdown from the current expected count,
// flip the phase flag with release ordering, and wake any waiters.
203 __arrived.store(new_expected, memory_order_relaxed);
204 __phase.store(!__old_phase, memory_order_release);
205 __phase.notify_all();
// Blocks while the phase flag still equals the token `__old_phase`.
209 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
210 void wait(arrival_token&& __old_phase) const
// The acquire ordering pairs with the release store to __phase in arrive().
212 __phase.wait(__old_phase, memory_order_acquire);
// Records that one fewer arrival is expected from now on.
214 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
215 void arrive_and_drop()
// Lowers the count that arrive() reloads into __arrived for later phases.
217 __expected.fetch_sub(1, memory_order_relaxed);
// Specialization for __empty_completion. All state is packed into one
// 64-bit atomic word:
//   bits  0..31  "expected" field (selected by __expected_mask)
//   bits 32..62  "arrived" field  (selected by __arrived_mask)
//   bit  63      phase bit        (__phase_bit)
// Both counting fields are biased: __init stores (2^31 - count), so adding
// `count` arrivals to the upper half carries into bit 63 and flips the phase.
223 class __barrier_base<__empty_completion> {
// Increment that changes the expected field by one.
225 static constexpr uint64_t __expected_unit = 1ull;
// Increment that changes the arrived field by one.
226 static constexpr uint64_t __arrived_unit = 1ull << 32;
// Selects the low 32 bits (expected field).
227 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
// Most significant bit: the current phase.
228 static constexpr uint64_t __phase_bit = 1ull << 63;
// Selects bits 32..62 (arrived field, excluding the phase bit).
229 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
// The packed phase / arrived / expected word described above.
231 __atomic_base<uint64_t> __phase_arrived_expected;
// Builds the initial packed word: (2^31 - __count) is placed both in the
// low 32 bits and, shifted left by 32, in the upper half.
233 static _LIBCPP_INLINE_VISIBILITY
234 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
236 return ((uint64_t(1u << 31) - __count) << 32)
237 | (uint64_t(1u << 31) - __count);
// The token is the packed word masked down to its phase bit (see arrive()).
241 using arrival_token = uint64_t;
// Largest participant count representable with the 2^31 bias: 2^31 - 1.
243 static constexpr ptrdiff_t max() noexcept {
244 return ptrdiff_t(1u << 31) - 1;
// The completion argument is unnamed and ignored; only the count is used.
247 _LIBCPP_INLINE_VISIBILITY
248 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
249 : __phase_arrived_expected(__init(__count))
// Registers `update` arrivals and returns the phase bit observed beforehand.
252 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
253 arrival_token arrive(ptrdiff_t update)
// Scale `update` so the addition lands in the arrived field.
255 auto const __inc = __arrived_unit * update;
256 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
// True when this addition changed bit 63, i.e. the phase bit flipped.
257 if((__old ^ (__old + __inc)) & __phase_bit) {
// Add the biased expected value (low 32 bits of the old word) into the
// upper half to set up the count for the next phase, then wake waiters.
258 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
259 __phase_arrived_expected.notify_all();
261 return __old & __phase_bit;
// Blocks until the stored phase bit differs from the token `__phase`.
263 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
264 void wait(arrival_token&& __phase) const
// Predicate polled with backoff; the token is captured by copy.
266 auto const __test_fn = [=]() -> bool {
267 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
268 return ((__current & __phase_bit) != __phase);
270 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
// Records that one fewer arrival is expected from now on.
272 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
273 void arrive_and_drop()
// The expected field holds (2^31 - count), so adding one unit lowers the
// effective count by one.
275 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
280 #endif //_LIBCPP_HAS_NO_TREE_BARRIER
// Public barrier type: a thin wrapper that forwards every operation to a
// __barrier_base<_CompletionF> member. With the default template argument
// the __empty_completion specialization of __barrier_base is selected.
282 template<class _CompletionF = __empty_completion>
// Implementation object that holds all barrier state.
285 __barrier_base<_CompletionF> __b;
// Token type is whatever the selected implementation defines.
287 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
// Maximum participant count supported by the selected implementation.
289 static constexpr ptrdiff_t max() noexcept {
290 return __barrier_base<_CompletionF>::max();
// Constructs the implementation with `__count` expected arrivals and the
// given completion function.
// NOTE(review): the synopsis at the top of this header declares this
// constructor `constexpr explicit`; this declaration is neither — confirm
// whether that is intentional.
293 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
294 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
295 : __b(__count, _VSTD::move(__completion)) {
// Barriers are neither copy-constructible nor copy-assignable.
298 barrier(barrier const&) = delete;
299 barrier& operator=(barrier const&) = delete;
// Registers `update` arrivals (default 1) and returns the implementation's
// arrival token; the result is marked [[nodiscard]].
301 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
302 arrival_token arrive(ptrdiff_t update = 1)
304 return __b.arrive(update);
// Forwards the token to the implementation's wait().
306 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
307 void wait(arrival_token&& __phase) const
309 __b.wait(_VSTD::move(__phase));
// NOTE(review): presumably equivalent to wait(arrive()) per the standard
// synopsis — confirm against the full definition.
311 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312 void arrive_and_wait()
// Forwards to the implementation's arrive_and_drop().
316 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
317 void arrive_and_drop()
319 __b.arrive_and_drop();
323 _LIBCPP_END_NAMESPACE_STD
325 #endif // _LIBCPP_STD_VER >= 14
329 #endif //_LIBCPP_BARRIER