2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
19 template<class CompletionFunction = see below>
23 using arrival_token = see below;
25 static constexpr ptrdiff_t max() noexcept;
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
37 void arrive_and_wait();
38 void arrive_and_drop();
41 CompletionFunction completion; // exposition only
48 #include <__assert> // all public C++ headers provide the assertion handler
49 #include <__atomic/atomic_base.h>
50 #include <__atomic/memory_order.h>
51 #include <__availability>
53 #include <__memory/unique_ptr.h>
54 #include <__thread/poll_with_backoff.h>
55 #include <__thread/timed_backoff_policy.h>
56 #include <__utility/move.h>
62 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63 # pragma GCC system_header
66 #ifdef _LIBCPP_HAS_NO_THREADS
67 # error "<barrier> is not supported since libc++ has been configured without support for threads."
71 #include <__undef_macros>
73 #if _LIBCPP_STD_VER >= 14
75 _LIBCPP_BEGIN_NAMESPACE_STD
77 struct __empty_completion
79 inline _LIBCPP_INLINE_VISIBILITY
80 void operator()() noexcept
85 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
89 The default implementation of __barrier_base is a classic tree barrier.
91 It looks different from literature pseudocode for two main reasons:
92 1. Threads that call into std::barrier functions do not provide indices,
93 so a numbering step is added before the actual barrier algorithm,
94 appearing as an N+1 round to the N rounds of the tree barrier.
95 2. A great deal of attention has been paid to avoid cache line thrashing
96 by flattening the tree structure into cache-line sized arrays, that
97 are indexed in an efficient way.
101 using __barrier_phase_t = uint8_t;
103 class __barrier_algorithm_base;
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106 __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
108 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
109 bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
110 __barrier_phase_t __old_phase);
112 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
113 void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
115 template<class _CompletionF>
116 class __barrier_base {
117 ptrdiff_t __expected_;
118 unique_ptr<__barrier_algorithm_base,
119 void (*)(__barrier_algorithm_base*)> __base_;
120 __atomic_base<ptrdiff_t> __expected_adjustment_;
121 _CompletionF __completion_;
122 __atomic_base<__barrier_phase_t> __phase_;
125 using arrival_token = __barrier_phase_t;
127 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
128 return numeric_limits<ptrdiff_t>::max();
131 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
133 : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
134 &__destroy_barrier_algorithm_base),
135 __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
138 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
139 arrival_token arrive(ptrdiff_t __update)
141 _LIBCPP_ASSERT_UNCATEGORIZED(
142 __update <= __expected_, "update is greater than the expected count for the current barrier phase");
144 auto const __old_phase = __phase_.load(memory_order_relaxed);
145 for(; __update; --__update)
146 if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
148 __expected_ += __expected_adjustment_.load(memory_order_relaxed);
149 __expected_adjustment_.store(0, memory_order_relaxed);
150 __phase_.store(__old_phase + 2, memory_order_release);
151 __phase_.notify_all();
155 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
156 void wait(arrival_token&& __old_phase) const
158 auto const __test_fn = [this, __old_phase]() -> bool {
159 return __phase_.load(memory_order_acquire) != __old_phase;
161 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
163 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
164 void arrive_and_drop()
166 __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
175 The alternative implementation of __barrier_base is a central barrier.
177 Two versions of this algorithm are provided:
178 1. A fairly straightforward implementation of the litterature for the
179 general case where the completion function is not empty.
180 2. An optimized implementation that exploits 2's complement arithmetic
181 and well-defined overflow in atomic arithmetic, to handle the phase
186 template<class _CompletionF>
187 class __barrier_base {
189 __atomic_base<ptrdiff_t> __expected;
190 __atomic_base<ptrdiff_t> __arrived;
191 _CompletionF __completion;
192 __atomic_base<bool> __phase;
194 using arrival_token = bool;
196 static constexpr ptrdiff_t max() noexcept {
197 return numeric_limits<ptrdiff_t>::max();
200 _LIBCPP_INLINE_VISIBILITY
201 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
202 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
205 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
206 arrival_token arrive(ptrdiff_t update)
208 auto const __old_phase = __phase.load(memory_order_relaxed);
209 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
210 auto const new_expected = __expected.load(memory_order_relaxed);
212 _LIBCPP_ASSERT_UNCATEGORIZED(
213 update <= new_expected, "update is greater than the expected count for the current barrier phase");
217 __arrived.store(new_expected, memory_order_relaxed);
218 __phase.store(!__old_phase, memory_order_release);
219 __phase.notify_all();
223 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
224 void wait(arrival_token&& __old_phase) const
226 __phase.wait(__old_phase, memory_order_acquire);
228 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
229 void arrive_and_drop()
231 __expected.fetch_sub(1, memory_order_relaxed);
237 class __barrier_base<__empty_completion> {
239 static constexpr uint64_t __expected_unit = 1ull;
240 static constexpr uint64_t __arrived_unit = 1ull << 32;
241 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
242 static constexpr uint64_t __phase_bit = 1ull << 63;
243 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
245 __atomic_base<uint64_t> __phase_arrived_expected;
247 static _LIBCPP_INLINE_VISIBILITY
248 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
250 return ((uint64_t(1u << 31) - __count) << 32)
251 | (uint64_t(1u << 31) - __count);
255 using arrival_token = uint64_t;
257 static constexpr ptrdiff_t max() noexcept {
258 return ptrdiff_t(1u << 31) - 1;
261 _LIBCPP_INLINE_VISIBILITY
262 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
263 : __phase_arrived_expected(__init(__count))
266 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
267 arrival_token arrive(ptrdiff_t update)
269 auto const __inc = __arrived_unit * update;
270 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
272 _LIBCPP_ASSERT_UNCATEGORIZED(
273 update <= __old, "update is greater than the expected count for the current barrier phase");
275 if ((__old ^ (__old + __inc)) & __phase_bit) {
276 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
277 __phase_arrived_expected.notify_all();
279 return __old & __phase_bit;
281 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
282 void wait(arrival_token&& __phase) const
284 auto const __test_fn = [=]() -> bool {
285 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
286 return ((__current & __phase_bit) != __phase);
288 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
290 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
291 void arrive_and_drop()
293 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
298 #endif // !_LIBCPP_HAS_NO_TREE_BARRIER
300 template<class _CompletionF = __empty_completion>
303 __barrier_base<_CompletionF> __b_;
305 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
307 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
308 return __barrier_base<_CompletionF>::max();
311 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312 explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
313 : __b_(__count, _VSTD::move(__completion)) {
314 _LIBCPP_ASSERT_UNCATEGORIZED(
316 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
317 _LIBCPP_ASSERT_UNCATEGORIZED(
319 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
320 "a value greater than max()");
323 barrier(barrier const&) = delete;
324 barrier& operator=(barrier const&) = delete;
326 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
327 arrival_token arrive(ptrdiff_t __update = 1)
329 _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
330 return __b_.arrive(__update);
332 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
333 void wait(arrival_token&& __phase) const
335 __b_.wait(_VSTD::move(__phase));
337 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
338 void arrive_and_wait()
342 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
343 void arrive_and_drop()
345 __b_.arrive_and_drop();
349 _LIBCPP_END_NAMESPACE_STD
351 #endif // _LIBCPP_STD_VER >= 14
355 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
360 # include <stdexcept>
364 #endif //_LIBCPP_BARRIER