2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
19 template<class CompletionFunction = see below>
23 using arrival_token = see below;
25 static constexpr ptrdiff_t max() noexcept;
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
37 void arrive_and_wait();
38 void arrive_and_drop();
41 CompletionFunction completion; // exposition only
48 #include <__assert> // all public C++ headers provide the assertion handler
49 #include <__atomic/atomic_base.h>
50 #include <__atomic/memory_order.h>
51 #include <__availability>
53 #include <__memory/unique_ptr.h>
54 #include <__thread/poll_with_backoff.h>
55 #include <__thread/timed_backoff_policy.h>
56 #include <__utility/move.h>
62 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63 # pragma GCC system_header
66 #ifdef _LIBCPP_HAS_NO_THREADS
67 # error "<barrier> is not supported since libc++ has been configured without support for threads."
71 #include <__undef_macros>
73 #if _LIBCPP_STD_VER >= 14
75 _LIBCPP_BEGIN_NAMESPACE_STD
77 struct __empty_completion {
78 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
81 # ifndef _LIBCPP_HAS_NO_TREE_BARRIER
85 The default implementation of __barrier_base is a classic tree barrier.
87 It looks different from literature pseudocode for two main reasons:
88 1. Threads that call into std::barrier functions do not provide indices,
89 so a numbering step is added before the actual barrier algorithm,
90 appearing as an N+1 round to the N rounds of the tree barrier.
91 2. A great deal of attention has been paid to avoid cache line thrashing
92 by flattening the tree structure into cache-line sized arrays, that
93 are indexed in an efficient way.
97 using __barrier_phase_t = uint8_t;
99 class __barrier_algorithm_base;
101 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
102 __construct_barrier_algorithm_base(ptrdiff_t& __expected);
104 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
105 __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
107 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
108 __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
110 template <class _CompletionF>
111 class __barrier_base {
112 ptrdiff_t __expected_;
113 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
114 __atomic_base<ptrdiff_t> __expected_adjustment_;
115 _CompletionF __completion_;
116 __atomic_base<__barrier_phase_t> __phase_;
119 using arrival_token = __barrier_phase_t;
121 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
123 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
124 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
125 : __expected_(__expected),
126 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
127 __expected_adjustment_(0),
128 __completion_(std::move(__completion)),
130 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
131 _LIBCPP_ASSERT_UNCATEGORIZED(
132 __update <= __expected_, "update is greater than the expected count for the current barrier phase");
134 auto const __old_phase = __phase_.load(memory_order_relaxed);
135 for (; __update; --__update)
136 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
138 __expected_ += __expected_adjustment_.load(memory_order_relaxed);
139 __expected_adjustment_.store(0, memory_order_relaxed);
140 __phase_.store(__old_phase + 2, memory_order_release);
141 __phase_.notify_all();
145 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
146 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
147 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
149 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
150 __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
159 The alternative implementation of __barrier_base is a central barrier.
161 Two versions of this algorithm are provided:
162 1. A fairly straightforward implementation of the litterature for the
163 general case where the completion function is not empty.
164 2. An optimized implementation that exploits 2's complement arithmetic
165 and well-defined overflow in atomic arithmetic, to handle the phase
170 template <class _CompletionF>
171 class __barrier_base {
172 __atomic_base<ptrdiff_t> __expected;
173 __atomic_base<ptrdiff_t> __arrived;
174 _CompletionF __completion;
175 __atomic_base<bool> __phase;
178 using arrival_token = bool;
180 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
182 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
183 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
184 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
185 auto const __old_phase = __phase.load(memory_order_relaxed);
186 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
187 auto const new_expected = __expected.load(memory_order_relaxed);
189 _LIBCPP_ASSERT_UNCATEGORIZED(
190 update <= new_expected, "update is greater than the expected count for the current barrier phase");
194 __arrived.store(new_expected, memory_order_relaxed);
195 __phase.store(!__old_phase, memory_order_release);
196 __phase.notify_all();
200 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
201 __phase.wait(__old_phase, memory_order_acquire);
203 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
204 __expected.fetch_sub(1, memory_order_relaxed);
210 class __barrier_base<__empty_completion> {
211 static constexpr uint64_t __expected_unit = 1ull;
212 static constexpr uint64_t __arrived_unit = 1ull << 32;
213 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
214 static constexpr uint64_t __phase_bit = 1ull << 63;
215 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
217 __atomic_base<uint64_t> __phase_arrived_expected;
219 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
220 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
224 using arrival_token = uint64_t;
226 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
228 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
229 : __phase_arrived_expected(__init(__count)) {}
230 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
231 auto const __inc = __arrived_unit * update;
232 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
234 _LIBCPP_ASSERT_UNCATEGORIZED(
235 update <= __old, "update is greater than the expected count for the current barrier phase");
237 if ((__old ^ (__old + __inc)) & __phase_bit) {
238 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
239 __phase_arrived_expected.notify_all();
241 return __old & __phase_bit;
243 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
244 auto const __test_fn = [=]() -> bool {
245 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
246 return ((__current & __phase_bit) != __phase);
248 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
250 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
251 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
256 # endif // !_LIBCPP_HAS_NO_TREE_BARRIER
258 template <class _CompletionF = __empty_completion>
260 __barrier_base<_CompletionF> __b_;
263 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
265 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
267 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
268 ptrdiff_t __count, _CompletionF __completion = _CompletionF())
269 : __b_(__count, std::move(__completion)) {
270 _LIBCPP_ASSERT_UNCATEGORIZED(
272 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
273 _LIBCPP_ASSERT_UNCATEGORIZED(
275 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
276 "a value greater than max()");
279 barrier(barrier const&) = delete;
280 barrier& operator=(barrier const&) = delete;
282 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
283 _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
284 return __b_.arrive(__update);
286 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
287 __b_.wait(std::move(__phase));
289 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
290 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
293 _LIBCPP_END_NAMESPACE_STD
295 #endif // _LIBCPP_STD_VER >= 14
299 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
304 # include <stdexcept>
308 #endif //_LIBCPP_BARRIER