2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
19 template<class CompletionFunction = see below>
23 using arrival_token = see below;
25 static constexpr ptrdiff_t max() noexcept;
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
37 void arrive_and_wait();
38 void arrive_and_drop();
41 CompletionFunction completion; // exposition only
50 #ifdef _LIBCPP_HAS_NO_THREADS
51 # error "<barrier> is not supported since libc++ has been configured without support for threads."
54 #include <__assert> // all public C++ headers provide the assertion handler
55 #include <__atomic/atomic_base.h>
56 #include <__atomic/memory_order.h>
57 #include <__availability>
58 #include <__memory/unique_ptr.h>
59 #include <__thread/poll_with_backoff.h>
60 #include <__thread/timed_backoff_policy.h>
61 #include <__utility/move.h>
67 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
68 # pragma GCC system_header
72 #include <__undef_macros>
74 #if _LIBCPP_STD_VER >= 14
76 _LIBCPP_BEGIN_NAMESPACE_STD
78 struct __empty_completion {
79 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
82 # ifndef _LIBCPP_HAS_NO_TREE_BARRIER
86 The default implementation of __barrier_base is a classic tree barrier.
88 It looks different from literature pseudocode for two main reasons:
89 1. Threads that call into std::barrier functions do not provide indices,
90 so a numbering step is added before the actual barrier algorithm,
91 appearing as an N+1 round to the N rounds of the tree barrier.
92 2. A great deal of attention has been paid to avoid cache line thrashing
93 by flattening the tree structure into cache-line sized arrays, that
94 are indexed in an efficient way.
98 using __barrier_phase_t = uint8_t;
100 class __barrier_algorithm_base;
102 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
103 __construct_barrier_algorithm_base(ptrdiff_t& __expected);
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
106 __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase);
108 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
109 __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
111 template <class _CompletionF>
112 class __barrier_base {
113 ptrdiff_t __expected_;
114 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
115 __atomic_base<ptrdiff_t> __expected_adjustment_;
116 _CompletionF __completion_;
117 __atomic_base<__barrier_phase_t> __phase_;
120 using arrival_token = __barrier_phase_t;
122 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
124 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
125 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
126 : __expected_(__expected),
127 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
128 __expected_adjustment_(0),
129 __completion_(std::move(__completion)),
131 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
132 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
133 __update <= __expected_, "update is greater than the expected count for the current barrier phase");
135 auto const __old_phase = __phase_.load(memory_order_relaxed);
136 for (; __update; --__update)
137 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
139 __expected_ += __expected_adjustment_.load(memory_order_relaxed);
140 __expected_adjustment_.store(0, memory_order_relaxed);
141 __phase_.store(__old_phase + 2, memory_order_release);
142 __phase_.notify_all();
146 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
147 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
148 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
150 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
151 __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
160 The alternative implementation of __barrier_base is a central barrier.
162 Two versions of this algorithm are provided:
163 1. A fairly straightforward implementation of the litterature for the
164 general case where the completion function is not empty.
165 2. An optimized implementation that exploits 2's complement arithmetic
166 and well-defined overflow in atomic arithmetic, to handle the phase
171 template <class _CompletionF>
172 class __barrier_base {
173 __atomic_base<ptrdiff_t> __expected;
174 __atomic_base<ptrdiff_t> __arrived;
175 _CompletionF __completion;
176 __atomic_base<bool> __phase;
179 using arrival_token = bool;
181 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
183 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
184 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
185 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
186 auto const __old_phase = __phase.load(memory_order_relaxed);
187 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
188 auto const new_expected = __expected.load(memory_order_relaxed);
190 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
191 update <= new_expected, "update is greater than the expected count for the current barrier phase");
195 __arrived.store(new_expected, memory_order_relaxed);
196 __phase.store(!__old_phase, memory_order_release);
197 __phase.notify_all();
201 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
202 __phase.wait(__old_phase, memory_order_acquire);
204 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
205 __expected.fetch_sub(1, memory_order_relaxed);
211 class __barrier_base<__empty_completion> {
212 static constexpr uint64_t __expected_unit = 1ull;
213 static constexpr uint64_t __arrived_unit = 1ull << 32;
214 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
215 static constexpr uint64_t __phase_bit = 1ull << 63;
216 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
218 __atomic_base<uint64_t> __phase_arrived_expected;
220 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
221 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
225 using arrival_token = uint64_t;
227 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
229 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
230 : __phase_arrived_expected(__init(__count)) {}
231 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
232 auto const __inc = __arrived_unit * update;
233 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
235 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
236 update <= __old, "update is greater than the expected count for the current barrier phase");
238 if ((__old ^ (__old + __inc)) & __phase_bit) {
239 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
240 __phase_arrived_expected.notify_all();
242 return __old & __phase_bit;
244 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
245 auto const __test_fn = [=]() -> bool {
246 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
247 return ((__current & __phase_bit) != __phase);
249 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
251 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
252 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
257 # endif // !_LIBCPP_HAS_NO_TREE_BARRIER
259 template <class _CompletionF = __empty_completion>
261 __barrier_base<_CompletionF> __b_;
264 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
266 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
268 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI explicit barrier(
269 ptrdiff_t __count, _CompletionF __completion = _CompletionF())
270 : __b_(__count, std::move(__completion)) {
271 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
273 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
274 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
276 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
277 "a value greater than max()");
280 barrier(barrier const&) = delete;
281 barrier& operator=(barrier const&) = delete;
283 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
284 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
285 return __b_.arrive(__update);
287 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
288 __b_.wait(std::move(__phase));
290 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
291 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
294 _LIBCPP_END_NAMESPACE_STD
296 #endif // _LIBCPP_STD_VER >= 14
300 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
305 # include <stdexcept>
309 #endif //_LIBCPP_BARRIER