2 //===----------------------------------------------------------------------===//
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
19 template<class CompletionFunction = see below>
20 class barrier // since C++20
23 using arrival_token = see below;
25 static constexpr ptrdiff_t max() noexcept;
27 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
37 void arrive_and_wait();
38 void arrive_and_drop();
41 CompletionFunction completion; // exposition only
50 #if _LIBCPP_HAS_THREADS
53 # include <__atomic/atomic.h>
54 # include <__atomic/memory_order.h>
55 # include <__cstddef/ptrdiff_t.h>
56 # include <__memory/unique_ptr.h>
57 # include <__thread/poll_with_backoff.h>
58 # include <__thread/timed_backoff_policy.h>
59 # include <__utility/move.h>
64 # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
65 # pragma GCC system_header
69 # include <__undef_macros>
71 # if _LIBCPP_STD_VER >= 20
73 _LIBCPP_BEGIN_NAMESPACE_STD
75 struct __empty_completion {
76 inline _LIBCPP_HIDE_FROM_ABI void operator()() noexcept {}
79 # ifndef _LIBCPP_HAS_NO_TREE_BARRIER
83 The default implementation of __barrier_base is a classic tree barrier.
85 It looks different from literature pseudocode for two main reasons:
86 1. Threads that call into std::barrier functions do not provide indices,
87 so a numbering step is added before the actual barrier algorithm,
88 appearing as an N+1 round to the N rounds of the tree barrier.
89 2. A great deal of attention has been paid to avoid cache line thrashing
90 by flattening the tree structure into cache-line sized arrays, that
91 are indexed in an efficient way.
95 using __barrier_phase_t = uint8_t;
97 class __barrier_algorithm_base;
99 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __barrier_algorithm_base*
100 __construct_barrier_algorithm_base(ptrdiff_t& __expected);
102 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI bool
103 __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, __barrier_phase_t __old_phase) noexcept;
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
106 __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier) noexcept;
108 template <class _CompletionF>
109 class __barrier_base {
110 ptrdiff_t __expected_;
111 unique_ptr<__barrier_algorithm_base, void (*)(__barrier_algorithm_base*)> __base_;
112 atomic<ptrdiff_t> __expected_adjustment_;
113 _CompletionF __completion_;
114 atomic<__barrier_phase_t> __phase_;
117 using arrival_token = __barrier_phase_t;
119 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
121 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI
122 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
123 : __expected_(__expected),
124 __base_(std::__construct_barrier_algorithm_base(this->__expected_), &__destroy_barrier_algorithm_base),
125 __expected_adjustment_(0),
126 __completion_(std::move(__completion)),
128 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update) {
129 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
130 __update <= __expected_, "update is greater than the expected count for the current barrier phase");
132 auto const __old_phase = __phase_.load(memory_order_relaxed);
133 for (; __update; --__update)
134 if (__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
136 __expected_ += __expected_adjustment_.load(memory_order_relaxed);
137 __expected_adjustment_.store(0, memory_order_relaxed);
138 __phase_.store(__old_phase + 2, memory_order_release);
139 __phase_.notify_all();
143 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
144 auto const __test_fn = [this, __old_phase]() -> bool { return __phase_.load(memory_order_acquire) != __old_phase; };
145 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
147 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
148 __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
157 The alternative implementation of __barrier_base is a central barrier.
159 Two versions of this algorithm are provided:
160 1. A fairly straightforward implementation of the litterature for the
161 general case where the completion function is not empty.
162 2. An optimized implementation that exploits 2's complement arithmetic
163 and well-defined overflow in atomic arithmetic, to handle the phase
168 template <class _CompletionF>
169 class __barrier_base {
170 atomic<ptrdiff_t> __expected;
171 atomic<ptrdiff_t> __arrived;
172 _CompletionF __completion;
173 atomic<bool> __phase;
176 using arrival_token = bool;
178 static constexpr ptrdiff_t max() noexcept { return numeric_limits<ptrdiff_t>::max(); }
180 _LIBCPP_HIDE_FROM_ABI __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
181 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) {}
182 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
183 auto const __old_phase = __phase.load(memory_order_relaxed);
184 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
185 auto const new_expected = __expected.load(memory_order_relaxed);
187 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
188 update <= new_expected, "update is greater than the expected count for the current barrier phase");
192 __arrived.store(new_expected, memory_order_relaxed);
193 __phase.store(!__old_phase, memory_order_release);
194 __phase.notify_all();
198 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __old_phase) const {
199 __phase.wait(__old_phase, memory_order_acquire);
201 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
202 __expected.fetch_sub(1, memory_order_relaxed);
208 class __barrier_base<__empty_completion> {
209 static constexpr uint64_t __expected_unit = 1ull;
210 static constexpr uint64_t __arrived_unit = 1ull << 32;
211 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
212 static constexpr uint64_t __phase_bit = 1ull << 63;
213 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
215 atomic<uint64_t> __phase_arrived_expected;
217 static _LIBCPP_HIDE_FROM_ABI constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT {
218 return ((uint64_t(1u << 31) - __count) << 32) | (uint64_t(1u << 31) - __count);
222 using arrival_token = uint64_t;
224 static constexpr ptrdiff_t max() noexcept { return ptrdiff_t(1u << 31) - 1; }
226 _LIBCPP_HIDE_FROM_ABI explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
227 : __phase_arrived_expected(__init(__count)) {}
228 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t update) {
229 auto const __inc = __arrived_unit * update;
230 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
232 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
233 update <= __old, "update is greater than the expected count for the current barrier phase");
235 if ((__old ^ (__old + __inc)) & __phase_bit) {
236 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
237 __phase_arrived_expected.notify_all();
239 return __old & __phase_bit;
241 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
242 auto const __test_fn = [=]() -> bool {
243 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
244 return ((__current & __phase_bit) != __phase);
246 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
248 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() {
249 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
254 # endif // !_LIBCPP_HAS_NO_TREE_BARRIER
256 template <class _CompletionF = __empty_completion>
258 __barrier_base<_CompletionF> __b_;
261 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
263 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { return __barrier_base<_CompletionF>::max(); }
265 _LIBCPP_AVAILABILITY_SYNC
266 _LIBCPP_HIDE_FROM_ABI explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
267 : __b_(__count, std::move(__completion)) {
268 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
270 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
271 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
273 "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
274 "a value greater than max()");
277 barrier(barrier const&) = delete;
278 barrier& operator=(barrier const&) = delete;
280 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI arrival_token arrive(ptrdiff_t __update = 1) {
281 _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__update > 0, "barrier:arrive must be called with a value greater than 0");
282 return __b_.arrive(__update);
284 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(arrival_token&& __phase) const {
285 __b_.wait(std::move(__phase));
287 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_wait() { wait(arrive()); }
288 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void arrive_and_drop() { __b_.arrive_and_drop(); }
291 _LIBCPP_END_NAMESPACE_STD
293 # endif // _LIBCPP_STD_VER >= 20
297 #endif // _LIBCPP_HAS_THREADS
299 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
304 # include <stdexcept>
308 #endif // _LIBCPP_BARRIER