[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / libcxx / include / barrier
blobe0d63fa59ffda7b94e30ec909c1e8b23d7d5cda5
1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
10 #ifndef _LIBCPP_BARRIER
11 #define _LIBCPP_BARRIER
14     barrier synopsis
16 namespace std
19   template<class CompletionFunction = see below>
20   class barrier
21   {
22   public:
23     using arrival_token = see below;
25     static constexpr ptrdiff_t max() noexcept;
27     constexpr explicit barrier(ptrdiff_t phase_count,
28                                CompletionFunction f = CompletionFunction());
29     ~barrier();
31     barrier(const barrier&) = delete;
32     barrier& operator=(const barrier&) = delete;
34     [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35     void wait(arrival_token&& arrival) const;
37     void arrive_and_wait();
38     void arrive_and_drop();
40   private:
41     CompletionFunction completion; // exposition only
42   };
48 #include <__assert> // all public C++ headers provide the assertion handler
49 #include <__atomic/atomic_base.h>
50 #include <__atomic/memory_order.h>
51 #include <__availability>
52 #include <__config>
53 #include <__memory/unique_ptr.h>
54 #include <__thread/poll_with_backoff.h>
55 #include <__thread/timed_backoff_policy.h>
56 #include <__utility/move.h>
57 #include <cstddef>
58 #include <cstdint>
59 #include <limits>
60 #include <version>
62 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63 #  pragma GCC system_header
64 #endif
66 #ifdef _LIBCPP_HAS_NO_THREADS
67 # error "<barrier> is not supported since libc++ has been configured without support for threads."
68 #endif
70 _LIBCPP_PUSH_MACROS
71 #include <__undef_macros>
73 #if _LIBCPP_STD_VER >= 14
75 _LIBCPP_BEGIN_NAMESPACE_STD
77 struct __empty_completion
79     inline _LIBCPP_INLINE_VISIBILITY
80     void operator()() noexcept
81     {
82     }
85 #ifndef _LIBCPP_HAS_NO_TREE_BARRIER
89 The default implementation of __barrier_base is a classic tree barrier.
91 It looks different from literature pseudocode for two main reasons:
92  1. Threads that call into std::barrier functions do not provide indices,
93     so a numbering step is added before the actual barrier algorithm,
94     appearing as an N+1 round to the N rounds of the tree barrier.
95  2. A great deal of attention has been paid to avoid cache line thrashing
96     by flattening the tree structure into cache-line sized arrays, that
97     are indexed in an efficient way.
101 using __barrier_phase_t = uint8_t;
103 class __barrier_algorithm_base;
105 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106 __barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
108 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
109 bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
110                                      __barrier_phase_t __old_phase);
112 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
113 void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
115 template<class _CompletionF>
116 class __barrier_base {
117     ptrdiff_t                                               __expected_;
118     unique_ptr<__barrier_algorithm_base,
119                void (*)(__barrier_algorithm_base*)>         __base_;
120     __atomic_base<ptrdiff_t>                                __expected_adjustment_;
121     _CompletionF                                            __completion_;
122     __atomic_base<__barrier_phase_t>                        __phase_;
124 public:
125     using arrival_token = __barrier_phase_t;
127     static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
128         return numeric_limits<ptrdiff_t>::max();
129     }
131     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132     __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
133             : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
134                                                &__destroy_barrier_algorithm_base),
135               __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
136     {
137     }
138     [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
139     arrival_token arrive(ptrdiff_t __update)
140     {
141         _LIBCPP_ASSERT_UNCATEGORIZED(
142             __update <= __expected_, "update is greater than the expected count for the current barrier phase");
144         auto const __old_phase = __phase_.load(memory_order_relaxed);
145         for(; __update; --__update)
146             if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
147                 __completion_();
148                 __expected_ += __expected_adjustment_.load(memory_order_relaxed);
149                 __expected_adjustment_.store(0, memory_order_relaxed);
150                 __phase_.store(__old_phase + 2, memory_order_release);
151                 __phase_.notify_all();
152             }
153         return __old_phase;
154     }
155     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
156     void wait(arrival_token&& __old_phase) const
157     {
158         auto const __test_fn = [this, __old_phase]() -> bool {
159             return __phase_.load(memory_order_acquire) != __old_phase;
160         };
161         std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
162     }
163     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
164     void arrive_and_drop()
165     {
166         __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
167         (void)arrive(1);
168     }
171 #else
175 The alternative implementation of __barrier_base is a central barrier.
177 Two versions of this algorithm are provided:
178  1. A fairly straightforward implementation of the litterature for the
179     general case where the completion function is not empty.
180  2. An optimized implementation that exploits 2's complement arithmetic
181     and well-defined overflow in atomic arithmetic, to handle the phase
182     roll-over for free.
186 template<class _CompletionF>
187 class __barrier_base {
189     __atomic_base<ptrdiff_t> __expected;
190     __atomic_base<ptrdiff_t> __arrived;
191     _CompletionF             __completion;
192     __atomic_base<bool>      __phase;
193 public:
194     using arrival_token = bool;
196     static constexpr ptrdiff_t max() noexcept {
197         return numeric_limits<ptrdiff_t>::max();
198     }
200     _LIBCPP_INLINE_VISIBILITY
201     __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
202         : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
203     {
204     }
205     [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
206     arrival_token arrive(ptrdiff_t update)
207     {
208         auto const __old_phase = __phase.load(memory_order_relaxed);
209         auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
210         auto const new_expected = __expected.load(memory_order_relaxed);
212         _LIBCPP_ASSERT_UNCATEGORIZED(
213             update <= new_expected, "update is greater than the expected count for the current barrier phase");
215         if (0 == __result) {
216             __completion();
217             __arrived.store(new_expected, memory_order_relaxed);
218             __phase.store(!__old_phase, memory_order_release);
219             __phase.notify_all();
220         }
221         return __old_phase;
222     }
223     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
224     void wait(arrival_token&& __old_phase) const
225     {
226         __phase.wait(__old_phase, memory_order_acquire);
227     }
228     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
229     void arrive_and_drop()
230     {
231         __expected.fetch_sub(1, memory_order_relaxed);
232         (void)arrive(1);
233     }
236 template<>
237 class __barrier_base<__empty_completion> {
239     static constexpr uint64_t __expected_unit = 1ull;
240     static constexpr uint64_t __arrived_unit = 1ull << 32;
241     static constexpr uint64_t __expected_mask = __arrived_unit - 1;
242     static constexpr uint64_t __phase_bit = 1ull << 63;
243     static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
245     __atomic_base<uint64_t>   __phase_arrived_expected;
247     static _LIBCPP_INLINE_VISIBILITY
248     constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
249     {
250         return ((uint64_t(1u << 31) - __count) << 32)
251               | (uint64_t(1u << 31) - __count);
252     }
254 public:
255     using arrival_token = uint64_t;
257     static constexpr ptrdiff_t max() noexcept {
258         return ptrdiff_t(1u << 31) - 1;
259     }
261     _LIBCPP_INLINE_VISIBILITY
262     explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
263         : __phase_arrived_expected(__init(__count))
264     {
265     }
266     [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
267     arrival_token arrive(ptrdiff_t update)
268     {
269         auto const __inc = __arrived_unit * update;
270         auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
272         _LIBCPP_ASSERT_UNCATEGORIZED(
273             update <= __old, "update is greater than the expected count for the current barrier phase");
275         if ((__old ^ (__old + __inc)) & __phase_bit) {
276             __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
277             __phase_arrived_expected.notify_all();
278         }
279         return __old & __phase_bit;
280     }
281     inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
282     void wait(arrival_token&& __phase) const
283     {
284         auto const __test_fn = [=]() -> bool {
285             uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
286             return ((__current & __phase_bit) != __phase);
287         };
288         __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
289     }
290     inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
291     void arrive_and_drop()
292     {
293         __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
294         (void)arrive(1);
295     }
298 #endif // !_LIBCPP_HAS_NO_TREE_BARRIER
300 template<class _CompletionF = __empty_completion>
301 class barrier {
303     __barrier_base<_CompletionF> __b_;
304 public:
305     using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
307     static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
308         return __barrier_base<_CompletionF>::max();
309     }
311     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
312     explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
313         : __b_(__count, _VSTD::move(__completion)) {
314         _LIBCPP_ASSERT_UNCATEGORIZED(
315             __count >= 0,
316             "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with a negative value");
317         _LIBCPP_ASSERT_UNCATEGORIZED(
318             __count <= max(),
319             "barrier::barrier(ptrdiff_t, CompletionFunction): barrier cannot be initialized with "
320             "a value greater than max()");
321     }
323     barrier(barrier const&) = delete;
324     barrier& operator=(barrier const&) = delete;
326     [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
327     arrival_token arrive(ptrdiff_t __update = 1)
328     {
329         _LIBCPP_ASSERT_UNCATEGORIZED(__update > 0, "barrier:arrive must be called with a value greater than 0");
330         return __b_.arrive(__update);
331     }
332     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
333     void wait(arrival_token&& __phase) const
334     {
335         __b_.wait(_VSTD::move(__phase));
336     }
337     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
338     void arrive_and_wait()
339     {
340         wait(arrive());
341     }
342     _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
343     void arrive_and_drop()
344     {
345         __b_.arrive_and_drop();
346     }
349 _LIBCPP_END_NAMESPACE_STD
351 #endif // _LIBCPP_STD_VER >= 14
353 _LIBCPP_POP_MACROS
355 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
356 #  include <atomic>
357 #  include <concepts>
358 #  include <iterator>
359 #  include <memory>
360 #  include <stdexcept>
361 #  include <variant>
362 #endif
364 #endif //_LIBCPP_BARRIER