Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / libcxx / include / __algorithm / pstl_backends / cpu_backends / find_if.h
blob170470e4fb7edde24c43ee9d223c3b6d085cfc33
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H
10 #define _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H
12 #include <__algorithm/find_if.h>
13 #include <__algorithm/pstl_backends/cpu_backends/backend.h>
14 #include <__atomic/atomic.h>
15 #include <__config>
16 #include <__functional/operations.h>
17 #include <__iterator/concepts.h>
18 #include <__iterator/iterator_traits.h>
19 #include <__type_traits/is_execution_policy.h>
20 #include <__utility/move.h>
21 #include <__utility/pair.h>
22 #include <cstddef>
23 #include <optional>
25 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
26 # pragma GCC system_header
27 #endif
29 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
31 _LIBCPP_PUSH_MACROS
32 # include <__undef_macros>
34 _LIBCPP_BEGIN_NAMESPACE_STD
36 template <class _Index, class _Brick, class _Compare>
37 _LIBCPP_HIDE_FROM_ABI optional<_Index>
38 __parallel_find(_Index __first, _Index __last, _Brick __f, _Compare __comp, bool __b_first) {
39 typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
40 const _DifferenceType __n = __last - __first;
41 _DifferenceType __initial_dist = __b_first ? __n : -1;
42 std::atomic<_DifferenceType> __extremum(__initial_dist);
43 // TODO: find out what is better here: parallel_for or parallel_reduce
44 auto __res =
45 __par_backend::__parallel_for(__first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) {
46 // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
47 // why using a shared variable scales fairly well in this situation.
48 if (__comp(__i - __first, __extremum)) {
49 _Index __result = __f(__i, __j);
50 // If not '__last' returned then we found what we want so put this to extremum
51 if (__result != __j) {
52 const _DifferenceType __k = __result - __first;
53 for (_DifferenceType __old = __extremum; __comp(__k, __old); __old = __extremum) {
54 __extremum.compare_exchange_weak(__old, __k);
58 });
59 if (!__res)
60 return nullopt;
61 return __extremum.load() != __initial_dist ? __first + __extremum.load() : __last;
64 template <class _Index, class _DifferenceType, class _Compare>
65 _LIBCPP_HIDE_FROM_ABI _Index
66 __simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept {
67 // Experiments show good block sizes like this
68 const _DifferenceType __block_size = 8;
69 alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
70 while (__end - __begin >= __block_size) {
71 _DifferenceType __found = 0;
72 _PSTL_PRAGMA_SIMD_REDUCTION(| : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size; ++__i) {
73 const _DifferenceType __t = __comp(__first, __i);
74 __lane[__i - __begin] = __t;
75 __found |= __t;
77 if (__found) {
78 _DifferenceType __i;
79 // This will vectorize
80 for (__i = 0; __i < __block_size; ++__i) {
81 if (__lane[__i]) {
82 break;
85 return __first + __begin + __i;
87 __begin += __block_size;
90 // Keep remainder scalar
91 while (__begin != __end) {
92 if (__comp(__first, __begin)) {
93 return __first + __begin;
95 ++__begin;
97 return __first + __end;
100 template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
101 _LIBCPP_HIDE_FROM_ABI optional<_ForwardIterator>
102 __pstl_find_if(__cpu_backend_tag, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) {
103 if constexpr (__is_parallel_execution_policy_v<_ExecutionPolicy> &&
104 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
105 return std::__parallel_find(
106 __first,
107 __last,
108 [&__pred](_ForwardIterator __brick_first, _ForwardIterator __brick_last) {
109 auto __res = std::__pstl_find_if<__remove_parallel_policy_t<_ExecutionPolicy>>(
110 __cpu_backend_tag{}, __brick_first, __brick_last, __pred);
111 _LIBCPP_ASSERT_INTERNAL(__res, "unseq/seq should never try to allocate!");
112 return *std::move(__res);
114 less<>{},
115 true);
116 } else if constexpr (__is_unsequenced_execution_policy_v<_ExecutionPolicy> &&
117 __has_random_access_iterator_category_or_concept<_ForwardIterator>::value) {
118 using __diff_t = __iter_diff_t<_ForwardIterator>;
119 return std::__simd_first(__first, __diff_t(0), __last - __first, [&__pred](_ForwardIterator __iter, __diff_t __i) {
120 return __pred(__iter[__i]);
122 } else {
123 return std::find_if(__first, __last, __pred);
127 _LIBCPP_END_NAMESPACE_STD
129 _LIBCPP_POP_MACROS
131 #endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17
133 #endif // _LIBCPP___ALGORITHM_PSTL_BACKENDS_CPU_BACKENDS_FIND_IF_H