1 //===----------------------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef _LIBCPP___ALGORITHM_SIMD_UTILS_H
10 #define _LIBCPP___ALGORITHM_SIMD_UTILS_H
12 #include <__algorithm/min.h>
13 #include <__bit/bit_cast.h>
14 #include <__bit/countl.h>
15 #include <__bit/countr.h>
17 #include <__cstddef/size_t.h>
18 #include <__type_traits/is_arithmetic.h>
19 #include <__type_traits/is_same.h>
20 #include <__utility/integer_sequence.h>
23 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
24 # pragma GCC system_header
28 #include <__undef_macros>
// Feature-detection macros for the vectorization helpers.
//
// _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS is 1 when the language level is C++14 or later, _LIBCPP_CLANG_VER is defined
// and __ALTIVEC__ is not defined; it is 0 otherwise.
//
// _LIBCPP_VECTORIZE_ALGORITHMS is 1 when _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS is 1 and __OPTIMIZE_SIZE__ is not
// defined; it is 0 otherwise.
//
// Each macro is defined exactly once, to either 0 or 1, so both can be tested with a plain `#if`.

// TODO: Find out how altivec changes things and allow vectorizations there too.
#if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && !defined(__ALTIVEC__)
#  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1
#else
#  define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 0
#endif

#if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS && !defined(__OPTIMIZE_SIZE__)
#  define _LIBCPP_VECTORIZE_ALGORITHMS 1
#else
#  define _LIBCPP_VECTORIZE_ALGORITHMS 0
#endif
43 #if _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
45 _LIBCPP_BEGIN_NAMESPACE_STD
// True when an object of type _Tp has the size *and* alignment of a 1-, 2-, 4- or 8-byte integer, i.e. when
// sizeof(_Tp) equals alignof(_Tp) and is one of those four widths. Types of any other size, or types that are less
// strictly aligned than their size, yield false.
template <class _Tp>
inline constexpr bool __can_map_to_integer_v =
    sizeof(_Tp) == alignof(_Tp) && (sizeof(_Tp) == 1 || sizeof(_Tp) == 2 || sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
// Maps a size in bytes to the fixed-width unsigned integer type of that size.
//
// The primary template is declared but never defined, so asking for a size other than 1, 2, 4 or 8 is a compile-time
// error instead of silently producing a wrong type.
template <size_t _TypeSize>
struct __get_as_integer_type_impl;

// NOTE(review): the 1-byte mapping is inferred from the 2/4/8-byte specializations below -- confirm.
template <>
struct __get_as_integer_type_impl<1> {
  using type = uint8_t;
};

template <>
struct __get_as_integer_type_impl<2> {
  using type = uint16_t;
};

template <>
struct __get_as_integer_type_impl<4> {
  using type = uint32_t;
};

template <>
struct __get_as_integer_type_impl<8> {
  using type = uint64_t;
};

// The unsigned integer type that has the same size as _Tp.
template <class _Tp>
using __get_as_integer_type_t = typename __get_as_integer_type_impl<sizeof(_Tp)>::type;
// Number of _Tp elements that fit in the vector width selected for the target: 32 bytes when __AVX__ or __MVS__ is
// defined, 16 bytes for __SSE__ or __ARM_NEON__, 8 bytes for __MMX__, and a single element when none of these macros
// is defined.
//
// This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance
// in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms
// as far as benchmarks are concerned.
#  if defined(__AVX__) || defined(__MVS__)
template <class _Tp>
inline constexpr size_t __native_vector_size = 32 / sizeof(_Tp);
#  elif defined(__SSE__) || defined(__ARM_NEON__)
template <class _Tp>
inline constexpr size_t __native_vector_size = 16 / sizeof(_Tp);
#  elif defined(__MMX__)
template <class _Tp>
inline constexpr size_t __native_vector_size = 8 / sizeof(_Tp);
#  else
template <class _Tp>
inline constexpr size_t __native_vector_size = 1;
#  endif
// Alias for a vector of _Np elements of type _ArithmeticT, declared with the `__ext_vector_type__` attribute.
template <class _ArithmeticT, size_t _Np>
using __simd_vector __attribute__((__ext_vector_type__(_Np))) = _ArithmeticT;
95 template <class _VecT
>
96 inline constexpr size_t __simd_vector_size_v
= []<bool _False
= false>() -> size_t {
97 static_assert(_False
, "Not a vector!");
100 template <class _Tp
, size_t _Np
>
101 inline constexpr size_t __simd_vector_size_v
<__simd_vector
<_Tp
, _Np
>> = _Np
;
103 template <class _Tp
, size_t _Np
>
104 _LIBCPP_HIDE_FROM_ABI _Tp
__simd_vector_underlying_type_impl(__simd_vector
<_Tp
, _Np
>) {
108 template <class _VecT
>
109 using __simd_vector_underlying_type_t
= decltype(std::__simd_vector_underlying_type_impl(_VecT
{}));
111 // This isn't inlined without always_inline when loading chars.
112 template <class _VecT
, class _Iter
>
113 [[__nodiscard__
]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT
__load_vector(_Iter __iter
) noexcept
{
114 return [=]<size_t... _Indices
>(index_sequence
<_Indices
...>) _LIBCPP_ALWAYS_INLINE noexcept
{
115 return _VecT
{__iter
[_Indices
]...};
116 }(make_index_sequence
<__simd_vector_size_v
<_VecT
>>{});
119 template <class _Tp
, size_t _Np
>
120 [[__nodiscard__
]] _LIBCPP_HIDE_FROM_ABI
bool __all_of(__simd_vector
<_Tp
, _Np
> __vec
) noexcept
{
121 return __builtin_reduce_and(__builtin_convertvector(__vec
, __simd_vector
<bool, _Np
>));
124 template <class _Tp
, size_t _Np
>
125 [[__nodiscard__
]] _LIBCPP_HIDE_FROM_ABI
size_t __find_first_set(__simd_vector
<_Tp
, _Np
> __vec
) noexcept
{
126 using __mask_vec
= __simd_vector
<bool, _Np
>;
128 // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876
129 auto __impl
= [&]<class _MaskT
>(_MaskT
) _LIBCPP_NO_SANITIZE("memory") noexcept
{
130 # if defined(_LIBCPP_BIG_ENDIAN)
131 return std::min
<size_t>(
132 _Np
, std::__countl_zero(__builtin_bit_cast(_MaskT
, __builtin_convertvector(__vec
, __mask_vec
))));
134 return std::min
<size_t>(
135 _Np
, std::__countr_zero(__builtin_bit_cast(_MaskT
, __builtin_convertvector(__vec
, __mask_vec
))));
139 if constexpr (sizeof(__mask_vec
) == sizeof(uint8_t)) {
140 return __impl(uint8_t{});
141 } else if constexpr (sizeof(__mask_vec
) == sizeof(uint16_t)) {
142 return __impl(uint16_t{});
143 } else if constexpr (sizeof(__mask_vec
) == sizeof(uint32_t)) {
144 return __impl(uint32_t{});
145 } else if constexpr (sizeof(__mask_vec
) == sizeof(uint64_t)) {
146 return __impl(uint64_t{});
148 static_assert(sizeof(__mask_vec
) == 0, "unexpected required size for mask integer type");
153 template <class _Tp
, size_t _Np
>
154 [[__nodiscard__
]] _LIBCPP_HIDE_FROM_ABI
size_t __find_first_not_set(__simd_vector
<_Tp
, _Np
> __vec
) noexcept
{
155 return std::__find_first_set(~__vec
);
158 _LIBCPP_END_NAMESPACE_STD
160 #endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS
164 #endif // _LIBCPP___ALGORITHM_SIMD_UTILS_H