1 /*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
11 #error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
14 #ifndef __BMI2INTRIN_H
15 #define __BMI2INTRIN_H
17 /* Define the default attributes for the functions in this file. */
18 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
20 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
21 /// starting at bit number \a __Y.
31 /// \headerfile <immintrin.h>
33 /// This intrinsic corresponds to the \c BZHI instruction.
36 /// The 32-bit source value to copy.
38 /// The lower 8 bits specify the bit number of the lowest bit to zero.
39 /// \returns The partially zeroed 32-bit value.
40 static __inline__
unsigned int __DEFAULT_FN_ATTRS
41 _bzhi_u32(unsigned int __X
, unsigned int __Y
)
43 return __builtin_ia32_bzhi_si(__X
, __Y
);
46 /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
47 /// into the 32-bit result, according to the mask in the unsigned 32-bit
48 /// integer \a __Y. All other bits of the result are zero.
55 /// result[m] := __X[i]
61 /// \headerfile <immintrin.h>
63 /// This intrinsic corresponds to the \c PDEP instruction.
66 /// The 32-bit source value to copy.
68 /// The 32-bit mask specifying where to deposit source bits.
69 /// \returns The 32-bit result.
70 static __inline__
unsigned int __DEFAULT_FN_ATTRS
71 _pdep_u32(unsigned int __X
, unsigned int __Y
)
73 return __builtin_ia32_pdep_si(__X
, __Y
);
76 /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
77 /// low-order bits of the 32-bit result, according to the mask in the
78 /// unsigned 32-bit integer \a __Y. All other bits of the result are zero.
85 /// result[i] := __X[m]
91 /// \headerfile <immintrin.h>
93 /// This intrinsic corresponds to the \c PEXT instruction.
96 /// The 32-bit source value to copy.
98 /// The 32-bit mask specifying which source bits to extract.
99 /// \returns The 32-bit result.
100 static __inline__
unsigned int __DEFAULT_FN_ATTRS
101 _pext_u32(unsigned int __X
, unsigned int __Y
)
103 return __builtin_ia32_pext_si(__X
, __Y
);
106 /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
107 /// 64-bit product. Stores the upper 32 bits of the product in the
108 /// memory at \a __P and returns the lower 32 bits.
110 /// \code{.operation}
111 /// Store32(__P, (__X * __Y)[63:32])
112 /// result := (__X * __Y)[31:0]
115 /// \headerfile <immintrin.h>
117 /// This intrinsic corresponds to the \c MULX instruction.
120 /// An unsigned 32-bit multiplicand.
122 /// An unsigned 32-bit multiplicand.
124 /// A pointer to memory for storing the upper half of the product.
125 /// \returns The lower half of the product.
126 static __inline__
unsigned int __DEFAULT_FN_ATTRS
127 _mulx_u32(unsigned int __X
, unsigned int __Y
, unsigned int *__P
)
129 unsigned long long __res
= (unsigned long long) __X
* __Y
;
130 *__P
= (unsigned int)(__res
>> 32);
131 return (unsigned int)__res
;
136 /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
137 /// starting at bit number \a __Y.
139 /// \code{.operation}
143 /// result[63:i] := 0
147 /// \headerfile <immintrin.h>
149 /// This intrinsic corresponds to the \c BZHI instruction.
152 /// The 64-bit source value to copy.
154 /// The lower 8 bits specify the bit number of the lowest bit to zero.
155 /// \returns The partially zeroed 64-bit value.
156 static __inline__
unsigned long long __DEFAULT_FN_ATTRS
157 _bzhi_u64(unsigned long long __X
, unsigned long long __Y
)
159 return __builtin_ia32_bzhi_di(__X
, __Y
);
162 /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
163 /// into the 64-bit result, according to the mask in the unsigned 64-bit
164 /// integer \a __Y. All other bits of the result are zero.
166 /// \code{.operation}
171 /// result[m] := __X[i]
177 /// \headerfile <immintrin.h>
179 /// This intrinsic corresponds to the \c PDEP instruction.
182 /// The 64-bit source value to copy.
184 /// The 64-bit mask specifying where to deposit source bits.
185 /// \returns The 64-bit result.
186 static __inline__
unsigned long long __DEFAULT_FN_ATTRS
187 _pdep_u64(unsigned long long __X
, unsigned long long __Y
)
189 return __builtin_ia32_pdep_di(__X
, __Y
);
192 /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
193 /// low-order bits of the 64-bit result, according to the mask in the
194 /// unsigned 64-bit integer \a __Y. All other bits of the result are zero.
196 /// \code{.operation}
201 /// result[i] := __X[m]
207 /// \headerfile <immintrin.h>
209 /// This intrinsic corresponds to the \c PEXT instruction.
212 /// The 64-bit source value to copy.
214 /// The 64-bit mask specifying which source bits to extract.
215 /// \returns The 64-bit result.
216 static __inline__
unsigned long long __DEFAULT_FN_ATTRS
217 _pext_u64(unsigned long long __X
, unsigned long long __Y
)
219 return __builtin_ia32_pext_di(__X
, __Y
);
222 /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
223 /// 128-bit product. Stores the upper 64 bits of the product to the
224 /// memory addressed by \a __P and returns the lower 64 bits.
226 /// \code{.operation}
227 /// Store64(__P, (__X * __Y)[127:64])
228 /// result := (__X * __Y)[63:0]
231 /// \headerfile <immintrin.h>
233 /// This intrinsic corresponds to the \c MULX instruction.
236 /// An unsigned 64-bit multiplicand.
238 /// An unsigned 64-bit multiplicand.
240 /// A pointer to memory for storing the upper half of the product.
241 /// \returns The lower half of the product.
242 static __inline__
unsigned long long __DEFAULT_FN_ATTRS
243 _mulx_u64 (unsigned long long __X
, unsigned long long __Y
,
244 unsigned long long *__P
)
246 unsigned __int128 __res
= (unsigned __int128
) __X
* __Y
;
247 *__P
= (unsigned long long) (__res
>> 64);
248 return (unsigned long long) __res
;
251 #endif /* __x86_64__ */
253 #undef __DEFAULT_FN_ATTRS
255 #endif /* __BMI2INTRIN_H */