/*===----------- avx10_2satcvtdsintrin.h - AVX512SATCVTDS intrinsics --------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
12 "Never use <avx10_2satcvtdsintrin.h> directly; include <immintrin.h> instead."
13 #endif // __IMMINTRIN_H
15 #ifndef __AVX10_2SATCVTDSINTRIN_H
16 #define __AVX10_2SATCVTDSINTRIN_H
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),   \
                 __min_vector_width__(256)))

#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),   \
                 __min_vector_width__(128)))
#define _mm_cvtts_roundsd_i32(__A, __R)                                        \
  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))

#define _mm_cvtts_roundsd_si32(__A, __R)                                       \
  ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R)))

#define _mm_cvtts_roundsd_u32(__A, __R)                                        \
  ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(__A),         \
                                               (const int)(__R)))

#define _mm_cvtts_roundss_i32(__A, __R)                                        \
  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))

#define _mm_cvtts_roundss_si32(__A, __R)                                       \
  ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R)))

#define _mm_cvtts_roundss_u32(__A, __R)                                        \
  ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(__A),          \
                                               (const int)(__R)))
#ifdef __x86_64__
#define _mm_cvtts_roundss_u64(__A, __R)                                        \
  ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(__A),    \
                                                     (const int)(__R)))

#define _mm_cvtts_roundsd_u64(__A, __R)                                        \
  ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(__A),   \
                                                     (const int)(__R)))

#define _mm_cvtts_roundss_i64(__A, __R)                                        \
  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A),              \
                                           (const int)(__R)))

#define _mm_cvtts_roundss_si64(__A, __R)                                       \
  ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A),              \
                                           (const int)(__R)))

#define _mm_cvtts_roundsd_si64(__A, __R)                                       \
  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A),             \
                                           (const int)(__R)))

#define _mm_cvtts_roundsd_i64(__A, __R)                                        \
  ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A),             \
                                           (const int)(__R)))
#endif /* __x86_64__ */
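
/* Usage sketch for the scalar saturating-truncation macros above (illustrative
 * only; assumes an AVX10.2-capable target and that <immintrin.h> has been
 * included; the variable names and values are hypothetical):
 *
 *   __m128d v = _mm_set_sd(1.0e12);
 *   // Saturating truncation: an out-of-range input clamps to INT_MAX/INT_MIN
 *   // instead of producing the integer-indefinite value.
 *   int i = _mm_cvtts_roundsd_i32(v, _MM_FROUND_NO_EXC);
 */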
// 128 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
      (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W,
                                                      (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epi32(__mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask(
      (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
}
// 256 Bit : Double -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvttspd_epi32(__m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
      (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
      (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(
      (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundpd_epi32(__A, __R)                                   \
  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(),            \
      (__mmask8) - 1, (int)(__R)))

#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R)                    \
  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))

#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R)                        \
  ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask(                          \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(),              \
      (__mmask8)__U, (int)(__R)))
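
/* Usage sketch for the masked double -> int32 conversions above (illustrative
 * only; variable names and values are hypothetical). Lanes whose mask bit is
 * zero keep the value from the pass-through operand:
 *
 *   __m256d src = _mm256_set_pd(4.5, 3.5, 2.5, 1.5);
 *   __m128i passthru = _mm_set1_epi32(-1);
 *   __m128i r = _mm256_mask_cvttspd_epi32(passthru, 0x5, src);
 *   // r = { 1, -1, 3, -1 }: only lanes 0 and 2 are converted (truncated).
 */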
// 128 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
      (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
      (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask(
      (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U));
}
// 256 Bit : Double -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvttspd_epu32(__m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
      (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
      (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(
      (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundpd_epu32(__A, __R)                                   \
  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(),            \
      (__mmask8) - 1, (int)(__R)))

#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R)                    \
  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R)))

#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R)                        \
  ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask(                         \
      (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(),              \
      (__mmask8)__U, (int)(__R)))
// 128 Bit : Double -> long
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
      (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W,
                                                      (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask(
      (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : Double -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttspd_epi64(__m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
      (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(
      (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundpd_epi64(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                          \
      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,           \
      (int)__R))

#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \
                                                     (__mmask8)__U, (int)__R))

#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask(                          \
      (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
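
/* Usage sketch for the double -> int64 conversions above (illustrative only;
 * the value is hypothetical). A 64-bit destination keeps the full magnitude of
 * integers that would overflow int32:
 *
 *   __m256d d = _mm256_set1_pd(9.0e12);
 *   __m256i q = _mm256_cvttspd_epi64(d);  // each lane = 9000000000000
 */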
// 128 Bit : Double -> ulong
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
      (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W,
                                                       (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) {
  return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask(
      (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : Double -> ulong
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttspd_epu64(__m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
      (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) {
  return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(
      (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundpd_epu64(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
      (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,           \
      (int)__R))

#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
      (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R))

#define _mm256_maskz_cvtts_roundpd_epu64(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask(                         \
      (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R))
// 128 Bit : float -> int
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
      (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W,
                                                      (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask(
      (__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : float -> int
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttsps_epi32(__m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
      (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
      (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(
      (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundps_epi32(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(),          \
      (__mmask8) - 1, (int)(__R)))

#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))

#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask(                          \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(),            \
      (__mmask8)__U, (int)(__R)))
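
/* Usage sketch for the *_round* float -> int32 macros above (illustrative
 * only; the value is hypothetical). The __R immediate is SAE/rounding control;
 * _MM_FROUND_NO_EXC suppresses floating-point exceptions while the conversion
 * still truncates toward zero with saturation:
 *
 *   __m256 f = _mm256_set1_ps(3.0e10f);  // out of int32 range
 *   __m256i s = _mm256_cvtts_roundps_epi32(f, _MM_FROUND_NO_EXC);
 *   // each lane of s saturates to INT32_MAX (2147483647).
 */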
// 128 Bit : float -> uint
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
      (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1)));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W,
                                                       (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask(
      (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 Bit : float -> uint
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttsps_epu32(__m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
      (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
      (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(
      (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundps_epu32(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(),          \
      (__mmask8) - 1, (int)(__R)))

#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R)))

#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask(                         \
      (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(),            \
      (__mmask8)__U, (int)(__R)))
// 128 bit : float -> long
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
      (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
      (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask(
      (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 bit : float -> long
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttsps_epi64(__m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
      (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
      (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(
      (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundps_epi64(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
      (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,   \
      (int)__R))

#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
      (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R))

#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask(                          \
      (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U,      \
      (int)__R))
// 128 bit : float -> ulong
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
      (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
      (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U));
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
  return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask(
      (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U));
}
// 256 bit : float -> ulong
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvttsps_epu64(__m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
      (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
      (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION));
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) {
  return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(
      (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U,
      _MM_FROUND_CUR_DIRECTION));
}
#define _mm256_cvtts_roundps_epu64(__A, __R)                                   \
  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
      (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1,   \
      (int)__R))

#define _mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R)                    \
  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
      (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R))

#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R)                        \
  ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask(                         \
      (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U,      \
      (int)__R))
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#endif // __AVX10_2SATCVTDSINTRIN_H