/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
11 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLDQINTRIN_H
15 #define __AVX512VLDQINTRIN_H
17 /* Define the default attributes for the functions in this file. */
18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
21 static __inline__ __m256i __DEFAULT_FN_ATTRS256
22 _mm256_mullo_epi64 (__m256i __A
, __m256i __B
) {
23 return (__m256i
) ((__v4du
) __A
* (__v4du
) __B
);
26 static __inline__ __m256i __DEFAULT_FN_ATTRS256
27 _mm256_mask_mullo_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
) {
28 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
29 (__v4di
)_mm256_mullo_epi64(__A
, __B
),
33 static __inline__ __m256i __DEFAULT_FN_ATTRS256
34 _mm256_maskz_mullo_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
) {
35 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
36 (__v4di
)_mm256_mullo_epi64(__A
, __B
),
37 (__v4di
)_mm256_setzero_si256());
40 static __inline__ __m128i __DEFAULT_FN_ATTRS128
41 _mm_mullo_epi64 (__m128i __A
, __m128i __B
) {
42 return (__m128i
) ((__v2du
) __A
* (__v2du
) __B
);
45 static __inline__ __m128i __DEFAULT_FN_ATTRS128
46 _mm_mask_mullo_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
47 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
48 (__v2di
)_mm_mullo_epi64(__A
, __B
),
52 static __inline__ __m128i __DEFAULT_FN_ATTRS128
53 _mm_maskz_mullo_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
) {
54 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
55 (__v2di
)_mm_mullo_epi64(__A
, __B
),
56 (__v2di
)_mm_setzero_si128());
59 static __inline__ __m256d __DEFAULT_FN_ATTRS256
60 _mm256_mask_andnot_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
61 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
62 (__v4df
)_mm256_andnot_pd(__A
, __B
),
66 static __inline__ __m256d __DEFAULT_FN_ATTRS256
67 _mm256_maskz_andnot_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
68 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
69 (__v4df
)_mm256_andnot_pd(__A
, __B
),
70 (__v4df
)_mm256_setzero_pd());
73 static __inline__ __m128d __DEFAULT_FN_ATTRS128
74 _mm_mask_andnot_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
75 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
76 (__v2df
)_mm_andnot_pd(__A
, __B
),
80 static __inline__ __m128d __DEFAULT_FN_ATTRS128
81 _mm_maskz_andnot_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
82 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
83 (__v2df
)_mm_andnot_pd(__A
, __B
),
84 (__v2df
)_mm_setzero_pd());
87 static __inline__ __m256 __DEFAULT_FN_ATTRS256
88 _mm256_mask_andnot_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
89 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
90 (__v8sf
)_mm256_andnot_ps(__A
, __B
),
94 static __inline__ __m256 __DEFAULT_FN_ATTRS256
95 _mm256_maskz_andnot_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
96 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
97 (__v8sf
)_mm256_andnot_ps(__A
, __B
),
98 (__v8sf
)_mm256_setzero_ps());
101 static __inline__ __m128 __DEFAULT_FN_ATTRS128
102 _mm_mask_andnot_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
103 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
104 (__v4sf
)_mm_andnot_ps(__A
, __B
),
108 static __inline__ __m128 __DEFAULT_FN_ATTRS128
109 _mm_maskz_andnot_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
110 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
111 (__v4sf
)_mm_andnot_ps(__A
, __B
),
112 (__v4sf
)_mm_setzero_ps());
115 static __inline__ __m256d __DEFAULT_FN_ATTRS256
116 _mm256_mask_and_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
117 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
118 (__v4df
)_mm256_and_pd(__A
, __B
),
122 static __inline__ __m256d __DEFAULT_FN_ATTRS256
123 _mm256_maskz_and_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
124 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
125 (__v4df
)_mm256_and_pd(__A
, __B
),
126 (__v4df
)_mm256_setzero_pd());
129 static __inline__ __m128d __DEFAULT_FN_ATTRS128
130 _mm_mask_and_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
131 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
132 (__v2df
)_mm_and_pd(__A
, __B
),
136 static __inline__ __m128d __DEFAULT_FN_ATTRS128
137 _mm_maskz_and_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
138 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
139 (__v2df
)_mm_and_pd(__A
, __B
),
140 (__v2df
)_mm_setzero_pd());
143 static __inline__ __m256 __DEFAULT_FN_ATTRS256
144 _mm256_mask_and_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
145 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
146 (__v8sf
)_mm256_and_ps(__A
, __B
),
150 static __inline__ __m256 __DEFAULT_FN_ATTRS256
151 _mm256_maskz_and_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
152 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
153 (__v8sf
)_mm256_and_ps(__A
, __B
),
154 (__v8sf
)_mm256_setzero_ps());
157 static __inline__ __m128 __DEFAULT_FN_ATTRS128
158 _mm_mask_and_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
159 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
160 (__v4sf
)_mm_and_ps(__A
, __B
),
164 static __inline__ __m128 __DEFAULT_FN_ATTRS128
165 _mm_maskz_and_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
166 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
167 (__v4sf
)_mm_and_ps(__A
, __B
),
168 (__v4sf
)_mm_setzero_ps());
171 static __inline__ __m256d __DEFAULT_FN_ATTRS256
172 _mm256_mask_xor_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
173 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
174 (__v4df
)_mm256_xor_pd(__A
, __B
),
178 static __inline__ __m256d __DEFAULT_FN_ATTRS256
179 _mm256_maskz_xor_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
180 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
181 (__v4df
)_mm256_xor_pd(__A
, __B
),
182 (__v4df
)_mm256_setzero_pd());
185 static __inline__ __m128d __DEFAULT_FN_ATTRS128
186 _mm_mask_xor_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
187 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
188 (__v2df
)_mm_xor_pd(__A
, __B
),
192 static __inline__ __m128d __DEFAULT_FN_ATTRS128
193 _mm_maskz_xor_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
194 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
195 (__v2df
)_mm_xor_pd(__A
, __B
),
196 (__v2df
)_mm_setzero_pd());
199 static __inline__ __m256 __DEFAULT_FN_ATTRS256
200 _mm256_mask_xor_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
201 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
202 (__v8sf
)_mm256_xor_ps(__A
, __B
),
206 static __inline__ __m256 __DEFAULT_FN_ATTRS256
207 _mm256_maskz_xor_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
208 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
209 (__v8sf
)_mm256_xor_ps(__A
, __B
),
210 (__v8sf
)_mm256_setzero_ps());
213 static __inline__ __m128 __DEFAULT_FN_ATTRS128
214 _mm_mask_xor_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
215 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
216 (__v4sf
)_mm_xor_ps(__A
, __B
),
220 static __inline__ __m128 __DEFAULT_FN_ATTRS128
221 _mm_maskz_xor_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
222 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
223 (__v4sf
)_mm_xor_ps(__A
, __B
),
224 (__v4sf
)_mm_setzero_ps());
227 static __inline__ __m256d __DEFAULT_FN_ATTRS256
228 _mm256_mask_or_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
229 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
230 (__v4df
)_mm256_or_pd(__A
, __B
),
234 static __inline__ __m256d __DEFAULT_FN_ATTRS256
235 _mm256_maskz_or_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
236 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
237 (__v4df
)_mm256_or_pd(__A
, __B
),
238 (__v4df
)_mm256_setzero_pd());
241 static __inline__ __m128d __DEFAULT_FN_ATTRS128
242 _mm_mask_or_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
243 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
244 (__v2df
)_mm_or_pd(__A
, __B
),
248 static __inline__ __m128d __DEFAULT_FN_ATTRS128
249 _mm_maskz_or_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
250 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
251 (__v2df
)_mm_or_pd(__A
, __B
),
252 (__v2df
)_mm_setzero_pd());
255 static __inline__ __m256 __DEFAULT_FN_ATTRS256
256 _mm256_mask_or_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
257 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
258 (__v8sf
)_mm256_or_ps(__A
, __B
),
262 static __inline__ __m256 __DEFAULT_FN_ATTRS256
263 _mm256_maskz_or_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
264 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
265 (__v8sf
)_mm256_or_ps(__A
, __B
),
266 (__v8sf
)_mm256_setzero_ps());
269 static __inline__ __m128 __DEFAULT_FN_ATTRS128
270 _mm_mask_or_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
271 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
272 (__v4sf
)_mm_or_ps(__A
, __B
),
276 static __inline__ __m128 __DEFAULT_FN_ATTRS128
277 _mm_maskz_or_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
278 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
279 (__v4sf
)_mm_or_ps(__A
, __B
),
280 (__v4sf
)_mm_setzero_ps());
283 static __inline__ __m128i __DEFAULT_FN_ATTRS128
284 _mm_cvtpd_epi64 (__m128d __A
) {
285 return (__m128i
) __builtin_ia32_cvtpd2qq128_mask ((__v2df
) __A
,
286 (__v2di
) _mm_setzero_si128(),
290 static __inline__ __m128i __DEFAULT_FN_ATTRS128
291 _mm_mask_cvtpd_epi64 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
292 return (__m128i
) __builtin_ia32_cvtpd2qq128_mask ((__v2df
) __A
,
297 static __inline__ __m128i __DEFAULT_FN_ATTRS128
298 _mm_maskz_cvtpd_epi64 (__mmask8 __U
, __m128d __A
) {
299 return (__m128i
) __builtin_ia32_cvtpd2qq128_mask ((__v2df
) __A
,
300 (__v2di
) _mm_setzero_si128(),
304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
305 _mm256_cvtpd_epi64 (__m256d __A
) {
306 return (__m256i
) __builtin_ia32_cvtpd2qq256_mask ((__v4df
) __A
,
307 (__v4di
) _mm256_setzero_si256(),
311 static __inline__ __m256i __DEFAULT_FN_ATTRS256
312 _mm256_mask_cvtpd_epi64 (__m256i __W
, __mmask8 __U
, __m256d __A
) {
313 return (__m256i
) __builtin_ia32_cvtpd2qq256_mask ((__v4df
) __A
,
318 static __inline__ __m256i __DEFAULT_FN_ATTRS256
319 _mm256_maskz_cvtpd_epi64 (__mmask8 __U
, __m256d __A
) {
320 return (__m256i
) __builtin_ia32_cvtpd2qq256_mask ((__v4df
) __A
,
321 (__v4di
) _mm256_setzero_si256(),
325 static __inline__ __m128i __DEFAULT_FN_ATTRS128
326 _mm_cvtpd_epu64 (__m128d __A
) {
327 return (__m128i
) __builtin_ia32_cvtpd2uqq128_mask ((__v2df
) __A
,
328 (__v2di
) _mm_setzero_si128(),
332 static __inline__ __m128i __DEFAULT_FN_ATTRS128
333 _mm_mask_cvtpd_epu64 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
334 return (__m128i
) __builtin_ia32_cvtpd2uqq128_mask ((__v2df
) __A
,
339 static __inline__ __m128i __DEFAULT_FN_ATTRS128
340 _mm_maskz_cvtpd_epu64 (__mmask8 __U
, __m128d __A
) {
341 return (__m128i
) __builtin_ia32_cvtpd2uqq128_mask ((__v2df
) __A
,
342 (__v2di
) _mm_setzero_si128(),
346 static __inline__ __m256i __DEFAULT_FN_ATTRS256
347 _mm256_cvtpd_epu64 (__m256d __A
) {
348 return (__m256i
) __builtin_ia32_cvtpd2uqq256_mask ((__v4df
) __A
,
349 (__v4di
) _mm256_setzero_si256(),
353 static __inline__ __m256i __DEFAULT_FN_ATTRS256
354 _mm256_mask_cvtpd_epu64 (__m256i __W
, __mmask8 __U
, __m256d __A
) {
355 return (__m256i
) __builtin_ia32_cvtpd2uqq256_mask ((__v4df
) __A
,
360 static __inline__ __m256i __DEFAULT_FN_ATTRS256
361 _mm256_maskz_cvtpd_epu64 (__mmask8 __U
, __m256d __A
) {
362 return (__m256i
) __builtin_ia32_cvtpd2uqq256_mask ((__v4df
) __A
,
363 (__v4di
) _mm256_setzero_si256(),
367 static __inline__ __m128i __DEFAULT_FN_ATTRS128
368 _mm_cvtps_epi64 (__m128 __A
) {
369 return (__m128i
) __builtin_ia32_cvtps2qq128_mask ((__v4sf
) __A
,
370 (__v2di
) _mm_setzero_si128(),
374 static __inline__ __m128i __DEFAULT_FN_ATTRS128
375 _mm_mask_cvtps_epi64 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
376 return (__m128i
) __builtin_ia32_cvtps2qq128_mask ((__v4sf
) __A
,
381 static __inline__ __m128i __DEFAULT_FN_ATTRS128
382 _mm_maskz_cvtps_epi64 (__mmask8 __U
, __m128 __A
) {
383 return (__m128i
) __builtin_ia32_cvtps2qq128_mask ((__v4sf
) __A
,
384 (__v2di
) _mm_setzero_si128(),
388 static __inline__ __m256i __DEFAULT_FN_ATTRS256
389 _mm256_cvtps_epi64 (__m128 __A
) {
390 return (__m256i
) __builtin_ia32_cvtps2qq256_mask ((__v4sf
) __A
,
391 (__v4di
) _mm256_setzero_si256(),
395 static __inline__ __m256i __DEFAULT_FN_ATTRS256
396 _mm256_mask_cvtps_epi64 (__m256i __W
, __mmask8 __U
, __m128 __A
) {
397 return (__m256i
) __builtin_ia32_cvtps2qq256_mask ((__v4sf
) __A
,
402 static __inline__ __m256i __DEFAULT_FN_ATTRS256
403 _mm256_maskz_cvtps_epi64 (__mmask8 __U
, __m128 __A
) {
404 return (__m256i
) __builtin_ia32_cvtps2qq256_mask ((__v4sf
) __A
,
405 (__v4di
) _mm256_setzero_si256(),
409 static __inline__ __m128i __DEFAULT_FN_ATTRS128
410 _mm_cvtps_epu64 (__m128 __A
) {
411 return (__m128i
) __builtin_ia32_cvtps2uqq128_mask ((__v4sf
) __A
,
412 (__v2di
) _mm_setzero_si128(),
416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
417 _mm_mask_cvtps_epu64 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
418 return (__m128i
) __builtin_ia32_cvtps2uqq128_mask ((__v4sf
) __A
,
423 static __inline__ __m128i __DEFAULT_FN_ATTRS128
424 _mm_maskz_cvtps_epu64 (__mmask8 __U
, __m128 __A
) {
425 return (__m128i
) __builtin_ia32_cvtps2uqq128_mask ((__v4sf
) __A
,
426 (__v2di
) _mm_setzero_si128(),
430 static __inline__ __m256i __DEFAULT_FN_ATTRS256
431 _mm256_cvtps_epu64 (__m128 __A
) {
432 return (__m256i
) __builtin_ia32_cvtps2uqq256_mask ((__v4sf
) __A
,
433 (__v4di
) _mm256_setzero_si256(),
437 static __inline__ __m256i __DEFAULT_FN_ATTRS256
438 _mm256_mask_cvtps_epu64 (__m256i __W
, __mmask8 __U
, __m128 __A
) {
439 return (__m256i
) __builtin_ia32_cvtps2uqq256_mask ((__v4sf
) __A
,
444 static __inline__ __m256i __DEFAULT_FN_ATTRS256
445 _mm256_maskz_cvtps_epu64 (__mmask8 __U
, __m128 __A
) {
446 return (__m256i
) __builtin_ia32_cvtps2uqq256_mask ((__v4sf
) __A
,
447 (__v4di
) _mm256_setzero_si256(),
451 static __inline__ __m128d __DEFAULT_FN_ATTRS128
452 _mm_cvtepi64_pd (__m128i __A
) {
453 return (__m128d
)__builtin_convertvector((__v2di
)__A
, __v2df
);
456 static __inline__ __m128d __DEFAULT_FN_ATTRS128
457 _mm_mask_cvtepi64_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
458 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
459 (__v2df
)_mm_cvtepi64_pd(__A
),
463 static __inline__ __m128d __DEFAULT_FN_ATTRS128
464 _mm_maskz_cvtepi64_pd (__mmask8 __U
, __m128i __A
) {
465 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
466 (__v2df
)_mm_cvtepi64_pd(__A
),
467 (__v2df
)_mm_setzero_pd());
470 static __inline__ __m256d __DEFAULT_FN_ATTRS256
471 _mm256_cvtepi64_pd (__m256i __A
) {
472 return (__m256d
)__builtin_convertvector((__v4di
)__A
, __v4df
);
475 static __inline__ __m256d __DEFAULT_FN_ATTRS256
476 _mm256_mask_cvtepi64_pd (__m256d __W
, __mmask8 __U
, __m256i __A
) {
477 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
478 (__v4df
)_mm256_cvtepi64_pd(__A
),
482 static __inline__ __m256d __DEFAULT_FN_ATTRS256
483 _mm256_maskz_cvtepi64_pd (__mmask8 __U
, __m256i __A
) {
484 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
485 (__v4df
)_mm256_cvtepi64_pd(__A
),
486 (__v4df
)_mm256_setzero_pd());
489 static __inline__ __m128 __DEFAULT_FN_ATTRS128
490 _mm_cvtepi64_ps (__m128i __A
) {
491 return (__m128
) __builtin_ia32_cvtqq2ps128_mask ((__v2di
) __A
,
492 (__v4sf
) _mm_setzero_ps(),
496 static __inline__ __m128 __DEFAULT_FN_ATTRS128
497 _mm_mask_cvtepi64_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
498 return (__m128
) __builtin_ia32_cvtqq2ps128_mask ((__v2di
) __A
,
503 static __inline__ __m128 __DEFAULT_FN_ATTRS128
504 _mm_maskz_cvtepi64_ps (__mmask8 __U
, __m128i __A
) {
505 return (__m128
) __builtin_ia32_cvtqq2ps128_mask ((__v2di
) __A
,
506 (__v4sf
) _mm_setzero_ps(),
510 static __inline__ __m128 __DEFAULT_FN_ATTRS256
511 _mm256_cvtepi64_ps (__m256i __A
) {
512 return (__m128
)__builtin_convertvector((__v4di
)__A
, __v4sf
);
515 static __inline__ __m128 __DEFAULT_FN_ATTRS256
516 _mm256_mask_cvtepi64_ps (__m128 __W
, __mmask8 __U
, __m256i __A
) {
517 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
518 (__v4sf
)_mm256_cvtepi64_ps(__A
),
522 static __inline__ __m128 __DEFAULT_FN_ATTRS256
523 _mm256_maskz_cvtepi64_ps (__mmask8 __U
, __m256i __A
) {
524 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
525 (__v4sf
)_mm256_cvtepi64_ps(__A
),
526 (__v4sf
)_mm_setzero_ps());
529 static __inline__ __m128i __DEFAULT_FN_ATTRS128
530 _mm_cvttpd_epi64 (__m128d __A
) {
531 return (__m128i
) __builtin_ia32_cvttpd2qq128_mask ((__v2df
) __A
,
532 (__v2di
) _mm_setzero_si128(),
536 static __inline__ __m128i __DEFAULT_FN_ATTRS128
537 _mm_mask_cvttpd_epi64 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
538 return (__m128i
) __builtin_ia32_cvttpd2qq128_mask ((__v2df
) __A
,
543 static __inline__ __m128i __DEFAULT_FN_ATTRS128
544 _mm_maskz_cvttpd_epi64 (__mmask8 __U
, __m128d __A
) {
545 return (__m128i
) __builtin_ia32_cvttpd2qq128_mask ((__v2df
) __A
,
546 (__v2di
) _mm_setzero_si128(),
550 static __inline__ __m256i __DEFAULT_FN_ATTRS256
551 _mm256_cvttpd_epi64 (__m256d __A
) {
552 return (__m256i
) __builtin_ia32_cvttpd2qq256_mask ((__v4df
) __A
,
553 (__v4di
) _mm256_setzero_si256(),
557 static __inline__ __m256i __DEFAULT_FN_ATTRS256
558 _mm256_mask_cvttpd_epi64 (__m256i __W
, __mmask8 __U
, __m256d __A
) {
559 return (__m256i
) __builtin_ia32_cvttpd2qq256_mask ((__v4df
) __A
,
564 static __inline__ __m256i __DEFAULT_FN_ATTRS256
565 _mm256_maskz_cvttpd_epi64 (__mmask8 __U
, __m256d __A
) {
566 return (__m256i
) __builtin_ia32_cvttpd2qq256_mask ((__v4df
) __A
,
567 (__v4di
) _mm256_setzero_si256(),
571 static __inline__ __m128i __DEFAULT_FN_ATTRS128
572 _mm_cvttpd_epu64 (__m128d __A
) {
573 return (__m128i
) __builtin_ia32_cvttpd2uqq128_mask ((__v2df
) __A
,
574 (__v2di
) _mm_setzero_si128(),
578 static __inline__ __m128i __DEFAULT_FN_ATTRS128
579 _mm_mask_cvttpd_epu64 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
580 return (__m128i
) __builtin_ia32_cvttpd2uqq128_mask ((__v2df
) __A
,
585 static __inline__ __m128i __DEFAULT_FN_ATTRS128
586 _mm_maskz_cvttpd_epu64 (__mmask8 __U
, __m128d __A
) {
587 return (__m128i
) __builtin_ia32_cvttpd2uqq128_mask ((__v2df
) __A
,
588 (__v2di
) _mm_setzero_si128(),
592 static __inline__ __m256i __DEFAULT_FN_ATTRS256
593 _mm256_cvttpd_epu64 (__m256d __A
) {
594 return (__m256i
) __builtin_ia32_cvttpd2uqq256_mask ((__v4df
) __A
,
595 (__v4di
) _mm256_setzero_si256(),
599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
600 _mm256_mask_cvttpd_epu64 (__m256i __W
, __mmask8 __U
, __m256d __A
) {
601 return (__m256i
) __builtin_ia32_cvttpd2uqq256_mask ((__v4df
) __A
,
606 static __inline__ __m256i __DEFAULT_FN_ATTRS256
607 _mm256_maskz_cvttpd_epu64 (__mmask8 __U
, __m256d __A
) {
608 return (__m256i
) __builtin_ia32_cvttpd2uqq256_mask ((__v4df
) __A
,
609 (__v4di
) _mm256_setzero_si256(),
613 static __inline__ __m128i __DEFAULT_FN_ATTRS128
614 _mm_cvttps_epi64 (__m128 __A
) {
615 return (__m128i
) __builtin_ia32_cvttps2qq128_mask ((__v4sf
) __A
,
616 (__v2di
) _mm_setzero_si128(),
620 static __inline__ __m128i __DEFAULT_FN_ATTRS128
621 _mm_mask_cvttps_epi64 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
622 return (__m128i
) __builtin_ia32_cvttps2qq128_mask ((__v4sf
) __A
,
627 static __inline__ __m128i __DEFAULT_FN_ATTRS128
628 _mm_maskz_cvttps_epi64 (__mmask8 __U
, __m128 __A
) {
629 return (__m128i
) __builtin_ia32_cvttps2qq128_mask ((__v4sf
) __A
,
630 (__v2di
) _mm_setzero_si128(),
634 static __inline__ __m256i __DEFAULT_FN_ATTRS256
635 _mm256_cvttps_epi64 (__m128 __A
) {
636 return (__m256i
) __builtin_ia32_cvttps2qq256_mask ((__v4sf
) __A
,
637 (__v4di
) _mm256_setzero_si256(),
641 static __inline__ __m256i __DEFAULT_FN_ATTRS256
642 _mm256_mask_cvttps_epi64 (__m256i __W
, __mmask8 __U
, __m128 __A
) {
643 return (__m256i
) __builtin_ia32_cvttps2qq256_mask ((__v4sf
) __A
,
648 static __inline__ __m256i __DEFAULT_FN_ATTRS256
649 _mm256_maskz_cvttps_epi64 (__mmask8 __U
, __m128 __A
) {
650 return (__m256i
) __builtin_ia32_cvttps2qq256_mask ((__v4sf
) __A
,
651 (__v4di
) _mm256_setzero_si256(),
655 static __inline__ __m128i __DEFAULT_FN_ATTRS128
656 _mm_cvttps_epu64 (__m128 __A
) {
657 return (__m128i
) __builtin_ia32_cvttps2uqq128_mask ((__v4sf
) __A
,
658 (__v2di
) _mm_setzero_si128(),
662 static __inline__ __m128i __DEFAULT_FN_ATTRS128
663 _mm_mask_cvttps_epu64 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
664 return (__m128i
) __builtin_ia32_cvttps2uqq128_mask ((__v4sf
) __A
,
669 static __inline__ __m128i __DEFAULT_FN_ATTRS128
670 _mm_maskz_cvttps_epu64 (__mmask8 __U
, __m128 __A
) {
671 return (__m128i
) __builtin_ia32_cvttps2uqq128_mask ((__v4sf
) __A
,
672 (__v2di
) _mm_setzero_si128(),
676 static __inline__ __m256i __DEFAULT_FN_ATTRS256
677 _mm256_cvttps_epu64 (__m128 __A
) {
678 return (__m256i
) __builtin_ia32_cvttps2uqq256_mask ((__v4sf
) __A
,
679 (__v4di
) _mm256_setzero_si256(),
683 static __inline__ __m256i __DEFAULT_FN_ATTRS256
684 _mm256_mask_cvttps_epu64 (__m256i __W
, __mmask8 __U
, __m128 __A
) {
685 return (__m256i
) __builtin_ia32_cvttps2uqq256_mask ((__v4sf
) __A
,
690 static __inline__ __m256i __DEFAULT_FN_ATTRS256
691 _mm256_maskz_cvttps_epu64 (__mmask8 __U
, __m128 __A
) {
692 return (__m256i
) __builtin_ia32_cvttps2uqq256_mask ((__v4sf
) __A
,
693 (__v4di
) _mm256_setzero_si256(),
697 static __inline__ __m128d __DEFAULT_FN_ATTRS128
698 _mm_cvtepu64_pd (__m128i __A
) {
699 return (__m128d
)__builtin_convertvector((__v2du
)__A
, __v2df
);
702 static __inline__ __m128d __DEFAULT_FN_ATTRS128
703 _mm_mask_cvtepu64_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
704 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
705 (__v2df
)_mm_cvtepu64_pd(__A
),
709 static __inline__ __m128d __DEFAULT_FN_ATTRS128
710 _mm_maskz_cvtepu64_pd (__mmask8 __U
, __m128i __A
) {
711 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
712 (__v2df
)_mm_cvtepu64_pd(__A
),
713 (__v2df
)_mm_setzero_pd());
716 static __inline__ __m256d __DEFAULT_FN_ATTRS256
717 _mm256_cvtepu64_pd (__m256i __A
) {
718 return (__m256d
)__builtin_convertvector((__v4du
)__A
, __v4df
);
721 static __inline__ __m256d __DEFAULT_FN_ATTRS256
722 _mm256_mask_cvtepu64_pd (__m256d __W
, __mmask8 __U
, __m256i __A
) {
723 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
724 (__v4df
)_mm256_cvtepu64_pd(__A
),
728 static __inline__ __m256d __DEFAULT_FN_ATTRS256
729 _mm256_maskz_cvtepu64_pd (__mmask8 __U
, __m256i __A
) {
730 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
731 (__v4df
)_mm256_cvtepu64_pd(__A
),
732 (__v4df
)_mm256_setzero_pd());
735 static __inline__ __m128 __DEFAULT_FN_ATTRS128
736 _mm_cvtepu64_ps (__m128i __A
) {
737 return (__m128
) __builtin_ia32_cvtuqq2ps128_mask ((__v2di
) __A
,
738 (__v4sf
) _mm_setzero_ps(),
742 static __inline__ __m128 __DEFAULT_FN_ATTRS128
743 _mm_mask_cvtepu64_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
744 return (__m128
) __builtin_ia32_cvtuqq2ps128_mask ((__v2di
) __A
,
749 static __inline__ __m128 __DEFAULT_FN_ATTRS128
750 _mm_maskz_cvtepu64_ps (__mmask8 __U
, __m128i __A
) {
751 return (__m128
) __builtin_ia32_cvtuqq2ps128_mask ((__v2di
) __A
,
752 (__v4sf
) _mm_setzero_ps(),
756 static __inline__ __m128 __DEFAULT_FN_ATTRS256
757 _mm256_cvtepu64_ps (__m256i __A
) {
758 return (__m128
)__builtin_convertvector((__v4du
)__A
, __v4sf
);
761 static __inline__ __m128 __DEFAULT_FN_ATTRS256
762 _mm256_mask_cvtepu64_ps (__m128 __W
, __mmask8 __U
, __m256i __A
) {
763 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
764 (__v4sf
)_mm256_cvtepu64_ps(__A
),
768 static __inline__ __m128 __DEFAULT_FN_ATTRS256
769 _mm256_maskz_cvtepu64_ps (__mmask8 __U
, __m256i __A
) {
770 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
771 (__v4sf
)_mm256_cvtepu64_ps(__A
),
772 (__v4sf
)_mm_setzero_ps());
/* Range operation on packed doubles A and B, controlled by immediate C; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm_range_pd(A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1))
/* Merge-masked range operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_mask_range_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U)))
/* Zero-masked range operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_maskz_range_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), (int)(C), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U)))
/* Range operation on packed doubles A and B, controlled by immediate C; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm256_range_pd(A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1))
/* Merge-masked range operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_mask_range_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U)))
/* Zero-masked range operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_maskz_range_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                           (__v4df)(__m256d)(B), (int)(C), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U)))
/* Range operation on packed floats A and B, controlled by immediate C; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm_range_ps(A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1))
/* Merge-masked range operation on packed floats: W is the pass-through, U the mask. */
#define _mm_mask_range_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U)))
/* Zero-masked range operation on packed floats under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_maskz_range_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), (int)(C), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U)))
/* Range operation on packed floats A and B, controlled by immediate C; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm256_range_ps(A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1))
/* Merge-masked range operation on packed floats: W is the pass-through, U the mask. */
#define _mm256_mask_range_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U)))
/* Zero-masked range operation on packed floats under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_maskz_range_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                          (__v8sf)(__m256)(B), (int)(C), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U)))
/* Reduce operation on packed doubles A, controlled by immediate B; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm_reduce_pd(A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1))
/* Merge-masked reduce operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_mask_reduce_pd(W, U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U)))
/* Zero-masked reduce operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_maskz_reduce_pd(U, A, B) \
  ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U)))
/* Reduce operation on packed doubles A, controlled by immediate B; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm256_reduce_pd(A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)-1))
/* Merge-masked reduce operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_mask_reduce_pd(W, U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U)))
/* Zero-masked reduce operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_maskz_reduce_pd(U, A, B) \
  ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U)))
/* Reduce operation on packed floats A, controlled by immediate B; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm_reduce_ps(A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1))
/* Merge-masked reduce operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_mask_reduce_ps(W, U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U)))
/* Zero-masked reduce operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm_maskz_reduce_ps(U, A, B) \
  ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U)))
/* Reduce operation on packed floats A, controlled by immediate B; all lanes.
 * NOTE(review): all-ones mask operand restored -- confirm. */
#define _mm256_reduce_ps(A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1))
/* Merge-masked reduce operation: W is the pass-through, U the mask.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_mask_reduce_ps(W, U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)(__m256)(W), \
                                           (__mmask8)(U)))
/* Zero-masked reduce operation under mask U.
 * NOTE(review): (__mmask8)(U) operand restored -- confirm. */
#define _mm256_maskz_reduce_ps(U, A, B) \
  ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U)))
905 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
906 _mm_movepi32_mask (__m128i __A
)
908 return (__mmask8
) __builtin_ia32_cvtd2mask128 ((__v4si
) __A
);
911 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
912 _mm256_movepi32_mask (__m256i __A
)
914 return (__mmask8
) __builtin_ia32_cvtd2mask256 ((__v8si
) __A
);
917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
918 _mm_movm_epi32 (__mmask8 __A
)
920 return (__m128i
) __builtin_ia32_cvtmask2d128 (__A
);
923 static __inline__ __m256i __DEFAULT_FN_ATTRS256
924 _mm256_movm_epi32 (__mmask8 __A
)
926 return (__m256i
) __builtin_ia32_cvtmask2d256 (__A
);
929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
930 _mm_movm_epi64 (__mmask8 __A
)
932 return (__m128i
) __builtin_ia32_cvtmask2q128 (__A
);
935 static __inline__ __m256i __DEFAULT_FN_ATTRS256
936 _mm256_movm_epi64 (__mmask8 __A
)
938 return (__m256i
) __builtin_ia32_cvtmask2q256 (__A
);
941 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
942 _mm_movepi64_mask (__m128i __A
)
944 return (__mmask8
) __builtin_ia32_cvtq2mask128 ((__v2di
) __A
);
947 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
948 _mm256_movepi64_mask (__m256i __A
)
950 return (__mmask8
) __builtin_ia32_cvtq2mask256 ((__v4di
) __A
);
953 static __inline__ __m256 __DEFAULT_FN_ATTRS256
954 _mm256_broadcast_f32x2 (__m128 __A
)
956 return (__m256
)__builtin_shufflevector((__v4sf
)__A
, (__v4sf
)__A
,
957 0, 1, 0, 1, 0, 1, 0, 1);
960 static __inline__ __m256 __DEFAULT_FN_ATTRS256
961 _mm256_mask_broadcast_f32x2 (__m256 __O
, __mmask8 __M
, __m128 __A
)
963 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__M
,
964 (__v8sf
)_mm256_broadcast_f32x2(__A
),
968 static __inline__ __m256 __DEFAULT_FN_ATTRS256
969 _mm256_maskz_broadcast_f32x2 (__mmask8 __M
, __m128 __A
)
971 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__M
,
972 (__v8sf
)_mm256_broadcast_f32x2(__A
),
973 (__v8sf
)_mm256_setzero_ps());
976 static __inline__ __m256d __DEFAULT_FN_ATTRS256
977 _mm256_broadcast_f64x2(__m128d __A
)
979 return (__m256d
)__builtin_shufflevector((__v2df
)__A
, (__v2df
)__A
,
983 static __inline__ __m256d __DEFAULT_FN_ATTRS256
984 _mm256_mask_broadcast_f64x2(__m256d __O
, __mmask8 __M
, __m128d __A
)
986 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__M
,
987 (__v4df
)_mm256_broadcast_f64x2(__A
),
991 static __inline__ __m256d __DEFAULT_FN_ATTRS256
992 _mm256_maskz_broadcast_f64x2 (__mmask8 __M
, __m128d __A
)
994 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__M
,
995 (__v4df
)_mm256_broadcast_f64x2(__A
),
996 (__v4df
)_mm256_setzero_pd());
999 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1000 _mm_broadcast_i32x2 (__m128i __A
)
1002 return (__m128i
)__builtin_shufflevector((__v4si
)__A
, (__v4si
)__A
,
1006 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1007 _mm_mask_broadcast_i32x2 (__m128i __O
, __mmask8 __M
, __m128i __A
)
1009 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
1010 (__v4si
)_mm_broadcast_i32x2(__A
),
1014 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1015 _mm_maskz_broadcast_i32x2 (__mmask8 __M
, __m128i __A
)
1017 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
1018 (__v4si
)_mm_broadcast_i32x2(__A
),
1019 (__v4si
)_mm_setzero_si128());
1022 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1023 _mm256_broadcast_i32x2 (__m128i __A
)
1025 return (__m256i
)__builtin_shufflevector((__v4si
)__A
, (__v4si
)__A
,
1026 0, 1, 0, 1, 0, 1, 0, 1);
1029 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1030 _mm256_mask_broadcast_i32x2 (__m256i __O
, __mmask8 __M
, __m128i __A
)
1032 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
1033 (__v8si
)_mm256_broadcast_i32x2(__A
),
1037 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1038 _mm256_maskz_broadcast_i32x2 (__mmask8 __M
, __m128i __A
)
1040 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
1041 (__v8si
)_mm256_broadcast_i32x2(__A
),
1042 (__v8si
)_mm256_setzero_si256());
1045 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1046 _mm256_broadcast_i64x2(__m128i __A
)
1048 return (__m256i
)__builtin_shufflevector((__v2di
)__A
, (__v2di
)__A
,
1052 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1053 _mm256_mask_broadcast_i64x2(__m256i __O
, __mmask8 __M
, __m128i __A
)
1055 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
1056 (__v4di
)_mm256_broadcast_i64x2(__A
),
1060 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1061 _mm256_maskz_broadcast_i64x2 (__mmask8 __M
, __m128i __A
)
1063 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
1064 (__v4di
)_mm256_broadcast_i64x2(__A
),
1065 (__v4di
)_mm256_setzero_si256());
/// Extracts one 128-bit half of the 256-bit vector \a A as [2 x double].
///
/// \param A
///    A 256-bit vector of [4 x double].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTF64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x double]. The pass-through operand is
///    undefined and the write mask is all ones, so every result element
///    comes from \a A.
#define _mm256_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))
/// Merge-masked extraction of one 128-bit half of \a A as [2 x double].
///
/// \param W
///    A 128-bit vector of [2 x double] supplying the result elements whose
///    mask bit is clear.
/// \param U
///    An 8-bit write mask; bit i (i < 2) governs result element i.
/// \param A
///    A 256-bit vector of [4 x double].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTF64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x double].
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))
/// Zero-masked extraction of one 128-bit half of \a A as [2 x double].
///
/// \param U
///    An 8-bit write mask; bit i (i < 2) governs result element i.
/// \param A
///    A 256-bit vector of [4 x double].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTF64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x double]; elements whose mask bit is
///    clear are taken from the all-zero pass-through operand.
#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
/// Extracts one 128-bit half of the 256-bit integer vector \a A as
/// [2 x i64].
///
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTI64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x i64]. The pass-through operand is
///    undefined and the write mask is all ones, so every result element
///    comes from \a A.
#define _mm256_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
/// Merge-masked extraction of one 128-bit half of \a A as [2 x i64].
///
/// \param W
///    A 128-bit vector of [2 x i64] supplying the result elements whose
///    mask bit is clear.
/// \param U
///    An 8-bit write mask; bit i (i < 2) governs result element i.
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTI64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x i64].
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))
/// Zero-masked extraction of one 128-bit half of \a A as [2 x i64].
///
/// \param U
///    An 8-bit write mask; bit i (i < 2) governs result element i.
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VEXTRACTI64X2: 0 selects the low half, 1 the high half).
/// \returns A 128-bit vector of [2 x i64]; elements whose mask bit is clear
///    are taken from the all-zero pass-through operand.
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
/// Returns a copy of \a A in which one 128-bit half is replaced by \a B.
///
/// \param A
///    A 256-bit vector of [4 x double].
/// \param B
///    A 128-bit vector of [2 x double] to insert.
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VINSERTF64X2: 0 replaces the low half, 1 the high half).
/// \returns A 256-bit vector of [4 x double].
#define _mm256_insertf64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))
/// Merge-masked form of _mm256_insertf64x2().
///
/// \param W
///    A 256-bit vector of [4 x double] supplying the result elements whose
///    mask bit is clear.
/// \param U
///    An 8-bit mask; bit i (i < 4) selects the source of result element i.
/// \param A
///    A 256-bit vector of [4 x double].
/// \param B
///    A 128-bit vector of [2 x double] to insert.
/// \param imm
///    An immediate integer selecting which half of \a A is replaced.
/// \returns A 256-bit vector of [4 x double]: the insertion result where
///    the mask bit is set, the matching element of \a W otherwise.
#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)(__m256d)(W)))
/// Zero-masked form of _mm256_insertf64x2().
///
/// \param U
///    An 8-bit mask; bit i (i < 4) selects the source of result element i.
/// \param A
///    A 256-bit vector of [4 x double].
/// \param B
///    A 128-bit vector of [2 x double] to insert.
/// \param imm
///    An immediate integer selecting which half of \a A is replaced.
/// \returns A 256-bit vector of [4 x double]: the insertion result where
///    the mask bit is set, zero otherwise.
#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                  (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                  (__v4df)_mm256_setzero_pd()))
/// Returns a copy of \a A in which one 128-bit half is replaced by \a B.
///
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param B
///    A 128-bit vector of [2 x i64] to insert.
/// \param imm
///    An immediate integer forwarded to the builtin as the half selector
///    (VINSERTI64X2: 0 replaces the low half, 1 the high half).
/// \returns A 256-bit vector of [4 x i64].
#define _mm256_inserti64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))
/// Merge-masked form of _mm256_inserti64x2().
///
/// \param W
///    A 256-bit vector of [4 x i64] supplying the result elements whose
///    mask bit is clear.
/// \param U
///    An 8-bit mask; bit i (i < 4) selects the source of result element i.
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param B
///    A 128-bit vector of [2 x i64] to insert.
/// \param imm
///    An immediate integer selecting which half of \a A is replaced.
/// \returns A 256-bit vector of [4 x i64]: the insertion result where the
///    mask bit is set, the matching element of \a W otherwise.
#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                   (__v4di)(__m256i)(W)))
/// Zero-masked form of _mm256_inserti64x2().
///
/// \param U
///    An 8-bit mask; bit i (i < 4) selects the source of result element i.
/// \param A
///    A 256-bit vector of [4 x i64].
/// \param B
///    A 128-bit vector of [2 x i64] to insert.
/// \param imm
///    An immediate integer selecting which half of \a A is replaced.
/// \returns A 256-bit vector of [4 x i64]: the insertion result where the
///    mask bit is set, zero otherwise.
#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                   (__v4di)_mm256_setzero_si256()))
/// Tests each double-precision element of \a A against the floating-point
/// categories selected by \a imm, under write mask \a U.
///
/// \param U
///    An 8-bit mask; result bits whose mask bit is clear are zero.
/// \param A
///    A 128-bit vector of [2 x double].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPD encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category and bit i of \a U is set.
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)(U)))
/// Tests each double-precision element of \a A against the floating-point
/// categories selected by \a imm.
///
/// \param A
///    A 128-bit vector of [2 x double].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPD encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category. The builtin is called with an all-ones write mask.
#define _mm_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                              (__mmask8)-1))
/// Tests each double-precision element of \a A against the floating-point
/// categories selected by \a imm, under write mask \a U.
///
/// \param U
///    An 8-bit mask; result bits whose mask bit is clear are zero.
/// \param A
///    A 256-bit vector of [4 x double].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPD encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category and bit i of \a U is set.
#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)(U)))
/// Tests each double-precision element of \a A against the floating-point
/// categories selected by \a imm.
///
/// \param A
///    A 256-bit vector of [4 x double].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPD encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category. The builtin is called with an all-ones write mask.
#define _mm256_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                              (__mmask8)-1))
/// Tests each single-precision element of \a A against the floating-point
/// categories selected by \a imm, under write mask \a U.
///
/// \param U
///    An 8-bit mask; result bits whose mask bit is clear are zero.
/// \param A
///    A 128-bit vector of [4 x float].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPS encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category and bit i of \a U is set.
#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)(U)))
/// Tests each single-precision element of \a A against the floating-point
/// categories selected by \a imm.
///
/// \param A
///    A 128-bit vector of [4 x float].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPS encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category. The builtin is called with an all-ones write mask.
#define _mm_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__mmask8)-1))
/// Tests each single-precision element of \a A against the floating-point
/// categories selected by \a imm, under write mask \a U.
///
/// \param U
///    An 8-bit mask; result bits whose mask bit is clear are zero.
/// \param A
///    A 256-bit vector of [8 x float].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPS encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category and bit i of \a U is set.
#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)(U)))
/// Tests each single-precision element of \a A against the floating-point
/// categories selected by \a imm.
///
/// \param A
///    A 256-bit vector of [8 x float].
/// \param imm
///    An immediate bit set of categories to test (VFPCLASSPS encoding:
///    QNaN, +0, -0, +Inf, -Inf, denormal, negative finite, SNaN).
/// \returns An 8-bit mask with bit i set when element i matches a selected
///    category. The builtin is called with an all-ones write mask.
#define _mm256_fpclass_ps_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1))
/* Remove the file-local attribute macros so they do not leak to includers. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256