libcpp, c, middle-end: Optimize initializers using #embed in C
[official-gcc.git] / gcc / config / i386 / avx512fintrin.h
blob3b6749de1c0c8c0ccf09e55d95c68d880cfafd23
1 /* Copyright (C) 2013-2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3, or (at your option)
8 any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
26 #endif
28 #ifndef _AVX512FINTRIN_H_INCLUDED
29 #define _AVX512FINTRIN_H_INCLUDED
31 #if !defined (__AVX512F__) || defined (__EVEX512__)
32 #pragma GCC push_options
33 #pragma GCC target("avx512f,no-evex512")
34 #define __DISABLE_AVX512F__
35 #endif /* __AVX512F__ */
/* Opmask register types: one mask bit per vector lane (8/16/32 lanes).  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
typedef unsigned int __mmask32;

/* Constants for mantissa extraction */
typedef enum
{
  _MM_MANT_NORM_1_2,		/* interval [1, 2) */
  _MM_MANT_NORM_p5_2,		/* interval [0.5, 2) */
  _MM_MANT_NORM_p5_1,		/* interval [0.5, 1) */
  _MM_MANT_NORM_p75_1p5		/* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

typedef enum
{
  _MM_MANT_SIGN_src,		/* sign = sign(SRC) */
  _MM_MANT_SIGN_zero,		/* sign = 0 */
  _MM_MANT_SIGN_nan		/* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
57 /* These _mm{,256}_avx512* intrins are duplicated from their _mm{,256}_* forms
58 from AVX2 or before. We need to add them to prevent target option mismatch
59 when calling AVX512 intrins implemented with these intrins under no-evex512
60 function attribute. All AVX512 intrins calling those AVX2 intrins or
61 before will change their calls to these AVX512 version. */
62 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
63 _mm_avx512_undefined_ps (void)
65 #pragma GCC diagnostic push
66 #pragma GCC diagnostic ignored "-Winit-self"
67 __m128 __Y = __Y;
68 #pragma GCC diagnostic pop
69 return __Y;
72 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73 _mm_avx512_undefined_pd (void)
75 #pragma GCC diagnostic push
76 #pragma GCC diagnostic ignored "-Winit-self"
77 __m128d __Y = __Y;
78 #pragma GCC diagnostic pop
79 return __Y;
82 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83 _mm_avx512_setzero_ps (void)
85 return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
88 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89 _mm_avx512_setzero_pd (void)
91 return __extension__ (__m128d){ 0.0, 0.0 };
#ifdef __OPTIMIZE__
/* Scalar add/sub with explicit rounding mode __R.  The low element of
   the result is __A op __B; the upper element(s) are copied from __A.
   _mask_ variants merge the low element with __W under mask bit 0 of
   __U; _maskz_ variants merge with zero instead.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_avx512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_avx512_setzero_ps (),
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B,
					       __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
		       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
						    (__v2df) __B,
						    (__v2df)
						    _mm_avx512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B,
					      __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
		       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4sf)
						   _mm_avx512_setzero_ps (),
						   (__mmask8) __U, __R);
}
#else
/* Without optimization the rounding-mode operand cannot be proven
   constant through an inline call, so macro forms are used instead.  */
#define _mm_add_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_addsd_round(A, B, C)

#define _mm_mask_add_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)

#define _mm_add_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_addss_round(A, B, C)

#define _mm_mask_add_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)

#define _mm_maskz_add_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)

#define _mm_sub_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_subsd_round(A, B, C)

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_sd(U, A, B, C)   \
    (__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)

#define _mm_sub_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_subss_round(A, B, C)

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
    (__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)

#define _mm_maskz_sub_round_ss(U, A, B, C)   \
    (__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
#endif
262 extern __inline __m128d
263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
264 _mm_rcp14_sd (__m128d __A, __m128d __B)
266 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
267 (__v2df) __A);
270 extern __inline __m128d
271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
272 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
274 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
275 (__v2df) __A,
276 (__v2df) __W,
277 (__mmask8) __U);
280 extern __inline __m128d
281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
282 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
284 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
285 (__v2df) __A,
286 (__v2df) _mm_avx512_setzero_ps (),
287 (__mmask8) __U);
290 extern __inline __m128
291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
292 _mm_rcp14_ss (__m128 __A, __m128 __B)
294 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
295 (__v4sf) __A);
298 extern __inline __m128
299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
300 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
302 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
303 (__v4sf) __A,
304 (__v4sf) __W,
305 (__mmask8) __U);
308 extern __inline __m128
309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
310 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
312 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
313 (__v4sf) __A,
314 (__v4sf) _mm_avx512_setzero_ps (),
315 (__mmask8) __U);
318 extern __inline __m128d
319 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
320 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
322 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
323 (__v2df) __A);
326 extern __inline __m128d
327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
328 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
330 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
331 (__v2df) __A,
332 (__v2df) __W,
333 (__mmask8) __U);
336 extern __inline __m128d
337 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
338 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
340 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
341 (__v2df) __A,
342 (__v2df) _mm_avx512_setzero_pd (),
343 (__mmask8) __U);
346 extern __inline __m128
347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
348 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
350 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
351 (__v4sf) __A);
354 extern __inline __m128
355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
356 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
358 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
359 (__v4sf) __A,
360 (__v4sf) __W,
361 (__mmask8) __U);
364 extern __inline __m128
365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
366 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
368 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
369 (__v4sf) __A,
370 (__v4sf) _mm_avx512_setzero_ps (),
371 (__mmask8) __U);
374 #ifdef __OPTIMIZE__
375 extern __inline __m128d
376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
377 _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
379 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
380 (__v2df) __A,
381 (__v2df)
382 _mm_avx512_setzero_pd (),
383 (__mmask8) -1, __R);
386 extern __inline __m128d
387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
388 _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
389 const int __R)
391 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
392 (__v2df) __A,
393 (__v2df) __W,
394 (__mmask8) __U, __R);
397 extern __inline __m128d
398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
399 _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
401 return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
402 (__v2df) __A,
403 (__v2df)
404 _mm_avx512_setzero_pd (),
405 (__mmask8) __U, __R);
408 extern __inline __m128
409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
410 _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
412 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
413 (__v4sf) __A,
414 (__v4sf)
415 _mm_avx512_setzero_ps (),
416 (__mmask8) -1, __R);
419 extern __inline __m128
420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
421 _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
422 const int __R)
424 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
425 (__v4sf) __A,
426 (__v4sf) __W,
427 (__mmask8) __U, __R);
430 extern __inline __m128
431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
432 _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
434 return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
435 (__v4sf) __A,
436 (__v4sf)
437 _mm_avx512_setzero_ps (),
438 (__mmask8) __U, __R);
441 extern __inline __m128d
442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
443 _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
445 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
446 (__v2df) __B,
447 __R);
450 extern __inline __m128d
451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
452 _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
453 __m128d __B, const int __R)
455 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
456 (__v2df) __B,
457 (__v2df) __W,
458 (__mmask8) __U, __R);
461 extern __inline __m128d
462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
463 _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
464 const int __R)
466 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
467 (__v2df) __B,
468 (__v2df)
469 _mm_avx512_setzero_pd (),
470 (__mmask8) __U, __R);
473 extern __inline __m128
474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
475 _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
477 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
478 (__v4sf) __B,
479 __R);
482 extern __inline __m128
483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
484 _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
485 __m128 __B, const int __R)
487 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
488 (__v4sf) __B,
489 (__v4sf) __W,
490 (__mmask8) __U, __R);
493 extern __inline __m128
494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
495 _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
496 const int __R)
498 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
499 (__v4sf) __B,
500 (__v4sf)
501 _mm_avx512_setzero_ps (),
502 (__mmask8) __U, __R);
505 extern __inline __m128d
506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
507 _mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
509 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
510 (__v2df) __B,
511 __R);
514 extern __inline __m128d
515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
516 _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
517 __m128d __B, const int __R)
519 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
520 (__v2df) __B,
521 (__v2df) __W,
522 (__mmask8) __U, __R);
525 extern __inline __m128d
526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
527 _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
528 const int __R)
530 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
531 (__v2df) __B,
532 (__v2df)
533 _mm_avx512_setzero_pd (),
534 (__mmask8) __U, __R);
537 extern __inline __m128
538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
539 _mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
541 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
542 (__v4sf) __B,
543 __R);
546 extern __inline __m128
547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
548 _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
549 __m128 __B, const int __R)
551 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
552 (__v4sf) __B,
553 (__v4sf) __W,
554 (__mmask8) __U, __R);
557 extern __inline __m128
558 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
559 _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
560 const int __R)
562 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
563 (__v4sf) __B,
564 (__v4sf)
565 _mm_avx512_setzero_ps (),
566 (__mmask8) __U, __R);
569 extern __inline __m128d
570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
571 _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
573 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
574 (__v2df) __B,
575 (__v2df)
576 _mm_avx512_setzero_pd (),
577 (__mmask8) -1, __R);
580 extern __inline __m128d
581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
582 _mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
583 const int __R)
585 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
586 (__v2df) __B,
587 (__v2df) __W,
588 (__mmask8) __U, __R);
591 extern __inline __m128d
592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
593 _mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
594 const int __R)
596 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
597 (__v2df) __B,
598 (__v2df)
599 _mm_avx512_setzero_pd (),
600 (__mmask8) __U, __R);
603 extern __inline __m128
604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
605 _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
607 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
608 (__v4sf) __B,
609 (__v4sf)
610 _mm_avx512_setzero_ps (),
611 (__mmask8) -1, __R);
614 extern __inline __m128
615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
616 _mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
617 const int __R)
619 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
620 (__v4sf) __B,
621 (__v4sf) __W,
622 (__mmask8) __U, __R);
625 extern __inline __m128
626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
627 _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
629 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
630 (__v4sf) __B,
631 (__v4sf)
632 _mm_avx512_setzero_ps (),
633 (__mmask8) __U, __R);
635 #else
636 #define _mm_sqrt_round_sd(A, B, C) \
637 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
638 (__v2df) _mm_avx512_setzero_pd (), -1, C)
640 #define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
641 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
643 #define _mm_maskz_sqrt_round_sd(U, A, B, C) \
644 (__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
645 (__v2df) _mm_avx512_setzero_pd (), U, C)
647 #define _mm_sqrt_round_ss(A, B, C) \
648 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
649 (__v4sf) _mm_avx512_setzero_ps (), -1, C)
651 #define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
652 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
654 #define _mm_maskz_sqrt_round_ss(U, A, B, C) \
655 (__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
656 (__v4sf) _mm_avx512_setzero_ps (), U, C)
658 #define _mm_mul_round_sd(A, B, C) \
659 (__m128d)__builtin_ia32_mulsd_round(A, B, C)
661 #define _mm_mask_mul_round_sd(W, U, A, B, C) \
662 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
664 #define _mm_maskz_mul_round_sd(U, A, B, C) \
665 (__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
667 #define _mm_mul_round_ss(A, B, C) \
668 (__m128)__builtin_ia32_mulss_round(A, B, C)
670 #define _mm_mask_mul_round_ss(W, U, A, B, C) \
671 (__m128)__builtin_ia32_mulss_mask_round(A, B, W, U, C)
673 #define _mm_maskz_mul_round_ss(U, A, B, C) \
674 (__m128)__builtin_ia32_mulss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
676 #define _mm_div_round_sd(A, B, C) \
677 (__m128d)__builtin_ia32_divsd_round(A, B, C)
679 #define _mm_mask_div_round_sd(W, U, A, B, C) \
680 (__m128d)__builtin_ia32_divsd_mask_round(A, B, W, U, C)
682 #define _mm_maskz_div_round_sd(U, A, B, C) \
683 (__m128d)__builtin_ia32_divsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
685 #define _mm_div_round_ss(A, B, C) \
686 (__m128)__builtin_ia32_divss_round(A, B, C)
688 #define _mm_mask_div_round_ss(W, U, A, B, C) \
689 (__m128)__builtin_ia32_divss_mask_round(A, B, W, U, C)
691 #define _mm_maskz_div_round_ss(U, A, B, C) \
692 (__m128)__builtin_ia32_divss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
694 #define _mm_scalef_round_sd(A, B, C) \
695 ((__m128d) \
696 __builtin_ia32_scalefsd_mask_round ((A), (B), \
697 (__v2df) _mm_avx512_undefined_pd (), \
698 -1, (C)))
700 #define _mm_scalef_round_ss(A, B, C) \
701 ((__m128) \
702 __builtin_ia32_scalefss_mask_round ((A), (B), \
703 (__v4sf) _mm_avx512_undefined_ps (), \
704 -1, (C)))
706 #define _mm_mask_scalef_round_sd(W, U, A, B, C) \
707 ((__m128d) \
708 __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
710 #define _mm_mask_scalef_round_ss(W, U, A, B, C) \
711 ((__m128) \
712 __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
714 #define _mm_maskz_scalef_round_sd(U, A, B, C) \
715 ((__m128d) \
716 __builtin_ia32_scalefsd_mask_round ((A), (B), \
717 (__v2df) _mm_avx512_setzero_pd (), \
718 (U), (C)))
720 #define _mm_maskz_scalef_round_ss(U, A, B, C) \
721 ((__m128) \
722 __builtin_ia32_scalefss_mask_round ((A), (B), \
723 (__v4sf) _mm_avx512_setzero_ps (), \
724 (U), (C)))
725 #endif
/* Current-rounding-direction conveniences: forward to the *_round_*
   forms with _MM_FROUND_CUR_DIRECTION.  */
#define _mm_mask_sqrt_sd(W, U, A, B) \
	_mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_sqrt_sd(U, A, B) \
	_mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_sqrt_ss(W, U, A, B) \
	_mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_sqrt_ss(U, A, B) \
	_mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_scalef_sd(W, U, A, B) \
	_mm_mask_scalef_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_scalef_sd(U, A, B) \
	_mm_maskz_scalef_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_scalef_ss(W, U, A, B) \
	_mm_mask_scalef_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_scalef_ss(U, A, B) \
	_mm_maskz_scalef_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
751 extern __inline __m128d
752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
753 _mm_cvtu32_sd (__m128d __A, unsigned __B)
755 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B);
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* 64-bit integer -> low double conversions with rounding mode __R;
   upper lane copied from __A.  (roundi64 and roundsi64 are aliases.)  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R)
{
  return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C)

#define _mm_cvt_roundi64_sd(A, B, C)   \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)

#define _mm_cvt_roundsi64_sd(A, B, C)  \
    (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C)
#endif

#endif
#ifdef __OPTIMIZE__
/* 32-bit integer -> low float conversions with rounding mode __R;
   upper lanes copied from __A.  (roundi32 and roundsi32 are aliases.)  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss32(A, B, C)

#define _mm_cvt_roundi32_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)

#define _mm_cvt_roundsi32_ss(A, B, C)  \
    (__m128)__builtin_ia32_cvtsi2ss32(A, B, C)
#endif
#ifdef __x86_64__
#ifdef __OPTIMIZE__
/* 64-bit integer -> low float conversions with rounding mode __R;
   upper lanes copied from __A.  (roundi64 and roundsi64 are aliases.)  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R)
{
  return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R);
}
#else
#define _mm_cvt_roundu64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtusi2ss64(A, B, C)

#define _mm_cvt_roundi64_ss(A, B, C)   \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)

#define _mm_cvt_roundsi64_ss(A, B, C)  \
    (__m128)__builtin_ia32_cvtsi2ss64(A, B, C)
#endif

#endif
860 extern __inline __m128
861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
862 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float *__P)
864 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) __W, __U);
867 extern __inline __m128
868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
869 _mm_maskz_load_ss (__mmask8 __U, const float *__P)
871 return (__m128) __builtin_ia32_loadss_mask (__P, (__v4sf) _mm_avx512_setzero_ps (),
872 __U);
875 extern __inline __m128d
876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
877 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double *__P)
879 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) __W, __U);
882 extern __inline __m128d
883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
884 _mm_maskz_load_sd (__mmask8 __U, const double *__P)
886 return (__m128d) __builtin_ia32_loadsd_mask (__P, (__v2df) _mm_avx512_setzero_pd (),
887 __U);
890 extern __inline __m128
891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
892 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
894 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
895 (__v4sf) __W, __U);
898 extern __inline __m128
899 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
900 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
902 return (__m128) __builtin_ia32_movess_mask ((__v4sf) __A, (__v4sf) __B,
903 (__v4sf) _mm_avx512_setzero_ps (), __U);
906 extern __inline __m128d
907 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
908 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
910 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
911 (__v2df) __W, __U);
914 extern __inline __m128d
915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
916 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
918 return (__m128d) __builtin_ia32_movesd_mask ((__v2df) __A, (__v2df) __B,
919 (__v2df) _mm_avx512_setzero_pd (),
920 __U);
923 extern __inline void
924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
925 _mm_mask_store_ss (float *__P, __mmask8 __U, __m128 __A)
927 __builtin_ia32_storess_mask (__P, (__v4sf) __A, (__mmask8) __U);
930 extern __inline void
931 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
932 _mm_mask_store_sd (double *__P, __mmask8 __U, __m128d __A)
934 __builtin_ia32_storesd_mask (__P, (__v2df) __A, (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Scalar fixupimm: patch the low lane of __A using special-value rules
   selected by table __C and immediate __imm, with rounding mode __R.
   _mask_ merges under __U; _maskz_ zeroes unselected lanes.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C,
		       const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
						   (__v2df) __B,
						   (__v2di) __C, __imm,
						   (__mmask8) -1, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
			    __m128i __C, const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
						   (__v2df) __B,
						   (__v2di) __C, __imm,
						   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
			     __m128i __C, const int __imm, const int __R)
{
  return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
						    (__v2df) __B,
						    (__v2di) __C,
						    __imm,
						    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fixupimm_round_ss (__m128 __A, __m128 __B, __m128i __C,
		       const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
						  (__v4sf) __B,
						  (__v4si) __C, __imm,
						  (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
			    __m128i __C, const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
						  (__v4sf) __B,
						  (__v4si) __C, __imm,
						  (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
			     __m128i __C, const int __imm, const int __R)
{
  return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
						   (__v4sf) __B,
						   (__v4si) __C, __imm,
						   (__mmask8) __U, __R);
}
#else
#define _mm_fixupimm_round_sd(X, Y, Z, C, R)					\
    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R)				\
    ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R)				\
    ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X),	\
      (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_fixupimm_round_ss(X, Y, Z, C, R)					\
    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(-1), (R)))

#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R)				\
    ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R)				\
    ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X),	\
      (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C),		\
      (__mmask8)(U), (R)))

#endif
/* Scalar single-precision -> 64-bit integer conversions with explicit
   rounding (VCVTSS2USI64 / VCVTSS2SI64 and the truncating VCVTTSS2*64
   variants).  Guarded by __x86_64__ since the 64-bit GPR forms only
   exist in long mode; _i64 names are aliases of the _si64 forms.
   NOTE(review): brace lines are missing from this dump and lines carry
   embedded blob numbers -- confirm against upstream.  */
1038 #ifdef __x86_64__
1039 #ifdef __OPTIMIZE__
1040 extern __inline unsigned long long
1041 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1042 _mm_cvt_roundss_u64 (__m128 __A, const int __R)
1044 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R);
1047 extern __inline long long
1048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1049 _mm_cvt_roundss_si64 (__m128 __A, const int __R)
1051 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
1054 extern __inline long long
1055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1056 _mm_cvt_roundss_i64 (__m128 __A, const int __R)
1058 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R);
1061 extern __inline unsigned long long
1062 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1063 _mm_cvtt_roundss_u64 (__m128 __A, const int __R)
1065 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R);
1068 extern __inline long long
1069 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1070 _mm_cvtt_roundss_i64 (__m128 __A, const int __R)
1072 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
1075 extern __inline long long
1076 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1077 _mm_cvtt_roundss_si64 (__m128 __A, const int __R)
1079 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R);
1081 #else
1082 #define _mm_cvt_roundss_u64(A, B) \
1083 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B))
1085 #define _mm_cvt_roundss_si64(A, B) \
1086 ((long long)__builtin_ia32_vcvtss2si64(A, B))
1088 #define _mm_cvt_roundss_i64(A, B) \
1089 ((long long)__builtin_ia32_vcvtss2si64(A, B))
1091 #define _mm_cvtt_roundss_u64(A, B) \
1092 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B))
1094 #define _mm_cvtt_roundss_i64(A, B) \
1095 ((long long)__builtin_ia32_vcvttss2si64(A, B))
1097 #define _mm_cvtt_roundss_si64(A, B) \
1098 ((long long)__builtin_ia32_vcvttss2si64(A, B))
1099 #endif
1100 #endif
/* Scalar single-precision -> 32-bit integer conversions with explicit
   rounding (VCVTSS2USI / VCVTSS2SI and truncating VCVTTSS2* forms).
   Available on all targets, unlike the 64-bit variants above.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1102 #ifdef __OPTIMIZE__
1103 extern __inline unsigned
1104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1105 _mm_cvt_roundss_u32 (__m128 __A, const int __R)
1107 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R);
1110 extern __inline int
1111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1112 _mm_cvt_roundss_si32 (__m128 __A, const int __R)
1114 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
1117 extern __inline int
1118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1119 _mm_cvt_roundss_i32 (__m128 __A, const int __R)
1121 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R);
1124 extern __inline unsigned
1125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1126 _mm_cvtt_roundss_u32 (__m128 __A, const int __R)
1128 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R);
1131 extern __inline int
1132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1133 _mm_cvtt_roundss_i32 (__m128 __A, const int __R)
1135 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
1138 extern __inline int
1139 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1140 _mm_cvtt_roundss_si32 (__m128 __A, const int __R)
1142 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R);
1144 #else
1145 #define _mm_cvt_roundss_u32(A, B) \
1146 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B))
1148 #define _mm_cvt_roundss_si32(A, B) \
1149 ((int)__builtin_ia32_vcvtss2si32(A, B))
1151 #define _mm_cvt_roundss_i32(A, B) \
1152 ((int)__builtin_ia32_vcvtss2si32(A, B))
1154 #define _mm_cvtt_roundss_u32(A, B) \
1155 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B))
1157 #define _mm_cvtt_roundss_si32(A, B) \
1158 ((int)__builtin_ia32_vcvttss2si32(A, B))
1160 #define _mm_cvtt_roundss_i32(A, B) \
1161 ((int)__builtin_ia32_vcvttss2si32(A, B))
1162 #endif
/* Scalar double-precision -> 64-bit integer conversions with explicit
   rounding (VCVTSD2USI64 / VCVTSD2SI64 and truncating VCVTTSD2*64),
   again restricted to __x86_64__ targets.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1164 #ifdef __x86_64__
1165 #ifdef __OPTIMIZE__
1166 extern __inline unsigned long long
1167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1168 _mm_cvt_roundsd_u64 (__m128d __A, const int __R)
1170 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R);
1173 extern __inline long long
1174 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1175 _mm_cvt_roundsd_si64 (__m128d __A, const int __R)
1177 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
1180 extern __inline long long
1181 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1182 _mm_cvt_roundsd_i64 (__m128d __A, const int __R)
1184 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R);
1187 extern __inline unsigned long long
1188 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1189 _mm_cvtt_roundsd_u64 (__m128d __A, const int __R)
1191 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R);
1194 extern __inline long long
1195 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1196 _mm_cvtt_roundsd_si64 (__m128d __A, const int __R)
1198 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
1201 extern __inline long long
1202 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1203 _mm_cvtt_roundsd_i64 (__m128d __A, const int __R)
1205 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R);
1207 #else
1208 #define _mm_cvt_roundsd_u64(A, B) \
1209 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B))
1211 #define _mm_cvt_roundsd_si64(A, B) \
1212 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
1214 #define _mm_cvt_roundsd_i64(A, B) \
1215 ((long long)__builtin_ia32_vcvtsd2si64(A, B))
1217 #define _mm_cvtt_roundsd_u64(A, B) \
1218 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B))
1220 #define _mm_cvtt_roundsd_si64(A, B) \
1221 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
1223 #define _mm_cvtt_roundsd_i64(A, B) \
1224 ((long long)__builtin_ia32_vcvttsd2si64(A, B))
1225 #endif
1226 #endif
/* Scalar double-precision -> 32-bit integer conversions with explicit
   rounding.  Opens the long __OPTIMIZE__ region whose macro fallbacks
   and closing #endif appear further below.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1228 #ifdef __OPTIMIZE__
1229 extern __inline unsigned
1230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1231 _mm_cvt_roundsd_u32 (__m128d __A, const int __R)
1233 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R);
1236 extern __inline int
1237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1238 _mm_cvt_roundsd_si32 (__m128d __A, const int __R)
1240 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
1243 extern __inline int
1244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1245 _mm_cvt_roundsd_i32 (__m128d __A, const int __R)
1247 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R);
1250 extern __inline unsigned
1251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1252 _mm_cvtt_roundsd_u32 (__m128d __A, const int __R)
1254 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R);
1257 extern __inline int
1258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1259 _mm_cvtt_roundsd_i32 (__m128d __A, const int __R)
1261 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
1264 extern __inline int
1265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1266 _mm_cvtt_roundsd_si32 (__m128d __A, const int __R)
1268 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R);
/* Scalar cross-precision conversions SD<->SS with explicit rounding
   (VCVTSD2SS / VCVTSS2SD): unmasked, merge-masked (__W supplies the
   passthrough element) and zero-masked forms.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1271 extern __inline __m128
1272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1273 _mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R)
1275 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A,
1276 (__v2df) __B,
1277 __R);
1280 extern __inline __m128
1281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1282 _mm_mask_cvt_roundsd_ss (__m128 __W, __mmask8 __U, __m128 __A,
1283 __m128d __B, const int __R)
1285 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
1286 (__v2df) __B,
1287 (__v4sf) __W,
1288 __U,
1289 __R);
1292 extern __inline __m128
1293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1294 _mm_maskz_cvt_roundsd_ss (__mmask8 __U, __m128 __A,
1295 __m128d __B, const int __R)
1297 return (__m128) __builtin_ia32_cvtsd2ss_mask_round ((__v4sf) __A,
1298 (__v2df) __B,
1299 _mm_avx512_setzero_ps (),
1300 __U,
1301 __R);
1304 extern __inline __m128d
1305 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1306 _mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R)
1308 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A,
1309 (__v4sf) __B,
1310 __R);
1313 extern __inline __m128d
1314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1315 _mm_mask_cvt_roundss_sd (__m128d __W, __mmask8 __U, __m128d __A,
1316 __m128 __B, const int __R)
1318 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
1319 (__v4sf) __B,
1320 (__v2df) __W,
1321 __U,
1322 __R);
1325 extern __inline __m128d
1326 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1327 _mm_maskz_cvt_roundss_sd (__mmask8 __U, __m128d __A,
1328 __m128 __B, const int __R)
1330 return (__m128d) __builtin_ia32_cvtss2sd_mask_round ((__v2df) __A,
1331 (__v4sf) __B,
1332 _mm_avx512_setzero_pd (),
1333 __U,
1334 __R);
/* Scalar exponent extraction (VGETEXPSS / VGETEXPSD) with explicit
   rounding: unmasked, merge-masked and zero-masked forms for both
   single and double precision.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1337 extern __inline __m128
1338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1339 _mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R)
1341 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
1342 (__v4sf) __B,
1343 __R);
1346 extern __inline __m128
1347 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1348 _mm_mask_getexp_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1349 __m128 __B, const int __R)
1351 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
1352 (__v4sf) __B,
1353 (__v4sf) __W,
1354 (__mmask8) __U, __R);
1357 extern __inline __m128
1358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1359 _mm_maskz_getexp_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1360 const int __R)
1362 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
1363 (__v4sf) __B,
1364 (__v4sf)
1365 _mm_avx512_setzero_ps (),
1366 (__mmask8) __U, __R);
1369 extern __inline __m128d
1370 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1371 _mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R)
1373 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
1374 (__v2df) __B,
1375 __R);
1378 extern __inline __m128d
1379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1380 _mm_mask_getexp_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1381 __m128d __B, const int __R)
1383 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
1384 (__v2df) __B,
1385 (__v2df) __W,
1386 (__mmask8) __U, __R);
1389 extern __inline __m128d
1390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1391 _mm_maskz_getexp_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1392 const int __R)
1394 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
1395 (__v2df) __B,
1396 (__v2df)
1397 _mm_avx512_setzero_pd (),
1398 (__mmask8) __U, __R);
/* Scalar mantissa extraction (VGETMANTSD / VGETMANTSS) with explicit
   rounding.  The interval selector __C and sign control __D are packed
   into the instruction immediate as (__D << 2) | __C (sign bits occupy
   imm[3:2], normalization interval imm[1:0]).
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1401 extern __inline __m128d
1402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1403 _mm_getmant_round_sd (__m128d __A, __m128d __B,
1404 _MM_MANTISSA_NORM_ENUM __C,
1405 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1407 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
1408 (__v2df) __B,
1409 (__D << 2) | __C,
1410 __R);
1413 extern __inline __m128d
1414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1415 _mm_mask_getmant_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1416 __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
1417 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1419 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
1420 (__v2df) __B,
1421 (__D << 2) | __C,
1422 (__v2df) __W,
1423 __U, __R);
1426 extern __inline __m128d
1427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1428 _mm_maskz_getmant_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1429 _MM_MANTISSA_NORM_ENUM __C,
1430 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1432 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
1433 (__v2df) __B,
1434 (__D << 2) | __C,
1435 (__v2df)
1436 _mm_avx512_setzero_pd(),
1437 __U, __R);
1440 extern __inline __m128
1441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1442 _mm_getmant_round_ss (__m128 __A, __m128 __B,
1443 _MM_MANTISSA_NORM_ENUM __C,
1444 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1446 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
1447 (__v4sf) __B,
1448 (__D << 2) | __C,
1449 __R);
1452 extern __inline __m128
1453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1454 _mm_mask_getmant_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1455 __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
1456 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1458 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
1459 (__v4sf) __B,
1460 (__D << 2) | __C,
1461 (__v4sf) __W,
1462 __U, __R);
1465 extern __inline __m128
1466 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1467 _mm_maskz_getmant_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1468 _MM_MANTISSA_NORM_ENUM __C,
1469 _MM_MANTISSA_SIGN_ENUM __D, const int __R)
1471 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
1472 (__v4sf) __B,
1473 (__D << 2) | __C,
1474 (__v4sf)
1475 _mm_avx512_setzero_ps(),
1476 __U, __R);
/* Scalar round-to-specified-precision (VRNDSCALESS / VRNDSCALESD) with
   explicit rounding: unmasked (all-ones mask, zero passthrough),
   merge-masked and zero-masked forms.  In the masked variants the
   builtin argument order is (src1, src2, imm, passthrough, mask, round).
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1479 extern __inline __m128
1480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1481 _mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm,
1482 const int __R)
1484 return (__m128)
1485 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
1486 (__v4sf) __B, __imm,
1487 (__v4sf)
1488 _mm_avx512_setzero_ps (),
1489 (__mmask8) -1,
1490 __R);
1493 extern __inline __m128
1494 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1495 _mm_mask_roundscale_round_ss (__m128 __A, __mmask8 __B, __m128 __C,
1496 __m128 __D, const int __imm, const int __R)
1498 return (__m128)
1499 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
1500 (__v4sf) __D, __imm,
1501 (__v4sf) __A,
1502 (__mmask8) __B,
1503 __R);
1506 extern __inline __m128
1507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1508 _mm_maskz_roundscale_round_ss (__mmask8 __A, __m128 __B, __m128 __C,
1509 const int __imm, const int __R)
1511 return (__m128)
1512 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
1513 (__v4sf) __C, __imm,
1514 (__v4sf)
1515 _mm_avx512_setzero_ps (),
1516 (__mmask8) __A,
1517 __R);
1520 extern __inline __m128d
1521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1522 _mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm,
1523 const int __R)
1525 return (__m128d)
1526 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
1527 (__v2df) __B, __imm,
1528 (__v2df)
1529 _mm_avx512_setzero_pd (),
1530 (__mmask8) -1,
1531 __R);
1534 extern __inline __m128d
1535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1536 _mm_mask_roundscale_round_sd (__m128d __A, __mmask8 __B, __m128d __C,
1537 __m128d __D, const int __imm, const int __R)
1539 return (__m128d)
1540 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
1541 (__v2df) __D, __imm,
1542 (__v2df) __A,
1543 (__mmask8) __B,
1544 __R);
1547 extern __inline __m128d
1548 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1549 _mm_maskz_roundscale_round_sd (__mmask8 __A, __m128d __B, __m128d __C,
1550 const int __imm, const int __R)
1552 return (__m128d)
1553 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
1554 (__v2df) __C, __imm,
1555 (__v2df)
1556 _mm_avx512_setzero_pd (),
1557 (__mmask8) __A,
1558 __R);
/* Macro fallbacks for the whole preceding __OPTIMIZE__ region
   (cvt_roundsd_*32, cvt sd<->ss, getmant, getexp, roundscale): used
   when __OPTIMIZE__ is off so the immediate/rounding operands still
   reach the builtins as integer constant expressions.
   NOTE(review): embedded blob line numbers present in this dump --
   confirm against upstream.  */
1561 #else
1562 #define _mm_cvt_roundsd_u32(A, B) \
1563 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B))
1565 #define _mm_cvt_roundsd_si32(A, B) \
1566 ((int)__builtin_ia32_vcvtsd2si32(A, B))
1568 #define _mm_cvt_roundsd_i32(A, B) \
1569 ((int)__builtin_ia32_vcvtsd2si32(A, B))
1571 #define _mm_cvtt_roundsd_u32(A, B) \
1572 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B))
1574 #define _mm_cvtt_roundsd_si32(A, B) \
1575 ((int)__builtin_ia32_vcvttsd2si32(A, B))
1577 #define _mm_cvtt_roundsd_i32(A, B) \
1578 ((int)__builtin_ia32_vcvttsd2si32(A, B))
1580 #define _mm_cvt_roundsd_ss(A, B, C) \
1581 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C)
1583 #define _mm_mask_cvt_roundsd_ss(W, U, A, B, C) \
1584 (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), (W), (U), (C))
1586 #define _mm_maskz_cvt_roundsd_ss(U, A, B, C) \
1587 (__m128)__builtin_ia32_cvtsd2ss_mask_round ((A), (B), _mm_avx512_setzero_ps (), \
1588 (U), (C))
1590 #define _mm_cvt_roundss_sd(A, B, C) \
1591 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C)
1593 #define _mm_mask_cvt_roundss_sd(W, U, A, B, C) \
1594 (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), (W), (U), (C))
1596 #define _mm_maskz_cvt_roundss_sd(U, A, B, C) \
1597 (__m128d)__builtin_ia32_cvtss2sd_mask_round ((A), (B), _mm_avx512_setzero_pd (), \
1598 (U), (C))
1600 #define _mm_getmant_round_sd(X, Y, C, D, R) \
1601 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
1602 (__v2df)(__m128d)(Y), \
1603 (int)(((D)<<2) | (C)), \
1604 (R)))
1606 #define _mm_mask_getmant_round_sd(W, U, X, Y, C, D, R) \
1607 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
1608 (__v2df)(__m128d)(Y), \
1609 (int)(((D)<<2) | (C)), \
1610 (__v2df)(__m128d)(W), \
1611 (__mmask8)(U),\
1612 (R)))
1614 #define _mm_maskz_getmant_round_sd(U, X, Y, C, D, R) \
1615 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
1616 (__v2df)(__m128d)(Y), \
1617 (int)(((D)<<2) | (C)), \
1618 (__v2df)(__m128d)_mm_avx512_setzero_pd(), \
1619 (__mmask8)(U),\
1620 (R)))
1622 #define _mm_getmant_round_ss(X, Y, C, D, R) \
1623 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
1624 (__v4sf)(__m128)(Y), \
1625 (int)(((D)<<2) | (C)), \
1626 (R)))
1628 #define _mm_mask_getmant_round_ss(W, U, X, Y, C, D, R) \
1629 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
1630 (__v4sf)(__m128)(Y), \
1631 (int)(((D)<<2) | (C)), \
1632 (__v4sf)(__m128)(W), \
1633 (__mmask8)(U),\
1634 (R)))
1636 #define _mm_maskz_getmant_round_ss(U, X, Y, C, D, R) \
1637 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
1638 (__v4sf)(__m128)(Y), \
1639 (int)(((D)<<2) | (C)), \
1640 (__v4sf)(__m128)_mm_avx512_setzero_ps(), \
1641 (__mmask8)(U),\
1642 (R)))
1644 #define _mm_getexp_round_ss(A, B, R) \
1645 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R))
1647 #define _mm_mask_getexp_round_ss(W, U, A, B, C) \
1648 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U, C)
1650 #define _mm_maskz_getexp_round_ss(U, A, B, C) \
1651 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
1653 #define _mm_getexp_round_sd(A, B, R) \
1654 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R))
1656 #define _mm_mask_getexp_round_sd(W, U, A, B, C) \
1657 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U, C)
1659 #define _mm_maskz_getexp_round_sd(U, A, B, C) \
1660 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
1662 #define _mm_roundscale_round_ss(A, B, I, R) \
1663 ((__m128) \
1664 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
1665 (__v4sf) (__m128) (B), \
1666 (int) (I), \
1667 (__v4sf) _mm_avx512_setzero_ps (), \
1668 (__mmask8) (-1), \
1669 (int) (R)))
1670 #define _mm_mask_roundscale_round_ss(A, U, B, C, I, R) \
1671 ((__m128) \
1672 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
1673 (__v4sf) (__m128) (C), \
1674 (int) (I), \
1675 (__v4sf) (__m128) (A), \
1676 (__mmask8) (U), \
1677 (int) (R)))
1678 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
1679 ((__m128) \
1680 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
1681 (__v4sf) (__m128) (B), \
1682 (int) (I), \
1683 (__v4sf) _mm_avx512_setzero_ps (), \
1684 (__mmask8) (U), \
1685 (int) (R)))
1686 #define _mm_roundscale_round_sd(A, B, I, R) \
1687 ((__m128d) \
1688 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
1689 (__v2df) (__m128d) (B), \
1690 (int) (I), \
1691 (__v2df) _mm_avx512_setzero_pd (), \
1692 (__mmask8) (-1), \
1693 (int) (R)))
1694 #define _mm_mask_roundscale_round_sd(A, U, B, C, I, R) \
1695 ((__m128d) \
1696 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
1697 (__v2df) (__m128d) (C), \
1698 (int) (I), \
1699 (__v2df) (__m128d) (A), \
1700 (__mmask8) (U), \
1701 (int) (R)))
1702 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
1703 ((__m128d) \
1704 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
1705 (__v2df) (__m128d) (B), \
1706 (int) (I), \
1707 (__v2df) _mm_avx512_setzero_pd (), \
1708 (__mmask8) (U), \
1709 (int) (R)))
1711 #endif
/* Convenience wrappers: the non-_round masked SD<->SS conversions are
   the round forms with _MM_FROUND_CUR_DIRECTION (use the current MXCSR
   rounding mode).  */
1713 #define _mm_mask_cvtss_sd(W, U, A, B) \
1714 _mm_mask_cvt_roundss_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
1716 #define _mm_maskz_cvtss_sd(U, A, B) \
1717 _mm_maskz_cvt_roundss_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
1719 #define _mm_mask_cvtsd_ss(W, U, A, B) \
1720 _mm_mask_cvt_roundsd_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
1722 #define _mm_maskz_cvtsd_ss(U, A, B) \
1723 _mm_maskz_cvt_roundsd_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
/* Mask-register shifts (KSHIFTLW / KSHIFTRW) and scalar compare-to-mask
   (VCMPSD / VCMPSS) with explicit rounding; macro fallbacks follow in
   the #else branch since the shift count, predicate and rounding
   operands must be compile-time constants.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1725 #ifdef __OPTIMIZE__
1726 extern __inline __mmask16
1727 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1728 _kshiftli_mask16 (__mmask16 __A, unsigned int __B)
1730 return (__mmask16) __builtin_ia32_kshiftlihi ((__mmask16) __A,
1731 (__mmask8) __B);
1734 extern __inline __mmask16
1735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1736 _kshiftri_mask16 (__mmask16 __A, unsigned int __B)
1738 return (__mmask16) __builtin_ia32_kshiftrihi ((__mmask16) __A,
1739 (__mmask8) __B);
1742 extern __inline __mmask8
1743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1744 _mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R)
1746 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
1747 (__v2df) __Y, __P,
1748 (__mmask8) -1, __R);
1751 extern __inline __mmask8
1752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1753 _mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y,
1754 const int __P, const int __R)
1756 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
1757 (__v2df) __Y, __P,
1758 (__mmask8) __M, __R);
1761 extern __inline __mmask8
1762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1763 _mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R)
1765 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
1766 (__v4sf) __Y, __P,
1767 (__mmask8) -1, __R);
1770 extern __inline __mmask8
1771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1772 _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
1773 const int __P, const int __R)
1775 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
1776 (__v4sf) __Y, __P,
1777 (__mmask8) __M, __R);
1780 #else
1781 #define _kshiftli_mask16(X, Y) \
1782 ((__mmask16) __builtin_ia32_kshiftlihi ((__mmask16)(X), (__mmask8)(Y)))
1784 #define _kshiftri_mask16(X, Y) \
1785 ((__mmask16) __builtin_ia32_kshiftrihi ((__mmask16)(X), (__mmask8)(Y)))
1787 #define _mm_cmp_round_sd_mask(X, Y, P, R) \
1788 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
1789 (__v2df)(__m128d)(Y), (int)(P),\
1790 (__mmask8)-1, R))
1792 #define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
1793 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
1794 (__v2df)(__m128d)(Y), (int)(P),\
1795 (M), R))
1797 #define _mm_cmp_round_ss_mask(X, Y, P, R) \
1798 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
1799 (__v4sf)(__m128)(Y), (int)(P), \
1800 (__mmask8)-1, R))
1802 #define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
1803 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
1804 (__v4sf)(__m128)(Y), (int)(P), \
1805 (M), R))
1807 #endif
/* 16-bit mask-register utilities: KORTEST (ZF/CF results), KMOVW
   moves between mask registers and GPR/memory, and the bitwise
   mask operations KAND/KANDN/KOR/KXNOR/KXOR/KNOT plus KUNPCKBW.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1809 extern __inline unsigned char
1810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1811 _kortest_mask16_u8 (__mmask16 __A, __mmask16 __B, unsigned char *__CF)
/* Stores the carry-flag result through __CF, returns the zero-flag
   result.  */
1813 *__CF = (unsigned char) __builtin_ia32_kortestchi (__A, __B);
1814 return (unsigned char) __builtin_ia32_kortestzhi (__A, __B);
1817 extern __inline unsigned char
1818 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1819 _kortestz_mask16_u8 (__mmask16 __A, __mmask16 __B)
1821 return (unsigned char) __builtin_ia32_kortestzhi ((__mmask16) __A,
1822 (__mmask16) __B);
1825 extern __inline unsigned char
1826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1827 _kortestc_mask16_u8 (__mmask16 __A, __mmask16 __B)
1829 return (unsigned char) __builtin_ia32_kortestchi ((__mmask16) __A,
1830 (__mmask16) __B);
1833 extern __inline unsigned int
1834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1835 _cvtmask16_u32 (__mmask16 __A)
1837 return (unsigned int) __builtin_ia32_kmovw ((__mmask16 ) __A);
1840 extern __inline __mmask16
1841 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1842 _cvtu32_mask16 (unsigned int __A)
1844 return (__mmask16) __builtin_ia32_kmovw ((__mmask16 ) __A);
1847 extern __inline __mmask16
1848 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1849 _load_mask16 (__mmask16 *__A)
1851 return (__mmask16) __builtin_ia32_kmovw (*(__mmask16 *) __A);
1854 extern __inline void
1855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1856 _store_mask16 (__mmask16 *__A, __mmask16 __B)
1858 *(__mmask16 *) __A = __builtin_ia32_kmovw (__B);
1861 extern __inline __mmask16
1862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1863 _kand_mask16 (__mmask16 __A, __mmask16 __B)
1865 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
1868 extern __inline __mmask16
1869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1870 _kandn_mask16 (__mmask16 __A, __mmask16 __B)
1872 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
1873 (__mmask16) __B);
1876 extern __inline __mmask16
1877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1878 _kor_mask16 (__mmask16 __A, __mmask16 __B)
1880 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
1883 extern __inline __mmask16
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _kxnor_mask16 (__mmask16 __A, __mmask16 __B)
1887 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
1890 extern __inline __mmask16
1891 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1892 _kxor_mask16 (__mmask16 __A, __mmask16 __B)
1894 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
1897 extern __inline __mmask16
1898 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1899 _knot_mask16 (__mmask16 __A)
1901 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
1904 extern __inline __mmask16
1905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1906 _kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
1908 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
/* Scalar min/max with explicit rounding (VMAXSD/VMAXSS/VMINSD/VMINSS):
   unmasked, merge-masked and zero-masked forms, with the usual macro
   fallbacks for builds without __OPTIMIZE__.
   NOTE(review): brace lines missing / embedded blob numbers present in
   this dump -- confirm against upstream.  */
1911 #ifdef __OPTIMIZE__
1912 extern __inline __m128d
1913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1914 _mm_max_round_sd (__m128d __A, __m128d __B, const int __R)
1916 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A,
1917 (__v2df) __B,
1918 __R);
1921 extern __inline __m128d
1922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 _mm_mask_max_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1924 __m128d __B, const int __R)
1926 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
1927 (__v2df) __B,
1928 (__v2df) __W,
1929 (__mmask8) __U, __R);
1932 extern __inline __m128d
1933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1934 _mm_maskz_max_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1935 const int __R)
1937 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
1938 (__v2df) __B,
1939 (__v2df)
1940 _mm_avx512_setzero_pd (),
1941 (__mmask8) __U, __R);
1944 extern __inline __m128
1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946 _mm_max_round_ss (__m128 __A, __m128 __B, const int __R)
1948 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A,
1949 (__v4sf) __B,
1950 __R);
1953 extern __inline __m128
1954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1955 _mm_mask_max_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
1956 __m128 __B, const int __R)
1958 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
1959 (__v4sf) __B,
1960 (__v4sf) __W,
1961 (__mmask8) __U, __R);
1964 extern __inline __m128
1965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1966 _mm_maskz_max_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
1967 const int __R)
1969 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
1970 (__v4sf) __B,
1971 (__v4sf)
1972 _mm_avx512_setzero_ps (),
1973 (__mmask8) __U, __R);
1976 extern __inline __m128d
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm_min_round_sd (__m128d __A, __m128d __B, const int __R)
1980 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A,
1981 (__v2df) __B,
1982 __R);
1985 extern __inline __m128d
1986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1987 _mm_mask_min_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
1988 __m128d __B, const int __R)
1990 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
1991 (__v2df) __B,
1992 (__v2df) __W,
1993 (__mmask8) __U, __R);
1996 extern __inline __m128d
1997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1998 _mm_maskz_min_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
1999 const int __R)
2001 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
2002 (__v2df) __B,
2003 (__v2df)
2004 _mm_avx512_setzero_pd (),
2005 (__mmask8) __U, __R);
2008 extern __inline __m128
2009 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2010 _mm_min_round_ss (__m128 __A, __m128 __B, const int __R)
2012 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A,
2013 (__v4sf) __B,
2014 __R);
2017 extern __inline __m128
2018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2019 _mm_mask_min_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
2020 __m128 __B, const int __R)
2022 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
2023 (__v4sf) __B,
2024 (__v4sf) __W,
2025 (__mmask8) __U, __R);
2028 extern __inline __m128
2029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2030 _mm_maskz_min_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
2031 const int __R)
2033 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
2034 (__v4sf) __B,
2035 (__v4sf)
2036 _mm_avx512_setzero_ps (),
2037 (__mmask8) __U, __R);
2040 #else
2041 #define _mm_max_round_sd(A, B, C) \
2042 (__m128d)__builtin_ia32_maxsd_round(A, B, C)
2044 #define _mm_mask_max_round_sd(W, U, A, B, C) \
2045 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, W, U, C)
2047 #define _mm_maskz_max_round_sd(U, A, B, C) \
2048 (__m128d)__builtin_ia32_maxsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
2050 #define _mm_max_round_ss(A, B, C) \
2051 (__m128)__builtin_ia32_maxss_round(A, B, C)
2053 #define _mm_mask_max_round_ss(W, U, A, B, C) \
2054 (__m128)__builtin_ia32_maxss_mask_round(A, B, W, U, C)
2056 #define _mm_maskz_max_round_ss(U, A, B, C) \
2057 (__m128)__builtin_ia32_maxss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
2059 #define _mm_min_round_sd(A, B, C) \
2060 (__m128d)__builtin_ia32_minsd_round(A, B, C)
2062 #define _mm_mask_min_round_sd(W, U, A, B, C) \
2063 (__m128d)__builtin_ia32_minsd_mask_round(A, B, W, U, C)
2065 #define _mm_maskz_min_round_sd(U, A, B, C) \
2066 (__m128d)__builtin_ia32_minsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U, C)
2068 #define _mm_min_round_ss(A, B, C) \
2069 (__m128)__builtin_ia32_minss_round(A, B, C)
2071 #define _mm_mask_min_round_ss(W, U, A, B, C) \
2072 (__m128)__builtin_ia32_minss_mask_round(A, B, W, U, C)
2074 #define _mm_maskz_min_round_ss(U, A, B, C) \
2075 (__m128)__builtin_ia32_minss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U, C)
2077 #endif
2079 #ifdef __OPTIMIZE__
2080 extern __inline __m128d
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2084 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2085 (__v2df) __A,
2086 (__v2df) __B,
2087 __R);
2090 extern __inline __m128
2091 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2092 _mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
2094 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2095 (__v4sf) __A,
2096 (__v4sf) __B,
2097 __R);
2100 extern __inline __m128d
2101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2102 _mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2104 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2105 (__v2df) __A,
2106 -(__v2df) __B,
2107 __R);
2110 extern __inline __m128
2111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2112 _mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
2114 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2115 (__v4sf) __A,
2116 -(__v4sf) __B,
2117 __R);
2120 extern __inline __m128d
2121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2122 _mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2124 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2125 -(__v2df) __A,
2126 (__v2df) __B,
2127 __R);
2130 extern __inline __m128
2131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2132 _mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
2134 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2135 -(__v4sf) __A,
2136 (__v4sf) __B,
2137 __R);
2140 extern __inline __m128d
2141 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2142 _mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R)
2144 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W,
2145 -(__v2df) __A,
2146 -(__v2df) __B,
2147 __R);
2150 extern __inline __m128
2151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2152 _mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R)
2154 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W,
2155 -(__v4sf) __A,
2156 -(__v4sf) __B,
2157 __R);
2159 #else
2160 #define _mm_fmadd_round_sd(A, B, C, R) \
2161 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R)
2163 #define _mm_fmadd_round_ss(A, B, C, R) \
2164 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R)
2166 #define _mm_fmsub_round_sd(A, B, C, R) \
2167 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R)
2169 #define _mm_fmsub_round_ss(A, B, C, R) \
2170 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R)
2172 #define _mm_fnmadd_round_sd(A, B, C, R) \
2173 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R)
2175 #define _mm_fnmadd_round_ss(A, B, C, R) \
2176 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R)
2178 #define _mm_fnmsub_round_sd(A, B, C, R) \
2179 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R)
2181 #define _mm_fnmsub_round_ss(A, B, C, R) \
2182 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R)
2183 #endif
2185 extern __inline __m128d
2186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2187 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2189 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2190 (__v2df) __A,
2191 (__v2df) __B,
2192 (__mmask8) __U,
2193 _MM_FROUND_CUR_DIRECTION);
2196 extern __inline __m128
2197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2198 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2200 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2201 (__v4sf) __A,
2202 (__v4sf) __B,
2203 (__mmask8) __U,
2204 _MM_FROUND_CUR_DIRECTION);
2207 extern __inline __m128d
2208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209 _mm_mask3_fmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
2211 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2212 (__v2df) __A,
2213 (__v2df) __B,
2214 (__mmask8) __U,
2215 _MM_FROUND_CUR_DIRECTION);
2218 extern __inline __m128
2219 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2220 _mm_mask3_fmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
2222 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2223 (__v4sf) __A,
2224 (__v4sf) __B,
2225 (__mmask8) __U,
2226 _MM_FROUND_CUR_DIRECTION);
2229 extern __inline __m128d
2230 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2231 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
2233 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2234 (__v2df) __A,
2235 (__v2df) __B,
2236 (__mmask8) __U,
2237 _MM_FROUND_CUR_DIRECTION);
2240 extern __inline __m128
2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
2244 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2245 (__v4sf) __A,
2246 (__v4sf) __B,
2247 (__mmask8) __U,
2248 _MM_FROUND_CUR_DIRECTION);
2251 extern __inline __m128d
2252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2253 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2255 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2256 (__v2df) __A,
2257 -(__v2df) __B,
2258 (__mmask8) __U,
2259 _MM_FROUND_CUR_DIRECTION);
2262 extern __inline __m128
2263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2264 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2266 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2267 (__v4sf) __A,
2268 -(__v4sf) __B,
2269 (__mmask8) __U,
2270 _MM_FROUND_CUR_DIRECTION);
2273 extern __inline __m128d
2274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2275 _mm_mask3_fmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
2277 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2278 (__v2df) __A,
2279 (__v2df) __B,
2280 (__mmask8) __U,
2281 _MM_FROUND_CUR_DIRECTION);
2284 extern __inline __m128
2285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2286 _mm_mask3_fmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
2288 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2289 (__v4sf) __A,
2290 (__v4sf) __B,
2291 (__mmask8) __U,
2292 _MM_FROUND_CUR_DIRECTION);
2295 extern __inline __m128d
2296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2297 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
2299 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2300 (__v2df) __A,
2301 -(__v2df) __B,
2302 (__mmask8) __U,
2303 _MM_FROUND_CUR_DIRECTION);
2306 extern __inline __m128
2307 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2308 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
2310 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2311 (__v4sf) __A,
2312 -(__v4sf) __B,
2313 (__mmask8) __U,
2314 _MM_FROUND_CUR_DIRECTION);
2317 extern __inline __m128d
2318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2321 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2322 -(__v2df) __A,
2323 (__v2df) __B,
2324 (__mmask8) __U,
2325 _MM_FROUND_CUR_DIRECTION);
2328 extern __inline __m128
2329 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2330 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2332 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2333 -(__v4sf) __A,
2334 (__v4sf) __B,
2335 (__mmask8) __U,
2336 _MM_FROUND_CUR_DIRECTION);
2339 extern __inline __m128d
2340 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2341 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
2343 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2344 -(__v2df) __A,
2345 (__v2df) __B,
2346 (__mmask8) __U,
2347 _MM_FROUND_CUR_DIRECTION);
2350 extern __inline __m128
2351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2352 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
2354 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2355 -(__v4sf) __A,
2356 (__v4sf) __B,
2357 (__mmask8) __U,
2358 _MM_FROUND_CUR_DIRECTION);
2361 extern __inline __m128d
2362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2363 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
2365 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2366 -(__v2df) __A,
2367 (__v2df) __B,
2368 (__mmask8) __U,
2369 _MM_FROUND_CUR_DIRECTION);
2372 extern __inline __m128
2373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2374 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
2376 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2377 -(__v4sf) __A,
2378 (__v4sf) __B,
2379 (__mmask8) __U,
2380 _MM_FROUND_CUR_DIRECTION);
2383 extern __inline __m128d
2384 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2385 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2387 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2388 -(__v2df) __A,
2389 -(__v2df) __B,
2390 (__mmask8) __U,
2391 _MM_FROUND_CUR_DIRECTION);
2394 extern __inline __m128
2395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2396 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2398 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2399 -(__v4sf) __A,
2400 -(__v4sf) __B,
2401 (__mmask8) __U,
2402 _MM_FROUND_CUR_DIRECTION);
2405 extern __inline __m128d
2406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2407 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U)
2409 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2410 -(__v2df) __A,
2411 (__v2df) __B,
2412 (__mmask8) __U,
2413 _MM_FROUND_CUR_DIRECTION);
2416 extern __inline __m128
2417 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2418 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U)
2420 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2421 -(__v4sf) __A,
2422 (__v4sf) __B,
2423 (__mmask8) __U,
2424 _MM_FROUND_CUR_DIRECTION);
2427 extern __inline __m128d
2428 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2429 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B)
2431 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2432 -(__v2df) __A,
2433 -(__v2df) __B,
2434 (__mmask8) __U,
2435 _MM_FROUND_CUR_DIRECTION);
2438 extern __inline __m128
2439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2440 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B)
2442 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2443 -(__v4sf) __A,
2444 -(__v4sf) __B,
2445 (__mmask8) __U,
2446 _MM_FROUND_CUR_DIRECTION);
2449 #ifdef __OPTIMIZE__
2450 extern __inline __m128d
2451 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2452 _mm_mask_fmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2453 const int __R)
2455 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2456 (__v2df) __A,
2457 (__v2df) __B,
2458 (__mmask8) __U, __R);
2461 extern __inline __m128
2462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2463 _mm_mask_fmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2464 const int __R)
2466 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2467 (__v4sf) __A,
2468 (__v4sf) __B,
2469 (__mmask8) __U, __R);
2472 extern __inline __m128d
2473 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2474 _mm_mask3_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2475 const int __R)
2477 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2478 (__v2df) __A,
2479 (__v2df) __B,
2480 (__mmask8) __U, __R);
2483 extern __inline __m128
2484 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2485 _mm_mask3_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2486 const int __R)
2488 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2489 (__v4sf) __A,
2490 (__v4sf) __B,
2491 (__mmask8) __U, __R);
2494 extern __inline __m128d
2495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2496 _mm_maskz_fmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2497 const int __R)
2499 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2500 (__v2df) __A,
2501 (__v2df) __B,
2502 (__mmask8) __U, __R);
2505 extern __inline __m128
2506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2507 _mm_maskz_fmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2508 const int __R)
2510 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2511 (__v4sf) __A,
2512 (__v4sf) __B,
2513 (__mmask8) __U, __R);
2516 extern __inline __m128d
2517 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2518 _mm_mask_fmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2519 const int __R)
2521 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2522 (__v2df) __A,
2523 -(__v2df) __B,
2524 (__mmask8) __U, __R);
2527 extern __inline __m128
2528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2529 _mm_mask_fmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2530 const int __R)
2532 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2533 (__v4sf) __A,
2534 -(__v4sf) __B,
2535 (__mmask8) __U, __R);
2538 extern __inline __m128d
2539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2540 _mm_mask3_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2541 const int __R)
2543 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2544 (__v2df) __A,
2545 (__v2df) __B,
2546 (__mmask8) __U, __R);
2549 extern __inline __m128
2550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2551 _mm_mask3_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2552 const int __R)
2554 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2555 (__v4sf) __A,
2556 (__v4sf) __B,
2557 (__mmask8) __U, __R);
2560 extern __inline __m128d
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm_maskz_fmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2563 const int __R)
2565 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2566 (__v2df) __A,
2567 -(__v2df) __B,
2568 (__mmask8) __U, __R);
2571 extern __inline __m128
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm_maskz_fmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2574 const int __R)
2576 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2577 (__v4sf) __A,
2578 -(__v4sf) __B,
2579 (__mmask8) __U, __R);
2582 extern __inline __m128d
2583 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2584 _mm_mask_fnmadd_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2585 const int __R)
2587 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2588 -(__v2df) __A,
2589 (__v2df) __B,
2590 (__mmask8) __U, __R);
2593 extern __inline __m128
2594 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2595 _mm_mask_fnmadd_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2596 const int __R)
2598 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2599 -(__v4sf) __A,
2600 (__v4sf) __B,
2601 (__mmask8) __U, __R);
2604 extern __inline __m128d
2605 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2606 _mm_mask3_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2607 const int __R)
2609 return (__m128d) __builtin_ia32_vfmaddsd3_mask3 ((__v2df) __W,
2610 -(__v2df) __A,
2611 (__v2df) __B,
2612 (__mmask8) __U, __R);
2615 extern __inline __m128
2616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617 _mm_mask3_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2618 const int __R)
2620 return (__m128) __builtin_ia32_vfmaddss3_mask3 ((__v4sf) __W,
2621 -(__v4sf) __A,
2622 (__v4sf) __B,
2623 (__mmask8) __U, __R);
2626 extern __inline __m128d
2627 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2628 _mm_maskz_fnmadd_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2629 const int __R)
2631 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2632 -(__v2df) __A,
2633 (__v2df) __B,
2634 (__mmask8) __U, __R);
2637 extern __inline __m128
2638 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2639 _mm_maskz_fnmadd_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2640 const int __R)
2642 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2643 -(__v4sf) __A,
2644 (__v4sf) __B,
2645 (__mmask8) __U, __R);
2648 extern __inline __m128d
2649 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2650 _mm_mask_fnmsub_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
2651 const int __R)
2653 return (__m128d) __builtin_ia32_vfmaddsd3_mask ((__v2df) __W,
2654 -(__v2df) __A,
2655 -(__v2df) __B,
2656 (__mmask8) __U, __R);
2659 extern __inline __m128
2660 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2661 _mm_mask_fnmsub_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
2662 const int __R)
2664 return (__m128) __builtin_ia32_vfmaddss3_mask ((__v4sf) __W,
2665 -(__v4sf) __A,
2666 -(__v4sf) __B,
2667 (__mmask8) __U, __R);
2670 extern __inline __m128d
2671 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2672 _mm_mask3_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, __mmask8 __U,
2673 const int __R)
2675 return (__m128d) __builtin_ia32_vfmsubsd3_mask3 ((__v2df) __W,
2676 -(__v2df) __A,
2677 (__v2df) __B,
2678 (__mmask8) __U, __R);
2681 extern __inline __m128
2682 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2683 _mm_mask3_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, __mmask8 __U,
2684 const int __R)
2686 return (__m128) __builtin_ia32_vfmsubss3_mask3 ((__v4sf) __W,
2687 -(__v4sf) __A,
2688 (__v4sf) __B,
2689 (__mmask8) __U, __R);
2692 extern __inline __m128d
2693 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2694 _mm_maskz_fnmsub_round_sd (__mmask8 __U, __m128d __W, __m128d __A, __m128d __B,
2695 const int __R)
2697 return (__m128d) __builtin_ia32_vfmaddsd3_maskz ((__v2df) __W,
2698 -(__v2df) __A,
2699 -(__v2df) __B,
2700 (__mmask8) __U, __R);
2703 extern __inline __m128
2704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2705 _mm_maskz_fnmsub_round_ss (__mmask8 __U, __m128 __W, __m128 __A, __m128 __B,
2706 const int __R)
2708 return (__m128) __builtin_ia32_vfmaddss3_maskz ((__v4sf) __W,
2709 -(__v4sf) __A,
2710 -(__v4sf) __B,
2711 (__mmask8) __U, __R);
2713 #else
2714 #define _mm_mask_fmadd_round_sd(A, U, B, C, R) \
2715 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, C, U, R)
2717 #define _mm_mask_fmadd_round_ss(A, U, B, C, R) \
2718 (__m128) __builtin_ia32_vfmaddss3_mask (A, B, C, U, R)
2720 #define _mm_mask3_fmadd_round_sd(A, B, C, U, R) \
2721 (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, B, C, U, R)
2723 #define _mm_mask3_fmadd_round_ss(A, B, C, U, R) \
2724 (__m128) __builtin_ia32_vfmaddss3_mask3 (A, B, C, U, R)
2726 #define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
2727 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, C, U, R)
2729 #define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
2730 (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, C, U, R)
2732 #define _mm_mask_fmsub_round_sd(A, U, B, C, R) \
2733 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, B, -(C), U, R)
2735 #define _mm_mask_fmsub_round_ss(A, U, B, C, R) \
2736 (__m128) __builtin_ia32_vfmaddss3_mask (A, B, -(C), U, R)
2738 #define _mm_mask3_fmsub_round_sd(A, B, C, U, R) \
2739 (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, B, C, U, R)
2741 #define _mm_mask3_fmsub_round_ss(A, B, C, U, R) \
2742 (__m128) __builtin_ia32_vfmsubss3_mask3 (A, B, C, U, R)
2744 #define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
2745 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, B, -(C), U, R)
2747 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
2748 (__m128) __builtin_ia32_vfmaddss3_maskz (A, B, -(C), U, R)
2750 #define _mm_mask_fnmadd_round_sd(A, U, B, C, R) \
2751 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), C, U, R)
2753 #define _mm_mask_fnmadd_round_ss(A, U, B, C, R) \
2754 (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), C, U, R)
2756 #define _mm_mask3_fnmadd_round_sd(A, B, C, U, R) \
2757 (__m128d) __builtin_ia32_vfmaddsd3_mask3 (A, -(B), C, U, R)
2759 #define _mm_mask3_fnmadd_round_ss(A, B, C, U, R) \
2760 (__m128) __builtin_ia32_vfmaddss3_mask3 (A, -(B), C, U, R)
2762 #define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
2763 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), C, U, R)
2765 #define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
2766 (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), C, U, R)
2768 #define _mm_mask_fnmsub_round_sd(A, U, B, C, R) \
2769 (__m128d) __builtin_ia32_vfmaddsd3_mask (A, -(B), -(C), U, R)
2771 #define _mm_mask_fnmsub_round_ss(A, U, B, C, R) \
2772 (__m128) __builtin_ia32_vfmaddss3_mask (A, -(B), -(C), U, R)
2774 #define _mm_mask3_fnmsub_round_sd(A, B, C, U, R) \
2775 (__m128d) __builtin_ia32_vfmsubsd3_mask3 (A, -(B), C, U, R)
2777 #define _mm_mask3_fnmsub_round_ss(A, B, C, U, R) \
2778 (__m128) __builtin_ia32_vfmsubss3_mask3 (A, -(B), C, U, R)
2780 #define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
2781 (__m128d) __builtin_ia32_vfmaddsd3_maskz (A, -(B), -(C), U, R)
2783 #define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
2784 (__m128) __builtin_ia32_vfmaddss3_maskz (A, -(B), -(C), U, R)
2785 #endif
#ifdef __OPTIMIZE__
/* Scalar compare of the low elements with predicate __P and SAE /
   rounding control __R; returns the comparison result as int.  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R);
}

extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R)
{
  return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R);
}
#else
/* Macro fallbacks keep __P/__R literal constants when not optimizing.  */
#define _mm_comi_round_ss(A, B, C, D)\
__builtin_ia32_vcomiss(A, B, C, D)
#define _mm_comi_round_sd(A, B, C, D)\
__builtin_ia32_vcomisd(A, B, C, D)
#endif
2808 extern __inline __m128d
2809 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2810 _mm_mask_add_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2812 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
2813 (__v2df) __B,
2814 (__v2df) __W,
2815 (__mmask8) __U,
2816 _MM_FROUND_CUR_DIRECTION);
2819 extern __inline __m128d
2820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2821 _mm_maskz_add_sd (__mmask8 __U, __m128d __A, __m128d __B)
2823 return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
2824 (__v2df) __B,
2825 (__v2df)
2826 _mm_avx512_setzero_pd (),
2827 (__mmask8) __U,
2828 _MM_FROUND_CUR_DIRECTION);
2831 extern __inline __m128
2832 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2833 _mm_mask_add_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2835 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
2836 (__v4sf) __B,
2837 (__v4sf) __W,
2838 (__mmask8) __U,
2839 _MM_FROUND_CUR_DIRECTION);
2842 extern __inline __m128
2843 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2844 _mm_maskz_add_ss (__mmask8 __U, __m128 __A, __m128 __B)
2846 return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
2847 (__v4sf) __B,
2848 (__v4sf)
2849 _mm_avx512_setzero_ps (),
2850 (__mmask8) __U,
2851 _MM_FROUND_CUR_DIRECTION);
2854 extern __inline __m128d
2855 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2856 _mm_mask_sub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
2858 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
2859 (__v2df) __B,
2860 (__v2df) __W,
2861 (__mmask8) __U,
2862 _MM_FROUND_CUR_DIRECTION);
2865 extern __inline __m128d
2866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2867 _mm_maskz_sub_sd (__mmask8 __U, __m128d __A, __m128d __B)
2869 return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
2870 (__v2df) __B,
2871 (__v2df)
2872 _mm_avx512_setzero_pd (),
2873 (__mmask8) __U,
2874 _MM_FROUND_CUR_DIRECTION);
2877 extern __inline __m128
2878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2879 _mm_mask_sub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
2881 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
2882 (__v4sf) __B,
2883 (__v4sf) __W,
2884 (__mmask8) __U,
2885 _MM_FROUND_CUR_DIRECTION);
2888 extern __inline __m128
2889 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2890 _mm_maskz_sub_ss (__mmask8 __U, __m128 __A, __m128 __B)
2892 return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
2893 (__v4sf) __B,
2894 (__v4sf)
2895 _mm_avx512_setzero_ps (),
2896 (__mmask8) __U,
2897 _MM_FROUND_CUR_DIRECTION);
2900 extern __inline __m128d
2901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2902 _mm_mask_mul_sd (__m128d __W, __mmask8 __U, __m128d __A,
2903 __m128d __B)
2905 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2906 (__v2df) __B,
2907 (__v2df) __W,
2908 (__mmask8) __U,
2909 _MM_FROUND_CUR_DIRECTION);
2912 extern __inline __m128d
2913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2914 _mm_maskz_mul_sd (__mmask8 __U, __m128d __A, __m128d __B)
2916 return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
2917 (__v2df) __B,
2918 (__v2df)
2919 _mm_avx512_setzero_pd (),
2920 (__mmask8) __U,
2921 _MM_FROUND_CUR_DIRECTION);
2924 extern __inline __m128
2925 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2926 _mm_mask_mul_ss (__m128 __W, __mmask8 __U, __m128 __A,
2927 __m128 __B)
2929 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2930 (__v4sf) __B,
2931 (__v4sf) __W,
2932 (__mmask8) __U,
2933 _MM_FROUND_CUR_DIRECTION);
2936 extern __inline __m128
2937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2938 _mm_maskz_mul_ss (__mmask8 __U, __m128 __A, __m128 __B)
2940 return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
2941 (__v4sf) __B,
2942 (__v4sf)
2943 _mm_avx512_setzero_ps (),
2944 (__mmask8) __U,
2945 _MM_FROUND_CUR_DIRECTION);
2948 extern __inline __m128d
2949 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2950 _mm_mask_div_sd (__m128d __W, __mmask8 __U, __m128d __A,
2951 __m128d __B)
2953 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2954 (__v2df) __B,
2955 (__v2df) __W,
2956 (__mmask8) __U,
2957 _MM_FROUND_CUR_DIRECTION);
2960 extern __inline __m128d
2961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2962 _mm_maskz_div_sd (__mmask8 __U, __m128d __A, __m128d __B)
2964 return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
2965 (__v2df) __B,
2966 (__v2df)
2967 _mm_avx512_setzero_pd (),
2968 (__mmask8) __U,
2969 _MM_FROUND_CUR_DIRECTION);
2972 extern __inline __m128
2973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2974 _mm_mask_div_ss (__m128 __W, __mmask8 __U, __m128 __A,
2975 __m128 __B)
2977 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2978 (__v4sf) __B,
2979 (__v4sf) __W,
2980 (__mmask8) __U,
2981 _MM_FROUND_CUR_DIRECTION);
2984 extern __inline __m128
2985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2986 _mm_maskz_div_ss (__mmask8 __U, __m128 __A, __m128 __B)
2988 return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
2989 (__v4sf) __B,
2990 (__v4sf)
2991 _mm_avx512_setzero_ps (),
2992 (__mmask8) __U,
2993 _MM_FROUND_CUR_DIRECTION);
2996 extern __inline __m128d
2997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2998 _mm_mask_max_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
3000 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
3001 (__v2df) __B,
3002 (__v2df) __W,
3003 (__mmask8) __U,
3004 _MM_FROUND_CUR_DIRECTION);
3007 extern __inline __m128d
3008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3009 _mm_maskz_max_sd (__mmask8 __U, __m128d __A, __m128d __B)
3011 return (__m128d) __builtin_ia32_maxsd_mask_round ((__v2df) __A,
3012 (__v2df) __B,
3013 (__v2df)
3014 _mm_avx512_setzero_pd (),
3015 (__mmask8) __U,
3016 _MM_FROUND_CUR_DIRECTION);
3019 extern __inline __m128
3020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3021 _mm_mask_max_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3023 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
3024 (__v4sf) __B,
3025 (__v4sf) __W,
3026 (__mmask8) __U,
3027 _MM_FROUND_CUR_DIRECTION);
3030 extern __inline __m128
3031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3032 _mm_maskz_max_ss (__mmask8 __U, __m128 __A, __m128 __B)
3034 return (__m128) __builtin_ia32_maxss_mask_round ((__v4sf) __A,
3035 (__v4sf) __B,
3036 (__v4sf)
3037 _mm_avx512_setzero_ps (),
3038 (__mmask8) __U,
3039 _MM_FROUND_CUR_DIRECTION);
3042 extern __inline __m128d
3043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3044 _mm_mask_min_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
3046 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
3047 (__v2df) __B,
3048 (__v2df) __W,
3049 (__mmask8) __U,
3050 _MM_FROUND_CUR_DIRECTION);
3053 extern __inline __m128d
3054 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3055 _mm_maskz_min_sd (__mmask8 __U, __m128d __A, __m128d __B)
3057 return (__m128d) __builtin_ia32_minsd_mask_round ((__v2df) __A,
3058 (__v2df) __B,
3059 (__v2df)
3060 _mm_avx512_setzero_pd (),
3061 (__mmask8) __U,
3062 _MM_FROUND_CUR_DIRECTION);
3065 extern __inline __m128
3066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3067 _mm_mask_min_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3069 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
3070 (__v4sf) __B,
3071 (__v4sf) __W,
3072 (__mmask8) __U,
3073 _MM_FROUND_CUR_DIRECTION);
3076 extern __inline __m128
3077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3078 _mm_maskz_min_ss (__mmask8 __U, __m128 __A, __m128 __B)
3080 return (__m128) __builtin_ia32_minss_mask_round ((__v4sf) __A,
3081 (__v4sf) __B,
3082 (__v4sf)
3083 _mm_avx512_setzero_ps (),
3084 (__mmask8) __U,
3085 _MM_FROUND_CUR_DIRECTION);
3088 extern __inline __m128d
3089 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3090 _mm_scalef_sd (__m128d __A, __m128d __B)
3092 return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A,
3093 (__v2df) __B,
3094 (__v2df)
3095 _mm_avx512_setzero_pd (),
3096 (__mmask8) -1,
3097 _MM_FROUND_CUR_DIRECTION);
3100 extern __inline __m128
3101 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3102 _mm_scalef_ss (__m128 __A, __m128 __B)
3104 return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A,
3105 (__v4sf) __B,
3106 (__v4sf)
3107 _mm_avx512_setzero_ps (),
3108 (__mmask8) -1,
3109 _MM_FROUND_CUR_DIRECTION);
3112 #ifdef __x86_64__
3113 extern __inline __m128
3114 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3115 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
3117 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B,
3118 _MM_FROUND_CUR_DIRECTION);
3121 extern __inline __m128d
3122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3123 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
3125 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B,
3126 _MM_FROUND_CUR_DIRECTION);
3128 #endif
3130 extern __inline __m128
3131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3132 _mm_cvtu32_ss (__m128 __A, unsigned __B)
3134 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B,
3135 _MM_FROUND_CUR_DIRECTION);
3138 #ifdef __OPTIMIZE__
3139 extern __inline __m128d
3140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3141 _mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm)
3143 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
3144 (__v2df) __B,
3145 (__v2di) __C, __imm,
3146 (__mmask8) -1,
3147 _MM_FROUND_CUR_DIRECTION);
3150 extern __inline __m128d
3151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3152 _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
3153 __m128i __C, const int __imm)
3155 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A,
3156 (__v2df) __B,
3157 (__v2di) __C, __imm,
3158 (__mmask8) __U,
3159 _MM_FROUND_CUR_DIRECTION);
3162 extern __inline __m128d
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B,
3165 __m128i __C, const int __imm)
3167 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A,
3168 (__v2df) __B,
3169 (__v2di) __C,
3170 __imm,
3171 (__mmask8) __U,
3172 _MM_FROUND_CUR_DIRECTION);
3175 extern __inline __m128
3176 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3177 _mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm)
3179 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
3180 (__v4sf) __B,
3181 (__v4si) __C, __imm,
3182 (__mmask8) -1,
3183 _MM_FROUND_CUR_DIRECTION);
3186 extern __inline __m128
3187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188 _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
3189 __m128i __C, const int __imm)
3191 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A,
3192 (__v4sf) __B,
3193 (__v4si) __C, __imm,
3194 (__mmask8) __U,
3195 _MM_FROUND_CUR_DIRECTION);
3198 extern __inline __m128
3199 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3200 _mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B,
3201 __m128i __C, const int __imm)
3203 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A,
3204 (__v4sf) __B,
3205 (__v4si) __C, __imm,
3206 (__mmask8) __U,
3207 _MM_FROUND_CUR_DIRECTION);
3210 #else
3211 #define _mm_fixupimm_sd(X, Y, Z, C) \
3212 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
3213 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
3214 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
3216 #define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \
3217 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \
3218 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
3219 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
3221 #define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \
3222 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \
3223 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \
3224 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
3226 #define _mm_fixupimm_ss(X, Y, Z, C) \
3227 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
3228 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
3229 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
3231 #define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \
3232 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \
3233 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
3234 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
3236 #define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \
3237 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \
3238 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \
3239 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
3241 #endif
3243 #ifdef __x86_64__
3244 extern __inline unsigned long long
3245 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3246 _mm_cvtss_u64 (__m128 __A)
3248 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
3249 __A,
3250 _MM_FROUND_CUR_DIRECTION);
3253 extern __inline unsigned long long
3254 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3255 _mm_cvttss_u64 (__m128 __A)
3257 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
3258 __A,
3259 _MM_FROUND_CUR_DIRECTION);
3262 extern __inline long long
3263 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3264 _mm_cvttss_i64 (__m128 __A)
3266 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
3267 _MM_FROUND_CUR_DIRECTION);
3269 #endif /* __x86_64__ */
3271 extern __inline unsigned
3272 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3273 _mm_cvtss_u32 (__m128 __A)
3275 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
3276 _MM_FROUND_CUR_DIRECTION);
3279 extern __inline unsigned
3280 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3281 _mm_cvttss_u32 (__m128 __A)
3283 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
3284 _MM_FROUND_CUR_DIRECTION);
3287 extern __inline int
3288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3289 _mm_cvttss_i32 (__m128 __A)
3291 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
3292 _MM_FROUND_CUR_DIRECTION);
3295 extern __inline int
3296 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3297 _mm_cvtsd_i32 (__m128d __A)
3299 return (int) __builtin_ia32_cvtsd2si ((__v2df) __A);
3302 extern __inline int
3303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3304 _mm_cvtss_i32 (__m128 __A)
3306 return (int) __builtin_ia32_cvtss2si ((__v4sf) __A);
3309 extern __inline __m128d
3310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311 _mm_cvti32_sd (__m128d __A, int __B)
3313 return (__m128d) __builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
3316 extern __inline __m128
3317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3318 _mm_cvti32_ss (__m128 __A, int __B)
3320 return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
3323 #ifdef __x86_64__
3324 extern __inline unsigned long long
3325 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3326 _mm_cvtsd_u64 (__m128d __A)
3328 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
3329 __A,
3330 _MM_FROUND_CUR_DIRECTION);
3333 extern __inline unsigned long long
3334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3335 _mm_cvttsd_u64 (__m128d __A)
3337 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
3338 __A,
3339 _MM_FROUND_CUR_DIRECTION);
3342 extern __inline long long
3343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3344 _mm_cvttsd_i64 (__m128d __A)
3346 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
3347 _MM_FROUND_CUR_DIRECTION);
3350 extern __inline long long
3351 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3352 _mm_cvtsd_i64 (__m128d __A)
3354 return (long long) __builtin_ia32_cvtsd2si64 ((__v2df) __A);
3357 extern __inline long long
3358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3359 _mm_cvtss_i64 (__m128 __A)
3361 return (long long) __builtin_ia32_cvtss2si64 ((__v4sf) __A);
3364 extern __inline __m128d
3365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3366 _mm_cvti64_sd (__m128d __A, long long __B)
3368 return (__m128d) __builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
3371 extern __inline __m128
3372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3373 _mm_cvti64_ss (__m128 __A, long long __B)
3375 return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
3377 #endif /* __x86_64__ */
3379 extern __inline unsigned
3380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3381 _mm_cvtsd_u32 (__m128d __A)
3383 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
3384 _MM_FROUND_CUR_DIRECTION);
3387 extern __inline unsigned
3388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3389 _mm_cvttsd_u32 (__m128d __A)
3391 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
3392 _MM_FROUND_CUR_DIRECTION);
3395 extern __inline int
3396 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3397 _mm_cvttsd_i32 (__m128d __A)
3399 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
3400 _MM_FROUND_CUR_DIRECTION);
3403 #ifdef __OPTIMIZE__
3404 extern __inline __m128
3405 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3406 _mm_getexp_ss (__m128 __A, __m128 __B)
3408 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A,
3409 (__v4sf) __B,
3410 _MM_FROUND_CUR_DIRECTION);
3413 extern __inline __m128
3414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3415 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
3417 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
3418 (__v4sf) __B,
3419 (__v4sf) __W,
3420 (__mmask8) __U,
3421 _MM_FROUND_CUR_DIRECTION);
3424 extern __inline __m128
3425 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3426 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
3428 return (__m128) __builtin_ia32_getexpss_mask_round ((__v4sf) __A,
3429 (__v4sf) __B,
3430 (__v4sf)
3431 _mm_avx512_setzero_ps (),
3432 (__mmask8) __U,
3433 _MM_FROUND_CUR_DIRECTION);
3436 extern __inline __m128d
3437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3438 _mm_getexp_sd (__m128d __A, __m128d __B)
3440 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A,
3441 (__v2df) __B,
3442 _MM_FROUND_CUR_DIRECTION);
3445 extern __inline __m128d
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
3449 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
3450 (__v2df) __B,
3451 (__v2df) __W,
3452 (__mmask8) __U,
3453 _MM_FROUND_CUR_DIRECTION);
3456 extern __inline __m128d
3457 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3458 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
3460 return (__m128d) __builtin_ia32_getexpsd_mask_round ((__v2df) __A,
3461 (__v2df) __B,
3462 (__v2df)
3463 _mm_avx512_setzero_pd (),
3464 (__mmask8) __U,
3465 _MM_FROUND_CUR_DIRECTION);
3468 extern __inline __m128d
3469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3470 _mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C,
3471 _MM_MANTISSA_SIGN_ENUM __D)
3473 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A,
3474 (__v2df) __B,
3475 (__D << 2) | __C,
3476 _MM_FROUND_CUR_DIRECTION);
3479 extern __inline __m128d
3480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3481 _mm_mask_getmant_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
3482 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
3484 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
3485 (__v2df) __B,
3486 (__D << 2) | __C,
3487 (__v2df) __W,
3488 __U,
3489 _MM_FROUND_CUR_DIRECTION);
3492 extern __inline __m128d
3493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3494 _mm_maskz_getmant_sd (__mmask8 __U, __m128d __A, __m128d __B,
3495 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
3497 return (__m128d) __builtin_ia32_getmantsd_mask_round ((__v2df) __A,
3498 (__v2df) __B,
3499 (__D << 2) | __C,
3500 (__v2df)
3501 _mm_avx512_setzero_pd(),
3502 __U,
3503 _MM_FROUND_CUR_DIRECTION);
3506 extern __inline __m128
3507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3508 _mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C,
3509 _MM_MANTISSA_SIGN_ENUM __D)
3511 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A,
3512 (__v4sf) __B,
3513 (__D << 2) | __C,
3514 _MM_FROUND_CUR_DIRECTION);
3517 extern __inline __m128
3518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3519 _mm_mask_getmant_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
3520 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
3522 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
3523 (__v4sf) __B,
3524 (__D << 2) | __C,
3525 (__v4sf) __W,
3526 __U,
3527 _MM_FROUND_CUR_DIRECTION);
3530 extern __inline __m128
3531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3532 _mm_maskz_getmant_ss (__mmask8 __U, __m128 __A, __m128 __B,
3533 _MM_MANTISSA_NORM_ENUM __C, _MM_MANTISSA_SIGN_ENUM __D)
3535 return (__m128) __builtin_ia32_getmantss_mask_round ((__v4sf) __A,
3536 (__v4sf) __B,
3537 (__D << 2) | __C,
3538 (__v4sf)
3539 _mm_avx512_setzero_ps(),
3540 __U,
3541 _MM_FROUND_CUR_DIRECTION);
3544 extern __inline __m128
3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546 _mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm)
3548 return (__m128)
3549 __builtin_ia32_rndscaless_mask_round ((__v4sf) __A,
3550 (__v4sf) __B, __imm,
3551 (__v4sf)
3552 _mm_avx512_setzero_ps (),
3553 (__mmask8) -1,
3554 _MM_FROUND_CUR_DIRECTION);
3558 extern __inline __m128
3559 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3560 _mm_mask_roundscale_ss (__m128 __A, __mmask8 __B, __m128 __C, __m128 __D,
3561 const int __imm)
3563 return (__m128)
3564 __builtin_ia32_rndscaless_mask_round ((__v4sf) __C,
3565 (__v4sf) __D, __imm,
3566 (__v4sf) __A,
3567 (__mmask8) __B,
3568 _MM_FROUND_CUR_DIRECTION);
3571 extern __inline __m128
3572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3573 _mm_maskz_roundscale_ss (__mmask8 __A, __m128 __B, __m128 __C,
3574 const int __imm)
3576 return (__m128)
3577 __builtin_ia32_rndscaless_mask_round ((__v4sf) __B,
3578 (__v4sf) __C, __imm,
3579 (__v4sf)
3580 _mm_avx512_setzero_ps (),
3581 (__mmask8) __A,
3582 _MM_FROUND_CUR_DIRECTION);
3585 extern __inline __m128d
3586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3587 _mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm)
3589 return (__m128d)
3590 __builtin_ia32_rndscalesd_mask_round ((__v2df) __A,
3591 (__v2df) __B, __imm,
3592 (__v2df)
3593 _mm_avx512_setzero_pd (),
3594 (__mmask8) -1,
3595 _MM_FROUND_CUR_DIRECTION);
3598 extern __inline __m128d
3599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3600 _mm_mask_roundscale_sd (__m128d __A, __mmask8 __B, __m128d __C, __m128d __D,
3601 const int __imm)
3603 return (__m128d)
3604 __builtin_ia32_rndscalesd_mask_round ((__v2df) __C,
3605 (__v2df) __D, __imm,
3606 (__v2df) __A,
3607 (__mmask8) __B,
3608 _MM_FROUND_CUR_DIRECTION);
3611 extern __inline __m128d
3612 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3613 _mm_maskz_roundscale_sd (__mmask8 __A, __m128d __B, __m128d __C,
3614 const int __imm)
3616 return (__m128d)
3617 __builtin_ia32_rndscalesd_mask_round ((__v2df) __B,
3618 (__v2df) __C, __imm,
3619 (__v2df)
3620 _mm_avx512_setzero_pd (),
3621 (__mmask8) __A,
3622 _MM_FROUND_CUR_DIRECTION);
3625 extern __inline __mmask8
3626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3627 _mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P)
3629 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
3630 (__v2df) __Y, __P,
3631 (__mmask8) -1,
3632 _MM_FROUND_CUR_DIRECTION);
3635 extern __inline __mmask8
3636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3637 _mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P)
3639 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X,
3640 (__v2df) __Y, __P,
3641 (__mmask8) __M,
3642 _MM_FROUND_CUR_DIRECTION);
3645 extern __inline __mmask8
3646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3647 _mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P)
3649 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
3650 (__v4sf) __Y, __P,
3651 (__mmask8) -1,
3652 _MM_FROUND_CUR_DIRECTION);
3655 extern __inline __mmask8
3656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3657 _mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P)
3659 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X,
3660 (__v4sf) __Y, __P,
3661 (__mmask8) __M,
3662 _MM_FROUND_CUR_DIRECTION);
3665 #else
3666 #define _mm_getmant_sd(X, Y, C, D) \
3667 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \
3668 (__v2df)(__m128d)(Y), \
3669 (int)(((D)<<2) | (C)), \
3670 _MM_FROUND_CUR_DIRECTION))
3672 #define _mm_mask_getmant_sd(W, U, X, Y, C, D) \
3673 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
3674 (__v2df)(__m128d)(Y), \
3675 (int)(((D)<<2) | (C)), \
3676 (__v2df)(__m128d)(W), \
3677 (__mmask8)(U),\
3678 _MM_FROUND_CUR_DIRECTION))
3680 #define _mm_maskz_getmant_sd(U, X, Y, C, D) \
3681 ((__m128d)__builtin_ia32_getmantsd_mask_round ((__v2df)(__m128d)(X), \
3682 (__v2df)(__m128d)(Y), \
3683 (int)(((D)<<2) | (C)), \
3684 (__v2df)_mm_avx512_setzero_pd(), \
3685 (__mmask8)(U),\
3686 _MM_FROUND_CUR_DIRECTION))
3688 #define _mm_getmant_ss(X, Y, C, D) \
3689 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \
3690 (__v4sf)(__m128)(Y), \
3691 (int)(((D)<<2) | (C)), \
3692 _MM_FROUND_CUR_DIRECTION))
3694 #define _mm_mask_getmant_ss(W, U, X, Y, C, D) \
3695 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
3696 (__v4sf)(__m128)(Y), \
3697 (int)(((D)<<2) | (C)), \
3698 (__v4sf)(__m128)(W), \
3699 (__mmask8)(U),\
3700 _MM_FROUND_CUR_DIRECTION))
3702 #define _mm_maskz_getmant_ss(U, X, Y, C, D) \
3703 ((__m128)__builtin_ia32_getmantss_mask_round ((__v4sf)(__m128)(X), \
3704 (__v4sf)(__m128)(Y), \
3705 (int)(((D)<<2) | (C)), \
3706 (__v4sf)_mm_avx512_setzero_ps(), \
3707 (__mmask8)(U),\
3708 _MM_FROUND_CUR_DIRECTION))
3710 #define _mm_getexp_ss(A, B) \
3711 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
3712 _MM_FROUND_CUR_DIRECTION))
3714 #define _mm_mask_getexp_ss(W, U, A, B) \
3715 (__m128)__builtin_ia32_getexpss_mask_round(A, B, W, U,\
3716 _MM_FROUND_CUR_DIRECTION)
3718 #define _mm_maskz_getexp_ss(U, A, B) \
3719 (__m128)__builtin_ia32_getexpss_mask_round(A, B, (__v4sf)_mm_avx512_setzero_ps(), U,\
3720 _MM_FROUND_CUR_DIRECTION)
3722 #define _mm_getexp_sd(A, B) \
3723 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\
3724 _MM_FROUND_CUR_DIRECTION))
3726 #define _mm_mask_getexp_sd(W, U, A, B) \
3727 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, W, U,\
3728 _MM_FROUND_CUR_DIRECTION)
3730 #define _mm_maskz_getexp_sd(U, A, B) \
3731 (__m128d)__builtin_ia32_getexpsd_mask_round(A, B, (__v2df)_mm_avx512_setzero_pd(), U,\
3732 _MM_FROUND_CUR_DIRECTION)
3734 #define _mm_roundscale_ss(A, B, I) \
3735 ((__m128) \
3736 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
3737 (__v4sf) (__m128) (B), \
3738 (int) (I), \
3739 (__v4sf) _mm_avx512_setzero_ps (), \
3740 (__mmask8) (-1), \
3741 _MM_FROUND_CUR_DIRECTION))
3742 #define _mm_mask_roundscale_ss(A, U, B, C, I) \
3743 ((__m128) \
3744 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (B), \
3745 (__v4sf) (__m128) (C), \
3746 (int) (I), \
3747 (__v4sf) (__m128) (A), \
3748 (__mmask8) (U), \
3749 _MM_FROUND_CUR_DIRECTION))
3750 #define _mm_maskz_roundscale_ss(U, A, B, I) \
3751 ((__m128) \
3752 __builtin_ia32_rndscaless_mask_round ((__v4sf) (__m128) (A), \
3753 (__v4sf) (__m128) (B), \
3754 (int) (I), \
3755 (__v4sf) _mm_avx512_setzero_ps (), \
3756 (__mmask8) (U), \
3757 _MM_FROUND_CUR_DIRECTION))
3758 #define _mm_roundscale_sd(A, B, I) \
3759 ((__m128d) \
3760 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
3761 (__v2df) (__m128d) (B), \
3762 (int) (I), \
3763 (__v2df) _mm_avx512_setzero_pd (), \
3764 (__mmask8) (-1), \
3765 _MM_FROUND_CUR_DIRECTION))
3766 #define _mm_mask_roundscale_sd(A, U, B, C, I) \
3767 ((__m128d) \
3768 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (B), \
3769 (__v2df) (__m128d) (C), \
3770 (int) (I), \
3771 (__v2df) (__m128d) (A), \
3772 (__mmask8) (U), \
3773 _MM_FROUND_CUR_DIRECTION))
3774 #define _mm_maskz_roundscale_sd(U, A, B, I) \
3775 ((__m128d) \
3776 __builtin_ia32_rndscalesd_mask_round ((__v2df) (__m128d) (A), \
3777 (__v2df) (__m128d) (B), \
3778 (int) (I), \
3779 (__v2df) _mm_avx512_setzero_pd (), \
3780 (__mmask8) (U), \
3781 _MM_FROUND_CUR_DIRECTION))
3783 #define _mm_cmp_sd_mask(X, Y, P) \
3784 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
3785 (__v2df)(__m128d)(Y), (int)(P),\
3786 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
3788 #define _mm_mask_cmp_sd_mask(M, X, Y, P) \
3789 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \
3790 (__v2df)(__m128d)(Y), (int)(P),\
3791 M,_MM_FROUND_CUR_DIRECTION))
3793 #define _mm_cmp_ss_mask(X, Y, P) \
3794 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
3795 (__v4sf)(__m128)(Y), (int)(P), \
3796 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))
3798 #define _mm_mask_cmp_ss_mask(M, X, Y, P) \
3799 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \
3800 (__v4sf)(__m128)(Y), (int)(P), \
3801 M,_MM_FROUND_CUR_DIRECTION))
3803 #endif
3805 #ifdef __DISABLE_AVX512F__
3806 #undef __DISABLE_AVX512F__
3807 #pragma GCC pop_options
3808 #endif /* __DISABLE_AVX512F__ */
3810 #if !defined (__AVX512F__) || !defined (__EVEX512__)
3811 #pragma GCC push_options
3812 #pragma GCC target("avx512f,evex512")
3813 #define __DISABLE_AVX512F_512__
3814 #endif /* __AVX512F_512__ */
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Unaligned version of the same type.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
3839 extern __inline __mmask16
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm512_int2mask (int __M)
3843 return (__mmask16) __M;
3846 extern __inline int
3847 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3848 _mm512_mask2int (__mmask16 __M)
3850 return (int) __M;
3853 extern __inline __m512i
3854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3855 _mm512_set_epi64 (long long __A, long long __B, long long __C,
3856 long long __D, long long __E, long long __F,
3857 long long __G, long long __H)
3859 return __extension__ (__m512i) (__v8di)
3860 { __H, __G, __F, __E, __D, __C, __B, __A };
3863 /* Create the vector [A B C D E F G H I J K L M N O P]. */
3864 extern __inline __m512i
3865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3866 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
3867 int __E, int __F, int __G, int __H,
3868 int __I, int __J, int __K, int __L,
3869 int __M, int __N, int __O, int __P)
3871 return __extension__ (__m512i)(__v16si)
3872 { __P, __O, __N, __M, __L, __K, __J, __I,
3873 __H, __G, __F, __E, __D, __C, __B, __A };
3876 extern __inline __m512i
3877 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
3878 _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
3879 short __q27, short __q26, short __q25, short __q24,
3880 short __q23, short __q22, short __q21, short __q20,
3881 short __q19, short __q18, short __q17, short __q16,
3882 short __q15, short __q14, short __q13, short __q12,
3883 short __q11, short __q10, short __q09, short __q08,
3884 short __q07, short __q06, short __q05, short __q04,
3885 short __q03, short __q02, short __q01, short __q00)
3887 return __extension__ (__m512i)(__v32hi){
3888 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
3889 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
3890 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
3891 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
3895 extern __inline __m512i
3896 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
3897 _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
3898 char __q59, char __q58, char __q57, char __q56,
3899 char __q55, char __q54, char __q53, char __q52,
3900 char __q51, char __q50, char __q49, char __q48,
3901 char __q47, char __q46, char __q45, char __q44,
3902 char __q43, char __q42, char __q41, char __q40,
3903 char __q39, char __q38, char __q37, char __q36,
3904 char __q35, char __q34, char __q33, char __q32,
3905 char __q31, char __q30, char __q29, char __q28,
3906 char __q27, char __q26, char __q25, char __q24,
3907 char __q23, char __q22, char __q21, char __q20,
3908 char __q19, char __q18, char __q17, char __q16,
3909 char __q15, char __q14, char __q13, char __q12,
3910 char __q11, char __q10, char __q09, char __q08,
3911 char __q07, char __q06, char __q05, char __q04,
3912 char __q03, char __q02, char __q01, char __q00)
3914 return __extension__ (__m512i)(__v64qi){
3915 __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
3916 __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
3917 __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
3918 __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
3919 __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
3920 __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
3921 __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
3922 __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
3926 extern __inline __m512d
3927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3928 _mm512_set_pd (double __A, double __B, double __C, double __D,
3929 double __E, double __F, double __G, double __H)
3931 return __extension__ (__m512d)
3932 { __H, __G, __F, __E, __D, __C, __B, __A };
3935 extern __inline __m512
3936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3937 _mm512_set_ps (float __A, float __B, float __C, float __D,
3938 float __E, float __F, float __G, float __H,
3939 float __I, float __J, float __K, float __L,
3940 float __M, float __N, float __O, float __P)
3942 return __extension__ (__m512)
3943 { __P, __O, __N, __M, __L, __K, __J, __I,
3944 __H, __G, __F, __E, __D, __C, __B, __A };
/* "setr" (reversed) variants: element 0 comes first in the argument
   list, so simply forward to the "set" forms with arguments reversed.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			\
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			\
			  e8,e9,e10,e11,e12,e13,e14,e15)		\
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)				\
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
3960 extern __inline __m512
3961 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3962 _mm512_undefined_ps (void)
3964 #pragma GCC diagnostic push
3965 #pragma GCC diagnostic ignored "-Winit-self"
3966 __m512 __Y = __Y;
3967 #pragma GCC diagnostic pop
3968 return __Y;
3971 #define _mm512_undefined _mm512_undefined_ps
3973 extern __inline __m512d
3974 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3975 _mm512_undefined_pd (void)
3977 #pragma GCC diagnostic push
3978 #pragma GCC diagnostic ignored "-Winit-self"
3979 __m512d __Y = __Y;
3980 #pragma GCC diagnostic pop
3981 return __Y;
3984 extern __inline __m512i
3985 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3986 _mm512_undefined_epi32 (void)
3988 #pragma GCC diagnostic push
3989 #pragma GCC diagnostic ignored "-Winit-self"
3990 __m512i __Y = __Y;
3991 #pragma GCC diagnostic pop
3992 return __Y;
3995 #define _mm512_undefined_si512 _mm512_undefined_epi32
/* Broadcast the 8-bit value __A to all 64 byte elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
  return __extension__ (__m512i)(__v64qi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}

/* Broadcast the 16-bit value __A to all 32 word elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
  return __extension__ (__m512i)(__v32hi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}

/* Broadcast the double __A to all 8 elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
  return __extension__ (__m512d)(__v8df)
	 { __A, __A, __A, __A, __A, __A, __A, __A };
}

/* Broadcast the float __A to all 16 elements.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
  return __extension__ (__m512)(__v16sf)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}
/* Create the vector [A B C D A B C D A B C D A B C D].  The
   initializer is listed low element first, hence __D comes first.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  return __extension__ (__m512i)(__v16si)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

/* As above, with four 64-bit elements repeated twice.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m512i) (__v8di)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* As above, with four doubles repeated twice.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* As above, with four floats repeated four times.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

/* The setr4 forms take their arguments in memory order; implement them
   by reversing the argument order of the set4 forms.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)					      \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3)					      \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3)					      \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3)					      \
  _mm512_set4_ps(e3,e2,e1,e0)
/* Return an all-zero 512-bit vector of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Generic all-zero vector; aliases the float form.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero (void)
{
  return _mm512_setzero_ps ();
}

/* Return an all-zero 512-bit vector of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return an all-zero 512-bit integer vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

/* Identical to _mm512_setzero_epi32; zero bits are zero bits regardless
   of element width.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Masked register move, doubles: the builtin receives __A, the merge
   source __W and the write mask __U.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U);
}

/* Zero-masking variant: a zero vector is supplied instead of __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

/* Masked register move, floats (16-bit mask).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

/* Zero-masking variant for floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}
/* Aligned 64-byte load of 8 doubles.  NOTE(review): the plain pointer
   dereference encodes the alignment requirement in the __m512d type.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

/* Masked aligned load; __W supplies the masked-off elements.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df) __W,
						   (__mmask8) __U);
}

/* Zero-masking aligned load.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* Aligned 64-byte store of 8 doubles.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

/* Masked aligned store; only elements selected by __U are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) __U);
}
/* Aligned 64-byte load of 16 floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

/* Masked aligned load; __W supplies the masked-off elements.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

/* Zero-masking aligned load.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

/* Aligned 64-byte store of 16 floats.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

/* Masked aligned store; only elements selected by __U are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) __U);
}
/* Masked move of 8 64-bit integer elements; __W merges masked-off
   lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di) __W,
						     (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

/* Aligned 64-byte integer load.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Masked aligned load of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di) __W,
							(__mmask8) __U);
}

/* Zero-masking aligned load of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di)
							_mm512_setzero_si512 (),
							(__mmask8) __U);
}

/* Aligned 64-byte integer store.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Masked aligned store of 64-bit elements.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
					(__mmask8) __U);
}
/* Masked move of 16 32-bit integer elements; __W merges masked-off
   lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si) __W,
						     (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

/* Aligned whole-register integer load.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Same operation under the element-typed name.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

/* Masked aligned load of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si) __W,
							(__mmask16) __U);
}

/* Zero-masking aligned load of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si)
							_mm512_setzero_si512 (),
							(__mmask16) __U);
}

/* Aligned whole-register integer store.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Same operation under the element-typed name.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Masked aligned store of 32-bit elements.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
					(__mmask16) __U);
}
/* Low 32 bits of the per-element product.  Unsigned arithmetic is used
   so element overflow wraps instead of being undefined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

/* Zero-masking variant via the pmulld builtin.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Full 64-bit x 64-bit low multiply, expressed as plain vector
   arithmetic (unsigned, so overflow wraps).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullox_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A * (__v8du) __B);
}

/* Masked variant built from the unmasked multiply plus a masked move.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
}
/* Per-element variable left shift of 32-bit elements; the unmasked form
   passes an undefined destination and an all-ones mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Per-element variable arithmetic right shift of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

/* Per-element variable logical right shift of 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
/* Per-element 64-bit add; unsigned vector arithmetic so wraparound is
   well defined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

/* Merge-masking add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Per-element 64-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

/* Merge-masking subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
4571 extern __inline __m512i
4572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
4573 _mm512_sllv_epi64 (__m512i __X, __m512i __Y)
4575 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
4576 (__v8di) __Y,
4577 (__v8di)
4578 _mm512_undefined_pd (),
4579 (__mmask8) -1);
/* Merge-masking variable left shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Per-element variable arithmetic right shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}

/* Per-element variable logical right shift of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
/* Per-element 32-bit add (unsigned vector arithmetic, wraps).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

/* Merge-masking add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masking add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Widening signed multiply: 32-bit source elements, 64-bit products,
   hence the (__v8di) destination and 8-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di) __W, __M);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Per-element 32-bit subtract (unsigned vector arithmetic, wraps).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

/* Merge-masking subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masking subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}

/* Widening unsigned multiply: 32-bit source elements, 64-bit products,
   hence the (__v8di) destination and 8-bit mask.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di) __W, __M);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   __M);
}
/* Immediate-count left shift of 64-bit elements.  Under __OPTIMIZE__
   these are inline functions; otherwise macro forms are provided so the
   count folds to the immediate the builtin requires.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)						   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),				   \
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)(W),						   \
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)				   \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),				   \
    (__mmask8)(U)))
#endif
/* Left shift of 64-bit elements by the count in the low quadword of
   __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
/* Immediate-count logical right shift of 64-bit elements; inline
   functions under __OPTIMIZE__, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C)						   \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),				   \
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)(W),						   \
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)				   \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),				   \
    (__mmask8)(U)))
#endif
/* Logical right shift of 64-bit elements by the count in the low
   quadword of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
/* Immediate-count arithmetic right shift of 64-bit elements; inline
   functions under __OPTIMIZE__, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C)						   \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_undefined_epi32 (),				   \
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)(W),						   \
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C)				   \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v8di)(__m512i)_mm512_setzero_si512 (),				   \
    (__mmask8)(U)))
#endif
/* Arithmetic right shift of 64-bit elements by the count in the low
   quadword of __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
/* Immediate-count left shift of 32-bit elements; inline functions under
   __OPTIMIZE__, macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C)						   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),			   \
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C)				   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v16si)(__m512i)(W),						   \
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C)				   \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X),	   \
    (unsigned int)(C),							   \
    (__v16si)(__m512i)_mm512_setzero_si512 (),				   \
    (__mmask16)(U)))
#endif
/* Left shift of 32-bit elements by the count in the low doubleword of
   __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_undefined_epi32 (),
						 (__mmask16) -1);
}

/* Merge-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
#ifdef __OPTIMIZE__
/* Logical right shift of each 32-bit element of __A by the immediate
   count __B (zeros shifted in), with merge- and zero-masking variants.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro fallbacks so the shift count stays an immediate without -O.  */
#define _mm512_srli_epi32(X, C)						  \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),			  \
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)(W),						  \
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C)				  \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)_mm512_setzero_si512 (),				  \
    (__mmask16)(U)))
#endif
5163 extern __inline __m512i
5164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5165 _mm512_srl_epi32 (__m512i __A, __m128i __B)
5167 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5168 (__v4si) __B,
5169 (__v16si)
5170 _mm512_undefined_epi32 (),
5171 (__mmask16) -1);
5174 extern __inline __m512i
5175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5176 _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5178 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5179 (__v4si) __B,
5180 (__v16si) __W,
5181 (__mmask16) __U);
5184 extern __inline __m512i
5185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5186 _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
5188 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
5189 (__v4si) __B,
5190 (__v16si)
5191 _mm512_setzero_si512 (),
5192 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Arithmetic right shift of each 32-bit element of __A by the immediate
   count __B (sign bits shifted in), with masking variants.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* Macro fallbacks so the shift count stays an immediate without -O.  */
#define _mm512_srai_epi32(X, C)						  \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)_mm512_undefined_epi32 (),			  \
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)(W),						  \
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C)				  \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X),	  \
    (unsigned int)(C),							  \
    (__v16si)(__m512i)_mm512_setzero_si512 (),				  \
    (__mmask16)(U)))
#endif
5245 extern __inline __m512i
5246 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5247 _mm512_sra_epi32 (__m512i __A, __m128i __B)
5249 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5250 (__v4si) __B,
5251 (__v16si)
5252 _mm512_undefined_epi32 (),
5253 (__mmask16) -1);
5256 extern __inline __m512i
5257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5258 _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5260 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5261 (__v4si) __B,
5262 (__v16si) __W,
5263 (__mmask16) __U);
5266 extern __inline __m512i
5267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5268 _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
5270 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
5271 (__v4si) __B,
5272 (__v16si)
5273 _mm512_setzero_si512 (),
5274 (__mmask16) __U);
/* Constant helper to represent the ternary logic operations among
   vector A, B and C.  Each constant is the truth-table column of the
   corresponding operand, so bitwise expressions over these constants
   (e.g. _MM_TERNLOG_A & ~_MM_TERNLOG_B) form valid imm8 selectors for
   the vpternlog instructions.  */
typedef enum
{
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;
#ifdef __OPTIMIZE__
/* Bitwise ternary logic of __A, __B and __C selected by the truth table
   __imm (vpternlogq/vpternlogd).  The _mask_ forms merge into __A under
   __U; the _maskz_ forms zero unselected elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_mask ((__v8di) __A,
				      (__v8di) __B,
				      (__v8di) __C,
				      (unsigned char) __imm,
				      (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_mask ((__v8di) __A,
				      (__v8di) __B,
				      (__v8di) __C,
				      (unsigned char) __imm,
				      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
				       (__v8di) __B,
				       (__v8di) __C,
				       (unsigned char) __imm,
				       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_mask ((__v16si) __A,
				      (__v16si) __B,
				      (__v16si) __C,
				      (unsigned char) __imm,
				      (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_mask ((__v16si) __A,
				      (__v16si) __B,
				      (__v16si) __C,
				      (unsigned char) __imm,
				      (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i)
    __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
				       (__v16si) __B,
				       (__v16si) __C,
				       (unsigned char) __imm,
				       (__mmask16) __U);
}
#else
/* Macro fallbacks keep the truth-table selector an immediate without -O.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)			\
  ((__m512i)							\
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A),	\
				     (__v8di) (__m512i) (B),	\
				     (__v8di) (__m512i) (C),	\
				     (unsigned char) (I),	\
				     (__mmask8) -1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)		\
  ((__m512i)							\
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A),	\
				     (__v8di) (__m512i) (B),	\
				     (__v8di) (__m512i) (C),	\
				     (unsigned char)(I),	\
				     (__mmask8) (U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)		\
  ((__m512i)							\
   __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A),	\
				      (__v8di) (__m512i) (B),	\
				      (__v8di) (__m512i) (C),	\
				      (unsigned char) (I),	\
				      (__mmask8) (U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)			\
  ((__m512i)							\
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A),	\
				     (__v16si) (__m512i) (B),	\
				     (__v16si) (__m512i) (C),	\
				     (unsigned char) (I),	\
				     (__mmask16) -1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)		\
  ((__m512i)							\
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A),	\
				     (__v16si) (__m512i) (B),	\
				     (__v16si) (__m512i) (C),	\
				     (unsigned char) (I),	\
				     (__mmask16) (U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)		\
  ((__m512i)							\
   __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A),	\
				      (__v16si) (__m512i) (B),	\
				      (__v16si) (__m512i) (C),	\
				      (unsigned char) (I),	\
				      (__mmask16) (U)))
#endif
5409 extern __inline __m512d
5410 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5411 _mm512_rcp14_pd (__m512d __A)
5413 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5414 (__v8df)
5415 _mm512_undefined_pd (),
5416 (__mmask8) -1);
5419 extern __inline __m512d
5420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5421 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
5423 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5424 (__v8df) __W,
5425 (__mmask8) __U);
5428 extern __inline __m512d
5429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5430 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
5432 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
5433 (__v8df)
5434 _mm512_setzero_pd (),
5435 (__mmask8) __U);
5438 extern __inline __m512
5439 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5440 _mm512_rcp14_ps (__m512 __A)
5442 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5443 (__v16sf)
5444 _mm512_undefined_ps (),
5445 (__mmask16) -1);
5448 extern __inline __m512
5449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5450 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
5452 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5453 (__v16sf) __W,
5454 (__mmask16) __U);
5457 extern __inline __m512
5458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5459 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
5461 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
5462 (__v16sf)
5463 _mm512_setzero_ps (),
5464 (__mmask16) __U);
5467 extern __inline __m512d
5468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5469 _mm512_rsqrt14_pd (__m512d __A)
5471 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5472 (__v8df)
5473 _mm512_undefined_pd (),
5474 (__mmask8) -1);
5477 extern __inline __m512d
5478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5479 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
5481 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5482 (__v8df) __W,
5483 (__mmask8) __U);
5486 extern __inline __m512d
5487 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5488 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
5490 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
5491 (__v8df)
5492 _mm512_setzero_pd (),
5493 (__mmask8) __U);
5496 extern __inline __m512
5497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5498 _mm512_rsqrt14_ps (__m512 __A)
5500 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5501 (__v16sf)
5502 _mm512_undefined_ps (),
5503 (__mmask16) -1);
5506 extern __inline __m512
5507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5508 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
5510 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5511 (__v16sf) __W,
5512 (__mmask16) __U);
5515 extern __inline __m512
5516 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5517 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
5519 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
5520 (__v16sf)
5521 _mm512_setzero_ps (),
5522 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Square root of each element with an explicit rounding mode __R
   (one of the _MM_FROUND_* constants), plus masking variants.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			   const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
#else
/* Macro fallbacks keep the rounding mode an immediate without -O.  */
#define _mm512_sqrt_round_pd(A, C)            \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_pd(U, A, C)   \
  (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sqrt_round_ps(A, C)            \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)

#define _mm512_maskz_sqrt_round_ps(U, A, C)   \
  (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
5606 extern __inline __m512i
5607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5608 _mm512_cvtepi8_epi32 (__m128i __A)
5610 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5611 (__v16si)
5612 _mm512_undefined_epi32 (),
5613 (__mmask16) -1);
5616 extern __inline __m512i
5617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5618 _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
5620 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5621 (__v16si) __W,
5622 (__mmask16) __U);
5625 extern __inline __m512i
5626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5627 _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
5629 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
5630 (__v16si)
5631 _mm512_setzero_si512 (),
5632 (__mmask16) __U);
5635 extern __inline __m512i
5636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5637 _mm512_cvtepi8_epi64 (__m128i __A)
5639 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5640 (__v8di)
5641 _mm512_undefined_epi32 (),
5642 (__mmask8) -1);
5645 extern __inline __m512i
5646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5647 _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5649 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5650 (__v8di) __W,
5651 (__mmask8) __U);
5654 extern __inline __m512i
5655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5656 _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
5658 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
5659 (__v8di)
5660 _mm512_setzero_si512 (),
5661 (__mmask8) __U);
5664 extern __inline __m512i
5665 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5666 _mm512_cvtepi16_epi32 (__m256i __A)
5668 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5669 (__v16si)
5670 _mm512_undefined_epi32 (),
5671 (__mmask16) -1);
5674 extern __inline __m512i
5675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5676 _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
5678 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5679 (__v16si) __W,
5680 (__mmask16) __U);
5683 extern __inline __m512i
5684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5685 _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
5687 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
5688 (__v16si)
5689 _mm512_setzero_si512 (),
5690 (__mmask16) __U);
5693 extern __inline __m512i
5694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5695 _mm512_cvtepi16_epi64 (__m128i __A)
5697 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5698 (__v8di)
5699 _mm512_undefined_epi32 (),
5700 (__mmask8) -1);
5703 extern __inline __m512i
5704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5705 _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5707 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5708 (__v8di) __W,
5709 (__mmask8) __U);
5712 extern __inline __m512i
5713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5714 _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
5716 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
5717 (__v8di)
5718 _mm512_setzero_si512 (),
5719 (__mmask8) __U);
5722 extern __inline __m512i
5723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5724 _mm512_cvtepi32_epi64 (__m256i __X)
5726 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5727 (__v8di)
5728 _mm512_undefined_epi32 (),
5729 (__mmask8) -1);
5732 extern __inline __m512i
5733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5734 _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
5736 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5737 (__v8di) __W,
5738 (__mmask8) __U);
5741 extern __inline __m512i
5742 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5743 _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
5745 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
5746 (__v8di)
5747 _mm512_setzero_si512 (),
5748 (__mmask8) __U);
5751 extern __inline __m512i
5752 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5753 _mm512_cvtepu8_epi32 (__m128i __A)
5755 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5756 (__v16si)
5757 _mm512_undefined_epi32 (),
5758 (__mmask16) -1);
5761 extern __inline __m512i
5762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5763 _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
5765 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5766 (__v16si) __W,
5767 (__mmask16) __U);
5770 extern __inline __m512i
5771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5772 _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
5774 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
5775 (__v16si)
5776 _mm512_setzero_si512 (),
5777 (__mmask16) __U);
5780 extern __inline __m512i
5781 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5782 _mm512_cvtepu8_epi64 (__m128i __A)
5784 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5785 (__v8di)
5786 _mm512_undefined_epi32 (),
5787 (__mmask8) -1);
5790 extern __inline __m512i
5791 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5792 _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5794 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5795 (__v8di) __W,
5796 (__mmask8) __U);
5799 extern __inline __m512i
5800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5801 _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
5803 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
5804 (__v8di)
5805 _mm512_setzero_si512 (),
5806 (__mmask8) __U);
5809 extern __inline __m512i
5810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5811 _mm512_cvtepu16_epi32 (__m256i __A)
5813 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5814 (__v16si)
5815 _mm512_undefined_epi32 (),
5816 (__mmask16) -1);
5819 extern __inline __m512i
5820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5821 _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
5823 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5824 (__v16si) __W,
5825 (__mmask16) __U);
5828 extern __inline __m512i
5829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5830 _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
5832 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
5833 (__v16si)
5834 _mm512_setzero_si512 (),
5835 (__mmask16) __U);
5838 extern __inline __m512i
5839 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5840 _mm512_cvtepu16_epi64 (__m128i __A)
5842 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5843 (__v8di)
5844 _mm512_undefined_epi32 (),
5845 (__mmask8) -1);
5848 extern __inline __m512i
5849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5850 _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
5852 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5853 (__v8di) __W,
5854 (__mmask8) __U);
5857 extern __inline __m512i
5858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5859 _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
5861 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
5862 (__v8di)
5863 _mm512_setzero_si512 (),
5864 (__mmask8) __U);
5867 extern __inline __m512i
5868 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5869 _mm512_cvtepu32_epi64 (__m256i __X)
5871 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5872 (__v8di)
5873 _mm512_undefined_epi32 (),
5874 (__mmask8) -1);
5877 extern __inline __m512i
5878 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5879 _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
5881 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5882 (__v8di) __W,
5883 (__mmask8) __U);
5886 extern __inline __m512i
5887 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
5888 _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
5890 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
5891 (__v8di)
5892 _mm512_setzero_si512 (),
5893 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Element-wise addition and subtraction with an explicit rounding mode
   __R (one of the _MM_FROUND_* constants), plus merge- and zero-masking
   variants.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
/* Macro fallbacks keep the rounding mode an immediate without -O.  */
#define _mm512_add_round_pd(A, B, C)            \
  (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
  (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_add_round_ps(A, B, C)            \
  (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
  (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_sub_round_pd(A, B, C)            \
  (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
  (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sub_round_ps(A, B, C)            \
  (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
  (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
6068 #ifdef __OPTIMIZE__
6069 extern __inline __m512d
6070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6071 _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
6073 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6074 (__v8df) __B,
6075 (__v8df)
6076 _mm512_undefined_pd (),
6077 (__mmask8) -1, __R);
6080 extern __inline __m512d
6081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6082 _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6083 __m512d __B, const int __R)
6085 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6086 (__v8df) __B,
6087 (__v8df) __W,
6088 (__mmask8) __U, __R);
6091 extern __inline __m512d
6092 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6093 _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6094 const int __R)
6096 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
6097 (__v8df) __B,
6098 (__v8df)
6099 _mm512_setzero_pd (),
6100 (__mmask8) __U, __R);
6103 extern __inline __m512
6104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6105 _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
6107 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6108 (__v16sf) __B,
6109 (__v16sf)
6110 _mm512_undefined_ps (),
6111 (__mmask16) -1, __R);
6114 extern __inline __m512
6115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6116 _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6117 __m512 __B, const int __R)
6119 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6120 (__v16sf) __B,
6121 (__v16sf) __W,
6122 (__mmask16) __U, __R);
6125 extern __inline __m512
6126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6127 _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
6129 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
6130 (__v16sf) __B,
6131 (__v16sf)
6132 _mm512_setzero_ps (),
6133 (__mmask16) __U, __R);
6136 extern __inline __m512d
6137 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6138 _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
6140 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6141 (__v8df) __V,
6142 (__v8df)
6143 _mm512_undefined_pd (),
6144 (__mmask8) -1, __R);
6147 extern __inline __m512d
6148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6149 _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
6150 __m512d __V, const int __R)
6152 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6153 (__v8df) __V,
6154 (__v8df) __W,
6155 (__mmask8) __U, __R);
6158 extern __inline __m512d
6159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6160 _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
6161 const int __R)
6163 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
6164 (__v8df) __V,
6165 (__v8df)
6166 _mm512_setzero_pd (),
6167 (__mmask8) __U, __R);
6170 extern __inline __m512
6171 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6172 _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
6174 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6175 (__v16sf) __B,
6176 (__v16sf)
6177 _mm512_undefined_ps (),
6178 (__mmask16) -1, __R);
6181 extern __inline __m512
6182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6183 _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6184 __m512 __B, const int __R)
6186 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6187 (__v16sf) __B,
6188 (__v16sf) __W,
6189 (__mmask16) __U, __R);
6192 extern __inline __m512
6193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6194 _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
6196 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
6197 (__v16sf) __B,
6198 (__v16sf)
6199 _mm512_setzero_ps (),
6200 (__mmask16) __U, __R);
6203 #else
/* Non-__OPTIMIZE__ macro forms of the mul/div rounding intrinsics.
   Arguments and full expansions are parenthesized to avoid operator
   precedence surprises, matching the style of the scalef macros.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((A), (B), (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((A), (B), (__v16sf) _mm512_setzero_ps (), (U), (C)))

#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((A), (B), (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_div_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_div_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_divps512_mask ((A), (B), (__v16sf) _mm512_setzero_ps (), (U), (C)))
6240 #endif
6242 #ifdef __OPTIMIZE__
6243 extern __inline __m512d
6244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6245 _mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
6247 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6248 (__v8df) __B,
6249 (__v8df)
6250 _mm512_undefined_pd (),
6251 (__mmask8) -1, __R);
6254 extern __inline __m512d
6255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6256 _mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6257 __m512d __B, const int __R)
6259 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6260 (__v8df) __B,
6261 (__v8df) __W,
6262 (__mmask8) __U, __R);
6265 extern __inline __m512d
6266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6267 _mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6268 const int __R)
6270 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
6271 (__v8df) __B,
6272 (__v8df)
6273 _mm512_setzero_pd (),
6274 (__mmask8) __U, __R);
6277 extern __inline __m512
6278 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6279 _mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
6281 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6282 (__v16sf) __B,
6283 (__v16sf)
6284 _mm512_undefined_ps (),
6285 (__mmask16) -1, __R);
6288 extern __inline __m512
6289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6290 _mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6291 __m512 __B, const int __R)
6293 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6294 (__v16sf) __B,
6295 (__v16sf) __W,
6296 (__mmask16) __U, __R);
6299 extern __inline __m512
6300 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6301 _mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
6303 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
6304 (__v16sf) __B,
6305 (__v16sf)
6306 _mm512_setzero_ps (),
6307 (__mmask16) __U, __R);
6310 extern __inline __m512d
6311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6312 _mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
6314 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6315 (__v8df) __B,
6316 (__v8df)
6317 _mm512_undefined_pd (),
6318 (__mmask8) -1, __R);
6321 extern __inline __m512d
6322 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6323 _mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6324 __m512d __B, const int __R)
6326 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6327 (__v8df) __B,
6328 (__v8df) __W,
6329 (__mmask8) __U, __R);
6332 extern __inline __m512d
6333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6334 _mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6335 const int __R)
6337 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
6338 (__v8df) __B,
6339 (__v8df)
6340 _mm512_setzero_pd (),
6341 (__mmask8) __U, __R);
6344 extern __inline __m512
6345 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6346 _mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
6348 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6349 (__v16sf) __B,
6350 (__v16sf)
6351 _mm512_undefined_ps (),
6352 (__mmask16) -1, __R);
6355 extern __inline __m512
6356 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6357 _mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6358 __m512 __B, const int __R)
6360 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6361 (__v16sf) __B,
6362 (__v16sf) __W,
6363 (__mmask16) __U, __R);
6366 extern __inline __m512
6367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6368 _mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
6370 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
6371 (__v16sf) __B,
6372 (__v16sf)
6373 _mm512_setzero_ps (),
6374 (__mmask16) __U, __R);
6376 #else
/* Non-__OPTIMIZE__ macro forms of the max/min rounding intrinsics.
   Bug fix: _mm512_max_round_ps previously passed
   _mm512_undefined_pd () (a __m512d) where a __m512 is required; it
   must use _mm512_undefined_ps ().  Arguments are parenthesized for
   consistency with the scalef macros.  */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_maxpd512_mask ((A), (B), (__v8df) _mm512_setzero_pd (), (U), (R)))

#define _mm512_max_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (__v16sf) _mm512_undefined_ps (), -1, (R)))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_maxps512_mask ((A), (B), (__v16sf) _mm512_setzero_ps (), (U), (R)))

#define _mm512_min_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (__v8df) _mm512_undefined_pd (), -1, (R)))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d) __builtin_ia32_minpd512_mask ((A), (B), (__v8df) _mm512_setzero_pd (), (U), (R)))

#define _mm512_min_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (__v16sf) _mm512_undefined_ps (), -1, (R)))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (W), (U), (R)))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512) __builtin_ia32_minps512_mask ((A), (B), (__v16sf) _mm512_setzero_ps (), (U), (R)))
6412 #endif
6414 #ifdef __OPTIMIZE__
6415 extern __inline __m512d
6416 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6417 _mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
6419 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6420 (__v8df) __B,
6421 (__v8df)
6422 _mm512_undefined_pd (),
6423 (__mmask8) -1, __R);
6426 extern __inline __m512d
6427 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6428 _mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
6429 __m512d __B, const int __R)
6431 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6432 (__v8df) __B,
6433 (__v8df) __W,
6434 (__mmask8) __U, __R);
6437 extern __inline __m512d
6438 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6439 _mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6440 const int __R)
6442 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6443 (__v8df) __B,
6444 (__v8df)
6445 _mm512_setzero_pd (),
6446 (__mmask8) __U, __R);
6449 extern __inline __m512
6450 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6451 _mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
6453 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6454 (__v16sf) __B,
6455 (__v16sf)
6456 _mm512_undefined_ps (),
6457 (__mmask16) -1, __R);
6460 extern __inline __m512
6461 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6462 _mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
6463 __m512 __B, const int __R)
6465 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6466 (__v16sf) __B,
6467 (__v16sf) __W,
6468 (__mmask16) __U, __R);
6471 extern __inline __m512
6472 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6473 _mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6474 const int __R)
6476 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6477 (__v16sf) __B,
6478 (__v16sf)
6479 _mm512_setzero_ps (),
6480 (__mmask16) __U, __R);
6483 #else
/* Non-__OPTIMIZE__ macro forms of the scalef rounding intrinsics.
   All arguments and expansions are fully parenthesized.  */
#define _mm512_scalef_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (__v8df) _mm512_undefined_pd (), -1, (C)))

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_scalefpd512_mask ((A), (B), (__v8df) _mm512_setzero_pd (), (U), (C)))

#define _mm512_scalef_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (__v16sf) _mm512_undefined_ps (), -1, (C)))

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (W), (U), (C)))

#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_scalefps512_mask ((A), (B), (__v16sf) _mm512_setzero_ps (), (U), (C)))
6514 #endif
6516 #ifdef __OPTIMIZE__
6517 extern __inline __m512d
6518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6519 _mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6521 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
6522 (__v8df) __B,
6523 (__v8df) __C,
6524 (__mmask8) -1, __R);
6527 extern __inline __m512d
6528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6529 _mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6530 __m512d __C, const int __R)
6532 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
6533 (__v8df) __B,
6534 (__v8df) __C,
6535 (__mmask8) __U, __R);
6538 extern __inline __m512d
6539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6540 _mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6541 __mmask8 __U, const int __R)
6543 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
6544 (__v8df) __B,
6545 (__v8df) __C,
6546 (__mmask8) __U, __R);
6549 extern __inline __m512d
6550 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6551 _mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6552 __m512d __C, const int __R)
6554 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
6555 (__v8df) __B,
6556 (__v8df) __C,
6557 (__mmask8) __U, __R);
6560 extern __inline __m512
6561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6562 _mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6564 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
6565 (__v16sf) __B,
6566 (__v16sf) __C,
6567 (__mmask16) -1, __R);
6570 extern __inline __m512
6571 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6572 _mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6573 __m512 __C, const int __R)
6575 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
6576 (__v16sf) __B,
6577 (__v16sf) __C,
6578 (__mmask16) __U, __R);
6581 extern __inline __m512
6582 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6583 _mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6584 __mmask16 __U, const int __R)
6586 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
6587 (__v16sf) __B,
6588 (__v16sf) __C,
6589 (__mmask16) __U, __R);
6592 extern __inline __m512
6593 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6594 _mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6595 __m512 __C, const int __R)
6597 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
6598 (__v16sf) __B,
6599 (__v16sf) __C,
6600 (__mmask16) __U, __R);
6603 extern __inline __m512d
6604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6605 _mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6607 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
6608 (__v8df) __B,
6609 (__v8df) __C,
6610 (__mmask8) -1, __R);
6613 extern __inline __m512d
6614 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6615 _mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6616 __m512d __C, const int __R)
6618 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
6619 (__v8df) __B,
6620 (__v8df) __C,
6621 (__mmask8) __U, __R);
6624 extern __inline __m512d
6625 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6626 _mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6627 __mmask8 __U, const int __R)
6629 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
6630 (__v8df) __B,
6631 (__v8df) __C,
6632 (__mmask8) __U, __R);
6635 extern __inline __m512d
6636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6637 _mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6638 __m512d __C, const int __R)
6640 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
6641 (__v8df) __B,
6642 (__v8df) __C,
6643 (__mmask8) __U, __R);
6646 extern __inline __m512
6647 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6648 _mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6650 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
6651 (__v16sf) __B,
6652 (__v16sf) __C,
6653 (__mmask16) -1, __R);
6656 extern __inline __m512
6657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6658 _mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6659 __m512 __C, const int __R)
6661 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
6662 (__v16sf) __B,
6663 (__v16sf) __C,
6664 (__mmask16) __U, __R);
6667 extern __inline __m512
6668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6669 _mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
6670 __mmask16 __U, const int __R)
6672 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
6673 (__v16sf) __B,
6674 (__v16sf) __C,
6675 (__mmask16) __U, __R);
6678 extern __inline __m512
6679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6680 _mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6681 __m512 __C, const int __R)
6683 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
6684 (__v16sf) __B,
6685 (__v16sf) __C,
6686 (__mmask16) __U, __R);
6689 extern __inline __m512d
6690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6691 _mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6693 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6694 (__v8df) __B,
6695 (__v8df) __C,
6696 (__mmask8) -1, __R);
6699 extern __inline __m512d
6700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6701 _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6702 __m512d __C, const int __R)
6704 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6705 (__v8df) __B,
6706 (__v8df) __C,
6707 (__mmask8) __U, __R);
6710 extern __inline __m512d
6711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6712 _mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6713 __mmask8 __U, const int __R)
6715 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
6716 (__v8df) __B,
6717 (__v8df) __C,
6718 (__mmask8) __U, __R);
6721 extern __inline __m512d
6722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6723 _mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6724 __m512d __C, const int __R)
6726 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
6727 (__v8df) __B,
6728 (__v8df) __C,
6729 (__mmask8) __U, __R);
6732 extern __inline __m512
6733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6734 _mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6736 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6737 (__v16sf) __B,
6738 (__v16sf) __C,
6739 (__mmask16) -1, __R);
6742 extern __inline __m512
6743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6744 _mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6745 __m512 __C, const int __R)
6747 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6748 (__v16sf) __B,
6749 (__v16sf) __C,
6750 (__mmask16) __U, __R);
6753 extern __inline __m512
6754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6755 _mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
6756 __mmask16 __U, const int __R)
6758 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
6759 (__v16sf) __B,
6760 (__v16sf) __C,
6761 (__mmask16) __U, __R);
6764 extern __inline __m512
6765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6766 _mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6767 __m512 __C, const int __R)
6769 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
6770 (__v16sf) __B,
6771 (__v16sf) __C,
6772 (__mmask16) __U, __R);
6775 extern __inline __m512d
6776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6777 _mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6779 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6780 (__v8df) __B,
6781 -(__v8df) __C,
6782 (__mmask8) -1, __R);
6785 extern __inline __m512d
6786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6787 _mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6788 __m512d __C, const int __R)
6790 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
6791 (__v8df) __B,
6792 -(__v8df) __C,
6793 (__mmask8) __U, __R);
6796 extern __inline __m512d
6797 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6798 _mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6799 __mmask8 __U, const int __R)
6801 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
6802 (__v8df) __B,
6803 (__v8df) __C,
6804 (__mmask8) __U, __R);
6807 extern __inline __m512d
6808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6809 _mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6810 __m512d __C, const int __R)
6812 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
6813 (__v8df) __B,
6814 -(__v8df) __C,
6815 (__mmask8) __U, __R);
6818 extern __inline __m512
6819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6820 _mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6822 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6823 (__v16sf) __B,
6824 -(__v16sf) __C,
6825 (__mmask16) -1, __R);
6828 extern __inline __m512
6829 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6830 _mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6831 __m512 __C, const int __R)
6833 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
6834 (__v16sf) __B,
6835 -(__v16sf) __C,
6836 (__mmask16) __U, __R);
6839 extern __inline __m512
6840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6841 _mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6842 __mmask16 __U, const int __R)
6844 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
6845 (__v16sf) __B,
6846 (__v16sf) __C,
6847 (__mmask16) __U, __R);
6850 extern __inline __m512
6851 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6852 _mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6853 __m512 __C, const int __R)
6855 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
6856 (__v16sf) __B,
6857 -(__v16sf) __C,
6858 (__mmask16) __U, __R);
6861 extern __inline __m512d
6862 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6863 _mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6865 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
6866 (__v8df) __B,
6867 (__v8df) __C,
6868 (__mmask8) -1, __R);
6871 extern __inline __m512d
6872 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6873 _mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6874 __m512d __C, const int __R)
6876 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
6877 (__v8df) __B,
6878 (__v8df) __C,
6879 (__mmask8) __U, __R);
6882 extern __inline __m512d
6883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6884 _mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
6885 __mmask8 __U, const int __R)
6887 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
6888 (__v8df) __B,
6889 (__v8df) __C,
6890 (__mmask8) __U, __R);
6893 extern __inline __m512d
6894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6895 _mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6896 __m512d __C, const int __R)
6898 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
6899 (__v8df) __B,
6900 (__v8df) __C,
6901 (__mmask8) __U, __R);
6904 extern __inline __m512
6905 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6906 _mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6908 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
6909 (__v16sf) __B,
6910 (__v16sf) __C,
6911 (__mmask16) -1, __R);
6914 extern __inline __m512
6915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6916 _mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
6917 __m512 __C, const int __R)
6919 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
6920 (__v16sf) __B,
6921 (__v16sf) __C,
6922 (__mmask16) __U, __R);
6925 extern __inline __m512
6926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6927 _mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
6928 __mmask16 __U, const int __R)
6930 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
6931 (__v16sf) __B,
6932 (__v16sf) __C,
6933 (__mmask16) __U, __R);
6936 extern __inline __m512
6937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6938 _mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
6939 __m512 __C, const int __R)
6941 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
6942 (__v16sf) __B,
6943 (__v16sf) __C,
6944 (__mmask16) __U, __R);
6947 extern __inline __m512d
6948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6949 _mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
6951 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
6952 (__v8df) __B,
6953 (__v8df) __C,
6954 (__mmask8) -1, __R);
6957 extern __inline __m512d
6958 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6959 _mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
6960 __m512d __C, const int __R)
6962 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
6963 (__v8df) __B,
6964 (__v8df) __C,
6965 (__mmask8) __U, __R);
6968 extern __inline __m512d
6969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6970 _mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
6971 __mmask8 __U, const int __R)
6973 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
6974 (__v8df) __B,
6975 (__v8df) __C,
6976 (__mmask8) __U, __R);
6979 extern __inline __m512d
6980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6981 _mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
6982 __m512d __C, const int __R)
6984 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
6985 (__v8df) __B,
6986 (__v8df) __C,
6987 (__mmask8) __U, __R);
6990 extern __inline __m512
6991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
6992 _mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
6994 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
6995 (__v16sf) __B,
6996 (__v16sf) __C,
6997 (__mmask16) -1, __R);
7000 extern __inline __m512
7001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7002 _mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
7003 __m512 __C, const int __R)
7005 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
7006 (__v16sf) __B,
7007 (__v16sf) __C,
7008 (__mmask16) __U, __R);
7011 extern __inline __m512
7012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7013 _mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
7014 __mmask16 __U, const int __R)
7016 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
7017 (__v16sf) __B,
7018 (__v16sf) __C,
7019 (__mmask16) __U, __R);
7022 extern __inline __m512
7023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7024 _mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
7025 __m512 __C, const int __R)
7027 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
7028 (__v16sf) __B,
7029 (__v16sf) __C,
7030 (__mmask16) __U, __R);
7032 #else
7033 #define _mm512_fmadd_round_pd(A, B, C, R) \
7034 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R)
7036 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
7037 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R)
7039 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
7040 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R)
7042 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
7043 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R)
7045 #define _mm512_fmadd_round_ps(A, B, C, R) \
7046 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R)
7048 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
7049 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R)
7051 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
7052 (__m512)__builtin_ia32_vfmaddps512_mask3(A, B, C, U, R)
7054 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
7055 (__m512)__builtin_ia32_vfmaddps512_maskz(A, B, C, U, R)
7057 #define _mm512_fmsub_round_pd(A, B, C, R) \
7058 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, -1, R)
7060 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
7061 (__m512d)__builtin_ia32_vfmsubpd512_mask(A, B, C, U, R)
7063 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
7064 (__m512d)__builtin_ia32_vfmsubpd512_mask3(A, B, C, U, R)
7066 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
7067 (__m512d)__builtin_ia32_vfmsubpd512_maskz(A, B, C, U, R)
7069 #define _mm512_fmsub_round_ps(A, B, C, R) \
7070 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, -1, R)
/* NOTE(review): this extract carries leftover line numbers at the start of
   each line and appears to have lost the bare brace lines of function
   bodies during extraction; code text below is kept verbatim.

   FMA-family rounding-mode wrappers (non-__OPTIMIZE__ macro forms).
   Each intrinsic expands directly to the matching __builtin_ia32_* call.
   Naming convention, visible in the mask argument passed:
     - no suffix : unmasked (mask operand is -1, all lanes active);
     - _mask     : merge-masking, result merged into first operand A under U;
     - _mask3    : merge-masking via the *_mask3 builtin (merges into C);
     - _maskz    : zero-masking via the *_maskz builtin.
   R is the rounding/SAE immediate, passed through unmodified.  */
7072 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
7073 (__m512)__builtin_ia32_vfmsubps512_mask(A, B, C, U, R)
7075 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
7076 (__m512)__builtin_ia32_vfmsubps512_mask3(A, B, C, U, R)
7078 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
7079 (__m512)__builtin_ia32_vfmsubps512_maskz(A, B, C, U, R)
/* fmaddsub: alternating add/subtract builtin, double then single.  */
7081 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
7082 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, -1, R)
7084 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
7085 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, C, U, R)
7087 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
7088 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3(A, B, C, U, R)
7090 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
7091 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, C, U, R)
7093 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
7094 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, -1, R)
7096 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
7097 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, C, U, R)
7099 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
7100 (__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R)
7102 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
7103 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R)
/* fmsubadd is synthesized from fmaddsub by negating C -- except the
   _mask3 forms, which use the dedicated vfmsubadd*_mask3 builtin so the
   unmodified C operand can serve as the merge source.  */
7105 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
7106 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R)
7108 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
7109 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R)
7111 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
7112 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R)
7114 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
7115 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R)
7117 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
7118 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R)
7120 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
7121 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R)
7123 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
7124 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R)
7126 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
7127 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R)
/* fnmadd / fnmsub: negated-multiply forms, each mapped to its own
   vfnmadd*/vfnmsub* builtin family.  */
7129 #define _mm512_fnmadd_round_pd(A, B, C, R) \
7130 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, -1, R)
7132 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
7133 (__m512d)__builtin_ia32_vfnmaddpd512_mask(A, B, C, U, R)
7135 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
7136 (__m512d)__builtin_ia32_vfnmaddpd512_mask3(A, B, C, U, R)
7138 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
7139 (__m512d)__builtin_ia32_vfnmaddpd512_maskz(A, B, C, U, R)
7141 #define _mm512_fnmadd_round_ps(A, B, C, R) \
7142 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, -1, R)
7144 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
7145 (__m512)__builtin_ia32_vfnmaddps512_mask(A, B, C, U, R)
7147 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
7148 (__m512)__builtin_ia32_vfnmaddps512_mask3(A, B, C, U, R)
7150 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
7151 (__m512)__builtin_ia32_vfnmaddps512_maskz(A, B, C, U, R)
7153 #define _mm512_fnmsub_round_pd(A, B, C, R) \
7154 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, -1, R)
7156 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
7157 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R)
7159 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
7160 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R)
7162 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
7163 (__m512d)__builtin_ia32_vfnmsubpd512_maskz(A, B, C, U, R)
7165 #define _mm512_fnmsub_round_ps(A, B, C, R) \
7166 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, -1, R)
7168 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
7169 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R)
7171 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
7172 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R)
7174 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
7175 (__m512)__builtin_ia32_vfnmsubps512_maskz(A, B, C, U, R)
/* Closes the conditional (opened above this extract) selecting the
   macro forms of these intrinsics.  */
7176 #endif
/* Packed absolute value, 8 x 64-bit lanes.  Unmasked form: destination
   comes from _mm512_undefined_epi32 () with an all-ones (__mmask8)-1
   mask, so every lane is written.  */
7178 extern __inline __m512i
7179 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7180 _mm512_abs_epi64 (__m512i __A)
7182 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7183 (__v8di)
7184 _mm512_undefined_epi32 (),
7185 (__mmask8) -1);
/* Merge-masked variant: lanes with a 0 bit in __U keep the value
   from __W.  */
7188 extern __inline __m512i
7189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7190 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
7192 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7193 (__v8di) __W,
7194 (__mmask8) __U);
/* Zero-masked variant: inactive lanes come from _mm512_setzero_si512.  */
7197 extern __inline __m512i
7198 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7199 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
7201 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A,
7202 (__v8di)
7203 _mm512_setzero_si512 (),
7204 (__mmask8) __U);
/* Packed absolute value, 16 x 32-bit lanes; same unmasked/mask/maskz
   pattern as the epi64 trio above but with a 16-bit lane mask.  */
7207 extern __inline __m512i
7208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7209 _mm512_abs_epi32 (__m512i __A)
7211 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7212 (__v16si)
7213 _mm512_undefined_epi32 (),
7214 (__mmask16) -1);
7217 extern __inline __m512i
7218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7219 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
7221 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7222 (__v16si) __W,
7223 (__mmask16) __U);
7226 extern __inline __m512i
7227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7228 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
7230 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A,
7231 (__v16si)
7232 _mm512_setzero_si512 (),
7233 (__mmask16) __U);
/* Broadcast the low element of a 128-bit source to all lanes of a
   512-bit vector.  Each family below follows the header-wide pattern:
   unmasked (undefined destination, all-ones mask), _mask (merge into
   __O under __M), _maskz (zero inactive lanes).  */
7236 extern __inline __m512
7237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7238 _mm512_broadcastss_ps (__m128 __A)
7240 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7241 (__v16sf)
7242 _mm512_undefined_ps (),
7243 (__mmask16) -1);
7246 extern __inline __m512
7247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7248 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
7250 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7251 (__v16sf) __O, __M);
7254 extern __inline __m512
7255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7256 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
7258 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A,
7259 (__v16sf)
7260 _mm512_setzero_ps (),
7261 __M);
/* Double-precision scalar broadcast (8 x 64-bit lanes).  */
7264 extern __inline __m512d
7265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7266 _mm512_broadcastsd_pd (__m128d __A)
7268 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
7269 (__v8df)
7270 _mm512_undefined_pd (),
7271 (__mmask8) -1);
7274 extern __inline __m512d
7275 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7276 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
7278 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
7279 (__v8df) __O, __M);
7282 extern __inline __m512d
7283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7284 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
7286 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A,
7287 (__v8df)
7288 _mm512_setzero_pd (),
7289 __M);
/* 32-bit integer scalar broadcast (16 lanes).  */
7292 extern __inline __m512i
7293 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7294 _mm512_broadcastd_epi32 (__m128i __A)
7296 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7297 (__v16si)
7298 _mm512_undefined_epi32 (),
7299 (__mmask16) -1);
7302 extern __inline __m512i
7303 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7304 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
7306 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7307 (__v16si) __O, __M);
7310 extern __inline __m512i
7311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7312 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
7314 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A,
7315 (__v16si)
7316 _mm512_setzero_si512 (),
7317 __M);
/* set1 from a GPR scalar: the unmasked form is a plain vector literal
   (lets the compiler constant-fold / pick the best expansion), while
   the masked forms go through the *_gpr_mask builtins.  */
7320 extern __inline __m512i
7321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7322 _mm512_set1_epi32 (int __A)
7324 return (__m512i)(__v16si)
7325 { __A, __A, __A, __A, __A, __A, __A, __A,
7326 __A, __A, __A, __A, __A, __A, __A, __A };
7329 extern __inline __m512i
7330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7331 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
7333 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O,
7334 __M);
7337 extern __inline __m512i
7338 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7339 _mm512_maskz_set1_epi32 (__mmask16 __M, int __A)
7341 return (__m512i)
7342 __builtin_ia32_pbroadcastd512_gpr_mask (__A,
7343 (__v16si) _mm512_setzero_si512 (),
7344 __M);
/* 64-bit integer scalar broadcast from a vector source (8 lanes).  */
7347 extern __inline __m512i
7348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7349 _mm512_broadcastq_epi64 (__m128i __A)
7351 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7352 (__v8di)
7353 _mm512_undefined_epi32 (),
7354 (__mmask8) -1);
7357 extern __inline __m512i
7358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7359 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
7361 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7362 (__v8di) __O, __M);
7365 extern __inline __m512i
7366 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7367 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
7369 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A,
7370 (__v8di)
7371 _mm512_setzero_si512 (),
7372 __M);
/* 64-bit set1: vector literal when unmasked, *_gpr_mask builtins when
   masked, mirroring the epi32 trio above.  */
7375 extern __inline __m512i
7376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7377 _mm512_set1_epi64 (long long __A)
7379 return (__m512i)(__v8di) { __A, __A, __A, __A, __A, __A, __A, __A };
7382 extern __inline __m512i
7383 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7384 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
7386 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O,
7387 __M);
7390 extern __inline __m512i
7391 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7392 _mm512_maskz_set1_epi64 (__mmask8 __M, long long __A)
7394 return (__m512i)
7395 __builtin_ia32_pbroadcastq512_gpr_mask (__A,
7396 (__v8di) _mm512_setzero_si512 (),
7397 __M);
/* 128-/256-bit tile broadcasts: replicate a whole __m128/__m256 source
   across the 512-bit destination (f32x4 = four-float tile, i64x4 =
   four-qword tile, etc.).  Same unmasked/_mask/_maskz convention as the
   scalar broadcasts above.  */
7400 extern __inline __m512
7401 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7402 _mm512_broadcast_f32x4 (__m128 __A)
7404 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7405 (__v16sf)
7406 _mm512_undefined_ps (),
7407 (__mmask16) -1);
7410 extern __inline __m512
7411 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7412 _mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A)
7414 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7415 (__v16sf) __O,
7416 __M);
7419 extern __inline __m512
7420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7421 _mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A)
7423 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A,
7424 (__v16sf)
7425 _mm512_setzero_ps (),
7426 __M);
/* Integer 4 x 32-bit tile broadcast.  */
7429 extern __inline __m512i
7430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7431 _mm512_broadcast_i32x4 (__m128i __A)
7433 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7434 (__v16si)
7435 _mm512_undefined_epi32 (),
7436 (__mmask16) -1);
7439 extern __inline __m512i
7440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7441 _mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A)
7443 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7444 (__v16si) __O,
7445 __M);
7448 extern __inline __m512i
7449 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7450 _mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A)
7452 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A,
7453 (__v16si)
7454 _mm512_setzero_si512 (),
7455 __M);
/* 256-bit (4 x double) tile broadcast.  */
7458 extern __inline __m512d
7459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7460 _mm512_broadcast_f64x4 (__m256d __A)
7462 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7463 (__v8df)
7464 _mm512_undefined_pd (),
7465 (__mmask8) -1);
7468 extern __inline __m512d
7469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7470 _mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A)
7472 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7473 (__v8df) __O,
7474 __M);
7477 extern __inline __m512d
7478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7479 _mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A)
7481 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A,
7482 (__v8df)
7483 _mm512_setzero_pd (),
7484 __M);
/* 256-bit (4 x 64-bit int) tile broadcast.  */
7487 extern __inline __m512i
7488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7489 _mm512_broadcast_i64x4 (__m256i __A)
7491 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7492 (__v8di)
7493 _mm512_undefined_epi32 (),
7494 (__mmask8) -1);
7497 extern __inline __m512i
7498 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7499 _mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A)
7501 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7502 (__v8di) __O,
7503 __M);
7506 extern __inline __m512i
7507 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7508 _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A)
7510 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A,
7511 (__v8di)
7512 _mm512_setzero_si512 (),
7513 __M);
/* Readable names for the 8-bit shuffle immediate used by
   _mm512_shuffle_epi32 and friends.  Each letter picks a source
   element (A=0, B=1, C=2, D=3), two bits per position with the first
   letter in the highest bit pair: e.g. _MM_PERM_ABCD = 0x1B =
   0b00'01'10'11.  The table enumerates all 256 encodings.  */
7516 typedef enum
7518 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
7519 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
7520 _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
7521 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
7522 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
7523 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
7524 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
7525 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
7526 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
7527 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
7528 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
7529 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
7530 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
7531 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
7532 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
7533 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
7534 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
7535 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
7536 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
7537 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
7538 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
7539 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
7540 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
7541 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
7542 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
7543 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
7544 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
7545 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
7546 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
7547 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
7548 _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
7549 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
7550 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
7551 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
7552 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
7553 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
7554 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
7555 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
7556 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
7557 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
7558 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
7559 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
7560 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
7561 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
7562 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
7563 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
7564 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
7565 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
7566 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
7567 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
7568 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
7569 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
7570 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
7571 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
7572 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
7573 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
7574 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
7575 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
7576 _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
7577 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
7578 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
7579 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
7580 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
7581 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
7582 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
7583 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
7584 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
7585 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
7586 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
7587 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
7588 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
7589 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
7590 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
7591 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
7592 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
7593 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
7594 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
7595 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
7596 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
7597 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
7598 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
7599 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
7600 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
7601 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
7602 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
7603 _MM_PERM_DDDD = 0xFF
7604 } _MM_PERM_ENUM;
/* Immediate-operand shuffles.  When optimizing, the inline functions
   below are used (the builtin requires its immediate operand to fold
   to a compile-time constant); without optimization the #else branch
   later in this file provides equivalent macro forms.  */
7606 #ifdef __OPTIMIZE__
/* Per-128-bit-lane dword shuffle selected by an _MM_PERM_ENUM
   immediate; unmasked form.  */
7607 extern __inline __m512i
7608 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7609 _mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask)
7611 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7612 __mask,
7613 (__v16si)
7614 _mm512_undefined_epi32 (),
7615 (__mmask16) -1);
7618 extern __inline __m512i
7619 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7620 _mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
7621 _MM_PERM_ENUM __mask)
7623 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7624 __mask,
7625 (__v16si) __W,
7626 (__mmask16) __U);
7629 extern __inline __m512i
7630 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7631 _mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask)
7633 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A,
7634 __mask,
7635 (__v16si)
7636 _mm512_setzero_si512 (),
7637 (__mmask16) __U);
/* 128-bit-tile shuffles across two sources, selected by __imm;
   i64x2 variant.  */
7640 extern __inline __m512i
7641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7642 _mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm)
7644 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7645 (__v8di) __B, __imm,
7646 (__v8di)
7647 _mm512_undefined_epi32 (),
7648 (__mmask8) -1);
7651 extern __inline __m512i
7652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7653 _mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A,
7654 __m512i __B, const int __imm)
7656 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7657 (__v8di) __B, __imm,
7658 (__v8di) __W,
7659 (__mmask8) __U);
7662 extern __inline __m512i
7663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7664 _mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B,
7665 const int __imm)
7667 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A,
7668 (__v8di) __B, __imm,
7669 (__v8di)
7670 _mm512_setzero_si512 (),
7671 (__mmask8) __U);
/* i32x4 variant (16-bit lane mask).  */
7674 extern __inline __m512i
7675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7676 _mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm)
7678 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7679 (__v16si) __B,
7680 __imm,
7681 (__v16si)
7682 _mm512_undefined_epi32 (),
7683 (__mmask16) -1);
7686 extern __inline __m512i
7687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7688 _mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A,
7689 __m512i __B, const int __imm)
7691 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7692 (__v16si) __B,
7693 __imm,
7694 (__v16si) __W,
7695 (__mmask16) __U);
7698 extern __inline __m512i
7699 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7700 _mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B,
7701 const int __imm)
7703 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A,
7704 (__v16si) __B,
7705 __imm,
7706 (__v16si)
7707 _mm512_setzero_si512 (),
7708 (__mmask16) __U);
/* Floating-point counterparts: f64x2 ...  */
7711 extern __inline __m512d
7712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7713 _mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm)
7715 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7716 (__v8df) __B, __imm,
7717 (__v8df)
7718 _mm512_undefined_pd (),
7719 (__mmask8) -1);
7722 extern __inline __m512d
7723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7724 _mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A,
7725 __m512d __B, const int __imm)
7727 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7728 (__v8df) __B, __imm,
7729 (__v8df) __W,
7730 (__mmask8) __U);
7733 extern __inline __m512d
7734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7735 _mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B,
7736 const int __imm)
7738 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A,
7739 (__v8df) __B, __imm,
7740 (__v8df)
7741 _mm512_setzero_pd (),
7742 (__mmask8) __U);
/* ... and f32x4.  */
7745 extern __inline __m512
7746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7747 _mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm)
7749 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7750 (__v16sf) __B, __imm,
7751 (__v16sf)
7752 _mm512_undefined_ps (),
7753 (__mmask16) -1);
7756 extern __inline __m512
7757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7758 _mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A,
7759 __m512 __B, const int __imm)
7761 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7762 (__v16sf) __B, __imm,
7763 (__v16sf) __W,
7764 (__mmask16) __U);
7767 extern __inline __m512
7768 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7769 _mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B,
7770 const int __imm)
7772 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A,
7773 (__v16sf) __B, __imm,
7774 (__v16sf)
7775 _mm512_setzero_ps (),
7776 (__mmask16) __U);
/* Non-__OPTIMIZE__ fallback: the shuffles become macros so the
   immediate argument reaches the builtin as a literal constant even
   without inlining.  Arguments are cast to their expected vector types;
   semantics match the inline functions in the #ifdef branch above.  */
7779 #else
7780 #define _mm512_shuffle_epi32(X, C) \
7781 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
7782 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
7783 (__mmask16)-1))
7785 #define _mm512_mask_shuffle_epi32(W, U, X, C) \
7786 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
7787 (__v16si)(__m512i)(W),\
7788 (__mmask16)(U)))
7790 #define _mm512_maskz_shuffle_epi32(U, X, C) \
7791 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\
7792 (__v16si)(__m512i)_mm512_setzero_si512 (),\
7793 (__mmask16)(U)))
7795 #define _mm512_shuffle_i64x2(X, Y, C) \
7796 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
7797 (__v8di)(__m512i)(Y), (int)(C),\
7798 (__v8di)(__m512i)_mm512_undefined_epi32 (),\
7799 (__mmask8)-1))
7801 #define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \
7802 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
7803 (__v8di)(__m512i)(Y), (int)(C),\
7804 (__v8di)(__m512i)(W),\
7805 (__mmask8)(U)))
7807 #define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \
7808 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \
7809 (__v8di)(__m512i)(Y), (int)(C),\
7810 (__v8di)(__m512i)_mm512_setzero_si512 (),\
7811 (__mmask8)(U)))
7813 #define _mm512_shuffle_i32x4(X, Y, C) \
7814 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
7815 (__v16si)(__m512i)(Y), (int)(C),\
7816 (__v16si)(__m512i)_mm512_undefined_epi32 (),\
7817 (__mmask16)-1))
7819 #define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \
7820 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
7821 (__v16si)(__m512i)(Y), (int)(C),\
7822 (__v16si)(__m512i)(W),\
7823 (__mmask16)(U)))
7825 #define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \
7826 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \
7827 (__v16si)(__m512i)(Y), (int)(C),\
7828 (__v16si)(__m512i)_mm512_setzero_si512 (),\
7829 (__mmask16)(U)))
7831 #define _mm512_shuffle_f64x2(X, Y, C) \
7832 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
7833 (__v8df)(__m512d)(Y), (int)(C),\
7834 (__v8df)(__m512d)_mm512_undefined_pd(),\
7835 (__mmask8)-1))
7837 #define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \
7838 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
7839 (__v8df)(__m512d)(Y), (int)(C),\
7840 (__v8df)(__m512d)(W),\
7841 (__mmask8)(U)))
7843 #define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \
7844 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \
7845 (__v8df)(__m512d)(Y), (int)(C),\
7846 (__v8df)(__m512d)_mm512_setzero_pd(),\
7847 (__mmask8)(U)))
7849 #define _mm512_shuffle_f32x4(X, Y, C) \
7850 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
7851 (__v16sf)(__m512)(Y), (int)(C),\
7852 (__v16sf)(__m512)_mm512_undefined_ps(),\
7853 (__mmask16)-1))
7855 #define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \
7856 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
7857 (__v16sf)(__m512)(Y), (int)(C),\
7858 (__v16sf)(__m512)(W),\
7859 (__mmask16)(U)))
7861 #define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \
7862 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \
7863 (__v16sf)(__m512)(Y), (int)(C),\
7864 (__v16sf)(__m512)_mm512_setzero_ps(),\
7865 (__mmask16)(U)))
7866 #endif
/* Variable-count rotates (prolv/prorv builtins): per-lane rotate of
   __A by the counts in the corresponding lanes of __B.  Standard
   unmasked/_mask/_maskz trios; epi32 forms use a 16-bit mask, epi64
   an 8-bit mask.  */
7868 extern __inline __m512i
7869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7870 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
7872 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7873 (__v16si) __B,
7874 (__v16si)
7875 _mm512_undefined_epi32 (),
7876 (__mmask16) -1);
7879 extern __inline __m512i
7880 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7881 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7883 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7884 (__v16si) __B,
7885 (__v16si) __W,
7886 (__mmask16) __U);
7889 extern __inline __m512i
7890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7891 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7893 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A,
7894 (__v16si) __B,
7895 (__v16si)
7896 _mm512_setzero_si512 (),
7897 (__mmask16) __U);
/* Right-rotate counterpart (prorvd builtin).  */
7900 extern __inline __m512i
7901 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7902 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
7904 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7905 (__v16si) __B,
7906 (__v16si)
7907 _mm512_undefined_epi32 (),
7908 (__mmask16) -1);
7911 extern __inline __m512i
7912 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7913 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
7915 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7916 (__v16si) __B,
7917 (__v16si) __W,
7918 (__mmask16) __U);
7921 extern __inline __m512i
7922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7923 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
7925 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A,
7926 (__v16si) __B,
7927 (__v16si)
7928 _mm512_setzero_si512 (),
7929 (__mmask16) __U);
/* 64-bit-lane left rotate (prolvq builtin).  */
7932 extern __inline __m512i
7933 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7934 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
7936 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7937 (__v8di) __B,
7938 (__v8di)
7939 _mm512_undefined_epi32 (),
7940 (__mmask8) -1);
7943 extern __inline __m512i
7944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7945 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7947 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7948 (__v8di) __B,
7949 (__v8di) __W,
7950 (__mmask8) __U);
7953 extern __inline __m512i
7954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7955 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7957 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A,
7958 (__v8di) __B,
7959 (__v8di)
7960 _mm512_setzero_si512 (),
7961 (__mmask8) __U);
/* 64-bit-lane right rotate (prorvq builtin).  */
7964 extern __inline __m512i
7965 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7966 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
7968 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7969 (__v8di) __B,
7970 (__v8di)
7971 _mm512_undefined_epi32 (),
7972 (__mmask8) -1);
7975 extern __inline __m512i
7976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7977 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
7979 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7980 (__v8di) __B,
7981 (__v8di) __W,
7982 (__mmask8) __U);
7985 extern __inline __m512i
7986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7987 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
7989 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A,
7990 (__v8di) __B,
7991 (__v8di)
7992 _mm512_setzero_si512 (),
7993 (__mmask8) __U);
/* Truncating double -> 32-bit integer conversions with explicit
   rounding control __R (cvttpd2dq / cvttpd2udq builtins); result is a
   256-bit vector of 8 dwords.  Inline functions when optimizing,
   macro fallbacks in the #else branch below (same constant-immediate
   reason as the shuffles above).  */
7996 #ifdef __OPTIMIZE__
7997 extern __inline __m256i
7998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
7999 _mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R)
8001 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8002 (__v8si)
8003 _mm256_undefined_si256 (),
8004 (__mmask8) -1, __R);
8007 extern __inline __m256i
8008 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8009 _mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
8010 const int __R)
8012 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8013 (__v8si) __W,
8014 (__mmask8) __U, __R);
8017 extern __inline __m256i
8018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8019 _mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
8021 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
8022 (__v8si)
8023 _mm256_setzero_si256 (),
8024 (__mmask8) __U, __R);
/* Unsigned-destination variant (cvttpd2udq builtin).  */
8027 extern __inline __m256i
8028 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8029 _mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R)
8031 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8032 (__v8si)
8033 _mm256_undefined_si256 (),
8034 (__mmask8) -1, __R);
8037 extern __inline __m256i
8038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8039 _mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
8040 const int __R)
8042 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8043 (__v8si) __W,
8044 (__mmask8) __U, __R);
8047 extern __inline __m256i
8048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8049 _mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
8051 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
8052 (__v8si)
8053 _mm256_setzero_si256 (),
8054 (__mmask8) __U, __R);
/* Macro forms for -O0; B here is the rounding immediate.  */
8056 #else
8057 #define _mm512_cvtt_roundpd_epi32(A, B) \
8058 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
8060 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \
8061 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B))
8063 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \
8064 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
8066 #define _mm512_cvtt_roundpd_epu32(A, B) \
8067 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B))
8069 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \
8070 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B))
8072 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \
8073 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B))
8074 #endif
#ifdef __OPTIMIZE__
/* Convert the eight double-precision elements of __A to packed signed
   32-bit integers using rounding mode __R (_MM_FROUND_* constant,
   must be known at compile time).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                                                    (__v8si)
                                                    _mm256_undefined_si256 (),
                                                    (__mmask8) -1, __R);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                                                    (__v8si) __W,
                                                    (__mmask8) __U, __R);
}

/* Zero-masking variant: lanes with a clear bit in __U are zeroed.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
                                                    (__v8si)
                                                    _mm256_setzero_si256 (),
                                                    (__mmask8) __U, __R);
}

/* Unsigned-destination variants (cvtpd2udq).  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                                                     (__v8si)
                                                     _mm256_undefined_si256 (),
                                                     (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A,
                               const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                                                     (__v8si) __W,
                                                     (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
                                                     (__v8si)
                                                     _mm256_setzero_si256 (),
                                                     (__mmask8) __U, __R);
}
#else
/* Macro fallbacks used when inlining of constants is not guaranteed.  */
#define _mm512_cvt_roundpd_epi32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)_mm256_undefined_si256(), -1, (B)))

#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2dq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))

#define _mm512_cvt_roundpd_epu32(A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)_mm256_undefined_si256(), -1, (B)))

#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)(W), (U), (B)))

#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \
    ((__m256i)__builtin_ia32_cvtpd2udq512_mask((A), (__v8si)_mm256_setzero_si256(), (U), (B)))
#endif
#ifdef __OPTIMIZE__
/* Convert the sixteen single-precision elements of __A to packed
   signed 32-bit integers with truncation; __R is a _MM_FROUND_*
   SAE control (compile-time constant).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                                                     (__v16si)
                                                     _mm512_undefined_epi32 (),
                                                     (__mmask16) -1, __R);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                                                     (__v16si) __W,
                                                     (__mmask16) __U, __R);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U, __R);
}

/* Unsigned-destination variants (cvttps2udq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                                      (__v16si)
                                                      _mm512_undefined_epi32 (),
                                                      (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                                const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                                      (__v16si) __W,
                                                      (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
                                                      (__v16si)
                                                      _mm512_setzero_si512 (),
                                                      (__mmask16) __U, __R);
}
#else
/* Macro fallbacks for non-optimized builds.  */
#define _mm512_cvtt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)_mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))

#define _mm512_cvtt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)_mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)(W), (U), (B)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvttps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
#endif
#ifdef __OPTIMIZE__
/* Convert the sixteen single-precision elements of __A to packed
   signed 32-bit integers under rounding mode __R (_MM_FROUND_*
   constant, compile-time).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epi32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                                                    (__v16si)
                                                    _mm512_undefined_epi32 (),
                                                    (__mmask16) -1, __R);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                                                    (__v16si) __W,
                                                    (__mmask16) __U, __R);
}

/* Zero-masking variant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
                                                    (__v16si)
                                                    _mm512_setzero_si512 (),
                                                    (__mmask16) __U, __R);
}

/* Unsigned-destination variants (cvtps2udq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_epu32 (__m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                                                     (__v16si)
                                                     _mm512_undefined_epi32 (),
                                                     (__mmask16) -1, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A,
                               const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                                                     (__v16si) __W,
                                                     (__mmask16) __U, __R);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
                                                     (__v16si)
                                                     _mm512_setzero_si512 (),
                                                     (__mmask16) __U, __R);
}
#else
/* Macro fallbacks for non-optimized builds.  */
#define _mm512_cvt_roundps_epi32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)_mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2dq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))

#define _mm512_cvt_roundps_epu32(A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)_mm512_undefined_epi32 (), -1, (B)))

#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)(W), (U), (B)))

#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \
    ((__m512i)__builtin_ia32_cvtps2udq512_mask((A), (__v16si)_mm512_setzero_si512 (), (U), (B)))
#endif
8316 extern __inline __m128i
8317 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8318 _mm512_cvtepi32_epi8 (__m512i __A)
8320 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8321 (__v16qi)
8322 _mm_undefined_si128 (),
8323 (__mmask16) -1);
8326 extern __inline void
8327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8328 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
8330 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
8333 extern __inline __m128i
8334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8335 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
8337 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8338 (__v16qi) __O, __M);
8341 extern __inline __m128i
8342 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8343 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
8345 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
8346 (__v16qi)
8347 _mm_setzero_si128 (),
8348 __M);
8351 extern __inline __m128i
8352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8353 _mm512_cvtsepi32_epi8 (__m512i __A)
8355 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8356 (__v16qi)
8357 _mm_undefined_si128 (),
8358 (__mmask16) -1);
8361 extern __inline void
8362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8363 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
8365 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
8368 extern __inline __m128i
8369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8370 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
8372 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8373 (__v16qi) __O, __M);
8376 extern __inline __m128i
8377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8378 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
8380 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
8381 (__v16qi)
8382 _mm_setzero_si128 (),
8383 __M);
8386 extern __inline __m128i
8387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8388 _mm512_cvtusepi32_epi8 (__m512i __A)
8390 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8391 (__v16qi)
8392 _mm_undefined_si128 (),
8393 (__mmask16) -1);
8396 extern __inline void
8397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8398 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
8400 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
8403 extern __inline __m128i
8404 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8405 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
8407 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8408 (__v16qi) __O,
8409 __M);
8412 extern __inline __m128i
8413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8414 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
8416 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
8417 (__v16qi)
8418 _mm_setzero_si128 (),
8419 __M);
8422 extern __inline __m256i
8423 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8424 _mm512_cvtepi32_epi16 (__m512i __A)
8426 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8427 (__v16hi)
8428 _mm256_undefined_si256 (),
8429 (__mmask16) -1);
8432 extern __inline void
8433 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8434 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
8436 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
8439 extern __inline __m256i
8440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8441 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
8443 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8444 (__v16hi) __O, __M);
8447 extern __inline __m256i
8448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8449 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
8451 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
8452 (__v16hi)
8453 _mm256_setzero_si256 (),
8454 __M);
8457 extern __inline __m256i
8458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8459 _mm512_cvtsepi32_epi16 (__m512i __A)
8461 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8462 (__v16hi)
8463 _mm256_undefined_si256 (),
8464 (__mmask16) -1);
8467 extern __inline void
8468 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8469 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
8471 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
8474 extern __inline __m256i
8475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8476 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
8478 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8479 (__v16hi) __O, __M);
8482 extern __inline __m256i
8483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8484 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
8486 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
8487 (__v16hi)
8488 _mm256_setzero_si256 (),
8489 __M);
8492 extern __inline __m256i
8493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8494 _mm512_cvtusepi32_epi16 (__m512i __A)
8496 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8497 (__v16hi)
8498 _mm256_undefined_si256 (),
8499 (__mmask16) -1);
8502 extern __inline void
8503 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8504 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
8506 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
8509 extern __inline __m256i
8510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8511 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
8513 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8514 (__v16hi) __O,
8515 __M);
8518 extern __inline __m256i
8519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8520 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
8522 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
8523 (__v16hi)
8524 _mm256_setzero_si256 (),
8525 __M);
8528 extern __inline __m256i
8529 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8530 _mm512_cvtepi64_epi32 (__m512i __A)
8532 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8533 (__v8si)
8534 _mm256_undefined_si256 (),
8535 (__mmask8) -1);
8538 extern __inline void
8539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8540 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
8542 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
8545 extern __inline __m256i
8546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8547 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
8549 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8550 (__v8si) __O, __M);
8553 extern __inline __m256i
8554 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8555 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
8557 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
8558 (__v8si)
8559 _mm256_setzero_si256 (),
8560 __M);
8563 extern __inline __m256i
8564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8565 _mm512_cvtsepi64_epi32 (__m512i __A)
8567 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8568 (__v8si)
8569 _mm256_undefined_si256 (),
8570 (__mmask8) -1);
8573 extern __inline void
8574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8575 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
8577 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
8580 extern __inline __m256i
8581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8582 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
8584 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8585 (__v8si) __O, __M);
8588 extern __inline __m256i
8589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8590 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
8592 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
8593 (__v8si)
8594 _mm256_setzero_si256 (),
8595 __M);
8598 extern __inline __m256i
8599 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8600 _mm512_cvtusepi64_epi32 (__m512i __A)
8602 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8603 (__v8si)
8604 _mm256_undefined_si256 (),
8605 (__mmask8) -1);
8608 extern __inline void
8609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8610 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
8612 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
8615 extern __inline __m256i
8616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8617 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
8619 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8620 (__v8si) __O, __M);
8623 extern __inline __m256i
8624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8625 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
8627 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
8628 (__v8si)
8629 _mm256_setzero_si256 (),
8630 __M);
8633 extern __inline __m128i
8634 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8635 _mm512_cvtepi64_epi16 (__m512i __A)
8637 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8638 (__v8hi)
8639 _mm_undefined_si128 (),
8640 (__mmask8) -1);
8643 extern __inline void
8644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8645 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
8647 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
8650 extern __inline __m128i
8651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8652 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
8654 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8655 (__v8hi) __O, __M);
8658 extern __inline __m128i
8659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8660 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
8662 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
8663 (__v8hi)
8664 _mm_setzero_si128 (),
8665 __M);
8668 extern __inline __m128i
8669 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8670 _mm512_cvtsepi64_epi16 (__m512i __A)
8672 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8673 (__v8hi)
8674 _mm_undefined_si128 (),
8675 (__mmask8) -1);
8678 extern __inline void
8679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8680 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
8682 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
8685 extern __inline __m128i
8686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8687 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
8689 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8690 (__v8hi) __O, __M);
8693 extern __inline __m128i
8694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8695 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
8697 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
8698 (__v8hi)
8699 _mm_setzero_si128 (),
8700 __M);
8703 extern __inline __m128i
8704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8705 _mm512_cvtusepi64_epi16 (__m512i __A)
8707 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8708 (__v8hi)
8709 _mm_undefined_si128 (),
8710 (__mmask8) -1);
8713 extern __inline void
8714 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8715 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
8717 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
8720 extern __inline __m128i
8721 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8722 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
8724 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8725 (__v8hi) __O, __M);
8728 extern __inline __m128i
8729 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8730 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
8732 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
8733 (__v8hi)
8734 _mm_setzero_si128 (),
8735 __M);
8738 extern __inline __m128i
8739 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8740 _mm512_cvtepi64_epi8 (__m512i __A)
8742 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8743 (__v16qi)
8744 _mm_undefined_si128 (),
8745 (__mmask8) -1);
8748 extern __inline void
8749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8750 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
8752 __builtin_ia32_pmovqb512mem_mask ((unsigned long long *) __P,
8753 (__v8di) __A, __M);
8756 extern __inline __m128i
8757 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8758 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
8760 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8761 (__v16qi) __O, __M);
8764 extern __inline __m128i
8765 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8766 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
8768 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
8769 (__v16qi)
8770 _mm_setzero_si128 (),
8771 __M);
8774 extern __inline __m128i
8775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8776 _mm512_cvtsepi64_epi8 (__m512i __A)
8778 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8779 (__v16qi)
8780 _mm_undefined_si128 (),
8781 (__mmask8) -1);
8784 extern __inline void
8785 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8786 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
8788 __builtin_ia32_pmovsqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
8791 extern __inline __m128i
8792 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8793 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
8795 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8796 (__v16qi) __O, __M);
8799 extern __inline __m128i
8800 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8801 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
8803 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
8804 (__v16qi)
8805 _mm_setzero_si128 (),
8806 __M);
8809 extern __inline __m128i
8810 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8811 _mm512_cvtusepi64_epi8 (__m512i __A)
8813 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8814 (__v16qi)
8815 _mm_undefined_si128 (),
8816 (__mmask8) -1);
8819 extern __inline void
8820 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8821 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
8823 __builtin_ia32_pmovusqb512mem_mask ((unsigned long long *) __P, (__v8di) __A, __M);
8826 extern __inline __m128i
8827 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8828 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
8830 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8831 (__v16qi) __O,
8832 __M);
8835 extern __inline __m128i
8836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8837 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
8839 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
8840 (__v16qi)
8841 _mm_setzero_si128 (),
8842 __M);
8845 extern __inline __m512d
8846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8847 _mm512_cvtepi32_pd (__m256i __A)
8849 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
8850 (__v8df)
8851 _mm512_undefined_pd (),
8852 (__mmask8) -1);
8855 extern __inline __m512d
8856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8857 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
8859 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
8860 (__v8df) __W,
8861 (__mmask8) __U);
8864 extern __inline __m512d
8865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8866 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
8868 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A,
8869 (__v8df)
8870 _mm512_setzero_pd (),
8871 (__mmask8) __U);
8874 extern __inline __m512d
8875 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8876 _mm512_cvtepu32_pd (__m256i __A)
8878 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8879 (__v8df)
8880 _mm512_undefined_pd (),
8881 (__mmask8) -1);
8884 extern __inline __m512d
8885 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8886 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
8888 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8889 (__v8df) __W,
8890 (__mmask8) __U);
8893 extern __inline __m512d
8894 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
8895 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
8897 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A,
8898 (__v8df)
8899 _mm512_setzero_pd (),
8900 (__mmask8) __U);
#ifdef __OPTIMIZE__
/* Convert sixteen signed 32-bit integers in __A to single precision
   under rounding mode __R (_MM_FROUND_* constant, compile-time).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepi32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf)
                                                   _mm512_undefined_ps (),
                                                   (__mmask16) -1, __R);
}

/* Merge-masking variant: lanes with a clear bit in __U keep __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A,
                               const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf) __W,
                                                   (__mmask16) __U, __R);
}

/* Zero-masking variant.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
                                                   (__v16sf)
                                                   _mm512_setzero_ps (),
                                                   (__mmask16) __U, __R);
}

/* Unsigned-source variants (cvtudq2ps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundepu32_ps (__m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf)
                                                    _mm512_undefined_ps (),
                                                    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A,
                               const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf) __W,
                                                    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R)
{
  return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
                                                    (__v16sf)
                                                    _mm512_setzero_ps (),
                                                    (__mmask16) __U, __R);
}
#else
/* Macro fallbacks for non-optimized builds.  */
#define _mm512_cvt_roundepi32_ps(A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, (B))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (W), (U), (B))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (U), (B))

#define _mm512_cvt_roundepu32_ps(A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, (B))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (W), (U), (B))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \
    (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), (U), (B))
#endif
#ifdef __OPTIMIZE__
/* Extract the 256-bit (4 x double) half of __A selected by __imm.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf64x4_pd (__m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, __imm,
						     (__v4df)
						     _mm256_undefined_pd (),
						     (__mmask8) -1);
}

/* As above, merging with __W under write-mask __U.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A,
			     const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, __imm,
						     (__v4df) __W,
						     (__mmask8) __U);
}

/* As above, zeroing elements whose bit in __U is clear.  */
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm)
{
  return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, __imm,
						     (__v4df)
						     _mm256_setzero_pd (),
						     (__mmask8) __U);
}

/* Extract the 128-bit (4 x float) lane of __A selected by __imm.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extractf32x4_ps (__m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, __imm,
						    (__v4sf)
						    _mm_undefined_ps (),
						    (__mmask8) -1);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A,
			     const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, __imm,
						    (__v4sf) __W,
						    (__mmask8) __U);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm)
{
  return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, __imm,
						    (__v4sf)
						    _mm_setzero_ps (),
						    (__mmask8) __U);
}

/* Integer counterparts: extract 256-bit (4 x int64) half of __A.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti64x4_epi64 (__m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, __imm,
						     (__v4di)
						     _mm256_undefined_si256 (),
						     (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, __imm,
						     (__v4di) __W,
						     (__mmask8) __U);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, __imm,
						     (__v4di)
						     _mm256_setzero_si256 (),
						     (__mmask8) __U);
}

/* Extract the 128-bit (4 x int32) lane of __A selected by __imm.  */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_extracti32x4_epi32 (__m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, __imm,
						     (__v4si)
						     _mm_undefined_si128 (),
						     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A,
				const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, __imm,
						     (__v4si) __W,
						     (__mmask8) __U);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm)
{
  return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, __imm,
						     (__v4si)
						     _mm_setzero_si128 (),
						     (__mmask8) __U);
}
#else
/* Without optimization the immediate cannot be folded through an inline
   function, so provide macro forms that pass it straight to the builtin.  */
#define _mm512_extractf64x4_pd(X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v4df)(__m256d)_mm256_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v4df)(__m256d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, X, C) \
  ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \
    (int) (C), \
    (__v4df)(__m256d)_mm256_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_extractf32x4_ps(X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v4sf)(__m128)_mm_undefined_ps(), \
    (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v4sf)(__m128)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, X, C) \
  ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \
    (int) (C), \
    (__v4sf)(__m128)_mm_setzero_ps(), \
    (__mmask8)(U)))

#define _mm512_extracti64x4_epi64(X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C), \
    (__v4di)(__m256i)_mm256_undefined_si256 (), \
    (__mmask8)-1))

#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C), \
    (__v4di)(__m256i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_extracti64x4_epi64(U, X, C) \
  ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \
    (int) (C), \
    (__v4di)(__m256i)_mm256_setzero_si256 (), \
    (__mmask8)(U)))

#define _mm512_extracti32x4_epi32(X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C), \
    (__v4si)(__m128i)_mm_undefined_si128 (), \
    (__mmask8)-1))

#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C), \
    (__v4si)(__m128i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_extracti32x4_epi32(U, X, C) \
  ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \
    (int) (C), \
    (__v4si)(__m128i)_mm_setzero_si128 (), \
    (__mmask8)(U)))
#endif
#ifdef __OPTIMIZE__
/* Insert the 128-bit integer vector __B into lane __imm of __A.
   Passing __A itself as the merge source with an all-ones mask leaves
   the remaining lanes of __A untouched.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A,
						    (__v4si) __B, __imm,
						    (__v16si) __A, -1);
}

/* Float counterpart of the above.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A,
						   (__v4sf) __B, __imm,
						   (__v16sf) __A, -1);
}

/* Insert the 256-bit integer vector __B into half __imm of __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B, __imm,
						    (__v8di)
						    _mm512_undefined_epi32 (),
						    (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A,
			 __m256i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B, __imm,
						    (__v8di) __W,
						    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A,
						    (__v4di) __B, __imm,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U);
}

/* Insert the 256-bit double vector __B into half __imm of __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B, __imm,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A,
			 __m256d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B, __imm,
						    (__v8df) __W,
						    (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B,
			  const int __imm)
{
  return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A,
						    (__v4df) __B, __imm,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U);
}
#else
/* Macro forms for -O0 so the immediate reaches the builtin directly.  */
#define _mm512_insertf32x4(X, Y, C) \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1)))

#define _mm512_inserti32x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1)))

#define _mm512_insertf64x4(X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_insertf64x4(W, U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_insertf64x4(U, X, Y, C) \
  ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \
    (__v4df)(__m256d) (Y), (int) (C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_inserti64x4(X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_undefined_epi32 (), \
    (__mmask8)-1))

#define _mm512_mask_inserti64x4(W, U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_inserti64x4(U, X, Y, C) \
  ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \
    (__v4di)(__m256i) (Y), (int) (C), \
    (__v8di)(__m512i)_mm512_setzero_si512 (), \
    (__mmask8)(U)))
#endif
9331 extern __inline __m512d
9332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9333 _mm512_loadu_pd (void const *__P)
9335 return *(__m512d_u *)__P;
9338 extern __inline __m512d
9339 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9340 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
9342 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
9343 (__v8df) __W,
9344 (__mmask8) __U);
9347 extern __inline __m512d
9348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9349 _mm512_maskz_loadu_pd (__mmask8 __U, void const *__P)
9351 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
9352 (__v8df)
9353 _mm512_setzero_pd (),
9354 (__mmask8) __U);
9357 extern __inline void
9358 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9359 _mm512_storeu_pd (void *__P, __m512d __A)
9361 *(__m512d_u *)__P = __A;
9364 extern __inline void
9365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9366 _mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A)
9368 __builtin_ia32_storeupd512_mask ((double *) __P, (__v8df) __A,
9369 (__mmask8) __U);
9372 extern __inline __m512
9373 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9374 _mm512_loadu_ps (void const *__P)
9376 return *(__m512_u *)__P;
9379 extern __inline __m512
9380 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9381 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
9383 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
9384 (__v16sf) __W,
9385 (__mmask16) __U);
9388 extern __inline __m512
9389 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9390 _mm512_maskz_loadu_ps (__mmask16 __U, void const *__P)
9392 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
9393 (__v16sf)
9394 _mm512_setzero_ps (),
9395 (__mmask16) __U);
9398 extern __inline void
9399 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9400 _mm512_storeu_ps (void *__P, __m512 __A)
9402 *(__m512_u *)__P = __A;
9405 extern __inline void
9406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9407 _mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A)
9409 __builtin_ia32_storeups512_mask ((float *) __P, (__v16sf) __A,
9410 (__mmask16) __U);
9413 extern __inline __m512i
9414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9415 _mm512_loadu_epi64 (void const *__P)
9417 return *(__m512i_u *) __P;
9420 extern __inline __m512i
9421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9422 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
9424 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
9425 (__v8di) __W,
9426 (__mmask8) __U);
9429 extern __inline __m512i
9430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9431 _mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
9433 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
9434 (__v8di)
9435 _mm512_setzero_si512 (),
9436 (__mmask8) __U);
9439 extern __inline void
9440 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9441 _mm512_storeu_epi64 (void *__P, __m512i __A)
9443 *(__m512i_u *) __P = (__m512i_u) __A;
9446 extern __inline void
9447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9448 _mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A)
9450 __builtin_ia32_storedqudi512_mask ((long long *) __P, (__v8di) __A,
9451 (__mmask8) __U);
9454 extern __inline __m512i
9455 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9456 _mm512_loadu_si512 (void const *__P)
9458 return *(__m512i_u *)__P;
9461 extern __inline __m512i
9462 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9463 _mm512_loadu_epi32 (void const *__P)
9465 return *(__m512i_u *) __P;
9468 extern __inline __m512i
9469 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9470 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
9472 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
9473 (__v16si) __W,
9474 (__mmask16) __U);
9477 extern __inline __m512i
9478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9479 _mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P)
9481 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
9482 (__v16si)
9483 _mm512_setzero_si512 (),
9484 (__mmask16) __U);
9487 extern __inline void
9488 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9489 _mm512_storeu_si512 (void *__P, __m512i __A)
9491 *(__m512i_u *)__P = __A;
9494 extern __inline void
9495 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9496 _mm512_storeu_epi32 (void *__P, __m512i __A)
9498 *(__m512i_u *) __P = (__m512i_u) __A;
9501 extern __inline void
9502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9503 _mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9505 __builtin_ia32_storedqusi512_mask ((int *) __P, (__v16si) __A,
9506 (__mmask16) __U);
9509 extern __inline __m512d
9510 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9511 _mm512_permutevar_pd (__m512d __A, __m512i __C)
9513 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9514 (__v8di) __C,
9515 (__v8df)
9516 _mm512_undefined_pd (),
9517 (__mmask8) -1);
9520 extern __inline __m512d
9521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9522 _mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
9524 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9525 (__v8di) __C,
9526 (__v8df) __W,
9527 (__mmask8) __U);
9530 extern __inline __m512d
9531 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9532 _mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C)
9534 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A,
9535 (__v8di) __C,
9536 (__v8df)
9537 _mm512_setzero_pd (),
9538 (__mmask8) __U);
9541 extern __inline __m512
9542 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9543 _mm512_permutevar_ps (__m512 __A, __m512i __C)
9545 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9546 (__v16si) __C,
9547 (__v16sf)
9548 _mm512_undefined_ps (),
9549 (__mmask16) -1);
9552 extern __inline __m512
9553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9554 _mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
9556 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9557 (__v16si) __C,
9558 (__v16sf) __W,
9559 (__mmask16) __U);
9562 extern __inline __m512
9563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9564 _mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C)
9566 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A,
9567 (__v16si) __C,
9568 (__v16sf)
9569 _mm512_setzero_ps (),
9570 (__mmask16) __U);
9573 extern __inline __m512i
9574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9575 _mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B)
9577 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
9578 /* idx */ ,
9579 (__v8di) __A,
9580 (__v8di) __B,
9581 (__mmask8) -1);
9584 extern __inline __m512i
9585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9586 _mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I,
9587 __m512i __B)
9589 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I
9590 /* idx */ ,
9591 (__v8di) __A,
9592 (__v8di) __B,
9593 (__mmask8) __U);
9596 extern __inline __m512i
9597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9598 _mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I,
9599 __mmask8 __U, __m512i __B)
9601 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A,
9602 (__v8di) __I
9603 /* idx */ ,
9604 (__v8di) __B,
9605 (__mmask8) __U);
9608 extern __inline __m512i
9609 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9610 _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A,
9611 __m512i __I, __m512i __B)
9613 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I
9614 /* idx */ ,
9615 (__v8di) __A,
9616 (__v8di) __B,
9617 (__mmask8) __U);
9620 extern __inline __m512i
9621 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9622 _mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B)
9624 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
9625 /* idx */ ,
9626 (__v16si) __A,
9627 (__v16si) __B,
9628 (__mmask16) -1);
9631 extern __inline __m512i
9632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9633 _mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U,
9634 __m512i __I, __m512i __B)
9636 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I
9637 /* idx */ ,
9638 (__v16si) __A,
9639 (__v16si) __B,
9640 (__mmask16) __U);
9643 extern __inline __m512i
9644 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9645 _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I,
9646 __mmask16 __U, __m512i __B)
9648 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A,
9649 (__v16si) __I
9650 /* idx */ ,
9651 (__v16si) __B,
9652 (__mmask16) __U);
9655 extern __inline __m512i
9656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9657 _mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A,
9658 __m512i __I, __m512i __B)
9660 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I
9661 /* idx */ ,
9662 (__v16si) __A,
9663 (__v16si) __B,
9664 (__mmask16) __U);
9667 extern __inline __m512d
9668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9669 _mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B)
9671 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
9672 /* idx */ ,
9673 (__v8df) __A,
9674 (__v8df) __B,
9675 (__mmask8) -1);
9678 extern __inline __m512d
9679 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9680 _mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I,
9681 __m512d __B)
9683 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I
9684 /* idx */ ,
9685 (__v8df) __A,
9686 (__v8df) __B,
9687 (__mmask8) __U);
9690 extern __inline __m512d
9691 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9692 _mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, __mmask8 __U,
9693 __m512d __B)
9695 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A,
9696 (__v8di) __I
9697 /* idx */ ,
9698 (__v8df) __B,
9699 (__mmask8) __U);
9702 extern __inline __m512d
9703 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9704 _mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I,
9705 __m512d __B)
9707 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I
9708 /* idx */ ,
9709 (__v8df) __A,
9710 (__v8df) __B,
9711 (__mmask8) __U);
9714 extern __inline __m512
9715 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9716 _mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B)
9718 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
9719 /* idx */ ,
9720 (__v16sf) __A,
9721 (__v16sf) __B,
9722 (__mmask16) -1);
9725 extern __inline __m512
9726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9727 _mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
9729 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I
9730 /* idx */ ,
9731 (__v16sf) __A,
9732 (__v16sf) __B,
9733 (__mmask16) __U);
9736 extern __inline __m512
9737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9738 _mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U,
9739 __m512 __B)
9741 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A,
9742 (__v16si) __I
9743 /* idx */ ,
9744 (__v16sf) __B,
9745 (__mmask16) __U);
9748 extern __inline __m512
9749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9750 _mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I,
9751 __m512 __B)
9753 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I
9754 /* idx */ ,
9755 (__v16sf) __A,
9756 (__v16sf) __B,
9757 (__mmask16) __U);
#ifdef __OPTIMIZE__
/* Immediate-controlled in-lane permute (VPERMILPD/VPERMILPS, imm8
   form).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_pd (__m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C)
{
  return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permute_ps (__m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C)
{
  return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}
#else
/* Macro forms for -O0 so the immediate reaches the builtin directly.  */
#define _mm512_permute_pd(X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)(-1)))

#define _mm512_mask_permute_pd(W, U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)(W), \
    (__mmask8)(U)))

#define _mm512_maskz_permute_pd(U, X, C) \
  ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_permute_ps(X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_undefined_ps(), \
    (__mmask16)(-1)))

#define _mm512_mask_permute_ps(W, U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)(W), \
    (__mmask16)(U)))

#define _mm512_maskz_permute_ps(U, X, C) \
  ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \
    (__v16sf)(__m512)_mm512_setzero_ps(), \
    (__mmask16)(U)))
#endif
#ifdef __OPTIMIZE__
/* Immediate-controlled cross-lane permute of 64-bit elements
   (VPERMQ/VPERMPD, imm8 form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_epi64 (__m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) (-1));
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M,
			    __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di) __W,
						  (__mmask8) __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I)
{
  return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __M);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_permutex_pd (__m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M)
{
  return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}
#else
/* Macro forms for -O0 so the immediate reaches the builtin directly.  */
#define _mm512_permutex_pd(X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_undefined_pd(), \
    (__mmask8)-1))

#define _mm512_mask_permutex_pd(W, U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)(W), (__mmask8)(U)))

#define _mm512_maskz_permutex_pd(U, X, M) \
  ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \
    (__v8df)(__m512d)_mm512_setzero_pd(), \
    (__mmask8)(U)))

#define _mm512_permutex_epi64(X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) \
    (_mm512_undefined_epi32 ()), \
    (__mmask8)(-1)))

#define _mm512_maskz_permutex_epi64(M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i) \
    (_mm512_setzero_si512 ()), \
    (__mmask8)(M)))

#define _mm512_mask_permutex_epi64(W, M, X, I) \
  ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \
    (int)(I), \
    (__v8di)(__m512i)(W), \
    (__mmask8)(M)))
#endif
9945 extern __inline __m512i
9946 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9947 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
9949 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9950 (__v8di) __X,
9951 (__v8di)
9952 _mm512_setzero_si512 (),
9953 __M);
9956 extern __inline __m512i
9957 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9958 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
9960 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9961 (__v8di) __X,
9962 (__v8di)
9963 _mm512_undefined_epi32 (),
9964 (__mmask8) -1);
9967 extern __inline __m512i
9968 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9969 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
9970 __m512i __Y)
9972 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y,
9973 (__v8di) __X,
9974 (__v8di) __W,
9975 __M);
9978 extern __inline __m512i
9979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9980 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
9982 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
9983 (__v16si) __X,
9984 (__v16si)
9985 _mm512_setzero_si512 (),
9986 __M);
9989 extern __inline __m512i
9990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
9991 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
9993 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
9994 (__v16si) __X,
9995 (__v16si)
9996 _mm512_undefined_epi32 (),
9997 (__mmask16) -1);
10000 extern __inline __m512i
10001 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10002 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
10003 __m512i __Y)
10005 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
10006 (__v16si) __X,
10007 (__v16si) __W,
10008 __M);
10011 extern __inline __m512d
10012 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10013 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
10015 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10016 (__v8di) __X,
10017 (__v8df)
10018 _mm512_undefined_pd (),
10019 (__mmask8) -1);
10022 extern __inline __m512d
10023 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10024 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
10026 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10027 (__v8di) __X,
10028 (__v8df) __W,
10029 (__mmask8) __U);
10032 extern __inline __m512d
10033 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10034 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
10036 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y,
10037 (__v8di) __X,
10038 (__v8df)
10039 _mm512_setzero_pd (),
10040 (__mmask8) __U);
10043 extern __inline __m512
10044 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10045 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
10047 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10048 (__v16si) __X,
10049 (__v16sf)
10050 _mm512_undefined_ps (),
10051 (__mmask16) -1);
10054 extern __inline __m512
10055 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10056 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
10058 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10059 (__v16si) __X,
10060 (__v16sf) __W,
10061 (__mmask16) __U);
10064 extern __inline __m512
10065 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10066 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
10068 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y,
10069 (__v16si) __X,
10070 (__v16sf)
10071 _mm512_setzero_ps (),
10072 (__mmask16) __U);
10075 #ifdef __OPTIMIZE__
10076 extern __inline __m512
10077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10078 _mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm)
10080 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10081 (__v16sf) __V, __imm,
10082 (__v16sf)
10083 _mm512_undefined_ps (),
10084 (__mmask16) -1);
10087 extern __inline __m512
10088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10089 _mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M,
10090 __m512 __V, const int __imm)
10092 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10093 (__v16sf) __V, __imm,
10094 (__v16sf) __W,
10095 (__mmask16) __U);
10098 extern __inline __m512
10099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10100 _mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm)
10102 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M,
10103 (__v16sf) __V, __imm,
10104 (__v16sf)
10105 _mm512_setzero_ps (),
10106 (__mmask16) __U);
10109 extern __inline __m512d
10110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10111 _mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm)
10113 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10114 (__v8df) __V, __imm,
10115 (__v8df)
10116 _mm512_undefined_pd (),
10117 (__mmask8) -1);
10120 extern __inline __m512d
10121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10122 _mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M,
10123 __m512d __V, const int __imm)
10125 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10126 (__v8df) __V, __imm,
10127 (__v8df) __W,
10128 (__mmask8) __U);
10131 extern __inline __m512d
10132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10133 _mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V,
10134 const int __imm)
10136 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M,
10137 (__v8df) __V, __imm,
10138 (__v8df)
10139 _mm512_setzero_pd (),
10140 (__mmask8) __U);
10143 extern __inline __m512d
10144 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10145 _mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C,
10146 const int __imm, const int __R)
10148 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
10149 (__v8df) __B,
10150 (__v8di) __C,
10151 __imm,
10152 (__mmask8) -1, __R);
10155 extern __inline __m512d
10156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10157 _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
10158 __m512i __C, const int __imm, const int __R)
10160 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
10161 (__v8df) __B,
10162 (__v8di) __C,
10163 __imm,
10164 (__mmask8) __U, __R);
10167 extern __inline __m512d
10168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10169 _mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
10170 __m512i __C, const int __imm, const int __R)
10172 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
10173 (__v8df) __B,
10174 (__v8di) __C,
10175 __imm,
10176 (__mmask8) __U, __R);
10179 extern __inline __m512
10180 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10181 _mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C,
10182 const int __imm, const int __R)
10184 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
10185 (__v16sf) __B,
10186 (__v16si) __C,
10187 __imm,
10188 (__mmask16) -1, __R);
10191 extern __inline __m512
10192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10193 _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
10194 __m512i __C, const int __imm, const int __R)
10196 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
10197 (__v16sf) __B,
10198 (__v16si) __C,
10199 __imm,
10200 (__mmask16) __U, __R);
10203 extern __inline __m512
10204 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10205 _mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
10206 __m512i __C, const int __imm, const int __R)
10208 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
10209 (__v16sf) __B,
10210 (__v16si) __C,
10211 __imm,
10212 (__mmask16) __U, __R);
10215 #else
10216 #define _mm512_shuffle_pd(X, Y, C) \
10217 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
10218 (__v8df)(__m512d)(Y), (int)(C),\
10219 (__v8df)(__m512d)_mm512_undefined_pd(),\
10220 (__mmask8)-1))
10222 #define _mm512_mask_shuffle_pd(W, U, X, Y, C) \
10223 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
10224 (__v8df)(__m512d)(Y), (int)(C),\
10225 (__v8df)(__m512d)(W),\
10226 (__mmask8)(U)))
10228 #define _mm512_maskz_shuffle_pd(U, X, Y, C) \
10229 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \
10230 (__v8df)(__m512d)(Y), (int)(C),\
10231 (__v8df)(__m512d)_mm512_setzero_pd(),\
10232 (__mmask8)(U)))
10234 #define _mm512_shuffle_ps(X, Y, C) \
10235 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
10236 (__v16sf)(__m512)(Y), (int)(C),\
10237 (__v16sf)(__m512)_mm512_undefined_ps(),\
10238 (__mmask16)-1))
10240 #define _mm512_mask_shuffle_ps(W, U, X, Y, C) \
10241 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
10242 (__v16sf)(__m512)(Y), (int)(C),\
10243 (__v16sf)(__m512)(W),\
10244 (__mmask16)(U)))
10246 #define _mm512_maskz_shuffle_ps(U, X, Y, C) \
10247 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \
10248 (__v16sf)(__m512)(Y), (int)(C),\
10249 (__v16sf)(__m512)_mm512_setzero_ps(),\
10250 (__mmask16)(U)))
10252 #define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \
10253 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
10254 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
10255 (__mmask8)(-1), (R)))
10257 #define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \
10258 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \
10259 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
10260 (__mmask8)(U), (R)))
10262 #define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \
10263 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \
10264 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \
10265 (__mmask8)(U), (R)))
10267 #define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \
10268 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
10269 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
10270 (__mmask16)(-1), (R)))
10272 #define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \
10273 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \
10274 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
10275 (__mmask16)(U), (R)))
10277 #define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \
10278 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \
10279 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \
10280 (__mmask16)(U), (R)))
10282 #endif
10284 extern __inline __m512
10285 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10286 _mm512_movehdup_ps (__m512 __A)
10288 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10289 (__v16sf)
10290 _mm512_undefined_ps (),
10291 (__mmask16) -1);
10294 extern __inline __m512
10295 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10296 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
10298 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10299 (__v16sf) __W,
10300 (__mmask16) __U);
10303 extern __inline __m512
10304 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10305 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
10307 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
10308 (__v16sf)
10309 _mm512_setzero_ps (),
10310 (__mmask16) __U);
10313 extern __inline __m512
10314 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10315 _mm512_moveldup_ps (__m512 __A)
10317 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10318 (__v16sf)
10319 _mm512_undefined_ps (),
10320 (__mmask16) -1);
10323 extern __inline __m512
10324 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10325 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
10327 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10328 (__v16sf) __W,
10329 (__mmask16) __U);
10332 extern __inline __m512
10333 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10334 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
10336 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
10337 (__v16sf)
10338 _mm512_setzero_ps (),
10339 (__mmask16) __U);
10342 extern __inline __m512i
10343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10344 _mm512_or_si512 (__m512i __A, __m512i __B)
10346 return (__m512i) ((__v16su) __A | (__v16su) __B);
10349 extern __inline __m512i
10350 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10351 _mm512_or_epi32 (__m512i __A, __m512i __B)
10353 return (__m512i) ((__v16su) __A | (__v16su) __B);
10356 extern __inline __m512i
10357 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10358 _mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
10360 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
10361 (__v16si) __B,
10362 (__v16si) __W,
10363 (__mmask16) __U);
10366 extern __inline __m512i
10367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10368 _mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10370 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A,
10371 (__v16si) __B,
10372 (__v16si)
10373 _mm512_setzero_si512 (),
10374 (__mmask16) __U);
10377 extern __inline __m512i
10378 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10379 _mm512_or_epi64 (__m512i __A, __m512i __B)
10381 return (__m512i) ((__v8du) __A | (__v8du) __B);
10384 extern __inline __m512i
10385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10386 _mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10388 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
10389 (__v8di) __B,
10390 (__v8di) __W,
10391 (__mmask8) __U);
10394 extern __inline __m512i
10395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10396 _mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
10398 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A,
10399 (__v8di) __B,
10400 (__v8di)
10401 _mm512_setzero_si512 (),
10402 (__mmask8) __U);
10405 extern __inline __m512i
10406 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10407 _mm512_xor_si512 (__m512i __A, __m512i __B)
10409 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
10412 extern __inline __m512i
10413 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10414 _mm512_xor_epi32 (__m512i __A, __m512i __B)
10416 return (__m512i) ((__v16su) __A ^ (__v16su) __B);
10419 extern __inline __m512i
10420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10421 _mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
10423 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
10424 (__v16si) __B,
10425 (__v16si) __W,
10426 (__mmask16) __U);
10429 extern __inline __m512i
10430 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10431 _mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10433 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A,
10434 (__v16si) __B,
10435 (__v16si)
10436 _mm512_setzero_si512 (),
10437 (__mmask16) __U);
10440 extern __inline __m512i
10441 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10442 _mm512_xor_epi64 (__m512i __A, __m512i __B)
10444 return (__m512i) ((__v8du) __A ^ (__v8du) __B);
10447 extern __inline __m512i
10448 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10449 _mm512_mask_xor_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10451 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
10452 (__v8di) __B,
10453 (__v8di) __W,
10454 (__mmask8) __U);
10457 extern __inline __m512i
10458 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10459 _mm512_maskz_xor_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
10461 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A,
10462 (__v8di) __B,
10463 (__v8di)
10464 _mm512_setzero_si512 (),
10465 (__mmask8) __U);
10468 #ifdef __OPTIMIZE__
10469 extern __inline __m512i
10470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10471 _mm512_rol_epi32 (__m512i __A, const int __B)
10473 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10474 (__v16si)
10475 _mm512_undefined_epi32 (),
10476 (__mmask16) -1);
10479 extern __inline __m512i
10480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10481 _mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B)
10483 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10484 (__v16si) __W,
10485 (__mmask16) __U);
10488 extern __inline __m512i
10489 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10490 _mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B)
10492 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B,
10493 (__v16si)
10494 _mm512_setzero_si512 (),
10495 (__mmask16) __U);
10498 extern __inline __m512i
10499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10500 _mm512_ror_epi32 (__m512i __A, int __B)
10502 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10503 (__v16si)
10504 _mm512_undefined_epi32 (),
10505 (__mmask16) -1);
10508 extern __inline __m512i
10509 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10510 _mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B)
10512 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10513 (__v16si) __W,
10514 (__mmask16) __U);
10517 extern __inline __m512i
10518 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10519 _mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B)
10521 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B,
10522 (__v16si)
10523 _mm512_setzero_si512 (),
10524 (__mmask16) __U);
10527 extern __inline __m512i
10528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10529 _mm512_rol_epi64 (__m512i __A, const int __B)
10531 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10532 (__v8di)
10533 _mm512_undefined_epi32 (),
10534 (__mmask8) -1);
10537 extern __inline __m512i
10538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10539 _mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B)
10541 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10542 (__v8di) __W,
10543 (__mmask8) __U);
10546 extern __inline __m512i
10547 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10548 _mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B)
10550 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B,
10551 (__v8di)
10552 _mm512_setzero_si512 (),
10553 (__mmask8) __U);
10556 extern __inline __m512i
10557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10558 _mm512_ror_epi64 (__m512i __A, int __B)
10560 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10561 (__v8di)
10562 _mm512_undefined_epi32 (),
10563 (__mmask8) -1);
10566 extern __inline __m512i
10567 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10568 _mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B)
10570 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10571 (__v8di) __W,
10572 (__mmask8) __U);
10575 extern __inline __m512i
10576 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10577 _mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B)
10579 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B,
10580 (__v8di)
10581 _mm512_setzero_si512 (),
10582 (__mmask8) __U);
10585 #else
10586 #define _mm512_rol_epi32(A, B) \
10587 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
10588 (int)(B), \
10589 (__v16si)_mm512_undefined_epi32 (), \
10590 (__mmask16)(-1)))
10591 #define _mm512_mask_rol_epi32(W, U, A, B) \
10592 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
10593 (int)(B), \
10594 (__v16si)(__m512i)(W), \
10595 (__mmask16)(U)))
10596 #define _mm512_maskz_rol_epi32(U, A, B) \
10597 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \
10598 (int)(B), \
10599 (__v16si)_mm512_setzero_si512 (), \
10600 (__mmask16)(U)))
10601 #define _mm512_ror_epi32(A, B) \
10602 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
10603 (int)(B), \
10604 (__v16si)_mm512_undefined_epi32 (), \
10605 (__mmask16)(-1)))
10606 #define _mm512_mask_ror_epi32(W, U, A, B) \
10607 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
10608 (int)(B), \
10609 (__v16si)(__m512i)(W), \
10610 (__mmask16)(U)))
10611 #define _mm512_maskz_ror_epi32(U, A, B) \
10612 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \
10613 (int)(B), \
10614 (__v16si)_mm512_setzero_si512 (), \
10615 (__mmask16)(U)))
10616 #define _mm512_rol_epi64(A, B) \
10617 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
10618 (int)(B), \
10619 (__v8di)_mm512_undefined_epi32 (), \
10620 (__mmask8)(-1)))
10621 #define _mm512_mask_rol_epi64(W, U, A, B) \
10622 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
10623 (int)(B), \
10624 (__v8di)(__m512i)(W), \
10625 (__mmask8)(U)))
10626 #define _mm512_maskz_rol_epi64(U, A, B) \
10627 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \
10628 (int)(B), \
10629 (__v8di)_mm512_setzero_si512 (), \
10630 (__mmask8)(U)))
10632 #define _mm512_ror_epi64(A, B) \
10633 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
10634 (int)(B), \
10635 (__v8di)_mm512_undefined_epi32 (), \
10636 (__mmask8)(-1)))
10637 #define _mm512_mask_ror_epi64(W, U, A, B) \
10638 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
10639 (int)(B), \
10640 (__v8di)(__m512i)(W), \
10641 (__mmask8)(U)))
10642 #define _mm512_maskz_ror_epi64(U, A, B) \
10643 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \
10644 (int)(B), \
10645 (__v8di)_mm512_setzero_si512 (), \
10646 (__mmask8)(U)))
10647 #endif
10649 extern __inline __m512i
10650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10651 _mm512_and_si512 (__m512i __A, __m512i __B)
10653 return (__m512i) ((__v16su) __A & (__v16su) __B);
10656 extern __inline __m512i
10657 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10658 _mm512_and_epi32 (__m512i __A, __m512i __B)
10660 return (__m512i) ((__v16su) __A & (__v16su) __B);
10663 extern __inline __m512i
10664 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10665 _mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
10667 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
10668 (__v16si) __B,
10669 (__v16si) __W,
10670 (__mmask16) __U);
10673 extern __inline __m512i
10674 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10675 _mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10677 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A,
10678 (__v16si) __B,
10679 (__v16si)
10680 _mm512_setzero_si512 (),
10681 (__mmask16) __U);
10684 extern __inline __m512i
10685 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10686 _mm512_and_epi64 (__m512i __A, __m512i __B)
10688 return (__m512i) ((__v8du) __A & (__v8du) __B);
10691 extern __inline __m512i
10692 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10693 _mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10695 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
10696 (__v8di) __B,
10697 (__v8di) __W, __U);
10700 extern __inline __m512i
10701 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10702 _mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
10704 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A,
10705 (__v8di) __B,
10706 (__v8di)
10707 _mm512_setzero_pd (),
10708 __U);
10711 extern __inline __m512i
10712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10713 _mm512_andnot_si512 (__m512i __A, __m512i __B)
10715 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10716 (__v16si) __B,
10717 (__v16si)
10718 _mm512_undefined_epi32 (),
10719 (__mmask16) -1);
10722 extern __inline __m512i
10723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10724 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
10726 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10727 (__v16si) __B,
10728 (__v16si)
10729 _mm512_undefined_epi32 (),
10730 (__mmask16) -1);
10733 extern __inline __m512i
10734 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10735 _mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
10737 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10738 (__v16si) __B,
10739 (__v16si) __W,
10740 (__mmask16) __U);
10743 extern __inline __m512i
10744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10745 _mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10747 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A,
10748 (__v16si) __B,
10749 (__v16si)
10750 _mm512_setzero_si512 (),
10751 (__mmask16) __U);
10754 extern __inline __m512i
10755 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10756 _mm512_andnot_epi64 (__m512i __A, __m512i __B)
10758 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10759 (__v8di) __B,
10760 (__v8di)
10761 _mm512_undefined_epi32 (),
10762 (__mmask8) -1);
10765 extern __inline __m512i
10766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10767 _mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10769 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10770 (__v8di) __B,
10771 (__v8di) __W, __U);
10774 extern __inline __m512i
10775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10776 _mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
10778 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A,
10779 (__v8di) __B,
10780 (__v8di)
10781 _mm512_setzero_pd (),
10782 __U);
10785 extern __inline __mmask16
10786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10787 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
10789 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
10790 (__v16si) __B,
10791 (__mmask16) -1);
10794 extern __inline __mmask16
10795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10796 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
10798 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A,
10799 (__v16si) __B, __U);
10802 extern __inline __mmask8
10803 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10804 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
10806 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A,
10807 (__v8di) __B,
10808 (__mmask8) -1);
10811 extern __inline __mmask8
10812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10813 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
10815 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U);
10818 extern __inline __mmask16
10819 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10820 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
10822 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
10823 (__v16si) __B,
10824 (__mmask16) -1);
10827 extern __inline __mmask16
10828 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10829 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
10831 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A,
10832 (__v16si) __B, __U);
10835 extern __inline __mmask8
10836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10837 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
10839 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
10840 (__v8di) __B,
10841 (__mmask8) -1);
10844 extern __inline __mmask8
10845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10846 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
10848 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A,
10849 (__v8di) __B, __U);
10852 extern __inline __m512
10853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10854 _mm512_abs_ps (__m512 __A)
10856 return (__m512) _mm512_and_epi32 ((__m512i) __A,
10857 _mm512_set1_epi32 (0x7fffffff));
10860 extern __inline __m512
10861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10862 _mm512_mask_abs_ps (__m512 __W, __mmask16 __U, __m512 __A)
10864 return (__m512) _mm512_mask_and_epi32 ((__m512i) __W, __U, (__m512i) __A,
10865 _mm512_set1_epi32 (0x7fffffff));
10868 extern __inline __m512d
10869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10870 _mm512_abs_pd (__m512d __A)
10872 return (__m512d) _mm512_and_epi64 ((__m512i) __A,
10873 _mm512_set1_epi64 (0x7fffffffffffffffLL));
10876 extern __inline __m512d
10877 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10878 _mm512_mask_abs_pd (__m512d __W, __mmask8 __U, __m512d __A)
10880 return (__m512d)
10881 _mm512_mask_and_epi64 ((__m512i) __W, __U, (__m512i) __A,
10882 _mm512_set1_epi64 (0x7fffffffffffffffLL));
10885 extern __inline __m512i
10886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10887 _mm512_unpackhi_epi32 (__m512i __A, __m512i __B)
10889 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10890 (__v16si) __B,
10891 (__v16si)
10892 _mm512_undefined_epi32 (),
10893 (__mmask16) -1);
10896 extern __inline __m512i
10897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10898 _mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
10899 __m512i __B)
10901 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10902 (__v16si) __B,
10903 (__v16si) __W,
10904 (__mmask16) __U);
10907 extern __inline __m512i
10908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10909 _mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10911 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A,
10912 (__v16si) __B,
10913 (__v16si)
10914 _mm512_setzero_si512 (),
10915 (__mmask16) __U);
10918 extern __inline __m512i
10919 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10920 _mm512_unpackhi_epi64 (__m512i __A, __m512i __B)
10922 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10923 (__v8di) __B,
10924 (__v8di)
10925 _mm512_undefined_epi32 (),
10926 (__mmask8) -1);
10929 extern __inline __m512i
10930 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10931 _mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10933 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10934 (__v8di) __B,
10935 (__v8di) __W,
10936 (__mmask8) __U);
10939 extern __inline __m512i
10940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10941 _mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
10943 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A,
10944 (__v8di) __B,
10945 (__v8di)
10946 _mm512_setzero_si512 (),
10947 (__mmask8) __U);
10950 extern __inline __m512i
10951 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10952 _mm512_unpacklo_epi32 (__m512i __A, __m512i __B)
10954 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10955 (__v16si) __B,
10956 (__v16si)
10957 _mm512_undefined_epi32 (),
10958 (__mmask16) -1);
10961 extern __inline __m512i
10962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10963 _mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
10964 __m512i __B)
10966 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10967 (__v16si) __B,
10968 (__v16si) __W,
10969 (__mmask16) __U);
10972 extern __inline __m512i
10973 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10974 _mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
10976 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A,
10977 (__v16si) __B,
10978 (__v16si)
10979 _mm512_setzero_si512 (),
10980 (__mmask16) __U);
10983 extern __inline __m512i
10984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10985 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
10987 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
10988 (__v8di) __B,
10989 (__v8di)
10990 _mm512_undefined_epi32 (),
10991 (__mmask8) -1);
10994 extern __inline __m512i
10995 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
10996 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
10998 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
10999 (__v8di) __B,
11000 (__v8di) __W,
11001 (__mmask8) __U);
11004 extern __inline __m512i
11005 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11006 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
11008 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A,
11009 (__v8di) __B,
11010 (__v8di)
11011 _mm512_setzero_si512 (),
11012 (__mmask8) __U);
11015 extern __inline __m512d
11016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11017 _mm512_movedup_pd (__m512d __A)
11019 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11020 (__v8df)
11021 _mm512_undefined_pd (),
11022 (__mmask8) -1);
11025 extern __inline __m512d
11026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11027 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
11029 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11030 (__v8df) __W,
11031 (__mmask8) __U);
11034 extern __inline __m512d
11035 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11036 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
11038 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
11039 (__v8df)
11040 _mm512_setzero_pd (),
11041 (__mmask8) __U);
11044 extern __inline __m512d
11045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11046 _mm512_unpacklo_pd (__m512d __A, __m512d __B)
11048 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11049 (__v8df) __B,
11050 (__v8df)
11051 _mm512_undefined_pd (),
11052 (__mmask8) -1);
11055 extern __inline __m512d
11056 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11057 _mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11059 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11060 (__v8df) __B,
11061 (__v8df) __W,
11062 (__mmask8) __U);
11065 extern __inline __m512d
11066 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11067 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
11069 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A,
11070 (__v8df) __B,
11071 (__v8df)
11072 _mm512_setzero_pd (),
11073 (__mmask8) __U);
11076 extern __inline __m512d
11077 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11078 _mm512_unpackhi_pd (__m512d __A, __m512d __B)
11080 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11081 (__v8df) __B,
11082 (__v8df)
11083 _mm512_undefined_pd (),
11084 (__mmask8) -1);
11087 extern __inline __m512d
11088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11089 _mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
11091 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11092 (__v8df) __B,
11093 (__v8df) __W,
11094 (__mmask8) __U);
11097 extern __inline __m512d
11098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11099 _mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B)
11101 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A,
11102 (__v8df) __B,
11103 (__v8df)
11104 _mm512_setzero_pd (),
11105 (__mmask8) __U);
11108 extern __inline __m512
11109 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11110 _mm512_unpackhi_ps (__m512 __A, __m512 __B)
11112 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11113 (__v16sf) __B,
11114 (__v16sf)
11115 _mm512_undefined_ps (),
11116 (__mmask16) -1);
11119 extern __inline __m512
11120 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11121 _mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
11123 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11124 (__v16sf) __B,
11125 (__v16sf) __W,
11126 (__mmask16) __U);
11129 extern __inline __m512
11130 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11131 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
11133 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A,
11134 (__v16sf) __B,
11135 (__v16sf)
11136 _mm512_setzero_ps (),
11137 (__mmask16) __U);
/* Width-changing FP conversions with an explicit rounding/SAE operand:
   vcvtps2pd (float -> double), vcvtph2ps (half -> float) and vcvtps2ph
   (float -> half, immediate rounding control __I).  The immediate
   operands must be compile-time constants, hence inline functions under
   __OPTIMIZE__ and macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_pd (__m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A,
			    const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundph_ps (__m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A,
			    const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf) __W,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U, __R);
}

/* _mm512_cvtps_ph and _mm512_cvt_roundps_ph are aliases: the rounding
   mode is carried in the immediate __I for vcvtps2ph.  */
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtps_ph (__m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_undefined_si256 (),
						     -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A,
			    const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi) __U,
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I)
{
  return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A,
						     __I,
						     (__v16hi)
						     _mm256_setzero_si256 (),
						     (__mmask16) __W);
}
#else
#define _mm512_cvt_roundps_pd(A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B)

#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B)

#define _mm512_maskz_cvt_roundps_pd(U, A, B) \
    (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_setzero_pd(), U, B)

#define _mm512_cvt_roundph_ps(A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B)

#define _mm512_maskz_cvt_roundph_ps(U, A, B) \
    (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B)

#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_cvtps_ph(A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_undefined_si256 (), -1))
#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_mask_cvtps_ph(U, W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)(__m256i)(U), (__mmask16) (W)))
#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#define _mm512_maskz_cvtps_ph(W, A, I) \
  ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) (A), (int) (I),\
    (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W)))
#endif
/* vcvtpd2ps with explicit rounding operand __R: narrow eight doubles to
   eight floats.  Inline functions under __OPTIMIZE__, macros otherwise
   (the rounding operand must be a compile-time constant).  */
#ifdef __OPTIMIZE__
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvt_roundpd_ps (__m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_undefined_ps (),
						   (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A,
			    const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf) __W,
						   (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
						   (__v8sf)
						   _mm256_setzero_ps (),
						   (__mmask8) __U, __R);
}

#else
#define _mm512_cvt_roundpd_ps(A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B)

#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B)

#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \
    (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B)

#endif
11347 extern __inline void
11348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11349 _mm512_stream_si512 (__m512i * __P, __m512i __A)
11351 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A);
11354 extern __inline void
11355 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11356 _mm512_stream_ps (float *__P, __m512 __A)
11358 __builtin_ia32_movntps512 (__P, (__v16sf) __A);
11361 extern __inline void
11362 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11363 _mm512_stream_pd (double *__P, __m512d __A)
11365 __builtin_ia32_movntpd512 (__P, (__v8df) __A);
11368 extern __inline __m512i
11369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11370 _mm512_stream_load_si512 (void *__P)
11372 return __builtin_ia32_movntdqa512 ((__v8di *)__P);
/* vgetexpps/pd: extract the biased exponent of each element as an FP
   value.  vgetmantps/pd: extract the normalized mantissa; the interval
   is selected by __B (_MM_MANTISSA_NORM_ENUM) and the sign treatment by
   __C (_MM_MANTISSA_SIGN_ENUM), combined into the immediate as
   (__C << 2) | __B.  All take an SAE operand __R, which must be a
   compile-time constant — hence the macro fallbacks.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			     const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			     const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     _mm512_undefined_pd (),
						     (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			      _MM_MANTISSA_NORM_ENUM __B,
			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df) __W, __U,
						     __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A,
			       _MM_MANTISSA_NORM_ENUM __B,
			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df)
						     _mm512_setzero_pd (),
						     __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    _mm512_undefined_ps (),
						    (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			      _MM_MANTISSA_NORM_ENUM __B,
			      _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf) __W, __U,
						    __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A,
			       _MM_MANTISSA_NORM_ENUM __B,
			       _MM_MANTISSA_SIGN_ENUM __C, const int __R)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    __U, __R);
}

#else
#define _mm512_getmant_round_pd(X, B, C, R)                                                  \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                  \
                                              (int)(((C)<<2) | (B)),                 \
                                              (__v8df)(__m512d)_mm512_undefined_pd(), \
                                              (__mmask8)-1,\
					      (R)))

#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R)                                       \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                  \
                                              (int)(((C)<<2) | (B)),                 \
                                              (__v8df)(__m512d)(W),                 \
                                              (__mmask8)(U),\
					      (R)))

#define _mm512_maskz_getmant_round_pd(U, X, B, C, R)                                         \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                  \
                                              (int)(((C)<<2) | (B)),                 \
                                              (__v8df)(__m512d)_mm512_setzero_pd(),  \
                                              (__mmask8)(U),\
					      (R)))
#define _mm512_getmant_round_ps(X, B, C, R)                                                  \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                   \
                                             (int)(((C)<<2) | (B)),                  \
                                             (__v16sf)(__m512)_mm512_undefined_ps(), \
                                             (__mmask16)-1,\
					     (R)))

#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R)                                       \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                   \
                                             (int)(((C)<<2) | (B)),                  \
                                             (__v16sf)(__m512)(W),                   \
                                             (__mmask16)(U),\
					     (R)))

#define _mm512_maskz_getmant_round_ps(U, X, B, C, R)                                         \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                   \
                                             (int)(((C)<<2) | (B)),                  \
                                             (__v16sf)(__m512)_mm512_setzero_ps(),   \
                                             (__mmask16)(U),\
					     (R)))

#define _mm512_getexp_round_ps(A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  (__v16sf)(__m512)(W), (__mmask16)(U), R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
  (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R))

#define _mm512_getexp_round_pd(A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  (__v8df)(__m512d)(W), (__mmask8)(U), R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
  (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R))
#endif
/* vrndscaleps/pd with explicit SAE operand: round each element to the
   number of fraction bits encoded in the immediate __imm.  Both __imm
   and __R must be compile-time constants, hence the macro fallbacks.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C,
				 const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
						  (__v16sf) __A,
						  (__mmask16) __B, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B,
				  const int __imm, const int __R)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
						  __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __A, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B,
				 __m512d __C, const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
						   (__v8df) __A,
						   (__mmask8) __B, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B,
				  const int __imm, const int __R)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
						   __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __A, __R);
}

#else
#define _mm512_roundscale_round_ps(A, B, R) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R))
#define _mm512_mask_roundscale_round_ps(A, B, C, D, R)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), R))
#define _mm512_maskz_roundscale_round_ps(A, B, C, R)			\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), R))
#define _mm512_roundscale_round_pd(A, B, R) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R))
#define _mm512_mask_roundscale_round_pd(A, B, C, D, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), R))
#define _mm512_maskz_roundscale_round_pd(A, B, C, R)			\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), R))
#endif
11669 extern __inline __m512
11670 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11671 _mm512_floor_ps (__m512 __A)
11673 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11674 _MM_FROUND_FLOOR,
11675 (__v16sf) __A, -1,
11676 _MM_FROUND_CUR_DIRECTION);
11679 extern __inline __m512d
11680 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11681 _mm512_floor_pd (__m512d __A)
11683 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11684 _MM_FROUND_FLOOR,
11685 (__v8df) __A, -1,
11686 _MM_FROUND_CUR_DIRECTION);
11689 extern __inline __m512
11690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11691 _mm512_ceil_ps (__m512 __A)
11693 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11694 _MM_FROUND_CEIL,
11695 (__v16sf) __A, -1,
11696 _MM_FROUND_CUR_DIRECTION);
11699 extern __inline __m512d
11700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11701 _mm512_ceil_pd (__m512d __A)
11703 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11704 _MM_FROUND_CEIL,
11705 (__v8df) __A, -1,
11706 _MM_FROUND_CUR_DIRECTION);
11709 extern __inline __m512
11710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11711 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
11713 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11714 _MM_FROUND_FLOOR,
11715 (__v16sf) __W, __U,
11716 _MM_FROUND_CUR_DIRECTION);
11719 extern __inline __m512d
11720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11721 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
11723 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11724 _MM_FROUND_FLOOR,
11725 (__v8df) __W, __U,
11726 _MM_FROUND_CUR_DIRECTION);
11729 extern __inline __m512
11730 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11731 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
11733 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
11734 _MM_FROUND_CEIL,
11735 (__v16sf) __W, __U,
11736 _MM_FROUND_CUR_DIRECTION);
11739 extern __inline __m512d
11740 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11741 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
11743 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
11744 _MM_FROUND_CEIL,
11745 (__v8df) __W, __U,
11746 _MM_FROUND_CUR_DIRECTION);
/* valignd/valignq: concatenate __A:__B and extract a 512-bit result
   shifted right by __imm 32-bit (epi32) or 64-bit (epi64) elements.
   __imm must be a compile-time constant, hence the macro fallbacks.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si) __W,
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A,
						  (__v16si) __B, __imm,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			  __m512i __B, const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di) __W,
						  (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A,
						  (__v8di) __B, __imm,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_alignr_epi32(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_epi32 (),\
        (__mmask16)-1))

#define _mm512_mask_alignr_epi32(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W),             \
        (__mmask16)(U)))

#define _mm512_maskz_alignr_epi32(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X),         \
        (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),  \
        (__mmask16)(U)))

#define _mm512_alignr_epi64(X, Y, C)                                        \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_epi32 (),  \
        (__mmask8)-1))

#define _mm512_mask_alignr_epi64(W, U, X, Y, C)                             \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U)))

#define _mm512_maskz_alignr_epi64(U, X, Y, C)                               \
    ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X),          \
        (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),    \
        (__mmask8)(U)))
#endif
11848 extern __inline __mmask16
11849 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11850 _mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B)
11852 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
11853 (__v16si) __B,
11854 (__mmask16) -1);
11857 extern __inline __mmask16
11858 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11859 _mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
11861 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A,
11862 (__v16si) __B, __U);
11865 extern __inline __mmask8
11866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11867 _mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
11869 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
11870 (__v8di) __B, __U);
11873 extern __inline __mmask8
11874 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11875 _mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B)
11877 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A,
11878 (__v8di) __B,
11879 (__mmask8) -1);
11882 extern __inline __mmask16
11883 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11884 _mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B)
11886 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
11887 (__v16si) __B,
11888 (__mmask16) -1);
11891 extern __inline __mmask16
11892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11893 _mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
11895 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A,
11896 (__v16si) __B, __U);
11899 extern __inline __mmask8
11900 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11901 _mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
11903 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
11904 (__v8di) __B, __U);
11907 extern __inline __mmask8
11908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11909 _mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B)
11911 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A,
11912 (__v8di) __B,
11913 (__mmask8) -1);
11916 extern __inline __mmask16
11917 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11918 _mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y)
11920 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11921 (__v16si) __Y, 5,
11922 (__mmask16) -1);
11925 extern __inline __mmask16
11926 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11927 _mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
11929 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11930 (__v16si) __Y, 5,
11931 (__mmask16) __M);
11934 extern __inline __mmask16
11935 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11936 _mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
11938 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
11939 (__v16si) __Y, 5,
11940 (__mmask16) __M);
11943 extern __inline __mmask16
11944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11945 _mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y)
11947 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
11948 (__v16si) __Y, 5,
11949 (__mmask16) -1);
11952 extern __inline __mmask8
11953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11954 _mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
11956 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
11957 (__v8di) __Y, 5,
11958 (__mmask8) __M);
11961 extern __inline __mmask8
11962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11963 _mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y)
11965 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
11966 (__v8di) __Y, 5,
11967 (__mmask8) -1);
11970 extern __inline __mmask8
11971 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11972 _mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
11974 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
11975 (__v8di) __Y, 5,
11976 (__mmask8) __M);
11979 extern __inline __mmask8
11980 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11981 _mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y)
11983 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
11984 (__v8di) __Y, 5,
11985 (__mmask8) -1);
11988 extern __inline __mmask16
11989 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11990 _mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
11992 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
11993 (__v16si) __Y, 2,
11994 (__mmask16) __M);
11997 extern __inline __mmask16
11998 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11999 _mm512_cmple_epi32_mask (__m512i __X, __m512i __Y)
12001 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12002 (__v16si) __Y, 2,
12003 (__mmask16) -1);
12006 extern __inline __mmask16
12007 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12008 _mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
12010 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12011 (__v16si) __Y, 2,
12012 (__mmask16) __M);
12015 extern __inline __mmask16
12016 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12017 _mm512_cmple_epu32_mask (__m512i __X, __m512i __Y)
12019 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12020 (__v16si) __Y, 2,
12021 (__mmask16) -1);
12024 extern __inline __mmask8
12025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12026 _mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12028 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12029 (__v8di) __Y, 2,
12030 (__mmask8) __M);
12033 extern __inline __mmask8
12034 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12035 _mm512_cmple_epi64_mask (__m512i __X, __m512i __Y)
12037 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12038 (__v8di) __Y, 2,
12039 (__mmask8) -1);
12042 extern __inline __mmask8
12043 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12044 _mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12046 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12047 (__v8di) __Y, 2,
12048 (__mmask8) __M);
12051 extern __inline __mmask8
12052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12053 _mm512_cmple_epu64_mask (__m512i __X, __m512i __Y)
12055 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12056 (__v8di) __Y, 2,
12057 (__mmask8) -1);
12060 extern __inline __mmask16
12061 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12062 _mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
12064 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12065 (__v16si) __Y, 1,
12066 (__mmask16) __M);
12069 extern __inline __mmask16
12070 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12071 _mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y)
12073 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12074 (__v16si) __Y, 1,
12075 (__mmask16) -1);
12078 extern __inline __mmask16
12079 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12080 _mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
12082 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12083 (__v16si) __Y, 1,
12084 (__mmask16) __M);
12087 extern __inline __mmask16
12088 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12089 _mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y)
12091 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12092 (__v16si) __Y, 1,
12093 (__mmask16) -1);
12096 extern __inline __mmask8
12097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12098 _mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12100 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12101 (__v8di) __Y, 1,
12102 (__mmask8) __M);
12105 extern __inline __mmask8
12106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12107 _mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y)
12109 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12110 (__v8di) __Y, 1,
12111 (__mmask8) -1);
12114 extern __inline __mmask8
12115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12116 _mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12118 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12119 (__v8di) __Y, 1,
12120 (__mmask8) __M);
12123 extern __inline __mmask8
12124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12125 _mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y)
12127 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12128 (__v8di) __Y, 1,
12129 (__mmask8) -1);
12132 extern __inline __mmask16
12133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134 _mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y)
12136 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12137 (__v16si) __Y, 4,
12138 (__mmask16) -1);
12141 extern __inline __mmask16
12142 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12143 _mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
12145 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12146 (__v16si) __Y, 4,
12147 (__mmask16) __M);
12150 extern __inline __mmask16
12151 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12152 _mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y)
12154 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12155 (__v16si) __Y, 4,
12156 (__mmask16) __M);
12159 extern __inline __mmask16
12160 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12161 _mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y)
12163 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12164 (__v16si) __Y, 4,
12165 (__mmask16) -1);
12168 extern __inline __mmask8
12169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12170 _mm512_mask_cmpneq_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12172 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12173 (__v8di) __Y, 4,
12174 (__mmask8) __M);
12177 extern __inline __mmask8
12178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12179 _mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y)
12181 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12182 (__v8di) __Y, 4,
12183 (__mmask8) -1);
12186 extern __inline __mmask8
12187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12188 _mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y)
12190 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12191 (__v8di) __Y, 4,
12192 (__mmask8) __M);
12195 extern __inline __mmask8
12196 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12197 _mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y)
12199 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12200 (__v8di) __Y, 4,
12201 (__mmask8) -1);
/* Comparison predicate encodings for the generic _mm512_cmp_*_mask
   intrinsics below.  Note NLT/GE and NLE/GT are aliases for the same
   encodings (0x5 and 0x6 respectively).  */
#define _MM_CMPINT_EQ	    0x0
#define _MM_CMPINT_LT	    0x1
#define _MM_CMPINT_LE	    0x2
#define _MM_CMPINT_UNUSED   0x3
#define _MM_CMPINT_NE	    0x4
#define _MM_CMPINT_NLT	    0x5
#define _MM_CMPINT_GE	    0x5
#define _MM_CMPINT_NLE	    0x6
#define _MM_CMPINT_GT	    0x6
12214 #ifdef __OPTIMIZE__
12215 extern __inline __mmask8
12216 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12217 _mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P)
12219 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12220 (__v8di) __Y, __P,
12221 (__mmask8) -1);
12224 extern __inline __mmask16
12225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12226 _mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P)
12228 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12229 (__v16si) __Y, __P,
12230 (__mmask16) -1);
12233 extern __inline __mmask8
12234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12235 _mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P)
12237 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12238 (__v8di) __Y, __P,
12239 (__mmask8) -1);
12242 extern __inline __mmask16
12243 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12244 _mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P)
12246 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12247 (__v16si) __Y, __P,
12248 (__mmask16) -1);
12251 extern __inline __mmask8
12252 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12253 _mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P,
12254 const int __R)
12256 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12257 (__v8df) __Y, __P,
12258 (__mmask8) -1, __R);
12261 extern __inline __mmask16
12262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12263 _mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R)
12265 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12266 (__v16sf) __Y, __P,
12267 (__mmask16) -1, __R);
12270 extern __inline __mmask8
12271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12272 _mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
12273 const int __P)
12275 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X,
12276 (__v8di) __Y, __P,
12277 (__mmask8) __U);
12280 extern __inline __mmask16
12281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12282 _mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
12283 const int __P)
12285 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X,
12286 (__v16si) __Y, __P,
12287 (__mmask16) __U);
12290 extern __inline __mmask8
12291 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12292 _mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y,
12293 const int __P)
12295 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X,
12296 (__v8di) __Y, __P,
12297 (__mmask8) __U);
12300 extern __inline __mmask16
12301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12302 _mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y,
12303 const int __P)
12305 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X,
12306 (__v16si) __Y, __P,
12307 (__mmask16) __U);
12310 extern __inline __mmask8
12311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12312 _mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y,
12313 const int __P, const int __R)
12315 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
12316 (__v8df) __Y, __P,
12317 (__mmask8) __U, __R);
12320 extern __inline __mmask16
12321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12322 _mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y,
12323 const int __P, const int __R)
12325 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
12326 (__v16sf) __Y, __P,
12327 (__mmask16) __U, __R);
12330 #else
12331 #define _mm512_cmp_epi64_mask(X, Y, P) \
12332 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
12333 (__v8di)(__m512i)(Y), (int)(P),\
12334 (__mmask8)-1))
12336 #define _mm512_cmp_epi32_mask(X, Y, P) \
12337 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
12338 (__v16si)(__m512i)(Y), (int)(P), \
12339 (__mmask16)-1))
12341 #define _mm512_cmp_epu64_mask(X, Y, P) \
12342 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
12343 (__v8di)(__m512i)(Y), (int)(P),\
12344 (__mmask8)-1))
12346 #define _mm512_cmp_epu32_mask(X, Y, P) \
12347 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
12348 (__v16si)(__m512i)(Y), (int)(P), \
12349 (__mmask16)-1))
12351 #define _mm512_cmp_round_pd_mask(X, Y, P, R) \
12352 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12353 (__v8df)(__m512d)(Y), (int)(P),\
12354 (__mmask8)-1, R))
12356 #define _mm512_cmp_round_ps_mask(X, Y, P, R) \
12357 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12358 (__v16sf)(__m512)(Y), (int)(P),\
12359 (__mmask16)-1, R))
12361 #define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \
12362 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \
12363 (__v8di)(__m512i)(Y), (int)(P),\
12364 (__mmask8)(M)))
12366 #define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \
12367 ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \
12368 (__v16si)(__m512i)(Y), (int)(P), \
12369 (__mmask16)(M)))
12371 #define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \
12372 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \
12373 (__v8di)(__m512i)(Y), (int)(P),\
12374 (__mmask8)(M)))
12376 #define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \
12377 ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \
12378 (__v16si)(__m512i)(Y), (int)(P), \
12379 (__mmask16)(M)))
12381 #define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \
12382 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \
12383 (__v8df)(__m512d)(Y), (int)(P),\
12384 (__mmask8)(M), R))
12386 #define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \
12387 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \
12388 (__v16sf)(__m512)(Y), (int)(P),\
12389 (__mmask16)(M), R))
12391 #endif
12393 #ifdef __OPTIMIZE__
12394 extern __inline __m512
12395 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12396 _mm512_i32gather_ps (__m512i __index, void const *__addr, int __scale)
12398 __m512 __v1_old = _mm512_undefined_ps ();
12399 __mmask16 __mask = 0xFFFF;
12401 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
12402 __addr,
12403 (__v16si) __index,
12404 __mask, __scale);
12407 extern __inline __m512
12408 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12409 _mm512_mask_i32gather_ps (__m512 __v1_old, __mmask16 __mask,
12410 __m512i __index, void const *__addr, int __scale)
12412 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) __v1_old,
12413 __addr,
12414 (__v16si) __index,
12415 __mask, __scale);
12418 extern __inline __m512d
12419 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12420 _mm512_i32gather_pd (__m256i __index, void const *__addr, int __scale)
12422 __m512d __v1_old = _mm512_undefined_pd ();
12423 __mmask8 __mask = 0xFF;
12425 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
12426 __addr,
12427 (__v8si) __index, __mask,
12428 __scale);
12431 extern __inline __m512d
12432 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12433 _mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask,
12434 __m256i __index, void const *__addr, int __scale)
12436 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) __v1_old,
12437 __addr,
12438 (__v8si) __index,
12439 __mask, __scale);
12442 extern __inline __m256
12443 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12444 _mm512_i64gather_ps (__m512i __index, void const *__addr, int __scale)
12446 __m256 __v1_old = _mm256_undefined_ps ();
12447 __mmask8 __mask = 0xFF;
12449 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
12450 __addr,
12451 (__v8di) __index, __mask,
12452 __scale);
12455 extern __inline __m256
12456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12457 _mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask,
12458 __m512i __index, void const *__addr, int __scale)
12460 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old,
12461 __addr,
12462 (__v8di) __index,
12463 __mask, __scale);
12466 extern __inline __m512d
12467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12468 _mm512_i64gather_pd (__m512i __index, void const *__addr, int __scale)
12470 __m512d __v1_old = _mm512_undefined_pd ();
12471 __mmask8 __mask = 0xFF;
12473 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
12474 __addr,
12475 (__v8di) __index, __mask,
12476 __scale);
12479 extern __inline __m512d
12480 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12481 _mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask,
12482 __m512i __index, void const *__addr, int __scale)
12484 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old,
12485 __addr,
12486 (__v8di) __index,
12487 __mask, __scale);
12490 extern __inline __m512i
12491 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12492 _mm512_i32gather_epi32 (__m512i __index, void const *__addr, int __scale)
12494 __m512i __v1_old = _mm512_undefined_epi32 ();
12495 __mmask16 __mask = 0xFFFF;
12497 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
12498 __addr,
12499 (__v16si) __index,
12500 __mask, __scale);
12503 extern __inline __m512i
12504 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12505 _mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask,
12506 __m512i __index, void const *__addr, int __scale)
12508 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old,
12509 __addr,
12510 (__v16si) __index,
12511 __mask, __scale);
12514 extern __inline __m512i
12515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12516 _mm512_i32gather_epi64 (__m256i __index, void const *__addr, int __scale)
12518 __m512i __v1_old = _mm512_undefined_epi32 ();
12519 __mmask8 __mask = 0xFF;
12521 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
12522 __addr,
12523 (__v8si) __index, __mask,
12524 __scale);
12527 extern __inline __m512i
12528 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12529 _mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask,
12530 __m256i __index, void const *__addr,
12531 int __scale)
12533 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old,
12534 __addr,
12535 (__v8si) __index,
12536 __mask, __scale);
12539 extern __inline __m256i
12540 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12541 _mm512_i64gather_epi32 (__m512i __index, void const *__addr, int __scale)
12543 __m256i __v1_old = _mm256_undefined_si256 ();
12544 __mmask8 __mask = 0xFF;
12546 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
12547 __addr,
12548 (__v8di) __index,
12549 __mask, __scale);
12552 extern __inline __m256i
12553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12554 _mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask,
12555 __m512i __index, void const *__addr, int __scale)
12557 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old,
12558 __addr,
12559 (__v8di) __index,
12560 __mask, __scale);
12563 extern __inline __m512i
12564 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12565 _mm512_i64gather_epi64 (__m512i __index, void const *__addr, int __scale)
12567 __m512i __v1_old = _mm512_undefined_epi32 ();
12568 __mmask8 __mask = 0xFF;
12570 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
12571 __addr,
12572 (__v8di) __index, __mask,
12573 __scale);
12576 extern __inline __m512i
12577 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12578 _mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask,
12579 __m512i __index, void const *__addr,
12580 int __scale)
12582 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old,
12583 __addr,
12584 (__v8di) __index,
12585 __mask, __scale);
12588 extern __inline void
12589 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12590 _mm512_i32scatter_ps (void *__addr, __m512i __index, __m512 __v1, int __scale)
12592 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF,
12593 (__v16si) __index, (__v16sf) __v1, __scale);
12596 extern __inline void
12597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12598 _mm512_mask_i32scatter_ps (void *__addr, __mmask16 __mask,
12599 __m512i __index, __m512 __v1, int __scale)
12601 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index,
12602 (__v16sf) __v1, __scale);
12605 extern __inline void
12606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12607 _mm512_i32scatter_pd (void *__addr, __m256i __index, __m512d __v1,
12608 int __scale)
12610 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF,
12611 (__v8si) __index, (__v8df) __v1, __scale);
12614 extern __inline void
12615 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12616 _mm512_mask_i32scatter_pd (void *__addr, __mmask8 __mask,
12617 __m256i __index, __m512d __v1, int __scale)
12619 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) __index,
12620 (__v8df) __v1, __scale);
12623 extern __inline void
12624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12625 _mm512_i64scatter_ps (void *__addr, __m512i __index, __m256 __v1, int __scale)
12627 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF,
12628 (__v8di) __index, (__v8sf) __v1, __scale);
12631 extern __inline void
12632 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12633 _mm512_mask_i64scatter_ps (void *__addr, __mmask8 __mask,
12634 __m512i __index, __m256 __v1, int __scale)
12636 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index,
12637 (__v8sf) __v1, __scale);
12640 extern __inline void
12641 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12642 _mm512_i64scatter_pd (void *__addr, __m512i __index, __m512d __v1,
12643 int __scale)
12645 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF,
12646 (__v8di) __index, (__v8df) __v1, __scale);
12649 extern __inline void
12650 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12651 _mm512_mask_i64scatter_pd (void *__addr, __mmask8 __mask,
12652 __m512i __index, __m512d __v1, int __scale)
12654 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index,
12655 (__v8df) __v1, __scale);
12658 extern __inline void
12659 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12660 _mm512_i32scatter_epi32 (void *__addr, __m512i __index,
12661 __m512i __v1, int __scale)
12663 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF,
12664 (__v16si) __index, (__v16si) __v1, __scale);
12667 extern __inline void
12668 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12669 _mm512_mask_i32scatter_epi32 (void *__addr, __mmask16 __mask,
12670 __m512i __index, __m512i __v1, int __scale)
12672 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index,
12673 (__v16si) __v1, __scale);
12676 extern __inline void
12677 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12678 _mm512_i32scatter_epi64 (void *__addr, __m256i __index,
12679 __m512i __v1, int __scale)
12681 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF,
12682 (__v8si) __index, (__v8di) __v1, __scale);
12685 extern __inline void
12686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12687 _mm512_mask_i32scatter_epi64 (void *__addr, __mmask8 __mask,
12688 __m256i __index, __m512i __v1, int __scale)
12690 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index,
12691 (__v8di) __v1, __scale);
12694 extern __inline void
12695 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12696 _mm512_i64scatter_epi32 (void *__addr, __m512i __index,
12697 __m256i __v1, int __scale)
12699 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF,
12700 (__v8di) __index, (__v8si) __v1, __scale);
12703 extern __inline void
12704 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12705 _mm512_mask_i64scatter_epi32 (void *__addr, __mmask8 __mask,
12706 __m512i __index, __m256i __v1, int __scale)
12708 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index,
12709 (__v8si) __v1, __scale);
12712 extern __inline void
12713 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12714 _mm512_i64scatter_epi64 (void *__addr, __m512i __index,
12715 __m512i __v1, int __scale)
12717 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF,
12718 (__v8di) __index, (__v8di) __v1, __scale);
12721 extern __inline void
12722 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12723 _mm512_mask_i64scatter_epi64 (void *__addr, __mmask8 __mask,
12724 __m512i __index, __m512i __v1, int __scale)
12726 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index,
12727 (__v8di) __v1, __scale);
12729 #else
12730 #define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \
12731 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\
12732 (void const *) (ADDR), \
12733 (__v16si)(__m512i) (INDEX), \
12734 (__mmask16)0xFFFF, \
12735 (int) (SCALE))
12737 #define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12738 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512) (V1OLD), \
12739 (void const *) (ADDR), \
12740 (__v16si)(__m512i) (INDEX), \
12741 (__mmask16) (MASK), \
12742 (int) (SCALE))
12744 #define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \
12745 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \
12746 (void const *) (ADDR), \
12747 (__v8si)(__m256i) (INDEX), \
12748 (__mmask8)0xFF, (int) (SCALE))
12750 #define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12751 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d) (V1OLD), \
12752 (void const *) (ADDR), \
12753 (__v8si)(__m256i) (INDEX), \
12754 (__mmask8) (MASK), \
12755 (int) (SCALE))
12757 #define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \
12758 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \
12759 (void const *) (ADDR), \
12760 (__v8di)(__m512i) (INDEX), \
12761 (__mmask8)0xFF, (int) (SCALE))
12763 #define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \
12764 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256) (V1OLD), \
12765 (void const *) (ADDR), \
12766 (__v8di)(__m512i) (INDEX), \
12767 (__mmask8) (MASK), \
12768 (int) (SCALE))
12770 #define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \
12771 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \
12772 (void const *) (ADDR), \
12773 (__v8di)(__m512i) (INDEX), \
12774 (__mmask8)0xFF, (int) (SCALE))
12776 #define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \
12777 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d) (V1OLD), \
12778 (void const *) (ADDR), \
12779 (__v8di)(__m512i) (INDEX), \
12780 (__mmask8) (MASK), \
12781 (int) (SCALE))
12783 #define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \
12784 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_epi32 (),\
12785 (void const *) (ADDR), \
12786 (__v16si)(__m512i) (INDEX), \
12787 (__mmask16)0xFFFF, \
12788 (int) (SCALE))
12790 #define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12791 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i) (V1OLD), \
12792 (void const *) (ADDR), \
12793 (__v16si)(__m512i) (INDEX), \
12794 (__mmask16) (MASK), \
12795 (int) (SCALE))
12797 #define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \
12798 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_epi32 (),\
12799 (void const *) (ADDR), \
12800 (__v8si)(__m256i) (INDEX), \
12801 (__mmask8)0xFF, (int) (SCALE))
12803 #define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12804 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i) (V1OLD), \
12805 (void const *) (ADDR), \
12806 (__v8si)(__m256i) (INDEX), \
12807 (__mmask8) (MASK), \
12808 (int) (SCALE))
12810 #define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \
12811 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(),\
12812 (void const *) (ADDR), \
12813 (__v8di)(__m512i) (INDEX), \
12814 (__mmask8)0xFF, (int) (SCALE))
12816 #define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \
12817 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i) (V1OLD), \
12818 (void const *) (ADDR), \
12819 (__v8di)(__m512i) (INDEX), \
12820 (__mmask8) (MASK), \
12821 (int) (SCALE))
12823 #define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \
12824 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_epi32 (),\
12825 (void const *) (ADDR), \
12826 (__v8di)(__m512i) (INDEX), \
12827 (__mmask8)0xFF, (int) (SCALE))
12829 #define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \
12830 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i) (V1OLD), \
12831 (void const *) (ADDR), \
12832 (__v8di)(__m512i) (INDEX), \
12833 (__mmask8) (MASK), \
12834 (int) (SCALE))
12836 #define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \
12837 __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16)0xFFFF, \
12838 (__v16si)(__m512i) (INDEX), \
12839 (__v16sf)(__m512) (V1), (int) (SCALE))
12841 #define _mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12842 __builtin_ia32_scattersiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
12843 (__v16si)(__m512i) (INDEX), \
12844 (__v16sf)(__m512) (V1), (int) (SCALE))
12846 #define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \
12847 __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8)0xFF, \
12848 (__v8si)(__m256i) (INDEX), \
12849 (__v8df)(__m512d) (V1), (int) (SCALE))
12851 #define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12852 __builtin_ia32_scattersiv8df ((void *) (ADDR), (__mmask8) (MASK), \
12853 (__v8si)(__m256i) (INDEX), \
12854 (__v8df)(__m512d) (V1), (int) (SCALE))
12856 #define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \
12857 __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask8)0xFF, \
12858 (__v8di)(__m512i) (INDEX), \
12859 (__v8sf)(__m256) (V1), (int) (SCALE))
12861 #define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \
12862 __builtin_ia32_scatterdiv16sf ((void *) (ADDR), (__mmask16) (MASK), \
12863 (__v8di)(__m512i) (INDEX), \
12864 (__v8sf)(__m256) (V1), (int) (SCALE))
12866 #define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \
12867 __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8)0xFF, \
12868 (__v8di)(__m512i) (INDEX), \
12869 (__v8df)(__m512d) (V1), (int) (SCALE))
12871 #define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \
12872 __builtin_ia32_scatterdiv8df ((void *) (ADDR), (__mmask8) (MASK), \
12873 (__v8di)(__m512i) (INDEX), \
12874 (__v8df)(__m512d) (V1), (int) (SCALE))
12876 #define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \
12877 __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16)0xFFFF, \
12878 (__v16si)(__m512i) (INDEX), \
12879 (__v16si)(__m512i) (V1), (int) (SCALE))
12881 #define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12882 __builtin_ia32_scattersiv16si ((void *) (ADDR), (__mmask16) (MASK), \
12883 (__v16si)(__m512i) (INDEX), \
12884 (__v16si)(__m512i) (V1), (int) (SCALE))
12886 #define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \
12887 __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8)0xFF, \
12888 (__v8si)(__m256i) (INDEX), \
12889 (__v8di)(__m512i) (V1), (int) (SCALE))
12891 #define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12892 __builtin_ia32_scattersiv8di ((void *) (ADDR), (__mmask8) (MASK), \
12893 (__v8si)(__m256i) (INDEX), \
12894 (__v8di)(__m512i) (V1), (int) (SCALE))
12896 #define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \
12897 __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8)0xFF, \
12898 (__v8di)(__m512i) (INDEX), \
12899 (__v8si)(__m256i) (V1), (int) (SCALE))
12901 #define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \
12902 __builtin_ia32_scatterdiv16si ((void *) (ADDR), (__mmask8) (MASK), \
12903 (__v8di)(__m512i) (INDEX), \
12904 (__v8si)(__m256i) (V1), (int) (SCALE))
12906 #define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \
12907 __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8)0xFF, \
12908 (__v8di)(__m512i) (INDEX), \
12909 (__v8di)(__m512i) (V1), (int) (SCALE))
12911 #define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \
12912 __builtin_ia32_scatterdiv8di ((void *) (ADDR), (__mmask8) (MASK), \
12913 (__v8di)(__m512i) (INDEX), \
12914 (__v8di)(__m512i) (V1), (int) (SCALE))
12915 #endif
12917 extern __inline __m512d
12918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12919 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
12921 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
12922 (__v8df) __W,
12923 (__mmask8) __U);
12926 extern __inline __m512d
12927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12928 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
12930 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
12931 (__v8df)
12932 _mm512_setzero_pd (),
12933 (__mmask8) __U);
12936 extern __inline void
12937 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12938 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
12940 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
12941 (__mmask8) __U);
12944 extern __inline __m512
12945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12946 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
12948 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
12949 (__v16sf) __W,
12950 (__mmask16) __U);
12953 extern __inline __m512
12954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12955 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
12957 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
12958 (__v16sf)
12959 _mm512_setzero_ps (),
12960 (__mmask16) __U);
12963 extern __inline void
12964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12965 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
12967 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
12968 (__mmask16) __U);
/* Merge-masked compress of packed 64-bit integers selected by __U;
   unselected result elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
						      (__v8di) __W,
						      (__mmask8) __U);
}

/* Zero-masked variant: unselected result elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
						      (__v8di)
						      _mm512_setzero_si512 (),
						      (__mmask8) __U);
}

/* Compress the qwords of __A selected by __U and store them contiguously
   (unaligned) starting at __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
					  (__mmask8) __U);
}
/* Merge-masked compress of packed 32-bit integers selected by __U;
   unselected result elements come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
						      (__v16si) __W,
						      (__mmask16) __U);
}

/* Zero-masked variant: unselected result elements are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
						      (__v16si)
						      _mm512_setzero_si512 (),
						      (__mmask16) __U);
}

/* Compress the dwords of __A selected by __U and store them contiguously
   (unaligned) starting at __P.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
					  (__mmask16) __U);
}
/* Merge-masked expand: scatter the low doubles of __A into the result
   positions selected by __U; unselected positions come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
						    (__v8df) __W,
						    (__mmask8) __U);
}

/* Zero-masked expand: unselected positions are zeroed (note the distinct
   _maskz builtin).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

/* Expand-load: read contiguous doubles from unaligned __P into the
   positions selected by __U; other positions come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P,
							(__v8df) __W,
							(__mmask8) __U);
}

/* Zero-masked expand-load: other positions are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P,
							 (__v8df)
							 _mm512_setzero_pd (),
							 (__mmask8) __U);
}
/* Merge-masked expand of packed floats into the positions selected by __U;
   unselected positions come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

/* Zero-masked expand: unselected positions are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}

/* Expand-load floats from unaligned __P into positions selected by __U;
   other positions come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P,
						       (__v16sf) __W,
						       (__mmask16) __U);
}

/* Zero-masked expand-load: other positions are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P,
							(__v16sf)
							_mm512_setzero_ps (),
							(__mmask16) __U);
}
/* Merge-masked expand of packed 64-bit integers into positions selected by
   __U; unselected positions come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
						    (__v8di) __W,
						    (__mmask8) __U);
}

/* Zero-masked expand: unselected positions are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

/* Expand-load qwords from unaligned __P into positions selected by __U;
   other positions come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P,
							(__v8di) __W,
							(__mmask8) __U);
}

/* Zero-masked expand-load: other positions are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i)
	 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P,
					       (__v8di)
					       _mm512_setzero_si512 (),
					       (__mmask8) __U);
}
/* Merge-masked expand of packed 32-bit integers into positions selected by
   __U; unselected positions come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
						    (__v16si) __W,
						    (__mmask16) __U);
}

/* Zero-masked expand: unselected positions are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

/* Expand-load dwords from unaligned __P into positions selected by __U;
   other positions come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P,
							(__v16si) __W,
							(__mmask16) __U);
}

/* Zero-masked expand-load: other positions are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P,
							 (__v16si)
							 _mm512_setzero_si512
							 (), (__mmask16) __U);
}
/* 16-bit mask-register (k-register) logical operations.  */

/* Bitwise AND of two 16-bit masks.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kand (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
}

/* ~__A & __B.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kandn (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
					     (__mmask16) __B);
}

/* Bitwise OR of two 16-bit masks.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
}

/* Nonzero iff (__A | __B) == 0 (KORTEST ZF).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestz (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A,
						(__mmask16) __B);
}

/* Nonzero iff (__A | __B) == 0xFFFF (KORTEST CF).  */
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kortestc (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A,
						(__mmask16) __B);
}

/* Bitwise XNOR of two 16-bit masks.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxnor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise XOR of two 16-bit masks.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kxor (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
}

/* Bitwise NOT of a 16-bit mask.  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_knot (__mmask16 __A)
{
  return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A);
}

/* Unpack/concatenate the low bytes of two masks (KUNPCKBW).  */
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
{
  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
}
/* Masked 128-bit lane insertion.  The immediate selects which of the four
   128-bit lanes of the 512-bit destination receives __D.  Inline functions
   require __OPTIMIZE__ so the immediate folds to a constant; otherwise the
   macro forms below are used.  */
#ifdef __OPTIMIZE__
/* Zero-masked insert of a 128-bit integer lane.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D,
			  const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    __B);
}

/* Zero-masked insert of a 128-bit float lane.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D,
			  const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf)
						   _mm512_setzero_ps (), __B);
}

/* Merge-masked insert of a 128-bit integer lane; unselected elements come
   from __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C,
			 __m128i __D, const int __imm)
{
  return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C,
						    (__v4si) __D,
						    __imm,
						    (__v16si) __A,
						    __B);
}

/* Merge-masked insert of a 128-bit float lane; unselected elements come
   from __A.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C,
			 __m128 __D, const int __imm)
{
  return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C,
						   (__v4sf) __D,
						   __imm,
						   (__v16sf) __A, __B);
}
#else
/* Macro forms for builds without optimization (immediate must stay a
   literal expression).  */
#define _mm512_maskz_insertf32x4(A, X, Y, C)                            \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(),      \
    (__mmask16)(A)))

#define _mm512_maskz_inserti32x4(A, X, Y, C)                            \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \
    (__mmask16)(A)))

#define _mm512_mask_insertf32x4(A, B, X, Y, C)                          \
  ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X),     \
    (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A),             \
    (__mmask16)(B)))

#define _mm512_mask_inserti32x4(A, B, X, Y, C)                          \
  ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X),   \
    (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A),           \
    (__mmask16)(B)))
#endif
/* Signed 64-bit element-wise maximum (result elements undefined-source when
   unmasked: destination is _mm512_undefined_epi32 with an all-ones mask).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Zero-masked signed 64-bit maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked signed 64-bit maximum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Signed 64-bit element-wise minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masked signed 64-bit minimum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Zero-masked signed 64-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Unsigned 64-bit element-wise maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Zero-masked unsigned 64-bit maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked unsigned 64-bit maximum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Unsigned 64-bit element-wise minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu64 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masked unsigned 64-bit minimum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di) __W, __M);
}

/* Zero-masked unsigned 64-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A,
						  (__v8di) __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
/* Signed 32-bit element-wise maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Zero-masked signed 32-bit maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked signed 32-bit maximum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Signed 32-bit element-wise minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Zero-masked signed 32-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked signed 32-bit minimum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}
/* Unsigned 32-bit element-wise maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Zero-masked unsigned 32-bit maximum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked unsigned 32-bit maximum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}

/* Unsigned 32-bit element-wise minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_epu32 (__m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Zero-masked unsigned 32-bit minimum.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

/* Merge-masked unsigned 32-bit minimum; unselected elements from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}
/* Interleave the low floats of each 128-bit lane of __A and __B.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_unpacklo_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1);
}

/* Merge-masked unpacklo; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U);
}

/* Zero-masked unpacklo.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U);
}
/* Mask blend: result element i is __W[i] where __U bit i is set, else
   __A[i].  One variant per element type.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W)
{
  return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A,
						     (__v8df) __W,
						     (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W)
{
  return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A,
						    (__v8di) __W,
						    (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W)
{
  return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A,
						    (__v16si) __W,
						    (__mmask16) __U);
}
/* Packed square root, double precision, current rounding mode.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

/* Packed square root, single precision, current rounding mode.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked sqrt; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked sqrt.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U,
						 _MM_FROUND_CUR_DIRECTION);
}
/* Packed double add; the unmasked form uses the generic vector extension
   so GCC can fold/optimize it.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A + (__v8df)__B);
}

/* Merge-masked add; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked add.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float add (generic vector form).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A + (__v16sf)__B);
}

/* Merge-masked add; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked add.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
/* Packed double subtract (generic vector form when unmasked).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A - (__v8df)__B);
}

/* Merge-masked subtract; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked subtract.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float subtract (generic vector form).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A - (__v16sf)__B);
}

/* Merge-masked subtract; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked subtract.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
/* Packed double multiply (generic vector form when unmasked).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_pd (__m512d __A, __m512d __B)
{
  return (__m512d) ((__v8df)__A * (__v8df)__B);
}

/* Merge-masked multiply; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked multiply.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float multiply (generic vector form).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A * (__v16sf)__B);
}

/* Merge-masked multiply; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked multiply.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
/* Packed double divide (generic vector form when unmasked); parameter
   names __M/__V follow the upstream header.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_pd (__m512d __M, __m512d __V)
{
  return (__m512d) ((__v8df)__M / (__v8df)__V);
}

/* Merge-masked divide; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked divide.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float divide (generic vector form).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_ps (__m512 __A, __m512 __B)
{
  return (__m512) ((__v16sf)__A / (__v16sf)__B);
}

/* Merge-masked divide; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked divide.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
/* Packed double maximum (VMAXPD), current rounding mode.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked maximum; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked maximum.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float maximum (VMAXPS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1,
						_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked maximum; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked maximum.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
/* Packed double minimum (VMINPD), current rounding mode.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_pd (__m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked minimum; unselected elements from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked minimum.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U,
						 _MM_FROUND_CUR_DIRECTION);
}

/* Packed float minimum (VMINPS).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_ps (__m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1,
						_MM_FROUND_CUR_DIRECTION);
}

/* Merge-masked minimum; unselected elements from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}

/* Zero-masked minimum.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U,
						_MM_FROUND_CUR_DIRECTION);
}
14074 extern __inline __m512d
14075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14076 _mm512_scalef_pd (__m512d __A, __m512d __B)
14078 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14079 (__v8df) __B,
14080 (__v8df)
14081 _mm512_undefined_pd (),
14082 (__mmask8) -1,
14083 _MM_FROUND_CUR_DIRECTION);
14086 extern __inline __m512d
14087 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14088 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
14090 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14091 (__v8df) __B,
14092 (__v8df) __W,
14093 (__mmask8) __U,
14094 _MM_FROUND_CUR_DIRECTION);
14097 extern __inline __m512d
14098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14099 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
14101 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
14102 (__v8df) __B,
14103 (__v8df)
14104 _mm512_setzero_pd (),
14105 (__mmask8) __U,
14106 _MM_FROUND_CUR_DIRECTION);
14109 extern __inline __m512
14110 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14111 _mm512_scalef_ps (__m512 __A, __m512 __B)
14113 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14114 (__v16sf) __B,
14115 (__v16sf)
14116 _mm512_undefined_ps (),
14117 (__mmask16) -1,
14118 _MM_FROUND_CUR_DIRECTION);
14121 extern __inline __m512
14122 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14123 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
14125 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14126 (__v16sf) __B,
14127 (__v16sf) __W,
14128 (__mmask16) __U,
14129 _MM_FROUND_CUR_DIRECTION);
14132 extern __inline __m512
14133 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14134 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
14136 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
14137 (__v16sf) __B,
14138 (__v16sf)
14139 _mm512_setzero_ps (),
14140 (__mmask16) __U,
14141 _MM_FROUND_CUR_DIRECTION);
14144 extern __inline __m512d
14145 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14146 _mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C)
14148 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
14149 (__v8df) __B,
14150 (__v8df) __C,
14151 (__mmask8) -1,
14152 _MM_FROUND_CUR_DIRECTION);
14155 extern __inline __m512d
14156 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14157 _mm512_mask_fmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14159 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
14160 (__v8df) __B,
14161 (__v8df) __C,
14162 (__mmask8) __U,
14163 _MM_FROUND_CUR_DIRECTION);
14166 extern __inline __m512d
14167 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14168 _mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14170 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
14171 (__v8df) __B,
14172 (__v8df) __C,
14173 (__mmask8) __U,
14174 _MM_FROUND_CUR_DIRECTION);
14177 extern __inline __m512d
14178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14179 _mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14181 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
14182 (__v8df) __B,
14183 (__v8df) __C,
14184 (__mmask8) __U,
14185 _MM_FROUND_CUR_DIRECTION);
14188 extern __inline __m512
14189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14190 _mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C)
14192 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
14193 (__v16sf) __B,
14194 (__v16sf) __C,
14195 (__mmask16) -1,
14196 _MM_FROUND_CUR_DIRECTION);
14199 extern __inline __m512
14200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14201 _mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14203 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
14204 (__v16sf) __B,
14205 (__v16sf) __C,
14206 (__mmask16) __U,
14207 _MM_FROUND_CUR_DIRECTION);
14210 extern __inline __m512
14211 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14212 _mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14214 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
14215 (__v16sf) __B,
14216 (__v16sf) __C,
14217 (__mmask16) __U,
14218 _MM_FROUND_CUR_DIRECTION);
14221 extern __inline __m512
14222 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14223 _mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14225 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
14226 (__v16sf) __B,
14227 (__v16sf) __C,
14228 (__mmask16) __U,
14229 _MM_FROUND_CUR_DIRECTION);
14232 extern __inline __m512d
14233 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14234 _mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C)
14236 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
14237 (__v8df) __B,
14238 (__v8df) __C,
14239 (__mmask8) -1,
14240 _MM_FROUND_CUR_DIRECTION);
14243 extern __inline __m512d
14244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14245 _mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14247 return (__m512d) __builtin_ia32_vfmsubpd512_mask ((__v8df) __A,
14248 (__v8df) __B,
14249 (__v8df) __C,
14250 (__mmask8) __U,
14251 _MM_FROUND_CUR_DIRECTION);
14254 extern __inline __m512d
14255 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14256 _mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14258 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
14259 (__v8df) __B,
14260 (__v8df) __C,
14261 (__mmask8) __U,
14262 _MM_FROUND_CUR_DIRECTION);
14265 extern __inline __m512d
14266 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14267 _mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14269 return (__m512d) __builtin_ia32_vfmsubpd512_maskz ((__v8df) __A,
14270 (__v8df) __B,
14271 (__v8df) __C,
14272 (__mmask8) __U,
14273 _MM_FROUND_CUR_DIRECTION);
14276 extern __inline __m512
14277 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14278 _mm512_fmsub_ps (__m512 __A, __m512 __B, __m512 __C)
14280 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
14281 (__v16sf) __B,
14282 (__v16sf) __C,
14283 (__mmask16) -1,
14284 _MM_FROUND_CUR_DIRECTION);
14287 extern __inline __m512
14288 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14289 _mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14291 return (__m512) __builtin_ia32_vfmsubps512_mask ((__v16sf) __A,
14292 (__v16sf) __B,
14293 (__v16sf) __C,
14294 (__mmask16) __U,
14295 _MM_FROUND_CUR_DIRECTION);
14298 extern __inline __m512
14299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14300 _mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14302 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
14303 (__v16sf) __B,
14304 (__v16sf) __C,
14305 (__mmask16) __U,
14306 _MM_FROUND_CUR_DIRECTION);
14309 extern __inline __m512
14310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14311 _mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14313 return (__m512) __builtin_ia32_vfmsubps512_maskz ((__v16sf) __A,
14314 (__v16sf) __B,
14315 (__v16sf) __C,
14316 (__mmask16) __U,
14317 _MM_FROUND_CUR_DIRECTION);
14320 extern __inline __m512d
14321 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14322 _mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C)
14324 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14325 (__v8df) __B,
14326 (__v8df) __C,
14327 (__mmask8) -1,
14328 _MM_FROUND_CUR_DIRECTION);
14331 extern __inline __m512d
14332 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14333 _mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14335 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14336 (__v8df) __B,
14337 (__v8df) __C,
14338 (__mmask8) __U,
14339 _MM_FROUND_CUR_DIRECTION);
14342 extern __inline __m512d
14343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14344 _mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14346 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
14347 (__v8df) __B,
14348 (__v8df) __C,
14349 (__mmask8) __U,
14350 _MM_FROUND_CUR_DIRECTION);
14353 extern __inline __m512d
14354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14355 _mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14357 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
14358 (__v8df) __B,
14359 (__v8df) __C,
14360 (__mmask8) __U,
14361 _MM_FROUND_CUR_DIRECTION);
14364 extern __inline __m512
14365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14366 _mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C)
14368 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14369 (__v16sf) __B,
14370 (__v16sf) __C,
14371 (__mmask16) -1,
14372 _MM_FROUND_CUR_DIRECTION);
14375 extern __inline __m512
14376 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14377 _mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14379 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14380 (__v16sf) __B,
14381 (__v16sf) __C,
14382 (__mmask16) __U,
14383 _MM_FROUND_CUR_DIRECTION);
14386 extern __inline __m512
14387 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14388 _mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14390 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
14391 (__v16sf) __B,
14392 (__v16sf) __C,
14393 (__mmask16) __U,
14394 _MM_FROUND_CUR_DIRECTION);
14397 extern __inline __m512
14398 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14399 _mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14401 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
14402 (__v16sf) __B,
14403 (__v16sf) __C,
14404 (__mmask16) __U,
14405 _MM_FROUND_CUR_DIRECTION);
14408 extern __inline __m512d
14409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14410 _mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C)
14412 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14413 (__v8df) __B,
14414 -(__v8df) __C,
14415 (__mmask8) -1,
14416 _MM_FROUND_CUR_DIRECTION);
14419 extern __inline __m512d
14420 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14421 _mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14423 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
14424 (__v8df) __B,
14425 -(__v8df) __C,
14426 (__mmask8) __U,
14427 _MM_FROUND_CUR_DIRECTION);
14430 extern __inline __m512d
14431 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14432 _mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14434 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
14435 (__v8df) __B,
14436 (__v8df) __C,
14437 (__mmask8) __U,
14438 _MM_FROUND_CUR_DIRECTION);
14441 extern __inline __m512d
14442 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14443 _mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14445 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
14446 (__v8df) __B,
14447 -(__v8df) __C,
14448 (__mmask8) __U,
14449 _MM_FROUND_CUR_DIRECTION);
14452 extern __inline __m512
14453 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14454 _mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C)
14456 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14457 (__v16sf) __B,
14458 -(__v16sf) __C,
14459 (__mmask16) -1,
14460 _MM_FROUND_CUR_DIRECTION);
14463 extern __inline __m512
14464 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14465 _mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14467 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
14468 (__v16sf) __B,
14469 -(__v16sf) __C,
14470 (__mmask16) __U,
14471 _MM_FROUND_CUR_DIRECTION);
14474 extern __inline __m512
14475 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14476 _mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14478 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
14479 (__v16sf) __B,
14480 (__v16sf) __C,
14481 (__mmask16) __U,
14482 _MM_FROUND_CUR_DIRECTION);
14485 extern __inline __m512
14486 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14487 _mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14489 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
14490 (__v16sf) __B,
14491 -(__v16sf) __C,
14492 (__mmask16) __U,
14493 _MM_FROUND_CUR_DIRECTION);
14496 extern __inline __m512d
14497 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14498 _mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C)
14500 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
14501 (__v8df) __B,
14502 (__v8df) __C,
14503 (__mmask8) -1,
14504 _MM_FROUND_CUR_DIRECTION);
14507 extern __inline __m512d
14508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14509 _mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14511 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A,
14512 (__v8df) __B,
14513 (__v8df) __C,
14514 (__mmask8) __U,
14515 _MM_FROUND_CUR_DIRECTION);
14518 extern __inline __m512d
14519 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14520 _mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14522 return (__m512d) __builtin_ia32_vfnmaddpd512_mask3 ((__v8df) __A,
14523 (__v8df) __B,
14524 (__v8df) __C,
14525 (__mmask8) __U,
14526 _MM_FROUND_CUR_DIRECTION);
14529 extern __inline __m512d
14530 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14531 _mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14533 return (__m512d) __builtin_ia32_vfnmaddpd512_maskz ((__v8df) __A,
14534 (__v8df) __B,
14535 (__v8df) __C,
14536 (__mmask8) __U,
14537 _MM_FROUND_CUR_DIRECTION);
14540 extern __inline __m512
14541 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14542 _mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C)
14544 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
14545 (__v16sf) __B,
14546 (__v16sf) __C,
14547 (__mmask16) -1,
14548 _MM_FROUND_CUR_DIRECTION);
14551 extern __inline __m512
14552 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14553 _mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14555 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A,
14556 (__v16sf) __B,
14557 (__v16sf) __C,
14558 (__mmask16) __U,
14559 _MM_FROUND_CUR_DIRECTION);
14562 extern __inline __m512
14563 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14564 _mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14566 return (__m512) __builtin_ia32_vfnmaddps512_mask3 ((__v16sf) __A,
14567 (__v16sf) __B,
14568 (__v16sf) __C,
14569 (__mmask16) __U,
14570 _MM_FROUND_CUR_DIRECTION);
14573 extern __inline __m512
14574 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14575 _mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14577 return (__m512) __builtin_ia32_vfnmaddps512_maskz ((__v16sf) __A,
14578 (__v16sf) __B,
14579 (__v16sf) __C,
14580 (__mmask16) __U,
14581 _MM_FROUND_CUR_DIRECTION);
14584 extern __inline __m512d
14585 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14586 _mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C)
14588 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
14589 (__v8df) __B,
14590 (__v8df) __C,
14591 (__mmask8) -1,
14592 _MM_FROUND_CUR_DIRECTION);
14595 extern __inline __m512d
14596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14597 _mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
14599 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A,
14600 (__v8df) __B,
14601 (__v8df) __C,
14602 (__mmask8) __U,
14603 _MM_FROUND_CUR_DIRECTION);
14606 extern __inline __m512d
14607 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14608 _mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
14610 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A,
14611 (__v8df) __B,
14612 (__v8df) __C,
14613 (__mmask8) __U,
14614 _MM_FROUND_CUR_DIRECTION);
14617 extern __inline __m512d
14618 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14619 _mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
14621 return (__m512d) __builtin_ia32_vfnmsubpd512_maskz ((__v8df) __A,
14622 (__v8df) __B,
14623 (__v8df) __C,
14624 (__mmask8) __U,
14625 _MM_FROUND_CUR_DIRECTION);
14628 extern __inline __m512
14629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14630 _mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C)
14632 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
14633 (__v16sf) __B,
14634 (__v16sf) __C,
14635 (__mmask16) -1,
14636 _MM_FROUND_CUR_DIRECTION);
14639 extern __inline __m512
14640 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14641 _mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
14643 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A,
14644 (__v16sf) __B,
14645 (__v16sf) __C,
14646 (__mmask16) __U,
14647 _MM_FROUND_CUR_DIRECTION);
14650 extern __inline __m512
14651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14652 _mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
14654 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A,
14655 (__v16sf) __B,
14656 (__v16sf) __C,
14657 (__mmask16) __U,
14658 _MM_FROUND_CUR_DIRECTION);
14661 extern __inline __m512
14662 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14663 _mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
14665 return (__m512) __builtin_ia32_vfnmsubps512_maskz ((__v16sf) __A,
14666 (__v16sf) __B,
14667 (__v16sf) __C,
14668 (__mmask16) __U,
14669 _MM_FROUND_CUR_DIRECTION);
14672 extern __inline __m256i
14673 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14674 _mm512_cvttpd_epi32 (__m512d __A)
14676 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14677 (__v8si)
14678 _mm256_undefined_si256 (),
14679 (__mmask8) -1,
14680 _MM_FROUND_CUR_DIRECTION);
14683 extern __inline __m256i
14684 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14685 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
14687 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14688 (__v8si) __W,
14689 (__mmask8) __U,
14690 _MM_FROUND_CUR_DIRECTION);
14693 extern __inline __m256i
14694 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14695 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
14697 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
14698 (__v8si)
14699 _mm256_setzero_si256 (),
14700 (__mmask8) __U,
14701 _MM_FROUND_CUR_DIRECTION);
14704 extern __inline __m256i
14705 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14706 _mm512_cvttpd_epu32 (__m512d __A)
14708 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14709 (__v8si)
14710 _mm256_undefined_si256 (),
14711 (__mmask8) -1,
14712 _MM_FROUND_CUR_DIRECTION);
14715 extern __inline __m256i
14716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14717 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
14719 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14720 (__v8si) __W,
14721 (__mmask8) __U,
14722 _MM_FROUND_CUR_DIRECTION);
14725 extern __inline __m256i
14726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14727 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
14729 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
14730 (__v8si)
14731 _mm256_setzero_si256 (),
14732 (__mmask8) __U,
14733 _MM_FROUND_CUR_DIRECTION);
14736 extern __inline __m256i
14737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14738 _mm512_cvtpd_epi32 (__m512d __A)
14740 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14741 (__v8si)
14742 _mm256_undefined_si256 (),
14743 (__mmask8) -1,
14744 _MM_FROUND_CUR_DIRECTION);
14747 extern __inline __m256i
14748 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14749 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
14751 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14752 (__v8si) __W,
14753 (__mmask8) __U,
14754 _MM_FROUND_CUR_DIRECTION);
14757 extern __inline __m256i
14758 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14759 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
14761 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
14762 (__v8si)
14763 _mm256_setzero_si256 (),
14764 (__mmask8) __U,
14765 _MM_FROUND_CUR_DIRECTION);
14768 extern __inline __m256i
14769 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14770 _mm512_cvtpd_epu32 (__m512d __A)
14772 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14773 (__v8si)
14774 _mm256_undefined_si256 (),
14775 (__mmask8) -1,
14776 _MM_FROUND_CUR_DIRECTION);
14779 extern __inline __m256i
14780 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14781 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
14783 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14784 (__v8si) __W,
14785 (__mmask8) __U,
14786 _MM_FROUND_CUR_DIRECTION);
14789 extern __inline __m256i
14790 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14791 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
14793 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
14794 (__v8si)
14795 _mm256_setzero_si256 (),
14796 (__mmask8) __U,
14797 _MM_FROUND_CUR_DIRECTION);
14800 extern __inline __m512i
14801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14802 _mm512_cvttps_epi32 (__m512 __A)
14804 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14805 (__v16si)
14806 _mm512_undefined_epi32 (),
14807 (__mmask16) -1,
14808 _MM_FROUND_CUR_DIRECTION);
14811 extern __inline __m512i
14812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14813 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14815 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14816 (__v16si) __W,
14817 (__mmask16) __U,
14818 _MM_FROUND_CUR_DIRECTION);
14821 extern __inline __m512i
14822 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14823 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
14825 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
14826 (__v16si)
14827 _mm512_setzero_si512 (),
14828 (__mmask16) __U,
14829 _MM_FROUND_CUR_DIRECTION);
14832 extern __inline __m512i
14833 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14834 _mm512_cvttps_epu32 (__m512 __A)
14836 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14837 (__v16si)
14838 _mm512_undefined_epi32 (),
14839 (__mmask16) -1,
14840 _MM_FROUND_CUR_DIRECTION);
14843 extern __inline __m512i
14844 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14845 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14847 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14848 (__v16si) __W,
14849 (__mmask16) __U,
14850 _MM_FROUND_CUR_DIRECTION);
14853 extern __inline __m512i
14854 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14855 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
14857 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
14858 (__v16si)
14859 _mm512_setzero_si512 (),
14860 (__mmask16) __U,
14861 _MM_FROUND_CUR_DIRECTION);
14864 extern __inline __m512i
14865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14866 _mm512_cvtps_epi32 (__m512 __A)
14868 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14869 (__v16si)
14870 _mm512_undefined_epi32 (),
14871 (__mmask16) -1,
14872 _MM_FROUND_CUR_DIRECTION);
14875 extern __inline __m512i
14876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14877 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
14879 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14880 (__v16si) __W,
14881 (__mmask16) __U,
14882 _MM_FROUND_CUR_DIRECTION);
14885 extern __inline __m512i
14886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14887 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
14889 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
14890 (__v16si)
14891 _mm512_setzero_si512 (),
14892 (__mmask16) __U,
14893 _MM_FROUND_CUR_DIRECTION);
14896 extern __inline __m512i
14897 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14898 _mm512_cvtps_epu32 (__m512 __A)
14900 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14901 (__v16si)
14902 _mm512_undefined_epi32 (),
14903 (__mmask16) -1,
14904 _MM_FROUND_CUR_DIRECTION);
14907 extern __inline __m512i
14908 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14909 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
14911 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14912 (__v16si) __W,
14913 (__mmask16) __U,
14914 _MM_FROUND_CUR_DIRECTION);
14917 extern __inline __m512i
14918 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14919 _mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A)
14921 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
14922 (__v16si)
14923 _mm512_setzero_si512 (),
14924 (__mmask16) __U,
14925 _MM_FROUND_CUR_DIRECTION);
14928 extern __inline double
14929 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14930 _mm512_cvtsd_f64 (__m512d __A)
14932 return __A[0];
14935 extern __inline float
14936 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14937 _mm512_cvtss_f32 (__m512 __A)
14939 return __A[0];
14942 extern __inline __m512
14943 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14944 _mm512_cvtepi32_ps (__m512i __A)
14946 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14947 (__v16sf)
14948 _mm512_undefined_ps (),
14949 (__mmask16) -1,
14950 _MM_FROUND_CUR_DIRECTION);
14953 extern __inline __m512
14954 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14955 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14957 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14958 (__v16sf) __W,
14959 (__mmask16) __U,
14960 _MM_FROUND_CUR_DIRECTION);
14963 extern __inline __m512
14964 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14965 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
14967 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A,
14968 (__v16sf)
14969 _mm512_setzero_ps (),
14970 (__mmask16) __U,
14971 _MM_FROUND_CUR_DIRECTION);
14974 extern __inline __m512
14975 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14976 _mm512_cvtepu32_ps (__m512i __A)
14978 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14979 (__v16sf)
14980 _mm512_undefined_ps (),
14981 (__mmask16) -1,
14982 _MM_FROUND_CUR_DIRECTION);
14985 extern __inline __m512
14986 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14987 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
14989 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
14990 (__v16sf) __W,
14991 (__mmask16) __U,
14992 _MM_FROUND_CUR_DIRECTION);
14995 extern __inline __m512
14996 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
14997 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
14999 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A,
15000 (__v16sf)
15001 _mm512_setzero_ps (),
15002 (__mmask16) __U,
15003 _MM_FROUND_CUR_DIRECTION);
/* VFIXUPIMMPD/VFIXUPIMMPS: fix up special FP values in __A using the
   table in __C selected by the immediate __imm.  The immediate must be a
   compile-time constant, so real inline functions are only usable when
   optimizing; otherwise macro forms are provided.  */
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) -1,
						      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
			 __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A,
						      (__v8df) __B,
						      (__v8di) __C,
						      __imm,
						      (__mmask8) __U,
						      _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B,
			  __m512i __C, const int __imm)
{
  return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A,
						       (__v8df) __B,
						       (__v8di) __C,
						       __imm,
						       (__mmask8) __U,
						       _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
						     (__v16sf) __B,
						     (__v16si) __C,
						     __imm,
						     (__mmask16) -1,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
			 __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A,
						     (__v16sf) __B,
						     (__v16si) __C,
						     __imm,
						     (__mmask16) __U,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B,
			  __m512i __C, const int __imm)
{
  return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A,
						      (__v16sf) __B,
						      (__v16si) __C,
						      __imm,
						      (__mmask16) __U,
						      _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_fixupimm_pd(X, Y, Z, C)					\
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),	\
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),		\
      (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C)                          \
  ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X),    \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C)                         \
  ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X),   \
      (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C),             \
      (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_fixupimm_ps(X, Y, Z, C)					\
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),	\
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),		\
    (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C)                          \
  ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X),     \
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
    (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C)                         \
  ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X),    \
    (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C),              \
    (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))
#endif
15116 extern __inline int
15117 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15118 _mm512_cvtsi512_si32 (__m512i __A)
15120 __v16si __B = (__v16si) __A;
15121 return __B[0];
15124 extern __inline __m512d
15125 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15126 _mm512_cvtps_pd (__m256 __A)
15128 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15129 (__v8df)
15130 _mm512_undefined_pd (),
15131 (__mmask8) -1,
15132 _MM_FROUND_CUR_DIRECTION);
15135 extern __inline __m512d
15136 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15137 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
15139 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15140 (__v8df) __W,
15141 (__mmask8) __U,
15142 _MM_FROUND_CUR_DIRECTION);
15145 extern __inline __m512d
15146 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15147 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
15149 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A,
15150 (__v8df)
15151 _mm512_setzero_pd (),
15152 (__mmask8) __U,
15153 _MM_FROUND_CUR_DIRECTION);
15156 extern __inline __m512
15157 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15158 _mm512_cvtph_ps (__m256i __A)
15160 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15161 (__v16sf)
15162 _mm512_undefined_ps (),
15163 (__mmask16) -1,
15164 _MM_FROUND_CUR_DIRECTION);
15167 extern __inline __m512
15168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15169 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
15171 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15172 (__v16sf) __W,
15173 (__mmask16) __U,
15174 _MM_FROUND_CUR_DIRECTION);
15177 extern __inline __m512
15178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15179 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
15181 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
15182 (__v16sf)
15183 _mm512_setzero_ps (),
15184 (__mmask16) __U,
15185 _MM_FROUND_CUR_DIRECTION);
15188 extern __inline __m256
15189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15190 _mm512_cvtpd_ps (__m512d __A)
15192 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15193 (__v8sf)
15194 _mm256_undefined_ps (),
15195 (__mmask8) -1,
15196 _MM_FROUND_CUR_DIRECTION);
15199 extern __inline __m256
15200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15201 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
15203 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15204 (__v8sf) __W,
15205 (__mmask8) __U,
15206 _MM_FROUND_CUR_DIRECTION);
15209 extern __inline __m256
15210 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15211 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
15213 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
15214 (__v8sf)
15215 _mm256_setzero_ps (),
15216 (__mmask8) __U,
15217 _MM_FROUND_CUR_DIRECTION);
/* VGETEXPPS/PD extract the exponent of each element; VGETMANTPS/PD extract
   the mantissa, normalized per the interval selector __B and sign selector
   __C (encoded together as (__C << 2) | __B).  The getmant immediate must
   be a compile-time constant, hence the macro forms when not optimizing.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf) __W,
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getexp_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df) __W,
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     _mm512_undefined_pd (),
						     (__mmask8) -1,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A,
			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df) __W, __U,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A,
			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A,
						     (__C << 2) | __B,
						     (__v8df)
						     _mm512_setzero_pd (),
						     __U,
						     _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    _mm512_undefined_ps (),
						    (__mmask16) -1,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A,
			_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf) __W, __U,
						    _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A,
			 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A,
						    (__C << 2) | __B,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    __U,
						    _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_getmant_pd(X, B, C)                                                  \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)_mm512_undefined_pd(),        \
                                              (__mmask8)-1,\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_pd(W, U, X, B, C)                                       \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)(__m512d)(W),                 \
                                              (__mmask8)(U),\
					      _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_pd(U, X, B, C)                                         \
  ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X),                 \
                                              (int)(((C)<<2) | (B)),                \
                                              (__v8df)_mm512_setzero_pd(),          \
                                              (__mmask8)(U),\
					      _MM_FROUND_CUR_DIRECTION))
#define _mm512_getmant_ps(X, B, C)                                                  \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)_mm512_undefined_ps(),        \
                                             (__mmask16)-1,\
					     _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ps(W, U, X, B, C)                                       \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)(__m512)(W),                  \
                                             (__mmask16)(U),\
					     _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ps(U, X, B, C)                                         \
  ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X),                  \
                                             (int)(((C)<<2) | (B)),                 \
                                             (__v16sf)_mm512_setzero_ps(),          \
                                             (__mmask16)(U),\
					     _MM_FROUND_CUR_DIRECTION))
#define _mm512_getexp_ps(A)						\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_ps(W, U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_ps(U, A)					\
  ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A),	\
  (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_getexp_pd(A)						\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getexp_pd(W, U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getexp_pd(U, A)					\
  ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A),	\
  (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))
#endif
/* VRNDSCALEPS/PD: round elements to the number of fraction bits given in
   the immediate __imm.  The extractor dropped the "(__mmask16) -1" /
   "(__mmask8) -1" all-ones mask argument lines from the unmasked forms;
   they are restored here.  */
#ifdef __OPTIMIZE__
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_ps (__m512 __A, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C,
			   const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm,
						  (__v16sf) __A,
						  (__mmask16) __B,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm)
{
  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B,
						  __imm,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __A,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_roundscale_pd (__m512d __A, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C,
			   const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm,
						   (__v8df) __A,
						   (__mmask8) __B,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm)
{
  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B,
						   __imm,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __A,
						   _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_roundscale_ps(A, B) \
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\
    (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_ps(A, B, C, D)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C),	\
					    (int)(D),			\
					    (__v16sf)(__m512)(A),	\
					    (__mmask16)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_ps(A, B, C)				\
  ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B),	\
					    (int)(C),			\
					    (__v16sf)_mm512_setzero_ps(),\
					    (__mmask16)(A), _MM_FROUND_CUR_DIRECTION))
#define _mm512_roundscale_pd(A, B) \
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\
    (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_roundscale_pd(A, B, C, D)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C),	\
					     (int)(D),			\
					     (__v8df)(__m512d)(A),	\
					     (__mmask8)(B), _MM_FROUND_CUR_DIRECTION))
#define _mm512_maskz_roundscale_pd(A, B, C)				\
  ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B),	\
					     (int)(C),			\
					     (__v8df)_mm512_setzero_pd(),\
					     (__mmask8)(A), _MM_FROUND_CUR_DIRECTION))
#endif
/* VCMPPD/VCMPPS with an arbitrary predicate immediate __P, returning a
   bitmask of per-element results.  Macro forms when not optimizing, since
   the predicate must be a compile-time constant.  */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) -1,
						  _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) -1,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P)
{
  return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
						   (__v16sf) __Y, __P,
						   (__mmask16) __U,
						   _MM_FROUND_CUR_DIRECTION);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P)
{
  return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
						  (__v8df) __Y, __P,
						  (__mmask8) __U,
						  _MM_FROUND_CUR_DIRECTION);
}

#else
#define _mm512_cmp_pd_mask(X, Y, P)					\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_cmp_ps_mask(X, Y, P)					\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)-1,_MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_pd_mask(M, X, Y, P)				\
  ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),	\
					    (__v8df)(__m512d)(Y), (int)(P),\
					    (__mmask8)(M), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_cmp_ps_mask(M, X, Y, P)				\
  ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),	\
					     (__v16sf)(__m512)(Y), (int)(P),\
					     (__mmask16)(M),_MM_FROUND_CUR_DIRECTION))
#endif
15585 extern __inline __mmask8
15586 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15587 _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y)
15589 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15590 (__v8df) __Y, _CMP_EQ_OQ,
15591 (__mmask8) -1,
15592 _MM_FROUND_CUR_DIRECTION);
15595 extern __inline __mmask8
15596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15597 _mm512_mask_cmpeq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15599 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15600 (__v8df) __Y, _CMP_EQ_OQ,
15601 (__mmask8) __U,
15602 _MM_FROUND_CUR_DIRECTION);
15605 extern __inline __mmask8
15606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15607 _mm512_cmplt_pd_mask (__m512d __X, __m512d __Y)
15609 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15610 (__v8df) __Y, _CMP_LT_OS,
15611 (__mmask8) -1,
15612 _MM_FROUND_CUR_DIRECTION);
15615 extern __inline __mmask8
15616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15617 _mm512_mask_cmplt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15619 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15620 (__v8df) __Y, _CMP_LT_OS,
15621 (__mmask8) __U,
15622 _MM_FROUND_CUR_DIRECTION);
15625 extern __inline __mmask8
15626 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15627 _mm512_cmple_pd_mask (__m512d __X, __m512d __Y)
15629 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15630 (__v8df) __Y, _CMP_LE_OS,
15631 (__mmask8) -1,
15632 _MM_FROUND_CUR_DIRECTION);
15635 extern __inline __mmask8
15636 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15637 _mm512_mask_cmple_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15639 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15640 (__v8df) __Y, _CMP_LE_OS,
15641 (__mmask8) __U,
15642 _MM_FROUND_CUR_DIRECTION);
15645 extern __inline __mmask8
15646 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15647 _mm512_cmpunord_pd_mask (__m512d __X, __m512d __Y)
15649 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15650 (__v8df) __Y, _CMP_UNORD_Q,
15651 (__mmask8) -1,
15652 _MM_FROUND_CUR_DIRECTION);
15655 extern __inline __mmask8
15656 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15657 _mm512_mask_cmpunord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15659 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15660 (__v8df) __Y, _CMP_UNORD_Q,
15661 (__mmask8) __U,
15662 _MM_FROUND_CUR_DIRECTION);
15665 extern __inline __mmask8
15666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15667 _mm512_cmpneq_pd_mask (__m512d __X, __m512d __Y)
15669 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15670 (__v8df) __Y, _CMP_NEQ_UQ,
15671 (__mmask8) -1,
15672 _MM_FROUND_CUR_DIRECTION);
15675 extern __inline __mmask8
15676 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15677 _mm512_mask_cmpneq_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15679 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15680 (__v8df) __Y, _CMP_NEQ_UQ,
15681 (__mmask8) __U,
15682 _MM_FROUND_CUR_DIRECTION);
15685 extern __inline __mmask8
15686 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15687 _mm512_cmpnlt_pd_mask (__m512d __X, __m512d __Y)
15689 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15690 (__v8df) __Y, _CMP_NLT_US,
15691 (__mmask8) -1,
15692 _MM_FROUND_CUR_DIRECTION);
15695 extern __inline __mmask8
15696 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15697 _mm512_mask_cmpnlt_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15699 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15700 (__v8df) __Y, _CMP_NLT_US,
15701 (__mmask8) __U,
15702 _MM_FROUND_CUR_DIRECTION);
15705 extern __inline __mmask8
15706 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15707 _mm512_cmpnle_pd_mask (__m512d __X, __m512d __Y)
15709 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15710 (__v8df) __Y, _CMP_NLE_US,
15711 (__mmask8) -1,
15712 _MM_FROUND_CUR_DIRECTION);
15715 extern __inline __mmask8
15716 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15717 _mm512_mask_cmpnle_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15719 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15720 (__v8df) __Y, _CMP_NLE_US,
15721 (__mmask8) __U,
15722 _MM_FROUND_CUR_DIRECTION);
15725 extern __inline __mmask8
15726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15727 _mm512_cmpord_pd_mask (__m512d __X, __m512d __Y)
15729 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15730 (__v8df) __Y, _CMP_ORD_Q,
15731 (__mmask8) -1,
15732 _MM_FROUND_CUR_DIRECTION);
15735 extern __inline __mmask8
15736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15737 _mm512_mask_cmpord_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y)
15739 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X,
15740 (__v8df) __Y, _CMP_ORD_Q,
15741 (__mmask8) __U,
15742 _MM_FROUND_CUR_DIRECTION);
15745 extern __inline __mmask16
15746 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15747 _mm512_cmpeq_ps_mask (__m512 __X, __m512 __Y)
15749 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15750 (__v16sf) __Y, _CMP_EQ_OQ,
15751 (__mmask16) -1,
15752 _MM_FROUND_CUR_DIRECTION);
15755 extern __inline __mmask16
15756 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15757 _mm512_mask_cmpeq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15759 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15760 (__v16sf) __Y, _CMP_EQ_OQ,
15761 (__mmask16) __U,
15762 _MM_FROUND_CUR_DIRECTION);
15765 extern __inline __mmask16
15766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15767 _mm512_cmplt_ps_mask (__m512 __X, __m512 __Y)
15769 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15770 (__v16sf) __Y, _CMP_LT_OS,
15771 (__mmask16) -1,
15772 _MM_FROUND_CUR_DIRECTION);
15775 extern __inline __mmask16
15776 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15777 _mm512_mask_cmplt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15779 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15780 (__v16sf) __Y, _CMP_LT_OS,
15781 (__mmask16) __U,
15782 _MM_FROUND_CUR_DIRECTION);
15785 extern __inline __mmask16
15786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15787 _mm512_cmple_ps_mask (__m512 __X, __m512 __Y)
15789 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15790 (__v16sf) __Y, _CMP_LE_OS,
15791 (__mmask16) -1,
15792 _MM_FROUND_CUR_DIRECTION);
15795 extern __inline __mmask16
15796 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15797 _mm512_mask_cmple_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15799 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15800 (__v16sf) __Y, _CMP_LE_OS,
15801 (__mmask16) __U,
15802 _MM_FROUND_CUR_DIRECTION);
15805 extern __inline __mmask16
15806 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15807 _mm512_cmpunord_ps_mask (__m512 __X, __m512 __Y)
15809 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15810 (__v16sf) __Y, _CMP_UNORD_Q,
15811 (__mmask16) -1,
15812 _MM_FROUND_CUR_DIRECTION);
15815 extern __inline __mmask16
15816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15817 _mm512_mask_cmpunord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15819 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15820 (__v16sf) __Y, _CMP_UNORD_Q,
15821 (__mmask16) __U,
15822 _MM_FROUND_CUR_DIRECTION);
15825 extern __inline __mmask16
15826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15827 _mm512_cmpneq_ps_mask (__m512 __X, __m512 __Y)
15829 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15830 (__v16sf) __Y, _CMP_NEQ_UQ,
15831 (__mmask16) -1,
15832 _MM_FROUND_CUR_DIRECTION);
15835 extern __inline __mmask16
15836 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15837 _mm512_mask_cmpneq_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15839 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15840 (__v16sf) __Y, _CMP_NEQ_UQ,
15841 (__mmask16) __U,
15842 _MM_FROUND_CUR_DIRECTION);
15845 extern __inline __mmask16
15846 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15847 _mm512_cmpnlt_ps_mask (__m512 __X, __m512 __Y)
15849 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15850 (__v16sf) __Y, _CMP_NLT_US,
15851 (__mmask16) -1,
15852 _MM_FROUND_CUR_DIRECTION);
15855 extern __inline __mmask16
15856 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15857 _mm512_mask_cmpnlt_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15859 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15860 (__v16sf) __Y, _CMP_NLT_US,
15861 (__mmask16) __U,
15862 _MM_FROUND_CUR_DIRECTION);
15865 extern __inline __mmask16
15866 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15867 _mm512_cmpnle_ps_mask (__m512 __X, __m512 __Y)
15869 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15870 (__v16sf) __Y, _CMP_NLE_US,
15871 (__mmask16) -1,
15872 _MM_FROUND_CUR_DIRECTION);
15875 extern __inline __mmask16
15876 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15877 _mm512_mask_cmpnle_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15879 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15880 (__v16sf) __Y, _CMP_NLE_US,
15881 (__mmask16) __U,
15882 _MM_FROUND_CUR_DIRECTION);
15885 extern __inline __mmask16
15886 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15887 _mm512_cmpord_ps_mask (__m512 __X, __m512 __Y)
15889 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15890 (__v16sf) __Y, _CMP_ORD_Q,
15891 (__mmask16) -1,
15892 _MM_FROUND_CUR_DIRECTION);
15895 extern __inline __mmask16
15896 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15897 _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y)
15899 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X,
15900 (__v16sf) __Y, _CMP_ORD_Q,
15901 (__mmask16) __U,
15902 _MM_FROUND_CUR_DIRECTION);
15905 extern __inline __mmask16
15906 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15907 _mm512_kmov (__mmask16 __A)
15909 return __builtin_ia32_kmovw (__A);
15912 extern __inline __m512
15913 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15914 _mm512_castpd_ps (__m512d __A)
15916 return (__m512) (__A);
15919 extern __inline __m512i
15920 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15921 _mm512_castpd_si512 (__m512d __A)
15923 return (__m512i) (__A);
15926 extern __inline __m512d
15927 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15928 _mm512_castps_pd (__m512 __A)
15930 return (__m512d) (__A);
15933 extern __inline __m512i
15934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15935 _mm512_castps_si512 (__m512 __A)
15937 return (__m512i) (__A);
15940 extern __inline __m512
15941 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15942 _mm512_castsi512_ps (__m512i __A)
15944 return (__m512) (__A);
15947 extern __inline __m512d
15948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15949 _mm512_castsi512_pd (__m512i __A)
15951 return (__m512d) (__A);
15954 extern __inline __m128d
15955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15956 _mm512_castpd512_pd128 (__m512d __A)
15958 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0);
15961 extern __inline __m128
15962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15963 _mm512_castps512_ps128 (__m512 __A)
15965 return _mm512_extractf32x4_ps(__A, 0);
15968 extern __inline __m128i
15969 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15970 _mm512_castsi512_si128 (__m512i __A)
15972 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0);
15975 extern __inline __m256d
15976 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15977 _mm512_castpd512_pd256 (__m512d __A)
15979 return _mm512_extractf64x4_pd(__A, 0);
15982 extern __inline __m256
15983 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15984 _mm512_castps512_ps256 (__m512 __A)
15986 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0);
15989 extern __inline __m256i
15990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15991 _mm512_castsi512_si256 (__m512i __A)
15993 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0);
15996 extern __inline __m512d
15997 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
15998 _mm512_castpd128_pd512 (__m128d __A)
16000 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A);
16003 extern __inline __m512
16004 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16005 _mm512_castps128_ps512 (__m128 __A)
16007 return (__m512) __builtin_ia32_ps512_ps((__m128)__A);
16010 extern __inline __m512i
16011 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16012 _mm512_castsi128_si512 (__m128i __A)
16014 return (__m512i) __builtin_ia32_si512_si((__v4si)__A);
16017 extern __inline __m512d
16018 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16019 _mm512_castpd256_pd512 (__m256d __A)
16021 return __builtin_ia32_pd512_256pd (__A);
16024 extern __inline __m512
16025 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16026 _mm512_castps256_ps512 (__m256 __A)
16028 return __builtin_ia32_ps512_256ps (__A);
16031 extern __inline __m512i
16032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16033 _mm512_castsi256_si512 (__m256i __A)
16035 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A);
16038 extern __inline __m512d
16039 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16040 _mm512_zextpd128_pd512 (__m128d __A)
16042 return (__m512d) _mm512_insertf32x4 (_mm512_setzero_ps (), (__m128) __A, 0);
16045 extern __inline __m512
16046 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16047 _mm512_zextps128_ps512 (__m128 __A)
16049 return _mm512_insertf32x4 (_mm512_setzero_ps (), __A, 0);
16052 extern __inline __m512i
16053 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16054 _mm512_zextsi128_si512 (__m128i __A)
16056 return _mm512_inserti32x4 (_mm512_setzero_si512 (), __A, 0);
16059 extern __inline __m512d
16060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16061 _mm512_zextpd256_pd512 (__m256d __A)
16063 return _mm512_insertf64x4 (_mm512_setzero_pd (), __A, 0);
16066 extern __inline __m512
16067 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16068 _mm512_zextps256_ps512 (__m256 __A)
16070 return (__m512) _mm512_insertf64x4 (_mm512_setzero_pd (), (__m256d) __A, 0);
16073 extern __inline __m512i
16074 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16075 _mm512_zextsi256_si512 (__m256i __A)
16077 return _mm512_inserti64x4 (_mm512_setzero_si512 (), __A, 0);
16080 extern __inline __mmask16
16081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16082 _mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B)
16084 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16085 (__v16si) __B, 0,
16086 (__mmask16) -1);
16089 extern __inline __mmask16
16090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16091 _mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
16093 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16094 (__v16si) __B, 0, __U);
16097 extern __inline __mmask8
16098 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16099 _mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
16101 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16102 (__v8di) __B, 0, __U);
16105 extern __inline __mmask8
16106 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16107 _mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B)
16109 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16110 (__v8di) __B, 0,
16111 (__mmask8) -1);
16114 extern __inline __mmask16
16115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16116 _mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B)
16118 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16119 (__v16si) __B, 6,
16120 (__mmask16) -1);
16123 extern __inline __mmask16
16124 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16125 _mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B)
16127 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A,
16128 (__v16si) __B, 6, __U);
16131 extern __inline __mmask8
16132 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16133 _mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B)
16135 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16136 (__v8di) __B, 6, __U);
16139 extern __inline __mmask8
16140 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16141 _mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B)
16143 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A,
16144 (__v8di) __B, 6,
16145 (__mmask8) -1);
16148 #undef __MM512_REDUCE_OP
16149 #define __MM512_REDUCE_OP(op) \
16150 __v8si __T1 = (__v8si) _mm512_extracti64x4_epi64 (__A, 1); \
16151 __v8si __T2 = (__v8si) _mm512_extracti64x4_epi64 (__A, 0); \
16152 __m256i __T3 = (__m256i) (__T1 op __T2); \
16153 __v4si __T4 = (__v4si) _mm256_extracti128_si256 (__T3, 1); \
16154 __v4si __T5 = (__v4si) _mm256_extracti128_si256 (__T3, 0); \
16155 __v4si __T6 = __T4 op __T5; \
16156 __v4si __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
16157 __v4si __T8 = __T6 op __T7; \
16158 return __T8[0] op __T8[1]
16160 extern __inline int
16161 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16162 _mm512_reduce_add_epi32 (__m512i __A)
16164 __MM512_REDUCE_OP (+);
16167 extern __inline int
16168 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16169 _mm512_reduce_mul_epi32 (__m512i __A)
16171 __MM512_REDUCE_OP (*);
16174 extern __inline int
16175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16176 _mm512_reduce_and_epi32 (__m512i __A)
16178 __MM512_REDUCE_OP (&);
16181 extern __inline int
16182 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16183 _mm512_reduce_or_epi32 (__m512i __A)
16185 __MM512_REDUCE_OP (|);
16188 extern __inline int
16189 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16190 _mm512_mask_reduce_add_epi32 (__mmask16 __U, __m512i __A)
16192 __A = _mm512_maskz_mov_epi32 (__U, __A);
16193 __MM512_REDUCE_OP (+);
16196 extern __inline int
16197 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16198 _mm512_mask_reduce_mul_epi32 (__mmask16 __U, __m512i __A)
16200 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (1), __U, __A);
16201 __MM512_REDUCE_OP (*);
16204 extern __inline int
16205 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16206 _mm512_mask_reduce_and_epi32 (__mmask16 __U, __m512i __A)
16208 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16209 __MM512_REDUCE_OP (&);
16212 extern __inline int
16213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16214 _mm512_mask_reduce_or_epi32 (__mmask16 __U, __m512i __A)
16216 __A = _mm512_maskz_mov_epi32 (__U, __A);
16217 __MM512_REDUCE_OP (|);
16220 #undef __MM512_REDUCE_OP
16221 #define __MM512_REDUCE_OP(op) \
16222 __m256i __T1 = (__m256i) _mm512_extracti64x4_epi64 (__A, 1); \
16223 __m256i __T2 = (__m256i) _mm512_extracti64x4_epi64 (__A, 0); \
16224 __m256i __T3 = _mm256_##op (__T1, __T2); \
16225 __m128i __T4 = (__m128i) _mm256_extracti128_si256 (__T3, 1); \
16226 __m128i __T5 = (__m128i) _mm256_extracti128_si256 (__T3, 0); \
16227 __m128i __T6 = _mm_##op (__T4, __T5); \
16228 __m128i __T7 = (__m128i) __builtin_shuffle ((__v4si) __T6, \
16229 (__v4si) { 2, 3, 0, 1 }); \
16230 __m128i __T8 = _mm_##op (__T6, __T7); \
16231 __m128i __T9 = (__m128i) __builtin_shuffle ((__v4si) __T8, \
16232 (__v4si) { 1, 0, 1, 0 }); \
16233 __v4si __T10 = (__v4si) _mm_##op (__T8, __T9); \
16234 return __T10[0]
16236 extern __inline int
16237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16238 _mm512_reduce_min_epi32 (__m512i __A)
16240 __MM512_REDUCE_OP (min_epi32);
16243 extern __inline int
16244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16245 _mm512_reduce_max_epi32 (__m512i __A)
16247 __MM512_REDUCE_OP (max_epi32);
16250 extern __inline unsigned int
16251 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16252 _mm512_reduce_min_epu32 (__m512i __A)
16254 __MM512_REDUCE_OP (min_epu32);
16257 extern __inline unsigned int
16258 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16259 _mm512_reduce_max_epu32 (__m512i __A)
16261 __MM512_REDUCE_OP (max_epu32);
16264 extern __inline int
16265 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16266 _mm512_mask_reduce_min_epi32 (__mmask16 __U, __m512i __A)
16268 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (__INT_MAX__), __U, __A);
16269 __MM512_REDUCE_OP (min_epi32);
16272 extern __inline int
16273 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16274 _mm512_mask_reduce_max_epi32 (__mmask16 __U, __m512i __A)
16276 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (-__INT_MAX__ - 1), __U, __A);
16277 __MM512_REDUCE_OP (max_epi32);
16280 extern __inline unsigned int
16281 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16282 _mm512_mask_reduce_min_epu32 (__mmask16 __U, __m512i __A)
16284 __A = _mm512_mask_mov_epi32 (_mm512_set1_epi32 (~0), __U, __A);
16285 __MM512_REDUCE_OP (min_epu32);
16288 extern __inline unsigned int
16289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16290 _mm512_mask_reduce_max_epu32 (__mmask16 __U, __m512i __A)
16292 __A = _mm512_maskz_mov_epi32 (__U, __A);
16293 __MM512_REDUCE_OP (max_epu32);
16296 #undef __MM512_REDUCE_OP
16297 #define __MM512_REDUCE_OP(op) \
16298 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
16299 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
16300 __m256 __T3 = __T1 op __T2; \
16301 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
16302 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
16303 __m128 __T6 = __T4 op __T5; \
16304 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
16305 __m128 __T8 = __T6 op __T7; \
16306 return __T8[0] op __T8[1]
16308 extern __inline float
16309 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16310 _mm512_reduce_add_ps (__m512 __A)
16312 __MM512_REDUCE_OP (+);
16315 extern __inline float
16316 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16317 _mm512_reduce_mul_ps (__m512 __A)
16319 __MM512_REDUCE_OP (*);
16322 extern __inline float
16323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16324 _mm512_mask_reduce_add_ps (__mmask16 __U, __m512 __A)
16326 __A = _mm512_maskz_mov_ps (__U, __A);
16327 __MM512_REDUCE_OP (+);
16330 extern __inline float
16331 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16332 _mm512_mask_reduce_mul_ps (__mmask16 __U, __m512 __A)
16334 __A = _mm512_mask_mov_ps (_mm512_set1_ps (1.0f), __U, __A);
16335 __MM512_REDUCE_OP (*);
16338 #undef __MM512_REDUCE_OP
16339 #define __MM512_REDUCE_OP(op) \
16340 __m256 __T1 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 1); \
16341 __m256 __T2 = (__m256) _mm512_extractf64x4_pd ((__m512d) __A, 0); \
16342 __m256 __T3 = _mm256_##op (__T1, __T2); \
16343 __m128 __T4 = _mm256_extractf128_ps (__T3, 1); \
16344 __m128 __T5 = _mm256_extractf128_ps (__T3, 0); \
16345 __m128 __T6 = _mm_##op (__T4, __T5); \
16346 __m128 __T7 = __builtin_shuffle (__T6, (__v4si) { 2, 3, 0, 1 }); \
16347 __m128 __T8 = _mm_##op (__T6, __T7); \
16348 __m128 __T9 = __builtin_shuffle (__T8, (__v4si) { 1, 0, 1, 0 }); \
16349 __m128 __T10 = _mm_##op (__T8, __T9); \
16350 return __T10[0]
16352 extern __inline float
16353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16354 _mm512_reduce_min_ps (__m512 __A)
16356 __MM512_REDUCE_OP (min_ps);
16359 extern __inline float
16360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16361 _mm512_reduce_max_ps (__m512 __A)
16363 __MM512_REDUCE_OP (max_ps);
16366 extern __inline float
16367 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16368 _mm512_mask_reduce_min_ps (__mmask16 __U, __m512 __A)
16370 __A = _mm512_mask_mov_ps (_mm512_set1_ps (__builtin_inff ()), __U, __A);
16371 __MM512_REDUCE_OP (min_ps);
16374 extern __inline float
16375 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16376 _mm512_mask_reduce_max_ps (__mmask16 __U, __m512 __A)
16378 __A = _mm512_mask_mov_ps (_mm512_set1_ps (-__builtin_inff ()), __U, __A);
16379 __MM512_REDUCE_OP (max_ps);
16382 #undef __MM512_REDUCE_OP
16383 #define __MM512_REDUCE_OP(op) \
16384 __v4di __T1 = (__v4di) _mm512_extracti64x4_epi64 (__A, 1); \
16385 __v4di __T2 = (__v4di) _mm512_extracti64x4_epi64 (__A, 0); \
16386 __m256i __T3 = (__m256i) (__T1 op __T2); \
16387 __v2di __T4 = (__v2di) _mm256_extracti128_si256 (__T3, 1); \
16388 __v2di __T5 = (__v2di) _mm256_extracti128_si256 (__T3, 0); \
16389 __v2di __T6 = __T4 op __T5; \
16390 return __T6[0] op __T6[1]
16392 extern __inline long long
16393 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16394 _mm512_reduce_add_epi64 (__m512i __A)
16396 __MM512_REDUCE_OP (+);
16399 extern __inline long long
16400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16401 _mm512_reduce_mul_epi64 (__m512i __A)
16403 __MM512_REDUCE_OP (*);
16406 extern __inline long long
16407 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16408 _mm512_reduce_and_epi64 (__m512i __A)
16410 __MM512_REDUCE_OP (&);
16413 extern __inline long long
16414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16415 _mm512_reduce_or_epi64 (__m512i __A)
16417 __MM512_REDUCE_OP (|);
16420 extern __inline long long
16421 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16422 _mm512_mask_reduce_add_epi64 (__mmask8 __U, __m512i __A)
16424 __A = _mm512_maskz_mov_epi64 (__U, __A);
16425 __MM512_REDUCE_OP (+);
16428 extern __inline long long
16429 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16430 _mm512_mask_reduce_mul_epi64 (__mmask8 __U, __m512i __A)
16432 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (1LL), __U, __A);
16433 __MM512_REDUCE_OP (*);
16436 extern __inline long long
16437 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16438 _mm512_mask_reduce_and_epi64 (__mmask8 __U, __m512i __A)
16440 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16441 __MM512_REDUCE_OP (&);
16444 extern __inline long long
16445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16446 _mm512_mask_reduce_or_epi64 (__mmask8 __U, __m512i __A)
16448 __A = _mm512_maskz_mov_epi64 (__U, __A);
16449 __MM512_REDUCE_OP (|);
16452 #undef __MM512_REDUCE_OP
16453 #define __MM512_REDUCE_OP(op) \
16454 __m512i __T1 = _mm512_shuffle_i64x2 (__A, __A, 0x4e); \
16455 __m512i __T2 = _mm512_##op (__A, __T1); \
16456 __m512i __T3 \
16457 = (__m512i) __builtin_shuffle ((__v8di) __T2, \
16458 (__v8di) { 2, 3, 0, 1, 6, 7, 4, 5 });\
16459 __m512i __T4 = _mm512_##op (__T2, __T3); \
16460 __m512i __T5 \
16461 = (__m512i) __builtin_shuffle ((__v8di) __T4, \
16462 (__v8di) { 1, 0, 3, 2, 5, 4, 7, 6 });\
16463 __v8di __T6 = (__v8di) _mm512_##op (__T4, __T5); \
16464 return __T6[0]
16466 extern __inline long long
16467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16468 _mm512_reduce_min_epi64 (__m512i __A)
16470 __MM512_REDUCE_OP (min_epi64);
16473 extern __inline long long
16474 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16475 _mm512_reduce_max_epi64 (__m512i __A)
16477 __MM512_REDUCE_OP (max_epi64);
16480 extern __inline long long
16481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16482 _mm512_mask_reduce_min_epi64 (__mmask8 __U, __m512i __A)
16484 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (__LONG_LONG_MAX__),
16485 __U, __A);
16486 __MM512_REDUCE_OP (min_epi64);
16489 extern __inline long long
16490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16491 _mm512_mask_reduce_max_epi64 (__mmask8 __U, __m512i __A)
16493 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (-__LONG_LONG_MAX__ - 1),
16494 __U, __A);
16495 __MM512_REDUCE_OP (max_epi64);
16498 extern __inline unsigned long long
16499 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16500 _mm512_reduce_min_epu64 (__m512i __A)
16502 __MM512_REDUCE_OP (min_epu64);
16505 extern __inline unsigned long long
16506 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16507 _mm512_reduce_max_epu64 (__m512i __A)
16509 __MM512_REDUCE_OP (max_epu64);
16512 extern __inline unsigned long long
16513 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16514 _mm512_mask_reduce_min_epu64 (__mmask8 __U, __m512i __A)
16516 __A = _mm512_mask_mov_epi64 (_mm512_set1_epi64 (~0LL), __U, __A);
16517 __MM512_REDUCE_OP (min_epu64);
16520 extern __inline unsigned long long
16521 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16522 _mm512_mask_reduce_max_epu64 (__mmask8 __U, __m512i __A)
16524 __A = _mm512_maskz_mov_epi64 (__U, __A);
16525 __MM512_REDUCE_OP (max_epu64);
16528 #undef __MM512_REDUCE_OP
16529 #define __MM512_REDUCE_OP(op) \
16530 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16531 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16532 __m256d __T3 = __T1 op __T2; \
16533 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16534 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16535 __m128d __T6 = __T4 op __T5; \
16536 return __T6[0] op __T6[1]
16538 extern __inline double
16539 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16540 _mm512_reduce_add_pd (__m512d __A)
16542 __MM512_REDUCE_OP (+);
16545 extern __inline double
16546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16547 _mm512_reduce_mul_pd (__m512d __A)
16549 __MM512_REDUCE_OP (*);
16552 extern __inline double
16553 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16554 _mm512_mask_reduce_add_pd (__mmask8 __U, __m512d __A)
16556 __A = _mm512_maskz_mov_pd (__U, __A);
16557 __MM512_REDUCE_OP (+);
16560 extern __inline double
16561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16562 _mm512_mask_reduce_mul_pd (__mmask8 __U, __m512d __A)
16564 __A = _mm512_mask_mov_pd (_mm512_set1_pd (1.0), __U, __A);
16565 __MM512_REDUCE_OP (*);
16568 #undef __MM512_REDUCE_OP
16569 #define __MM512_REDUCE_OP(op) \
16570 __m256d __T1 = (__m256d) _mm512_extractf64x4_pd (__A, 1); \
16571 __m256d __T2 = (__m256d) _mm512_extractf64x4_pd (__A, 0); \
16572 __m256d __T3 = _mm256_##op (__T1, __T2); \
16573 __m128d __T4 = _mm256_extractf128_pd (__T3, 1); \
16574 __m128d __T5 = _mm256_extractf128_pd (__T3, 0); \
16575 __m128d __T6 = _mm_##op (__T4, __T5); \
16576 __m128d __T7 = (__m128d) __builtin_shuffle (__T6, (__v2di) { 1, 0 }); \
16577 __m128d __T8 = _mm_##op (__T6, __T7); \
16578 return __T8[0]
16580 extern __inline double
16581 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16582 _mm512_reduce_min_pd (__m512d __A)
16584 __MM512_REDUCE_OP (min_pd);
16587 extern __inline double
16588 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16589 _mm512_reduce_max_pd (__m512d __A)
16591 __MM512_REDUCE_OP (max_pd);
16594 extern __inline double
16595 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16596 _mm512_mask_reduce_min_pd (__mmask8 __U, __m512d __A)
16598 __A = _mm512_mask_mov_pd (_mm512_set1_pd (__builtin_inf ()), __U, __A);
16599 __MM512_REDUCE_OP (min_pd);
16602 extern __inline double
16603 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
16604 _mm512_mask_reduce_max_pd (__mmask8 __U, __m512d __A)
16606 __A = _mm512_mask_mov_pd (_mm512_set1_pd (-__builtin_inf ()), __U, __A);
16607 __MM512_REDUCE_OP (max_pd);
16610 #undef __MM512_REDUCE_OP
16612 #ifdef __DISABLE_AVX512F_512__
16613 #undef __DISABLE_AVX512F_512__
16614 #pragma GCC pop_options
16615 #endif /* __DISABLE_AVX512F_512__ */
16617 #endif /* _AVX512FINTRIN_H_INCLUDED */