1 /* Copyright (C) 2024 Free Software Foundation, Inc.
3 This file is part of GCC.
5 GCC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
10 GCC is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 Under Section 7 of GPL version 3, you are granted additional
16 permissions described in the GCC Runtime Library Exception, version
17 3.1, as published by the Free Software Foundation.
19 You should have received a copy of the GNU General Public License and
20 a copy of the GCC Runtime Library Exception along with this program;
21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
22 <http://www.gnu.org/licenses/>. */
24 #ifndef _IMMINTRIN_H_INCLUDED
25 #error "Never use <avx10_2roundingintrin.h> directly; include <immintrin.h> instead."
28 #ifndef _AVX10_2ROUNDINGINTRIN_H_INCLUDED
29 #define _AVX10_2ROUNDINGINTRIN_H_INCLUDED
31 #ifndef __AVX10_2_256__
32 #pragma GCC push_options
33 #pragma GCC target("avx10.2-256")
34 #define __DISABLE_AVX10_2_256__
35 #endif /* __AVX10_2_256__ */
38 extern __inline __m256d
39 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
40 _mm256_add_round_pd (__m256d __A
, __m256d __B
, const int __R
)
42 return (__m256d
) __builtin_ia32_addpd256_mask_round ((__v4df
) __A
,
45 _mm256_undefined_pd (),
50 extern __inline __m256d
51 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
52 _mm256_mask_add_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
53 __m256d __B
, const int __R
)
55 return (__m256d
) __builtin_ia32_addpd256_mask_round ((__v4df
) __A
,
62 extern __inline __m256d
63 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
64 _mm256_maskz_add_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
67 return (__m256d
) __builtin_ia32_addpd256_mask_round ((__v4df
) __A
,
75 extern __inline __m256h
76 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
77 _mm256_add_round_ph (__m256h __A
, __m256h __B
, const int __R
)
79 return (__m256h
) __builtin_ia32_addph256_mask_round ((__v16hf
) __A
,
82 _mm256_undefined_ph (),
87 extern __inline __m256h
88 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
89 _mm256_mask_add_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
90 __m256h __B
, const int __R
)
92 return (__m256h
) __builtin_ia32_addph256_mask_round ((__v16hf
) __A
,
99 extern __inline __m256h
100 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
101 _mm256_maskz_add_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
104 return (__m256h
) __builtin_ia32_addph256_mask_round ((__v16hf
) __A
,
107 _mm256_setzero_ph (),
112 extern __inline __m256
113 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
114 _mm256_add_round_ps (__m256 __A
, __m256 __B
, const int __R
)
116 return (__m256
) __builtin_ia32_addps256_mask_round ((__v8sf
) __A
,
119 _mm256_undefined_ps (),
124 extern __inline __m256
125 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
126 _mm256_mask_add_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
129 return (__m256
) __builtin_ia32_addps256_mask_round ((__v8sf
) __A
,
136 extern __inline __m256
137 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
138 _mm256_maskz_add_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
141 return (__m256
) __builtin_ia32_addps256_mask_round ((__v8sf
) __A
,
144 _mm256_setzero_ps (),
149 extern __inline __mmask8
150 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
151 _mm256_cmp_round_pd_mask (__m256d __A
, __m256d __B
, const int __C
,
154 return (__mmask8
) __builtin_ia32_cmppd256_mask_round ((__v4df
) __A
,
161 extern __inline __mmask8
162 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
163 _mm256_mask_cmp_round_pd_mask (__mmask8 __U
, __m256d __A
, __m256d __B
,
164 const int __C
, const int __R
)
166 return (__mmask8
) __builtin_ia32_cmppd256_mask_round ((__v4df
) __A
,
173 extern __inline __mmask16
174 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
175 _mm256_cmp_round_ph_mask (__m256h __A
, __m256h __B
, const int __C
,
178 return (__mmask16
) __builtin_ia32_cmpph256_mask_round ((__v16hf
) __A
,
185 extern __inline __mmask16
186 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
187 _mm256_mask_cmp_round_ph_mask (__mmask16 __U
, __m256h __A
, __m256h __B
,
188 const int __C
, const int __R
)
190 return (__mmask16
) __builtin_ia32_cmpph256_mask_round ((__v16hf
) __A
,
197 extern __inline __mmask8
198 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
199 _mm256_cmp_round_ps_mask (__m256 __A
, __m256 __B
, const int __C
, const int __R
)
201 return (__mmask8
) __builtin_ia32_cmpps256_mask_round ((__v8sf
) __A
,
208 extern __inline __mmask8
209 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
210 _mm256_mask_cmp_round_ps_mask (__mmask8 __U
, __m256 __A
, __m256 __B
,
211 const int __C
, const int __R
)
213 return (__mmask8
) __builtin_ia32_cmpps256_mask_round ((__v8sf
) __A
,
220 extern __inline __m128h
221 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
222 _mm256_cvt_roundepi32_ph (__m256i __A
, const int __R
)
224 return (__m128h
) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si
) __A
,
231 extern __inline __m128h
232 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
233 _mm256_mask_cvt_roundepi32_ph (__m128h __W
, __mmask8 __U
, __m256i __A
,
236 return (__m128h
) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si
) __A
,
242 extern __inline __m128h
243 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
244 _mm256_maskz_cvt_roundepi32_ph (__mmask8 __U
, __m256i __A
, const int __R
)
246 return (__m128h
) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si
) __A
,
253 extern __inline __m256
254 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
255 _mm256_cvt_roundepi32_ps (__m256i __A
, const int __R
)
257 return (__m256
) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si
) __A
,
259 _mm256_undefined_ps (),
264 extern __inline __m256
265 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
266 _mm256_mask_cvt_roundepi32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
,
269 return (__m256
) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si
) __A
,
275 extern __inline __m256
276 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
277 _mm256_maskz_cvt_roundepi32_ps (__mmask8 __U
, __m256i __A
, const int __R
)
279 return (__m256
) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si
) __A
,
281 _mm256_setzero_ps (),
286 extern __inline __m128h
287 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
288 _mm256_cvt_roundpd_ph (__m256d __A
, const int __R
)
290 return (__m128h
) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df
) __A
,
297 extern __inline __m128h
298 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
299 _mm256_mask_cvt_roundpd_ph (__m128h __W
, __mmask8 __U
, __m256d __A
,
302 return (__m128h
) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df
) __A
,
308 extern __inline __m128h
309 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
310 _mm256_maskz_cvt_roundpd_ph (__mmask8 __U
, __m256d __A
, const int __R
)
312 return (__m128h
) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df
) __A
,
319 extern __inline __m128
320 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
321 _mm256_cvt_roundpd_ps (__m256d __A
, const int __R
)
323 return (__m128
) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df
) __A
,
330 extern __inline __m128
331 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
332 _mm256_mask_cvt_roundpd_ps (__m128 __W
, __mmask8 __U
, __m256d __A
,
335 return (__m128
) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df
) __A
,
341 extern __inline __m128
342 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
343 _mm256_maskz_cvt_roundpd_ps (__mmask8 __U
, __m256d __A
, const int __R
)
345 return (__m128
) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df
) __A
,
352 extern __inline __m128i
353 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
354 _mm256_cvt_roundpd_epi32 (__m256d __A
, const int __R
)
357 (__m128i
) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df
) __A
,
359 _mm_undefined_si128 (),
364 extern __inline __m128i
365 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
366 _mm256_mask_cvt_roundpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
,
369 return (__m128i
) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df
) __A
,
375 extern __inline __m128i
376 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
377 _mm256_maskz_cvt_roundpd_epi32 (__mmask8 __U
, __m256d __A
, const int __R
)
379 return (__m128i
) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df
) __A
,
381 _mm_setzero_si128 (),
386 extern __inline __m256i
387 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
388 _mm256_cvt_roundpd_epi64 (__m256d __A
, const int __R
)
391 (__m256i
) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df
) __A
,
393 _mm256_setzero_si256 (),
398 extern __inline __m256i
399 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
400 _mm256_mask_cvt_roundpd_epi64 (__m256i __W
, __mmask8 __U
, __m256d __A
,
403 return (__m256i
) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df
) __A
,
409 extern __inline __m256i
410 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
411 _mm256_maskz_cvt_roundpd_epi64 (__mmask8 __U
, __m256d __A
, const int __R
)
414 (__m256i
) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df
) __A
,
416 _mm256_setzero_si256 (),
421 extern __inline __m128i
422 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
423 _mm256_cvt_roundpd_epu32 (__m256d __A
, const int __R
)
426 (__m128i
) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df
) __A
,
428 _mm_undefined_si128 (),
433 extern __inline __m128i
434 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
435 _mm256_mask_cvt_roundpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
,
438 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df
) __A
,
444 extern __inline __m128i
445 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
446 _mm256_maskz_cvt_roundpd_epu32 (__mmask8 __U
, __m256d __A
, const int __R
)
448 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df
) __A
,
450 _mm_setzero_si128 (),
455 extern __inline __m256i
456 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
457 _mm256_cvt_roundpd_epu64 (__m256d __A
, const int __R
)
460 (__m256i
) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df
) __A
,
462 _mm256_setzero_si256 (),
467 extern __inline __m256i
468 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
469 _mm256_mask_cvt_roundpd_epu64 (__m256i __W
, __mmask8 __U
, __m256d __A
,
472 return (__m256i
) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df
) __A
,
478 extern __inline __m256i
479 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
480 _mm256_maskz_cvt_roundpd_epu64 (__mmask8 __U
, __m256d __A
, const int __R
)
483 (__m256i
) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df
) __A
,
485 _mm256_setzero_si256 (),
490 extern __inline __m256i
491 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
492 _mm256_cvt_roundph_epi32 (__m128h __A
, const int __R
)
495 (__m256i
) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf
) __A
,
497 _mm256_setzero_si256 (),
502 extern __inline __m256i
503 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
504 _mm256_mask_cvt_roundph_epi32 (__m256i __W
, __mmask8 __U
, __m128h __A
,
507 return (__m256i
) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf
) __A
,
513 extern __inline __m256i
514 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
515 _mm256_maskz_cvt_roundph_epi32 (__mmask8 __U
, __m128h __A
, const int __R
)
518 (__m256i
) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf
) __A
,
520 _mm256_setzero_si256 (),
525 extern __inline __m256d
526 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
527 _mm256_cvt_roundph_pd (__m128h __A
, const int __R
)
529 return (__m256d
) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf
) __A
,
531 _mm256_setzero_pd (),
536 extern __inline __m256d
537 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
538 _mm256_mask_cvt_roundph_pd (__m256d __W
, __mmask8 __U
, __m128h __A
,
541 return (__m256d
) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf
) __A
,
547 extern __inline __m256d
548 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
549 _mm256_maskz_cvt_roundph_pd (__mmask8 __U
, __m128h __A
, const int __R
)
551 return (__m256d
) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf
) __A
,
553 _mm256_setzero_pd (),
558 extern __inline __m256
559 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
560 _mm256_cvt_roundph_ps (__m128h __A
, const int __R
)
563 (__m256
) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf
) __A
,
565 _mm256_undefined_ps (),
570 extern __inline __m256
571 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
572 _mm256_mask_cvt_roundph_ps (__m256 __W
, __mmask8 __U
, __m128h __A
,
575 return (__m256
) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf
) __A
,
581 extern __inline __m256
582 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
583 _mm256_maskz_cvt_roundph_ps (__mmask8 __U
, __m128h __A
, const int __R
)
585 return (__m256
) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf
) __A
,
587 _mm256_setzero_ps (),
592 extern __inline __m256
593 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
594 _mm256_cvtx_roundph_ps (__m128h __A
, const int __R
)
596 return (__m256
) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf
) __A
,
598 _mm256_setzero_ps (),
603 extern __inline __m256
604 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
605 _mm256_mask_cvtx_roundph_ps (__m256 __W
, __mmask8 __U
, __m128h __A
,
608 return (__m256
) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf
) __A
,
614 extern __inline __m256
615 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
616 _mm256_maskz_cvtx_roundph_ps (__mmask8 __U
, __m128h __A
, const int __R
)
618 return (__m256
) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf
) __A
,
620 _mm256_setzero_ps (),
625 extern __inline __m256i
626 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
627 _mm256_cvt_roundph_epi64 (__m128h __A
, const int __R
)
630 (__m256i
) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf
) __A
,
632 _mm256_setzero_si256 (),
637 extern __inline __m256i
638 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
639 _mm256_mask_cvt_roundph_epi64 (__m256i __W
, __mmask8 __U
, __m128h __A
,
642 return (__m256i
) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf
) __A
,
648 extern __inline __m256i
649 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
650 _mm256_maskz_cvt_roundph_epi64 (__mmask8 __U
, __m128h __A
, const int __R
)
653 (__m256i
) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf
) __A
,
655 _mm256_setzero_si256 (),
660 extern __inline __m256i
661 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
662 _mm256_cvt_roundph_epu32 (__m128h __A
, const int __R
)
665 (__m256i
) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf
) __A
,
667 _mm256_setzero_si256 (),
672 extern __inline __m256i
673 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
674 _mm256_mask_cvt_roundph_epu32 (__m256i __W
, __mmask8 __U
, __m128h __A
,
677 return (__m256i
) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf
) __A
,
683 extern __inline __m256i
684 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
685 _mm256_maskz_cvt_roundph_epu32 (__mmask8 __U
, __m128h __A
, const int __R
)
688 (__m256i
) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf
) __A
,
690 _mm256_setzero_si256 (),
695 extern __inline __m256i
696 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
697 _mm256_cvt_roundph_epu64 (__m128h __A
, const int __R
)
700 (__m256i
) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf
) __A
,
702 _mm256_setzero_si256 (),
707 extern __inline __m256i
708 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
709 _mm256_mask_cvt_roundph_epu64 (__m256i __W
, __mmask8 __U
, __m128h __A
,
712 return (__m256i
) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf
) __A
,
718 extern __inline __m256i
719 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
720 _mm256_maskz_cvt_roundph_epu64 (__mmask8 __U
, __m128h __A
, const int __R
)
723 (__m256i
) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf
) __A
,
725 _mm256_setzero_si256 (),
730 extern __inline __m256i
731 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
732 _mm256_cvt_roundph_epu16 (__m256h __A
, const int __R
)
735 (__m256i
) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf
) __A
,
737 _mm256_undefined_si256 (),
742 extern __inline __m256i
743 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
744 _mm256_mask_cvt_roundph_epu16 (__m256i __W
, __mmask16 __U
, __m256h __A
,
747 return (__m256i
) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf
) __A
,
753 extern __inline __m256i
754 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
755 _mm256_maskz_cvt_roundph_epu16 (__mmask16 __U
, __m256h __A
, const int __R
)
758 (__m256i
) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf
) __A
,
760 _mm256_setzero_si256 (),
765 extern __inline __m256i
766 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
767 _mm256_cvt_roundph_epi16 (__m256h __A
, const int __R
)
770 (__m256i
) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf
) __A
,
772 _mm256_undefined_si256 (),
777 extern __inline __m256i
778 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
779 _mm256_mask_cvt_roundph_epi16 (__m256i __W
, __mmask16 __U
, __m256h __A
,
782 return (__m256i
) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf
) __A
,
788 extern __inline __m256i
789 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
790 _mm256_maskz_cvt_roundph_epi16 (__mmask16 __U
, __m256h __A
, const int __R
)
793 (__m256i
) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf
) __A
,
795 _mm256_setzero_si256 (),
800 extern __inline __m256d
801 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
802 _mm256_cvt_roundps_pd (__m128 __A
, const int __R
)
805 (__m256d
) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf
) __A
,
807 _mm256_undefined_pd (),
812 extern __inline __m256d
813 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
814 _mm256_mask_cvt_roundps_pd (__m256d __W
, __mmask8 __U
, __m128 __A
,
817 return (__m256d
) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf
) __A
,
823 extern __inline __m256d
824 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
825 _mm256_maskz_cvt_roundps_pd (__mmask8 __U
, __m128 __A
, const int __R
)
827 return (__m256d
) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf
) __A
,
829 _mm256_setzero_pd (),
834 extern __inline __m128h
835 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
836 _mm256_cvtx_roundps_ph (__m256 __A
, const int __R
)
838 return (__m128h
) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf
) __A
,
845 extern __inline __m128h
846 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
847 _mm256_mask_cvtx_roundps_ph (__m128h __W
, __mmask8 __U
, __m256 __A
,
850 return (__m128h
) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf
) __A
,
856 extern __inline __m128h
857 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
858 _mm256_maskz_cvtx_roundps_ph (__mmask8 __U
, __m256 __A
, const int __R
)
860 return (__m128h
) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf
) __A
,
867 extern __inline __m256i
868 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
869 _mm256_cvt_roundps_epi32 (__m256 __A
, const int __R
)
872 (__m256i
) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf
) __A
,
874 _mm256_undefined_si256 (),
879 extern __inline __m256i
880 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
881 _mm256_mask_cvt_roundps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
,
884 return (__m256i
) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf
) __A
,
890 extern __inline __m256i
891 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
892 _mm256_maskz_cvt_roundps_epi32 (__mmask8 __U
, __m256 __A
, const int __R
)
895 (__m256i
) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf
) __A
,
897 _mm256_setzero_si256 (),
902 extern __inline __m256i
903 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
904 _mm256_cvt_roundps_epi64 (__m128 __A
, const int __R
)
907 (__m256i
) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf
) __A
,
909 _mm256_setzero_si256 (),
914 extern __inline __m256i
915 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
916 _mm256_mask_cvt_roundps_epi64 (__m256i __W
, __mmask8 __U
, __m128 __A
,
919 return (__m256i
) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf
) __A
,
925 extern __inline __m256i
926 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
927 _mm256_maskz_cvt_roundps_epi64 (__mmask8 __U
, __m128 __A
, const int __R
)
930 (__m256i
) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf
) __A
,
932 _mm256_setzero_si256 (),
937 extern __inline __m256i
938 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
939 _mm256_cvt_roundps_epu32 (__m256 __A
, const int __R
)
942 (__m256i
) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf
) __A
,
944 _mm256_undefined_si256 (),
949 extern __inline __m256i
950 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
951 _mm256_mask_cvt_roundps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
,
954 return (__m256i
) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf
) __A
,
960 extern __inline __m256i
961 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
962 _mm256_maskz_cvt_roundps_epu32 (__mmask8 __U
, __m256 __A
, const int __R
)
965 (__m256i
) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf
) __A
,
967 _mm256_setzero_si256 (),
972 extern __inline __m256i
973 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
974 _mm256_cvt_roundps_epu64 (__m128 __A
, const int __R
)
977 (__m256i
) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf
) __A
,
979 _mm256_setzero_si256 (),
984 extern __inline __m256i
985 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
986 _mm256_mask_cvt_roundps_epu64 (__m256i __W
, __mmask8 __U
, __m128 __A
,
989 return (__m256i
) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf
) __A
,
995 extern __inline __m256i
996 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
997 _mm256_maskz_cvt_roundps_epu64 (__mmask8 __U
, __m128 __A
, const int __R
)
1000 (__m256i
) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf
) __A
,
1002 _mm256_setzero_si256 (),
1007 extern __inline __m256d
1008 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1009 _mm256_cvt_roundepi64_pd (__m256i __A
, const int __R
)
1011 return (__m256d
) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di
) __A
,
1013 _mm256_setzero_pd (),
1018 extern __inline __m256d
1019 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1020 _mm256_mask_cvt_roundepi64_pd (__m256d __W
, __mmask8 __U
, __m256i __A
,
1023 return (__m256d
) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di
) __A
,
1029 extern __inline __m256d
1030 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1031 _mm256_maskz_cvt_roundepi64_pd (__mmask8 __U
, __m256i __A
, const int __R
)
1033 return (__m256d
) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di
) __A
,
1035 _mm256_setzero_pd (),
1040 extern __inline __m128h
1041 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1042 _mm256_cvt_roundepi64_ph (__m256i __A
, const int __R
)
1044 return (__m128h
) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di
) __A
,
1051 extern __inline __m128h
1052 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1053 _mm256_mask_cvt_roundepi64_ph (__m128h __W
, __mmask8 __U
, __m256i __A
,
1056 return (__m128h
) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di
) __A
,
1062 extern __inline __m128h
1063 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1064 _mm256_maskz_cvt_roundepi64_ph (__mmask8 __U
, __m256i __A
, const int __R
)
1066 return (__m128h
) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di
) __A
,
1073 extern __inline __m128
1074 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1075 _mm256_cvt_roundepi64_ps (__m256i __A
, const int __R
)
1077 return (__m128
) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di
) __A
,
1084 extern __inline __m128
1085 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1086 _mm256_mask_cvt_roundepi64_ps (__m128 __W
, __mmask8 __U
, __m256i __A
,
1089 return (__m128
) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di
) __A
,
1095 extern __inline __m128
1096 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1097 _mm256_maskz_cvt_roundepi64_ps (__mmask8 __U
, __m256i __A
, const int __R
)
1099 return (__m128
) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di
) __A
,
1106 extern __inline __m128i
1107 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1108 _mm256_cvtt_roundpd_epi32 (__m256d __A
, const int __R
)
1111 (__m128i
) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df
) __A
,
1113 _mm_undefined_si128 (),
1118 extern __inline __m128i
1119 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1120 _mm256_mask_cvtt_roundpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
,
1123 return (__m128i
) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df
) __A
,
1129 extern __inline __m128i
1130 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1131 _mm256_maskz_cvtt_roundpd_epi32 (__mmask8 __U
, __m256d __A
, const int __R
)
1133 return (__m128i
) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df
) __A
,
1135 _mm_setzero_si128 (),
1140 extern __inline __m256i
1141 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1142 _mm256_cvtt_roundpd_epi64 (__m256d __A
, const int __R
)
1145 (__m256i
) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df
) __A
,
1147 _mm256_setzero_si256 (),
1152 extern __inline __m256i
1153 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1154 _mm256_mask_cvtt_roundpd_epi64 (__m256i __W
, __mmask8 __U
, __m256d __A
,
1157 return (__m256i
) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df
) __A
,
1163 extern __inline __m256i
1164 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1165 _mm256_maskz_cvtt_roundpd_epi64 (__mmask8 __U
, __m256d __A
, const int __R
)
1168 (__m256i
) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df
) __A
,
1170 _mm256_setzero_si256 (),
1175 extern __inline __m128i
1176 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1177 _mm256_cvtt_roundpd_epu32 (__m256d __A
, const int __R
)
1180 (__m128i
) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df
) __A
,
1182 _mm_undefined_si128 (),
1187 extern __inline __m128i
1188 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1189 _mm256_mask_cvtt_roundpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
,
1192 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df
) __A
,
1198 extern __inline __m128i
1199 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1200 _mm256_maskz_cvtt_roundpd_epu32 (__mmask8 __U
, __m256d __A
, const int __R
)
1203 (__m128i
) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df
) __A
,
1205 _mm_setzero_si128 (),
1210 extern __inline __m256i
1211 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1212 _mm256_cvtt_roundpd_epu64 (__m256d __A
, const int __R
)
1215 (__m256i
) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df
) __A
,
1217 _mm256_setzero_si256 (),
1222 extern __inline __m256i
1223 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1224 _mm256_mask_cvtt_roundpd_epu64 (__m256i __W
, __mmask8 __U
, __m256d __A
,
1227 return (__m256i
) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df
) __A
,
1233 extern __inline __m256i
1234 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1235 _mm256_maskz_cvtt_roundpd_epu64 (__mmask8 __U
, __m256d __A
, const int __R
)
1238 (__m256i
) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df
) __A
,
1240 _mm256_setzero_si256 (),
1245 extern __inline __m256i
1246 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1247 _mm256_cvtt_roundph_epi32 (__m128h __A
, const int __R
)
1250 (__m256i
) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf
) __A
,
1252 _mm256_setzero_si256 (),
1257 extern __inline __m256i
1258 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1259 _mm256_mask_cvtt_roundph_epi32 (__m256i __W
, __mmask8 __U
, __m128h __A
,
1262 return (__m256i
) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf
) __A
,
1268 extern __inline __m256i
1269 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1270 _mm256_maskz_cvtt_roundph_epi32 (__mmask8 __U
, __m128h __A
, const int __R
)
1273 (__m256i
) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf
) __A
,
1275 _mm256_setzero_si256 (),
1280 extern __inline __m256i
1281 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1282 _mm256_cvtt_roundph_epi64 (__m128h __A
, const int __R
)
1285 (__m256i
) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf
) __A
,
1287 _mm256_setzero_si256 (),
1292 extern __inline __m256i
1293 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1294 _mm256_mask_cvtt_roundph_epi64 (__m256i __W
, __mmask8 __U
, __m128h __A
,
1297 return (__m256i
) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf
) __A
,
1303 extern __inline __m256i
1304 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1305 _mm256_maskz_cvtt_roundph_epi64 (__mmask8 __U
, __m128h __A
, const int __R
)
1308 (__m256i
) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf
) __A
,
1310 _mm256_setzero_si256 (),
1315 extern __inline __m256i
1316 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1317 _mm256_cvtt_roundph_epu32 (__m128h __A
, const int __R
)
1320 (__m256i
) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf
) __A
,
1322 _mm256_setzero_si256 (),
1327 extern __inline __m256i
1328 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1329 _mm256_mask_cvtt_roundph_epu32 (__m256i __W
, __mmask8 __U
, __m128h __A
,
1332 return (__m256i
) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf
) __A
,
1338 extern __inline __m256i
1339 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1340 _mm256_maskz_cvtt_roundph_epu32 (__mmask8 __U
, __m128h __A
, const int __R
)
1343 (__m256i
) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf
) __A
,
1345 _mm256_setzero_si256 (),
1350 extern __inline __m256i
1351 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1352 _mm256_cvtt_roundph_epu64 (__m128h __A
, const int __R
)
1355 (__m256i
) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf
) __A
,
1357 _mm256_setzero_si256 (),
1362 extern __inline __m256i
1363 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1364 _mm256_mask_cvtt_roundph_epu64 (__m256i __W
, __mmask8 __U
, __m128h __A
,
1367 return (__m256i
) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf
) __A
,
1373 extern __inline __m256i
1374 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1375 _mm256_maskz_cvtt_roundph_epu64 (__mmask8 __U
, __m128h __A
, const int __R
)
1378 (__m256i
) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf
) __A
,
1380 _mm256_setzero_si256 (),
1385 extern __inline __m256i
1386 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1387 _mm256_cvtt_roundph_epu16 (__m256h __A
, const int __R
)
1390 (__m256i
) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf
) __A
,
1392 _mm256_setzero_si256 (),
1397 extern __inline __m256i
1398 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1399 _mm256_mask_cvtt_roundph_epu16 (__m256i __W
, __mmask16 __U
, __m256h __A
,
1402 return (__m256i
) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf
) __A
,
1408 extern __inline __m256i
1409 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1410 _mm256_maskz_cvtt_roundph_epu16 (__mmask16 __U
, __m256h __A
, const int __R
)
1413 (__m256i
) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf
) __A
,
1415 _mm256_setzero_si256 (),
1420 extern __inline __m256i
1421 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1422 _mm256_cvtt_roundph_epi16 (__m256h __A
, const int __R
)
1425 (__m256i
) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf
) __A
,
1427 _mm256_setzero_si256 (),
1432 extern __inline __m256i
1433 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1434 _mm256_mask_cvtt_roundph_epi16 (__m256i __W
, __mmask16 __U
, __m256h __A
,
1437 return (__m256i
) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf
) __A
,
1443 extern __inline __m256i
1444 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1445 _mm256_maskz_cvtt_roundph_epi16 (__mmask16 __U
, __m256h __A
, const int __R
)
1448 (__m256i
) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf
) __A
,
1450 _mm256_setzero_si256 (),
1455 extern __inline __m256i
1456 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1457 _mm256_cvtt_roundps_epi32 (__m256 __A
, const int __R
)
1460 (__m256i
) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf
) __A
,
1462 _mm256_undefined_si256 (),
1467 extern __inline __m256i
1468 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1469 _mm256_mask_cvtt_roundps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
,
1472 return (__m256i
) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf
) __A
,
1478 extern __inline __m256i
1479 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1480 _mm256_maskz_cvtt_roundps_epi32 (__mmask8 __U
, __m256 __A
, const int __R
)
1483 (__m256i
) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf
) __A
,
1485 _mm256_setzero_si256 (),
1490 extern __inline __m256i
1491 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1492 _mm256_cvtt_roundps_epi64 (__m128 __A
, const int __R
)
1495 (__m256i
) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf
) __A
,
1497 _mm256_setzero_si256 (),
1502 extern __inline __m256i
1503 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1504 _mm256_mask_cvtt_roundps_epi64 (__m256i __W
, __mmask8 __U
, __m128 __A
,
1507 return (__m256i
) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf
) __A
,
1513 extern __inline __m256i
1514 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1515 _mm256_maskz_cvtt_roundps_epi64 (__mmask8 __U
, __m128 __A
, const int __R
)
1518 (__m256i
) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf
) __A
,
1520 _mm256_setzero_si256 (),
1525 extern __inline __m256i
1526 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1527 _mm256_cvtt_roundps_epu32 (__m256 __A
, const int __R
)
1530 (__m256i
) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf
) __A
,
1532 _mm256_undefined_si256 (),
1537 extern __inline __m256i
1538 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1539 _mm256_mask_cvtt_roundps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
,
1542 return (__m256i
) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf
) __A
,
1548 extern __inline __m256i
1549 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1550 _mm256_maskz_cvtt_roundps_epu32 (__mmask8 __U
, __m256 __A
, const int __R
)
1553 (__m256i
) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf
) __A
,
1555 _mm256_setzero_si256 (),
1560 extern __inline __m256i
1561 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1562 _mm256_cvtt_roundps_epu64 (__m128 __A
, const int __R
)
1565 (__m256i
) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf
) __A
,
1567 _mm256_setzero_si256 (),
1572 extern __inline __m256i
1573 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1574 _mm256_mask_cvtt_roundps_epu64 (__m256i __W
, __mmask8 __U
, __m128 __A
,
1577 return (__m256i
) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf
) __A
,
1583 extern __inline __m256i
1584 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1585 _mm256_maskz_cvtt_roundps_epu64 (__mmask8 __U
, __m128 __A
, const int __R
)
1588 (__m256i
) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf
) __A
,
1590 _mm256_setzero_si256 (),
1595 extern __inline __m128h
1596 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1597 _mm256_cvt_roundepu32_ph (__m256i __A
, const int __R
)
1599 return (__m128h
) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si
) __A
,
1606 extern __inline __m128h
1607 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1608 _mm256_mask_cvt_roundepu32_ph (__m128h __W
, __mmask8 __U
, __m256i __A
,
1611 return (__m128h
) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si
) __A
,
1617 extern __inline __m128h
1618 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1619 _mm256_maskz_cvt_roundepu32_ph (__mmask8 __U
, __m256i __A
, const int __R
)
1621 return (__m128h
) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si
) __A
,
1628 extern __inline __m256
1629 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1630 _mm256_cvt_roundepu32_ps (__m256i __A
, const int __R
)
1633 (__m256
) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si
) __A
,
1635 _mm256_undefined_ps (),
1640 extern __inline __m256
1641 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1642 _mm256_mask_cvt_roundepu32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
,
1645 return (__m256
) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si
) __A
,
1651 extern __inline __m256
1652 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1653 _mm256_maskz_cvt_roundepu32_ps (__mmask8 __U
, __m256i __A
, const int __R
)
1655 return (__m256
) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si
) __A
,
1657 _mm256_setzero_ps (),
1662 extern __inline __m256d
1663 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1664 _mm256_cvt_roundepu64_pd (__m256i __A
, const int __R
)
1666 return (__m256d
) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di
) __A
,
1668 _mm256_setzero_pd (),
1673 extern __inline __m256d
1674 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1675 _mm256_mask_cvt_roundepu64_pd (__m256d __W
, __mmask8 __U
, __m256i __A
,
1678 return (__m256d
) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di
) __A
,
1684 extern __inline __m256d
1685 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1686 _mm256_maskz_cvt_roundepu64_pd (__mmask8 __U
, __m256i __A
, const int __R
)
1688 return (__m256d
) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di
) __A
,
1690 _mm256_setzero_pd (),
1695 extern __inline __m128h
1696 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1697 _mm256_cvt_roundepu64_ph (__m256i __A
, const int __R
)
1699 return (__m128h
) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di
) __A
,
1706 extern __inline __m128h
1707 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1708 _mm256_mask_cvt_roundepu64_ph (__m128h __W
, __mmask8 __U
, __m256i __A
,
1711 return (__m128h
) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di
) __A
,
1717 extern __inline __m128h
1718 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1719 _mm256_maskz_cvt_roundepu64_ph (__mmask8 __U
, __m256i __A
, const int __R
)
1721 return (__m128h
) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di
) __A
,
1728 extern __inline __m128
1729 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1730 _mm256_cvt_roundepu64_ps (__m256i __A
, const int __R
)
1732 return (__m128
) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di
) __A
,
1739 extern __inline __m128
1740 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1741 _mm256_mask_cvt_roundepu64_ps (__m128 __W
, __mmask8 __U
, __m256i __A
,
1744 return (__m128
) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di
) __A
,
1750 extern __inline __m128
1751 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1752 _mm256_maskz_cvt_roundepu64_ps (__mmask8 __U
, __m256i __A
, const int __R
)
1754 return (__m128
) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di
) __A
,
1761 extern __inline __m256h
1762 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1763 _mm256_cvt_roundepu16_ph (__m256i __A
, const int __R
)
1765 return (__m256h
) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi
) __A
,
1767 _mm256_setzero_ph (),
1772 extern __inline __m256h
1773 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1774 _mm256_mask_cvt_roundepu16_ph (__m256h __W
, __mmask16 __U
, __m256i __A
,
1777 return (__m256h
) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi
) __A
,
1783 extern __inline __m256h
1784 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1785 _mm256_maskz_cvt_roundepu16_ph (__mmask16 __U
, __m256i __A
, const int __R
)
1787 return (__m256h
) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi
) __A
,
1789 _mm256_setzero_ph (),
1794 extern __inline __m256h
1795 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1796 _mm256_cvt_roundepi16_ph (__m256i __A
, const int __R
)
1798 return (__m256h
) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi
) __A
,
1800 _mm256_setzero_ph (),
1805 extern __inline __m256h
1806 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1807 _mm256_mask_cvt_roundepi16_ph (__m256h __W
, __mmask16 __U
, __m256i __A
,
1810 return (__m256h
) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi
) __A
,
1816 extern __inline __m256h
1817 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1818 _mm256_maskz_cvt_roundepi16_ph (__mmask16 __U
, __m256i __A
, const int __R
)
1820 return (__m256h
) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi
) __A
,
1822 _mm256_setzero_ph (),
1827 extern __inline __m256d
1828 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1829 _mm256_div_round_pd (__m256d __A
, __m256d __B
, const int __R
)
1831 return (__m256d
) __builtin_ia32_divpd256_mask_round ((__v4df
) __A
,
1834 _mm256_undefined_pd (),
1839 extern __inline __m256d
1840 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1841 _mm256_mask_div_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
1842 __m256d __B
, const int __R
)
1844 return (__m256d
) __builtin_ia32_divpd256_mask_round ((__v4df
) __A
,
1851 extern __inline __m256d
1852 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1853 _mm256_maskz_div_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
1856 return (__m256d
) __builtin_ia32_divpd256_mask_round ((__v4df
) __A
,
1859 _mm256_setzero_pd (),
1864 extern __inline __m256h
1865 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1866 _mm256_div_round_ph (__m256h __A
, __m256h __B
, const int __R
)
1868 return (__m256h
) __builtin_ia32_divph256_mask_round ((__v16hf
) __A
,
1871 _mm256_setzero_ph (),
1876 extern __inline __m256h
1877 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1878 _mm256_mask_div_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
1879 __m256h __B
, const int __R
)
1881 return (__m256h
) __builtin_ia32_divph256_mask_round ((__v16hf
) __A
,
1888 extern __inline __m256h
1889 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1890 _mm256_maskz_div_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
1893 return (__m256h
) __builtin_ia32_divph256_mask_round ((__v16hf
) __A
,
1896 _mm256_setzero_ph (),
1901 extern __inline __m256
1902 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1903 _mm256_div_round_ps (__m256 __A
, __m256 __B
, const int __R
)
1905 return (__m256
) __builtin_ia32_divps256_mask_round ((__v8sf
) __A
,
1908 _mm256_undefined_ps (),
1913 extern __inline __m256
1914 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1915 _mm256_mask_div_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
1918 return (__m256
) __builtin_ia32_divps256_mask_round ((__v8sf
) __A
,
1925 extern __inline __m256
1926 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1927 _mm256_maskz_div_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
1930 return (__m256
) __builtin_ia32_divps256_mask_round ((__v8sf
) __A
,
1933 _mm256_setzero_ps (),
1937 extern __inline __m256h
1938 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1939 _mm256_fcmadd_round_pch (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
1941 return (__m256h
) __builtin_ia32_vfcmaddcph256_round ((__v16hf
) __A
,
1947 extern __inline __m256h
1948 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1949 _mm256_mask_fcmadd_round_pch (__m256h __A
, __mmask8 __U
, __m256h __B
,
1950 __m256h __D
, const int __R
)
1952 return (__m256h
) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf
) __A
,
1959 extern __inline __m256h
1960 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1961 _mm256_mask3_fcmadd_round_pch (__m256h __A
, __m256h __B
, __m256h __D
,
1962 __mmask8 __U
, const int __R
)
1964 return (__m256h
) __builtin_ia32_vfcmaddcph256_mask3_round ((__v16hf
) __A
,
1971 extern __inline __m256h
1972 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1973 _mm256_maskz_fcmadd_round_pch (__mmask8 __U
, __m256h __A
, __m256h __B
,
1974 __m256h __D
, const int __R
)
1976 return (__m256h
) __builtin_ia32_vfcmaddcph256_maskz_round ((__v16hf
) __A
,
1983 extern __inline __m256h
1984 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1985 _mm256_fcmul_round_pch (__m256h __A
, __m256h __B
, const int __R
)
1988 (__m256h
) __builtin_ia32_vfcmulcph256_round ((__v16hf
) __A
,
1993 extern __inline __m256h
1994 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
1995 _mm256_mask_fcmul_round_pch (__m256h __W
, __mmask8 __U
, __m256h __A
,
1996 __m256h __B
, const int __R
)
1998 return (__m256h
) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf
) __A
,
2005 extern __inline __m256h
2006 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2007 _mm256_maskz_fcmul_round_pch (__mmask8 __U
, __m256h __A
, __m256h __B
,
2010 return (__m256h
) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf
) __A
,
2013 _mm256_setzero_ph (),
2018 extern __inline __m256d
2019 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2020 _mm256_fixupimm_round_pd (__m256d __A
, __m256d __B
, __m256i __D
,
2021 const int __C
, const int __R
)
2023 return (__m256d
) __builtin_ia32_fixupimmpd256_mask_round ((__v4df
) __A
,
2031 extern __inline __m256d
2032 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2033 _mm256_mask_fixupimm_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2034 __m256i __D
, const int __C
, const int __R
)
2036 return (__m256d
) __builtin_ia32_fixupimmpd256_mask_round ((__v4df
) __A
,
2044 extern __inline __m256d
2045 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2046 _mm256_maskz_fixupimm_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2047 __m256i __D
, const int __C
, const int __R
)
2049 return (__m256d
) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df
) __A
,
2057 extern __inline __m256
2058 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2059 _mm256_fixupimm_round_ps (__m256 __A
, __m256 __B
, __m256i __D
, const int __C
,
2062 return (__m256
) __builtin_ia32_fixupimmps256_mask_round ((__v8sf
) __A
,
2070 extern __inline __m256
2071 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2072 _mm256_mask_fixupimm_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2073 __m256i __D
, const int __C
, const int __R
)
2075 return (__m256
) __builtin_ia32_fixupimmps256_mask_round ((__v8sf
) __A
,
2083 extern __inline __m256
2084 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2085 _mm256_maskz_fixupimm_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2086 __m256i __D
, const int __C
, const int __R
)
2088 return (__m256
) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf
) __A
,
2096 extern __inline __m256d
2097 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2098 _mm256_fmadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2100 return (__m256d
) __builtin_ia32_vfmaddpd256_mask_round ((__v4df
) __A
,
2107 extern __inline __m256d
2108 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2109 _mm256_mask_fmadd_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2110 __m256d __D
, const int __R
)
2112 return (__m256d
) __builtin_ia32_vfmaddpd256_mask_round ((__v4df
) __A
,
2115 (__mmask8
) __U
, __R
);
2118 extern __inline __m256d
2119 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2120 _mm256_mask3_fmadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2121 __mmask8 __U
, const int __R
)
2123 return (__m256d
) __builtin_ia32_vfmaddpd256_mask3_round ((__v4df
) __A
,
2130 extern __inline __m256d
2131 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2132 _mm256_maskz_fmadd_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2133 __m256d __D
, const int __R
)
2135 return (__m256d
) __builtin_ia32_vfmaddpd256_maskz_round ((__v4df
) __A
,
2142 extern __inline __m256h
2143 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2144 _mm256_fmadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2146 return (__m256h
) __builtin_ia32_vfmaddph256_mask_round ((__v16hf
) __A
,
2153 extern __inline __m256h
2154 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2155 _mm256_mask_fmadd_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2156 __m256h __D
, const int __R
)
2158 return (__m256h
) __builtin_ia32_vfmaddph256_mask_round ((__v16hf
) __A
,
2165 extern __inline __m256h
2166 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2167 _mm256_mask3_fmadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2168 __mmask16 __U
, const int __R
)
2170 return (__m256h
) __builtin_ia32_vfmaddph256_mask3_round ((__v16hf
) __A
,
2177 extern __inline __m256h
2178 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2179 _mm256_maskz_fmadd_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2180 __m256h __D
, const int __R
)
2182 return (__m256h
) __builtin_ia32_vfmaddph256_maskz_round ((__v16hf
) __A
,
2189 extern __inline __m256
2190 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2191 _mm256_fmadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2193 return (__m256
) __builtin_ia32_vfmaddps256_mask_round ((__v8sf
) __A
,
2200 extern __inline __m256
2201 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2202 _mm256_mask_fmadd_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2203 __m256 __D
, const int __R
)
2205 return (__m256
) __builtin_ia32_vfmaddps256_mask_round ((__v8sf
) __A
,
2212 extern __inline __m256
2213 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2214 _mm256_mask3_fmadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
2215 __mmask8 __U
, const int __R
)
2217 return (__m256
) __builtin_ia32_vfmaddps256_mask3_round ((__v8sf
) __A
,
2224 extern __inline __m256
2225 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2226 _mm256_maskz_fmadd_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2227 __m256 __D
, const int __R
)
2229 return (__m256
) __builtin_ia32_vfmaddps256_maskz_round ((__v8sf
) __A
,
2236 extern __inline __m256h
2237 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2238 _mm256_fmadd_round_pch (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2240 return (__m256h
) __builtin_ia32_vfmaddcph256_round ((__v16hf
) __A
,
2246 extern __inline __m256h
2247 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2248 _mm256_mask_fmadd_round_pch (__m256h __A
, __mmask16 __U
, __m256h __B
,
2249 __m256h __D
, const int __R
)
2251 return (__m256h
) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf
) __A
,
2258 extern __inline __m256h
2259 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2260 _mm256_mask3_fmadd_round_pch (__m256h __A
, __m256h __B
, __m256h __D
,
2261 __mmask16 __U
, const int __R
)
2263 return (__m256h
) __builtin_ia32_vfmaddcph256_mask3_round ((__v16hf
) __A
,
2270 extern __inline __m256h
2271 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2272 _mm256_maskz_fmadd_round_pch (__mmask16 __U
, __m256h __A
, __m256h __B
,
2273 __m256h __D
, const int __R
)
2275 return (__m256h
) __builtin_ia32_vfmaddcph256_maskz_round ((__v16hf
) __A
,
2282 extern __inline __m256d
2283 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2284 _mm256_fmaddsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2286 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df
) __A
,
2293 extern __inline __m256d
2294 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2295 _mm256_mask_fmaddsub_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2296 __m256d __D
, const int __R
)
2298 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df
) __A
,
2305 extern __inline __m256d
2306 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2307 _mm256_mask3_fmaddsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2308 __mmask8 __U
, const int __R
)
2310 return (__m256d
) __builtin_ia32_vfmaddsubpd256_mask3_round ((__v4df
) __A
,
2317 extern __inline __m256d
2318 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2319 _mm256_maskz_fmaddsub_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2320 __m256d __D
, const int __R
)
2322 return (__m256d
) __builtin_ia32_vfmaddsubpd256_maskz_round ((__v4df
) __A
,
2329 extern __inline __m256h
2330 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2331 _mm256_fmaddsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2333 return (__m256h
) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf
) __A
,
2340 extern __inline __m256h
2341 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2342 _mm256_mask_fmaddsub_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2343 __m256h __D
, const int __R
)
2345 return (__m256h
) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf
) __A
,
2352 extern __inline __m256h
2353 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2354 _mm256_mask3_fmaddsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2355 __mmask16 __U
, const int __R
)
2357 return (__m256h
) __builtin_ia32_vfmaddsubph256_mask3_round ((__v16hf
) __A
,
2364 extern __inline __m256h
2365 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2366 _mm256_maskz_fmaddsub_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2367 __m256h __D
, const int __R
)
2369 return (__m256h
) __builtin_ia32_vfmaddsubph256_maskz_round ((__v16hf
) __A
,
2376 extern __inline __m256
2377 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2378 _mm256_fmaddsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2380 return (__m256
) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf
) __A
,
2387 extern __inline __m256
2388 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2389 _mm256_mask_fmaddsub_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2390 __m256 __D
, const int __R
)
2392 return (__m256
) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf
) __A
,
2399 extern __inline __m256
2400 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2401 _mm256_mask3_fmaddsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
2402 __mmask8 __U
, const int __R
)
2404 return (__m256
) __builtin_ia32_vfmaddsubps256_mask3_round ((__v8sf
) __A
,
2411 extern __inline __m256
2412 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2413 _mm256_maskz_fmaddsub_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2414 __m256 __D
, const int __R
)
2416 return (__m256
) __builtin_ia32_vfmaddsubps256_maskz_round ((__v8sf
) __A
,
2423 extern __inline __m256d
2424 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2425 _mm256_fmsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2427 return (__m256d
) __builtin_ia32_vfmsubpd256_mask_round ((__v4df
) __A
,
2430 (__mmask8
) -1, __R
);
2433 extern __inline __m256d
2434 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2435 _mm256_mask_fmsub_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2436 __m256d __D
, const int __R
)
2438 return (__m256d
) __builtin_ia32_vfmsubpd256_mask_round ((__v4df
) __A
,
2441 (__mmask8
) __U
, __R
);
2444 extern __inline __m256d
2445 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2446 _mm256_mask3_fmsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2447 __mmask8 __U
, const int __R
)
2449 return (__m256d
) __builtin_ia32_vfmsubpd256_mask3_round ((__v4df
) __A
,
2452 (__mmask8
) __U
, __R
);
2455 extern __inline __m256d
2456 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2457 _mm256_maskz_fmsub_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2458 __m256d __D
, const int __R
)
2460 return (__m256d
) __builtin_ia32_vfmsubpd256_maskz_round ((__v4df
) __A
,
2463 (__mmask8
) __U
, __R
);
2466 extern __inline __m256h
2467 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2468 _mm256_fmsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2471 __builtin_ia32_vfmsubph256_mask_round ((__v16hf
) __A
,
2474 (__mmask16
) -1, __R
);
2477 extern __inline __m256h
2478 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2479 _mm256_mask_fmsub_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2480 __m256h __D
, const int __R
)
2483 __builtin_ia32_vfmsubph256_mask_round ((__v16hf
) __A
,
2486 (__mmask16
) __U
, __R
);
2489 extern __inline __m256h
2490 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2491 _mm256_mask3_fmsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2492 __mmask16 __U
, const int __R
)
2495 __builtin_ia32_vfmsubph256_mask3_round ((__v16hf
) __A
,
2498 (__mmask16
) __U
, __R
);
2501 extern __inline __m256h
2502 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2503 _mm256_maskz_fmsub_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2504 __m256h __D
, const int __R
)
2507 __builtin_ia32_vfmsubph256_maskz_round ((__v16hf
) __A
,
2510 (__mmask16
) __U
, __R
);
2513 extern __inline __m256
2514 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2515 _mm256_fmsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2517 return (__m256
) __builtin_ia32_vfmsubps256_mask_round ((__v8sf
) __A
,
2520 (__mmask8
) -1, __R
);
2523 extern __inline __m256
2524 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2525 _mm256_mask_fmsub_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2526 __m256 __D
, const int __R
)
2528 return (__m256
) __builtin_ia32_vfmsubps256_mask_round ((__v8sf
) __A
,
2531 (__mmask8
) __U
, __R
);
2534 extern __inline __m256
2535 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2536 _mm256_mask3_fmsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
2537 __mmask8 __U
, const int __R
)
2539 return (__m256
) __builtin_ia32_vfmsubps256_mask3_round ((__v8sf
) __A
,
2542 (__mmask8
) __U
, __R
);
2545 extern __inline __m256
2546 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2547 _mm256_maskz_fmsub_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2548 __m256 __D
, const int __R
)
2550 return (__m256
) __builtin_ia32_vfmsubps256_maskz_round ((__v8sf
) __A
,
2553 (__mmask8
) __U
, __R
);
2556 extern __inline __m256d
2557 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2558 _mm256_fmsubadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2560 return (__m256d
) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df
) __A
,
2567 extern __inline __m256d
2568 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2569 _mm256_mask_fmsubadd_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2570 __m256d __D
, const int __R
)
2572 return (__m256d
) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df
) __A
,
2579 extern __inline __m256d
2580 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2581 _mm256_mask3_fmsubadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2582 __mmask8 __U
, const int __R
)
2584 return (__m256d
) __builtin_ia32_vfmsubaddpd256_mask3_round ((__v4df
) __A
,
2591 extern __inline __m256d
2592 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2593 _mm256_maskz_fmsubadd_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2594 __m256d __D
, const int __R
)
2596 return (__m256d
) __builtin_ia32_vfmsubaddpd256_maskz_round ((__v4df
) __A
,
2603 extern __inline __m256h
2604 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2605 _mm256_fmsubadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2608 __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf
) __A
,
2615 extern __inline __m256h
2616 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2617 _mm256_mask_fmsubadd_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2618 __m256h __D
, const int __R
)
2621 __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf
) __A
,
2628 extern __inline __m256h
2629 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2630 _mm256_mask3_fmsubadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2631 __mmask16 __U
, const int __R
)
2634 __builtin_ia32_vfmsubaddph256_mask3_round ((__v16hf
) __A
,
2641 extern __inline __m256h
2642 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2643 _mm256_maskz_fmsubadd_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2644 __m256h __D
, const int __R
)
2647 __builtin_ia32_vfmsubaddph256_maskz_round ((__v16hf
) __A
,
2654 extern __inline __m256
2655 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2656 _mm256_fmsubadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2658 return (__m256
) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf
) __A
,
2665 extern __inline __m256
2666 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2667 _mm256_mask_fmsubadd_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2668 __m256 __D
, const int __R
)
2670 return (__m256
) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf
) __A
,
2677 extern __inline __m256
2678 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2679 _mm256_mask3_fmsubadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
2680 __mmask8 __U
, const int __R
)
2682 return (__m256
) __builtin_ia32_vfmsubaddps256_mask3_round ((__v8sf
) __A
,
2689 extern __inline __m256
2690 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2691 _mm256_maskz_fmsubadd_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2692 __m256 __D
, const int __R
)
2694 return (__m256
) __builtin_ia32_vfmsubaddps256_maskz_round ((__v8sf
) __A
,
2701 extern __inline __m256h
2702 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2703 _mm256_fmul_round_pch (__m256h __B
, __m256h __D
, const int __R
)
2705 return (__m256h
) __builtin_ia32_vfmulcph256_round ((__v16hf
) __B
,
2710 extern __inline __m256h
2711 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2712 _mm256_mask_fmul_round_pch (__m256h __A
, __mmask8 __U
, __m256h __B
,
2713 __m256h __D
, const int __R
)
2715 return (__m256h
) __builtin_ia32_vfmulcph256_mask_round ((__v16hf
) __B
,
2722 extern __inline __m256h
2723 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2724 _mm256_maskz_fmul_round_pch (__mmask8 __U
, __m256h __B
, __m256h __D
,
2727 return (__m256h
) __builtin_ia32_vfmulcph256_mask_round ((__v16hf
) __B
,
2730 _mm256_setzero_ph (),
2735 extern __inline __m256d
2736 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2737 _mm256_fnmadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2739 return (__m256d
) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df
) __A
,
2746 extern __inline __m256d
2747 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2748 _mm256_mask_fnmadd_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2749 __m256d __D
, const int __R
)
2751 return (__m256d
) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df
) __A
,
2758 extern __inline __m256d
2759 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2760 _mm256_mask3_fnmadd_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2761 __mmask8 __U
, const int __R
)
2763 return (__m256d
) __builtin_ia32_vfnmaddpd256_mask3_round ((__v4df
) __A
,
2770 extern __inline __m256d
2771 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2772 _mm256_maskz_fnmadd_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2773 __m256d __D
, const int __R
)
2775 return (__m256d
) __builtin_ia32_vfnmaddpd256_maskz_round ((__v4df
) __A
,
2782 extern __inline __m256h
2783 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2784 _mm256_fnmadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2787 __builtin_ia32_vfnmaddph256_mask_round ((__v16hf
) __A
,
2794 extern __inline __m256h
2795 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2796 _mm256_mask_fnmadd_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2797 __m256h __D
, const int __R
)
2800 __builtin_ia32_vfnmaddph256_mask_round ((__v16hf
) __A
,
2807 extern __inline __m256h
2808 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2809 _mm256_mask3_fnmadd_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2810 __mmask16 __U
, const int __R
)
2813 __builtin_ia32_vfnmaddph256_mask3_round ((__v16hf
) __A
,
2820 extern __inline __m256h
2821 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2822 _mm256_maskz_fnmadd_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2823 __m256h __D
, const int __R
)
2826 __builtin_ia32_vfnmaddph256_maskz_round ((__v16hf
) __A
,
2833 extern __inline __m256
2834 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2835 _mm256_fnmadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2837 return (__m256
) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf
) __A
,
2844 extern __inline __m256
2845 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2846 _mm256_mask_fnmadd_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2847 __m256 __D
, const int __R
)
2849 return (__m256
) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf
) __A
,
2856 extern __inline __m256
2857 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2858 _mm256_mask3_fnmadd_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
2859 __mmask8 __U
, const int __R
)
2861 return (__m256
) __builtin_ia32_vfnmaddps256_mask3_round ((__v8sf
) __A
,
2868 extern __inline __m256
2869 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2870 _mm256_maskz_fnmadd_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
2871 __m256 __D
, const int __R
)
2873 return (__m256
) __builtin_ia32_vfnmaddps256_maskz_round ((__v8sf
) __A
,
2880 extern __inline __m256d
2881 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2882 _mm256_fnmsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
, const int __R
)
2884 return (__m256d
) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df
) __A
,
2891 extern __inline __m256d
2892 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2893 _mm256_mask_fnmsub_round_pd (__m256d __A
, __mmask8 __U
, __m256d __B
,
2894 __m256d __D
, const int __R
)
2896 return (__m256d
) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df
) __A
,
2903 extern __inline __m256d
2904 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2905 _mm256_mask3_fnmsub_round_pd (__m256d __A
, __m256d __B
, __m256d __D
,
2906 __mmask8 __U
, const int __R
)
2908 return (__m256d
) __builtin_ia32_vfnmsubpd256_mask3_round ((__v4df
) __A
,
2915 extern __inline __m256d
2916 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2917 _mm256_maskz_fnmsub_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
2918 __m256d __D
, const int __R
)
2920 return (__m256d
) __builtin_ia32_vfnmsubpd256_maskz_round ((__v4df
) __A
,
2927 extern __inline __m256h
2928 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2929 _mm256_fnmsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
, const int __R
)
2932 __builtin_ia32_vfnmsubph256_mask_round ((__v16hf
) __A
,
2939 extern __inline __m256h
2940 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2941 _mm256_mask_fnmsub_round_ph (__m256h __A
, __mmask16 __U
, __m256h __B
,
2942 __m256h __D
, const int __R
)
2945 __builtin_ia32_vfnmsubph256_mask_round ((__v16hf
) __A
,
2952 extern __inline __m256h
2953 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2954 _mm256_mask3_fnmsub_round_ph (__m256h __A
, __m256h __B
, __m256h __D
,
2955 __mmask16 __U
, const int __R
)
2958 __builtin_ia32_vfnmsubph256_mask3_round ((__v16hf
) __A
,
2965 extern __inline __m256h
2966 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2967 _mm256_maskz_fnmsub_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
2968 __m256h __D
, const int __R
)
2971 __builtin_ia32_vfnmsubph256_maskz_round ((__v16hf
) __A
,
2978 extern __inline __m256
2979 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2980 _mm256_fnmsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
, const int __R
)
2982 return (__m256
) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf
) __A
,
2989 extern __inline __m256
2990 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
2991 _mm256_mask_fnmsub_round_ps (__m256 __A
, __mmask8 __U
, __m256 __B
,
2992 __m256 __D
, const int __R
)
2994 return (__m256
) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf
) __A
,
3001 extern __inline __m256
3002 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3003 _mm256_mask3_fnmsub_round_ps (__m256 __A
, __m256 __B
, __m256 __D
,
3004 __mmask8 __U
, const int __R
)
3006 return (__m256
) __builtin_ia32_vfnmsubps256_mask3_round ((__v8sf
) __A
,
3013 extern __inline __m256
3014 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3015 _mm256_maskz_fnmsub_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3016 __m256 __D
, const int __R
)
3018 return (__m256
) __builtin_ia32_vfnmsubps256_maskz_round ((__v8sf
) __A
,
3025 extern __inline __m256d
3026 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3027 _mm256_getexp_round_pd (__m256d __A
, const int __R
)
3030 (__m256d
) __builtin_ia32_getexppd256_mask_round ((__v4df
) __A
,
3032 _mm256_undefined_pd (),
3037 extern __inline __m256d
3038 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3039 _mm256_mask_getexp_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3042 return (__m256d
) __builtin_ia32_getexppd256_mask_round ((__v4df
) __A
,
3048 extern __inline __m256d
3049 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3050 _mm256_maskz_getexp_round_pd (__mmask8 __U
, __m256d __A
, const int __R
)
3052 return (__m256d
) __builtin_ia32_getexppd256_mask_round ((__v4df
) __A
,
3054 _mm256_setzero_pd (),
3059 extern __inline __m256h
3060 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3061 _mm256_getexp_round_ph (__m256h __A
, const int __R
)
3063 return (__m256h
) __builtin_ia32_getexpph256_mask_round ((__v16hf
) __A
,
3065 _mm256_setzero_ph (),
3070 extern __inline __m256h
3071 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3072 _mm256_mask_getexp_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3075 return (__m256h
) __builtin_ia32_getexpph256_mask_round ((__v16hf
) __A
,
3081 extern __inline __m256h
3082 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3083 _mm256_maskz_getexp_round_ph (__mmask16 __U
, __m256h __A
, const int __R
)
3085 return (__m256h
) __builtin_ia32_getexpph256_mask_round ((__v16hf
) __A
,
3087 _mm256_setzero_ph (),
3092 extern __inline __m256
3093 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3094 _mm256_getexp_round_ps (__m256 __A
, const int __R
)
3096 return (__m256
) __builtin_ia32_getexpps256_mask_round ((__v8sf
) __A
,
3098 _mm256_undefined_ps (),
3103 extern __inline __m256
3104 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3105 _mm256_mask_getexp_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3108 return (__m256
) __builtin_ia32_getexpps256_mask_round ((__v8sf
) __A
,
3114 extern __inline __m256
3115 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3116 _mm256_maskz_getexp_round_ps (__mmask8 __U
, __m256 __A
, const int __R
)
3118 return (__m256
) __builtin_ia32_getexpps256_mask_round ((__v8sf
) __A
,
3120 _mm256_setzero_ps (),
3125 extern __inline __m256d
3126 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3127 _mm256_getmant_round_pd (__m256d __A
, _MM_MANTISSA_NORM_ENUM __B
,
3128 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3131 (__m256d
) __builtin_ia32_getmantpd256_mask_round ((__v4df
) __A
,
3133 _mm256_undefined_pd (),
3134 (__mmask8
) -1, __R
);
3137 extern __inline __m256d
3138 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3139 _mm256_mask_getmant_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3140 _MM_MANTISSA_NORM_ENUM __B
,
3141 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3143 return (__m256d
) __builtin_ia32_getmantpd256_mask_round ((__v4df
) __A
,
3149 extern __inline __m256d
3150 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3151 _mm256_maskz_getmant_round_pd (__mmask8 __U
, __m256d __A
,
3152 _MM_MANTISSA_NORM_ENUM __B
,
3153 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3155 return (__m256d
) __builtin_ia32_getmantpd256_mask_round ((__v4df
) __A
,
3158 _mm256_setzero_pd (),
3162 extern __inline __m256h
3163 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3164 _mm256_getmant_round_ph (__m256h __A
, _MM_MANTISSA_NORM_ENUM __B
,
3165 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3168 (__m256h
) __builtin_ia32_getmantph256_mask_round ((__v16hf
) __A
,
3170 _mm256_undefined_ph (),
3171 (__mmask16
) -1, __R
);
3174 extern __inline __m256h
3175 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3176 _mm256_mask_getmant_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3177 _MM_MANTISSA_NORM_ENUM __B
,
3178 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3180 return (__m256h
) __builtin_ia32_getmantph256_mask_round ((__v16hf
) __A
,
3186 extern __inline __m256h
3187 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3188 _mm256_maskz_getmant_round_ph (__mmask8 __U
, __m256h __A
,
3189 _MM_MANTISSA_NORM_ENUM __B
,
3190 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3192 return (__m256h
) __builtin_ia32_getmantph256_mask_round ((__v16hf
) __A
,
3195 _mm256_setzero_ph (),
3199 extern __inline __m256
3200 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3201 _mm256_getmant_round_ps (__m256 __A
, _MM_MANTISSA_NORM_ENUM __B
,
3202 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3205 (__m256
) __builtin_ia32_getmantps256_mask_round ((__v8sf
) __A
,
3207 _mm256_undefined_ps (),
3208 (__mmask8
) -1, __R
);
3211 extern __inline __m256
3212 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3213 _mm256_mask_getmant_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3214 _MM_MANTISSA_NORM_ENUM __B
,
3215 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3217 return (__m256
) __builtin_ia32_getmantps256_mask_round ((__v8sf
) __A
,
3223 extern __inline __m256
3224 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3225 _mm256_maskz_getmant_round_ps (__mmask8 __U
, __m256 __A
,
3226 _MM_MANTISSA_NORM_ENUM __B
,
3227 _MM_MANTISSA_SIGN_ENUM __C
, const int __R
)
3229 return (__m256
) __builtin_ia32_getmantps256_mask_round ((__v8sf
) __A
,
3232 _mm256_setzero_ps (),
3236 extern __inline __m256d
3237 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3238 _mm256_max_round_pd (__m256d __A
, __m256d __B
, const int __R
)
3240 return (__m256d
) __builtin_ia32_maxpd256_mask_round ((__v4df
) __A
,
3243 _mm256_undefined_pd (),
3248 extern __inline __m256d
3249 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3250 _mm256_mask_max_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3251 __m256d __B
, const int __R
)
3253 return (__m256d
) __builtin_ia32_maxpd256_mask_round ((__v4df
) __A
,
3260 extern __inline __m256d
3261 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3262 _mm256_maskz_max_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
3265 return (__m256d
) __builtin_ia32_maxpd256_mask_round ((__v4df
) __A
,
3268 _mm256_setzero_pd (),
3273 extern __inline __m256h
3274 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3275 _mm256_max_round_ph (__m256h __A
, __m256h __B
, const int __R
)
3277 return (__m256h
) __builtin_ia32_maxph256_mask_round ((__v16hf
) __A
,
3280 _mm256_undefined_ph (),
3285 extern __inline __m256h
3286 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3287 _mm256_mask_max_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3288 __m256h __B
, const int __R
)
3290 return (__m256h
) __builtin_ia32_maxph256_mask_round ((__v16hf
) __A
,
3297 extern __inline __m256h
3298 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3299 _mm256_maskz_max_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
3302 return (__m256h
) __builtin_ia32_maxph256_mask_round ((__v16hf
) __A
,
3305 _mm256_setzero_ph (),
3310 extern __inline __m256
3311 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3312 _mm256_max_round_ps (__m256 __A
, __m256 __B
, const int __R
)
3314 return (__m256
) __builtin_ia32_maxps256_mask_round ((__v8sf
) __A
,
3317 _mm256_undefined_ps (),
3322 extern __inline __m256
3323 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3324 _mm256_mask_max_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
3327 return (__m256
) __builtin_ia32_maxps256_mask_round ((__v8sf
) __A
,
3334 extern __inline __m256
3335 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3336 _mm256_maskz_max_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3339 return (__m256
) __builtin_ia32_maxps256_mask_round ((__v8sf
) __A
,
3342 _mm256_setzero_ps (),
3347 extern __inline __m256d
3348 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3349 _mm256_min_round_pd (__m256d __A
, __m256d __B
, const int __R
)
3351 return (__m256d
) __builtin_ia32_minpd256_mask_round ((__v4df
) __A
,
3354 _mm256_undefined_pd (),
3359 extern __inline __m256d
3360 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3361 _mm256_mask_min_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3362 __m256d __B
, const int __R
)
3364 return (__m256d
) __builtin_ia32_minpd256_mask_round ((__v4df
) __A
,
3371 extern __inline __m256d
3372 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3373 _mm256_maskz_min_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
3376 return (__m256d
) __builtin_ia32_minpd256_mask_round ((__v4df
) __A
,
3379 _mm256_setzero_pd (),
3384 extern __inline __m256h
3385 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3386 _mm256_min_round_ph (__m256h __A
, __m256h __B
, const int __R
)
3388 return (__m256h
) __builtin_ia32_minph256_mask_round ((__v16hf
) __A
,
3391 _mm256_undefined_ph (),
3396 extern __inline __m256h
3397 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3398 _mm256_mask_min_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3399 __m256h __B
, const int __R
)
3401 return (__m256h
) __builtin_ia32_minph256_mask_round ((__v16hf
) __A
,
3408 extern __inline __m256h
3409 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3410 _mm256_maskz_min_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
3413 return (__m256h
) __builtin_ia32_minph256_mask_round ((__v16hf
) __A
,
3416 _mm256_setzero_ph (),
3421 extern __inline __m256
3422 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3423 _mm256_min_round_ps (__m256 __A
, __m256 __B
, const int __R
)
3425 return (__m256
) __builtin_ia32_minps256_mask_round ((__v8sf
) __A
,
3428 _mm256_undefined_ps (),
3433 extern __inline __m256
3434 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3435 _mm256_mask_min_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
3438 return (__m256
) __builtin_ia32_minps256_mask_round ((__v8sf
) __A
,
3445 extern __inline __m256
3446 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3447 _mm256_maskz_min_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3450 return (__m256
) __builtin_ia32_minps256_mask_round ((__v8sf
) __A
,
3453 _mm256_setzero_ps (),
3458 extern __inline __m256d
3459 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3460 _mm256_mul_round_pd (__m256d __A
, __m256d __B
, const int __R
)
3462 return (__m256d
) __builtin_ia32_mulpd256_mask_round ((__v4df
) __A
,
3465 _mm256_undefined_pd (),
3470 extern __inline __m256d
3471 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3472 _mm256_mask_mul_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3473 __m256d __B
, const int __R
)
3475 return (__m256d
) __builtin_ia32_mulpd256_mask_round ((__v4df
) __A
,
3482 extern __inline __m256d
3483 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3484 _mm256_maskz_mul_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
3487 return (__m256d
) __builtin_ia32_mulpd256_mask_round ((__v4df
) __A
,
3490 _mm256_setzero_pd (),
3495 extern __inline __m256h
3496 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3497 _mm256_mul_round_ph (__m256h __A
, __m256h __B
, const int __R
)
3499 return (__m256h
) __builtin_ia32_mulph256_mask_round ((__v16hf
) __A
,
3502 _mm256_undefined_ph (),
3507 extern __inline __m256h
3508 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3509 _mm256_mask_mul_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3510 __m256h __B
, const int __R
)
3512 return (__m256h
) __builtin_ia32_mulph256_mask_round ((__v16hf
) __A
,
3519 extern __inline __m256h
3520 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3521 _mm256_maskz_mul_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
3524 return (__m256h
) __builtin_ia32_mulph256_mask_round ((__v16hf
) __A
,
3527 _mm256_setzero_ph (),
3532 extern __inline __m256
3533 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3534 _mm256_mul_round_ps (__m256 __A
, __m256 __B
, const int __R
)
3536 return (__m256
) __builtin_ia32_mulps256_mask_round ((__v8sf
) __A
,
3539 _mm256_undefined_ps (),
3544 extern __inline __m256
3545 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3546 _mm256_mask_mul_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
3549 return (__m256
) __builtin_ia32_mulps256_mask_round ((__v8sf
) __A
,
3556 extern __inline __m256
3557 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3558 _mm256_maskz_mul_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3561 return (__m256
) __builtin_ia32_mulps256_mask_round ((__v8sf
) __A
,
3564 _mm256_setzero_ps (),
3569 extern __inline __m256d
3570 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3571 _mm256_range_round_pd (__m256d __A
, __m256d __B
, const int __C
,
3574 return (__m256d
) __builtin_ia32_rangepd256_mask_round ((__v4df
) __A
,
3578 _mm256_setzero_pd (),
3583 extern __inline __m256d
3584 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3585 _mm256_mask_range_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3586 __m256d __B
, const int __C
, const int __R
)
3588 return (__m256d
) __builtin_ia32_rangepd256_mask_round ((__v4df
) __A
,
3596 extern __inline __m256d
3597 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3598 _mm256_maskz_range_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
3599 const int __C
, const int __R
)
3601 return (__m256d
) __builtin_ia32_rangepd256_mask_round ((__v4df
) __A
,
3605 _mm256_setzero_pd (),
3610 extern __inline __m256
3611 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3612 _mm256_range_round_ps (__m256 __A
, __m256 __B
, const int __C
, const int __R
)
3614 return (__m256
) __builtin_ia32_rangeps256_mask_round ((__v8sf
) __A
,
3618 _mm256_setzero_ps (),
3623 extern __inline __m256
3624 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3625 _mm256_mask_range_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3626 __m256 __B
, const int __C
, const int __R
)
3628 return (__m256
) __builtin_ia32_rangeps256_mask_round ((__v8sf
) __A
,
3636 extern __inline __m256
3637 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3638 _mm256_maskz_range_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3639 const int __C
, const int __R
)
3641 return (__m256
) __builtin_ia32_rangeps256_mask_round ((__v8sf
) __A
,
3645 _mm256_setzero_ps (),
3650 extern __inline __m256d
3651 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3652 _mm256_reduce_round_pd (__m256d __A
, const int __C
, const int __R
)
3654 return (__m256d
) __builtin_ia32_reducepd256_mask_round ((__v4df
) __A
,
3657 _mm256_setzero_pd (),
3662 extern __inline __m256d
3663 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3664 _mm256_mask_reduce_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3665 const int __C
, const int __R
)
3667 return (__m256d
) __builtin_ia32_reducepd256_mask_round ((__v4df
) __A
,
3674 extern __inline __m256d
3675 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3676 _mm256_maskz_reduce_round_pd (__mmask8 __U
, __m256d __A
, const int __C
,
3679 return (__m256d
) __builtin_ia32_reducepd256_mask_round ((__v4df
) __A
,
3682 _mm256_setzero_pd (),
3687 extern __inline __m256h
3688 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3689 _mm256_reduce_round_ph (__m256h __A
, const int __C
, const int __R
)
3691 return (__m256h
) __builtin_ia32_reduceph256_mask_round ((__v16hf
) __A
,
3694 _mm256_setzero_ph (),
3699 extern __inline __m256h
3700 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3701 _mm256_mask_reduce_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3702 const int __C
, const int __R
)
3704 return (__m256h
) __builtin_ia32_reduceph256_mask_round ((__v16hf
) __A
,
3711 extern __inline __m256h
3712 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3713 _mm256_maskz_reduce_round_ph (__mmask16 __U
, __m256h __A
, const int __C
,
3716 return (__m256h
) __builtin_ia32_reduceph256_mask_round ((__v16hf
) __A
,
3719 _mm256_setzero_ph (),
3724 extern __inline __m256
3725 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3726 _mm256_reduce_round_ps (__m256 __A
, const int __C
, const int __R
)
3728 return (__m256
) __builtin_ia32_reduceps256_mask_round ((__v8sf
) __A
,
3731 _mm256_setzero_ps (),
3736 extern __inline __m256
3737 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3738 _mm256_mask_reduce_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3739 const int __C
, const int __R
)
3741 return (__m256
) __builtin_ia32_reduceps256_mask_round ((__v8sf
) __A
,
3748 extern __inline __m256
3749 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3750 _mm256_maskz_reduce_round_ps (__mmask8 __U
, __m256 __A
, const int __C
,
3753 return (__m256
) __builtin_ia32_reduceps256_mask_round ((__v8sf
) __A
,
3756 _mm256_setzero_ps (),
3761 extern __inline __m256d
3762 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3763 _mm256_roundscale_round_pd (__m256d __A
, const int __C
, const int __R
)
3766 (__m256d
) __builtin_ia32_rndscalepd256_mask_round ((__v4df
) __A
,
3769 _mm256_undefined_pd (),
3774 extern __inline __m256d
3775 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3776 _mm256_mask_roundscale_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3777 const int __C
, const int __R
)
3779 return (__m256d
) __builtin_ia32_rndscalepd256_mask_round ((__v4df
) __A
,
3786 extern __inline __m256d
3787 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3788 _mm256_maskz_roundscale_round_pd (__mmask8 __U
, __m256d __A
, const int __C
,
3792 (__m256d
) __builtin_ia32_rndscalepd256_mask_round ((__v4df
) __A
,
3795 _mm256_setzero_pd (),
3800 extern __inline __m256h
3801 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3802 _mm256_roundscale_round_ph (__m256h __A
, const int __C
, const int __R
)
3805 (__m256h
) __builtin_ia32_rndscaleph256_mask_round ((__v16hf
) __A
,
3808 _mm256_undefined_ph (),
3813 extern __inline __m256h
3814 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3815 _mm256_mask_roundscale_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3816 const int __C
, const int __R
)
3818 return (__m256h
) __builtin_ia32_rndscaleph256_mask_round ((__v16hf
) __A
,
3825 extern __inline __m256h
3826 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3827 _mm256_maskz_roundscale_round_ph (__mmask16 __U
, __m256h __A
, const int __C
,
3831 (__m256h
) __builtin_ia32_rndscaleph256_mask_round ((__v16hf
) __A
,
3834 _mm256_setzero_ph (),
3839 extern __inline __m256
3840 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3841 _mm256_roundscale_round_ps (__m256 __A
, const int __C
, const int __R
)
3844 (__m256
) __builtin_ia32_rndscaleps256_mask_round ((__v8sf
) __A
,
3847 _mm256_undefined_ps (),
3852 extern __inline __m256
3853 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3854 _mm256_mask_roundscale_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3855 const int __C
, const int __R
)
3857 return (__m256
) __builtin_ia32_rndscaleps256_mask_round ((__v8sf
) __A
,
3864 extern __inline __m256
3865 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3866 _mm256_maskz_roundscale_round_ps (__mmask8 __U
, __m256 __A
, const int __C
,
3869 return (__m256
) __builtin_ia32_rndscaleps256_mask_round ((__v8sf
) __A
,
3872 _mm256_setzero_ps (),
3877 extern __inline __m256d
3878 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3879 _mm256_scalef_round_pd (__m256d __A
, __m256d __B
, const int __R
)
3882 (__m256d
) __builtin_ia32_scalefpd256_mask_round ((__v4df
) __A
,
3885 _mm256_undefined_pd (),
3890 extern __inline __m256d
3891 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3892 _mm256_mask_scalef_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3893 __m256d __B
, const int __R
)
3895 return (__m256d
) __builtin_ia32_scalefpd256_mask_round ((__v4df
) __A
,
3902 extern __inline __m256d
3903 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3904 _mm256_maskz_scalef_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
3907 return (__m256d
) __builtin_ia32_scalefpd256_mask_round ((__v4df
) __A
,
3910 _mm256_setzero_pd (),
3915 extern __inline __m256h
3916 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3917 _mm256_scalef_round_ph (__m256h __A
, __m256h __B
, const int __R
)
3920 (__m256h
) __builtin_ia32_scalefph256_mask_round ((__v16hf
) __A
,
3923 _mm256_undefined_ph (),
3928 extern __inline __m256h
3929 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3930 _mm256_mask_scalef_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
3931 __m256h __B
, const int __R
)
3933 return (__m256h
) __builtin_ia32_scalefph256_mask_round ((__v16hf
) __A
,
3940 extern __inline __m256h
3941 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3942 _mm256_maskz_scalef_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
3945 return (__m256h
) __builtin_ia32_scalefph256_mask_round ((__v16hf
) __A
,
3948 _mm256_setzero_ph (),
3953 extern __inline __m256
3954 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3955 _mm256_scalef_round_ps (__m256 __A
, __m256 __B
, const int __R
)
3957 return (__m256
) __builtin_ia32_scalefps256_mask_round ((__v8sf
) __A
,
3960 _mm256_undefined_ps (),
3965 extern __inline __m256
3966 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3967 _mm256_mask_scalef_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3968 __m256 __B
, const int __R
)
3970 return (__m256
) __builtin_ia32_scalefps256_mask_round ((__v8sf
) __A
,
3977 extern __inline __m256
3978 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3979 _mm256_maskz_scalef_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
3982 return (__m256
) __builtin_ia32_scalefps256_mask_round ((__v8sf
) __A
,
3985 _mm256_setzero_ps (),
3990 extern __inline __m256d
3991 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
3992 _mm256_sqrt_round_pd (__m256d __A
, const int __R
)
3994 return (__m256d
) __builtin_ia32_sqrtpd256_mask_round ((__v4df
) __A
,
3996 _mm256_undefined_pd (),
4001 extern __inline __m256d
4002 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4003 _mm256_mask_sqrt_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
4006 return (__m256d
) __builtin_ia32_sqrtpd256_mask_round ((__v4df
) __A
,
4012 extern __inline __m256d
4013 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4014 _mm256_maskz_sqrt_round_pd (__mmask8 __U
, __m256d __A
, const int __R
)
4016 return (__m256d
) __builtin_ia32_sqrtpd256_mask_round ((__v4df
) __A
,
4018 _mm256_setzero_pd (),
4023 extern __inline __m256h
4024 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4025 _mm256_sqrt_round_ph (__m256h __A
, const int __R
)
4027 return (__m256h
) __builtin_ia32_sqrtph256_mask_round ((__v16hf
) __A
,
4029 _mm256_undefined_ph (),
4034 extern __inline __m256h
4035 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4036 _mm256_mask_sqrt_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
4039 return (__m256h
) __builtin_ia32_sqrtph256_mask_round ((__v16hf
) __A
,
4045 extern __inline __m256h
4046 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4047 _mm256_maskz_sqrt_round_ph (__mmask16 __U
, __m256h __A
, const int __R
)
4049 return (__m256h
) __builtin_ia32_sqrtph256_mask_round ((__v16hf
) __A
,
4051 _mm256_setzero_ph (),
4056 extern __inline __m256
4057 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4058 _mm256_sqrt_round_ps (__m256 __A
, const int __R
)
4060 return (__m256
) __builtin_ia32_sqrtps256_mask_round ((__v8sf
) __A
,
4062 _mm256_undefined_ps (),
4067 extern __inline __m256
4068 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4069 _mm256_mask_sqrt_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
4072 return (__m256
) __builtin_ia32_sqrtps256_mask_round ((__v8sf
) __A
,
4078 extern __inline __m256
4079 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4080 _mm256_maskz_sqrt_round_ps (__mmask8 __U
, __m256 __A
, const int __R
)
4082 return (__m256
) __builtin_ia32_sqrtps256_mask_round ((__v8sf
) __A
,
4084 _mm256_setzero_ps (),
4089 extern __inline __m256d
4090 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4091 _mm256_sub_round_pd (__m256d __A
, __m256d __B
, const int __R
)
4093 return (__m256d
) __builtin_ia32_subpd256_mask_round ((__v4df
) __A
,
4096 _mm256_undefined_pd (),
4101 extern __inline __m256d
4102 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4103 _mm256_mask_sub_round_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
4104 __m256d __B
, const int __R
)
4106 return (__m256d
) __builtin_ia32_subpd256_mask_round ((__v4df
) __A
,
4113 extern __inline __m256d
4114 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4115 _mm256_maskz_sub_round_pd (__mmask8 __U
, __m256d __A
, __m256d __B
,
4118 return (__m256d
) __builtin_ia32_subpd256_mask_round ((__v4df
) __A
,
4121 _mm256_setzero_pd (),
4126 extern __inline __m256h
4127 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4128 _mm256_sub_round_ph (__m256h __A
, __m256h __B
, const int __R
)
4130 return (__m256h
) __builtin_ia32_subph256_mask_round ((__v16hf
) __A
,
4133 _mm256_undefined_ph (),
4138 extern __inline __m256h
4139 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4140 _mm256_mask_sub_round_ph (__m256h __W
, __mmask16 __U
, __m256h __A
,
4141 __m256h __B
, const int __R
)
4143 return (__m256h
) __builtin_ia32_subph256_mask_round ((__v16hf
) __A
,
4150 extern __inline __m256h
4151 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4152 _mm256_maskz_sub_round_ph (__mmask16 __U
, __m256h __A
, __m256h __B
,
4155 return (__m256h
) __builtin_ia32_subph256_mask_round ((__v16hf
) __A
,
4158 _mm256_setzero_ph (),
4163 extern __inline __m256
4164 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4165 _mm256_sub_round_ps (__m256 __A
, __m256 __B
, const int __R
)
4167 return (__m256
) __builtin_ia32_subps256_mask_round ((__v8sf
) __A
,
4170 _mm256_undefined_ps (),
4175 extern __inline __m256
4176 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4177 _mm256_mask_sub_round_ps (__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
,
4180 return (__m256
) __builtin_ia32_subps256_mask_round ((__v8sf
) __A
,
4187 extern __inline __m256
4188 __attribute__ ((__gnu_inline__
, __always_inline__
, __artificial__
))
4189 _mm256_maskz_sub_round_ps (__mmask8 __U
, __m256 __A
, __m256 __B
,
4192 return (__m256
) __builtin_ia32_subps256_mask_round ((__v8sf
) __A
,
4195 _mm256_setzero_ps (),
4200 #define _mm256_add_round_pd(A, B, R) \
4201 ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
4204 (_mm256_undefined_pd ()), \
4208 #define _mm256_mask_add_round_pd(W, U, A, B, R) \
4209 ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
4215 #define _mm256_maskz_add_round_pd(U, A, B, R) \
4216 ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
4219 (_mm256_setzero_pd ()), \
4223 #define _mm256_add_round_ph(A, B, R) \
4224 ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
4227 (_mm256_undefined_ph ()), \
4231 #define _mm256_mask_add_round_ph(W, U, A, B, R) \
4232 ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
4238 #define _mm256_maskz_add_round_ph(U, A, B, R) \
4239 ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
4242 (_mm256_setzero_ph ()), \
4246 #define _mm256_add_round_ps(A, B, R) \
4247 ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
4250 (_mm256_undefined_ps ()), \
4254 #define _mm256_mask_add_round_ps(W, U, A, B, R) \
4255 ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
4261 #define _mm256_maskz_add_round_ps(U, A, B, R)\
4262 ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
4265 (_mm256_setzero_ps ()), \
4269 #define _mm256_cmp_round_pd_mask(A, B, C, R) \
4270 ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \
4276 #define _mm256_mask_cmp_round_pd_mask(U, A, B, C, R) \
4277 ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \
4283 #define _mm256_cmp_round_ph_mask(A, B, C, R) \
4284 ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \
4290 #define _mm256_mask_cmp_round_ph_mask(U, A, B, C, R) \
4291 ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \
4297 #define _mm256_cmp_round_ps_mask(A, B, C, R) \
4298 ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \
4304 #define _mm256_mask_cmp_round_ps_mask(U, A, B, C, R) \
4305 ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \
4311 #define _mm256_cvt_roundepi32_ph(A, R) \
4312 ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
4314 (_mm_setzero_ph ()), \
4318 #define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \
4319 ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
4324 #define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \
4325 ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
4327 (_mm_setzero_ph ()), \
4331 #define _mm256_cvt_roundepi32_ps(A, R) \
4332 ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
4334 (_mm256_undefined_ps ()), \
4338 #define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \
4339 ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
4344 #define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \
4345 ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
4347 (_mm256_setzero_ps ()), \
4351 #define _mm256_cvt_roundpd_ph(A, R) \
4352 ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
4353 (_mm_setzero_ph ()), \
4357 #define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \
4358 ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
4363 #define _mm256_maskz_cvt_roundpd_ph(U, A, R) \
4364 ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
4365 (_mm_setzero_ph ()), \
4369 #define _mm256_cvt_roundpd_ps(A, R) \
4370 ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
4372 (_mm_undefined_ps ()), \
4376 #define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \
4377 ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
4382 #define _mm256_maskz_cvt_roundpd_ps(U, A, R) \
4383 ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
4385 (_mm_setzero_ps ()), \
4389 #define _mm256_cvt_roundpd_epi32(A, R) \
4390 ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
4392 (_mm_undefined_si128 ()), \
4396 #define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \
4397 ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
4402 #define _mm256_maskz_cvt_roundpd_epi32(U, A, R)\
4403 ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
4405 (_mm_setzero_si128 ()), \
4409 #define _mm256_cvt_roundpd_epi64(A, R) \
4410 ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
4412 (_mm256_setzero_si256 ()), \
4416 #define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \
4417 ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
4422 #define _mm256_maskz_cvt_roundpd_epi64(U, A, R) \
4423 ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
4425 (_mm256_setzero_si256 ()), \
4429 #define _mm256_cvt_roundpd_epu32(A, R) \
4430 ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
4432 (_mm_undefined_si128 ()), \
4436 #define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \
4437 ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
4442 #define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \
4443 ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
4445 (_mm_setzero_si128 ()), \
4449 #define _mm256_cvt_roundpd_epu64(A, R) \
4450 ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
4452 (_mm256_setzero_si256 ()),\
4456 #define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \
4457 ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
4462 #define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \
4463 ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
4465 (_mm256_setzero_si256 ()),\
4469 #define _mm256_cvt_roundph_epi32(A, R) \
4470 ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
4472 (_mm256_setzero_si256 ()),\
4476 #define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \
4477 ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
4482 #define _mm256_maskz_cvt_roundph_epi32(U, A, R) \
4483 ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
4485 (_mm256_setzero_si256 ()),\
4489 #define _mm256_cvt_roundph_pd(A, R) \
4490 ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
4492 (_mm256_setzero_pd ()), \
4496 #define _mm256_mask_cvt_roundph_pd(W, U, A, R) \
4497 ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
4502 #define _mm256_maskz_cvt_roundph_pd(U, A, R) \
4503 ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
4505 (_mm256_setzero_pd ()), \
4509 #define _mm256_cvt_roundph_ps(A, R) \
4510 ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
4512 (_mm256_undefined_ps ()), \
4516 #define _mm256_mask_cvt_roundph_ps(W, U, A, R) \
4517 ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
4522 #define _mm256_maskz_cvt_roundph_ps(U, A, R) \
4523 ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
4525 (_mm256_setzero_ps ()), \
4529 #define _mm256_cvtx_roundph_ps(A, R) \
4530 ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
4532 (_mm256_setzero_ps ()), \
4536 #define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \
4537 ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
4542 #define _mm256_maskz_cvtx_roundph_ps(U, A, R) \
4543 ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
4545 (_mm256_setzero_ps ()), \
4549 #define _mm256_cvt_roundph_epi64(A, R) \
4550 ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
4552 (_mm256_setzero_si256 ()),\
4556 #define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \
4557 ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
4562 #define _mm256_maskz_cvt_roundph_epi64(U, A, R) \
4563 ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
4565 (_mm256_setzero_si256 ()),\
4569 #define _mm256_cvt_roundph_epu32(A, R) \
4571 __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
4573 (_mm256_setzero_si256 ()), \
4577 #define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \
4578 ((__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
4583 #define _mm256_maskz_cvt_roundph_epu32(U, A, R) \
4585 __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
4587 (_mm256_setzero_si256 ()), \
4591 #define _mm256_cvt_roundph_epu64(A, R) \
4593 __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
4595 (_mm256_setzero_si256 ()), \
4599 #define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \
4600 ((__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
4605 #define _mm256_maskz_cvt_roundph_epu64(U, A, R) \
4607 __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
4609 (_mm256_setzero_si256 ()), \
4613 #define _mm256_cvt_roundph_epu16(A, R) \
4615 __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
4617 (_mm256_undefined_si256 ()), \
4621 #define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \
4622 ((__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
4627 #define _mm256_maskz_cvt_roundph_epu16(U, A, R) \
4629 __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
4631 (_mm256_setzero_si256 ()), \
4635 #define _mm256_cvt_roundph_epi16(A, R) \
4637 __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
4639 (_mm256_undefined_si256 ()), \
4643 #define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \
4644 ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
4649 #define _mm256_maskz_cvt_roundph_epi16(U, A, R) \
4650 ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
4652 (_mm256_setzero_si256 ()), \
4656 #define _mm256_cvt_roundps_pd(A, R) \
4657 ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
4659 (_mm256_undefined_pd ()), \
4663 #define _mm256_mask_cvt_roundps_pd(W, U, A, R) \
4664 ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
4669 #define _mm256_maskz_cvt_roundps_pd(U, A, R) \
4670 ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
4672 (_mm256_setzero_pd ()), \
4676 #define _mm256_cvtx_roundps_ph(A, R) \
4677 ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
4679 (_mm_setzero_ph ()), \
4683 #define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \
4684 ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
4689 #define _mm256_maskz_cvtx_roundps_ph(U, A, R) \
4690 ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
4692 (_mm_setzero_ph ()), \
4696 #define _mm256_cvt_roundps_epi32(A, R) \
4698 __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
4700 (_mm256_undefined_si256 ()), \
4704 #define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \
4705 ((__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
4710 #define _mm256_maskz_cvt_roundps_epi32(U, A, R) \
4712 __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
4714 (_mm256_setzero_si256 ()), \
4718 #define _mm256_cvt_roundps_epi64(A, R) \
4719 ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
4721 (_mm256_setzero_si256 ()), \
4725 #define _mm256_mask_cvt_roundps_epi64(W, U, A, R) \
4726 ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
4731 #define _mm256_maskz_cvt_roundps_epi64(U, A, R) \
4732 ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
4734 (_mm256_setzero_si256 ()), \
4738 #define _mm256_cvt_roundps_epu32(A, R) \
4740 __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
4742 (_mm256_undefined_si256 ()), \
4746 #define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \
4747 ((__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
4752 #define _mm256_maskz_cvt_roundps_epu32(U, A, R) \
4754 __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
4756 (_mm256_setzero_si256 ()), \
4760 #define _mm256_cvt_roundps_epu64(B, R) \
4762 __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (B), \
4764 (_mm256_setzero_si256 ()), \
4768 #define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \
4769 ((__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \
4774 #define _mm256_maskz_cvt_roundps_epu64(U, A, R) \
4776 __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \
4778 (_mm256_setzero_si256 ()), \
4782 #define _mm256_cvt_roundepi64_pd(A, R) \
4783 ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
4785 (_mm256_setzero_pd ()), \
4789 #define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \
4790 ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
4795 #define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \
4796 ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
4798 (_mm256_setzero_pd ()), \
4802 #define _mm256_cvt_roundepi64_ph(A, R) \
4803 ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
4805 (_mm_setzero_ph ()), \
4809 #define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \
4810 ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
4815 #define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \
4816 ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
4818 (_mm_setzero_ph ()), \
4822 #define _mm256_cvt_roundepi64_ps(A, R) \
4823 ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
4825 (_mm_setzero_ps ()), \
4829 #define _mm256_mask_cvt_roundepi64_ps(W, U, A, R) \
4830 ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
4835 #define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \
4836 ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
4838 (_mm_setzero_ps ()), \
4842 #define _mm256_cvtt_roundpd_epi32(A, R) \
4843 ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
4845 (_mm_undefined_si128 ()), \
4849 #define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \
4850 ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
4855 #define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \
4856 ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
4858 (_mm_setzero_si128 ()), \
4862 #define _mm256_cvtt_roundpd_epi64(A, R) \
4864 __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
4866 (_mm256_setzero_si256 ()), \
4870 #define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \
4871 ((__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
4876 #define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \
4878 __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
4880 (_mm256_setzero_si256 ()), \
4884 #define _mm256_cvtt_roundpd_epu32(A, R) \
4886 __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
4888 (_mm_undefined_si128 ()), \
4892 #define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \
4893 ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
4898 #define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \
4899 ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
4901 (_mm_setzero_si128 ()), \
4905 #define _mm256_cvtt_roundpd_epu64(A, R) \
4907 __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
4909 (_mm256_setzero_si256 ()), \
4913 #define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \
4914 ((__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
4919 #define _mm256_maskz_cvtt_roundpd_epu64(U, A, R) \
4921 __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
4923 (_mm256_setzero_si256 ()), \
4927 #define _mm256_cvtt_roundph_epi32(A, R) \
4929 __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
4931 (_mm256_setzero_si256 ()), \
4935 #define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \
4936 ((__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
4941 #define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \
4943 __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
4945 (_mm256_setzero_si256 ()), \
4949 #define _mm256_cvtt_roundph_epi64(A, R) \
4951 __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
4953 (_mm256_setzero_si256 ()), \
4957 #define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \
4958 ((__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
4963 #define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \
4965 __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
4967 (_mm256_setzero_si256 ()), \
4971 #define _mm256_cvtt_roundph_epu32(A, R) \
4973 __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
4975 (_mm256_setzero_si256 ()), \
4979 #define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \
4980 ((__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
4985 #define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \
4987 __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
4989 (_mm256_setzero_si256 ()), \
4993 #define _mm256_cvtt_roundph_epu64(A, R) \
4995 __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
4997 (_mm256_setzero_si256 ()), \
5001 #define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \
5002 ((__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
5007 #define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \
5009 __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
5011 (_mm256_setzero_si256 ()), \
5015 #define _mm256_cvtt_roundph_epu16(A, R) \
5017 __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5019 (_mm256_setzero_si256 ()), \
5023 #define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \
5024 ((__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5029 #define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \
5031 __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5033 (_mm256_setzero_si256 ()), \
5037 #define _mm256_cvtt_roundph_epi16(A, R) \
5039 __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5041 (_mm256_setzero_si256 ()), \
5045 #define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \
5046 ((__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5051 #define _mm256_maskz_cvtt_roundph_epi16(U, A, R)\
5053 __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
5055 (_mm256_setzero_si256 ()), \
5059 #define _mm256_cvtt_roundps_epi32(A, R) \
5061 __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
5063 (_mm256_undefined_si256 ()), \
5067 #define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \
5068 ((__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
5073 #define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \
5075 __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
5077 (_mm256_setzero_si256 ()), \
5081 #define _mm256_cvtt_roundps_epi64(A, R) \
5082 ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
5084 (_mm256_setzero_si256 ()),\
5088 #define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \
5089 ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
5094 #define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \
5095 ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
5097 (_mm256_setzero_si256 ()),\
5101 #define _mm256_cvtt_roundps_epu32(A, R) \
5103 __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
5105 (_mm256_undefined_si256 ()), \
5109 #define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \
5110 ((__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
5115 #define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \
5117 __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
5119 (_mm256_setzero_si256 ()), \
5123 #define _mm256_cvtt_roundps_epu64(A, R) \
5125 __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
5127 (_mm256_setzero_si256 ()), \
5131 #define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \
5132 ((__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
5137 #define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \
5139 __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
5141 (_mm256_setzero_si256 ()), \
5145 #define _mm256_cvt_roundepu32_ph(A, R) \
5146 ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
5148 (_mm_setzero_ph ()), \
5152 #define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \
5153 ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
5158 #define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \
5159 ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
5161 (_mm_setzero_ph ()), \
5165 #define _mm256_cvt_roundepu32_ps(A, R) \
5166 ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
5168 (_mm256_undefined_ps ()), \
5172 #define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \
5173 ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
5178 #define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \
5179 ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
5181 (_mm256_setzero_ps ()), \
5185 #define _mm256_cvt_roundepu64_pd(A, R) \
5186 ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
5188 (_mm256_setzero_pd ()), \
5192 #define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \
5193 ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
5198 #define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \
5199 ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
5201 (_mm256_setzero_pd ()), \
5205 #define _mm256_cvt_roundepu64_ph(A, R) \
5206 ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
5208 (_mm_setzero_ph ()), \
5212 #define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \
5213 ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
5218 #define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \
5219 ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
5221 (_mm_setzero_ph ()), \
5225 #define _mm256_cvt_roundepu64_ps(A, R) \
5226 ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
5228 (_mm_setzero_ps ()), \
5232 #define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \
5233 ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
5238 #define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \
5239 ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
5241 (_mm_setzero_ps ()), \
5245 #define _mm256_cvt_roundepu16_ph(A, R) \
5246 ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
5248 (_mm256_setzero_ph ()), \
5252 #define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \
5253 ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
5258 #define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \
5259 ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
5261 (_mm256_setzero_ph ()), \
5265 #define _mm256_cvt_roundepi16_ph(A, R) \
5266 ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
5268 (_mm256_setzero_ph ()), \
5272 #define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \
5273 ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
5278 #define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \
5279 ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
5281 (_mm256_setzero_ph ()), \
5285 #define _mm256_div_round_pd(A, B, R) \
5286 ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
5289 (_mm256_undefined_pd ()), \
5293 #define _mm256_mask_div_round_pd(W, U, A, B, R) \
5294 ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
5300 #define _mm256_maskz_div_round_pd(U, A, B, R) \
5301 ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
5304 (_mm256_setzero_pd ()), \
5308 #define _mm256_div_round_ph(A, B, R) \
5309 ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
5312 (_mm256_setzero_ph ()), \
5316 #define _mm256_mask_div_round_ph(W, U, A, B, R) \
5317 ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
5323 #define _mm256_maskz_div_round_ph(U, A, B, R) \
5324 ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
5327 (_mm256_setzero_ph ()), \
5331 #define _mm256_div_round_ps(A, B, R) \
5332 ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
5335 (_mm256_undefined_ps ()), \
5339 #define _mm256_mask_div_round_ps(W, U, A, B, R) \
5340 ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
5346 #define _mm256_maskz_div_round_ps(U, A, B, R) \
5347 ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
5350 (_mm256_setzero_ps ()), \
5354 #define _mm256_fcmadd_round_pch(A, B, D, R) \
5355 (__m256h) __builtin_ia32_vfcmaddcph256_round ((A), (B), (D), (R))
5357 #define _mm256_mask_fcmadd_round_pch(A, U, B, D, R) \
5358 ((__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf)(A), \
5363 #define _mm256_mask3_fcmadd_round_pch(A, B, D, U, R) \
5364 ((__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((A), (B), (D), (U), (R)))
5366 #define _mm256_maskz_fcmadd_round_pch(U, A, B, D, R) \
5367 ((__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((A), (B), (D), (U), (R)))
5369 #define _mm256_fcmul_round_pch(A, B, R) \
5370 ((__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) (A), \
5374 #define _mm256_mask_fcmul_round_pch(W, U, A, B, R) \
5375 ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \
5381 #define _mm256_maskz_fcmul_round_pch(U, A, B, R) \
5382 ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \
5385 (_mm256_setzero_ph ()), \
5389 #define _mm256_fixupimm_round_pd(A, B, D, C, R) \
5390 ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \
5397 #define _mm256_mask_fixupimm_round_pd(A, U, B, D, C, R)\
5398 ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \
5405 #define _mm256_maskz_fixupimm_round_pd(U, A, B, D, C, R)\
5406 ((__m256d) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df) (A), \
5413 #define _mm256_fixupimm_round_ps(A, B, D, C, R)\
5414 ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \
5421 #define _mm256_mask_fixupimm_round_ps(A, U, B, D, C, R)\
5422 ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \
5429 #define _mm256_maskz_fixupimm_round_ps(U, A, B, D, C, R)\
5430 ((__m256) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf) (A), \
5437 #define _mm256_fmadd_round_pd(A, B, D, R) \
5438 ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, -1, R))
5440 #define _mm256_mask_fmadd_round_pd(A, U, B, D, R) \
5441 ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, U, R))
5443 #define _mm256_mask3_fmadd_round_pd(A, B, D, U, R) \
5444 ((__m256d) __builtin_ia32_vfmaddpd256_mask3_round (A, B, D, U, R))
5446 #define _mm256_maskz_fmadd_round_pd(U, A, B, D, R) \
5447 ((__m256d) __builtin_ia32_vfmaddpd256_maskz_round (A, B, D, U, R))
5449 #define _mm256_fmadd_round_ph(A, B, D, R) \
5450 ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, -1, R))
5452 #define _mm256_mask_fmadd_round_ph(A, U, B, D, R) \
5453 ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, U, R))
5455 #define _mm256_mask3_fmadd_round_ph(A, B, D, U, R) \
5456 ((__m256h) __builtin_ia32_vfmaddph256_mask3_round (A, B, D, U, R))
5458 #define _mm256_maskz_fmadd_round_ph(U, A, B, D, R) \
5459 ((__m256h) __builtin_ia32_vfmaddph256_maskz_round (A, B, D, U, R))
5461 #define _mm256_fmadd_round_ps(A, B, D, R) \
5462 ((__m256)__builtin_ia32_vfmaddps256_mask_round (A, B, D, -1, R))
5464 #define _mm256_mask_fmadd_round_ps(A, U, B, D, R) \
5465 ((__m256)__builtin_ia32_vfmaddps256_mask_round (A, B, D, U, R))
5467 #define _mm256_mask3_fmadd_round_ps(A, B, D, U, R) \
5468 ((__m256)__builtin_ia32_vfmaddps256_mask3_round (A, B, D, U, R))
5470 #define _mm256_maskz_fmadd_round_ps(U, A, B, D, R) \
5471 ((__m256)__builtin_ia32_vfmaddps256_maskz_round (A, B, D, U, R))
5473 #define _mm256_fmadd_round_pch(A, B, D, R) \
5474 (__m256h) __builtin_ia32_vfmaddcph256_round ((A), (B), (D), (R))
5476 #define _mm256_mask_fmadd_round_pch(A, U, B, D, R) \
5477 ((__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) (A), \
5482 #define _mm256_mask3_fmadd_round_pch(A, B, D, U, R) \
5483 (__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((A), (B), (D), (U), (R))
5485 #define _mm256_maskz_fmadd_round_pch(U, A, B, D, R) \
5486 (__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((A), (B), (D), (U), (R))
/* 256-bit fused multiply-add/subtract with rounding: even lanes compute
   A*B - D, odd lanes A*B + D (vfmaddsub).  Plain, merge-masked, mask3
   and zero-masked forms for pd / ph / ps element types.  */
5488 #define _mm256_fmaddsub_round_pd(A, B, D, R) \
5489 (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, -1, R)
5491 #define _mm256_mask_fmaddsub_round_pd(A, U, B, D, R) \
5492 (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, U, R)
5494 #define _mm256_mask3_fmaddsub_round_pd(A, B, D, U, R) \
5495 (__m256d)__builtin_ia32_vfmaddsubpd256_mask3_round (A, B, D, U, R)
5497 #define _mm256_maskz_fmaddsub_round_pd(U, A, B, D, R) \
5498 (__m256d)__builtin_ia32_vfmaddsubpd256_maskz_round (A, B, D, U, R)
5500 #define _mm256_fmaddsub_round_ph(A, B, D, R) \
5501 ((__m256h)__builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), -1, (R)))
5503 #define _mm256_mask_fmaddsub_round_ph(A, U, B, D, R) \
5504 ((__m256h)__builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), (U), (R)))
5506 #define _mm256_mask3_fmaddsub_round_ph(A, B, D, U, R) \
5507 ((__m256h)__builtin_ia32_vfmaddsubph256_mask3_round ((A), (B), (D), (U), (R)))
5509 #define _mm256_maskz_fmaddsub_round_ph(U, A, B, D, R) \
5510 ((__m256h)__builtin_ia32_vfmaddsubph256_maskz_round ((A), (B), (D), (U), (R)))
5512 #define _mm256_fmaddsub_round_ps(A, B, D, R) \
5513 (__m256)__builtin_ia32_vfmaddsubps256_mask_round (A, B, D, -1, R)
5515 #define _mm256_mask_fmaddsub_round_ps(A, U, B, D, R) \
5516 (__m256)__builtin_ia32_vfmaddsubps256_mask_round (A, B, D, U, R)
5518 #define _mm256_mask3_fmaddsub_round_ps(A, B, D, U, R) \
5519 (__m256)__builtin_ia32_vfmaddsubps256_mask3_round (A, B, D, U, R)
5521 #define _mm256_maskz_fmaddsub_round_ps(U, A, B, D, R) \
5522 (__m256)__builtin_ia32_vfmaddsubps256_maskz_round (A, B, D, U, R)
/* 256-bit fused multiply-subtract with rounding: A * B - D.
   Plain / merge-masked / mask3 / zero-masked forms for pd, ph, ps.  */
5524 #define _mm256_fmsub_round_pd(A, B, D, R) \
5525 (__m256d)__builtin_ia32_vfmsubpd256_mask_round (A, B, D, -1, R)
5527 #define _mm256_mask_fmsub_round_pd(A, U, B, D, R) \
5528 (__m256d)__builtin_ia32_vfmsubpd256_mask_round (A, B, D, U, R)
5530 #define _mm256_mask3_fmsub_round_pd(A, B, D, U, R) \
5531 (__m256d)__builtin_ia32_vfmsubpd256_mask3_round (A, B, D, U, R)
5533 #define _mm256_maskz_fmsub_round_pd(U, A, B, D, R) \
5534 (__m256d)__builtin_ia32_vfmsubpd256_maskz_round (A, B, D, U, R)
5536 #define _mm256_fmsub_round_ph(A, B, D, R) \
5537 ((__m256h)__builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), -1, (R)))
5539 #define _mm256_mask_fmsub_round_ph(A, U, B, D, R) \
5540 ((__m256h)__builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), (U), (R)))
5542 #define _mm256_mask3_fmsub_round_ph(A, B, D, U, R) \
5543 ((__m256h)__builtin_ia32_vfmsubph256_mask3_round ((A), (B), (D), (U), (R)))
5545 #define _mm256_maskz_fmsub_round_ph(U, A, B, D, R) \
5546 ((__m256h)__builtin_ia32_vfmsubph256_maskz_round ((A), (B), (D), (U), (R)))
5548 #define _mm256_fmsub_round_ps(A, B, D, R) \
5549 (__m256)__builtin_ia32_vfmsubps256_mask_round (A, B, D, -1, R)
5551 #define _mm256_mask_fmsub_round_ps(A, U, B, D, R) \
5552 (__m256)__builtin_ia32_vfmsubps256_mask_round (A, B, D, U, R)
5554 #define _mm256_mask3_fmsub_round_ps(A, B, D, U, R) \
5555 (__m256)__builtin_ia32_vfmsubps256_mask3_round (A, B, D, U, R)
5557 #define _mm256_maskz_fmsub_round_ps(U, A, B, D, R) \
5558 (__m256)__builtin_ia32_vfmsubps256_maskz_round (A, B, D, U, R)
/* 256-bit fused multiply-subtract/add with rounding: even lanes compute
   A*B + D, odd lanes A*B - D (vfmsubadd).  Plain / merge-masked / mask3
   / zero-masked forms for pd, ph, ps.  */
5560 #define _mm256_fmsubadd_round_pd(A, B, D, R) \
5561 (__m256d)__builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, -1, R)
5563 #define _mm256_mask_fmsubadd_round_pd(A, U, B, D, R) \
5564 (__m256d)__builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, U, R)
5566 #define _mm256_mask3_fmsubadd_round_pd(A, B, D, U, R) \
5567 (__m256d)__builtin_ia32_vfmsubaddpd256_mask3_round (A, B, D, U, R)
5569 #define _mm256_maskz_fmsubadd_round_pd(U, A, B, D, R) \
5570 (__m256d)__builtin_ia32_vfmsubaddpd256_maskz_round (A, B, D, U, R)
5572 #define _mm256_fmsubadd_round_ph(A, B, D, R) \
5573 ((__m256h)__builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), -1, (R)))
5575 #define _mm256_mask_fmsubadd_round_ph(A, U, B, D, R) \
5576 ((__m256h)__builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), (U), (R)))
5578 #define _mm256_mask3_fmsubadd_round_ph(A, B, D, U, R) \
5579 ((__m256h)__builtin_ia32_vfmsubaddph256_mask3_round ((A), (B), (D), (U), (R)))
5581 #define _mm256_maskz_fmsubadd_round_ph(U, A, B, D, R) \
5582 ((__m256h)__builtin_ia32_vfmsubaddph256_maskz_round ((A), (B), (D), (U), (R)))
5584 #define _mm256_fmsubadd_round_ps(A, B, D, R) \
5585 (__m256)__builtin_ia32_vfmsubaddps256_mask_round (A, B, D, -1, R)
5587 #define _mm256_mask_fmsubadd_round_ps(A, U, B, D, R) \
5588 (__m256)__builtin_ia32_vfmsubaddps256_mask_round (A, B, D, U, R)
5590 #define _mm256_mask3_fmsubadd_round_ps(A, B, D, U, R) \
5591 (__m256)__builtin_ia32_vfmsubaddps256_mask3_round (A, B, D, U, R)
5593 #define _mm256_maskz_fmsubadd_round_ps(U, A, B, D, R) \
5594 (__m256)__builtin_ia32_vfmsubaddps256_maskz_round (A, B, D, U, R)
/* Complex-FP16 multiply with rounding (vfmulcph), plain / merge-masked /
   zero-masked.  NOTE(review): the continuation lines carrying the
   remaining builtin arguments appear to be missing from this copy —
   verify against upstream avx10_2roundingintrin.h.  */
5596 #define _mm256_fmul_round_pch(B, D, R) \
5597 ((__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) (B), \
5601 #define _mm256_mask_fmul_round_pch(A, U, B, D, R) \
5602 ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \
5608 #define _mm256_maskz_fmul_round_pch(U, B, D, R) \
5609 ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \
5612 (_mm256_setzero_ph ()), \
/* 256-bit fused negated multiply-add with rounding: -(A * B) + D.  Plain / merge-masked / mask3 / zero-masked forms for pd, ph, ps.  */ \
5616 #define _mm256_fnmadd_round_pd(A, B, D, R) \
5617 (__m256d)__builtin_ia32_vfnmaddpd256_mask_round (A, B, D, -1, R)
5619 #define _mm256_mask_fnmadd_round_pd(A, U, B, D, R) \
5620 (__m256d)__builtin_ia32_vfnmaddpd256_mask_round (A, B, D, U, R)
5622 #define _mm256_mask3_fnmadd_round_pd(A, B, D, U, R) \
5623 (__m256d)__builtin_ia32_vfnmaddpd256_mask3_round (A, B, D, U, R)
5625 #define _mm256_maskz_fnmadd_round_pd(U, A, B, D, R) \
5626 (__m256d)__builtin_ia32_vfnmaddpd256_maskz_round (A, B, D, U, R)
5628 #define _mm256_fnmadd_round_ph(A, B, D, R) \
5629 ((__m256h)__builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), -1, (R)))
5631 #define _mm256_mask_fnmadd_round_ph(A, U, B, D, R) \
5632 ((__m256h)__builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), (U), (R)))
5634 #define _mm256_mask3_fnmadd_round_ph(A, B, D, U, R) \
5635 ((__m256h)__builtin_ia32_vfnmaddph256_mask3_round ((A), (B), (D), (U), (R)))
5637 #define _mm256_maskz_fnmadd_round_ph(U, A, B, D, R) \
5638 ((__m256h)__builtin_ia32_vfnmaddph256_maskz_round ((A), (B), (D), (U), (R)))
5640 #define _mm256_fnmadd_round_ps(A, B, D, R) \
5641 (__m256)__builtin_ia32_vfnmaddps256_mask_round (A, B, D, -1, R)
5643 #define _mm256_mask_fnmadd_round_ps(A, U, B, D, R) \
5644 (__m256)__builtin_ia32_vfnmaddps256_mask_round (A, B, D, U, R)
5646 #define _mm256_mask3_fnmadd_round_ps(A, B, D, U, R) \
5647 (__m256)__builtin_ia32_vfnmaddps256_mask3_round (A, B, D, U, R)
5649 #define _mm256_maskz_fnmadd_round_ps(U, A, B, D, R) \
5650 (__m256)__builtin_ia32_vfnmaddps256_maskz_round (A, B, D, U, R)
/* 256-bit fused negated multiply-subtract with rounding: -(A * B) - D.
   Plain / merge-masked / mask3 / zero-masked forms for pd, ph, ps.  */
5652 #define _mm256_fnmsub_round_pd(A, B, D, R) \
5653 (__m256d)__builtin_ia32_vfnmsubpd256_mask_round (A, B, D, -1, R)
5655 #define _mm256_mask_fnmsub_round_pd(A, U, B, D, R) \
5656 (__m256d)__builtin_ia32_vfnmsubpd256_mask_round (A, B, D, U, R)
5658 #define _mm256_mask3_fnmsub_round_pd(A, B, D, U, R) \
5659 (__m256d)__builtin_ia32_vfnmsubpd256_mask3_round (A, B, D, U, R)
5661 #define _mm256_maskz_fnmsub_round_pd(U, A, B, D, R) \
5662 (__m256d)__builtin_ia32_vfnmsubpd256_maskz_round (A, B, D, U, R)
5664 #define _mm256_fnmsub_round_ph(A, B, D, R) \
5665 ((__m256h)__builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), -1, (R)))
5667 #define _mm256_mask_fnmsub_round_ph(A, U, B, D, R) \
5668 ((__m256h)__builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), (U), (R)))
5670 #define _mm256_mask3_fnmsub_round_ph(A, B, D, U, R) \
5671 ((__m256h)__builtin_ia32_vfnmsubph256_mask3_round ((A), (B), (D), (U), (R)))
5673 #define _mm256_maskz_fnmsub_round_ph(U, A, B, D, R) \
5674 ((__m256h)__builtin_ia32_vfnmsubph256_maskz_round ((A), (B), (D), (U), (R)))
5676 #define _mm256_fnmsub_round_ps(A, B, D, R) \
5677 (__m256)__builtin_ia32_vfnmsubps256_mask_round (A, B, D, -1, R)
5679 #define _mm256_mask_fnmsub_round_ps(A, U, B, D, R) \
5680 (__m256)__builtin_ia32_vfnmsubps256_mask_round (A, B, D, U, R)
5682 #define _mm256_mask3_fnmsub_round_ps(A, B, D, U, R) \
5683 (__m256)__builtin_ia32_vfnmsubps256_mask3_round (A, B, D, U, R)
5685 #define _mm256_maskz_fnmsub_round_ps(U, A, B, D, R) \
5686 (__m256)__builtin_ia32_vfnmsubps256_maskz_round (A, B, D, U, R)
/* Extract the biased exponent of each element as a floating-point value
   (vgetexp), with rounding control: plain / merge-masked / zero-masked
   for pd, ph, ps.  NOTE(review): trailing continuation lines (mask and
   rounding arguments) appear to be missing from this copy — verify
   against upstream avx10_2roundingintrin.h.  */
5688 #define _mm256_getexp_round_pd(A, R) \
5689 ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
5691 (_mm256_undefined_pd ()), \
5695 #define _mm256_mask_getexp_round_pd(W, U, A, R) \
5696 ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
5701 #define _mm256_maskz_getexp_round_pd(U, A, R) \
5702 ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
5704 (_mm256_setzero_pd ()), \
5708 #define _mm256_getexp_round_ph(A, R)\
5709 ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
5711 (_mm256_setzero_ph ()), \
5715 #define _mm256_mask_getexp_round_ph(W, U, A, R)\
5716 ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
5721 #define _mm256_maskz_getexp_round_ph(U, A, R)\
5722 ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
5724 (_mm256_setzero_ph ()), \
5728 #define _mm256_getexp_round_ps(A, R)\
5729 ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
5731 (_mm256_undefined_ps ()), \
5735 #define _mm256_mask_getexp_round_ps(W, U, A, R)\
5736 ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
5741 #define _mm256_maskz_getexp_round_ps(U, A, R)\
5742 ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
5744 (_mm256_setzero_ps ()), \
/* Extract the normalized mantissa of each element (vgetmant).  B selects the interval, C the sign control; they are packed into one immediate as (C << 2) | B.  Plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): trailing argument lines appear missing in this copy — verify against upstream.  */ \
5748 #define _mm256_getmant_round_pd(A, B, C, R) \
5749 ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
5750 (int) (((C) << 2) | (B)), \
5751 (__v4df) (__m256d) \
5752 _mm256_undefined_pd (), \
5756 #define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \
5757 ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
5758 (int) (((C) << 2) | (B)), \
5759 (__v4df) (__m256d) (W), \
5763 #define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \
5764 ((__m256d)__builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
5765 (int) (((C) << 2) | (B)), \
5766 (__v4df) (__m256d) \
5767 _mm256_setzero_pd (), \
5772 #define _mm256_getmant_round_ph(A, B, C, R) \
5773 ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
5774 (int) (((C)<<2) | (B)), \
5775 (__v16hf) (__m256h) \
5776 _mm256_undefined_ph (), \
5780 #define _mm256_mask_getmant_round_ph(W, U, A, B, C, R) \
5781 ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
5782 (int) (((C)<<2) | (B)), \
5783 (__v16hf) (__m256h) (W), \
5787 #define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \
5788 ((__m256h)__builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
5789 (int) (((C)<<2) | (B)), \
5790 (__v16hf) (__m256h) \
5791 _mm256_setzero_ph (), \
5795 #define _mm256_getmant_round_ps(A, B, C, R) \
5796 ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
5797 (int) (((C)<<2) | (B)), \
5799 _mm256_undefined_ps (), \
5803 #define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \
5804 ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
5805 (int) (((C)<<2) | (B)), \
5806 (__v8sf) (__m256) (W), \
5810 #define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \
5811 ((__m256)__builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
5812 (int) (((C)<<2) | (B)), \
5814 _mm256_setzero_ps (), \
/* Per-element maximum with rounding control (vmax): plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
5818 #define _mm256_max_round_pd(A, B, R) \
5819 ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
5822 (_mm256_undefined_pd ()), \
5826 #define _mm256_mask_max_round_pd(W, U, A, B, R) \
5827 ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
5833 #define _mm256_maskz_max_round_pd(U, A, B, R) \
5834 ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
5837 (_mm256_setzero_pd ()), \
5841 #define _mm256_max_round_ph(A, B, R) \
5842 ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
5845 (_mm256_undefined_ph ()), \
5849 #define _mm256_mask_max_round_ph(W, U, A, B, R) \
5850 ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
5856 #define _mm256_maskz_max_round_ph(U, A, B, R) \
5857 ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
5860 (_mm256_setzero_ph ()), \
5864 #define _mm256_max_round_ps(A, B, R) \
5865 ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
5868 (_mm256_undefined_ps ()), \
5872 #define _mm256_mask_max_round_ps(W, U, A, B, R) \
5873 ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
5879 #define _mm256_maskz_max_round_ps(U, A, B, R) \
5880 ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
5883 (_mm256_setzero_ps ()), \
/* Per-element minimum with rounding control (vmin): plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
5887 #define _mm256_min_round_pd(A, B, R) \
5888 ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
5891 (_mm256_undefined_pd ()), \
5895 #define _mm256_mask_min_round_pd(W, U, A, B, R) \
5896 ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
5902 #define _mm256_maskz_min_round_pd(U, A, B, R) \
5903 ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
5906 (_mm256_setzero_pd ()), \
5910 #define _mm256_min_round_ph(A, B, R) \
5911 ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
5914 (_mm256_undefined_ph ()), \
5918 #define _mm256_mask_min_round_ph(W, U, A, B, R) \
5919 ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
5925 #define _mm256_maskz_min_round_ph(U, A, B, R) \
5926 ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
5929 (_mm256_setzero_ph ()), \
5933 #define _mm256_min_round_ps(A, B, R) \
5934 ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
5937 (_mm256_undefined_ps ()), \
5941 #define _mm256_mask_min_round_ps(W, U, A, B, R) \
5942 ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
5948 #define _mm256_maskz_min_round_ps(U, A, B, R) \
5949 ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
5952 (_mm256_setzero_ps ()), \
/* Per-element multiply with rounding control (vmul): plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
5956 #define _mm256_mul_round_pd(A, B, R) \
5957 ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
5960 (_mm256_undefined_pd ()), \
5964 #define _mm256_mask_mul_round_pd(W, U, A, B, R) \
5965 ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
5971 #define _mm256_maskz_mul_round_pd(U, A, B, R) \
5972 ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
5975 (_mm256_setzero_pd ()), \
5979 #define _mm256_mul_round_ph(A, B, R) \
5980 ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
5983 (_mm256_undefined_ph ()), \
5987 #define _mm256_mask_mul_round_ph(W, U, A, B, R) \
5988 ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
5994 #define _mm256_maskz_mul_round_ph(U, A, B, R) \
5995 ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
5998 (_mm256_setzero_ph ()), \
6002 #define _mm256_mul_round_ps(A, B, R) \
6003 ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
6006 (_mm256_undefined_ps ()), \
6010 #define _mm256_mask_mul_round_ps(W, U, A, B, R) \
6011 ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
6017 #define _mm256_maskz_mul_round_ps(U, A, B, R) \
6018 ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
6021 (_mm256_setzero_ps ()), \
/* vrange: range restriction (min/max/abs selection controlled by immediate C) with rounding, plain / merge-masked / zero-masked for pd and ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6025 #define _mm256_range_round_pd(A, B, C, R) \
6026 ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
6030 (_mm256_setzero_pd ()), \
6034 #define _mm256_mask_range_round_pd(W, U, A, B, C, R) \
6035 ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
6042 #define _mm256_maskz_range_round_pd(U, A, B, C, R) \
6043 ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
6047 (_mm256_setzero_pd ()), \
6051 #define _mm256_range_round_ps(A, B, C, R) \
6052 ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
6056 (_mm256_setzero_ps ()), \
6060 #define _mm256_mask_range_round_ps(W, U, A, B, C, R) \
6061 ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
6068 #define _mm256_maskz_range_round_ps(U, A, B, C, R) \
6069 ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
6073 (_mm256_setzero_ps ()), \
/* vreduce: per-element reduction transformation controlled by immediate C, with rounding; plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6077 #define _mm256_reduce_round_pd(A, C, R) \
6078 ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
6081 (_mm256_setzero_pd ()), \
6085 #define _mm256_mask_reduce_round_pd(W, U, A, C, R) \
6086 ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
6092 #define _mm256_maskz_reduce_round_pd(U, A, C, R) \
6093 ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
6096 (_mm256_setzero_pd ()), \
6100 #define _mm256_reduce_round_ph(A, C, R) \
6101 ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
6104 (_mm256_setzero_ph ()), \
6108 #define _mm256_mask_reduce_round_ph(W, U, A, C, R) \
6109 ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
6115 #define _mm256_maskz_reduce_round_ph(U, A, C, R) \
6116 ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
6119 (_mm256_setzero_ph ()), \
6123 #define _mm256_reduce_round_ps(A, C, R) \
6124 ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
6127 (_mm256_setzero_ps ()), \
6131 #define _mm256_mask_reduce_round_ps(W, U, A, C, R) \
6132 ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
6138 #define _mm256_maskz_reduce_round_ps(U, A, C, R) \
6139 ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
6142 (_mm256_setzero_ps ()), \
/* vrndscale: round each element to the precision selected by immediate C; plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6146 #define _mm256_roundscale_round_pd(A, C, R) \
6148 __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
6151 (_mm256_undefined_pd ()), \
6155 #define _mm256_mask_roundscale_round_pd(W, U, A, C, R) \
6156 ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
6162 #define _mm256_maskz_roundscale_round_pd(U, A, C, R) \
6163 ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
6166 (_mm256_setzero_pd ()), \
6170 #define _mm256_roundscale_round_ph(A, C, R) \
6172 __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
6175 (_mm256_undefined_ph ()), \
6179 #define _mm256_mask_roundscale_round_ph(W, U, A, C, R) \
6180 ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
6186 #define _mm256_maskz_roundscale_round_ph(U, A, C, R) \
6187 ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
6190 (_mm256_setzero_ph ()), \
6194 #define _mm256_roundscale_round_ps(A, C, R) \
6195 ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
6198 (_mm256_undefined_ps ()), \
6202 #define _mm256_mask_roundscale_round_ps(W, U, A, C, R) \
6203 ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
6209 #define _mm256_maskz_roundscale_round_ps(U, A, C, R) \
6210 ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
6213 (_mm256_setzero_ps ()), \
/* vscalef: scale A by 2^floor(B) per element, with rounding; plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6217 #define _mm256_scalef_round_pd(A, B, R) \
6218 ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
6221 (_mm256_undefined_pd ()), \
6225 #define _mm256_mask_scalef_round_pd(W, U, A, B, R) \
6226 ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
6232 #define _mm256_maskz_scalef_round_pd(U, A, B, R) \
6233 ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
6236 (_mm256_setzero_pd ()), \
6240 #define _mm256_scalef_round_ph(A, B, R) \
6241 ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
6244 (_mm256_undefined_ph ()), \
6248 #define _mm256_mask_scalef_round_ph(W, U, A, B, R) \
6249 ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
6255 #define _mm256_maskz_scalef_round_ph(U, A, B, R) \
6256 ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
6259 (_mm256_setzero_ph ()), \
6263 #define _mm256_scalef_round_ps(A, B, R) \
6264 ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
6267 (_mm256_undefined_ps ()), \
6271 #define _mm256_mask_scalef_round_ps(W, U, A, B, R) \
6272 ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
6278 #define _mm256_maskz_scalef_round_ps(U, A, B, R) \
6279 ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
6282 (_mm256_setzero_ps ()), \
/* Per-element square root with rounding control (vsqrt): plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6286 #define _mm256_sqrt_round_pd(A, R) \
6287 ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
6289 (_mm256_undefined_pd ()), \
6293 #define _mm256_mask_sqrt_round_pd(W, U, A, R) \
6294 ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
6299 #define _mm256_maskz_sqrt_round_pd(U, A, R) \
6300 ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
6302 (_mm256_setzero_pd ()), \
6306 #define _mm256_sqrt_round_ph(A, R) \
6307 ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
6309 (_mm256_undefined_ph ()), \
6313 #define _mm256_mask_sqrt_round_ph(W, U, A, R) \
6314 ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
6319 #define _mm256_maskz_sqrt_round_ph(U, A, R) \
6320 ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
6322 (_mm256_setzero_ph ()), \
6326 #define _mm256_sqrt_round_ps(A, R) \
6327 ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
6329 (_mm256_undefined_ps ()), \
6333 #define _mm256_mask_sqrt_round_ps(W, U, A, R) \
6334 ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
6339 #define _mm256_maskz_sqrt_round_ps(U, A, R) \
6340 ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
6342 (_mm256_setzero_ps ()), \
/* Per-element subtraction with rounding control (vsub): plain / merge-masked / zero-masked for pd, ph, ps.  NOTE(review): continuation lines appear truncated in this copy.  */ \
6346 #define _mm256_sub_round_pd(A, B, R) \
6347 ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
6350 (_mm256_undefined_pd ()), \
6354 #define _mm256_mask_sub_round_pd(W, U, A, B, R) \
6355 ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
6361 #define _mm256_maskz_sub_round_pd(U, A, B, R) \
6362 ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
6365 (_mm256_setzero_pd ()), \
6369 #define _mm256_sub_round_ph(A, B, R) \
6370 ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
6373 (_mm256_undefined_ph ()), \
6377 #define _mm256_mask_sub_round_ph(W, U, A, B, R) \
6378 ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
6384 #define _mm256_maskz_sub_round_ph(U, A, B, R) \
6385 ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
6388 (_mm256_setzero_ph ()), \
6392 #define _mm256_sub_round_ps(A, B, R) \
6393 ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
6396 (_mm256_undefined_ps ()), \
6400 #define _mm256_mask_sub_round_ps(W, U, A, B, R) \
6401 ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
6407 #define _mm256_maskz_sub_round_ps(U, A, B, R) \
6408 ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
6411 (_mm256_setzero_ps ()), \
6416 #define _mm256_cmul_round_pch(A, B, R) _mm256_fcmul_round_pch ((A), (B), (R))
/* Alias spellings: _mm256_[mask[z]_]cmul_round_pch forward to the
   fcmul (conjugate complex multiply) macros, and
   _mm256_[mask[z]_]mul_round_pch forward to the fmul (complex multiply)
   macros, matching Intel's dual naming for these intrinsics.  */
6417 #define _mm256_mask_cmul_round_pch(W, U, A, B, R) \
6418 _mm256_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
6419 #define _mm256_maskz_cmul_round_pch(U, A, B, R) \
6420 _mm256_maskz_fcmul_round_pch ((U), (A), (B), (R))
6422 #define _mm256_mul_round_pch(A, B, R) _mm256_fmul_round_pch ((A), (B), (R))
6423 #define _mm256_mask_mul_round_pch(W, U, A, B, R) \
6424 _mm256_mask_fmul_round_pch ((W), (U), (A), (B), (R))
6425 #define _mm256_maskz_mul_round_pch(U, A, B, R) \
6426 _mm256_maskz_fmul_round_pch ((U), (A), (B), (R))
/* If this header pushed the avx10.2-256 target (see the push_options
   block at the top of the file), restore the caller's options here.  */
6428 #ifdef __DISABLE_AVX10_2_256__
6429 #undef __DISABLE_AVX10_2_256__
6430 #pragma GCC pop_options
6431 #endif /* __DISABLE_AVX10_2_256__ */
6433 #endif /* _AVX10_2ROUNDINGINTRIN_H_INCLUDED */