/* Copyright (C) 2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx10_2roundingintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef _AVX10_2ROUNDINGINTRIN_H_INCLUDED
#define _AVX10_2ROUNDINGINTRIN_H_INCLUDED

#ifndef __AVX10_2_256__
#pragma GCC push_options
#pragma GCC target("avx10.2-256")
#define __DISABLE_AVX10_2_256__
#endif /* __AVX10_2_256__ */

#ifdef __OPTIMIZE__

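/* The rounding-mode operand of these intrinsics must fold to a
   compile-time constant, so the always-inline function forms below are
   only usable when optimizing; GCC's intrinsic headers normally provide
   equivalent macro definitions for the non-optimized case (that branch
   falls outside this excerpt).  */
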
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_round_pd (__m256d __A, __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_addpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (),
     (__mmask8) -1, __R);
}

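/* Usage sketch: the rounding operand must be an immediate, e.g.

     __m256d __sum = _mm256_add_round_pd (__a, __b,
					  _MM_FROUND_TO_NEAREST_INT
					  | _MM_FROUND_NO_EXC);

   rounds to nearest-even with exceptions suppressed, while
   _MM_FROUND_CUR_DIRECTION selects the rounding mode currently set
   in MXCSR.  */
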
extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
			  __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_addpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
			   const int __R)
{
  return (__m256d) __builtin_ia32_addpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (),
     (__mmask8) __U, __R);
}

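/* Each operation comes in three forms: the plain form computes all
   elements, the _mask_ form copies elements from __W where the
   corresponding bit of __U is clear, and the _maskz_ form zeroes
   those elements instead.  */
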
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_round_ph (__m256h __A, __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_addph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_undefined_ph (),
     (__mmask16) -1, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
			  __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_addph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
			   const int __R)
{
  return (__m256h) __builtin_ia32_addph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (),
     (__mmask16) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_round_ps (__m256 __A, __m256 __B, const int __R)
{
  return (__m256) __builtin_ia32_addps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (),
     (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
			  const int __R)
{
  return (__m256) __builtin_ia32_addps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
			   const int __R)
{
  return (__m256) __builtin_ia32_addps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (),
     (__mmask8) __U, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_round_pd_mask (__m256d __A, __m256d __B, const int __C,
			  const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd256_mask_round
    ((__v4df) __A, (__v4df) __B, __C, (__mmask8) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_round_pd_mask (__mmask8 __U, __m256d __A, __m256d __B,
			       const int __C, const int __R)
{
  return (__mmask8) __builtin_ia32_cmppd256_mask_round
    ((__v4df) __A, (__v4df) __B, __C, (__mmask8) __U, __R);
}

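/* Sketch of a masked compare (the predicate comes from the AVX
   comparison macros such as _CMP_LT_OS; for compares the rounding
   operand only requests suppress-all-exceptions):

     __mmask8 __m = _mm256_cmp_round_pd_mask (__a, __b, _CMP_LT_OS,
					      _MM_FROUND_NO_EXC);  */
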
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_round_ph_mask (__m256h __A, __m256h __B, const int __C,
			  const int __R)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, __C, (__mmask16) -1, __R);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_round_ph_mask (__mmask16 __U, __m256h __A, __m256h __B,
			       const int __C, const int __R)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, __C, (__mmask16) __U, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_round_ps_mask (__m256 __A, __m256 __B, const int __C, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, __C, (__mmask8) -1, __R);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_round_ps_mask (__mmask8 __U, __m256 __A, __m256 __B,
			       const int __C, const int __R)
{
  return (__mmask8) __builtin_ia32_cmpps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, __C, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi32_ph (__m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round
    ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi32_ph (__m128h __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round
    ((__v8si) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi32_ph (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtdq2ph256_mask_round
    ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

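/* Conversions that narrow the element type, such as the epi32 ->
   _Float16 group above, return a 128-bit vector even though the
   source operand is 256-bit.  */
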
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi32_ps (__m256i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtdq2ps256_mask_round
    ((__v8si) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi32_ps (__m256 __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m256) __builtin_ia32_cvtdq2ps256_mask_round
    ((__v8si) __A, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi32_ps (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtdq2ps256_mask_round
    ((__v8si) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_ph (__m256d __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round
    ((__v4df) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_ph (__m128h __W, __mmask8 __U, __m256d __A,
			    const int __R)
{
  return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round
    ((__v4df) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_ph (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtpd2ph256_mask_round
    ((__v4df) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_ps (__m256d __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtpd2ps256_mask_round
    ((__v4df) __A, (__v4sf) _mm_undefined_ps (), (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_ps (__m128 __W, __mmask8 __U, __m256d __A,
			    const int __R)
{
  return (__m128) __builtin_ia32_cvtpd2ps256_mask_round
    ((__v4df) __A, (__v4sf) __W, (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_ps (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtpd2ps256_mask_round
    ((__v4df) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_epi32 (__m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round
    ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A,
			       const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round
    ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2dq256_mask_round
    ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_epi64 (__m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round
    ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2qq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_epu32 (__m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round
    ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A,
			       const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round
    ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvtpd2udq256_mask_round
    ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundpd_epu64 (__m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epi32 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round
    ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2dq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_pd (__m128h __A, const int __R)
{
  return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round
    ((__v8hf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_pd (__m256d __W, __mmask8 __U, __m128h __A,
			    const int __R)
{
  return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round
    ((__v8hf) __A, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_pd (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256d) __builtin_ia32_vcvtph2pd256_mask_round
    ((__v8hf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_ps (__m128h __A, const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2ps256_mask_round
    ((__v8hf) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_ps (__m256 __W, __mmask8 __U, __m128h __A,
			    const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2ps256_mask_round
    ((__v8hf) __A, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_ps (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2ps256_mask_round
    ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtx_roundph_ps (__m128h __A, const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2psx256_mask_round
    ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtx_roundph_ps (__m256 __W, __mmask8 __U, __m128h __A,
			     const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2psx256_mask_round
    ((__v8hf) __A, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtx_roundph_ps (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256) __builtin_ia32_vcvtph2psx256_mask_round
    ((__v8hf) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epi64 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epi64 (__m256i __W, __mmask8 __U, __m128h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round
    ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epi64 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2qq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epu32 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epu32 (__m256i __W, __mmask8 __U, __m128h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round
    ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epu32 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2udq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epu64 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epu64 (__m256i __W, __mmask8 __U, __m128h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epu64 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epu16 (__m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_undefined_si256 (),
     (__mmask16) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundph_epi16 (__m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_undefined_si256 (),
     (__mmask16) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask_round
    ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtph2w256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundps_pd (__m128 __A, const int __R)
{
  return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round
    ((__v4sf) __A, (__v4df) _mm256_undefined_pd (), (__mmask8) -1, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundps_pd (__m256d __W, __mmask8 __U, __m128 __A,
			    const int __R)
{
  return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round
    ((__v4sf) __A, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundps_pd (__mmask8 __U, __m128 __A, const int __R)
{
  return (__m256d) __builtin_ia32_vcvtps2pd256_mask_round
    ((__v4sf) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtx_roundps_ph (__m256 __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round
    ((__v8sf) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtx_roundps_ph (__m128h __W, __mmask8 __U, __m256 __A,
			     const int __R)
{
  return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round
    ((__v8sf) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtx_roundps_ph (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtps2phx256_mask_round
    ((__v8sf) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundps_epi32 (__m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round
    ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvtps2dq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundps_epi64 (__m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask_round
    ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2qq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundps_epu32 (__m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2udq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2udq256_mask_round
    ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2udq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundps_epu64 (__m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A,
			       const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvtps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi64_pd (__m256i __A, const int __R)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round
    ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi64_pd (__m256d __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round
    ((__v4di) __A, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi64_pd (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m256d) __builtin_ia32_cvtqq2pd256_mask_round
    ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi64_ph (__m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi64_ph (__m128h __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi64_ph (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi64_ps (__m256i __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi64_ps (__m128 __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) __W, (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi64_ps (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundpd_epi32 (__m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round
    ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A,
				const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round
    ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2dq256_mask_round
    ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundpd_epi64 (__m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round
    ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundpd_epi64 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2qq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundpd_epu32 (__m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round
    ((__v4df) __A, (__v4si) _mm_undefined_si128 (), (__mmask8) -1, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A,
				const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round
    ((__v4df) __A, (__v4si) __W, (__mmask8) __U, __R);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m128i) __builtin_ia32_cvttpd2udq256_mask_round
    ((__v4df) __A, (__v4si) _mm_setzero_si128 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundpd_epu64 (__m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundpd_epu64 (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttpd2uqq256_mask_round
    ((__v4df) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epi32 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epi32 (__m256i __W, __mmask8 __U, __m128h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round
    ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epi32 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2dq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epi64 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epi64 (__m256i __W, __mmask8 __U, __m128h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round
    ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epi64 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2qq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epu32 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epu32 (__m256i __W, __mmask8 __U, __m128h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round
    ((__v8hf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epu32 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2udq256_mask_round
    ((__v8hf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epu64 (__m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epu64 (__m256i __W, __mmask8 __U, __m128h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epu64 (__mmask8 __U, __m128h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uqq256_mask_round
    ((__v8hf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epu16 (__m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epu16 (__m256i __W, __mmask16 __U, __m256h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epu16 (__mmask16 __U, __m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2uw256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundph_epi16 (__m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundph_epi16 (__m256i __W, __mmask16 __U, __m256h __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask_round
    ((__v16hf) __A, (__v16hi) __W, (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundph_epi16 (__mmask16 __U, __m256h __A, const int __R)
{
  return (__m256i) __builtin_ia32_vcvttph2w256_mask_round
    ((__v16hf) __A, (__v16hi) _mm256_setzero_si256 (),
     (__mmask16) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundps_epi32 (__m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2dq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundps_epi32 (__m256i __W, __mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2dq256_mask_round
    ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundps_epi32 (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2dq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundps_epi64 (__m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundps_epi64 (__m256i __W, __mmask8 __U, __m128 __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask_round
    ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundps_epi64 (__mmask8 __U, __m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2qq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundps_epu32 (__m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2udq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_undefined_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundps_epu32 (__m256i __W, __mmask8 __U, __m256 __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2udq256_mask_round
    ((__v8sf) __A, (__v8si) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundps_epu32 (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2udq256_mask_round
    ((__v8sf) __A, (__v8si) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtt_roundps_epu64 (__m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) -1, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtt_roundps_epu64 (__m256i __W, __mmask8 __U, __m128 __A,
				const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) __W, (__mmask8) __U, __R);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtt_roundps_epu64 (__mmask8 __U, __m128 __A, const int __R)
{
  return (__m256i) __builtin_ia32_cvttps2uqq256_mask_round
    ((__v4sf) __A, (__v4di) _mm256_setzero_si256 (), (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu32_ph (__m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round
    ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu32_ph (__m128h __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round
    ((__v8si) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu32_ph (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtudq2ph256_mask_round
    ((__v8si) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu32_ps (__m256i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtudq2ps256_mask_round
    ((__v8si) __A, (__v8sf) _mm256_undefined_ps (), (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu32_ps (__m256 __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m256) __builtin_ia32_cvtudq2ps256_mask_round
    ((__v8si) __A, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu32_ps (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m256) __builtin_ia32_cvtudq2ps256_mask_round
    ((__v8si) __A, (__v8sf) _mm256_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu64_pd (__m256i __A, const int __R)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round
    ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) -1, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu64_pd (__m256d __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round
    ((__v4di) __A, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu64_pd (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m256d) __builtin_ia32_cvtuqq2pd256_mask_round
    ((__v4di) __A, (__v4df) _mm256_setzero_pd (), (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu64_ph (__m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) -1, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu64_ph (__m128h __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) __W, (__mmask8) __U, __R);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu64_ph (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round
    ((__v4di) __A, (__v8hf) _mm_setzero_ph (), (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu64_ps (__m256i __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) -1, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu64_ps (__m128 __W, __mmask8 __U, __m256i __A,
			       const int __R)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) __W, (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu64_ps (__mmask8 __U, __m256i __A, const int __R)
{
  return (__m128) __builtin_ia32_cvtuqq2ps256_mask_round
    ((__v4di) __A, (__v4sf) _mm_setzero_ps (), (__mmask8) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepu16_ph (__m256i __A, const int __R)
{
  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepu16_ph (__m256h __W, __mmask16 __U, __m256i __A,
			       const int __R)
{
  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) __W, (__mmask16) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepu16_ph (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m256h) __builtin_ia32_vcvtuw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvt_roundepi16_ph (__m256i __A, const int __R)
{
  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) -1, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvt_roundepi16_ph (__m256h __W, __mmask16 __U, __m256i __A,
			       const int __R)
{
  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) __W, (__mmask16) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvt_roundepi16_ph (__mmask16 __U, __m256i __A, const int __R)
{
  return (__m256h) __builtin_ia32_vcvtw2ph256_mask_round
    ((__v16hi) __A, (__v16hf) _mm256_setzero_ph (), (__mmask16) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_round_pd (__m256d __A, __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_divpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_undefined_pd (),
     (__mmask8) -1, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
			  __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_divpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) __W, (__mmask8) __U, __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
			   const int __R)
{
  return (__m256d) __builtin_ia32_divpd256_mask_round
    ((__v4df) __A, (__v4df) __B, (__v4df) _mm256_setzero_pd (),
     (__mmask8) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_round_ph (__m256h __A, __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_divph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (),
     (__mmask16) -1, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
			  __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_divph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __W, (__mmask16) __U, __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
			   const int __R)
{
  return (__m256h) __builtin_ia32_divph256_mask_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) _mm256_setzero_ph (),
     (__mmask16) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_round_ps (__m256 __A, __m256 __B, const int __R)
{
  return (__m256) __builtin_ia32_divps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_undefined_ps (),
     (__mmask8) -1, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
			  const int __R)
{
  return (__m256) __builtin_ia32_divps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) __W, (__mmask8) __U, __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
			   const int __R)
{
  return (__m256) __builtin_ia32_divps256_mask_round
    ((__v8sf) __A, (__v8sf) __B, (__v8sf) _mm256_setzero_ps (),
     (__mmask8) __U, __R);
}

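/* The _pch operations below treat each pair of adjacent _Float16
   elements as one complex number, so a 256-bit vector holds eight
   complex values and the mask operand is an __mmask8.  Per Intel's
   description, fcmadd multiplies the complex numbers in __A by the
   complex conjugates of those in __B and accumulates into __D.  */
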
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R)
{
  return (__m256h) __builtin_ia32_vfcmaddcph256_round
    ((__v16hf) __A, (__v16hf) __B, (__v16hf) __D, __R);
}

1947 extern __inline __m256h
1948 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1949 _mm256_mask_fcmadd_round_pch (__m256h __A, __mmask8 __U, __m256h __B,
1950 __m256h __D, const int __R)
1952 return (__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf) __A,
1953 (__v16hf) __B,
1954 (__v16hf) __D,
1955 __U,
1956 __R);
1959 extern __inline __m256h
1960 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1961 _mm256_mask3_fcmadd_round_pch (__m256h __A, __m256h __B, __m256h __D,
1962 __mmask8 __U, const int __R)
1964 return (__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((__v16hf) __A,
1965 (__v16hf) __B,
1966 (__v16hf) __D,
1967 __U,
1968 __R);
1971 extern __inline __m256h
1972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1973 _mm256_maskz_fcmadd_round_pch (__mmask8 __U, __m256h __A, __m256h __B,
1974 __m256h __D, const int __R)
1976 return (__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((__v16hf) __A,
1977 (__v16hf) __B,
1978 (__v16hf) __D,
1979 __U,
1980 __R);
1983 extern __inline __m256h
1984 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1985 _mm256_fcmul_round_pch (__m256h __A, __m256h __B, const int __R)
1987 return
1988 (__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) __A,
1989 (__v16hf) __B,
1990 __R);
1993 extern __inline __m256h
1994 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1995 _mm256_mask_fcmul_round_pch (__m256h __W, __mmask8 __U, __m256h __A,
1996 __m256h __B, const int __R)
1998 return (__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) __A,
1999 (__v16hf) __B,
2000 (__v16hf) __W,
2001 (__mmask16) __U,
2002 __R);
2005 extern __inline __m256h
2006 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2007 _mm256_maskz_fcmul_round_pch (__mmask8 __U, __m256h __A, __m256h __B,
2008 const int __R)
2010 return (__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) __A,
2011 (__v16hf) __B,
2012 (__v16hf)
2013 _mm256_setzero_ph (),
2014 (__mmask16) __U,
2015 __R);
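/* Note: in the masked complex-multiply forms (fcmul here, fmul further
   below) the __mmask8 argument is widened with a (__mmask16) cast only
   to match the builtin's mask operand type; selection is still one
   mask bit per complex element.  */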
2018 extern __inline __m256d
2019 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2020 _mm256_fixupimm_round_pd (__m256d __A, __m256d __B, __m256i __D,
2021 const int __C, const int __R)
2023 return (__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) __A,
2024 (__v4df) __B,
2025 (__v4di) __D,
2026 __C,
2027 (__mmask8) -1,
2028 __R);
2031 extern __inline __m256d
2032 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2033 _mm256_mask_fixupimm_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2034 __m256i __D, const int __C, const int __R)
2036 return (__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) __A,
2037 (__v4df) __B,
2038 (__v4di) __D,
2039 __C,
2040 (__mmask8) __U,
2041 __R);
2044 extern __inline __m256d
2045 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2046 _mm256_maskz_fixupimm_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2047 __m256i __D, const int __C, const int __R)
2049 return (__m256d) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df) __A,
2050 (__v4df) __B,
2051 (__v4di) __D,
2052 __C,
2053 (__mmask8) __U,
2054 __R);
2057 extern __inline __m256
2058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2059 _mm256_fixupimm_round_ps (__m256 __A, __m256 __B, __m256i __D, const int __C,
2060 const int __R)
2062 return (__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) __A,
2063 (__v8sf) __B,
2064 (__v8si) __D,
2065 __C,
2066 (__mmask8) -1,
2067 __R);
2070 extern __inline __m256
2071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2072 _mm256_mask_fixupimm_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2073 __m256i __D, const int __C, const int __R)
2075 return (__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) __A,
2076 (__v8sf) __B,
2077 (__v8si) __D,
2078 __C,
2079 (__mmask8) __U,
2080 __R);
2083 extern __inline __m256
2084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2085 _mm256_maskz_fixupimm_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2086 __m256i __D, const int __C, const int __R)
2088 return (__m256) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf) __A,
2089 (__v8sf) __B,
2090 (__v8si) __D,
2091 __C,
2092 (__mmask8) __U,
2093 __R);
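/* Usage sketch (illustrative; names hypothetical).  For fixupimm, __D
   supplies a per-element table of 4-bit response codes and the __C
   immediate selects which special-case input classes additionally
   signal floating-point exceptions, as documented for
   VFIXUPIMMPD/VFIXUPIMMPS:

     __m256d __r = _mm256_fixupimm_round_pd (__a, __b, __tbl, 0,
					     _MM_FROUND_CUR_DIRECTION);
*/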
2096 extern __inline __m256d
2097 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2098 _mm256_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2100 return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A,
2101 (__v4df) __B,
2102 (__v4df) __D,
2103 (__mmask8) -1,
2104 __R);
2107 extern __inline __m256d
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 _mm256_mask_fmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2110 __m256d __D, const int __R)
2112 return (__m256d) __builtin_ia32_vfmaddpd256_mask_round ((__v4df) __A,
2113 (__v4df) __B,
2114 (__v4df) __D,
2115 (__mmask8) __U, __R);
2118 extern __inline __m256d
2119 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2120 _mm256_mask3_fmadd_round_pd (__m256d __A, __m256d __B, __m256d __D,
2121 __mmask8 __U, const int __R)
2123 return (__m256d) __builtin_ia32_vfmaddpd256_mask3_round ((__v4df) __A,
2124 (__v4df) __B,
2125 (__v4df) __D,
2126 (__mmask8) __U,
2127 __R);
2130 extern __inline __m256d
2131 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2132 _mm256_maskz_fmadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2133 __m256d __D, const int __R)
2135 return (__m256d) __builtin_ia32_vfmaddpd256_maskz_round ((__v4df) __A,
2136 (__v4df) __B,
2137 (__v4df) __D,
2138 (__mmask8) __U,
2139 __R);
2142 extern __inline __m256h
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm256_fmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2146 return (__m256h) __builtin_ia32_vfmaddph256_mask_round ((__v16hf) __A,
2147 (__v16hf) __B,
2148 (__v16hf) __D,
2149 (__mmask16) -1,
2150 __R);
2153 extern __inline __m256h
2154 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2155 _mm256_mask_fmadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2156 __m256h __D, const int __R)
2158 return (__m256h) __builtin_ia32_vfmaddph256_mask_round ((__v16hf) __A,
2159 (__v16hf) __B,
2160 (__v16hf) __D,
2161 (__mmask16) __U,
2162 __R);
2165 extern __inline __m256h
2166 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2167 _mm256_mask3_fmadd_round_ph (__m256h __A, __m256h __B, __m256h __D,
2168 __mmask16 __U, const int __R)
2170 return (__m256h) __builtin_ia32_vfmaddph256_mask3_round ((__v16hf) __A,
2171 (__v16hf) __B,
2172 (__v16hf) __D,
2173 (__mmask16) __U,
2174 __R);
2177 extern __inline __m256h
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm256_maskz_fmadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2180 __m256h __D, const int __R)
2182 return (__m256h) __builtin_ia32_vfmaddph256_maskz_round ((__v16hf) __A,
2183 (__v16hf) __B,
2184 (__v16hf) __D,
2185 (__mmask16) __U,
2186 __R);
2189 extern __inline __m256
2190 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2191 _mm256_fmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2193 return (__m256) __builtin_ia32_vfmaddps256_mask_round ((__v8sf) __A,
2194 (__v8sf) __B,
2195 (__v8sf) __D,
2196 (__mmask8) -1,
2197 __R);
2200 extern __inline __m256
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm256_mask_fmadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2203 __m256 __D, const int __R)
2205 return (__m256) __builtin_ia32_vfmaddps256_mask_round ((__v8sf) __A,
2206 (__v8sf) __B,
2207 (__v8sf) __D,
2208 (__mmask8) __U,
2209 __R);
2212 extern __inline __m256
2213 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2214 _mm256_mask3_fmadd_round_ps (__m256 __A, __m256 __B, __m256 __D,
2215 __mmask8 __U, const int __R)
2217 return (__m256) __builtin_ia32_vfmaddps256_mask3_round ((__v8sf) __A,
2218 (__v8sf) __B,
2219 (__v8sf) __D,
2220 (__mmask8) __U,
2221 __R);
2224 extern __inline __m256
2225 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2226 _mm256_maskz_fmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2227 __m256 __D, const int __R)
2229 return (__m256) __builtin_ia32_vfmaddps256_maskz_round ((__v8sf) __A,
2230 (__v8sf) __B,
2231 (__v8sf) __D,
2232 (__mmask8) __U,
2233 __R);
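/* Note on the masking convention used by the FMA-style intrinsics in
   this header: for a zero mask bit, the mask forms keep the element of
   the first argument __A, the mask3 forms keep the element of __D, and
   the maskz forms zero it.  Illustrative sketch (hypothetical names),
   computing __a * __b + __c only where __m is set and keeping __a
   elsewhere:

     __m256d __d = _mm256_mask_fmadd_round_pd (__a, __m, __b, __c,
					       _MM_FROUND_TO_NEAREST_INT
					       | _MM_FROUND_NO_EXC);
*/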
2236 extern __inline __m256h
2237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2238 _mm256_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D, const int __R)
2240 return (__m256h) __builtin_ia32_vfmaddcph256_round ((__v16hf) __A,
2241 (__v16hf) __B,
2242 (__v16hf) __D,
2243 __R);
2246 extern __inline __m256h
2247 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1949 _mm256_mask_fmadd_round_pch (__m256h __A, __mmask8 __U, __m256h __B,
2249 __m256h __D, const int __R)
2251 return (__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) __A,
2252 (__v16hf) __B,
2253 (__v16hf) __D,
2254 __U,
2255 __R);
2258 extern __inline __m256h
2259 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2260 _mm256_mask3_fmadd_round_pch (__m256h __A, __m256h __B, __m256h __D,
2261 __mmask8 __U, const int __R)
2263 return (__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((__v16hf) __A,
2264 (__v16hf) __B,
2265 (__v16hf) __D,
2266 __U,
2267 __R);
2270 extern __inline __m256h
2271 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2272 _mm256_maskz_fmadd_round_pch (__mmask8 __U, __m256h __A, __m256h __B,
2273 __m256h __D, const int __R)
2275 return (__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((__v16hf) __A,
2276 (__v16hf) __B,
2277 (__v16hf) __D,
2278 __U,
2279 __R);
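/* Note: fmadd_round_pch is the non-conjugating counterpart of
   fcmadd_round_pch above; it computes __A * __B + __D on the eight
   packed complex _Float16 elements.  */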
2282 extern __inline __m256d
2283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284 _mm256_fmaddsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2286 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df) __A,
2287 (__v4df) __B,
2288 (__v4df) __D,
2289 (__mmask8) -1,
2290 __R);
2293 extern __inline __m256d
2294 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2295 _mm256_mask_fmaddsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2296 __m256d __D, const int __R)
2298 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask_round ((__v4df) __A,
2299 (__v4df) __B,
2300 (__v4df) __D,
2301 (__mmask8) __U,
2302 __R);
2305 extern __inline __m256d
2306 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2307 _mm256_mask3_fmaddsub_round_pd (__m256d __A, __m256d __B, __m256d __D,
2308 __mmask8 __U, const int __R)
2310 return (__m256d) __builtin_ia32_vfmaddsubpd256_mask3_round ((__v4df) __A,
2311 (__v4df) __B,
2312 (__v4df) __D,
2313 (__mmask8) __U,
2314 __R);
2317 extern __inline __m256d
2318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 _mm256_maskz_fmaddsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2320 __m256d __D, const int __R)
2322 return (__m256d) __builtin_ia32_vfmaddsubpd256_maskz_round ((__v4df) __A,
2323 (__v4df) __B,
2324 (__v4df) __D,
2325 (__mmask8) __U,
2326 __R);
2329 extern __inline __m256h
2330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2331 _mm256_fmaddsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2333 return (__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf) __A,
2334 (__v16hf) __B,
2335 (__v16hf) __D,
2336 (__mmask16) -1,
2337 __R);
2340 extern __inline __m256h
2341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2342 _mm256_mask_fmaddsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2343 __m256h __D, const int __R)
2345 return (__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((__v16hf) __A,
2346 (__v16hf) __B,
2347 (__v16hf) __D,
2348 (__mmask16) __U,
2349 __R);
2352 extern __inline __m256h
2353 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2354 _mm256_mask3_fmaddsub_round_ph (__m256h __A, __m256h __B, __m256h __D,
2355 __mmask16 __U, const int __R)
2357 return (__m256h) __builtin_ia32_vfmaddsubph256_mask3_round ((__v16hf) __A,
2358 (__v16hf) __B,
2359 (__v16hf) __D,
2360 (__mmask16) __U,
2361 __R);
2364 extern __inline __m256h
2365 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2366 _mm256_maskz_fmaddsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2367 __m256h __D, const int __R)
2369 return (__m256h) __builtin_ia32_vfmaddsubph256_maskz_round ((__v16hf) __A,
2370 (__v16hf) __B,
2371 (__v16hf) __D,
2372 (__mmask16) __U,
2373 __R);
2376 extern __inline __m256
2377 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2378 _mm256_fmaddsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2380 return (__m256) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf) __A,
2381 (__v8sf) __B,
2382 (__v8sf) __D,
2383 (__mmask8) -1,
2384 __R);
2387 extern __inline __m256
2388 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2389 _mm256_mask_fmaddsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2390 __m256 __D, const int __R)
2392 return (__m256) __builtin_ia32_vfmaddsubps256_mask_round ((__v8sf) __A,
2393 (__v8sf) __B,
2394 (__v8sf) __D,
2395 (__mmask8) __U,
2396 __R);
2399 extern __inline __m256
2400 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2401 _mm256_mask3_fmaddsub_round_ps (__m256 __A, __m256 __B, __m256 __D,
2402 __mmask8 __U, const int __R)
2404 return (__m256) __builtin_ia32_vfmaddsubps256_mask3_round ((__v8sf) __A,
2405 (__v8sf) __B,
2406 (__v8sf) __D,
2407 (__mmask8) __U,
2408 __R);
2411 extern __inline __m256
2412 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2413 _mm256_maskz_fmaddsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2414 __m256 __D, const int __R)
2416 return (__m256) __builtin_ia32_vfmaddsubps256_maskz_round ((__v8sf) __A,
2417 (__v8sf) __B,
2418 (__v8sf) __D,
2419 (__mmask8) __U,
2420 __R);
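/* Note: fmaddsub alternates the accumulate step across positions:
   even-indexed elements compute __A * __B - __D while odd-indexed
   elements compute __A * __B + __D (fmsubadd below swaps the two).
   Illustrative sketch (hypothetical names):

     __m256d __r = _mm256_fmaddsub_round_pd (__a, __b, __c,
					     _MM_FROUND_CUR_DIRECTION);
*/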
2423 extern __inline __m256d
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm256_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2427 return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A,
2428 (__v4df) __B,
2429 (__v4df) __D,
2430 (__mmask8) -1, __R);
2433 extern __inline __m256d
2434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2435 _mm256_mask_fmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2436 __m256d __D, const int __R)
2438 return (__m256d) __builtin_ia32_vfmsubpd256_mask_round ((__v4df) __A,
2439 (__v4df) __B,
2440 (__v4df) __D,
2441 (__mmask8) __U, __R);
2444 extern __inline __m256d
2445 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2446 _mm256_mask3_fmsub_round_pd (__m256d __A, __m256d __B, __m256d __D,
2447 __mmask8 __U, const int __R)
2449 return (__m256d) __builtin_ia32_vfmsubpd256_mask3_round ((__v4df) __A,
2450 (__v4df) __B,
2451 (__v4df) __D,
2452 (__mmask8) __U, __R);
2455 extern __inline __m256d
2456 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2457 _mm256_maskz_fmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2458 __m256d __D, const int __R)
2460 return (__m256d) __builtin_ia32_vfmsubpd256_maskz_round ((__v4df) __A,
2461 (__v4df) __B,
2462 (__v4df) __D,
2463 (__mmask8) __U, __R);
2466 extern __inline __m256h
2467 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2468 _mm256_fmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2470 return (__m256h)
2471 __builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A,
2472 (__v16hf) __B,
2473 (__v16hf) __D,
2474 (__mmask16) -1, __R);
2477 extern __inline __m256h
2478 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2479 _mm256_mask_fmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2480 __m256h __D, const int __R)
2482 return (__m256h)
2483 __builtin_ia32_vfmsubph256_mask_round ((__v16hf) __A,
2484 (__v16hf) __B,
2485 (__v16hf) __D,
2486 (__mmask16) __U, __R);
2489 extern __inline __m256h
2490 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2491 _mm256_mask3_fmsub_round_ph (__m256h __A, __m256h __B, __m256h __D,
2492 __mmask16 __U, const int __R)
2494 return (__m256h)
2495 __builtin_ia32_vfmsubph256_mask3_round ((__v16hf) __A,
2496 (__v16hf) __B,
2497 (__v16hf) __D,
2498 (__mmask16) __U, __R);
2501 extern __inline __m256h
2502 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2503 _mm256_maskz_fmsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2504 __m256h __D, const int __R)
2506 return (__m256h)
2507 __builtin_ia32_vfmsubph256_maskz_round ((__v16hf) __A,
2508 (__v16hf) __B,
2509 (__v16hf) __D,
2510 (__mmask16) __U, __R);
2513 extern __inline __m256
2514 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2515 _mm256_fmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2517 return (__m256) __builtin_ia32_vfmsubps256_mask_round ((__v8sf) __A,
2518 (__v8sf) __B,
2519 (__v8sf) __D,
2520 (__mmask8) -1, __R);
2523 extern __inline __m256
2524 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2525 _mm256_mask_fmsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2526 __m256 __D, const int __R)
2528 return (__m256) __builtin_ia32_vfmsubps256_mask_round ((__v8sf) __A,
2529 (__v8sf) __B,
2530 (__v8sf) __D,
2531 (__mmask8) __U, __R);
2534 extern __inline __m256
2535 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2536 _mm256_mask3_fmsub_round_ps (__m256 __A, __m256 __B, __m256 __D,
2537 __mmask8 __U, const int __R)
2539 return (__m256) __builtin_ia32_vfmsubps256_mask3_round ((__v8sf) __A,
2540 (__v8sf) __B,
2541 (__v8sf) __D,
2542 (__mmask8) __U, __R);
2545 extern __inline __m256
2546 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2547 _mm256_maskz_fmsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2548 __m256 __D, const int __R)
2550 return (__m256) __builtin_ia32_vfmsubps256_maskz_round ((__v8sf) __A,
2551 (__v8sf) __B,
2552 (__v8sf) __D,
2553 (__mmask8) __U, __R);
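/* Note: the fmsub forms compute __A * __B - __D per element; masking
   follows the same mask/mask3/maskz convention described after the
   fmadd block above.  */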
2556 extern __inline __m256d
2557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2558 _mm256_fmsubadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2560 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df) __A,
2561 (__v4df) __B,
2562 (__v4df) __D,
2563 (__mmask8) -1,
2564 __R);
2567 extern __inline __m256d
2568 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2569 _mm256_mask_fmsubadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2570 __m256d __D, const int __R)
2572 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask_round ((__v4df) __A,
2573 (__v4df) __B,
2574 (__v4df) __D,
2575 (__mmask8) __U,
2576 __R);
2579 extern __inline __m256d
2580 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2581 _mm256_mask3_fmsubadd_round_pd (__m256d __A, __m256d __B, __m256d __D,
2582 __mmask8 __U, const int __R)
2584 return (__m256d) __builtin_ia32_vfmsubaddpd256_mask3_round ((__v4df) __A,
2585 (__v4df) __B,
2586 (__v4df) __D,
2587 (__mmask8) __U,
2588 __R);
2591 extern __inline __m256d
2592 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2593 _mm256_maskz_fmsubadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2594 __m256d __D, const int __R)
2596 return (__m256d) __builtin_ia32_vfmsubaddpd256_maskz_round ((__v4df) __A,
2597 (__v4df) __B,
2598 (__v4df) __D,
2599 (__mmask8) __U,
2600 __R);
2603 extern __inline __m256h
2604 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2605 _mm256_fmsubadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2607 return (__m256h)
2608 __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf) __A,
2609 (__v16hf) __B,
2610 (__v16hf) __D,
2611 (__mmask16) -1,
2612 __R);
2615 extern __inline __m256h
2616 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2617 _mm256_mask_fmsubadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2618 __m256h __D, const int __R)
2620 return (__m256h)
2621 __builtin_ia32_vfmsubaddph256_mask_round ((__v16hf) __A,
2622 (__v16hf) __B,
2623 (__v16hf) __D,
2624 (__mmask16) __U,
2625 __R);
2628 extern __inline __m256h
2629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630 _mm256_mask3_fmsubadd_round_ph (__m256h __A, __m256h __B, __m256h __D,
2631 __mmask16 __U, const int __R)
2633 return (__m256h)
2634 __builtin_ia32_vfmsubaddph256_mask3_round ((__v16hf) __A,
2635 (__v16hf) __B,
2636 (__v16hf) __D,
2637 (__mmask16) __U,
2638 __R);
2641 extern __inline __m256h
2642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2643 _mm256_maskz_fmsubadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2644 __m256h __D, const int __R)
2646 return (__m256h)
2647 __builtin_ia32_vfmsubaddph256_maskz_round ((__v16hf) __A,
2648 (__v16hf) __B,
2649 (__v16hf) __D,
2650 (__mmask16) __U,
2651 __R);
2654 extern __inline __m256
2655 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2656 _mm256_fmsubadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2658 return (__m256) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf) __A,
2659 (__v8sf) __B,
2660 (__v8sf) __D,
2661 (__mmask8) -1,
2662 __R);
2665 extern __inline __m256
2666 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2667 _mm256_mask_fmsubadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2668 __m256 __D, const int __R)
2670 return (__m256) __builtin_ia32_vfmsubaddps256_mask_round ((__v8sf) __A,
2671 (__v8sf) __B,
2672 (__v8sf) __D,
2673 (__mmask8) __U,
2674 __R);
2677 extern __inline __m256
2678 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2679 _mm256_mask3_fmsubadd_round_ps (__m256 __A, __m256 __B, __m256 __D,
2680 __mmask8 __U, const int __R)
2682 return (__m256) __builtin_ia32_vfmsubaddps256_mask3_round ((__v8sf) __A,
2683 (__v8sf) __B,
2684 (__v8sf) __D,
2685 (__mmask8) __U,
2686 __R);
2689 extern __inline __m256
2690 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2691 _mm256_maskz_fmsubadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2692 __m256 __D, const int __R)
2694 return (__m256) __builtin_ia32_vfmsubaddps256_maskz_round ((__v8sf) __A,
2695 (__v8sf) __B,
2696 (__v8sf) __D,
2697 (__mmask8) __U,
2698 __R);
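/* Note: fmsubadd mirrors fmaddsub: even-indexed elements compute
   __A * __B + __D and odd-indexed elements __A * __B - __D.  */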
2701 extern __inline __m256h
2702 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2703 _mm256_fmul_round_pch (__m256h __B, __m256h __D, const int __R)
2705 return (__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) __B,
2706 (__v16hf) __D,
2707 __R);
2710 extern __inline __m256h
2711 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2712 _mm256_mask_fmul_round_pch (__m256h __A, __mmask8 __U, __m256h __B,
2713 __m256h __D, const int __R)
2715 return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B,
2716 (__v16hf) __D,
2717 (__v16hf) __A,
2718 (__mmask16) __U,
2719 __R);
2722 extern __inline __m256h
2723 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2724 _mm256_maskz_fmul_round_pch (__mmask8 __U, __m256h __B, __m256h __D,
2725 const int __R)
2727 return (__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) __B,
2728 (__v16hf) __D,
2729 (__v16hf)
2730 _mm256_setzero_ph (),
2731 (__mmask16) __U,
2732 __R);
2735 extern __inline __m256d
2736 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2737 _mm256_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2739 return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A,
2740 (__v4df) __B,
2741 (__v4df) __D,
2742 (__mmask8) -1,
2743 __R);
2746 extern __inline __m256d
2747 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2748 _mm256_mask_fnmadd_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2749 __m256d __D, const int __R)
2751 return (__m256d) __builtin_ia32_vfnmaddpd256_mask_round ((__v4df) __A,
2752 (__v4df) __B,
2753 (__v4df) __D,
2754 (__mmask8) __U,
2755 __R);
2758 extern __inline __m256d
2759 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2760 _mm256_mask3_fnmadd_round_pd (__m256d __A, __m256d __B, __m256d __D,
2761 __mmask8 __U, const int __R)
2763 return (__m256d) __builtin_ia32_vfnmaddpd256_mask3_round ((__v4df) __A,
2764 (__v4df) __B,
2765 (__v4df) __D,
2766 (__mmask8) __U,
2767 __R);
2770 extern __inline __m256d
2771 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2772 _mm256_maskz_fnmadd_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2773 __m256d __D, const int __R)
2775 return (__m256d) __builtin_ia32_vfnmaddpd256_maskz_round ((__v4df) __A,
2776 (__v4df) __B,
2777 (__v4df) __D,
2778 (__mmask8) __U,
2779 __R);
2782 extern __inline __m256h
2783 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2784 _mm256_fnmadd_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2786 return (__m256h)
2787 __builtin_ia32_vfnmaddph256_mask_round ((__v16hf) __A,
2788 (__v16hf) __B,
2789 (__v16hf) __D,
2790 (__mmask16) -1,
2791 __R);
2794 extern __inline __m256h
2795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2796 _mm256_mask_fnmadd_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2797 __m256h __D, const int __R)
2799 return (__m256h)
2800 __builtin_ia32_vfnmaddph256_mask_round ((__v16hf) __A,
2801 (__v16hf) __B,
2802 (__v16hf) __D,
2803 (__mmask16) __U,
2804 __R);
2807 extern __inline __m256h
2808 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2809 _mm256_mask3_fnmadd_round_ph (__m256h __A, __m256h __B, __m256h __D,
2810 __mmask16 __U, const int __R)
2812 return (__m256h)
2813 __builtin_ia32_vfnmaddph256_mask3_round ((__v16hf) __A,
2814 (__v16hf) __B,
2815 (__v16hf) __D,
2816 (__mmask16) __U,
2817 __R);
2820 extern __inline __m256h
2821 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2822 _mm256_maskz_fnmadd_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2823 __m256h __D, const int __R)
2825 return (__m256h)
2826 __builtin_ia32_vfnmaddph256_maskz_round ((__v16hf) __A,
2827 (__v16hf) __B,
2828 (__v16hf) __D,
2829 (__mmask16) __U,
2830 __R);
2833 extern __inline __m256
2834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835 _mm256_fnmadd_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2837 return (__m256) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf) __A,
2838 (__v8sf) __B,
2839 (__v8sf) __D,
2840 (__mmask8) -1,
2841 __R);
2844 extern __inline __m256
2845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2846 _mm256_mask_fnmadd_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2847 __m256 __D, const int __R)
2849 return (__m256) __builtin_ia32_vfnmaddps256_mask_round ((__v8sf) __A,
2850 (__v8sf) __B,
2851 (__v8sf) __D,
2852 (__mmask8) __U,
2853 __R);
2856 extern __inline __m256
2857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2858 _mm256_mask3_fnmadd_round_ps (__m256 __A, __m256 __B, __m256 __D,
2859 __mmask8 __U, const int __R)
2861 return (__m256) __builtin_ia32_vfnmaddps256_mask3_round ((__v8sf) __A,
2862 (__v8sf) __B,
2863 (__v8sf) __D,
2864 (__mmask8) __U,
2865 __R);
2868 extern __inline __m256
2869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2870 _mm256_maskz_fnmadd_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
2871 __m256 __D, const int __R)
2873 return (__m256) __builtin_ia32_vfnmaddps256_maskz_round ((__v8sf) __A,
2874 (__v8sf) __B,
2875 (__v8sf) __D,
2876 (__mmask8) __U,
2877 __R);
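/* Note: fnmadd negates the product, computing -(__A * __B) + __D per
   element.  */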
2880 extern __inline __m256d
2881 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2882 _mm256_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D, const int __R)
2884 return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A,
2885 (__v4df) __B,
2886 (__v4df) __D,
2887 (__mmask8) -1,
2888 __R);
2891 extern __inline __m256d
2892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2893 _mm256_mask_fnmsub_round_pd (__m256d __A, __mmask8 __U, __m256d __B,
2894 __m256d __D, const int __R)
2896 return (__m256d) __builtin_ia32_vfnmsubpd256_mask_round ((__v4df) __A,
2897 (__v4df) __B,
2898 (__v4df) __D,
2899 (__mmask8) __U,
2900 __R);
2903 extern __inline __m256d
2904 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2905 _mm256_mask3_fnmsub_round_pd (__m256d __A, __m256d __B, __m256d __D,
2906 __mmask8 __U, const int __R)
2908 return (__m256d) __builtin_ia32_vfnmsubpd256_mask3_round ((__v4df) __A,
2909 (__v4df) __B,
2910 (__v4df) __D,
2911 (__mmask8) __U,
2912 __R);
2915 extern __inline __m256d
2916 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2917 _mm256_maskz_fnmsub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
2918 __m256d __D, const int __R)
2920 return (__m256d) __builtin_ia32_vfnmsubpd256_maskz_round ((__v4df) __A,
2921 (__v4df) __B,
2922 (__v4df) __D,
2923 (__mmask8) __U,
2924 __R);
2927 extern __inline __m256h
2928 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2929 _mm256_fnmsub_round_ph (__m256h __A, __m256h __B, __m256h __D, const int __R)
2931 return (__m256h)
2932 __builtin_ia32_vfnmsubph256_mask_round ((__v16hf) __A,
2933 (__v16hf) __B,
2934 (__v16hf) __D,
2935 (__mmask16) -1,
2936 __R);
2939 extern __inline __m256h
2940 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2941 _mm256_mask_fnmsub_round_ph (__m256h __A, __mmask16 __U, __m256h __B,
2942 __m256h __D, const int __R)
2944 return (__m256h)
2945 __builtin_ia32_vfnmsubph256_mask_round ((__v16hf) __A,
2946 (__v16hf) __B,
2947 (__v16hf) __D,
2948 (__mmask16) __U,
2949 __R);
2952 extern __inline __m256h
2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2954 _mm256_mask3_fnmsub_round_ph (__m256h __A, __m256h __B, __m256h __D,
2955 __mmask16 __U, const int __R)
2957 return (__m256h)
2958 __builtin_ia32_vfnmsubph256_mask3_round ((__v16hf) __A,
2959 (__v16hf) __B,
2960 (__v16hf) __D,
2961 (__mmask16) __U,
2962 __R);
2965 extern __inline __m256h
2966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2967 _mm256_maskz_fnmsub_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
2968 __m256h __D, const int __R)
2970 return (__m256h)
2971 __builtin_ia32_vfnmsubph256_maskz_round ((__v16hf) __A,
2972 (__v16hf) __B,
2973 (__v16hf) __D,
2974 (__mmask16) __U,
2975 __R);
2978 extern __inline __m256
2979 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2980 _mm256_fnmsub_round_ps (__m256 __A, __m256 __B, __m256 __D, const int __R)
2982 return (__m256) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf) __A,
2983 (__v8sf) __B,
2984 (__v8sf) __D,
2985 (__mmask8) -1,
2986 __R);
2989 extern __inline __m256
2990 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2991 _mm256_mask_fnmsub_round_ps (__m256 __A, __mmask8 __U, __m256 __B,
2992 __m256 __D, const int __R)
2994 return (__m256) __builtin_ia32_vfnmsubps256_mask_round ((__v8sf) __A,
2995 (__v8sf) __B,
2996 (__v8sf) __D,
2997 (__mmask8) __U,
2998 __R);
3001 extern __inline __m256
3002 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3003 _mm256_mask3_fnmsub_round_ps (__m256 __A, __m256 __B, __m256 __D,
3004 __mmask8 __U, const int __R)
3006 return (__m256) __builtin_ia32_vfnmsubps256_mask3_round ((__v8sf) __A,
3007 (__v8sf) __B,
3008 (__v8sf) __D,
3009 (__mmask8) __U,
3010 __R);
3013 extern __inline __m256
3014 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3015 _mm256_maskz_fnmsub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
3016 __m256 __D, const int __R)
3018 return (__m256) __builtin_ia32_vfnmsubps256_maskz_round ((__v8sf) __A,
3019 (__v8sf) __B,
3020 (__v8sf) __D,
3021 (__mmask8) __U,
3022 __R);
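/* Note: fnmsub computes -(__A * __B) - __D per element, completing
   the four FMA sign combinations.  */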
3025 extern __inline __m256d
3026 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3027 _mm256_getexp_round_pd (__m256d __A, const int __R)
3029 return
3030 (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
3031 (__v4df)
3032 _mm256_undefined_pd (),
3033 (__mmask8) -1,
3034 __R);
3037 extern __inline __m256d
3038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3039 _mm256_mask_getexp_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3040 const int __R)
3042 return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
3043 (__v4df) __W,
3044 (__mmask8) __U,
3045 __R);
3048 extern __inline __m256d
3049 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3050 _mm256_maskz_getexp_round_pd (__mmask8 __U, __m256d __A, const int __R)
3052 return (__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) __A,
3053 (__v4df)
3054 _mm256_setzero_pd (),
3055 (__mmask8) __U,
3056 __R);
3059 extern __inline __m256h
3060 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3061 _mm256_getexp_round_ph (__m256h __A, const int __R)
3063 return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A,
3064 (__v16hf)
3065 _mm256_setzero_ph (),
3066 (__mmask16) -1,
3067 __R);
3070 extern __inline __m256h
3071 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3072 _mm256_mask_getexp_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3073 const int __R)
3075 return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A,
3076 (__v16hf) __W,
3077 (__mmask16) __U,
3078 __R);
3081 extern __inline __m256h
3082 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3083 _mm256_maskz_getexp_round_ph (__mmask16 __U, __m256h __A, const int __R)
3085 return (__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) __A,
3086 (__v16hf)
3087 _mm256_setzero_ph (),
3088 (__mmask16) __U,
3089 __R);
3092 extern __inline __m256
3093 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3094 _mm256_getexp_round_ps (__m256 __A, const int __R)
3096 return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A,
3097 (__v8sf)
3098 _mm256_undefined_ps (),
3099 (__mmask8) -1,
3100 __R);
3103 extern __inline __m256
3104 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3105 _mm256_mask_getexp_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
3106 const int __R)
3108 return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A,
3109 (__v8sf) __W,
3110 (__mmask8) __U,
3111 __R);
3114 extern __inline __m256
3115 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3116 _mm256_maskz_getexp_round_ps (__mmask8 __U, __m256 __A, const int __R)
3118 return (__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) __A,
3119 (__v8sf)
3120 _mm256_setzero_ps (),
3121 (__mmask8) __U,
3122 __R);
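/* Usage sketch (illustrative; names hypothetical): getexp extracts
   each element's unbiased exponent as a floating-point value,
   i.e. floor(log2(|__x|)) for normal inputs, and pairs naturally with
   getmant below.  The __R argument here only controls exception
   suppression:

     __m256 __e = _mm256_getexp_round_ps (__x, _MM_FROUND_NO_EXC);
*/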
3125 extern __inline __m256d
3126 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3127 _mm256_getmant_round_pd (__m256d __A, _MM_MANTISSA_NORM_ENUM __B,
3128 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3130 return
3131 (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A,
3132 (__C << 2) | __B,
3133 _mm256_undefined_pd (),
3134 (__mmask8) -1, __R);
3137 extern __inline __m256d
3138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139 _mm256_mask_getmant_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3140 _MM_MANTISSA_NORM_ENUM __B,
3141 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3143 return (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A,
3144 (__C << 2) | __B,
3145 (__v4df) __W, __U,
3146 __R);
3149 extern __inline __m256d
3150 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3151 _mm256_maskz_getmant_round_pd (__mmask8 __U, __m256d __A,
3152 _MM_MANTISSA_NORM_ENUM __B,
3153 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3155 return (__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) __A,
3156 (__C << 2) | __B,
3157 (__v4df)
3158 _mm256_setzero_pd (),
3159 __U, __R);
3162 extern __inline __m256h
3163 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3164 _mm256_getmant_round_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
3165 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3167 return
3168 (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A,
3169 (__C << 2) | __B,
3170 _mm256_undefined_ph (),
3171 (__mmask16) -1, __R);
3174 extern __inline __m256h
3175 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3176 _mm256_mask_getmant_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3177 _MM_MANTISSA_NORM_ENUM __B,
3178 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3180 return (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A,
3181 (__C << 2) | __B,
3182 (__v16hf) __W, __U,
3183 __R);
3186 extern __inline __m256h
3187 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3188 _mm256_maskz_getmant_round_ph (__mmask16 __U, __m256h __A,
3189 _MM_MANTISSA_NORM_ENUM __B,
3190 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3192 return (__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) __A,
3193 (__C << 2) | __B,
3194 (__v16hf)
3195 _mm256_setzero_ph (),
3196 __U, __R);
3199 extern __inline __m256
3200 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3201 _mm256_getmant_round_ps (__m256 __A, _MM_MANTISSA_NORM_ENUM __B,
3202 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3204 return
3205 (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A,
3206 (__C << 2) | __B,
3207 _mm256_undefined_ps (),
3208 (__mmask8) -1, __R);
3211 extern __inline __m256
3212 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3213 _mm256_mask_getmant_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
3214 _MM_MANTISSA_NORM_ENUM __B,
3215 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3217 return (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A,
3218 (__C << 2) | __B,
3219 (__v8sf) __W, __U,
3220 __R);
3223 extern __inline __m256
3224 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3225 _mm256_maskz_getmant_round_ps (__mmask8 __U, __m256 __A,
3226 _MM_MANTISSA_NORM_ENUM __B,
3227 _MM_MANTISSA_SIGN_ENUM __C, const int __R)
3229 return (__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) __A,
3230 (__C << 2) | __B,
3231 (__v8sf)
3232 _mm256_setzero_ps (),
3233 __U, __R);
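/* Usage sketch (illustrative; names hypothetical): the (__C << 2) | __B
   immediate packs the _MM_MANT_SIGN_* selector above the
   _MM_MANT_NORM_* interval selector.  To normalize each mantissa into
   [1, 2) with the sign forced to zero:

     __m256 __m = _mm256_getmant_round_ps (__x, _MM_MANT_NORM_1_2,
					   _MM_MANT_SIGN_zero,
					   _MM_FROUND_NO_EXC);
*/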
3236 extern __inline __m256d
3237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3238 _mm256_max_round_pd (__m256d __A, __m256d __B, const int __R)
3240 return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
3241 (__v4df) __B,
3242 (__v4df)
3243 _mm256_undefined_pd (),
3244 (__mmask8) -1,
3245 __R);
3248 extern __inline __m256d
3249 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3250 _mm256_mask_max_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3251 __m256d __B, const int __R)
3253 return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
3254 (__v4df) __B,
3255 (__v4df) __W,
3256 (__mmask8) __U,
3257 __R);
3260 extern __inline __m256d
3261 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3262 _mm256_maskz_max_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
3263 const int __R)
3265 return (__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) __A,
3266 (__v4df) __B,
3267 (__v4df)
3268 _mm256_setzero_pd (),
3269 (__mmask8) __U,
3270 __R);
3273 extern __inline __m256h
3274 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3275 _mm256_max_round_ph (__m256h __A, __m256h __B, const int __R)
3277 return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
3278 (__v16hf) __B,
3279 (__v16hf)
3280 _mm256_undefined_ph (),
3281 (__mmask16) -1,
3282 __R);
3285 extern __inline __m256h
3286 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3287 _mm256_mask_max_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3288 __m256h __B, const int __R)
3290 return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
3291 (__v16hf) __B,
3292 (__v16hf) __W,
3293 (__mmask16) __U,
3294 __R);
3297 extern __inline __m256h
3298 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3299 _mm256_maskz_max_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
3300 const int __R)
3302 return (__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) __A,
3303 (__v16hf) __B,
3304 (__v16hf)
3305 _mm256_setzero_ph (),
3306 (__mmask16) __U,
3307 __R);
3310 extern __inline __m256
3311 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3312 _mm256_max_round_ps (__m256 __A, __m256 __B, const int __R)
3314 return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A,
3315 (__v8sf) __B,
3316 (__v8sf)
3317 _mm256_undefined_ps (),
3318 (__mmask8) -1,
3319 __R);
3322 extern __inline __m256
3323 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3324 _mm256_mask_max_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
3325 const int __R)
3327 return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A,
3328 (__v8sf) __B,
3329 (__v8sf) __W,
3330 (__mmask8) __U,
3331 __R);
3334 extern __inline __m256
3335 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3336 _mm256_maskz_max_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
3337 const int __R)
3339 return (__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) __A,
3340 (__v8sf) __B,
3341 (__v8sf)
3342 _mm256_setzero_ps (),
3343 (__mmask8) __U,
3344 __R);
3347 extern __inline __m256d
3348 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3349 _mm256_min_round_pd (__m256d __A, __m256d __B, const int __R)
3351 return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A,
3352 (__v4df) __B,
3353 (__v4df)
3354 _mm256_undefined_pd (),
3355 (__mmask8) -1,
3356 __R);
3359 extern __inline __m256d
3360 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3361 _mm256_mask_min_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3362 __m256d __B, const int __R)
3364 return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A,
3365 (__v4df) __B,
3366 (__v4df) __W,
3367 (__mmask8) __U,
3368 __R);
3371 extern __inline __m256d
3372 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3373 _mm256_maskz_min_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
3374 const int __R)
3376 return (__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) __A,
3377 (__v4df) __B,
3378 (__v4df)
3379 _mm256_setzero_pd (),
3380 (__mmask8) __U,
3381 __R);
3384 extern __inline __m256h
3385 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3386 _mm256_min_round_ph (__m256h __A, __m256h __B, const int __R)
3388 return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A,
3389 (__v16hf) __B,
3390 (__v16hf)
3391 _mm256_undefined_ph (),
3392 (__mmask16) -1,
3393 __R);
3396 extern __inline __m256h
3397 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3398 _mm256_mask_min_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3399 __m256h __B, const int __R)
3401 return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A,
3402 (__v16hf) __B,
3403 (__v16hf) __W,
3404 (__mmask16) __U,
3405 __R);
3408 extern __inline __m256h
3409 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3410 _mm256_maskz_min_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
3411 const int __R)
3413 return (__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) __A,
3414 (__v16hf) __B,
3415 (__v16hf)
3416 _mm256_setzero_ph (),
3417 (__mmask16) __U,
3418 __R);
3421 extern __inline __m256
3422 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3423 _mm256_min_round_ps (__m256 __A, __m256 __B, const int __R)
3425 return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A,
3426 (__v8sf) __B,
3427 (__v8sf)
3428 _mm256_undefined_ps (),
3429 (__mmask8) -1,
3430 __R);
3433 extern __inline __m256
3434 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3435 _mm256_mask_min_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
3436 const int __R)
3438 return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A,
3439 (__v8sf) __B,
3440 (__v8sf) __W,
3441 (__mmask8) __U,
3442 __R);
3445 extern __inline __m256
3446 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3447 _mm256_maskz_min_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
3448 const int __R)
3450 return (__m256) __builtin_ia32_minps256_mask_round ((__v8sf) __A,
3451 (__v8sf) __B,
3452 (__v8sf)
3453 _mm256_setzero_ps (),
3454 (__mmask8) __U,
3455 __R);
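/* Note: as with the legacy MAXPD/MINPD instructions, these max and min
   forms are not symmetric for special values: when exactly one source
   element is a NaN, or when both elements are zero (of either sign),
   the element from the second source __B is returned.  */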
3458 extern __inline __m256d
3459 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3460 _mm256_mul_round_pd (__m256d __A, __m256d __B, const int __R)
3462 return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
3463 (__v4df) __B,
3464 (__v4df)
3465 _mm256_undefined_pd (),
3466 (__mmask8) -1,
3467 __R);
3470 extern __inline __m256d
3471 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3472 _mm256_mask_mul_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3473 __m256d __B, const int __R)
3475 return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
3476 (__v4df) __B,
3477 (__v4df) __W,
3478 (__mmask8) __U,
3479 __R);
3482 extern __inline __m256d
3483 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3484 _mm256_maskz_mul_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
3485 const int __R)
3487 return (__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) __A,
3488 (__v4df) __B,
3489 (__v4df)
3490 _mm256_setzero_pd (),
3491 (__mmask8) __U,
3492 __R);
3495 extern __inline __m256h
3496 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3497 _mm256_mul_round_ph (__m256h __A, __m256h __B, const int __R)
3499 return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A,
3500 (__v16hf) __B,
3501 (__v16hf)
3502 _mm256_undefined_ph (),
3503 (__mmask16) -1,
3504 __R);
3507 extern __inline __m256h
3508 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3509 _mm256_mask_mul_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3510 __m256h __B, const int __R)
3512 return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A,
3513 (__v16hf) __B,
3514 (__v16hf) __W,
3515 (__mmask16) __U,
3516 __R);
3519 extern __inline __m256h
3520 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3521 _mm256_maskz_mul_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
3522 const int __R)
3524 return (__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) __A,
3525 (__v16hf) __B,
3526 (__v16hf)
3527 _mm256_setzero_ph (),
3528 (__mmask16) __U,
3529 __R);
3532 extern __inline __m256
3533 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3534 _mm256_mul_round_ps (__m256 __A, __m256 __B, const int __R)
3536 return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A,
3537 (__v8sf) __B,
3538 (__v8sf)
3539 _mm256_undefined_ps (),
3540 (__mmask8) -1,
3541 __R);
3544 extern __inline __m256
3545 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3546 _mm256_mask_mul_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
3547 const int __R)
3549 return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A,
3550 (__v8sf) __B,
3551 (__v8sf) __W,
3552 (__mmask8) __U,
3553 __R);
3556 extern __inline __m256
3557 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3558 _mm256_maskz_mul_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
3559 const int __R)
3561 return (__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) __A,
3562 (__v8sf) __B,
3563 (__v8sf)
3564 _mm256_setzero_ps (),
3565 (__mmask8) __U,
3566 __R);
3569 extern __inline __m256d
3570 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3571 _mm256_range_round_pd (__m256d __A, __m256d __B, const int __C,
3572 const int __R)
3574 return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A,
3575 (__v4df) __B,
3576 __C,
3577 (__v4df)
3578 _mm256_setzero_pd (),
3579 (__mmask8) -1,
3580 __R);
3583 extern __inline __m256d
3584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3585 _mm256_mask_range_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3586 __m256d __B, const int __C, const int __R)
3588 return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A,
3589 (__v4df) __B,
3590 __C,
3591 (__v4df) __W,
3592 (__mmask8) __U,
3593 __R);
3596 extern __inline __m256d
3597 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3598 _mm256_maskz_range_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
3599 const int __C, const int __R)
3601 return (__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) __A,
3602 (__v4df) __B,
3603 __C,
3604 (__v4df)
3605 _mm256_setzero_pd (),
3606 (__mmask8) __U,
3607 __R);
3610 extern __inline __m256
3611 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3612 _mm256_range_round_ps (__m256 __A, __m256 __B, const int __C, const int __R)
3614 return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A,
3615 (__v8sf) __B,
3616 __C,
3617 (__v8sf)
3618 _mm256_setzero_ps (),
3619 (__mmask8) -1,
3620 __R);
3623 extern __inline __m256
3624 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3625 _mm256_mask_range_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
3626 __m256 __B, const int __C, const int __R)
3628 return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A,
3629 (__v8sf) __B,
3630 __C,
3631 (__v8sf) __W,
3632 (__mmask8) __U,
3633 __R);
3636 extern __inline __m256
3637 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3638 _mm256_maskz_range_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
3639 const int __C, const int __R)
3641 return (__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) __A,
3642 (__v8sf) __B,
3643 __C,
3644 (__v8sf)
3645 _mm256_setzero_ps (),
3646 (__mmask8) __U,
3647 __R);
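/* Note: for the range intrinsics, bits 1:0 of __C select the operation
   (min, max, absolute min, absolute max) and bits 3:2 select how the
   sign of the result is chosen; see the VRANGEPD/VRANGEPS documentation
   for the exact encoding.  */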
3650 extern __inline __m256d
3651 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3652 _mm256_reduce_round_pd (__m256d __A, const int __C, const int __R)
3654 return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
3655 __C,
3656 (__v4df)
3657 _mm256_setzero_pd (),
3658 (__mmask8) -1,
3659 __R);
3662 extern __inline __m256d
3663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3664 _mm256_mask_reduce_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3665 const int __C, const int __R)
3667 return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
3668 __C,
3669 (__v4df) __W,
3670 (__mmask8) __U,
3671 __R);
3674 extern __inline __m256d
3675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3676 _mm256_maskz_reduce_round_pd (__mmask8 __U, __m256d __A, const int __C,
3677 const int __R)
3679 return (__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) __A,
3680 __C,
3681 (__v4df)
3682 _mm256_setzero_pd (),
3683 (__mmask8) __U,
3684 __R);
3687 extern __inline __m256h
3688 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3689 _mm256_reduce_round_ph (__m256h __A, const int __C, const int __R)
3691 return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A,
3692 __C,
3693 (__v16hf)
3694 _mm256_setzero_ph (),
3695 (__mmask16) -1,
3696 __R);
3699 extern __inline __m256h
3700 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3701 _mm256_mask_reduce_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3702 const int __C, const int __R)
3704 return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A,
3705 __C,
3706 (__v16hf) __W,
3707 (__mmask16) __U,
3708 __R);
3711 extern __inline __m256h
3712 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3713 _mm256_maskz_reduce_round_ph (__mmask16 __U, __m256h __A, const int __C,
3714 const int __R)
3716 return (__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) __A,
3717 __C,
3718 (__v16hf)
3719 _mm256_setzero_ph (),
3720 (__mmask16) __U,
3721 __R);
3724 extern __inline __m256
3725 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3726 _mm256_reduce_round_ps (__m256 __A, const int __C, const int __R)
3728 return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A,
3729 __C,
3730 (__v8sf)
3731 _mm256_setzero_ps (),
3732 (__mmask8) -1,
3733 __R);
3736 extern __inline __m256
3737 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3738 _mm256_mask_reduce_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
3739 const int __C, const int __R)
3741 return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A,
3742 __C,
3743 (__v8sf) __W,
3744 (__mmask8) __U,
3745 __R);
3748 extern __inline __m256
3749 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3750 _mm256_maskz_reduce_round_ps (__mmask8 __U, __m256 __A, const int __C,
3751 const int __R)
3753 return (__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) __A,
3754 __C,
3755 (__v8sf)
3756 _mm256_setzero_ps (),
3757 (__mmask8) __U,
3758 __R);
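/* Note: for the reduce intrinsics, bits 7:4 of __C give a fraction-bit
   count M and each result is roughly __x - round(__x * 2^M) / 2^M,
   i.e. the residue left after rounding __x to M binary fraction bits;
   the low bits of __C control the internal rounding step (see the
   VREDUCEPD/VREDUCEPS documentation for the full encoding).  */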
3761 extern __inline __m256d
3762 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3763 _mm256_roundscale_round_pd (__m256d __A, const int __C, const int __R)
3765 return
3766 (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A,
3767 __C,
3768 (__v4df)
3769 _mm256_undefined_pd (),
3770 (__mmask8) -1,
3771 __R);
3774 extern __inline __m256d
3775 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3776 _mm256_mask_roundscale_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
3777 const int __C, const int __R)
3779 return (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A,
3780 __C,
3781 (__v4df) __W,
3782 (__mmask8) __U,
3783 __R);
3786 extern __inline __m256d
3787 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3788 _mm256_maskz_roundscale_round_pd (__mmask8 __U, __m256d __A, const int __C,
3789 const int __R)
3791 return
3792 (__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) __A,
3793 __C,
3794 (__v4df)
3795 _mm256_setzero_pd (),
3796 (__mmask8) __U,
3797 __R);
3800 extern __inline __m256h
3801 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3802 _mm256_roundscale_round_ph (__m256h __A, const int __C, const int __R)
3804 return
3805 (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A,
3806 __C,
3807 (__v16hf)
3808 _mm256_undefined_ph (),
3809 (__mmask16) -1,
3810 __R);
3813 extern __inline __m256h
3814 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3815 _mm256_mask_roundscale_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
3816 const int __C, const int __R)
3818 return (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A,
3819 __C,
3820 (__v16hf) __W,
3821 (__mmask16) __U,
3822 __R);
3825 extern __inline __m256h
3826 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3827 _mm256_maskz_roundscale_round_ph (__mmask16 __U, __m256h __A, const int __C,
3828 const int __R)
3830 return
3831 (__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) __A,
3832 __C,
3833 (__v16hf)
3834 _mm256_setzero_ph (),
3835 (__mmask16) __U,
3836 __R);
3839 extern __inline __m256
3840 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3841 _mm256_roundscale_round_ps (__m256 __A, const int __C, const int __R)
3843 return
3844 (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A,
3845 __C,
3846 (__v8sf)
3847 _mm256_undefined_ps (),
3848 (__mmask8) -1,
3849 __R);
3852 extern __inline __m256
3853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3854 _mm256_mask_roundscale_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
3855 const int __C, const int __R)
3857 return (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A,
3858 __C,
3859 (__v8sf) __W,
3860 (__mmask8) __U,
3861 __R);
3864 extern __inline __m256
3865 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3866 _mm256_maskz_roundscale_round_ps (__mmask8 __U, __m256 __A, const int __C,
3867 const int __R)
3869 return (__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) __A,
3870 __C,
3871 (__v8sf)
3872 _mm256_setzero_ps (),
3873 (__mmask8) __U,
3874 __R);
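/* Usage sketch (illustrative; names hypothetical): for roundscale,
   bits 7:4 of __C give a scale M and the result is
   round(__x * 2^M) / 2^M, so __C == 0 rounds each element to the
   nearest integer:

     __m256d __i = _mm256_roundscale_round_pd (__x, 0,
						_MM_FROUND_NO_EXC);
*/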

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_round_pd (__m256d __A, __m256d __B, const int __R)
{
  return
    (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
						     (__v4df) __B,
						     (__v4df)
						     _mm256_undefined_pd (),
						     (__mmask8) -1,
						     __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
			     __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
							  (__v4df) __B,
							  (__v4df) __W,
							  (__mmask8) __U,
							  __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
			      const int __R)
{
  return (__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) __A,
							  (__v4df) __B,
							  (__v4df)
							  _mm256_setzero_pd (),
							  (__mmask8) __U,
							  __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_round_ph (__m256h __A, __m256h __B, const int __R)
{
  return
    (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A,
						     (__v16hf) __B,
						     (__v16hf)
						     _mm256_undefined_ph (),
						     (__mmask16) -1,
						     __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
			     __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A,
							  (__v16hf) __B,
							  (__v16hf) __W,
							  (__mmask16) __U,
							  __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
			      const int __R)
{
  return (__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) __A,
							  (__v16hf) __B,
							  (__v16hf)
							  _mm256_setzero_ph (),
							  (__mmask16) __U,
							  __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_round_ps (__m256 __A, __m256 __B, const int __R)
{
  return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A,
							 (__v8sf) __B,
							 (__v8sf)
							 _mm256_undefined_ps (),
							 (__mmask8) -1,
							 __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
			     __m256 __B, const int __R)
{
  return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A,
							 (__v8sf) __B,
							 (__v8sf) __W,
							 (__mmask8) __U,
							 __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
			      const int __R)
{
  return (__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) __A,
							 (__v8sf) __B,
							 (__v8sf)
							 _mm256_setzero_ps (),
							 (__mmask8) __U,
							 __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_round_pd (__m256d __A, const int __R)
{
  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
							(__v4df)
							_mm256_undefined_pd (),
							(__mmask8) -1,
							__R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
			   const int __R)
{
  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
							(__v4df) __W,
							(__mmask8) __U,
							__R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_round_pd (__mmask8 __U, __m256d __A, const int __R)
{
  return (__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) __A,
							(__v4df)
							_mm256_setzero_pd (),
							(__mmask8) __U,
							__R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_round_ph (__m256h __A, const int __R)
{
  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
							(__v16hf)
							_mm256_undefined_ph (),
							(__mmask16) -1,
							__R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
			   const int __R)
{
  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
							(__v16hf) __W,
							(__mmask16) __U,
							__R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_round_ph (__mmask16 __U, __m256h __A, const int __R)
{
  return (__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) __A,
							(__v16hf)
							_mm256_setzero_ph (),
							(__mmask16) __U,
							__R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_round_ps (__m256 __A, const int __R)
{
  return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A,
						       (__v8sf)
						       _mm256_undefined_ps (),
						       (__mmask8) -1,
						       __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_round_ps (__m256 __W, __mmask8 __U, __m256 __A,
			   const int __R)
{
  return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A,
						       (__v8sf) __W,
						       (__mmask8) __U,
						       __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_round_ps (__mmask8 __U, __m256 __A, const int __R)
{
  return (__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) __A,
						       (__v8sf)
						       _mm256_setzero_ps (),
						       (__mmask8) __U,
						       __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_round_pd (__m256d __A, __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A,
						       (__v4df) __B,
						       (__v4df)
						       _mm256_undefined_pd (),
						       (__mmask8) -1,
						       __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_round_pd (__m256d __W, __mmask8 __U, __m256d __A,
			  __m256d __B, const int __R)
{
  return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A,
						       (__v4df) __B,
						       (__v4df) __W,
						       (__mmask8) __U,
						       __R);
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_round_pd (__mmask8 __U, __m256d __A, __m256d __B,
			   const int __R)
{
  return (__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) __A,
						       (__v4df) __B,
						       (__v4df)
						       _mm256_setzero_pd (),
						       (__mmask8) __U,
						       __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_round_ph (__m256h __A, __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf)
						       _mm256_undefined_ph (),
						       (__mmask16) -1,
						       __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_round_ph (__m256h __W, __mmask16 __U, __m256h __A,
			  __m256h __B, const int __R)
{
  return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf) __W,
						       (__mmask16) __U,
						       __R);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_round_ph (__mmask16 __U, __m256h __A, __m256h __B,
			   const int __R)
{
  return (__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) __A,
						       (__v16hf) __B,
						       (__v16hf)
						       _mm256_setzero_ph (),
						       (__mmask16) __U,
						       __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_round_ps (__m256 __A, __m256 __B, const int __R)
{
  return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A,
						      (__v8sf) __B,
						      (__v8sf)
						      _mm256_undefined_ps (),
						      (__mmask8) -1,
						      __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_round_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B,
			  const int __R)
{
  return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A,
						      (__v8sf) __B,
						      (__v8sf) __W,
						      (__mmask8) __U,
						      __R);
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_round_ps (__mmask8 __U, __m256 __A, __m256 __B,
			   const int __R)
{
  return (__m256) __builtin_ia32_subps256_mask_round ((__v8sf) __A,
						      (__v8sf) __B,
						      (__v8sf)
						      _mm256_setzero_ps (),
						      (__mmask8) __U,
						      __R);
}
#else
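
/* Without __OPTIMIZE__ the always-inline definitions above cannot
   guarantee that the rounding argument reaches the builtin as a
   constant expression, so the intrinsics are provided as macros
   instead.  Illustrative use (the values here are only an example):

     __m256d __x = _mm256_set1_pd (1.5);
     __m256d __y = _mm256_set1_pd (2.5);
     __m256d __z = _mm256_add_round_pd (__x, __y,
					_MM_FROUND_TO_NEAREST_INT
					| _MM_FROUND_NO_EXC);  */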

#define _mm256_add_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) \
						 (_mm256_undefined_pd ()), \
						 (__mmask8) (-1), \
						 (R)))

#define _mm256_mask_add_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) (W), \
						 (__mmask8) (U), \
						 (R)))

#define _mm256_maskz_add_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_addpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) \
						 (_mm256_setzero_pd ()), \
						 (__mmask8) (U), \
						 (R)))

#define _mm256_add_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) \
						 (_mm256_undefined_ph ()), \
						 (__mmask16) (-1), \
						 (R)))

#define _mm256_mask_add_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) (W), \
						 (__mmask16) (U), \
						 (R)))

#define _mm256_maskz_add_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_addph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) \
						 (_mm256_setzero_ph ()), \
						 (__mmask16) (U), \
						 (R)))

#define _mm256_add_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) \
						(_mm256_undefined_ps ()), \
						(__mmask8) (-1), \
						(R)))

#define _mm256_mask_add_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) (W), \
						(__mmask8) (U), \
						(R)))

#define _mm256_maskz_add_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_addps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) \
						(_mm256_setzero_ps ()), \
						(__mmask8) (U), \
						(R)))
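
/* The cmp macros below produce a bitmask rather than a vector: C
   selects the comparison predicate (for example _CMP_EQ_OQ), and,
   since a comparison yields no rounded result, R is in practice
   used for exception suppression (_MM_FROUND_NO_EXC).  */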

#define _mm256_cmp_round_pd_mask(A, B, C, R) \
  ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \
						  (__v4df) (B), \
						  (C), \
						  (__mmask8) (-1), \
						  (R)))

#define _mm256_mask_cmp_round_pd_mask(U, A, B, C, R) \
  ((__mmask8) __builtin_ia32_cmppd256_mask_round ((__v4df) (A), \
						  (__v4df) (B), \
						  (C), \
						  (__mmask8) (U), \
						  (R)))

#define _mm256_cmp_round_ph_mask(A, B, C, R) \
  ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \
						   (__v16hf) (B), \
						   (C), \
						   (__mmask16) (-1), \
						   (R)))

#define _mm256_mask_cmp_round_ph_mask(U, A, B, C, R) \
  ((__mmask16) __builtin_ia32_cmpph256_mask_round ((__v16hf) (A), \
						   (__v16hf) (B), \
						   (C), \
						   (__mmask16) (U), \
						   (R)))

#define _mm256_cmp_round_ps_mask(A, B, C, R) \
  ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \
						  (__v8sf) (B), \
						  (C), \
						  (__mmask8) (-1), \
						  (R)))

#define _mm256_mask_cmp_round_ps_mask(U, A, B, C, R) \
  ((__mmask8) __builtin_ia32_cmpps256_mask_round ((__v8sf) (A), \
						  (__v8sf) (B), \
						  (C), \
						  (__mmask8) (U), \
						  (R)))
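
/* Conversion macros follow.  The suffixes name the source and
   destination element types (ph/ps/pd for FP16/FP32/FP64, epiN/epuN
   for signed/unsigned N-bit integers); masked forms take the
   pass-through vector W first, while maskz forms zero the inactive
   elements.  */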

#define _mm256_cvt_roundepi32_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
						     (__v8hf) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtdq2ph256_mask_round ((__v8si) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundepi32_ps(A, R) \
  ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
						   (__v8sf) \
						   (_mm256_undefined_ps ()), \
						   (__mmask8) (-1), \
						   (R)))

#define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
						   (__v8sf) (W), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m256) __builtin_ia32_cvtdq2ps256_mask_round ((__v8si) (A), \
						   (__v8sf) \
						   (_mm256_setzero_ps ()), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_cvt_roundpd_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
						     (__v8hf) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundpd_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtpd2ph256_mask_round ((__v4df) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundpd_ps(A, R) \
  ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
						   (__v4sf) \
						   (_mm_undefined_ps ()), \
						   (__mmask8) (-1), \
						   (R)))

#define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
						   (__v4sf) (W), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m128) __builtin_ia32_cvtpd2ps256_mask_round ((__v4df) (A), \
						   (__v4sf) \
						   (_mm_setzero_ps ()), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_cvt_roundpd_epi32(A, R) \
  ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
						    (__v4si) \
						    (_mm_undefined_si128 ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
						    (__v4si) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m128i) __builtin_ia32_cvtpd2dq256_mask_round ((__v4df) (A), \
						    (__v4si) \
						    (_mm_setzero_si128 ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundpd_epi64(A, R) \
  ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
						    (__v4di) \
						    (_mm256_setzero_si256 ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
						    (__v4di) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2qq256_mask_round ((__v4df) (A), \
						    (__v4di) \
						    (_mm256_setzero_si256 ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundpd_epu32(A, R) \
  ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
						     (__v4si) \
						     (_mm_undefined_si128 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
						     (__v4si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m128i) __builtin_ia32_cvtpd2udq256_mask_round ((__v4df) (A), \
						     (__v4si) \
						     (_mm_setzero_si128 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundpd_epu64(A, R) \
  ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
						     (__v4di) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m256i) __builtin_ia32_cvtpd2uqq256_mask_round ((__v4df) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundph_epi32(A, R) \
  ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
						     (__v8si) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
						     (__v8si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundph_epi32(U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2dq256_mask_round ((__v8hf) (A), \
						     (__v8si) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundph_pd(A, R) \
  ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
						     (__v4df) \
						     (_mm256_setzero_pd ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundph_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
						     (__v4df) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundph_pd(U, A, R) \
  ((__m256d) __builtin_ia32_vcvtph2pd256_mask_round ((__v8hf) (A), \
						     (__v4df) \
						     (_mm256_setzero_pd ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundph_ps(A, R) \
  ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
						    (__v8sf) \
						    (_mm256_undefined_ps ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
						    (__v8sf) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundph_ps(U, A, R) \
  ((__m256) __builtin_ia32_vcvtph2ps256_mask_round ((__v8hf) (A), \
						    (__v8sf) \
						    (_mm256_setzero_ps ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvtx_roundph_ps(A, R) \
  ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
						     (__v8sf) \
						     (_mm256_setzero_ps ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
						     (__v8sf) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvtx_roundph_ps(U, A, R) \
  ((__m256) __builtin_ia32_vcvtph2psx256_mask_round ((__v8hf) (A), \
						     (__v8sf) \
						     (_mm256_setzero_ps ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundph_epi64(A, R) \
  ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
						     (__v4di) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundph_epi64(U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2qq256_mask_round ((__v8hf) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundph_epu32(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
					    (__v8si) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
						      (__v8si) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvt_roundph_epu32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2udq256_mask_round ((__v8hf) (A), \
					    (__v8si) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvt_roundph_epu64(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
						      (__v4di) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvt_roundph_epu64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2uqq256_mask_round ((__v8hf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvt_roundph_epu16(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
					   (__v16hi) \
					   (_mm256_undefined_si256 ()), \
					   (__mmask16) (-1), \
					   (R)))

#define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
						     (__v16hi) (W), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundph_epu16(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2uw256_mask_round ((__v16hf) (A), \
					   (__v16hi) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask16) (U), \
					   (R)))

#define _mm256_cvt_roundph_epi16(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
					  (__v16hi) \
					  (_mm256_undefined_si256 ()), \
					  (__mmask16) (-1), \
					  (R)))

#define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
						    (__v16hi) (W), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundph_epi16(U, A, R) \
  ((__m256i) __builtin_ia32_vcvtph2w256_mask_round ((__v16hf) (A), \
						    (__v16hi) \
						    (_mm256_setzero_si256 ()), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_cvt_roundps_pd(A, R) \
  ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
						     (__v4df) \
						     (_mm256_undefined_pd ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundps_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
						     (__v4df) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundps_pd(U, A, R) \
  ((__m256d) __builtin_ia32_vcvtps2pd256_mask_round ((__v4sf) (A), \
						     (__v4df) \
						     (_mm256_setzero_pd ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvtx_roundps_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (-1), \
						      (R)))

#define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
						      (__v8hf) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtx_roundps_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtps2phx256_mask_round ((__v8sf) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_cvt_roundps_epi32(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_undefined_si256 ()), \
					   (__mmask8) (-1), \
					   (R)))

#define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
						     (__v8si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvtps2dq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (U), \
					   (R)))

#define _mm256_cvt_roundps_epi64(A, R) \
  ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
						    (__v4di) \
						    (_mm256_setzero_si256 ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
						    (__v4di) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m256i) __builtin_ia32_cvtps2qq256_mask_round ((__v4sf) (A), \
						    (__v4di) \
						    (_mm256_setzero_si256 ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundps_epu32(A, R) \
  ((__m256i) \
   __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_undefined_si256 ()), \
					   (__mmask8) (-1), \
					   (R)))

#define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
						     (__v8si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvtps2udq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (U), \
					   (R)))

#define _mm256_cvt_roundps_epu64(A, R) \
  ((__m256i) \
   __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \
					   (__v4di) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (-1), \
					   (R)))

#define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \
						     (__v4di) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvtps2uqq256_mask_round ((__v4sf) (A), \
					   (__v4di) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (U), \
					   (R)))

#define _mm256_cvt_roundepi64_pd(A, R) \
  ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
						    (__v4df) \
						    (_mm256_setzero_pd ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
						    (__v4df) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m256d) __builtin_ia32_cvtqq2pd256_mask_round ((__v4di) (A), \
						    (__v4df) \
						    (_mm256_setzero_pd ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundepi64_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
						     (__v8hf) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtqq2ph256_mask_round ((__v4di) (A), \
						     (__v8hf) \
						     (_mm_setzero_ph ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundepi64_ps(A, R) \
  ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
						   (__v4sf) \
						   (_mm_setzero_ps ()), \
						   (__mmask8) (-1), \
						   (R)))

#define _mm256_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
						   (__v4sf) (W), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m128) __builtin_ia32_cvtqq2ps256_mask_round ((__v4di) (A), \
						   (__v4sf) \
						   (_mm_setzero_ps ()), \
						   (__mmask8) (U), \
						   (R)))
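
/* The cvtt_* ("convert with truncation") macros below always round
   toward zero regardless of the rounding bits in R, so R is in
   practice only useful there for _MM_FROUND_NO_EXC.  */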

#define _mm256_cvtt_roundpd_epi32(A, R) \
  ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
						     (__v4si) \
						     (_mm_undefined_si128 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
						     (__v4si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m128i) __builtin_ia32_cvttpd2dq256_mask_round ((__v4df) (A), \
						     (__v4si) \
						     (_mm_setzero_si128 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvtt_roundpd_epi64(A, R) \
  ((__m256i) \
   __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
					   (__v4di) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (-1), \
					   (R)))

#define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
						     (__v4di) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvttpd2qq256_mask_round ((__v4df) (A), \
					   (__v4di) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (U), \
					   (R)))

#define _mm256_cvtt_roundpd_epu32(A, R) \
  ((__m128i) \
   __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
					    (__v4si) \
					    (_mm_undefined_si128 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \
  ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
						      (__v4si) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \
  ((__m128i) __builtin_ia32_cvttpd2udq256_mask_round ((__v4df) (A), \
						      (__v4si) \
						      (_mm_setzero_si128 ()), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_cvtt_roundpd_epu64(A, R) \
  ((__m256i) \
   __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
						      (__v4di) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvttpd2uqq256_mask_round ((__v4df) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvtt_roundph_epi32(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
					    (__v8si) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
						      (__v8si) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2dq256_mask_round ((__v8hf) (A), \
					    (__v8si) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvtt_roundph_epi64(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
						      (__v4di) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2qq256_mask_round ((__v8hf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvtt_roundph_epu32(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
					     (__v8si) \
					     (_mm256_setzero_si256 ()), \
					     (__mmask8) (-1), \
					     (R)))

#define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
						       (__v8si) (W), \
						       (__mmask8) (U), \
						       (R)))

#define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2udq256_mask_round ((__v8hf) (A), \
					     (__v8si) \
					     (_mm256_setzero_si256 ()), \
					     (__mmask8) (U), \
					     (R)))

#define _mm256_cvtt_roundph_epu64(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
					     (__v4di) \
					     (_mm256_setzero_si256 ()), \
					     (__mmask8) (-1), \
					     (R)))

#define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
						       (__v4di) (W), \
						       (__mmask8) (U), \
						       (R)))

#define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2uqq256_mask_round ((__v8hf) (A), \
					     (__v4di) \
					     (_mm256_setzero_si256 ()), \
					     (__mmask8) (U), \
					     (R)))

#define _mm256_cvtt_roundph_epu16(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
					    (__v16hi) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask16) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
						      (__v16hi) (W), \
						      (__mmask16) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2uw256_mask_round ((__v16hf) (A), \
					    (__v16hi) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask16) (U), \
					    (R)))

#define _mm256_cvtt_roundph_epi16(A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) (A), \
					   (__v16hi) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask16) (-1), \
					   (R)))

#define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \
  ((__m256i) __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) (A), \
						     (__v16hi) (W), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_maskz_cvtt_roundph_epi16(U, A, R) \
  ((__m256i) \
   __builtin_ia32_vcvttph2w256_mask_round ((__v16hf) (A), \
					   (__v16hi) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask16) (U), \
					   (R)))

#define _mm256_cvtt_roundps_epi32(A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_undefined_si256 ()), \
					   (__mmask8) (-1), \
					   (R)))

#define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
						     (__v8si) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2dq256_mask_round ((__v8sf) (A), \
					   (__v8si) \
					   (_mm256_setzero_si256 ()), \
					   (__mmask8) (U), \
					   (R)))

#define _mm256_cvtt_roundps_epi64(A, R) \
  ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
						     (__v4di) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m256i) __builtin_ia32_cvttps2qq256_mask_round ((__v4sf) (A), \
						     (__v4di) \
						     (_mm256_setzero_si256 ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvtt_roundps_epu32(A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
					    (__v8si) \
					    (_mm256_undefined_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
						      (__v8si) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2udq256_mask_round ((__v8sf) (A), \
					    (__v8si) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvtt_roundps_epu64(A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (-1), \
					    (R)))

#define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m256i) __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
						      (__v4di) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m256i) \
   __builtin_ia32_cvttps2uqq256_mask_round ((__v4sf) (A), \
					    (__v4di) \
					    (_mm256_setzero_si256 ()), \
					    (__mmask8) (U), \
					    (R)))

#define _mm256_cvt_roundepu32_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (-1), \
						      (R)))

#define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
						      (__v8hf) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtudq2ph256_mask_round ((__v8si) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_cvt_roundepu32_ps(A, R) \
  ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
						    (__v8sf) \
						    (_mm256_undefined_ps ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
						    (__v8sf) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m256) __builtin_ia32_cvtudq2ps256_mask_round ((__v8si) (A), \
						    (__v8sf) \
						    (_mm256_setzero_ps ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundepu64_pd(A, R) \
  ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
						     (__v4df) \
						     (_mm256_setzero_pd ()), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
						     (__v4df) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \
  ((__m256d) __builtin_ia32_cvtuqq2pd256_mask_round ((__v4di) (A), \
						     (__v4df) \
						     (_mm256_setzero_pd ()), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_cvt_roundepu64_ph(A, R) \
  ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (-1), \
						      (R)))

#define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \
  ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
						      (__v8hf) (W), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \
  ((__m128h) __builtin_ia32_vcvtuqq2ph256_mask_round ((__v4di) (A), \
						      (__v8hf) \
						      (_mm_setzero_ph ()), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_cvt_roundepu64_ps(A, R) \
  ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
						    (__v4sf) \
						    (_mm_setzero_ps ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
						    (__v4sf) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m128) __builtin_ia32_cvtuqq2ps256_mask_round ((__v4di) (A), \
						    (__v4sf) \
						    (_mm_setzero_ps ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_cvt_roundepu16_ph(A, R) \
  ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
						     (__v16hf) \
						     (_mm256_setzero_ph ()), \
						     (__mmask16) (-1), \
						     (R)))

#define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \
  ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
						     (__v16hf) (W), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \
  ((__m256h) __builtin_ia32_vcvtuw2ph256_mask_round ((__v16hi) (A), \
						     (__v16hf) \
						     (_mm256_setzero_ph ()), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_cvt_roundepi16_ph(A, R) \
  ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
						    (__v16hf) \
						    (_mm256_setzero_ph ()), \
						    (__mmask16) (-1), \
						    (R)))

#define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \
  ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
						    (__v16hf) (W), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \
  ((__m256h) __builtin_ia32_vcvtw2ph256_mask_round ((__v16hi) (A), \
						    (__v16hf) \
						    (_mm256_setzero_ph ()), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_div_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) \
						 (_mm256_undefined_pd ()), \
						 (__mmask8) (-1), \
						 (R)))

#define _mm256_mask_div_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) (W), \
						 (__mmask8) (U), \
						 (R)))

#define _mm256_maskz_div_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_divpd256_mask_round ((__v4df) (A), \
						 (__v4df) (B), \
						 (__v4df) \
						 (_mm256_setzero_pd ()), \
						 (__mmask8) (U), \
						 (R)))

#define _mm256_div_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) \
						 (_mm256_setzero_ph ()), \
						 (__mmask16) (-1), \
						 (R)))

#define _mm256_mask_div_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) (W), \
						 (__mmask16) (U), \
						 (R)))

#define _mm256_maskz_div_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_divph256_mask_round ((__v16hf) (A), \
						 (__v16hf) (B), \
						 (__v16hf) \
						 (_mm256_setzero_ph ()), \
						 (__mmask16) (U), \
						 (R)))

#define _mm256_div_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) \
						(_mm256_undefined_ps ()), \
						(__mmask8) (-1), \
						(R)))

#define _mm256_mask_div_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) (W), \
						(__mmask8) (U), \
						(R)))

#define _mm256_maskz_div_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_divps256_mask_round ((__v8sf) (A), \
						(__v8sf) (B), \
						(__v8sf) \
						(_mm256_setzero_ps ()), \
						(__mmask8) (U), \
						(R)))
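
/* In the _pch intrinsics each pair of adjacent _Float16 elements is
   treated as one complex number; fcmadd/fcmul conjugate the second
   operand (roughly, A * conj (B) + D and A * conj (B)), while the
   fmadd/fmul _pch forms further below use the unconjugated product.  */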

#define _mm256_fcmadd_round_pch(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfcmaddcph256_round ((A), (B), (D), (R)))

#define _mm256_mask_fcmadd_round_pch(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfcmaddcph256_mask_round ((__v16hf) (A), \
						      (__v16hf) (B), \
						      (__v16hf) (D), \
						      (U), (R)))

#define _mm256_mask3_fcmadd_round_pch(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfcmaddcph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fcmadd_round_pch(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfcmaddcph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fcmul_round_pch(A, B, R) \
  ((__m256h) __builtin_ia32_vfcmulcph256_round ((__v16hf) (A), \
						(__v16hf) (B), \
						(R)))

#define _mm256_mask_fcmul_round_pch(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \
						     (__v16hf) (B), \
						     (__v16hf) (W), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_maskz_fcmul_round_pch(U, A, B, R) \
  ((__m256h) __builtin_ia32_vfcmulcph256_mask_round ((__v16hf) (A), \
						     (__v16hf) (B), \
						     (__v16hf) \
						     (_mm256_setzero_ph ()), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_fixupimm_round_pd(A, B, D, C, R) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \
						      (__v4df) (B), \
						      (__v4di) (D), \
						      (C), \
						      (__mmask8) (-1), \
						      (R)))

#define _mm256_mask_fixupimm_round_pd(A, U, B, D, C, R) \
  ((__m256d) __builtin_ia32_fixupimmpd256_mask_round ((__v4df) (A), \
						      (__v4df) (B), \
						      (__v4di) (D), \
						      (C), \
						      (__mmask8) (U), \
						      (R)))

#define _mm256_maskz_fixupimm_round_pd(U, A, B, D, C, R) \
  ((__m256d) __builtin_ia32_fixupimmpd256_maskz_round ((__v4df) (A), \
						       (__v4df) (B), \
						       (__v4di) (D), \
						       (C), \
						       (__mmask8) (U), \
						       (R)))

#define _mm256_fixupimm_round_ps(A, B, D, C, R) \
  ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \
						     (__v8sf) (B), \
						     (__v8si) (D), \
						     (C), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_fixupimm_round_ps(A, U, B, D, C, R) \
  ((__m256) __builtin_ia32_fixupimmps256_mask_round ((__v8sf) (A), \
						     (__v8sf) (B), \
						     (__v8si) (D), \
						     (C), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_fixupimm_round_ps(U, A, B, D, C, R) \
  ((__m256) __builtin_ia32_fixupimmps256_maskz_round ((__v8sf) (A), \
						      (__v8sf) (B), \
						      (__v8si) (D), \
						      (C), \
						      (__mmask8) (U), \
						      (R)))
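
/* For the FMA-style macros below the masking convention is the usual
   AVX-512 one: the _mask forms keep elements of the first operand
   where the mask bit is clear, the _mask3 forms keep elements of the
   third (accumulator) operand, and the _maskz forms zero them.  */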

#define _mm256_fmadd_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmadd_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmadd_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfmaddpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmadd_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddpd256_maskz_round (A, B, D, U, R))

#define _mm256_fmadd_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmadd_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddph256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmadd_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfmaddph256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmadd_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddph256_maskz_round (A, B, D, U, R))

#define _mm256_fmadd_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmadd_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmadd_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfmaddps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmadd_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddps256_maskz_round (A, B, D, U, R))

#define _mm256_fmadd_round_pch(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddcph256_round ((A), (B), (D), (R)))

#define _mm256_mask_fmadd_round_pch(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddcph256_mask_round ((__v16hf) (A), \
						     (__v16hf) (B), \
						     (__v16hf) (D), \
						     (U), (R)))

#define _mm256_mask3_fmadd_round_pch(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfmaddcph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fmadd_round_pch(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddcph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fmaddsub_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmaddsub_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddsubpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmaddsub_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfmaddsubpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmaddsub_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmaddsubpd256_maskz_round (A, B, D, U, R))

#define _mm256_fmaddsub_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), -1, (R)))

#define _mm256_mask_fmaddsub_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddsubph256_mask_round ((A), (B), (D), (U), (R)))

#define _mm256_mask3_fmaddsub_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfmaddsubph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fmaddsub_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmaddsubph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fmaddsub_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddsubps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmaddsub_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddsubps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmaddsub_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfmaddsubps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmaddsub_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfmaddsubps256_maskz_round (A, B, D, U, R))

#define _mm256_fmsub_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmsub_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmsub_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfmsubpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmsub_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubpd256_maskz_round (A, B, D, U, R))

#define _mm256_fmsub_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), -1, (R)))

#define _mm256_mask_fmsub_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubph256_mask_round ((A), (B), (D), (U), (R)))

#define _mm256_mask3_fmsub_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfmsubph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fmsub_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fmsub_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmsub_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmsub_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfmsubps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmsub_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubps256_maskz_round (A, B, D, U, R))

#define _mm256_fmsubadd_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmsubadd_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubaddpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmsubadd_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfmsubaddpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmsubadd_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfmsubaddpd256_maskz_round (A, B, D, U, R))

#define _mm256_fmsubadd_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), -1, (R)))

#define _mm256_mask_fmsubadd_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubaddph256_mask_round ((A), (B), (D), (U), (R)))

#define _mm256_mask3_fmsubadd_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfmsubaddph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fmsubadd_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfmsubaddph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fmsubadd_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubaddps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fmsubadd_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubaddps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fmsubadd_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfmsubaddps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fmsubadd_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfmsubaddps256_maskz_round (A, B, D, U, R))

#define _mm256_fmul_round_pch(B, D, R) \
  ((__m256h) __builtin_ia32_vfmulcph256_round ((__v16hf) (B), \
					       (__v16hf) (D), \
					       (R)))

#define _mm256_mask_fmul_round_pch(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \
						    (__v16hf) (D), \
						    (__v16hf) (A), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_maskz_fmul_round_pch(U, B, D, R) \
  ((__m256h) __builtin_ia32_vfmulcph256_mask_round ((__v16hf) (B), \
						    (__v16hf) (D), \
						    (__v16hf) \
						    (_mm256_setzero_ph ()), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_fnmadd_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmaddpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fnmadd_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmaddpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fnmadd_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfnmaddpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fnmadd_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmaddpd256_maskz_round (A, B, D, U, R))

#define _mm256_fnmadd_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), -1, (R)))

#define _mm256_mask_fnmadd_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmaddph256_mask_round ((A), (B), (D), (U), (R)))

#define _mm256_mask3_fnmadd_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfnmaddph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fnmadd_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmaddph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fnmadd_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfnmaddps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fnmadd_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfnmaddps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fnmadd_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfnmaddps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fnmadd_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfnmaddps256_maskz_round (A, B, D, U, R))

#define _mm256_fnmsub_round_pd(A, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmsubpd256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fnmsub_round_pd(A, U, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmsubpd256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fnmsub_round_pd(A, B, D, U, R) \
  ((__m256d) __builtin_ia32_vfnmsubpd256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fnmsub_round_pd(U, A, B, D, R) \
  ((__m256d) __builtin_ia32_vfnmsubpd256_maskz_round (A, B, D, U, R))

#define _mm256_fnmsub_round_ph(A, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), -1, (R)))

#define _mm256_mask_fnmsub_round_ph(A, U, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmsubph256_mask_round ((A), (B), (D), (U), (R)))

#define _mm256_mask3_fnmsub_round_ph(A, B, D, U, R) \
  ((__m256h) __builtin_ia32_vfnmsubph256_mask3_round ((A), (B), (D), (U), (R)))

#define _mm256_maskz_fnmsub_round_ph(U, A, B, D, R) \
  ((__m256h) __builtin_ia32_vfnmsubph256_maskz_round ((A), (B), (D), (U), (R)))

#define _mm256_fnmsub_round_ps(A, B, D, R) \
  ((__m256) __builtin_ia32_vfnmsubps256_mask_round (A, B, D, -1, R))

#define _mm256_mask_fnmsub_round_ps(A, U, B, D, R) \
  ((__m256) __builtin_ia32_vfnmsubps256_mask_round (A, B, D, U, R))

#define _mm256_mask3_fnmsub_round_ps(A, B, D, U, R) \
  ((__m256) __builtin_ia32_vfnmsubps256_mask3_round (A, B, D, U, R))

#define _mm256_maskz_fnmsub_round_ps(U, A, B, D, R) \
  ((__m256) __builtin_ia32_vfnmsubps256_maskz_round (A, B, D, U, R))

#define _mm256_getexp_round_pd(A, R) \
  ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
						    (__v4df) \
						    (_mm256_undefined_pd ()), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_getexp_round_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
						    (__v4df) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_getexp_round_pd(U, A, R) \
  ((__m256d) __builtin_ia32_getexppd256_mask_round ((__v4df) (A), \
						    (__v4df) \
						    (_mm256_setzero_pd ()), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_getexp_round_ph(A, R) \
  ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
						    (__v16hf) \
						    (_mm256_setzero_ph ()), \
						    (__mmask16) (-1), \
						    (R)))

#define _mm256_mask_getexp_round_ph(W, U, A, R) \
  ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
						    (__v16hf) (W), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_maskz_getexp_round_ph(U, A, R) \
  ((__m256h) __builtin_ia32_getexpph256_mask_round ((__v16hf) (A), \
						    (__v16hf) \
						    (_mm256_setzero_ph ()), \
						    (__mmask16) (U), \
						    (R)))

#define _mm256_getexp_round_ps(A, R) \
  ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
						   (__v8sf) \
						   (_mm256_undefined_ps ()), \
						   (__mmask8) (-1), \
						   (R)))

#define _mm256_mask_getexp_round_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
						   (__v8sf) (W), \
						   (__mmask8) (U), \
						   (R)))

#define _mm256_maskz_getexp_round_ps(U, A, R) \
  ((__m256) __builtin_ia32_getexpps256_mask_round ((__v8sf) (A), \
						   (__v8sf) \
						   (_mm256_setzero_ps ()), \
						   (__mmask8) (U), \
						   (R)))
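
/* The getmant macros fold two immediates into one: B selects the
   mantissa normalization interval (e.g. _MM_MANT_NORM_1_2) and C the
   sign control (e.g. _MM_MANT_SIGN_src), packed as ((C) << 2) | (B).  */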

#define _mm256_getmant_round_pd(A, B, C, R) \
  ((__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v4df) (__m256d) \
						     _mm256_undefined_pd (), \
						     (__mmask8) (-1), \
						     (R)))

#define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \
  ((__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v4df) (__m256d) (W), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \
  ((__m256d) __builtin_ia32_getmantpd256_mask_round ((__v4df) (__m256d) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v4df) (__m256d) \
						     _mm256_setzero_pd (), \
						     (__mmask8) (U), \
						     (R)))

#define _mm256_getmant_round_ph(A, B, C, R) \
  ((__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v16hf) (__m256h) \
						     _mm256_undefined_ph (), \
						     (__mmask16) (-1), \
						     (R)))

#define _mm256_mask_getmant_round_ph(W, U, A, B, C, R) \
  ((__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v16hf) (__m256h) (W), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \
  ((__m256h) __builtin_ia32_getmantph256_mask_round ((__v16hf) (__m256h) (A), \
						     (int) (((C) << 2) | (B)), \
						     (__v16hf) (__m256h) \
						     _mm256_setzero_ph (), \
						     (__mmask16) (U), \
						     (R)))

#define _mm256_getmant_round_ps(A, B, C, R) \
  ((__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
						    (int) (((C) << 2) | (B)), \
						    (__v8sf) (__m256) \
						    _mm256_undefined_ps (), \
						    (__mmask8) (-1), \
						    (R)))

#define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \
  ((__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
						    (int) (((C) << 2) | (B)), \
						    (__v8sf) (__m256) (W), \
						    (__mmask8) (U), \
						    (R)))

#define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \
  ((__m256) __builtin_ia32_getmantps256_mask_round ((__v8sf) (__m256) (A), \
						    (int) (((C) << 2) | (B)), \
						    (__v8sf) (__m256) \
						    _mm256_setzero_ps (), \
						    (__mmask8) (U), \
						    (R)))
#define _mm256_max_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_undefined_pd ()), \
                                                 (__mmask8) (-1), \
                                                 (R)))

#define _mm256_mask_max_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) (W), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_maskz_max_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_maxpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_setzero_pd ()), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_max_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_undefined_ph ()), \
                                                 (__mmask16) (-1), \
                                                 (R)))

#define _mm256_mask_max_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) (W), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_maskz_max_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_maxph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_setzero_ph ()), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_max_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_undefined_ps ()), \
                                                (__mmask8) (-1), \
                                                (R)))

#define _mm256_mask_max_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) (W), \
                                                (__mmask8) (U), \
                                                (R)))

#define _mm256_maskz_max_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_maxps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_setzero_ps ()), \
                                                (__mmask8) (U), \
                                                (R)))

#define _mm256_min_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_undefined_pd ()), \
                                                 (__mmask8) (-1), \
                                                 (R)))

#define _mm256_mask_min_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) (W), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_maskz_min_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_minpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_setzero_pd ()), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_min_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_undefined_ph ()), \
                                                 (__mmask16) (-1), \
                                                 (R)))

#define _mm256_mask_min_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) (W), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_maskz_min_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_minph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_setzero_ph ()), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_min_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_undefined_ps ()), \
                                                (__mmask8) (-1), \
                                                (R)))

#define _mm256_mask_min_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) (W), \
                                                (__mmask8) (U), \
                                                (R)))

#define _mm256_maskz_min_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_minps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_setzero_ps ()), \
                                                (__mmask8) (U), \
                                                (R)))

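/* The three forms of every operation above and below follow the usual
   AVX-512 masking pattern: the plain form operates on all lanes, the
   mask_ form copies lanes whose mask bit is clear from W, and the
   maskz_ form zeroes them.  A sketch (illustrative only; `a', `b' and
   `w' are caller-supplied __m256d values):

     __mmask8 m = 0x5;
     __m256d lo = _mm256_mask_min_round_pd (w, m, a, b, _MM_FROUND_NO_EXC);
     __m256d hi = _mm256_maskz_max_round_pd (m, a, b, _MM_FROUND_NO_EXC);

   With m = 0x5 only lanes 0 and 2 are computed: lanes 1 and 3 of lo
   are copied from w, and the same lanes of hi are 0.0.  min and max
   never round, so for them R only chooses between _MM_FROUND_NO_EXC
   and _MM_FROUND_CUR_DIRECTION.  */
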
#define _mm256_mul_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_undefined_pd ()), \
                                                 (__mmask8) (-1), \
                                                 (R)))

#define _mm256_mask_mul_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) (W), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_maskz_mul_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_mulpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_setzero_pd ()), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_mul_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_undefined_ph ()), \
                                                 (__mmask16) (-1), \
                                                 (R)))

#define _mm256_mask_mul_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) (W), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_maskz_mul_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_mulph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_setzero_ph ()), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_mul_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_undefined_ps ()), \
                                                (__mmask8) (-1), \
                                                (R)))

#define _mm256_mask_mul_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) (W), \
                                                (__mmask8) (U), \
                                                (R)))

#define _mm256_maskz_mul_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_mulps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_setzero_ps ()), \
                                                (__mmask8) (U), \
                                                (R)))

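/* For the arithmetic operations (add, sub, mul, sqrt, scalef) R embeds
   a rounding mode, which must be combined with _MM_FROUND_NO_EXC.  A
   sketch (illustrative only; `a' and `b' are caller-supplied):

     __m256 p = _mm256_mul_round_ps (a, b,
                                     _MM_FROUND_TO_NEAREST_INT
                                     | _MM_FROUND_NO_EXC);

   The alternatives are _MM_FROUND_TO_NEG_INF, _MM_FROUND_TO_POS_INF
   and _MM_FROUND_TO_ZERO, or _MM_FROUND_CUR_DIRECTION to keep the
   current MXCSR rounding mode.  */
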
#define _mm256_range_round_pd(A, B, C, R) \
  ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
                                                   (__v4df) (B), \
                                                   (C), \
                                                   (__v4df) \
                                                   (_mm256_setzero_pd ()), \
                                                   (__mmask8) (-1), \
                                                   (R)))

#define _mm256_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
                                                   (__v4df) (B), \
                                                   (C), \
                                                   (__v4df) (W), \
                                                   (__mmask8) (U), \
                                                   (R)))

#define _mm256_maskz_range_round_pd(U, A, B, C, R) \
  ((__m256d) __builtin_ia32_rangepd256_mask_round ((__v4df) (A), \
                                                   (__v4df) (B), \
                                                   (C), \
                                                   (__v4df) \
                                                   (_mm256_setzero_pd ()), \
                                                   (__mmask8) (U), \
                                                   (R)))

#define _mm256_range_round_ps(A, B, C, R) \
  ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
                                                  (__v8sf) (B), \
                                                  (C), \
                                                  (__v8sf) \
                                                  (_mm256_setzero_ps ()), \
                                                  (__mmask8) (-1), \
                                                  (R)))

#define _mm256_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
                                                  (__v8sf) (B), \
                                                  (C), \
                                                  (__v8sf) (W), \
                                                  (__mmask8) (U), \
                                                  (R)))

#define _mm256_maskz_range_round_ps(U, A, B, C, R) \
  ((__m256) __builtin_ia32_rangeps256_mask_round ((__v8sf) (A), \
                                                  (__v8sf) (B), \
                                                  (C), \
                                                  (__v8sf) \
                                                  (_mm256_setzero_ps ()), \
                                                  (__mmask8) (U), \
                                                  (R)))

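/* The C operand of the range macros is the VRANGEPD/VRANGEPS imm8:
   bits [1:0] select minimum, maximum, absolute minimum or absolute
   maximum, and bits [3:2] the sign control (see the instruction
   description).  A sketch (illustrative only; 0x2 requests the
   absolute minimum with the sign taken from the first source):

     __m256d r = _mm256_range_round_pd (a, b, 0x2, _MM_FROUND_NO_EXC);  */
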
#define _mm256_reduce_round_pd(A, C, R) \
  ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
                                                    (C), \
                                                    (__v4df) \
                                                    (_mm256_setzero_pd ()), \
                                                    (__mmask8) (-1), \
                                                    (R)))

#define _mm256_mask_reduce_round_pd(W, U, A, C, R) \
  ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
                                                    (C), \
                                                    (__v4df) (W), \
                                                    (__mmask8) (U), \
                                                    (R)))

#define _mm256_maskz_reduce_round_pd(U, A, C, R) \
  ((__m256d) __builtin_ia32_reducepd256_mask_round ((__v4df) (A), \
                                                    (C), \
                                                    (__v4df) \
                                                    (_mm256_setzero_pd ()), \
                                                    (__mmask8) (U), \
                                                    (R)))

#define _mm256_reduce_round_ph(A, C, R) \
  ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
                                                    (C), \
                                                    (__v16hf) \
                                                    (_mm256_setzero_ph ()), \
                                                    (__mmask16) (-1), \
                                                    (R)))

#define _mm256_mask_reduce_round_ph(W, U, A, C, R) \
  ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
                                                    (C), \
                                                    (__v16hf) (W), \
                                                    (__mmask16) (U), \
                                                    (R)))

#define _mm256_maskz_reduce_round_ph(U, A, C, R) \
  ((__m256h) __builtin_ia32_reduceph256_mask_round ((__v16hf) (A), \
                                                    (C), \
                                                    (__v16hf) \
                                                    (_mm256_setzero_ph ()), \
                                                    (__mmask16) (U), \
                                                    (R)))

#define _mm256_reduce_round_ps(A, C, R) \
  ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
                                                   (C), \
                                                   (__v8sf) \
                                                   (_mm256_setzero_ps ()), \
                                                   (__mmask8) (-1), \
                                                   (R)))

#define _mm256_mask_reduce_round_ps(W, U, A, C, R) \
  ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
                                                   (C), \
                                                   (__v8sf) (W), \
                                                   (__mmask8) (U), \
                                                   (R)))

#define _mm256_maskz_reduce_round_ps(U, A, C, R) \
  ((__m256) __builtin_ia32_reduceps256_mask_round ((__v8sf) (A), \
                                                   (C), \
                                                   (__v8sf) \
                                                   (_mm256_setzero_ps ()), \
                                                   (__mmask8) (U), \
                                                   (R)))

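/* The C operand of the reduce macros is the VREDUCEPD/PH/PS imm8: bits
   [7:4] give the number of fraction bits M to keep and the low bits the
   rounding behaviour; the result is x minus x rounded to 2^-M
   precision.  A sketch (illustrative only; `x' is caller-supplied):

     __m256d f = _mm256_reduce_round_pd (x, 0x03, _MM_FROUND_NO_EXC);

   With M = 0 and truncation (imm 0x03), a lane holding 2.75 yields
   0.75, its fractional part.  */
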
#define _mm256_roundscale_round_pd(A, C, R) \
  ((__m256d) \
   __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
                                            (C), \
                                            (__v4df) \
                                            (_mm256_undefined_pd ()), \
                                            (__mmask8) (-1), \
                                            (R)))

#define _mm256_mask_roundscale_round_pd(W, U, A, C, R) \
  ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
                                                      (C), \
                                                      (__v4df) (W), \
                                                      (__mmask8) (U), \
                                                      (R)))

#define _mm256_maskz_roundscale_round_pd(U, A, C, R) \
  ((__m256d) __builtin_ia32_rndscalepd256_mask_round ((__v4df) (A), \
                                                      (C), \
                                                      (__v4df) \
                                                      (_mm256_setzero_pd ()), \
                                                      (__mmask8) (U), \
                                                      (R)))

#define _mm256_roundscale_round_ph(A, C, R) \
  ((__m256h) \
   __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
                                            (C), \
                                            (__v16hf) \
                                            (_mm256_undefined_ph ()), \
                                            (__mmask16) (-1), \
                                            (R)))

#define _mm256_mask_roundscale_round_ph(W, U, A, C, R) \
  ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
                                                      (C), \
                                                      (__v16hf) (W), \
                                                      (__mmask16) (U), \
                                                      (R)))

#define _mm256_maskz_roundscale_round_ph(U, A, C, R) \
  ((__m256h) __builtin_ia32_rndscaleph256_mask_round ((__v16hf) (A), \
                                                      (C), \
                                                      (__v16hf) \
                                                      (_mm256_setzero_ph ()), \
                                                      (__mmask16) (U), \
                                                      (R)))

#define _mm256_roundscale_round_ps(A, C, R) \
  ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
                                                     (C), \
                                                     (__v8sf) \
                                                     (_mm256_undefined_ps ()), \
                                                     (__mmask8) (-1), \
                                                     (R)))

#define _mm256_mask_roundscale_round_ps(W, U, A, C, R) \
  ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
                                                     (C), \
                                                     (__v8sf) (W), \
                                                     (__mmask8) (U), \
                                                     (R)))

#define _mm256_maskz_roundscale_round_ps(U, A, C, R) \
  ((__m256) __builtin_ia32_rndscaleps256_mask_round ((__v8sf) (A), \
                                                     (C), \
                                                     (__v8sf) \
                                                     (_mm256_setzero_ps ()), \
                                                     (__mmask8) (U), \
                                                     (R)))

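/* The C operand of the roundscale macros is the VRNDSCALE* imm8: bits
   [7:4] give M, rounding the value to 2^-M precision, and the low bits
   select the rounding mode (0x0 nearest, 0x1 down, 0x2 up, 0x3
   truncate).  A sketch (illustrative only; `x' is caller-supplied):

     __m256d i = _mm256_roundscale_round_pd (x, 0x01, _MM_FROUND_NO_EXC);
     __m256d h = _mm256_roundscale_round_pd (x, (1 << 4) | 0x01,
                                             _MM_FROUND_NO_EXC);

   i is x rounded down to whole numbers, h to multiples of 0.5.  */
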
#define _mm256_scalef_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
                                                    (__v4df) (B), \
                                                    (__v4df) \
                                                    (_mm256_undefined_pd ()), \
                                                    (__mmask8) (-1), \
                                                    (R)))

#define _mm256_mask_scalef_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
                                                    (__v4df) (B), \
                                                    (__v4df) (W), \
                                                    (__mmask8) (U), \
                                                    (R)))

#define _mm256_maskz_scalef_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_scalefpd256_mask_round ((__v4df) (A), \
                                                    (__v4df) (B), \
                                                    (__v4df) \
                                                    (_mm256_setzero_pd ()), \
                                                    (__mmask8) (U), \
                                                    (R)))

#define _mm256_scalef_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
                                                    (__v16hf) (B), \
                                                    (__v16hf) \
                                                    (_mm256_undefined_ph ()), \
                                                    (__mmask16) (-1), \
                                                    (R)))

#define _mm256_mask_scalef_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
                                                    (__v16hf) (B), \
                                                    (__v16hf) (W), \
                                                    (__mmask16) (U), \
                                                    (R)))

#define _mm256_maskz_scalef_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_scalefph256_mask_round ((__v16hf) (A), \
                                                    (__v16hf) (B), \
                                                    (__v16hf) \
                                                    (_mm256_setzero_ph ()), \
                                                    (__mmask16) (U), \
                                                    (R)))

#define _mm256_scalef_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
                                                   (__v8sf) (B), \
                                                   (__v8sf) \
                                                   (_mm256_undefined_ps ()), \
                                                   (__mmask8) (-1), \
                                                   (R)))

#define _mm256_mask_scalef_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
                                                   (__v8sf) (B), \
                                                   (__v8sf) (W), \
                                                   (__mmask8) (U), \
                                                   (R)))

#define _mm256_maskz_scalef_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_scalefps256_mask_round ((__v8sf) (A), \
                                                   (__v8sf) (B), \
                                                   (__v8sf) \
                                                   (_mm256_setzero_ps ()), \
                                                   (__mmask8) (U), \
                                                   (R)))

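/* scalef computes A * 2^floor(B) per element, the companion of
   getexp/getmant for reassembling a value from its parts.  A sketch
   (illustrative only):

     __m256d y = _mm256_scalef_round_pd (_mm256_set1_pd (1.5),
                                         _mm256_set1_pd (3.0),
                                         _MM_FROUND_TO_NEAREST_INT
                                         | _MM_FROUND_NO_EXC);

   Each lane of y is 1.5 * 2^3 = 12.0.  */
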
#define _mm256_sqrt_round_pd(A, R) \
  ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
                                                  (__v4df) \
                                                  (_mm256_undefined_pd ()), \
                                                  (__mmask8) (-1), \
                                                  (R)))

#define _mm256_mask_sqrt_round_pd(W, U, A, R) \
  ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
                                                  (__v4df) (W), \
                                                  (__mmask8) (U), \
                                                  (R)))

#define _mm256_maskz_sqrt_round_pd(U, A, R) \
  ((__m256d) __builtin_ia32_sqrtpd256_mask_round ((__v4df) (A), \
                                                  (__v4df) \
                                                  (_mm256_setzero_pd ()), \
                                                  (__mmask8) (U), \
                                                  (R)))

#define _mm256_sqrt_round_ph(A, R) \
  ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
                                                  (__v16hf) \
                                                  (_mm256_undefined_ph ()), \
                                                  (__mmask16) (-1), \
                                                  (R)))

#define _mm256_mask_sqrt_round_ph(W, U, A, R) \
  ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
                                                  (__v16hf) (W), \
                                                  (__mmask16) (U), \
                                                  (R)))

#define _mm256_maskz_sqrt_round_ph(U, A, R) \
  ((__m256h) __builtin_ia32_sqrtph256_mask_round ((__v16hf) (A), \
                                                  (__v16hf) \
                                                  (_mm256_setzero_ph ()), \
                                                  (__mmask16) (U), \
                                                  (R)))

#define _mm256_sqrt_round_ps(A, R) \
  ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
                                                 (__v8sf) \
                                                 (_mm256_undefined_ps ()), \
                                                 (__mmask8) (-1), \
                                                 (R)))

#define _mm256_mask_sqrt_round_ps(W, U, A, R) \
  ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
                                                 (__v8sf) (W), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_maskz_sqrt_round_ps(U, A, R) \
  ((__m256) __builtin_ia32_sqrtps256_mask_round ((__v8sf) (A), \
                                                 (__v8sf) \
                                                 (_mm256_setzero_ps ()), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_sub_round_pd(A, B, R) \
  ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_undefined_pd ()), \
                                                 (__mmask8) (-1), \
                                                 (R)))

#define _mm256_mask_sub_round_pd(W, U, A, B, R) \
  ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) (W), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_maskz_sub_round_pd(U, A, B, R) \
  ((__m256d) __builtin_ia32_subpd256_mask_round ((__v4df) (A), \
                                                 (__v4df) (B), \
                                                 (__v4df) \
                                                 (_mm256_setzero_pd ()), \
                                                 (__mmask8) (U), \
                                                 (R)))

#define _mm256_sub_round_ph(A, B, R) \
  ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_undefined_ph ()), \
                                                 (__mmask16) (-1), \
                                                 (R)))

#define _mm256_mask_sub_round_ph(W, U, A, B, R) \
  ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) (W), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_maskz_sub_round_ph(U, A, B, R) \
  ((__m256h) __builtin_ia32_subph256_mask_round ((__v16hf) (A), \
                                                 (__v16hf) (B), \
                                                 (__v16hf) \
                                                 (_mm256_setzero_ph ()), \
                                                 (__mmask16) (U), \
                                                 (R)))

#define _mm256_sub_round_ps(A, B, R) \
  ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_undefined_ps ()), \
                                                (__mmask8) (-1), \
                                                (R)))

#define _mm256_mask_sub_round_ps(W, U, A, B, R) \
  ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) (W), \
                                                (__mmask8) (U), \
                                                (R)))

#define _mm256_maskz_sub_round_ps(U, A, B, R) \
  ((__m256) __builtin_ia32_subps256_mask_round ((__v8sf) (A), \
                                                (__v8sf) (B), \
                                                (__v8sf) \
                                                (_mm256_setzero_ps ()), \
                                                (__mmask8) (U), \
                                                (R)))
#endif /* __OPTIMIZE__ */

#define _mm256_cmul_round_pch(A, B, R) _mm256_fcmul_round_pch ((A), (B), (R))
#define _mm256_mask_cmul_round_pch(W, U, A, B, R) \
  _mm256_mask_fcmul_round_pch ((W), (U), (A), (B), (R))
#define _mm256_maskz_cmul_round_pch(U, A, B, R) \
  _mm256_maskz_fcmul_round_pch ((U), (A), (B), (R))

#define _mm256_mul_round_pch(A, B, R) _mm256_fmul_round_pch ((A), (B), (R))
#define _mm256_mask_mul_round_pch(W, U, A, B, R) \
  _mm256_mask_fmul_round_pch ((W), (U), (A), (B), (R))
#define _mm256_maskz_mul_round_pch(U, A, B, R) \
  _mm256_maskz_fmul_round_pch ((U), (A), (B), (R))

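/* The cmul/mul _pch names are aliases kept for API symmetry; they
   forward directly to the fcmul/fmul forms defined earlier, so e.g.
   _mm256_mul_round_pch (a, b, r) and _mm256_fmul_round_pch (a, b, r)
   are interchangeable.  */
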
#ifdef __DISABLE_AVX10_2_256__
#undef __DISABLE_AVX10_2_256__
#pragma GCC pop_options
#endif /* __DISABLE_AVX10_2_256__ */

#endif /* _AVX10_2ROUNDINGINTRIN_H_INCLUDED */