/* Copyright (C) 2019-2024 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512FP16VLINTRIN_H_INCLUDED
#define __AVX512FP16VLINTRIN_H_INCLUDED

#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) || defined (__EVEX512__)
#pragma GCC push_options
#pragma GCC target("avx512fp16,avx512vl,no-evex512")
#define __DISABLE_AVX512FP16VL__
#endif /* __AVX512FP16VL__ */
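
/* Note: the push_options/target pragma above lets the wrappers below be
   compiled as AVX512FP16+AVX512VL code even in translation units built
   without those ISA flags; a matching "#pragma GCC pop_options" guarded by
   __DISABLE_AVX512FP16VL__ is expected at the end of this header, beyond
   this excerpt.  A minimal usage sketch (hypothetical caller, not part of
   this header), assuming a TU compiled without AVX512 flags:

     __attribute__ ((target ("avx512fp16,avx512vl")))
     __m128h add8xf16 (__m128h __a, __m128h __b)
     {
       return _mm_add_ph (__a, __b);
     }
*/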
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_set1_ps (float __F)
{
  return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F };
}

extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_set1_ps (float __A)
{
  return __extension__ (__m256){ __A, __A, __A, __A,
				 __A, __A, __A, __A };
}

extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_avx512_and_si128 (__m128i __A, __m128i __B)
{
  return (__m128i) ((__v2du)__A & (__v2du)__B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_avx512_and_si256 (__m256i __A, __m256i __B)
{
  return (__m256i) ((__v4du)__A & (__v4du)__B);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_ps (__m128h __a)
{
  return (__m128) __a;
}

extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_ps (__m256h __a)
{
  return (__m256) __a;
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_pd (__m128h __a)
{
  return (__m128d) __a;
}

extern __inline __m256d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_pd (__m256h __a)
{
  return (__m256d) __a;
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castph_si128 (__m128h __a)
{
  return (__m128i) __a;
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph_si256 (__m256h __a)
{
  return (__m256i) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castps_ph (__m128 __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castps_ph (__m256 __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castpd_ph (__m128d __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castpd_ph (__m256d __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_castsi128_ph (__m128i __a)
{
  return (__m128h) __a;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castsi256_ph (__m256i __a)
{
  return (__m256h) __a;
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph256_ph128 (__m256h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u = { .__v = __A };
  return __u.__a[0];
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_castph128_ph256 (__m128h __A)
{
  union
  {
    __m128h __a[2];
    __m256h __v;
  } __u;
  __u.__a[0] = __A;
  return __u.__v;
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_zextph128_ph256 (__m128h __A)
{
  return (__m256h) _mm256_avx512_insertf128_ps (_mm256_avx512_setzero_ps (),
						(__m128) __A, 0);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_conj_pch (__m256h __A)
{
  return (__m256h) _mm256_xor_epi32 ((__m256i) __A,
				     _mm256_avx512_set1_epi32 (1U<<31));
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) _mm256_conj_pch (__A),
						  (__v8sf) __W,
						  (__mmask8) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) _mm256_conj_pch (__A),
						  (__v8sf) _mm256_avx512_setzero_ps (),
						  (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_conj_pch (__m128h __A)
{
  return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_avx512_set1_epi32 (1U<<31));
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) __W,
						  (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_conj_pch (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A),
						  (__v4sf) _mm_avx512_setzero_ps (),
						  (__mmask8) __U);
}
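
/* Note: the conj_pch wrappers above treat each 32-bit element as one FP16
   complex number (real part in the low half-float lane, imaginary part in
   the high lane); XOR-ing with 1U << 31 flips only the sign bit of the
   imaginary lane.  A minimal sketch (hypothetical values, not part of this
   header):

     __m128h __c = _mm_setr_ph (1.0, 2.0, 0, 0, 0, 0, 0, 0);  // 1+2i, ...
     __m128h __r = _mm_conj_pch (__c);                        // 1-2i, ...
*/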
/* Intrinsics v[add,sub,mul,div]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A + (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_add_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A + (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_addph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_addph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_addph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A - (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sub_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A - (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_subph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_subph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_subph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A * (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mul_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A * (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_mulph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_mulph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_mulph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_ph (__m128h __A, __m128h __B)
{
  return (__m128h) ((__v8hf) __A / (__v8hf) __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_div_ph (__m256h __A, __m256h __B)
{
  return (__m256h) ((__v16hf) __A / (__v16hf) __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_divph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_divph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_divph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

/* Intrinsics v[max,min]ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_max_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_maxph128_mask (__A, __B, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_max_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_maxph256_mask (__A, __B, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_maxph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_maxph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_maxph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_min_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_minph128_mask (__A, __B, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_min_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_minph256_mask (__A, __B, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_minph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D)
{
  return __builtin_ia32_minph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_minph256_mask (__B, __C, _mm256_setzero_ph (), __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_ph (__m128h __A)
{
  return (__m128h) _mm_avx512_and_si128 (_mm_avx512_set1_epi32 (0x7FFF7FFF),
					 (__m128i) __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_abs_ph (__m256h __A)
{
  return (__m256h) _mm256_avx512_and_si256 (_mm256_avx512_set1_epi32 (0x7FFF7FFF),
					    (__m256i) __A);
}
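
/* Note: _mm_abs_ph/_mm256_abs_ph above need no dedicated instruction;
   AND-ing with 0x7FFF7FFF clears bit 15 of both half-precision lanes
   packed in every 32-bit element, i.e. the FP16 sign bits.  A minimal
   sketch (hypothetical values):

     __m128h __x = _mm_set1_ph (-2.5);
     __m128h __y = _mm_abs_ph (__x);   // every lane becomes 2.5
*/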
/* vcmpph */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C,
						  (__mmask8) -1);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C,
		      const int __D)
{
  return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cmp_ph_mask (__m256h __A, __m256h __B, const int __C)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C,
						   (__mmask16) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C,
			 const int __D)
{
  return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D, __A);
}

#else
#define _mm_cmp_ph_mask(A, B, C)		\
  (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1)))

#define _mm_mask_cmp_ph_mask(A, B, C, D)	\
  (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A)))

#define _mm256_cmp_ph_mask(A, B, C)		\
  (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1)))

#define _mm256_mask_cmp_ph_mask(A, B, C, D)	\
  (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A)))

#endif /* __OPTIMIZE__ */
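
/* Note: the third argument of the cmp wrappers selects one of the _CMP_*
   predicates from <immintrin.h> and must be a compile-time constant,
   which is why macro fallbacks are provided when __OPTIMIZE__ is not
   defined.  A minimal sketch (hypothetical operands):

     __mmask8 __m = _mm_cmp_ph_mask (__a, __b, _CMP_LT_OQ);
     // bit i of __m is set iff __a[i] < __b[i] (ordered, quiet)
*/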
/* Intrinsics vsqrtph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_ph (__m128h __A)
{
  return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (),
					(__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_sqrt_ph (__m256h __A)
{
  return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (),
					(__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_sqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_sqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vrsqrtph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt_ph (__m128h __A)
{
  return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (),
					 (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rsqrt_ph (__m256h __A)
{
  return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (),
					 (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rsqrtph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rsqrtph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vrcpph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp_ph (__m128h __A)
{
  return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (),
				       (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_rcp_ph (__m256h __A)
{
  return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (),
				       (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_rcpph128_mask (__C, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C)
{
  return __builtin_ia32_rcpph256_mask (__C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B)
{
  return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (), __A);
}

/* Intrinsics vscalefph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_ph (__m128h __A, __m128h __B)
{
  return __builtin_ia32_scalefph128_mask (__A, __B, _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_scalef_ph (__m256h __A, __m256h __B)
{
  return __builtin_ia32_scalefph256_mask (__A, __B, _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
{
  return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C,
		       __m256h __D)
{
  return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C)
{
  return __builtin_ia32_scalefph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C)
{
  return __builtin_ia32_scalefph256_mask (__B, __C, _mm256_setzero_ph (),
					  __A);
}

/* Intrinsics vreduceph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_reduce_ph (__m128h __A, int __B)
{
  return __builtin_ia32_reduceph128_mask (__A, __B, _mm_setzero_ph (),
					  (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_reduceph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_reduce_ph (__m256h __A, int __B)
{
  return __builtin_ia32_reduceph256_mask (__A, __B, _mm256_setzero_ph (),
					  (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D)
{
  return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_reduceph256_mask (__B, __C, _mm256_setzero_ph (),
					  __A);
}

#else
#define _mm_reduce_ph(A, B)			\
  (__builtin_ia32_reduceph128_mask ((A), (B),	\
				    _mm_setzero_ph (), ((__mmask8)-1)))

#define _mm_mask_reduce_ph(A, B, C, D)		\
  (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B)))

#define _mm_maskz_reduce_ph(A, B, C)		\
  (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_reduce_ph(A, B)			\
  (__builtin_ia32_reduceph256_mask ((A), (B),	\
				    _mm256_setzero_ph (), ((__mmask16)-1)))

#define _mm256_mask_reduce_ph(A, B, C, D)	\
  (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B)))

#define _mm256_maskz_reduce_ph(A, B, C)		\
  (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */

/* Intrinsics vrndscaleph. */
#ifdef __OPTIMIZE__
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_roundscale_ph (__m128h __A, int __B)
{
  return __builtin_ia32_rndscaleph128_mask (__A, __B, _mm_setzero_ph (),
					    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D)
{
  return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C)
{
  return __builtin_ia32_rndscaleph128_mask (__B, __C, _mm_setzero_ph (), __A);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_roundscale_ph (__m256h __A, int __B)
{
  return __builtin_ia32_rndscaleph256_mask (__A, __B, _mm256_setzero_ph (),
					    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C,
			   int __D)
{
  return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C)
{
  return __builtin_ia32_rndscaleph256_mask (__B, __C, _mm256_setzero_ph (),
					    __A);
}

#else
#define _mm_roundscale_ph(A, B)						\
  (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (),	\
				      ((__mmask8)-1)))

#define _mm_mask_roundscale_ph(A, B, C, D)				\
  (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B)))

#define _mm_maskz_roundscale_ph(A, B, C)				\
  (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A)))

#define _mm256_roundscale_ph(A, B)					\
  (__builtin_ia32_rndscaleph256_mask ((A), (B), _mm256_setzero_ph (),	\
				      ((__mmask16)-1)))

#define _mm256_mask_roundscale_ph(A, B, C, D)				\
  (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B)))

#define _mm256_maskz_roundscale_ph(A, B, C)				\
  (__builtin_ia32_rndscaleph256_mask ((B), (C), _mm256_setzero_ph (), (A)))

#endif /* __OPTIMIZE__ */
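
/* Note: for the roundscale wrappers the immediate packs two controls:
   imm[7:4] is the number M of binary fraction bits to keep, and imm[3:0]
   selects the rounding behaviour (rounding mode in bits [1:0], plus
   MXCSR-override and exception-suppression bits), giving
   2^-M * round (2^M * x).  A minimal sketch (hypothetical value), rounding
   to halves toward nearest-even:

     __m128h __r = _mm_roundscale_ph (__x, (1 << 4) | 0x00);
*/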
/* Intrinsics vfpclassph. */
#ifdef __OPTIMIZE__
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm, __U);
}

extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_fpclass_ph_mask (__m128h __A, const int __imm)
{
  return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A,
						      __imm, (__mmask8) -1);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm, __U);
}

extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_fpclass_ph_mask (__m256h __A, const int __imm)
{
  return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A,
						       __imm, (__mmask16) -1);
}

#else
#define _mm_fpclass_ph_mask(X, C)					\
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C), (__mmask8) -1))

#define _mm_mask_fpclass_ph_mask(u, X, C)				\
  ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X),	\
						(int) (C), (__mmask8) (u)))

#define _mm256_fpclass_ph_mask(X, C)					\
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C), (__mmask16) -1))

#define _mm256_mask_fpclass_ph_mask(u, X, C)				\
  ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \
						 (int) (C), (__mmask16) (u)))
#endif /* __OPTIMIZE__ */

/* Intrinsics vgetexpph, vgetexpsh. */
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getexp_ph (__m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) _mm256_setzero_ph (),
						    (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) __W,
						    (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A)
{
  return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A,
						    (__v16hf) _mm256_setzero_ph (),
						    (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getexp_ph (__m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) _mm_setzero_ph (),
						    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) __W,
						    (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A)
{
  return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A,
						    (__v8hf) _mm_setzero_ph (),
						    (__mmask8) __U);
}

/* Intrinsics vgetmantph, vgetmantsh. */
#ifdef __OPTIMIZE__
extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B,
		   _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) _mm256_setzero_ph (),
						     (__mmask16) -1);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A,
			_MM_MANTISSA_NORM_ENUM __B,
			_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) __W,
						     (__mmask16) __U);
}

extern __inline __m256h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A,
			 _MM_MANTISSA_NORM_ENUM __B,
			 _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A,
						     (__C << 2) | __B,
						     (__v16hf) _mm256_setzero_ph (),
						     (__mmask16) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B,
		_MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) _mm_setzero_ph (),
						     (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A,
		     _MM_MANTISSA_NORM_ENUM __B,
		     _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) __W,
						     (__mmask8) __U);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A,
		      _MM_MANTISSA_NORM_ENUM __B,
		      _MM_MANTISSA_SIGN_ENUM __C)
{
  return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A,
						     (__C << 2) | __B,
						     (__v8hf) _mm_setzero_ph (),
						     (__mmask8) __U);
}

#else
#define _mm256_getmant_ph(X, B, C)					\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)-1))

#define _mm256_mask_getmant_ph(W, U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)(W),	\
					       (__mmask16)(U)))

#define _mm256_maskz_getmant_ph(U, X, B, C)				\
  ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v16hf)(__m256h)_mm256_setzero_ph (), \
					       (__mmask16)(U)))

#define _mm_getmant_ph(X, B, C)						\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)-1))

#define _mm_mask_getmant_ph(W, U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)(W),	\
					       (__mmask8)(U)))

#define _mm_maskz_getmant_ph(U, X, B, C)				\
  ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X),	\
					       (int)(((C)<<2) | (B)),	\
					       (__v8hf)(__m128h)_mm_setzero_ph (), \
					       (__mmask8)(U)))

#endif /* __OPTIMIZE__ */
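
/* Note: the getmant wrappers pack their two enum arguments into one
   immediate as (__C << 2) | __B: bits [1:0] carry the _MM_MANT_NORM_*
   interval and bits [3:2] the _MM_MANT_SIGN_* control.  A minimal sketch
   (hypothetical operand) normalizing mantissas to [1, 2):

     __m128h __m = _mm_getmant_ph (__x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src);
*/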
/* Intrinsics vcvtph2dq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__A,
				      (__v4si) _mm_avx512_setzero_si128 (),
				      (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvtph2dq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2dq128_mask (__B,
				      (__v4si) _mm_avx512_setzero_si128 (),
				      __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__A,
				      (__v8si) _mm256_avx512_setzero_si256 (),
				      (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i) __builtin_ia32_vcvtph2dq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2dq256_mask (__B,
				      (__v8si) _mm256_avx512_setzero_si256 (),
				      __A);
}

/* Intrinsics vcvtph2udq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__A,
				       (__v4si) _mm_avx512_setzero_si128 (),
				       (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvtph2udq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvtph2udq128_mask (__B,
				       (__v4si) _mm_avx512_setzero_si128 (),
				       __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__A,
				       (__v8si) _mm256_avx512_setzero_si256 (),
				       (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i) __builtin_ia32_vcvtph2udq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvtph2udq256_mask (__B,
				       (__v8si) _mm256_avx512_setzero_si256 (),
				       __A);
}

/* Intrinsics vcvttph2dq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__A,
				       (__v4si) _mm_avx512_setzero_si128 (),
				       (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvttph2dq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2dq128_mask (__B,
				       (__v4si) _mm_avx512_setzero_si128 (),
				       __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__A,
				       (__v8si) _mm256_avx512_setzero_si256 (),
				       (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i) __builtin_ia32_vcvttph2dq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2dq256_mask (__B,
				       (__v8si) _mm256_avx512_setzero_si256 (),
				       __A);
}

/* Intrinsics vcvttph2udq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu32 (__m128h __A)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__A,
					(__v4si) _mm_avx512_setzero_si128 (),
					(__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return (__m128i) __builtin_ia32_vcvttph2udq128_mask (__C, (__v4si) __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m128i)
    __builtin_ia32_vcvttph2udq128_mask (__B,
					(__v4si) _mm_avx512_setzero_si128 (),
					__A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu32 (__m128h __A)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__A,
					(__v8si) _mm256_avx512_setzero_si256 (),
					(__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return (__m256i) __builtin_ia32_vcvttph2udq256_mask (__C, (__v8si) __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B)
{
  return (__m256i)
    __builtin_ia32_vcvttph2udq256_mask (__B,
					(__v8si) _mm256_avx512_setzero_si256 (),
					__A);
}

/* Intrinsics vcvtdq2ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepi32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A, _mm_setzero_ph (),
					   (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B, _mm_setzero_ph (),
					   __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepi32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A, _mm_setzero_ph (),
					   (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B, _mm_setzero_ph (),
					   __A);
}
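
/* Note: int32 -> FP16 conversions narrow the element type, so even the
   256-bit-source variants above return a __m128h: eight 32-bit lanes
   become eight half-float lanes in a 128-bit vector.  A minimal sketch:

     __m256i __v = _mm256_set1_epi32 (3);
     __m128h __h = _mm256_cvtepi32_ph (__v);  // 8 x 3.0 packed in 128 bits
*/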
/* Intrinsics vcvtudq2ph. */
extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtepu32_ph (__m128i __A)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A, _mm_setzero_ph (),
					    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B)
{
  return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B, _mm_setzero_ph (),
					    __A);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtepu32_ph (__m256i __A)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A, _mm_setzero_ph (),
					    (__mmask8) -1);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B);
}

extern __inline __m128h
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B)
{
  return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B, _mm_setzero_ph (),
					    __A);
}

/* Intrinsics vcvtph2qq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq128_mask (__A,
					   _mm_avx512_setzero_si128 (),
					   (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq128_mask (__B,
					   _mm_avx512_setzero_si128 (),
					   __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2qq256_mask (__A,
					   _mm256_avx512_setzero_si256 (),
					   (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2qq256_mask (__B,
					   _mm256_avx512_setzero_si256 (),
					   __A);
}

/* Intrinsics vcvtph2uqq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__A,
					    _mm_avx512_setzero_si128 (),
					    (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq128_mask (__B,
					    _mm_avx512_setzero_si128 (),
					    __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvtph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__A,
					    _mm256_avx512_setzero_si256 (),
					    (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvtph2uqq256_mask (__B,
					    _mm256_avx512_setzero_si256 (),
					    __A);
}

/* Intrinsics vcvttph2qq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq128_mask (__A,
					    _mm_avx512_setzero_si128 (),
					    (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq128_mask (__B,
					    _mm_avx512_setzero_si128 (),
					    __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epi64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2qq256_mask (__A,
					    _mm256_avx512_setzero_si256 (),
					    (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2qq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2qq256_mask (__B,
					    _mm256_avx512_setzero_si256 (),
					    __A);
}

/* Intrinsics vcvttph2uqq. */
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__A,
					     _mm_avx512_setzero_si128 (),
					     (__mmask8) -1);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__C, __A, __B);
}

extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq128_mask (__B,
					     _mm_avx512_setzero_si128 (),
					     __A);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_cvttph_epu64 (__m128h __A)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__A,
					     _mm256_avx512_setzero_si256 (),
					     (__mmask8) -1);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__C, __A, __B);
}

extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B)
{
  return __builtin_ia32_vcvttph2uqq256_mask (__B,
					     _mm256_avx512_setzero_si256 (),
					     __A);
}
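
/* Note: the vcvtph2qq/vcvtph2uqq wrappers above round according to the
   current MXCSR rounding mode, while the vcvttph2qq/vcvttph2uqq ("t" for
   truncate) wrappers always round toward zero, matching C cast semantics.
   A minimal sketch (hypothetical value):

     __m128h __x = _mm_set1_ph (2.7);
     __m128i __r = _mm_cvtph_epi64 (__x);   // {3, 3} under default MXCSR
     __m128i __t = _mm_cvttph_epi64 (__x);  // {2, 2} always
*/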
1708 /* Intrinsics vcvtqq2ph. */
1709 extern __inline __m128h
1710 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1711 _mm_cvtepi64_ph (__m128i __A)
1713 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A,
1714 _mm_setzero_ph (),
1715 (__mmask8) -1);
1718 extern __inline __m128h
1719 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1720 _mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C)
1722 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B);
1725 extern __inline __m128h
1726 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1727 _mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B)
1729 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B,
1730 _mm_setzero_ph (),
1731 __A);
1734 extern __inline __m128h
1735 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1736 _mm256_cvtepi64_ph (__m256i __A)
1738 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A,
1739 _mm_setzero_ph (),
1740 (__mmask8) -1);
1743 extern __inline __m128h
1744 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1745 _mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C)
1747 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B);
1750 extern __inline __m128h
1751 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1752 _mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B)
1754 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B,
1755 _mm_setzero_ph (),
1756 __A);
1759 /* Intrinsics vcvtuqq2ph. */
1760 extern __inline __m128h
1761 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1762 _mm_cvtepu64_ph (__m128i __A)
1764 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A,
1765 _mm_setzero_ph (),
1766 (__mmask8) -1);
1769 extern __inline __m128h
1770 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1771 _mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C)
1773 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B);
1776 extern __inline __m128h
1777 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1778 _mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B)
1780 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B,
1781 _mm_setzero_ph (),
1782 __A);
1785 extern __inline __m128h
1786 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1787 _mm256_cvtepu64_ph (__m256i __A)
1789 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A,
1790 _mm_setzero_ph (),
1791 (__mmask8) -1);
1794 extern __inline __m128h
1795 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1796 _mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C)
1798 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B);
1801 extern __inline __m128h
1802 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1803 _mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B)
1805 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B,
1806 _mm_setzero_ph (),
1807 __A);
1810 /* Intrinsics vcvtph2w. */
1811 extern __inline __m128i
1812 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1813 _mm_cvtph_epi16 (__m128h __A)
1815 return (__m128i)
1816 __builtin_ia32_vcvtph2w128_mask (__A,
1817 (__v8hi)
1818 _mm_avx512_setzero_si128 (),
1819 (__mmask8) -1);
1822 extern __inline __m128i
1823 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1824 _mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
1826 return (__m128i)
1827 __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B);
1830 extern __inline __m128i
1831 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1832 _mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B)
1834 return (__m128i)
1835 __builtin_ia32_vcvtph2w128_mask (__B,
1836 (__v8hi)
1837 _mm_avx512_setzero_si128 (),
1838 __A);
1841 extern __inline __m256i
1842 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1843 _mm256_cvtph_epi16 (__m256h __A)
1845 return (__m256i)
1846 __builtin_ia32_vcvtph2w256_mask (__A,
1847 (__v16hi)
1848 _mm256_avx512_setzero_si256 (),
1849 (__mmask16) -1);
1852 extern __inline __m256i
1853 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1854 _mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
1856 return (__m256i)
1857 __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B);
1860 extern __inline __m256i
1861 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1862 _mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B)
1864 return (__m256i)
1865 __builtin_ia32_vcvtph2w256_mask (__B,
1866 (__v16hi)
1867 _mm256_avx512_setzero_si256 (),
1868 __A);
1871 /* Intrinsics vcvtph2uw. */
1872 extern __inline __m128i
1873 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1874 _mm_cvtph_epu16 (__m128h __A)
1876 return (__m128i)
1877 __builtin_ia32_vcvtph2uw128_mask (__A,
1878 (__v8hi)
1879 _mm_avx512_setzero_si128 (),
1880 (__mmask8) -1);
1883 extern __inline __m128i
1884 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1885 _mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
1887 return (__m128i)
1888 __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B);
1891 extern __inline __m128i
1892 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1893 _mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B)
1895 return (__m128i)
1896 __builtin_ia32_vcvtph2uw128_mask (__B,
1897 (__v8hi)
1898 _mm_avx512_setzero_si128 (),
1899 __A);
1902 extern __inline __m256i
1903 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1904 _mm256_cvtph_epu16 (__m256h __A)
1906 return (__m256i)
1907 __builtin_ia32_vcvtph2uw256_mask (__A,
1908 (__v16hi)
1909 _mm256_avx512_setzero_si256 (),
1910 (__mmask16) -1);
1913 extern __inline __m256i
1914 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1915 _mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
1917 return (__m256i)
1918 __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B);
1921 extern __inline __m256i
1922 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1923 _mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B)
1925 return (__m256i)
1926 __builtin_ia32_vcvtph2uw256_mask (__B,
1927 (__v16hi)
1928 _mm256_avx512_setzero_si256 (),
1929 __A);
1932 /* Intrinsics vcvttph2w. */
1933 extern __inline __m128i
1934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1935 _mm_cvttph_epi16 (__m128h __A)
1937 return (__m128i)
1938 __builtin_ia32_vcvttph2w128_mask (__A,
1939 (__v8hi)
1940 _mm_avx512_setzero_si128 (),
1941 (__mmask8) -1);
1944 extern __inline __m128i
1945 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1946 _mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C)
1948 return (__m128i)
1949 __builtin_ia32_vcvttph2w128_mask (__C,
1950 (__v8hi) __A,
1951 __B);
1954 extern __inline __m128i
1955 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1956 _mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B)
1958 return (__m128i)
1959 __builtin_ia32_vcvttph2w128_mask (__B,
1960 (__v8hi)
1961 _mm_avx512_setzero_si128 (),
1962 __A);
1965 extern __inline __m256i
1966 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1967 _mm256_cvttph_epi16 (__m256h __A)
1969 return (__m256i)
1970 __builtin_ia32_vcvttph2w256_mask (__A,
1971 (__v16hi)
1972 _mm256_avx512_setzero_si256 (),
1973 (__mmask16) -1);
1976 extern __inline __m256i
1977 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1978 _mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C)
1980 return (__m256i)
1981 __builtin_ia32_vcvttph2w256_mask (__C,
1982 (__v16hi) __A,
1983 __B);
1986 extern __inline __m256i
1987 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
1988 _mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B)
1990 return (__m256i)
1991 __builtin_ia32_vcvttph2w256_mask (__B,
1992 (__v16hi)
1993 _mm256_avx512_setzero_si256 (),
1994 __A);
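/* Editorial sketch, not part of the upstream header: the plain
   vcvtph2w forms round according to MXCSR (2.9 -> 3 by default),
   while the vcvttph2w forms above always truncate toward zero
   (2.9 -> 2, -2.9 -> -2).  Hypothetical helper name.  */
static __m128i
example_truncate_ph_to_epi16 (__m128h __vals)
{
  /* Fractional parts are discarded regardless of the rounding mode.  */
  return _mm_cvttph_epi16 (__vals);
}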
1997 /* Intrinsics vcvttph2uw. */
1998 extern __inline __m128i
1999 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2000 _mm_cvttph_epu16 (__m128h __A)
2002 return (__m128i)
2003 __builtin_ia32_vcvttph2uw128_mask (__A,
2004 (__v8hi)
2005 _mm_avx512_setzero_si128 (),
2006 (__mmask8) -1);
2009 extern __inline __m128i
2010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2011 _mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C)
2013 return (__m128i)
2014 __builtin_ia32_vcvttph2uw128_mask (__C,
2015 (__v8hi) __A,
2016 __B);
2019 extern __inline __m128i
2020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2021 _mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B)
2023 return (__m128i)
2024 __builtin_ia32_vcvttph2uw128_mask (__B,
2025 (__v8hi)
2026 _mm_avx512_setzero_si128 (),
2027 __A);
2030 extern __inline __m256i
2031 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2032 _mm256_cvttph_epu16 (__m256h __A)
2034 return (__m256i)
2035 __builtin_ia32_vcvttph2uw256_mask (__A,
2036 (__v16hi)
2037 _mm256_avx512_setzero_si256 (),
2038 (__mmask16) -1);
2041 extern __inline __m256i
2042 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2043 _mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C)
2045 return (__m256i)
2046 __builtin_ia32_vcvttph2uw256_mask (__C,
2047 (__v16hi) __A,
2048 __B);
2051 extern __inline __m256i
2052 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2053 _mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B)
2055 return (__m256i)
2056 __builtin_ia32_vcvttph2uw256_mask (__B,
2057 (__v16hi) _mm256_avx512_setzero_si256 (),
2058 __A);
2061 /* Intrinsics vcvtw2ph. */
2062 extern __inline __m128h
2063 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2064 _mm_cvtepi16_ph (__m128i __A)
2066 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A,
2067 _mm_setzero_ph (),
2068 (__mmask8) -1);
2071 extern __inline __m128h
2072 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2073 _mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C)
2075 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C,
2076 __A,
2077 __B);
2080 extern __inline __m128h
2081 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2082 _mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B)
2084 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B,
2085 _mm_setzero_ph (),
2086 __A);
2089 extern __inline __m256h
2090 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2091 _mm256_cvtepi16_ph (__m256i __A)
2093 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A,
2094 _mm256_setzero_ph (),
2095 (__mmask16) -1);
2098 extern __inline __m256h
2099 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2100 _mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C)
2102 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C,
2103 __A,
2104 __B);
2107 extern __inline __m256h
2108 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2109 _mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B)
2111 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B,
2112 _mm256_setzero_ph (),
2113 __A);
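/* Editorial sketch, not part of the upstream header: the zero-masked
   int16 -> _Float16 conversion.  Helper name and mask value are
   hypothetical.  */
static __m128h
example_epi16_to_ph (__m128i __ints)
{
  /* Lanes 0-5 get the converted _Float16 values; lanes 6-7 become
     +0.0 because the corresponding mask bits are clear.  */
  return _mm_maskz_cvtepi16_ph ((__mmask8) 0x3f, __ints);
}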
2116 /* Intrinsics vcvtuw2ph. */
2117 extern __inline __m128h
2118 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2119 _mm_cvtepu16_ph (__m128i __A)
2121 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A,
2122 _mm_setzero_ph (),
2123 (__mmask8) -1);
2126 extern __inline __m128h
2127 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2128 _mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C)
2130 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B);
2133 extern __inline __m128h
2134 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2135 _mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B)
2137 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B,
2138 _mm_setzero_ph (),
2139 __A);
2142 extern __inline __m256h
2143 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2144 _mm256_cvtepu16_ph (__m256i __A)
2146 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A,
2147 _mm256_setzero_ph (),
2148 (__mmask16) -1);
2151 extern __inline __m256h
2152 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2153 _mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C)
2155 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B);
2158 extern __inline __m256h
2159 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2160 _mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B)
2162 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B,
2163 _mm256_setzero_ph (),
2164 __A);
2167 /* Intrinsics vcvtph2pd. */
2168 extern __inline __m128d
2169 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2170 _mm_cvtph_pd (__m128h __A)
2172 return __builtin_ia32_vcvtph2pd128_mask (__A,
2173 _mm_avx512_setzero_pd (),
2174 (__mmask8) -1);
2177 extern __inline __m128d
2178 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2179 _mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C)
2181 return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B);
2184 extern __inline __m128d
2185 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2186 _mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
2188 return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_avx512_setzero_pd (), __A);
2191 extern __inline __m256d
2192 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2193 _mm256_cvtph_pd (__m128h __A)
2195 return __builtin_ia32_vcvtph2pd256_mask (__A,
2196 _mm256_avx512_setzero_pd (),
2197 (__mmask8) -1);
2200 extern __inline __m256d
2201 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2202 _mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C)
2204 return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B);
2207 extern __inline __m256d
2208 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2209 _mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B)
2211 return __builtin_ia32_vcvtph2pd256_mask (__B,
2212 _mm256_avx512_setzero_pd (),
2213 __A);
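/* Editorial sketch, not part of the upstream header: vcvtph2pd
   widens, so only the low lanes of the __m128h input are consumed --
   two halves for a __m128d result, four for a __m256d result.
   Hypothetical helper name.  */
static __m256d
example_widen_ph_to_pd (__m128h __vals)
{
  /* Converts __vals[0..3]; __vals[4..7] are ignored.  */
  return _mm256_cvtph_pd (__vals);
}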
2216 /* Intrinsics vcvtph2ps. */
2217 extern __inline __m128
2218 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2219 _mm_cvtxph_ps (__m128h __A)
2221 return __builtin_ia32_vcvtph2psx128_mask (__A,
2222 _mm_avx512_setzero_ps (),
2223 (__mmask8) -1);
2226 extern __inline __m128
2227 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2228 _mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C)
2230 return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B);
2233 extern __inline __m128
2234 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2235 _mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
2237 return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_avx512_setzero_ps (), __A);
2240 extern __inline __m256
2241 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2242 _mm256_cvtxph_ps (__m128h __A)
2244 return __builtin_ia32_vcvtph2psx256_mask (__A,
2245 _mm256_avx512_setzero_ps (),
2246 (__mmask8) -1);
2249 extern __inline __m256
2250 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2251 _mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C)
2253 return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B);
2256 extern __inline __m256
2257 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2258 _mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B)
2260 return __builtin_ia32_vcvtph2psx256_mask (__B,
2261 _mm256_avx512_setzero_ps (),
2262 __A);
2265 /* Intrinsics vcvtxps2ph. */
2266 extern __inline __m128h
2267 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2268 _mm_cvtxps_ph (__m128 __A)
2270 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A,
2271 _mm_setzero_ph (),
2272 (__mmask8) -1);
2275 extern __inline __m128h
2276 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2277 _mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C)
2279 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B);
2282 extern __inline __m128h
2283 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2284 _mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B)
2286 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B,
2287 _mm_setzero_ph (),
2288 __A);
2291 extern __inline __m128h
2292 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2293 _mm256_cvtxps_ph (__m256 __A)
2295 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A,
2296 _mm_setzero_ph (),
2297 (__mmask8) -1);
2300 extern __inline __m128h
2301 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2302 _mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C)
2304 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B);
2307 extern __inline __m128h
2308 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2309 _mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B)
2311 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B,
2312 _mm_setzero_ph (),
2313 __A);
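/* Editorial sketch, not part of the upstream header: vcvtps2phx
   narrows, so both the __m128 and the __m256 forms return a __m128h.
   The 256-bit input fills all eight result lanes; the 128-bit input
   writes lanes 0-3 and zeroes lanes 4-7.  Hypothetical helper name.  */
static __m128h
example_narrow_ps_to_ph (__m256 __vals)
{
  return _mm256_cvtxps_ph (__vals);
}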
2316 /* Intrinsics vcvtpd2ph. */
2317 extern __inline __m128h
2318 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2319 _mm_cvtpd_ph (__m128d __A)
2321 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A,
2322 _mm_setzero_ph (),
2323 (__mmask8) -1);
2326 extern __inline __m128h
2327 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2328 _mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C)
2330 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B);
2333 extern __inline __m128h
2334 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2335 _mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B)
2337 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B,
2338 _mm_setzero_ph (),
2339 __A);
2342 extern __inline __m128h
2343 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2344 _mm256_cvtpd_ph (__m256d __A)
2346 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A,
2347 _mm_setzero_ph (),
2348 (__mmask8) -1);
2351 extern __inline __m128h
2352 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2353 _mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C)
2355 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B);
2358 extern __inline __m128h
2359 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2360 _mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B)
2362 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B,
2363 _mm_setzero_ph (),
2364 __A);
2367 /* Intrinsics vfmaddsub[132,213,231]ph. */
2368 extern __inline __m256h
2369 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2370 _mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C)
2372 return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A,
2373 (__v16hf)__B,
2374 (__v16hf)__C,
2375 (__mmask16)-1);
2378 extern __inline __m256h
2379 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2380 _mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
2381 __m256h __C)
2383 return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A,
2384 (__v16hf) __B,
2385 (__v16hf) __C,
2386 (__mmask16) __U);
2389 extern __inline __m256h
2390 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2391 _mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C,
2392 __mmask16 __U)
2394 return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A,
2395 (__v16hf) __B,
2396 (__v16hf) __C,
2397 (__mmask16)
2398 __U);
2401 extern __inline __m256h
2402 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2403 _mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
2404 __m256h __C)
2406 return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A,
2407 (__v16hf) __B,
2408 (__v16hf) __C,
2409 (__mmask16)
2410 __U);
2413 extern __inline __m128h
2414 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2415 _mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C)
2417 return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A,
2418 (__v8hf)__B,
2419 (__v8hf)__C,
2420 (__mmask8)-1);
2423 extern __inline __m128h
2424 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2425 _mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
2426 __m128h __C)
2428 return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A,
2429 (__v8hf) __B,
2430 (__v8hf) __C,
2431 (__mmask8) __U);
2434 extern __inline __m128h
2435 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2436 _mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C,
2437 __mmask8 __U)
2439 return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A,
2440 (__v8hf) __B,
2441 (__v8hf) __C,
2442 (__mmask8)
2443 __U);
2446 extern __inline __m128h
2447 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2448 _mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
2449 __m128h __C)
2451 return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A,
2452 (__v8hf) __B,
2453 (__v8hf) __C,
2454 (__mmask8)
2455 __U);
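/* Editorial sketch, not part of the upstream header: the fmaddsub
   lane pattern computes __A*__B - __C in even lanes and __A*__B + __C
   in odd lanes, the usual building block for interleaved complex
   arithmetic.  Hypothetical helper name.  */
static __m128h
example_fmaddsub (__m128h __a, __m128h __b, __m128h __c)
{
  /* Even lanes: __a*__b - __c; odd lanes: __a*__b + __c.  */
  return _mm_fmaddsub_ph (__a, __b, __c);
}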
2458 /* Intrinsics vfmsubadd[132,213,231]ph. */
2459 extern __inline __m256h
2460 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2461 _mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C)
2463 return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
2464 (__v16hf) __B,
2465 (__v16hf) __C,
2466 (__mmask16) -1);
2469 extern __inline __m256h
2470 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2471 _mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
2472 __m256h __C)
2474 return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A,
2475 (__v16hf) __B,
2476 (__v16hf) __C,
2477 (__mmask16) __U);
2480 extern __inline __m256h
2481 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2482 _mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C,
2483 __mmask16 __U)
2485 return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A,
2486 (__v16hf) __B,
2487 (__v16hf) __C,
2488 (__mmask16)
2489 __U);
2492 extern __inline __m256h
2493 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2494 _mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
2495 __m256h __C)
2497 return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A,
2498 (__v16hf) __B,
2499 (__v16hf) __C,
2500 (__mmask16)
2501 __U);
2504 extern __inline __m128h
2505 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2506 _mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C)
2508 return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
2509 (__v8hf) __B,
2510 (__v8hf) __C,
2511 (__mmask8) -1);
2514 extern __inline __m128h
2515 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2516 _mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
2517 __m128h __C)
2519 return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A,
2520 (__v8hf) __B,
2521 (__v8hf) __C,
2522 (__mmask8) __U);
2525 extern __inline __m128h
2526 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2527 _mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C,
2528 __mmask8 __U)
2530 return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A,
2531 (__v8hf) __B,
2532 (__v8hf) __C,
2533 (__mmask8)
2534 __U);
2537 extern __inline __m128h
2538 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2539 _mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
2540 __m128h __C)
2542 return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A,
2543 (__v8hf) __B,
2544 (__v8hf) __C,
2545 (__mmask8)
2546 __U);
2549 /* Intrinsics vfmadd[132,213,231]ph. */
2550 extern __inline __m256h
2551 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2552 _mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C)
2554 return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
2555 (__v16hf) __B,
2556 (__v16hf) __C,
2557 (__mmask16) -1);
2560 extern __inline __m256h
2561 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2562 _mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
2563 __m256h __C)
2565 return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A,
2566 (__v16hf) __B,
2567 (__v16hf) __C,
2568 (__mmask16) __U);
2571 extern __inline __m256h
2572 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2573 _mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C,
2574 __mmask16 __U)
2576 return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A,
2577 (__v16hf) __B,
2578 (__v16hf) __C,
2579 (__mmask16)
2580 __U);
2583 extern __inline __m256h
2584 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2585 _mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
2586 __m256h __C)
2588 return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A,
2589 (__v16hf) __B,
2590 (__v16hf) __C,
2591 (__mmask16)
2592 __U);
2595 extern __inline __m128h
2596 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2597 _mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C)
2599 return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
2600 (__v8hf) __B,
2601 (__v8hf) __C,
2602 (__mmask8) -1);
2605 extern __inline __m128h
2606 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2607 _mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
2608 __m128h __C)
2610 return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A,
2611 (__v8hf) __B,
2612 (__v8hf) __C,
2613 (__mmask8) __U);
2616 extern __inline __m128h
2617 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2618 _mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C,
2619 __mmask8 __U)
2621 return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A,
2622 (__v8hf) __B,
2623 (__v8hf) __C,
2624 (__mmask8)
2625 __U);
2628 extern __inline __m128h
2629 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2630 _mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
2631 __m128h __C)
2633 return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A,
2634 (__v8hf) __B,
2635 (__v8hf) __C,
2636 (__mmask8)
2637 __U);
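/* Editorial sketch, not part of the upstream header: for a clear
   mask bit the three masked fmadd flavours fall back differently --
   _mm_mask_fmadd_ph keeps the first operand, _mm_mask3_fmadd_ph
   keeps the addend, and _mm_maskz_fmadd_ph writes zero.  The helper
   name and the 0x3f mask are hypothetical.  */
static __m128h
example_fmadd_mask_flavours (__m128h __a, __m128h __b, __m128h __c)
{
  __m128h __keep_a = _mm_mask_fmadd_ph (__a, (__mmask8) 0x3f, __b, __c);
  __m128h __keep_c = _mm_mask3_fmadd_ph (__a, __b, __c, (__mmask8) 0x3f);
  return _mm_maskz_fmadd_ph ((__mmask8) 0x3f, __keep_a, __b, __keep_c);
}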
2640 /* Intrinsics vfnmadd[132,213,231]ph. */
2641 extern __inline __m256h
2642 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2643 _mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C)
2645 return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
2646 (__v16hf) __B,
2647 (__v16hf) __C,
2648 (__mmask16) -1);
2651 extern __inline __m256h
2652 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2653 _mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B,
2654 __m256h __C)
2656 return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A,
2657 (__v16hf) __B,
2658 (__v16hf) __C,
2659 (__mmask16) __U);
2662 extern __inline __m256h
2663 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2664 _mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C,
2665 __mmask16 __U)
2667 return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A,
2668 (__v16hf) __B,
2669 (__v16hf) __C,
2670 (__mmask16)
2671 __U);
2674 extern __inline __m256h
2675 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2676 _mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B,
2677 __m256h __C)
2679 return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A,
2680 (__v16hf) __B,
2681 (__v16hf) __C,
2682 (__mmask16)
2683 __U);
2686 extern __inline __m128h
2687 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2688 _mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C)
2690 return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
2691 (__v8hf) __B,
2692 (__v8hf) __C,
2693 (__mmask8) -1);
2696 extern __inline __m128h
2697 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2698 _mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B,
2699 __m128h __C)
2701 return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A,
2702 (__v8hf) __B,
2703 (__v8hf) __C,
2704 (__mmask8) __U);
2707 extern __inline __m128h
2708 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2709 _mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C,
2710 __mmask8 __U)
2712 return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A,
2713 (__v8hf) __B,
2714 (__v8hf) __C,
2715 (__mmask8)
2716 __U);
2719 extern __inline __m128h
2720 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2721 _mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B,
2722 __m128h __C)
2724 return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A,
2725 (__v8hf) __B,
2726 (__v8hf) __C,
2727 (__mmask8)
2728 __U);
2731 /* Intrinsics vfmsub[132,213,231]ph. */
2732 extern __inline __m256h
2733 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2734 _mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C)
2736 return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
2737 (__v16hf) __B,
2738 (__v16hf) __C,
2739 (__mmask16) -1);
2742 extern __inline __m256h
2743 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2744 _mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
2745 __m256h __C)
2747 return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A,
2748 (__v16hf) __B,
2749 (__v16hf) __C,
2750 (__mmask16) __U);
2753 extern __inline __m256h
2754 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2755 _mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C,
2756 __mmask16 __U)
2758 return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A,
2759 (__v16hf) __B,
2760 (__v16hf) __C,
2761 (__mmask16)
2762 __U);
2765 extern __inline __m256h
2766 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2767 _mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
2768 __m256h __C)
2770 return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A,
2771 (__v16hf) __B,
2772 (__v16hf) __C,
2773 (__mmask16)
2774 __U);
2777 extern __inline __m128h
2778 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2779 _mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C)
2781 return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
2782 (__v8hf) __B,
2783 (__v8hf) __C,
2784 (__mmask8) -1);
2787 extern __inline __m128h
2788 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2789 _mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
2790 __m128h __C)
2792 return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A,
2793 (__v8hf) __B,
2794 (__v8hf) __C,
2795 (__mmask8) __U);
2798 extern __inline __m128h
2799 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2800 _mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C,
2801 __mmask8 __U)
2803 return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A,
2804 (__v8hf) __B,
2805 (__v8hf) __C,
2806 (__mmask8)
2807 __U);
2810 extern __inline __m128h
2811 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2812 _mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
2813 __m128h __C)
2815 return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A,
2816 (__v8hf) __B,
2817 (__v8hf) __C,
2818 (__mmask8)
2819 __U);
2822 /* Intrinsics vfnmsub[132,213,231]ph. */
2823 extern __inline __m256h
2824 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2825 _mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C)
2827 return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
2828 (__v16hf) __B,
2829 (__v16hf) __C,
2830 (__mmask16) -1);
2833 extern __inline __m256h
2834 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2835 _mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B,
2836 __m256h __C)
2838 return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A,
2839 (__v16hf) __B,
2840 (__v16hf) __C,
2841 (__mmask16) __U);
2844 extern __inline __m256h
2845 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2846 _mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C,
2847 __mmask16 __U)
2849 return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A,
2850 (__v16hf) __B,
2851 (__v16hf) __C,
2852 (__mmask16)
2853 __U);
2856 extern __inline __m256h
2857 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2858 _mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B,
2859 __m256h __C)
2861 return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A,
2862 (__v16hf) __B,
2863 (__v16hf) __C,
2864 (__mmask16)
2865 __U);
2868 extern __inline __m128h
2869 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2870 _mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C)
2872 return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
2873 (__v8hf) __B,
2874 (__v8hf) __C,
2875 (__mmask8) -1);
2878 extern __inline __m128h
2879 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2880 _mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B,
2881 __m128h __C)
2883 return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A,
2884 (__v8hf) __B,
2885 (__v8hf) __C,
2886 (__mmask8) __U);
2889 extern __inline __m128h
2890 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2891 _mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C,
2892 __mmask8 __U)
2894 return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A,
2895 (__v8hf) __B,
2896 (__v8hf) __C,
2897 (__mmask8)
2898 __U);
2901 extern __inline __m128h
2902 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2903 _mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B,
2904 __m128h __C)
2906 return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A,
2907 (__v8hf) __B,
2908 (__v8hf) __C,
2909 (__mmask8)
2910 __U);
2913 /* Intrinsics vf[,c]maddcph. */
2914 extern __inline __m128h
2915 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2916 _mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C)
2918 return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A,
2919 (__v8hf) __B,
2920 (__v8hf) __C);
2923 extern __inline __m128h
2924 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2925 _mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
2927 return (__m128h)
2928 __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A,
2929 (__v8hf) __C,
2930 (__v8hf) __D, __B);
2933 extern __inline __m128h
2934 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2935 _mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
2937 return (__m128h)
2938 __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A,
2939 (__v8hf) __B,
2940 (__v8hf) __C, __D);
2943 extern __inline __m128h
2944 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2945 _mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
2947 return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B,
2948 (__v8hf) __C,
2949 (__v8hf) __D, __A);
2952 extern __inline __m256h
2953 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2954 _mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C)
2956 return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A,
2957 (__v16hf) __B,
2958 (__v16hf) __C);
2961 extern __inline __m256h
2962 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2963 _mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
2965 return (__m256h)
2966 __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A,
2967 (__v16hf) __C,
2968 (__v16hf) __D, __B);
2971 extern __inline __m256h
2972 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2973 _mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
2975 return (__m256h)
2976 __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A,
2977 (__v16hf) __B,
2978 (__v16hf) __C, __D);
2981 extern __inline __m256h
2982 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2983 _mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
2985 return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B,
2986 (__v16hf) __C,
2987 (__v16hf) __D, __A);
2990 extern __inline __m128h
2991 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
2992 _mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C)
2994 return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A,
2995 (__v8hf) __B,
2996 (__v8hf) __C);
2999 extern __inline __m128h
3000 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3001 _mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
3003 return (__m128h)
3004 __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A,
3005 (__v8hf) __C,
3006 (__v8hf) __D, __B);
3009 extern __inline __m128h
3010 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3011 _mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D)
3013 return (__m128h)
3014 __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A,
3015 (__v8hf) __B,
3016 (__v8hf) __C, __D);
3019 extern __inline __m128h
3020 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3021 _mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D)
3023 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B,
3024 (__v8hf) __C,
3025 (__v8hf) __D, __A);
3028 extern __inline __m256h
3029 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3030 _mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C)
3032 return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A,
3033 (__v16hf) __B,
3034 (__v16hf) __C);
3037 extern __inline __m256h
3038 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3039 _mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
3041 return (__m256h)
3042 __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A,
3043 (__v16hf) __C,
3044 (__v16hf) __D, __B);
3047 extern __inline __m256h
3048 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3049 _mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D)
3051 return (__m256h)
3052 __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A,
3053 (__v16hf) __B,
3054 (__v16hf) __C, __D);
3057 extern __inline __m256h
3058 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3059 _mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D)
3061 return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B,
3062 (__v16hf) __C,
3063 (__v16hf) __D, __A);
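/* Editorial sketch, not part of the upstream header: in the *_pch
   intrinsics each pair of _Float16 lanes is one complex value (real
   in the even lane, imaginary in the odd lane), so a __m128h holds
   four complex numbers and only the low four mask bits are used.
   Hypothetical helper name.  */
static __m128h
example_fmadd_pch (__m128h __a, __m128h __b, __m128h __acc)
{
  /* Complex multiply-accumulate per pair: __acc + __a*__b.  */
  return _mm_fmadd_pch (__a, __b, __acc);
}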
3066 /* Intrinsics vf[,c]mulcph. */
3067 extern __inline __m128h
3068 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3069 _mm_fmul_pch (__m128h __A, __m128h __B)
3071 return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B);
3074 extern __inline __m128h
3075 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3076 _mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
3078 return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C,
3079 (__v8hf) __D,
3080 (__v8hf) __A, __B);
3083 extern __inline __m128h
3084 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3085 _mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
3087 return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B,
3088 (__v8hf) __C,
3089 _mm_setzero_ph (),
3090 __A);
3093 extern __inline __m256h
3094 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3095 _mm256_fmul_pch (__m256h __A, __m256h __B)
3097 return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A,
3098 (__v16hf) __B);
3101 extern __inline __m256h
3102 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3103 _mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
3105 return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C,
3106 (__v16hf) __D,
3107 (__v16hf) __A, __B);
3110 extern __inline __m256h
3111 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3112 _mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
3114 return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B,
3115 (__v16hf) __C,
3116 _mm256_setzero_ph (),
3117 __A);
3120 extern __inline __m128h
3121 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3122 _mm_fcmul_pch (__m128h __A, __m128h __B)
3124 return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A,
3125 (__v8hf) __B);
3128 extern __inline __m128h
3129 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3130 _mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D)
3132 return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C,
3133 (__v8hf) __D,
3134 (__v8hf) __A, __B);
3137 extern __inline __m128h
3138 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3139 _mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C)
3141 return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B,
3142 (__v8hf) __C,
3143 _mm_setzero_ph (),
3144 __A);
3147 extern __inline __m256h
3148 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3149 _mm256_fcmul_pch (__m256h __A, __m256h __B)
3151 return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B);
3154 extern __inline __m256h
3155 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3156 _mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D)
3158 return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C,
3159 (__v16hf) __D,
3160 (__v16hf) __A, __B);
3163 extern __inline __m256h
3164 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3165 _mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C)
3167 return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B,
3168 (__v16hf) __C,
3169 _mm256_setzero_ph (),
3170 __A);
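/* Editorial sketch, not part of the upstream header: _mm_fmul_pch
   multiplies complex pairs directly, while _mm_fcmul_pch multiplies
   by the complex conjugate of the second operand.  Hypothetical
   helper name.  */
static __m128h
example_squared_magnitude (__m128h __z)
{
  /* z * conj(z): even (real) lanes hold |z|^2, odd (imaginary)
     lanes are zero up to rounding.  */
  return _mm_fcmul_pch (__z, __z);
}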
3173 #define _MM256_REDUCE_OP(op) \
3174 __m128h __T1 = (__m128h) _mm256_avx512_extractf128_pd ((__m256d) __A, 0); \
3175 __m128h __T2 = (__m128h) _mm256_avx512_extractf128_pd ((__m256d) __A, 1); \
3176 __m128h __T3 = (__T1 op __T2); \
3177 __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \
3178 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
3179 __m128h __T5 = (__T3) op (__T4); \
3180 __m128h __T6 = (__m128h) __builtin_shuffle (__T5, \
3181 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \
3182 __m128h __T7 = __T5 op __T6; \
3183 return __T7[0] op __T7[1]
3185 extern __inline _Float16
3186 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3187 _mm256_reduce_add_ph (__m256h __A)
3189 _MM256_REDUCE_OP (+);
3192 extern __inline _Float16
3193 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3194 _mm256_reduce_mul_ph (__m256h __A)
3196 _MM256_REDUCE_OP (*);
3199 #undef _MM256_REDUCE_OP
3200 #define _MM256_REDUCE_OP(op) \
3201 __m128h __T1 = (__m128h) _mm256_avx512_extractf128_pd ((__m256d) __A, 0); \
3202 __m128h __T2 = (__m128h) _mm256_avx512_extractf128_pd ((__m256d) __A, 1); \
3203 __m128h __T3 = _mm_##op (__T1, __T2); \
3204 __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \
3205 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
3206 __m128h __T5 = _mm_##op (__T3, __T4); \
3207 __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); \
3208 __m128h __T7 = _mm_##op (__T5, __T6); \
3209 __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); \
3210 __m128h __T9 = _mm_##op (__T7, __T8); \
3211 return __T9[0]
3213 extern __inline _Float16
3214 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3215 _mm256_reduce_min_ph (__m256h __A)
3217 _MM256_REDUCE_OP (min_ph);
3220 extern __inline _Float16
3221 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3222 _mm256_reduce_max_ph (__m256h __A)
3224 _MM256_REDUCE_OP (max_ph);
3227 #define _MM_REDUCE_OP(op) \
3228 __m128h __T1 = (__m128h) __builtin_shuffle (__A, \
3229 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \
3230 __m128h __T2 = (__A) op (__T1); \
3231 __m128h __T3 = (__m128h) __builtin_shuffle (__T2, \
3232 (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 }); \
3233 __m128h __T4 = __T2 op __T3; \
3234 return __T4[0] op __T4[1]
3236 extern __inline _Float16
3237 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3238 _mm_reduce_add_ph (__m128h __A)
3240 _MM_REDUCE_OP (+);
3243 extern __inline _Float16
3244 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3245 _mm_reduce_mul_ph (__m128h __A)
3247 _MM_REDUCE_OP (*);
3250 #undef _MM_REDUCE_OP
3251 #define _MM_REDUCE_OP(op) \
3252 __m128h __T1 = (__m128h) __builtin_shuffle (__A, \
3253 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \
3254 __m128h __T2 = _mm_##op (__A, __T1); \
3255 __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 }); \
3256 __m128h __T4 = _mm_##op (__T2, __T3); \
3257 __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 }); \
3258 __m128h __T6 = _mm_##op (__T4, __T5); \
3259 return __T6[0]
3261 extern __inline _Float16
3262 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3263 _mm_reduce_min_ph (__m128h __A)
3265 _MM_REDUCE_OP (min_ph);
3268 extern __inline _Float16
3269 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3270 _mm_reduce_max_ph (__m128h __A)
3272 _MM_REDUCE_OP (max_ph);
3275 #undef _MM256_REDUCE_OP
3276 #undef _MM_REDUCE_OP
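/* Editorial note on the reduction macros above: each step halves the
   active width with a shuffle and combines, so 16 lanes need four
   combines and 8 lanes need three.  Short initializers such as
   (__v8hi){ 4, 5 } zero-fill the remaining mask lanes, which is
   harmless because only lane 0 of the final vector is read.
   Illustrative usage, hypothetical helper name:  */
static _Float16
example_horizontal_sum (__m256h __v)
{
  /* Sum of all sixteen _Float16 lanes of __v.  */
  return _mm256_reduce_add_ph (__v);
}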
3278 extern __inline __m256h
3279 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3280 _mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W)
3282 return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W,
3283 (__v16hi) __A,
3284 (__mmask16) __U);
3288 extern __inline __m256h
3289 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3290 _mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B)
3292 return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A,
3293 (__v16hi) __I,
3294 (__v16hi) __B,
3295 (__mmask16)-1);
3298 extern __inline __m256h
3299 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3300 _mm256_permutexvar_ph (__m256i __A, __m256h __B)
3302 return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B,
3303 (__v16hi) __A,
3304 (__v16hi)
3305 (_mm256_setzero_ph ()),
3306 (__mmask16)-1);
3309 extern __inline __m128h
3310 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3311 _mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W)
3313 return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W,
3314 (__v8hi) __A,
3315 (__mmask8) __U);
3319 extern __inline __m128h
3320 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3321 _mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B)
3323 return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A,
3324 (__v8hi) __I,
3325 (__v8hi) __B,
3326 (__mmask8)-1);
3329 extern __inline __m128h
3330 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3331 _mm_permutexvar_ph (__m128i __A, __m128h __B)
3333 return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B,
3334 (__v8hi) __A,
3335 (__v8hi)
3336 (_mm_setzero_ph ()),
3337 (__mmask8)-1);
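/* Editorial sketch, not part of the upstream header:
   _mm_permutexvar_ph reorders _Float16 lanes by the 16-bit indices
   in its first argument.  The index vector below is a hypothetical
   example that reverses the eight lanes.  */
static __m128h
example_reverse_lanes (__m128h __v)
{
  __m128i __rev = __extension__ (__m128i)(__v8hi){ 7, 6, 5, 4, 3, 2, 1, 0 };
  return _mm_permutexvar_ph (__rev, __v);
}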
3340 extern __inline __m256h
3341 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3342 _mm256_set1_pch (_Float16 _Complex __A)
3344 union
3346 _Float16 _Complex __a;
3347 float __b;
3348 } __u = { .__a = __A };
3350 return (__m256h) _mm256_avx512_set1_ps (__u.__b);
3353 extern __inline __m128h
3354 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
3355 _mm_set1_pch (_Float16 _Complex __A)
3357 union
3359 _Float16 _Complex __a;
3360 float __b;
3361 } __u = { .__a = __A };
3363 return (__m128h) _mm_avx512_set1_ps (__u.__b);
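/* Editorial note on the union above: a _Float16 _Complex value is
   two adjacent _Float16s, i.e. 32 bits, so punning it through a
   float lets the existing 32-bit broadcast replicate the (real,
   imag) pair into every complex slot.  Illustrative usage,
   hypothetical helper name:  */
static __m128h
example_broadcast_complex (_Float16 _Complex __z)
{
  /* All four complex slots of the result receive __z.  */
  return _mm_set1_pch (__z);
}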
3366 /* Intrinsics below are aliases for f*mul_*ch.  */
3367 #define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B))
3368 #define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B))
3369 #define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B))
3370 #define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B))
3371 #define _mm256_mask_mul_pch(W, U, A, B) \
3372 _mm256_mask_fmul_pch ((W), (U), (A), (B))
3373 #define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B))
3375 #define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B))
3376 #define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B))
3377 #define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B))
3378 #define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B))
3379 #define _mm256_mask_cmul_pch(W, U, A, B) \
3380 _mm256_mask_fcmul_pch ((W), (U), (A), (B))
3381 #define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch ((U), (A), (B))
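/* Editorial sketch, not part of the upstream header: the aliases
   above expand textually, so these two expressions are identical.
   Hypothetical helper name.  */
static __m128h
example_mul_alias (__m128h __a, __m128h __b)
{
  return _mm_mul_pch (__a, __b);   /* same as _mm_fmul_pch (__a, __b) */
}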
3383 #ifdef __DISABLE_AVX512FP16VL__
3384 #undef __DISABLE_AVX512FP16VL__
3385 #pragma GCC pop_options
3386 #endif /* __DISABLE_AVX512FP16VL__ */
3388 #endif /* __AVX512FP16VLINTRIN_H_INCLUDED */