/*===----------- avx512fp16intrin.h - AVX512-FP16 intrinsics ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9 #ifndef __IMMINTRIN_H
10 #error "Never use <avx512fp16intrin.h> directly; include <immintrin.h> instead."
11 #endif
13 #ifdef __SSE2__
15 #ifndef __AVX512FP16INTRIN_H
16 #define __AVX512FP16INTRIN_H
18 /* Define the default attributes for the functions in this file. */
19 typedef _Float16 __v32hf __attribute__((__vector_size__(64), __aligned__(64)));
20 typedef _Float16 __m512h __attribute__((__vector_size__(64), __aligned__(64)));
21 typedef _Float16 __m512h_u __attribute__((__vector_size__(64), __aligned__(1)));
23 /* Define the default attributes for the functions in this file. */
24 #define __DEFAULT_FN_ATTRS512 \
25 __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
26 __min_vector_width__(512)))
27 #define __DEFAULT_FN_ATTRS256 \
28 __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
29 __min_vector_width__(256)))
30 #define __DEFAULT_FN_ATTRS128 \
31 __attribute__((__always_inline__, __nodebug__, __target__("avx512fp16"), \
32 __min_vector_width__(128)))
34 static __inline__ _Float16 __DEFAULT_FN_ATTRS512 _mm512_cvtsh_h(__m512h __a) {
35 return __a[0];
38 static __inline __m128h __DEFAULT_FN_ATTRS128 _mm_setzero_ph(void) {
39 return (__m128h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
42 static __inline __m256h __DEFAULT_FN_ATTRS256 _mm256_setzero_ph(void) {
43 return (__m256h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
44 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
47 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_undefined_ph(void) {
48 return (__m256h)__builtin_ia32_undef256();
51 static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_setzero_ph(void) {
52 return (__m512h){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
53 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
54 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
57 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_undefined_ph(void) {
58 return (__m128h)__builtin_ia32_undef128();
61 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_undefined_ph(void) {
62 return (__m512h)__builtin_ia32_undef512();
65 static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_set1_ph(_Float16 __h) {
66 return (__m512h)(__v32hf){__h, __h, __h, __h, __h, __h, __h, __h,
67 __h, __h, __h, __h, __h, __h, __h, __h,
68 __h, __h, __h, __h, __h, __h, __h, __h,
69 __h, __h, __h, __h, __h, __h, __h, __h};
72 static __inline __m512h __DEFAULT_FN_ATTRS512
73 _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
74 _Float16 __h5, _Float16 __h6, _Float16 __h7, _Float16 __h8,
75 _Float16 __h9, _Float16 __h10, _Float16 __h11, _Float16 __h12,
76 _Float16 __h13, _Float16 __h14, _Float16 __h15, _Float16 __h16,
77 _Float16 __h17, _Float16 __h18, _Float16 __h19, _Float16 __h20,
78 _Float16 __h21, _Float16 __h22, _Float16 __h23, _Float16 __h24,
79 _Float16 __h25, _Float16 __h26, _Float16 __h27, _Float16 __h28,
80 _Float16 __h29, _Float16 __h30, _Float16 __h31, _Float16 __h32) {
81 return (__m512h)(__v32hf){__h32, __h31, __h30, __h29, __h28, __h27, __h26,
82 __h25, __h24, __h23, __h22, __h21, __h20, __h19,
83 __h18, __h17, __h16, __h15, __h14, __h13, __h12,
84 __h11, __h10, __h9, __h8, __h7, __h6, __h5,
85 __h4, __h3, __h2, __h1};
88 #define _mm512_setr_ph(h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, \
89 h14, h15, h16, h17, h18, h19, h20, h21, h22, h23, h24, \
90 h25, h26, h27, h28, h29, h30, h31, h32) \
91 _mm512_set_ph((h32), (h31), (h30), (h29), (h28), (h27), (h26), (h25), (h24), \
92 (h23), (h22), (h21), (h20), (h19), (h18), (h17), (h16), (h15), \
93 (h14), (h13), (h12), (h11), (h10), (h9), (h8), (h7), (h6), \
94 (h5), (h4), (h3), (h2), (h1))
96 static __inline __m512h __DEFAULT_FN_ATTRS512
97 _mm512_set1_pch(_Float16 _Complex h) {
98 return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
101 static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
102 return (__m128)__a;
105 static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_castph_ps(__m256h __a) {
106 return (__m256)__a;
109 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castph_ps(__m512h __a) {
110 return (__m512)__a;
113 static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castph_pd(__m128h __a) {
114 return (__m128d)__a;
117 static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_castph_pd(__m256h __a) {
118 return (__m256d)__a;
121 static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castph_pd(__m512h __a) {
122 return (__m512d)__a;
125 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_castph_si128(__m128h __a) {
126 return (__m128i)__a;
129 static __inline__ __m256i __DEFAULT_FN_ATTRS256
130 _mm256_castph_si256(__m256h __a) {
131 return (__m256i)__a;
134 static __inline__ __m512i __DEFAULT_FN_ATTRS512
135 _mm512_castph_si512(__m512h __a) {
136 return (__m512i)__a;
139 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castps_ph(__m128 __a) {
140 return (__m128h)__a;
143 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castps_ph(__m256 __a) {
144 return (__m256h)__a;
147 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castps_ph(__m512 __a) {
148 return (__m512h)__a;
151 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castpd_ph(__m128d __a) {
152 return (__m128h)__a;
155 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_castpd_ph(__m256d __a) {
156 return (__m256h)__a;
159 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_castpd_ph(__m512d __a) {
160 return (__m512h)__a;
163 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_castsi128_ph(__m128i __a) {
164 return (__m128h)__a;
167 static __inline__ __m256h __DEFAULT_FN_ATTRS256
168 _mm256_castsi256_ph(__m256i __a) {
169 return (__m256h)__a;
172 static __inline__ __m512h __DEFAULT_FN_ATTRS512
173 _mm512_castsi512_ph(__m512i __a) {
174 return (__m512h)__a;
177 static __inline__ __m128h __DEFAULT_FN_ATTRS256
178 _mm256_castph256_ph128(__m256h __a) {
179 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
182 static __inline__ __m128h __DEFAULT_FN_ATTRS512
183 _mm512_castph512_ph128(__m512h __a) {
184 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
187 static __inline__ __m256h __DEFAULT_FN_ATTRS512
188 _mm512_castph512_ph256(__m512h __a) {
189 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
190 12, 13, 14, 15);
193 static __inline__ __m256h __DEFAULT_FN_ATTRS256
194 _mm256_castph128_ph256(__m128h __a) {
195 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
196 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
199 static __inline__ __m512h __DEFAULT_FN_ATTRS512
200 _mm512_castph128_ph512(__m128h __a) {
201 __m256h __b = __builtin_nondeterministic_value(__b);
202 return __builtin_shufflevector(
203 __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a),
204 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
205 __b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
206 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
209 static __inline__ __m512h __DEFAULT_FN_ATTRS512
210 _mm512_castph256_ph512(__m256h __a) {
211 return __builtin_shufflevector(__a, __builtin_nondeterministic_value(__a), 0,
212 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
213 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
214 27, 28, 29, 30, 31);
217 /// Constructs a 256-bit floating-point vector of [16 x half] from a
218 /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
219 /// contain the value of the source vector. The upper 384 bits are set
220 /// to zero.
222 /// \headerfile <x86intrin.h>
224 /// This intrinsic has no corresponding instruction.
226 /// \param __a
227 /// A 128-bit vector of [8 x half].
228 /// \returns A 512-bit floating-point vector of [16 x half]. The lower 128 bits
229 /// contain the value of the parameter. The upper 384 bits are set to zero.
230 static __inline__ __m256h __DEFAULT_FN_ATTRS256
231 _mm256_zextph128_ph256(__m128h __a) {
232 return __builtin_shufflevector(__a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4,
233 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
236 /// Constructs a 512-bit floating-point vector of [32 x half] from a
237 /// 128-bit floating-point vector of [8 x half]. The lower 128 bits
238 /// contain the value of the source vector. The upper 384 bits are set
239 /// to zero.
241 /// \headerfile <x86intrin.h>
243 /// This intrinsic has no corresponding instruction.
245 /// \param __a
246 /// A 128-bit vector of [8 x half].
247 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 128 bits
248 /// contain the value of the parameter. The upper 384 bits are set to zero.
249 static __inline__ __m512h __DEFAULT_FN_ATTRS512
250 _mm512_zextph128_ph512(__m128h __a) {
251 return __builtin_shufflevector(
252 __a, (__v8hf)_mm_setzero_ph(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
253 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
256 /// Constructs a 512-bit floating-point vector of [32 x half] from a
257 /// 256-bit floating-point vector of [16 x half]. The lower 256 bits
258 /// contain the value of the source vector. The upper 256 bits are set
259 /// to zero.
261 /// \headerfile <x86intrin.h>
263 /// This intrinsic has no corresponding instruction.
265 /// \param __a
266 /// A 256-bit vector of [16 x half].
267 /// \returns A 512-bit floating-point vector of [32 x half]. The lower 256 bits
268 /// contain the value of the parameter. The upper 256 bits are set to zero.
269 static __inline__ __m512h __DEFAULT_FN_ATTRS512
270 _mm512_zextph256_ph512(__m256h __a) {
271 return __builtin_shufflevector(__a, (__v16hf)_mm256_setzero_ph(), 0, 1, 2, 3,
272 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
273 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
274 29, 30, 31);
277 #define _mm_comi_round_sh(A, B, P, R) \
278 __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, (int)(P), (int)(R))
280 #define _mm_comi_sh(A, B, pred) \
281 _mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
283 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
284 __m128h B) {
285 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
286 _MM_FROUND_CUR_DIRECTION);
289 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
290 __m128h B) {
291 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
292 _MM_FROUND_CUR_DIRECTION);
295 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
296 __m128h B) {
297 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
298 _MM_FROUND_CUR_DIRECTION);
301 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
302 __m128h B) {
303 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
304 _MM_FROUND_CUR_DIRECTION);
307 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
308 __m128h B) {
309 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
310 _MM_FROUND_CUR_DIRECTION);
313 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
314 __m128h B) {
315 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
316 _MM_FROUND_CUR_DIRECTION);
319 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
320 __m128h B) {
321 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
322 _MM_FROUND_CUR_DIRECTION);
325 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
326 __m128h B) {
327 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
328 _MM_FROUND_CUR_DIRECTION);
331 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
332 __m128h B) {
333 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
334 _MM_FROUND_CUR_DIRECTION);
337 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
338 __m128h B) {
339 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
340 _MM_FROUND_CUR_DIRECTION);
343 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
344 __m128h B) {
345 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
346 _MM_FROUND_CUR_DIRECTION);
349 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
350 __m128h B) {
351 return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
352 _MM_FROUND_CUR_DIRECTION);
355 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_add_ph(__m512h __A,
356 __m512h __B) {
357 return (__m512h)((__v32hf)__A + (__v32hf)__B);
360 static __inline__ __m512h __DEFAULT_FN_ATTRS512
361 _mm512_mask_add_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
362 return (__m512h)__builtin_ia32_selectph_512(
363 (__mmask32)__U, (__v32hf)_mm512_add_ph(__A, __B), (__v32hf)__W);
366 static __inline__ __m512h __DEFAULT_FN_ATTRS512
367 _mm512_maskz_add_ph(__mmask32 __U, __m512h __A, __m512h __B) {
368 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
369 (__v32hf)_mm512_add_ph(__A, __B),
370 (__v32hf)_mm512_setzero_ph());
373 #define _mm512_add_round_ph(A, B, R) \
374 ((__m512h)__builtin_ia32_addph512((__v32hf)(__m512h)(A), \
375 (__v32hf)(__m512h)(B), (int)(R)))
377 #define _mm512_mask_add_round_ph(W, U, A, B, R) \
378 ((__m512h)__builtin_ia32_selectph_512( \
379 (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
380 (__v32hf)(__m512h)(W)))
382 #define _mm512_maskz_add_round_ph(U, A, B, R) \
383 ((__m512h)__builtin_ia32_selectph_512( \
384 (__mmask32)(U), (__v32hf)_mm512_add_round_ph((A), (B), (R)), \
385 (__v32hf)_mm512_setzero_ph()))
387 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sub_ph(__m512h __A,
388 __m512h __B) {
389 return (__m512h)((__v32hf)__A - (__v32hf)__B);
392 static __inline__ __m512h __DEFAULT_FN_ATTRS512
393 _mm512_mask_sub_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
394 return (__m512h)__builtin_ia32_selectph_512(
395 (__mmask32)__U, (__v32hf)_mm512_sub_ph(__A, __B), (__v32hf)__W);
398 static __inline__ __m512h __DEFAULT_FN_ATTRS512
399 _mm512_maskz_sub_ph(__mmask32 __U, __m512h __A, __m512h __B) {
400 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
401 (__v32hf)_mm512_sub_ph(__A, __B),
402 (__v32hf)_mm512_setzero_ph());
405 #define _mm512_sub_round_ph(A, B, R) \
406 ((__m512h)__builtin_ia32_subph512((__v32hf)(__m512h)(A), \
407 (__v32hf)(__m512h)(B), (int)(R)))
409 #define _mm512_mask_sub_round_ph(W, U, A, B, R) \
410 ((__m512h)__builtin_ia32_selectph_512( \
411 (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
412 (__v32hf)(__m512h)(W)))
414 #define _mm512_maskz_sub_round_ph(U, A, B, R) \
415 ((__m512h)__builtin_ia32_selectph_512( \
416 (__mmask32)(U), (__v32hf)_mm512_sub_round_ph((A), (B), (R)), \
417 (__v32hf)_mm512_setzero_ph()))
419 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_mul_ph(__m512h __A,
420 __m512h __B) {
421 return (__m512h)((__v32hf)__A * (__v32hf)__B);
424 static __inline__ __m512h __DEFAULT_FN_ATTRS512
425 _mm512_mask_mul_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
426 return (__m512h)__builtin_ia32_selectph_512(
427 (__mmask32)__U, (__v32hf)_mm512_mul_ph(__A, __B), (__v32hf)__W);
430 static __inline__ __m512h __DEFAULT_FN_ATTRS512
431 _mm512_maskz_mul_ph(__mmask32 __U, __m512h __A, __m512h __B) {
432 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
433 (__v32hf)_mm512_mul_ph(__A, __B),
434 (__v32hf)_mm512_setzero_ph());
437 #define _mm512_mul_round_ph(A, B, R) \
438 ((__m512h)__builtin_ia32_mulph512((__v32hf)(__m512h)(A), \
439 (__v32hf)(__m512h)(B), (int)(R)))
441 #define _mm512_mask_mul_round_ph(W, U, A, B, R) \
442 ((__m512h)__builtin_ia32_selectph_512( \
443 (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
444 (__v32hf)(__m512h)(W)))
446 #define _mm512_maskz_mul_round_ph(U, A, B, R) \
447 ((__m512h)__builtin_ia32_selectph_512( \
448 (__mmask32)(U), (__v32hf)_mm512_mul_round_ph((A), (B), (R)), \
449 (__v32hf)_mm512_setzero_ph()))
451 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_div_ph(__m512h __A,
452 __m512h __B) {
453 return (__m512h)((__v32hf)__A / (__v32hf)__B);
456 static __inline__ __m512h __DEFAULT_FN_ATTRS512
457 _mm512_mask_div_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
458 return (__m512h)__builtin_ia32_selectph_512(
459 (__mmask32)__U, (__v32hf)_mm512_div_ph(__A, __B), (__v32hf)__W);
462 static __inline__ __m512h __DEFAULT_FN_ATTRS512
463 _mm512_maskz_div_ph(__mmask32 __U, __m512h __A, __m512h __B) {
464 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
465 (__v32hf)_mm512_div_ph(__A, __B),
466 (__v32hf)_mm512_setzero_ph());
469 #define _mm512_div_round_ph(A, B, R) \
470 ((__m512h)__builtin_ia32_divph512((__v32hf)(__m512h)(A), \
471 (__v32hf)(__m512h)(B), (int)(R)))
473 #define _mm512_mask_div_round_ph(W, U, A, B, R) \
474 ((__m512h)__builtin_ia32_selectph_512( \
475 (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
476 (__v32hf)(__m512h)(W)))
478 #define _mm512_maskz_div_round_ph(U, A, B, R) \
479 ((__m512h)__builtin_ia32_selectph_512( \
480 (__mmask32)(U), (__v32hf)_mm512_div_round_ph((A), (B), (R)), \
481 (__v32hf)_mm512_setzero_ph()))
483 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_min_ph(__m512h __A,
484 __m512h __B) {
485 return (__m512h)__builtin_ia32_minph512((__v32hf)__A, (__v32hf)__B,
486 _MM_FROUND_CUR_DIRECTION);
489 static __inline__ __m512h __DEFAULT_FN_ATTRS512
490 _mm512_mask_min_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
491 return (__m512h)__builtin_ia32_selectph_512(
492 (__mmask32)__U, (__v32hf)_mm512_min_ph(__A, __B), (__v32hf)__W);
495 static __inline__ __m512h __DEFAULT_FN_ATTRS512
496 _mm512_maskz_min_ph(__mmask32 __U, __m512h __A, __m512h __B) {
497 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
498 (__v32hf)_mm512_min_ph(__A, __B),
499 (__v32hf)_mm512_setzero_ph());
502 #define _mm512_min_round_ph(A, B, R) \
503 ((__m512h)__builtin_ia32_minph512((__v32hf)(__m512h)(A), \
504 (__v32hf)(__m512h)(B), (int)(R)))
506 #define _mm512_mask_min_round_ph(W, U, A, B, R) \
507 ((__m512h)__builtin_ia32_selectph_512( \
508 (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
509 (__v32hf)(__m512h)(W)))
511 #define _mm512_maskz_min_round_ph(U, A, B, R) \
512 ((__m512h)__builtin_ia32_selectph_512( \
513 (__mmask32)(U), (__v32hf)_mm512_min_round_ph((A), (B), (R)), \
514 (__v32hf)_mm512_setzero_ph()))
516 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_max_ph(__m512h __A,
517 __m512h __B) {
518 return (__m512h)__builtin_ia32_maxph512((__v32hf)__A, (__v32hf)__B,
519 _MM_FROUND_CUR_DIRECTION);
522 static __inline__ __m512h __DEFAULT_FN_ATTRS512
523 _mm512_mask_max_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
524 return (__m512h)__builtin_ia32_selectph_512(
525 (__mmask32)__U, (__v32hf)_mm512_max_ph(__A, __B), (__v32hf)__W);
528 static __inline__ __m512h __DEFAULT_FN_ATTRS512
529 _mm512_maskz_max_ph(__mmask32 __U, __m512h __A, __m512h __B) {
530 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U,
531 (__v32hf)_mm512_max_ph(__A, __B),
532 (__v32hf)_mm512_setzero_ph());
535 #define _mm512_max_round_ph(A, B, R) \
536 ((__m512h)__builtin_ia32_maxph512((__v32hf)(__m512h)(A), \
537 (__v32hf)(__m512h)(B), (int)(R)))
539 #define _mm512_mask_max_round_ph(W, U, A, B, R) \
540 ((__m512h)__builtin_ia32_selectph_512( \
541 (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
542 (__v32hf)(__m512h)(W)))
544 #define _mm512_maskz_max_round_ph(U, A, B, R) \
545 ((__m512h)__builtin_ia32_selectph_512( \
546 (__mmask32)(U), (__v32hf)_mm512_max_round_ph((A), (B), (R)), \
547 (__v32hf)_mm512_setzero_ph()))
549 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_abs_ph(__m512h __A) {
550 return (__m512h)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), (__m512i)__A);
553 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_conj_pch(__m512h __A) {
554 return (__m512h)_mm512_xor_ps((__m512)__A, _mm512_set1_ps(-0.0f));
557 static __inline__ __m512h __DEFAULT_FN_ATTRS512
558 _mm512_mask_conj_pch(__m512h __W, __mmask16 __U, __m512h __A) {
559 return (__m512h)__builtin_ia32_selectps_512(
560 (__mmask16)__U, (__v16sf)_mm512_conj_pch(__A), (__v16sf)__W);
563 static __inline__ __m512h __DEFAULT_FN_ATTRS512
564 _mm512_maskz_conj_pch(__mmask16 __U, __m512h __A) {
565 return (__m512h)__builtin_ia32_selectps_512((__mmask16)__U,
566 (__v16sf)_mm512_conj_pch(__A),
567 (__v16sf)_mm512_setzero_ps());
570 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_add_sh(__m128h __A,
571 __m128h __B) {
572 __A[0] += __B[0];
573 return __A;
576 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_add_sh(__m128h __W,
577 __mmask8 __U,
578 __m128h __A,
579 __m128h __B) {
580 __A = _mm_add_sh(__A, __B);
581 return __builtin_ia32_selectsh_128(__U, __A, __W);
584 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_add_sh(__mmask8 __U,
585 __m128h __A,
586 __m128h __B) {
587 __A = _mm_add_sh(__A, __B);
588 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
591 #define _mm_add_round_sh(A, B, R) \
592 ((__m128h)__builtin_ia32_addsh_round_mask( \
593 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
594 (__mmask8)-1, (int)(R)))
596 #define _mm_mask_add_round_sh(W, U, A, B, R) \
597 ((__m128h)__builtin_ia32_addsh_round_mask( \
598 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
599 (__mmask8)(U), (int)(R)))
601 #define _mm_maskz_add_round_sh(U, A, B, R) \
602 ((__m128h)__builtin_ia32_addsh_round_mask( \
603 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
604 (__mmask8)(U), (int)(R)))
606 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sub_sh(__m128h __A,
607 __m128h __B) {
608 __A[0] -= __B[0];
609 return __A;
612 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sub_sh(__m128h __W,
613 __mmask8 __U,
614 __m128h __A,
615 __m128h __B) {
616 __A = _mm_sub_sh(__A, __B);
617 return __builtin_ia32_selectsh_128(__U, __A, __W);
620 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sh(__mmask8 __U,
621 __m128h __A,
622 __m128h __B) {
623 __A = _mm_sub_sh(__A, __B);
624 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
627 #define _mm_sub_round_sh(A, B, R) \
628 ((__m128h)__builtin_ia32_subsh_round_mask( \
629 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
630 (__mmask8)-1, (int)(R)))
632 #define _mm_mask_sub_round_sh(W, U, A, B, R) \
633 ((__m128h)__builtin_ia32_subsh_round_mask( \
634 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
635 (__mmask8)(U), (int)(R)))
637 #define _mm_maskz_sub_round_sh(U, A, B, R) \
638 ((__m128h)__builtin_ia32_subsh_round_mask( \
639 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
640 (__mmask8)(U), (int)(R)))
642 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mul_sh(__m128h __A,
643 __m128h __B) {
644 __A[0] *= __B[0];
645 return __A;
648 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_mul_sh(__m128h __W,
649 __mmask8 __U,
650 __m128h __A,
651 __m128h __B) {
652 __A = _mm_mul_sh(__A, __B);
653 return __builtin_ia32_selectsh_128(__U, __A, __W);
656 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sh(__mmask8 __U,
657 __m128h __A,
658 __m128h __B) {
659 __A = _mm_mul_sh(__A, __B);
660 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
663 #define _mm_mul_round_sh(A, B, R) \
664 ((__m128h)__builtin_ia32_mulsh_round_mask( \
665 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
666 (__mmask8)-1, (int)(R)))
668 #define _mm_mask_mul_round_sh(W, U, A, B, R) \
669 ((__m128h)__builtin_ia32_mulsh_round_mask( \
670 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
671 (__mmask8)(U), (int)(R)))
673 #define _mm_maskz_mul_round_sh(U, A, B, R) \
674 ((__m128h)__builtin_ia32_mulsh_round_mask( \
675 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
676 (__mmask8)(U), (int)(R)))
678 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_div_sh(__m128h __A,
679 __m128h __B) {
680 __A[0] /= __B[0];
681 return __A;
684 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_div_sh(__m128h __W,
685 __mmask8 __U,
686 __m128h __A,
687 __m128h __B) {
688 __A = _mm_div_sh(__A, __B);
689 return __builtin_ia32_selectsh_128(__U, __A, __W);
692 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_div_sh(__mmask8 __U,
693 __m128h __A,
694 __m128h __B) {
695 __A = _mm_div_sh(__A, __B);
696 return __builtin_ia32_selectsh_128(__U, __A, _mm_setzero_ph());
699 #define _mm_div_round_sh(A, B, R) \
700 ((__m128h)__builtin_ia32_divsh_round_mask( \
701 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
702 (__mmask8)-1, (int)(R)))
704 #define _mm_mask_div_round_sh(W, U, A, B, R) \
705 ((__m128h)__builtin_ia32_divsh_round_mask( \
706 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
707 (__mmask8)(U), (int)(R)))
709 #define _mm_maskz_div_round_sh(U, A, B, R) \
710 ((__m128h)__builtin_ia32_divsh_round_mask( \
711 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
712 (__mmask8)(U), (int)(R)))
714 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_min_sh(__m128h __A,
715 __m128h __B) {
716 return (__m128h)__builtin_ia32_minsh_round_mask(
717 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
718 _MM_FROUND_CUR_DIRECTION);
721 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_min_sh(__m128h __W,
722 __mmask8 __U,
723 __m128h __A,
724 __m128h __B) {
725 return (__m128h)__builtin_ia32_minsh_round_mask((__v8hf)__A, (__v8hf)__B,
726 (__v8hf)__W, (__mmask8)__U,
727 _MM_FROUND_CUR_DIRECTION);
730 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_min_sh(__mmask8 __U,
731 __m128h __A,
732 __m128h __B) {
733 return (__m128h)__builtin_ia32_minsh_round_mask(
734 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
735 _MM_FROUND_CUR_DIRECTION);
738 #define _mm_min_round_sh(A, B, R) \
739 ((__m128h)__builtin_ia32_minsh_round_mask( \
740 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
741 (__mmask8)-1, (int)(R)))
743 #define _mm_mask_min_round_sh(W, U, A, B, R) \
744 ((__m128h)__builtin_ia32_minsh_round_mask( \
745 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
746 (__mmask8)(U), (int)(R)))
748 #define _mm_maskz_min_round_sh(U, A, B, R) \
749 ((__m128h)__builtin_ia32_minsh_round_mask( \
750 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
751 (__mmask8)(U), (int)(R)))
753 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_max_sh(__m128h __A,
754 __m128h __B) {
755 return (__m128h)__builtin_ia32_maxsh_round_mask(
756 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
757 _MM_FROUND_CUR_DIRECTION);
760 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_max_sh(__m128h __W,
761 __mmask8 __U,
762 __m128h __A,
763 __m128h __B) {
764 return (__m128h)__builtin_ia32_maxsh_round_mask((__v8hf)__A, (__v8hf)__B,
765 (__v8hf)__W, (__mmask8)__U,
766 _MM_FROUND_CUR_DIRECTION);
769 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_max_sh(__mmask8 __U,
770 __m128h __A,
771 __m128h __B) {
772 return (__m128h)__builtin_ia32_maxsh_round_mask(
773 (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
774 _MM_FROUND_CUR_DIRECTION);
777 #define _mm_max_round_sh(A, B, R) \
778 ((__m128h)__builtin_ia32_maxsh_round_mask( \
779 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
780 (__mmask8)-1, (int)(R)))
782 #define _mm_mask_max_round_sh(W, U, A, B, R) \
783 ((__m128h)__builtin_ia32_maxsh_round_mask( \
784 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W), \
785 (__mmask8)(U), (int)(R)))
787 #define _mm_maskz_max_round_sh(U, A, B, R) \
788 ((__m128h)__builtin_ia32_maxsh_round_mask( \
789 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(), \
790 (__mmask8)(U), (int)(R)))
/* Packed and scalar half-precision compares producing mask registers; the
 * predicate P selects the comparison, R the rounding/exception control. */
#define _mm512_cmp_round_ph_mask(A, B, P, R)                                   \
  ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A),              \
                                           (__v32hf)(__m512h)(B), (int)(P),    \
                                           (__mmask32)-1, (int)(R)))

#define _mm512_mask_cmp_round_ph_mask(U, A, B, P, R)                           \
  ((__mmask32)__builtin_ia32_cmpph512_mask((__v32hf)(__m512h)(A),              \
                                           (__v32hf)(__m512h)(B), (int)(P),    \
                                           (__mmask32)(U), (int)(R)))

#define _mm512_cmp_ph_mask(A, B, P)                                            \
  _mm512_cmp_round_ph_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_cmp_ph_mask(U, A, B, P)                                    \
  _mm512_mask_cmp_round_ph_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm_cmp_round_sh_mask(X, Y, P, R)                                      \
  ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X),                   \
                                       (__v8hf)(__m128h)(Y), (int)(P),         \
                                       (__mmask8)-1, (int)(R)))

#define _mm_mask_cmp_round_sh_mask(M, X, Y, P, R)                              \
  ((__mmask8)__builtin_ia32_cmpsh_mask((__v8hf)(__m128h)(X),                   \
                                       (__v8hf)(__m128h)(Y), (int)(P),         \
                                       (__mmask8)(M), (int)(R)))

#define _mm_cmp_sh_mask(X, Y, P)                                               \
  ((__mmask8)__builtin_ia32_cmpsh_mask(                                        \
      (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)-1,      \
      _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_cmp_sh_mask(M, X, Y, P)                                       \
  ((__mmask8)__builtin_ia32_cmpsh_mask(                                        \
      (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), (int)(P), (__mmask8)(M),     \
      _MM_FROUND_CUR_DIRECTION))
827 // loads with vmovsh:
828 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_sh(void const *__dp) {
829 struct __mm_load_sh_struct {
830 _Float16 __u;
831 } __attribute__((__packed__, __may_alias__));
832 _Float16 __u = ((const struct __mm_load_sh_struct *)__dp)->__u;
833 return (__m128h){__u, 0, 0, 0, 0, 0, 0, 0};
836 static __inline__ __m128h __DEFAULT_FN_ATTRS128
837 _mm_mask_load_sh(__m128h __W, __mmask8 __U, const void *__A) {
838 __m128h src = (__v8hf)__builtin_shufflevector(
839 (__v8hf)__W, (__v8hf)_mm_setzero_ph(), 0, 8, 8, 8, 8, 8, 8, 8);
841 return (__m128h)__builtin_ia32_loadsh128_mask((const __v8hf *)__A, src, __U & 1);
844 static __inline__ __m128h __DEFAULT_FN_ATTRS128
845 _mm_maskz_load_sh(__mmask8 __U, const void *__A) {
846 return (__m128h)__builtin_ia32_loadsh128_mask(
847 (const __v8hf *)__A, (__v8hf)_mm_setzero_ph(), __U & 1);
850 static __inline__ __m512h __DEFAULT_FN_ATTRS512
851 _mm512_load_ph(void const *__p) {
852 return *(const __m512h *)__p;
855 static __inline__ __m256h __DEFAULT_FN_ATTRS256
856 _mm256_load_ph(void const *__p) {
857 return *(const __m256h *)__p;
860 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_load_ph(void const *__p) {
861 return *(const __m128h *)__p;
864 static __inline__ __m512h __DEFAULT_FN_ATTRS512
865 _mm512_loadu_ph(void const *__p) {
866 struct __loadu_ph {
867 __m512h_u __v;
868 } __attribute__((__packed__, __may_alias__));
869 return ((const struct __loadu_ph *)__p)->__v;
872 static __inline__ __m256h __DEFAULT_FN_ATTRS256
873 _mm256_loadu_ph(void const *__p) {
874 struct __loadu_ph {
875 __m256h_u __v;
876 } __attribute__((__packed__, __may_alias__));
877 return ((const struct __loadu_ph *)__p)->__v;
880 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_loadu_ph(void const *__p) {
881 struct __loadu_ph {
882 __m128h_u __v;
883 } __attribute__((__packed__, __may_alias__));
884 return ((const struct __loadu_ph *)__p)->__v;
// stores with vmovsh:
// Store element 0 of __a to an (possibly unaligned) _Float16 location.
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sh(void *__dp,
                                                          __m128h __a) {
  struct __mm_store_sh_struct {
    _Float16 __u;
  } __attribute__((__packed__, __may_alias__));
  ((struct __mm_store_sh_struct *)__dp)->__u = __a[0];
}

// Masked scalar store: element 0 is written only when bit 0 of __U is set.
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sh(void *__W,
                                                               __mmask8 __U,
                                                               __m128h __A) {
  __builtin_ia32_storesh128_mask((__v8hf *)__W, __A, __U & 1);
}
// Aligned full-vector stores of packed _Float16; __P must be aligned to the
// vector size.
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_ph(void *__P,
                                                             __m512h __A) {
  *(__m512h *)__P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_ph(void *__P,
                                                             __m256h __A) {
  *(__m256h *)__P = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_ph(void *__P,
                                                          __m128h __A) {
  *(__m128h *)__P = __A;
}
// Unaligned full-vector stores; same packed/may_alias wrapper technique as the
// unaligned loads above.
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_ph(void *__P,
                                                              __m512h __A) {
  struct __storeu_ph {
    __m512h_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ph *)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_ph(void *__P,
                                                              __m256h __A) {
  struct __storeu_ph {
    __m256h_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ph *)__P)->__v = __A;
}

static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_ph(void *__P,
                                                           __m128h __A) {
  struct __storeu_ph {
    __m128h_u __v;
  } __attribute__((__packed__, __may_alias__));
  ((struct __storeu_ph *)__P)->__v = __A;
}
// moves with vmovsh:
// Copy element 0 of __b into element 0 of __a; elements 1..7 of __a are kept.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_move_sh(__m128h __a,
                                                            __m128h __b) {
  __a[0] = __b[0];
  return __a;
}

// Merge-masked move: element 0 comes from the move result when bit 0 of __U is
// set, otherwise from __W.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_move_sh(__m128h __W,
                                                                 __mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128h __B) {
  return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B), __W);
}

// Zero-masked move: element 0 is zeroed when bit 0 of __U is clear.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_move_sh(__mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128h __B) {
  return __builtin_ia32_selectsh_128(__U, _mm_move_sh(__A, __B),
                                     _mm_setzero_ph());
}
// vmovw:
// Zero-extend a 16-bit integer into element 0 of a 128-bit vector.
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsi16_si128(short __a) {
  return (__m128i)(__v8hi){__a, 0, 0, 0, 0, 0, 0, 0};
}

// Extract the low 16-bit integer element of a 128-bit vector.
static __inline__ short __DEFAULT_FN_ATTRS128 _mm_cvtsi128_si16(__m128i __a) {
  __v8hi __b = (__v8hi)__a;
  return __b[0];
}
// vrcpph / vrsqrtph: packed approximate reciprocal and reciprocal square root,
// with the usual unmasked / merge-masked / zero-masked variants.
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rcp_ph(__m512h __A) {
  return (__m512h)__builtin_ia32_rcpph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_rcp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_rcpph512_mask((__v32hf)__A, (__v32hf)__W,
                                               (__mmask32)__U);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_rcp_ph(__mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_rcpph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_rsqrt_ph(__m512h __A) {
  return (__m512h)__builtin_ia32_rsqrtph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_rsqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_rsqrtph512_mask((__v32hf)__A, (__v32hf)__W,
                                                 (__mmask32)__U);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_rsqrt_ph(__mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_rsqrtph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U);
}
// vgetmantph: extract the normalized mantissa. B selects the normalization
// interval and C the sign control; they are packed into the builtin's
// immediate as (C << 2) | B. The _round_ variants take an explicit rounding
// mode R, the plain variants use the current rounding direction.
#define _mm512_getmant_ph(A, B, C)                                             \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)),                          \
      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,                           \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_getmant_ph(W, U, A, B, C)                                  \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W),   \
      (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_getmant_ph(U, A, B, C)                                    \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)),                          \
      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_getmant_round_ph(A, B, C, R)                                    \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)),                          \
      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))

#define _mm512_mask_getmant_round_ph(W, U, A, B, C, R)                         \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)), (__v32hf)(__m512h)(W),   \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_getmant_round_ph(U, A, B, C, R)                           \
  ((__m512h)__builtin_ia32_getmantph512_mask(                                  \
      (__v32hf)(__m512h)(A), (int)(((C) << 2) | (B)),                          \
      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
// vgetexpph: extract the (biased-adjusted) exponent of each element as a
// _Float16 value.
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_getexp_ph(__m512h __A) {
  return (__m512h)__builtin_ia32_getexpph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_getexp_ph(__m512h __W, __mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_getexpph512_mask(
      (__v32hf)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_getexp_ph(__mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_getexpph512_mask(
      (__v32hf)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

// Explicit-rounding variants (R is an _MM_FROUND_* value).
#define _mm512_getexp_round_ph(A, R)                                           \
  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
                                            (__v32hf)_mm512_undefined_ph(),    \
                                            (__mmask32)-1, (int)(R)))

#define _mm512_mask_getexp_round_ph(W, U, A, R)                                \
  ((__m512h)__builtin_ia32_getexpph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R)))

#define _mm512_maskz_getexp_round_ph(U, A, R)                                  \
  ((__m512h)__builtin_ia32_getexpph512_mask((__v32hf)(__m512h)(A),             \
                                            (__v32hf)_mm512_setzero_ph(),      \
                                            (__mmask32)(U), (int)(R)))
// vscalefph: scale each element of __A by 2^floor(__B), element-wise.
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_scalef_ph(__m512h __A,
                                                                 __m512h __B) {
  return (__m512h)__builtin_ia32_scalefph512_mask(
      (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_undefined_ph(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_scalef_ph(__m512h __W, __mmask32 __U, __m512h __A, __m512h __B) {
  return (__m512h)__builtin_ia32_scalefph512_mask((__v32hf)__A, (__v32hf)__B,
                                                  (__v32hf)__W, (__mmask32)__U,
                                                  _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_scalef_ph(__mmask32 __U, __m512h __A, __m512h __B) {
  return (__m512h)__builtin_ia32_scalefph512_mask(
      (__v32hf)__A, (__v32hf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}

// Explicit-rounding variants.
#define _mm512_scalef_round_ph(A, B, R)                                        \
  ((__m512h)__builtin_ia32_scalefph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B),                            \
      (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R)))

#define _mm512_mask_scalef_round_ph(W, U, A, B, R)                             \
  ((__m512h)__builtin_ia32_scalefph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(W),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_scalef_round_ph(U, A, B, R)                               \
  ((__m512h)__builtin_ia32_scalefph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B),                            \
      (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
// vrndscaleph: round each element to the number of fraction bits given in the
// immediate. NOTE(review): the unmasked form passes (A) itself as the
// pass-through source with an all-ones mask, so the source operand is never
// actually merged in.
#define _mm512_roundscale_ph(A, B)                                             \
  ((__m512h)__builtin_ia32_rndscaleph_mask(                                    \
      (__v32hf)(__m512h)(A), (int)(B), (__v32hf)(__m512h)(A), (__mmask32)-1,   \
      _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_ph(A, B, C, imm)                                \
  ((__m512h)__builtin_ia32_rndscaleph_mask(                                    \
      (__v32hf)(__m512h)(C), (int)(imm), (__v32hf)(__m512h)(A),                \
      (__mmask32)(B), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_roundscale_ph(A, B, imm)                                  \
  ((__m512h)__builtin_ia32_rndscaleph_mask(                                    \
      (__v32hf)(__m512h)(B), (int)(imm), (__v32hf)_mm512_setzero_ph(),         \
      (__mmask32)(A), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_roundscale_round_ph(A, B, C, imm, R)                       \
  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(C), (int)(imm),  \
                                           (__v32hf)(__m512h)(A),              \
                                           (__mmask32)(B), (int)(R)))

#define _mm512_maskz_roundscale_round_ph(A, B, imm, R)                         \
  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(B), (int)(imm),  \
                                           (__v32hf)_mm512_setzero_ph(),       \
                                           (__mmask32)(A), (int)(R)))

#define _mm512_roundscale_round_ph(A, imm, R)                                  \
  ((__m512h)__builtin_ia32_rndscaleph_mask((__v32hf)(__m512h)(A), (int)(imm),  \
                                           (__v32hf)_mm512_undefined_ph(),     \
                                           (__mmask32)-1, (int)(R)))

// vreduceph: compute the reduction (fractional remainder after rounding to
// the precision selected by imm) of each element.
#define _mm512_reduce_ph(A, imm)                                               \
  ((__m512h)__builtin_ia32_reduceph512_mask(                                   \
      (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_undefined_ph(),       \
      (__mmask32)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ph(W, U, A, imm)                                    \
  ((__m512h)__builtin_ia32_reduceph512_mask(                                   \
      (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)(__m512h)(W),                \
      (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ph(U, A, imm)                                      \
  ((__m512h)__builtin_ia32_reduceph512_mask(                                   \
      (__v32hf)(__m512h)(A), (int)(imm), (__v32hf)_mm512_setzero_ph(),         \
      (__mmask32)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_round_ph(W, U, A, imm, R)                           \
  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
                                            (__v32hf)(__m512h)(W),             \
                                            (__mmask32)(U), (int)(R)))

#define _mm512_maskz_reduce_round_ph(U, A, imm, R)                             \
  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
                                            (__v32hf)_mm512_setzero_ph(),      \
                                            (__mmask32)(U), (int)(R)))

#define _mm512_reduce_round_ph(A, imm, R)                                      \
  ((__m512h)__builtin_ia32_reduceph512_mask((__v32hf)(__m512h)(A), (int)(imm), \
                                            (__v32hf)_mm512_undefined_ph(),    \
                                            (__mmask32)-1, (int)(R)))
// Scalar approximate reciprocal / reciprocal square root. Element 0 of the
// result is computed from __B; elements 1..7 are copied from __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rcp_sh(__m128h __A,
                                                           __m128h __B) {
  return (__m128h)__builtin_ia32_rcpsh_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rcp_sh(__m128h __W,
                                                                __mmask8 __U,
                                                                __m128h __A,
                                                                __m128h __B) {
  return (__m128h)__builtin_ia32_rcpsh_mask((__v8hf)__A, (__v8hf)__B,
                                            (__v8hf)__W, (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_sh(__mmask8 __U,
                                                                 __m128h __A,
                                                                 __m128h __B) {
  return (__m128h)__builtin_ia32_rcpsh_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_rsqrt_sh(__m128h __A,
                                                             __m128h __B) {
  return (__m128h)__builtin_ia32_rsqrtsh_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_sh(__m128h __W,
                                                                  __mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128h __B) {
  return (__m128h)__builtin_ia32_rsqrtsh_mask((__v8hf)__A, (__v8hf)__B,
                                              (__v8hf)__W, (__mmask8)__U);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_rsqrt_sh(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_rsqrtsh_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
}
// Scalar getmant: element 0 is the normalized mantissa of element 0 of B
// (interval selector C, sign control D packed as (D << 2) | C); elements 1..7
// come from A.
#define _mm_getmant_round_sh(A, B, C, D, R)                                    \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)_mm_setzero_ph(), (__mmask8)-1, (int)(R)))

#define _mm_getmant_sh(A, B, C, D)                                             \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)_mm_setzero_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_sh(W, U, A, B, C, D)                                  \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_getmant_round_sh(W, U, A, B, C, D, R)                         \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_getmant_sh(U, A, B, C, D)                                    \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_getmant_round_sh(U, A, B, C, D, R)                           \
  ((__m128h)__builtin_ia32_getmantsh_round_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(((D) << 2) | (C)),     \
      (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

// Scalar getexp with explicit rounding mode.
#define _mm_getexp_round_sh(A, B, R)                                           \
  ((__m128h)__builtin_ia32_getexpsh128_round_mask(                             \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(R)))
// Scalar getexp: element 0 is the exponent of element 0 of __B; elements 1..7
// come from __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_getexp_sh(__m128h __A,
                                                              __m128h __B) {
  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_getexp_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_getexp_round_sh(W, U, A, B, R)                                \
  ((__m128h)__builtin_ia32_getexpsh128_round_mask(                             \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_getexp_sh(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_getexpsh128_round_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_getexp_round_sh(U, A, B, R)                                  \
  ((__m128h)__builtin_ia32_getexpsh128_round_mask(                             \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(R)))
// Scalar scalef: element 0 is __A[0] * 2^floor(__B[0]); elements 1..7 come
// from __A.
#define _mm_scalef_round_sh(A, B, R)                                           \
  ((__m128h)__builtin_ia32_scalefsh_round_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_scalef_sh(__m128h __A,
                                                              __m128h __B) {
  return (__m128h)__builtin_ia32_scalefsh_round_mask(
      (__v8hf)__A, (__v8hf)(__B), (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_scalef_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_scalefsh_round_mask((__v8hf)__A, (__v8hf)__B,
                                                     (__v8hf)__W, (__mmask8)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

#define _mm_mask_scalef_round_sh(W, U, A, B, R)                                \
  ((__m128h)__builtin_ia32_scalefsh_round_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_scalef_sh(__mmask8 __U, __m128h __A, __m128h __B) {
  return (__m128h)__builtin_ia32_scalefsh_round_mask(
      (__v8hf)__A, (__v8hf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

#define _mm_maskz_scalef_round_sh(U, A, B, R)                                  \
  ((__m128h)__builtin_ia32_scalefsh_round_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(R)))
// Scalar roundscale: element 0 is __B[0] rounded to the precision selected by
// imm/I; elements 1..7 come from A.
#define _mm_roundscale_round_sh(A, B, imm, R)                                  \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(imm), (int)(R)))

#define _mm_roundscale_sh(A, B, imm)                                           \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(imm), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_sh(W, U, A, B, I)                                  \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_roundscale_round_sh(W, U, A, B, I, R)                         \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(I), (int)(R)))

#define _mm_maskz_roundscale_sh(U, A, B, I)                                    \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(I), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_roundscale_round_sh(U, A, B, I, R)                           \
  ((__m128h)__builtin_ia32_rndscalesh_round_mask(                              \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(I), (int)(R)))

// Scalar reduce: element 0 is the reduction of __B[0] with precision selector
// C; elements 1..7 come from A.
#define _mm_reduce_sh(A, B, C)                                                 \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sh(W, U, A, B, C)                                      \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sh(U, A, B, C)                                        \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sh(A, B, C, R)                                        \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(C), (int)(R)))

#define _mm_mask_reduce_round_sh(W, U, A, B, C, R)                             \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(C), (int)(R)))

#define _mm_maskz_reduce_round_sh(U, A, B, C, R)                               \
  ((__m128h)__builtin_ia32_reducesh_mask(                                      \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(C), (int)(R)))
// vsqrtph: packed square root. The 512-bit builtin is unmasked; masking is
// applied afterwards via a select.
#define _mm512_sqrt_round_ph(A, R)                                             \
  ((__m512h)__builtin_ia32_sqrtph512((__v32hf)(__m512h)(A), (int)(R)))

#define _mm512_mask_sqrt_round_ph(W, U, A, R)                                  \
  ((__m512h)__builtin_ia32_selectph_512(                                       \
      (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)),                 \
      (__v32hf)(__m512h)(W)))

#define _mm512_maskz_sqrt_round_ph(U, A, R)                                    \
  ((__m512h)__builtin_ia32_selectph_512(                                       \
      (__mmask32)(U), (__v32hf)_mm512_sqrt_round_ph((A), (R)),                 \
      (__v32hf)_mm512_setzero_ph()))

static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_sqrt_ph(__m512h __A) {
  return (__m512h)__builtin_ia32_sqrtph512((__v32hf)__A,
                                           _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_sqrt_ph(__m512h __W, __mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_selectph_512(
      (__mmask32)(__U),
      (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
      (__v32hf)(__m512h)(__W));
}

static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_maskz_sqrt_ph(__mmask32 __U, __m512h __A) {
  return (__m512h)__builtin_ia32_selectph_512(
      (__mmask32)(__U),
      (__v32hf)__builtin_ia32_sqrtph512((__A), (_MM_FROUND_CUR_DIRECTION)),
      (__v32hf)_mm512_setzero_ph());
}
// Scalar square root with explicit rounding: element 0 is sqrt(B[0]);
// elements 1..7 come from A.
#define _mm_sqrt_round_sh(A, B, R)                                             \
  ((__m128h)__builtin_ia32_sqrtsh_round_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)-1, (int)(R)))

#define _mm_mask_sqrt_round_sh(W, U, A, B, R)                                  \
  ((__m128h)__builtin_ia32_sqrtsh_round_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(W),        \
      (__mmask8)(U), (int)(R)))

#define _mm_maskz_sqrt_round_sh(U, A, B, R)                                    \
  ((__m128h)__builtin_ia32_sqrtsh_round_mask(                                  \
      (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)_mm_setzero_ph(),    \
      (__mmask8)(U), (int)(R)))
// Scalar square root: element 0 is sqrt(__B[0]); elements 1..7 come from __A.
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_sqrt_sh(__m128h __A,
                                                            __m128h __B) {
  return (__m128h)__builtin_ia32_sqrtsh_round_mask(
      (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
      (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
}
1425 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sh(__m128h __W,
1426 __mmask32 __U,
1427 __m128h __A,
1428 __m128h __B) {
1429 return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1430 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)(__m128h)(__W),
1431 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
1434 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sh(__mmask32 __U,
1435 __m128h __A,
1436 __m128h __B) {
1437 return (__m128h)__builtin_ia32_sqrtsh_round_mask(
1438 (__v8hf)(__m128h)(__A), (__v8hf)(__m128h)(__B), (__v8hf)_mm_setzero_ph(),
1439 (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
// vfpclassph / vfpclasssh: test each element for the FP categories selected by
// imm (QNaN, zero, denormal, infinity, negative, ...); returns a bitmask.
#define _mm512_mask_fpclass_ph_mask(U, A, imm)                                 \
  ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A),          \
                                               (int)(imm), (__mmask32)(U)))

#define _mm512_fpclass_ph_mask(A, imm)                                         \
  ((__mmask32)__builtin_ia32_fpclassph512_mask((__v32hf)(__m512h)(A),          \
                                               (int)(imm), (__mmask32)-1))

#define _mm_fpclass_sh_mask(A, imm)                                            \
  ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm),   \
                                           (__mmask8)-1))

#define _mm_mask_fpclass_sh_mask(U, A, imm)                                    \
  ((__mmask8)__builtin_ia32_fpclasssh_mask((__v8hf)(__m128h)(A), (int)(imm),   \
                                           (__mmask8)(U)))
// vcvtpd2ph: convert 8 packed doubles to 8 packed _Float16 (narrowing to a
// 128-bit result).
#define _mm512_cvt_roundpd_ph(A, R)                                            \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask(                                  \
      (__v8df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundpd_ph(W, U, A, R)                                 \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask((__v8df)(A), (__v8hf)(W),         \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundpd_ph(U, A, R)                                   \
  ((__m128h)__builtin_ia32_vcvtpd2ph512_mask(                                  \
      (__v8df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ph(__m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpd_ph(__m128h __W, __mmask8 __U, __m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpd_ph(__mmask8 __U, __m512d __A) {
  return (__m128h)__builtin_ia32_vcvtpd2ph512_mask(
      (__v8df)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtph2pd: widen 8 packed _Float16 values to 8 packed doubles.
#define _mm512_cvt_roundph_pd(A, R)                                            \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask(                                  \
      (__v8hf)(A), (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), (int)(R)))

#define _mm512_mask_cvt_roundph_pd(W, U, A, R)                                 \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask((__v8hf)(A), (__v8df)(W),         \
                                             (__mmask8)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_pd(U, A, R)                                   \
  ((__m512d)__builtin_ia32_vcvtph2pd512_mask(                                  \
      (__v8hf)(A), (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R)))

static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtph_pd(__m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_pd(__m512d __W, __mmask8 __U, __m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_pd(__mmask8 __U, __m128h __A) {
  return (__m512d)__builtin_ia32_vcvtph2pd512_mask(
      (__v8hf)__A, (__v8df)_mm512_setzero_pd(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtsh2ss: convert the scalar _Float16 in element 0 of B to float in
// element 0 of the result; elements 1..3 come from A.
#define _mm_cvt_roundsh_ss(A, B, R)                                            \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B),       \
                                               (__v4sf)_mm_undefined_ps(),     \
                                               (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsh_ss(W, U, A, B, R)                                 \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask(                                \
      (__v4sf)(A), (__v8hf)(B), (__v4sf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsh_ss(U, A, B, R)                                   \
  ((__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)(A), (__v8hf)(B),       \
                                               (__v4sf)_mm_setzero_ps(),       \
                                               (__mmask8)(U), (int)(R)))

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtsh_ss(__m128 __A,
                                                            __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_undefined_ps(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_ss(__m128 __W,
                                                                 __mmask8 __U,
                                                                 __m128 __A,
                                                                 __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask((__v4sf)__A, (__v8hf)__B,
                                                     (__v4sf)__W, (__mmask8)__U,
                                                     _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsh_ss(__mmask8 __U,
                                                                  __m128 __A,
                                                                  __m128h __B) {
  return (__m128)__builtin_ia32_vcvtsh2ss_round_mask(
      (__v4sf)__A, (__v8hf)__B, (__v4sf)_mm_setzero_ps(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtss2sh: convert the scalar float in element 0 of B to _Float16 in
// element 0 of the result; elements 1..7 come from A.
#define _mm_cvt_roundss_sh(A, B, R)                                            \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B),      \
                                                (__v8hf)_mm_undefined_ph(),    \
                                                (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundss_sh(W, U, A, B, R)                                 \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask(                               \
      (__v8hf)(A), (__v4sf)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundss_sh(U, A, B, R)                                   \
  ((__m128h)__builtin_ia32_vcvtss2sh_round_mask((__v8hf)(A), (__v4sf)(B),      \
                                                (__v8hf)_mm_setzero_ph(),      \
                                                (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtss_sh(__m128h __A,
                                                             __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sh(__m128h __W,
                                                                  __mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sh(__mmask8 __U,
                                                                   __m128h __A,
                                                                   __m128 __B) {
  return (__m128h)__builtin_ia32_vcvtss2sh_round_mask(
      (__v8hf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtsd2sh: convert the scalar double in element 0 of B to _Float16 in
// element 0 of the result; elements 1..7 come from A.
#define _mm_cvt_roundsd_sh(A, B, R)                                            \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B),      \
                                                (__v8hf)_mm_undefined_ph(),    \
                                                (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsd_sh(W, U, A, B, R)                                 \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask(                               \
      (__v8hf)(A), (__v2df)(B), (__v8hf)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsd_sh(U, A, B, R)                                   \
  ((__m128h)__builtin_ia32_vcvtsd2sh_round_mask((__v8hf)(A), (__v2df)(B),      \
                                                (__v8hf)_mm_setzero_ph(),      \
                                                (__mmask8)(U), (int)(R)))

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtsd_sh(__m128h __A,
                                                             __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_undefined_ph(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_sh(__m128h __W,
                                                                  __mmask8 __U,
                                                                  __m128h __A,
                                                                  __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsd_sh(__mmask8 __U, __m128h __A, __m128d __B) {
  return (__m128h)__builtin_ia32_vcvtsd2sh_round_mask(
      (__v8hf)__A, (__v2df)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtsh2sd: convert the scalar _Float16 in element 0 of B to double in
// element 0 of the result; element 1 comes from A.
#define _mm_cvt_roundsh_sd(A, B, R)                                            \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B),      \
                                                (__v2df)_mm_undefined_pd(),    \
                                                (__mmask8)(-1), (int)(R)))

#define _mm_mask_cvt_roundsh_sd(W, U, A, B, R)                                 \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask(                               \
      (__v2df)(A), (__v8hf)(B), (__v2df)(W), (__mmask8)(U), (int)(R)))

#define _mm_maskz_cvt_roundsh_sd(U, A, B, R)                                   \
  ((__m128d)__builtin_ia32_vcvtsh2sd_round_mask((__v2df)(A), (__v8hf)(B),      \
                                                (__v2df)_mm_setzero_pd(),      \
                                                (__mmask8)(U), (int)(R)))

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtsh_sd(__m128d __A,
                                                             __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_undefined_pd(), (__mmask8)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtsh_sd(__m128d __W,
                                                                  __mmask8 __U,
                                                                  __m128d __A,
                                                                  __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)__W, (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128
_mm_maskz_cvtsh_sd(__mmask8 __U, __m128d __A, __m128h __B) {
  return (__m128d)__builtin_ia32_vcvtsh2sd_round_mask(
      (__v2df)__A, (__v8hf)__B, (__v2df)_mm_setzero_pd(), (__mmask8)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvtph2w: convert packed _Float16 to packed signed 16-bit integers
// (rounding per the current/explicit rounding mode).
#define _mm512_cvt_roundph_epi16(A, R)                                         \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A),                      \
                                            (__v32hi)_mm512_undefined_epi32(), \
                                            (__mmask32)(-1), (int)(R)))

#define _mm512_mask_cvt_roundph_epi16(W, U, A, R)                              \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A), (__v32hi)(W),        \
                                            (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvt_roundph_epi16(U, A, R)                                \
  ((__m512i)__builtin_ia32_vcvtph2w512_mask((__v32hf)(A),                      \
                                            (__v32hi)_mm512_setzero_epi32(),   \
                                            (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvtph_epi16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvtph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtph_epi16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvtph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}
// vcvttph2w: convert packed _Float16 to packed signed 16-bit integers with
// truncation (round toward zero regardless of R's rounding bits).
#define _mm512_cvtt_roundph_epi16(A, R)                                        \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask(                                  \
      (__v32hf)(A), (__v32hi)_mm512_undefined_epi32(), (__mmask32)(-1),        \
      (int)(R)))

#define _mm512_mask_cvtt_roundph_epi16(W, U, A, R)                             \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A), (__v32hi)(W),       \
                                             (__mmask32)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundph_epi16(U, A, R)                               \
  ((__m512i)__builtin_ia32_vcvttph2w512_mask((__v32hf)(A),                     \
                                             (__v32hi)_mm512_setzero_epi32(),  \
                                             (__mmask32)(U), (int)(R)))

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_cvttph_epi16(__m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)-1,
      _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_cvttph_epi16(__m512i __W, __mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_cvttph_epi16(__mmask32 __U, __m512h __A) {
  return (__m512i)__builtin_ia32_vcvttph2w512_mask(
      (__v32hf)__A, (__v32hi)_mm512_setzero_epi32(), (__mmask32)__U,
      _MM_FROUND_CUR_DIRECTION);
}
1738 #define _mm512_cvt_roundepi16_ph(A, R) \
1739 ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), \
1740 (__v32hf)_mm512_undefined_ph(), \
1741 (__mmask32)(-1), (int)(R)))
1743 #define _mm512_mask_cvt_roundepi16_ph(W, U, A, R) \
1744 ((__m512h)__builtin_ia32_vcvtw2ph512_mask((__v32hi)(A), (__v32hf)(W), \
1745 (__mmask32)(U), (int)(R)))
1747 #define _mm512_maskz_cvt_roundepi16_ph(U, A, R) \
1748 ((__m512h)__builtin_ia32_vcvtw2ph512_mask( \
1749 (__v32hi)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1751 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1752 _mm512_cvtepi16_ph(__m512i __A) {
1753 return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1754 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1755 _MM_FROUND_CUR_DIRECTION);
1758 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1759 _mm512_mask_cvtepi16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1760 return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1761 (__v32hi)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1764 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1765 _mm512_maskz_cvtepi16_ph(__mmask32 __U, __m512i __A) {
1766 return (__m512h)__builtin_ia32_vcvtw2ph512_mask(
1767 (__v32hi)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1768 _MM_FROUND_CUR_DIRECTION);
1771 #define _mm512_cvt_roundph_epu16(A, R) \
1772 ((__m512i)__builtin_ia32_vcvtph2uw512_mask( \
1773 (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1774 (int)(R)))
1776 #define _mm512_mask_cvt_roundph_epu16(W, U, A, R) \
1777 ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1778 (__mmask32)(U), (int)(R)))
1780 #define _mm512_maskz_cvt_roundph_epu16(U, A, R) \
1781 ((__m512i)__builtin_ia32_vcvtph2uw512_mask((__v32hf)(A), \
1782 (__v32hu)_mm512_setzero_epi32(), \
1783 (__mmask32)(U), (int)(R)))
1785 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1786 _mm512_cvtph_epu16(__m512h __A) {
1787 return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1788 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1789 _MM_FROUND_CUR_DIRECTION);
1792 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1793 _mm512_mask_cvtph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1794 return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1795 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1798 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1799 _mm512_maskz_cvtph_epu16(__mmask32 __U, __m512h __A) {
1800 return (__m512i)__builtin_ia32_vcvtph2uw512_mask(
1801 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1802 _MM_FROUND_CUR_DIRECTION);
1805 #define _mm512_cvtt_roundph_epu16(A, R) \
1806 ((__m512i)__builtin_ia32_vcvttph2uw512_mask( \
1807 (__v32hf)(A), (__v32hu)_mm512_undefined_epi32(), (__mmask32)(-1), \
1808 (int)(R)))
1810 #define _mm512_mask_cvtt_roundph_epu16(W, U, A, R) \
1811 ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), (__v32hu)(W), \
1812 (__mmask32)(U), (int)(R)))
1814 #define _mm512_maskz_cvtt_roundph_epu16(U, A, R) \
1815 ((__m512i)__builtin_ia32_vcvttph2uw512_mask((__v32hf)(A), \
1816 (__v32hu)_mm512_setzero_epi32(), \
1817 (__mmask32)(U), (int)(R)))
1819 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1820 _mm512_cvttph_epu16(__m512h __A) {
1821 return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1822 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)-1,
1823 _MM_FROUND_CUR_DIRECTION);
1826 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1827 _mm512_mask_cvttph_epu16(__m512i __W, __mmask32 __U, __m512h __A) {
1828 return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1829 (__v32hf)__A, (__v32hu)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1832 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1833 _mm512_maskz_cvttph_epu16(__mmask32 __U, __m512h __A) {
1834 return (__m512i)__builtin_ia32_vcvttph2uw512_mask(
1835 (__v32hf)__A, (__v32hu)_mm512_setzero_epi32(), (__mmask32)__U,
1836 _MM_FROUND_CUR_DIRECTION);
1839 #define _mm512_cvt_roundepu16_ph(A, R) \
1840 ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), \
1841 (__v32hf)_mm512_undefined_ph(), \
1842 (__mmask32)(-1), (int)(R)))
1844 #define _mm512_mask_cvt_roundepu16_ph(W, U, A, R) \
1845 ((__m512h)__builtin_ia32_vcvtuw2ph512_mask((__v32hu)(A), (__v32hf)(W), \
1846 (__mmask32)(U), (int)(R)))
1848 #define _mm512_maskz_cvt_roundepu16_ph(U, A, R) \
1849 ((__m512h)__builtin_ia32_vcvtuw2ph512_mask( \
1850 (__v32hu)(A), (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R)))
1852 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1853 _mm512_cvtepu16_ph(__m512i __A) {
1854 return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1855 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)-1,
1856 _MM_FROUND_CUR_DIRECTION);
1859 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1860 _mm512_mask_cvtepu16_ph(__m512h __W, __mmask32 __U, __m512i __A) {
1861 return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1862 (__v32hu)__A, (__v32hf)__W, (__mmask32)__U, _MM_FROUND_CUR_DIRECTION);
1865 static __inline__ __m512h __DEFAULT_FN_ATTRS512
1866 _mm512_maskz_cvtepu16_ph(__mmask32 __U, __m512i __A) {
1867 return (__m512h)__builtin_ia32_vcvtuw2ph512_mask(
1868 (__v32hu)__A, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U,
1869 _MM_FROUND_CUR_DIRECTION);
1872 #define _mm512_cvt_roundph_epi32(A, R) \
1873 ((__m512i)__builtin_ia32_vcvtph2dq512_mask( \
1874 (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
1875 (int)(R)))
1877 #define _mm512_mask_cvt_roundph_epi32(W, U, A, R) \
1878 ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), (__v16si)(W), \
1879 (__mmask16)(U), (int)(R)))
1881 #define _mm512_maskz_cvt_roundph_epi32(U, A, R) \
1882 ((__m512i)__builtin_ia32_vcvtph2dq512_mask((__v16hf)(A), \
1883 (__v16si)_mm512_setzero_epi32(), \
1884 (__mmask16)(U), (int)(R)))
1886 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1887 _mm512_cvtph_epi32(__m256h __A) {
1888 return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1889 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
1890 _MM_FROUND_CUR_DIRECTION);
1893 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1894 _mm512_mask_cvtph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
1895 return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1896 (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1899 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1900 _mm512_maskz_cvtph_epi32(__mmask16 __U, __m256h __A) {
1901 return (__m512i)__builtin_ia32_vcvtph2dq512_mask(
1902 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
1903 _MM_FROUND_CUR_DIRECTION);
1906 #define _mm512_cvt_roundph_epu32(A, R) \
1907 ((__m512i)__builtin_ia32_vcvtph2udq512_mask( \
1908 (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
1909 (int)(R)))
1911 #define _mm512_mask_cvt_roundph_epu32(W, U, A, R) \
1912 ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), (__v16su)(W), \
1913 (__mmask16)(U), (int)(R)))
1915 #define _mm512_maskz_cvt_roundph_epu32(U, A, R) \
1916 ((__m512i)__builtin_ia32_vcvtph2udq512_mask((__v16hf)(A), \
1917 (__v16su)_mm512_setzero_epi32(), \
1918 (__mmask16)(U), (int)(R)))
1920 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1921 _mm512_cvtph_epu32(__m256h __A) {
1922 return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1923 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
1924 _MM_FROUND_CUR_DIRECTION);
1927 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1928 _mm512_mask_cvtph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
1929 return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1930 (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
1934 _mm512_maskz_cvtph_epu32(__mmask16 __U, __m256h __A) {
1935 return (__m512i)__builtin_ia32_vcvtph2udq512_mask(
1936 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
1937 _MM_FROUND_CUR_DIRECTION);
1940 #define _mm512_cvt_roundepi32_ph(A, R) \
1941 ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), \
1942 (__v16hf)_mm256_undefined_ph(), \
1943 (__mmask16)(-1), (int)(R)))
1945 #define _mm512_mask_cvt_roundepi32_ph(W, U, A, R) \
1946 ((__m256h)__builtin_ia32_vcvtdq2ph512_mask((__v16si)(A), (__v16hf)(W), \
1947 (__mmask16)(U), (int)(R)))
1949 #define _mm512_maskz_cvt_roundepi32_ph(U, A, R) \
1950 ((__m256h)__builtin_ia32_vcvtdq2ph512_mask( \
1951 (__v16si)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1953 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1954 _mm512_cvtepi32_ph(__m512i __A) {
1955 return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1956 (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1957 _MM_FROUND_CUR_DIRECTION);
1960 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1961 _mm512_mask_cvtepi32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1962 return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1963 (__v16si)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1966 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1967 _mm512_maskz_cvtepi32_ph(__mmask16 __U, __m512i __A) {
1968 return (__m256h)__builtin_ia32_vcvtdq2ph512_mask(
1969 (__v16si)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
1970 _MM_FROUND_CUR_DIRECTION);
1973 #define _mm512_cvt_roundepu32_ph(A, R) \
1974 ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), \
1975 (__v16hf)_mm256_undefined_ph(), \
1976 (__mmask16)(-1), (int)(R)))
1978 #define _mm512_mask_cvt_roundepu32_ph(W, U, A, R) \
1979 ((__m256h)__builtin_ia32_vcvtudq2ph512_mask((__v16su)(A), (__v16hf)(W), \
1980 (__mmask16)(U), (int)(R)))
1982 #define _mm512_maskz_cvt_roundepu32_ph(U, A, R) \
1983 ((__m256h)__builtin_ia32_vcvtudq2ph512_mask( \
1984 (__v16su)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
1986 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1987 _mm512_cvtepu32_ph(__m512i __A) {
1988 return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1989 (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
1990 _MM_FROUND_CUR_DIRECTION);
1993 static __inline__ __m256h __DEFAULT_FN_ATTRS512
1994 _mm512_mask_cvtepu32_ph(__m256h __W, __mmask16 __U, __m512i __A) {
1995 return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
1996 (__v16su)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
1999 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2000 _mm512_maskz_cvtepu32_ph(__mmask16 __U, __m512i __A) {
2001 return (__m256h)__builtin_ia32_vcvtudq2ph512_mask(
2002 (__v16su)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2003 _MM_FROUND_CUR_DIRECTION);
2006 #define _mm512_cvtt_roundph_epi32(A, R) \
2007 ((__m512i)__builtin_ia32_vcvttph2dq512_mask( \
2008 (__v16hf)(A), (__v16si)_mm512_undefined_epi32(), (__mmask16)(-1), \
2009 (int)(R)))
2011 #define _mm512_mask_cvtt_roundph_epi32(W, U, A, R) \
2012 ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), (__v16si)(W), \
2013 (__mmask16)(U), (int)(R)))
2015 #define _mm512_maskz_cvtt_roundph_epi32(U, A, R) \
2016 ((__m512i)__builtin_ia32_vcvttph2dq512_mask((__v16hf)(A), \
2017 (__v16si)_mm512_setzero_epi32(), \
2018 (__mmask16)(U), (int)(R)))
2020 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2021 _mm512_cvttph_epi32(__m256h __A) {
2022 return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2023 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)-1,
2024 _MM_FROUND_CUR_DIRECTION);
2027 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2028 _mm512_mask_cvttph_epi32(__m512i __W, __mmask16 __U, __m256h __A) {
2029 return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2030 (__v16hf)__A, (__v16si)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2033 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2034 _mm512_maskz_cvttph_epi32(__mmask16 __U, __m256h __A) {
2035 return (__m512i)__builtin_ia32_vcvttph2dq512_mask(
2036 (__v16hf)__A, (__v16si)_mm512_setzero_epi32(), (__mmask16)__U,
2037 _MM_FROUND_CUR_DIRECTION);
2040 #define _mm512_cvtt_roundph_epu32(A, R) \
2041 ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2042 (__v16hf)(A), (__v16su)_mm512_undefined_epi32(), (__mmask16)(-1), \
2043 (int)(R)))
2045 #define _mm512_mask_cvtt_roundph_epu32(W, U, A, R) \
2046 ((__m512i)__builtin_ia32_vcvttph2udq512_mask((__v16hf)(A), (__v16su)(W), \
2047 (__mmask16)(U), (int)(R)))
2049 #define _mm512_maskz_cvtt_roundph_epu32(U, A, R) \
2050 ((__m512i)__builtin_ia32_vcvttph2udq512_mask( \
2051 (__v16hf)(A), (__v16su)_mm512_setzero_epi32(), (__mmask16)(U), \
2052 (int)(R)))
2054 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2055 _mm512_cvttph_epu32(__m256h __A) {
2056 return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2057 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)-1,
2058 _MM_FROUND_CUR_DIRECTION);
2061 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2062 _mm512_mask_cvttph_epu32(__m512i __W, __mmask16 __U, __m256h __A) {
2063 return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2064 (__v16hf)__A, (__v16su)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2067 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2068 _mm512_maskz_cvttph_epu32(__mmask16 __U, __m256h __A) {
2069 return (__m512i)__builtin_ia32_vcvttph2udq512_mask(
2070 (__v16hf)__A, (__v16su)_mm512_setzero_epi32(), (__mmask16)__U,
2071 _MM_FROUND_CUR_DIRECTION);
2074 #define _mm512_cvt_roundepi64_ph(A, R) \
2075 ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2076 (__v8di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2078 #define _mm512_mask_cvt_roundepi64_ph(W, U, A, R) \
2079 ((__m128h)__builtin_ia32_vcvtqq2ph512_mask((__v8di)(A), (__v8hf)(W), \
2080 (__mmask8)(U), (int)(R)))
2082 #define _mm512_maskz_cvt_roundepi64_ph(U, A, R) \
2083 ((__m128h)__builtin_ia32_vcvtqq2ph512_mask( \
2084 (__v8di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2086 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2087 _mm512_cvtepi64_ph(__m512i __A) {
2088 return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2089 (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2090 _MM_FROUND_CUR_DIRECTION);
2093 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2094 _mm512_mask_cvtepi64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2095 return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2096 (__v8di)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2099 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2100 _mm512_maskz_cvtepi64_ph(__mmask8 __U, __m512i __A) {
2101 return (__m128h)__builtin_ia32_vcvtqq2ph512_mask(
2102 (__v8di)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2103 _MM_FROUND_CUR_DIRECTION);
2106 #define _mm512_cvt_roundph_epi64(A, R) \
2107 ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), \
2108 (__v8di)_mm512_undefined_epi32(), \
2109 (__mmask8)(-1), (int)(R)))
2111 #define _mm512_mask_cvt_roundph_epi64(W, U, A, R) \
2112 ((__m512i)__builtin_ia32_vcvtph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2113 (__mmask8)(U), (int)(R)))
2115 #define _mm512_maskz_cvt_roundph_epi64(U, A, R) \
2116 ((__m512i)__builtin_ia32_vcvtph2qq512_mask( \
2117 (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2119 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2120 _mm512_cvtph_epi64(__m128h __A) {
2121 return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2122 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2123 _MM_FROUND_CUR_DIRECTION);
2126 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2127 _mm512_mask_cvtph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2128 return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2129 (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2132 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2133 _mm512_maskz_cvtph_epi64(__mmask8 __U, __m128h __A) {
2134 return (__m512i)__builtin_ia32_vcvtph2qq512_mask(
2135 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2136 _MM_FROUND_CUR_DIRECTION);
2139 #define _mm512_cvt_roundepu64_ph(A, R) \
2140 ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2141 (__v8du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R)))
2143 #define _mm512_mask_cvt_roundepu64_ph(W, U, A, R) \
2144 ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask((__v8du)(A), (__v8hf)(W), \
2145 (__mmask8)(U), (int)(R)))
2147 #define _mm512_maskz_cvt_roundepu64_ph(U, A, R) \
2148 ((__m128h)__builtin_ia32_vcvtuqq2ph512_mask( \
2149 (__v8du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
2151 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2152 _mm512_cvtepu64_ph(__m512i __A) {
2153 return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2154 (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)-1,
2155 _MM_FROUND_CUR_DIRECTION);
2158 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2159 _mm512_mask_cvtepu64_ph(__m128h __W, __mmask8 __U, __m512i __A) {
2160 return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2161 (__v8du)__A, (__v8hf)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2164 static __inline__ __m128h __DEFAULT_FN_ATTRS512
2165 _mm512_maskz_cvtepu64_ph(__mmask8 __U, __m512i __A) {
2166 return (__m128h)__builtin_ia32_vcvtuqq2ph512_mask(
2167 (__v8du)__A, (__v8hf)_mm_setzero_ph(), (__mmask8)__U,
2168 _MM_FROUND_CUR_DIRECTION);
2171 #define _mm512_cvt_roundph_epu64(A, R) \
2172 ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2173 (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2174 (int)(R)))
2176 #define _mm512_mask_cvt_roundph_epu64(W, U, A, R) \
2177 ((__m512i)__builtin_ia32_vcvtph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2178 (__mmask8)(U), (int)(R)))
2180 #define _mm512_maskz_cvt_roundph_epu64(U, A, R) \
2181 ((__m512i)__builtin_ia32_vcvtph2uqq512_mask( \
2182 (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2184 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2185 _mm512_cvtph_epu64(__m128h __A) {
2186 return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2187 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2188 _MM_FROUND_CUR_DIRECTION);
2191 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2192 _mm512_mask_cvtph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2193 return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2194 (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2197 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2198 _mm512_maskz_cvtph_epu64(__mmask8 __U, __m128h __A) {
2199 return (__m512i)__builtin_ia32_vcvtph2uqq512_mask(
2200 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2201 _MM_FROUND_CUR_DIRECTION);
2204 #define _mm512_cvtt_roundph_epi64(A, R) \
2205 ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2206 (__v8hf)(A), (__v8di)_mm512_undefined_epi32(), (__mmask8)(-1), \
2207 (int)(R)))
2209 #define _mm512_mask_cvtt_roundph_epi64(W, U, A, R) \
2210 ((__m512i)__builtin_ia32_vcvttph2qq512_mask((__v8hf)(A), (__v8di)(W), \
2211 (__mmask8)(U), (int)(R)))
2213 #define _mm512_maskz_cvtt_roundph_epi64(U, A, R) \
2214 ((__m512i)__builtin_ia32_vcvttph2qq512_mask( \
2215 (__v8hf)(A), (__v8di)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2217 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2218 _mm512_cvttph_epi64(__m128h __A) {
2219 return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2220 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)-1,
2221 _MM_FROUND_CUR_DIRECTION);
2224 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2225 _mm512_mask_cvttph_epi64(__m512i __W, __mmask8 __U, __m128h __A) {
2226 return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2227 (__v8hf)__A, (__v8di)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2230 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2231 _mm512_maskz_cvttph_epi64(__mmask8 __U, __m128h __A) {
2232 return (__m512i)__builtin_ia32_vcvttph2qq512_mask(
2233 (__v8hf)__A, (__v8di)_mm512_setzero_epi32(), (__mmask8)__U,
2234 _MM_FROUND_CUR_DIRECTION);
2237 #define _mm512_cvtt_roundph_epu64(A, R) \
2238 ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2239 (__v8hf)(A), (__v8du)_mm512_undefined_epi32(), (__mmask8)(-1), \
2240 (int)(R)))
2242 #define _mm512_mask_cvtt_roundph_epu64(W, U, A, R) \
2243 ((__m512i)__builtin_ia32_vcvttph2uqq512_mask((__v8hf)(A), (__v8du)(W), \
2244 (__mmask8)(U), (int)(R)))
2246 #define _mm512_maskz_cvtt_roundph_epu64(U, A, R) \
2247 ((__m512i)__builtin_ia32_vcvttph2uqq512_mask( \
2248 (__v8hf)(A), (__v8du)_mm512_setzero_epi32(), (__mmask8)(U), (int)(R)))
2250 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2251 _mm512_cvttph_epu64(__m128h __A) {
2252 return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2253 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)-1,
2254 _MM_FROUND_CUR_DIRECTION);
2257 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2258 _mm512_mask_cvttph_epu64(__m512i __W, __mmask8 __U, __m128h __A) {
2259 return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2260 (__v8hf)__A, (__v8du)__W, (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2263 static __inline__ __m512i __DEFAULT_FN_ATTRS512
2264 _mm512_maskz_cvttph_epu64(__mmask8 __U, __m128h __A) {
2265 return (__m512i)__builtin_ia32_vcvttph2uqq512_mask(
2266 (__v8hf)__A, (__v8du)_mm512_setzero_epi32(), (__mmask8)__U,
2267 _MM_FROUND_CUR_DIRECTION);
2270 #define _mm_cvt_roundsh_i32(A, R) \
2271 ((int)__builtin_ia32_vcvtsh2si32((__v8hf)(A), (int)(R)))
2273 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvtsh_i32(__m128h __A) {
2274 return (int)__builtin_ia32_vcvtsh2si32((__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2277 #define _mm_cvt_roundsh_u32(A, R) \
2278 ((unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)(A), (int)(R)))
2280 static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2281 _mm_cvtsh_u32(__m128h __A) {
2282 return (unsigned int)__builtin_ia32_vcvtsh2usi32((__v8hf)__A,
2283 _MM_FROUND_CUR_DIRECTION);
2286 #ifdef __x86_64__
2287 #define _mm_cvt_roundsh_i64(A, R) \
2288 ((long long)__builtin_ia32_vcvtsh2si64((__v8hf)(A), (int)(R)))
2290 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvtsh_i64(__m128h __A) {
2291 return (long long)__builtin_ia32_vcvtsh2si64((__v8hf)__A,
2292 _MM_FROUND_CUR_DIRECTION);
2295 #define _mm_cvt_roundsh_u64(A, R) \
2296 ((unsigned long long)__builtin_ia32_vcvtsh2usi64((__v8hf)(A), (int)(R)))
2298 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2299 _mm_cvtsh_u64(__m128h __A) {
2300 return (unsigned long long)__builtin_ia32_vcvtsh2usi64(
2301 (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2303 #endif // __x86_64__
2305 #define _mm_cvt_roundu32_sh(A, B, R) \
2306 ((__m128h)__builtin_ia32_vcvtusi2sh((__v8hf)(A), (unsigned int)(B), (int)(R)))
2308 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2309 _mm_cvtu32_sh(__m128h __A, unsigned int __B) {
2310 __A[0] = __B;
2311 return __A;
2314 #ifdef __x86_64__
2315 #define _mm_cvt_roundu64_sh(A, B, R) \
2316 ((__m128h)__builtin_ia32_vcvtusi642sh((__v8hf)(A), (unsigned long long)(B), \
2317 (int)(R)))
2319 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2320 _mm_cvtu64_sh(__m128h __A, unsigned long long __B) {
2321 __A[0] = __B;
2322 return __A;
2324 #endif
2326 #define _mm_cvt_roundi32_sh(A, B, R) \
2327 ((__m128h)__builtin_ia32_vcvtsi2sh((__v8hf)(A), (int)(B), (int)(R)))
2329 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti32_sh(__m128h __A,
2330 int __B) {
2331 __A[0] = __B;
2332 return __A;
2335 #ifdef __x86_64__
2336 #define _mm_cvt_roundi64_sh(A, B, R) \
2337 ((__m128h)__builtin_ia32_vcvtsi642sh((__v8hf)(A), (long long)(B), (int)(R)))
2339 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvti64_sh(__m128h __A,
2340 long long __B) {
2341 __A[0] = __B;
2342 return __A;
2344 #endif
2346 #define _mm_cvtt_roundsh_i32(A, R) \
2347 ((int)__builtin_ia32_vcvttsh2si32((__v8hf)(A), (int)(R)))
2349 static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsh_i32(__m128h __A) {
2350 return (int)__builtin_ia32_vcvttsh2si32((__v8hf)__A,
2351 _MM_FROUND_CUR_DIRECTION);
2354 #ifdef __x86_64__
2355 #define _mm_cvtt_roundsh_i64(A, R) \
2356 ((long long)__builtin_ia32_vcvttsh2si64((__v8hf)(A), (int)(R)))
2358 static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsh_i64(__m128h __A) {
2359 return (long long)__builtin_ia32_vcvttsh2si64((__v8hf)__A,
2360 _MM_FROUND_CUR_DIRECTION);
2362 #endif
2364 #define _mm_cvtt_roundsh_u32(A, R) \
2365 ((unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)(A), (int)(R)))
2367 static __inline__ unsigned int __DEFAULT_FN_ATTRS128
2368 _mm_cvttsh_u32(__m128h __A) {
2369 return (unsigned int)__builtin_ia32_vcvttsh2usi32((__v8hf)__A,
2370 _MM_FROUND_CUR_DIRECTION);
2373 #ifdef __x86_64__
2374 #define _mm_cvtt_roundsh_u64(A, R) \
2375 ((unsigned long long)__builtin_ia32_vcvttsh2usi64((__v8hf)(A), (int)(R)))
2377 static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
2378 _mm_cvttsh_u64(__m128h __A) {
2379 return (unsigned long long)__builtin_ia32_vcvttsh2usi64(
2380 (__v8hf)__A, _MM_FROUND_CUR_DIRECTION);
2382 #endif
2384 #define _mm512_cvtx_roundph_ps(A, R) \
2385 ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), \
2386 (__v16sf)_mm512_undefined_ps(), \
2387 (__mmask16)(-1), (int)(R)))
2389 #define _mm512_mask_cvtx_roundph_ps(W, U, A, R) \
2390 ((__m512)__builtin_ia32_vcvtph2psx512_mask((__v16hf)(A), (__v16sf)(W), \
2391 (__mmask16)(U), (int)(R)))
2393 #define _mm512_maskz_cvtx_roundph_ps(U, A, R) \
2394 ((__m512)__builtin_ia32_vcvtph2psx512_mask( \
2395 (__v16hf)(A), (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R)))
2397 static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtxph_ps(__m256h __A) {
2398 return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2399 (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)-1,
2400 _MM_FROUND_CUR_DIRECTION);
2403 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2404 _mm512_mask_cvtxph_ps(__m512 __W, __mmask16 __U, __m256h __A) {
2405 return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2406 (__v16hf)__A, (__v16sf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2409 static __inline__ __m512 __DEFAULT_FN_ATTRS512
2410 _mm512_maskz_cvtxph_ps(__mmask16 __U, __m256h __A) {
2411 return (__m512)__builtin_ia32_vcvtph2psx512_mask(
2412 (__v16hf)__A, (__v16sf)_mm512_setzero_ps(), (__mmask16)__U,
2413 _MM_FROUND_CUR_DIRECTION);
2416 #define _mm512_cvtx_roundps_ph(A, R) \
2417 ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), \
2418 (__v16hf)_mm256_undefined_ph(), \
2419 (__mmask16)(-1), (int)(R)))
2421 #define _mm512_mask_cvtx_roundps_ph(W, U, A, R) \
2422 ((__m256h)__builtin_ia32_vcvtps2phx512_mask((__v16sf)(A), (__v16hf)(W), \
2423 (__mmask16)(U), (int)(R)))
2425 #define _mm512_maskz_cvtx_roundps_ph(U, A, R) \
2426 ((__m256h)__builtin_ia32_vcvtps2phx512_mask( \
2427 (__v16sf)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R)))
2429 static __inline__ __m256h __DEFAULT_FN_ATTRS512 _mm512_cvtxps_ph(__m512 __A) {
2430 return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2431 (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)-1,
2432 _MM_FROUND_CUR_DIRECTION);
2435 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2436 _mm512_mask_cvtxps_ph(__m256h __W, __mmask16 __U, __m512 __A) {
2437 return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2438 (__v16sf)__A, (__v16hf)__W, (__mmask16)__U, _MM_FROUND_CUR_DIRECTION);
2441 static __inline__ __m256h __DEFAULT_FN_ATTRS512
2442 _mm512_maskz_cvtxps_ph(__mmask16 __U, __m512 __A) {
2443 return (__m256h)__builtin_ia32_vcvtps2phx512_mask(
2444 (__v16sf)__A, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
2445 _MM_FROUND_CUR_DIRECTION);
/* Fused multiply-add family with explicit rounding.  All variants are
   built on the vfmaddph512 builtins; fmsub/fnmadd/fnmsub are expressed
   by negating the appropriate operand.  The _mask form blends masked-off
   elements from A, _mask3 blends from C, and _maskz zeroes them.  */
#define _mm512_fmadd_round_ph(A, B, C, R)                                      \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)-1, (int)(R)))

#define _mm512_mask_fmadd_round_ph(A, U, B, C, R)                              \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_mask3_fmadd_round_ph(A, B, C, U, R)                             \
  ((__m512h)__builtin_ia32_vfmaddph512_mask3(                                  \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_fmadd_round_ph(U, A, B, C, R)                             \
  ((__m512h)__builtin_ia32_vfmaddph512_maskz(                                  \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_fmsub_round_ph(A, B, C, R)                                      \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)-1, (int)(R)))

#define _mm512_mask_fmsub_round_ph(A, U, B, C, R)                              \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_fmsub_round_ph(U, A, B, C, R)                             \
  ((__m512h)__builtin_ia32_vfmaddph512_maskz(                                  \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))

#define _mm512_fnmadd_round_ph(A, B, C, R)                                     \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),    \
      (__mmask32)-1, (int)(R)))

#define _mm512_mask3_fnmadd_round_ph(A, B, C, U, R)                            \
  ((__m512h)__builtin_ia32_vfmaddph512_mask3(                                  \
      -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_fnmadd_round_ph(U, A, B, C, R)                            \
  ((__m512h)__builtin_ia32_vfmaddph512_maskz(                                  \
      -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))

#define _mm512_fnmsub_round_ph(A, B, C, R)                                     \
  ((__m512h)__builtin_ia32_vfmaddph512_mask(                                   \
      (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),   \
      (__mmask32)-1, (int)(R)))

#define _mm512_maskz_fnmsub_round_ph(U, A, B, C, R)                            \
  ((__m512h)__builtin_ia32_vfmaddph512_maskz(                                  \
      -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),   \
      (__mmask32)(U), (int)(R)))
2508 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_ph(__m512h __A,
2509 __m512h __B,
2510 __m512h __C) {
2511 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2512 (__v32hf)__C, (__mmask32)-1,
2513 _MM_FROUND_CUR_DIRECTION);
2516 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2517 _mm512_mask_fmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2518 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2519 (__v32hf)__C, (__mmask32)__U,
2520 _MM_FROUND_CUR_DIRECTION);
2523 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2524 _mm512_mask3_fmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2525 return (__m512h)__builtin_ia32_vfmaddph512_mask3((__v32hf)__A, (__v32hf)__B,
2526 (__v32hf)__C, (__mmask32)__U,
2527 _MM_FROUND_CUR_DIRECTION);
2530 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2531 _mm512_maskz_fmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2532 return (__m512h)__builtin_ia32_vfmaddph512_maskz((__v32hf)__A, (__v32hf)__B,
2533 (__v32hf)__C, (__mmask32)__U,
2534 _MM_FROUND_CUR_DIRECTION);
2537 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmsub_ph(__m512h __A,
2538 __m512h __B,
2539 __m512h __C) {
2540 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2541 -(__v32hf)__C, (__mmask32)-1,
2542 _MM_FROUND_CUR_DIRECTION);
2545 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2546 _mm512_mask_fmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2547 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, (__v32hf)__B,
2548 -(__v32hf)__C, (__mmask32)__U,
2549 _MM_FROUND_CUR_DIRECTION);
2552 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2553 _mm512_maskz_fmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2554 return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2555 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2556 _MM_FROUND_CUR_DIRECTION);
2559 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ph(__m512h __A,
2560 __m512h __B,
2561 __m512h __C) {
2562 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2563 (__v32hf)__C, (__mmask32)-1,
2564 _MM_FROUND_CUR_DIRECTION);
2567 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2568 _mm512_mask3_fnmadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2569 return (__m512h)__builtin_ia32_vfmaddph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2570 (__v32hf)__C, (__mmask32)__U,
2571 _MM_FROUND_CUR_DIRECTION);
2574 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2575 _mm512_maskz_fnmadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2576 return (__m512h)__builtin_ia32_vfmaddph512_maskz(-(__v32hf)__A, (__v32hf)__B,
2577 (__v32hf)__C, (__mmask32)__U,
2578 _MM_FROUND_CUR_DIRECTION);
2581 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ph(__m512h __A,
2582 __m512h __B,
2583 __m512h __C) {
2584 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2585 -(__v32hf)__C, (__mmask32)-1,
2586 _MM_FROUND_CUR_DIRECTION);
2589 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2590 _mm512_maskz_fnmsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2591 return (__m512h)__builtin_ia32_vfmaddph512_maskz(
2592 -(__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2593 _MM_FROUND_CUR_DIRECTION);
/* Rounding-control variants of 512-bit FP16 fmaddsub (alternating
   add/subtract of C across lanes) and fmsubadd, which is expressed as
   fmaddsub with C negated.  R selects the rounding mode. */
#define _mm512_fmaddsub_round_ph(A, B, C, R)                                   \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask(                                \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ph(A, U, B, C, R)                           \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask(                                \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ph(A, B, C, U, R)                          \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask3(                               \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ph(U, A, B, C, R)                          \
  ((__m512h)__builtin_ia32_vfmaddsubph512_maskz(                               \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C),     \
      (__mmask32)(U), (int)(R)))

#define _mm512_fmsubadd_round_ph(A, B, C, R)                                   \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask(                                \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ph(A, U, B, C, R)                           \
  ((__m512h)__builtin_ia32_vfmaddsubph512_mask(                                \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ph(U, A, B, C, R)                          \
  ((__m512h)__builtin_ia32_vfmaddsubph512_maskz(                               \
      (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C),    \
      (__mmask32)(U), (int)(R)))
2631 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2632 _mm512_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C) {
2633 return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2634 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)-1,
2635 _MM_FROUND_CUR_DIRECTION);
2638 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2639 _mm512_mask_fmaddsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2640 return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2641 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2642 _MM_FROUND_CUR_DIRECTION);
2645 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2646 _mm512_mask3_fmaddsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2647 return (__m512h)__builtin_ia32_vfmaddsubph512_mask3(
2648 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2649 _MM_FROUND_CUR_DIRECTION);
2652 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2653 _mm512_maskz_fmaddsub_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2654 return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2655 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2656 _MM_FROUND_CUR_DIRECTION);
2659 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2660 _mm512_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C) {
2661 return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2662 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)-1,
2663 _MM_FROUND_CUR_DIRECTION);
2666 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2667 _mm512_mask_fmsubadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2668 return (__m512h)__builtin_ia32_vfmaddsubph512_mask(
2669 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2670 _MM_FROUND_CUR_DIRECTION);
2673 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2674 _mm512_maskz_fmsubadd_ph(__mmask32 __U, __m512h __A, __m512h __B, __m512h __C) {
2675 return (__m512h)__builtin_ia32_vfmaddsubph512_maskz(
2676 (__v32hf)__A, (__v32hf)__B, -(__v32hf)__C, (__mmask32)__U,
2677 _MM_FROUND_CUR_DIRECTION);
2680 #define _mm512_mask3_fmsub_round_ph(A, B, C, U, R) \
2681 ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2682 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2683 (__mmask32)(U), (int)(R)))
2685 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2686 _mm512_mask3_fmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2687 return (__m512h)__builtin_ia32_vfmsubph512_mask3((__v32hf)__A, (__v32hf)__B,
2688 (__v32hf)__C, (__mmask32)__U,
2689 _MM_FROUND_CUR_DIRECTION);
2692 #define _mm512_mask3_fmsubadd_round_ph(A, B, C, U, R) \
2693 ((__m512h)__builtin_ia32_vfmsubaddph512_mask3( \
2694 (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2695 (__mmask32)(U), (int)(R)))
2697 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2698 _mm512_mask3_fmsubadd_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2699 return (__m512h)__builtin_ia32_vfmsubaddph512_mask3(
2700 (__v32hf)__A, (__v32hf)__B, (__v32hf)__C, (__mmask32)__U,
2701 _MM_FROUND_CUR_DIRECTION);
2704 #define _mm512_mask_fnmadd_round_ph(A, U, B, C, R) \
2705 ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2706 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2707 (__mmask32)(U), (int)(R)))
2709 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2710 _mm512_mask_fnmadd_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2711 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2712 (__v32hf)__C, (__mmask32)__U,
2713 _MM_FROUND_CUR_DIRECTION);
2716 #define _mm512_mask_fnmsub_round_ph(A, U, B, C, R) \
2717 ((__m512h)__builtin_ia32_vfmaddph512_mask( \
2718 (__v32hf)(__m512h)(A), -(__v32hf)(__m512h)(B), -(__v32hf)(__m512h)(C), \
2719 (__mmask32)(U), (int)(R)))
2721 #define _mm512_mask3_fnmsub_round_ph(A, B, C, U, R) \
2722 ((__m512h)__builtin_ia32_vfmsubph512_mask3( \
2723 -(__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (__v32hf)(__m512h)(C), \
2724 (__mmask32)(U), (int)(R)))
2726 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2727 _mm512_mask_fnmsub_ph(__m512h __A, __mmask32 __U, __m512h __B, __m512h __C) {
2728 return (__m512h)__builtin_ia32_vfmaddph512_mask((__v32hf)__A, -(__v32hf)__B,
2729 -(__v32hf)__C, (__mmask32)__U,
2730 _MM_FROUND_CUR_DIRECTION);
2733 static __inline__ __m512h __DEFAULT_FN_ATTRS512
2734 _mm512_mask3_fnmsub_ph(__m512h __A, __m512h __B, __m512h __C, __mmask32 __U) {
2735 return (__m512h)__builtin_ia32_vfmsubph512_mask3(-(__v32hf)__A, (__v32hf)__B,
2736 (__v32hf)__C, (__mmask32)__U,
2737 _MM_FROUND_CUR_DIRECTION);
2740 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sh(__m128h __W,
2741 __m128h __A,
2742 __m128h __B) {
2743 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2744 (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2747 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sh(__m128h __W,
2748 __mmask8 __U,
2749 __m128h __A,
2750 __m128h __B) {
2751 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A, (__v8hf)__B,
2752 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2755 #define _mm_fmadd_round_sh(A, B, C, R) \
2756 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2757 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2758 (__mmask8)-1, (int)(R)))
2760 #define _mm_mask_fmadd_round_sh(W, U, A, B, R) \
2761 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2762 (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2763 (__mmask8)(U), (int)(R)))
2765 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2766 _mm_maskz_fmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2767 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B, (__v8hf)__C,
2768 (__mmask8)__U,
2769 _MM_FROUND_CUR_DIRECTION);
2772 #define _mm_maskz_fmadd_round_sh(U, A, B, C, R) \
2773 ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2774 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2775 (__mmask8)(U), (int)(R)))
2777 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2778 _mm_mask3_fmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2779 return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2780 (__mmask8)__U,
2781 _MM_FROUND_CUR_DIRECTION);
2784 #define _mm_mask3_fmadd_round_sh(W, X, Y, U, R) \
2785 ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2786 (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2787 (__mmask8)(U), (int)(R)))
2789 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmsub_sh(__m128h __W,
2790 __m128h __A,
2791 __m128h __B) {
2792 return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2793 -(__v8hf)__B, (__mmask8)-1,
2794 _MM_FROUND_CUR_DIRECTION);
2797 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sh(__m128h __W,
2798 __mmask8 __U,
2799 __m128h __A,
2800 __m128h __B) {
2801 return (__m128h)__builtin_ia32_vfmaddsh3_mask((__v8hf)__W, (__v8hf)__A,
2802 -(__v8hf)__B, (__mmask8)__U,
2803 _MM_FROUND_CUR_DIRECTION);
2806 #define _mm_fmsub_round_sh(A, B, C, R) \
2807 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2808 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2809 (__mmask8)-1, (int)(R)))
2811 #define _mm_mask_fmsub_round_sh(W, U, A, B, R) \
2812 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2813 (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2814 (__mmask8)(U), (int)(R)))
2816 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2817 _mm_maskz_fmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2818 return (__m128h)__builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, (__v8hf)__B,
2819 -(__v8hf)__C, (__mmask8)__U,
2820 _MM_FROUND_CUR_DIRECTION);
2823 #define _mm_maskz_fmsub_round_sh(U, A, B, C, R) \
2824 ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2825 (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2826 (__mmask8)(U), (int)R))
2828 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2829 _mm_mask3_fmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2830 return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, (__v8hf)__X, (__v8hf)__Y,
2831 (__mmask8)__U,
2832 _MM_FROUND_CUR_DIRECTION);
2835 #define _mm_mask3_fmsub_round_sh(W, X, Y, U, R) \
2836 ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2837 (__v8hf)(__m128h)(W), (__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2838 (__mmask8)(U), (int)(R)))
2840 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmadd_sh(__m128h __W,
2841 __m128h __A,
2842 __m128h __B) {
2843 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2844 (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2847 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2848 _mm_mask_fnmadd_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2849 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, (__v8hf)__B,
2850 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2853 #define _mm_fnmadd_round_sh(A, B, C, R) \
2854 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2855 (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2856 (__mmask8)-1, (int)(R)))
2858 #define _mm_mask_fnmadd_round_sh(W, U, A, B, R) \
2859 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2860 (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), \
2861 (__mmask8)(U), (int)(R)))
2863 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2864 _mm_maskz_fnmadd_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2865 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, (__v8hf)__C,
2866 (__mmask8)__U,
2867 _MM_FROUND_CUR_DIRECTION);
2870 #define _mm_maskz_fnmadd_round_sh(U, A, B, C, R) \
2871 ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2872 (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), (__v8hf)(__m128h)(C), \
2873 (__mmask8)(U), (int)(R)))
2875 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2876 _mm_mask3_fnmadd_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2877 return __builtin_ia32_vfmaddsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2878 (__mmask8)__U,
2879 _MM_FROUND_CUR_DIRECTION);
2882 #define _mm_mask3_fnmadd_round_sh(W, X, Y, U, R) \
2883 ((__m128h)__builtin_ia32_vfmaddsh3_mask3( \
2884 (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2885 (__mmask8)(U), (int)(R)))
2887 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fnmsub_sh(__m128h __W,
2888 __m128h __A,
2889 __m128h __B) {
2890 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2891 (__mmask8)-1, _MM_FROUND_CUR_DIRECTION);
2894 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2895 _mm_mask_fnmsub_sh(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
2896 return __builtin_ia32_vfmaddsh3_mask((__v8hf)__W, -(__v8hf)__A, -(__v8hf)__B,
2897 (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
2900 #define _mm_fnmsub_round_sh(A, B, C, R) \
2901 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2902 (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2903 (__mmask8)-1, (int)(R)))
2905 #define _mm_mask_fnmsub_round_sh(W, U, A, B, R) \
2906 ((__m128h)__builtin_ia32_vfmaddsh3_mask( \
2907 (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), \
2908 (__mmask8)(U), (int)(R)))
2910 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2911 _mm_maskz_fnmsub_sh(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2912 return __builtin_ia32_vfmaddsh3_maskz((__v8hf)__A, -(__v8hf)__B, -(__v8hf)__C,
2913 (__mmask8)__U,
2914 _MM_FROUND_CUR_DIRECTION);
2917 #define _mm_maskz_fnmsub_round_sh(U, A, B, C, R) \
2918 ((__m128h)__builtin_ia32_vfmaddsh3_maskz( \
2919 (__v8hf)(__m128h)(A), -(__v8hf)(__m128h)(B), -(__v8hf)(__m128h)(C), \
2920 (__mmask8)(U), (int)(R)))
2922 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2923 _mm_mask3_fnmsub_sh(__m128h __W, __m128h __X, __m128h __Y, __mmask8 __U) {
2924 return __builtin_ia32_vfmsubsh3_mask3((__v8hf)__W, -(__v8hf)__X, (__v8hf)__Y,
2925 (__mmask8)__U,
2926 _MM_FROUND_CUR_DIRECTION);
2929 #define _mm_mask3_fnmsub_round_sh(W, X, Y, U, R) \
2930 ((__m128h)__builtin_ia32_vfmsubsh3_mask3( \
2931 (__v8hf)(__m128h)(W), -(__v8hf)(__m128h)(X), (__v8hf)(__m128h)(Y), \
2932 (__mmask8)(U), (int)(R)))
2934 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmadd_sch(__m128h __A,
2935 __m128h __B,
2936 __m128h __C) {
2937 return (__m128h)__builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2938 (__v4sf)__C, (__mmask8)-1,
2939 _MM_FROUND_CUR_DIRECTION);
2942 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2943 _mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2944 return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
2945 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2948 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2949 _mm_maskz_fcmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2950 return (__m128h)__builtin_ia32_vfcmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2951 (__v4sf)__C, (__mmask8)__U,
2952 _MM_FROUND_CUR_DIRECTION);
2955 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2956 _mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
2957 return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
2958 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
2961 #define _mm_fcmadd_round_sch(A, B, C, R) \
2962 ((__m128h)__builtin_ia32_vfcmaddcsh_mask( \
2963 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2964 (__mmask8)-1, (int)(R)))
2966 #define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
2967 ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
2968 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2969 (__mmask8)(U), (int)(R)))
2971 #define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
2972 ((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
2973 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2974 (__mmask8)(U), (int)(R)))
2976 #define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
2977 ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
2978 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
2979 (__mmask8)(U), (int)(R)))
2981 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
2982 __m128h __B,
2983 __m128h __C) {
2984 return (__m128h)__builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B,
2985 (__v4sf)__C, (__mmask8)-1,
2986 _MM_FROUND_CUR_DIRECTION);
2989 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2990 _mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
2991 return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
2992 (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
2995 static __inline__ __m128h __DEFAULT_FN_ATTRS128
2996 _mm_maskz_fmadd_sch(__mmask8 __U, __m128h __A, __m128h __B, __m128h __C) {
2997 return (__m128h)__builtin_ia32_vfmaddcsh_maskz((__v4sf)__A, (__v4sf)__B,
2998 (__v4sf)__C, (__mmask8)__U,
2999 _MM_FROUND_CUR_DIRECTION);
3002 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3003 _mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
3004 return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
3005 (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
3008 #define _mm_fmadd_round_sch(A, B, C, R) \
3009 ((__m128h)__builtin_ia32_vfmaddcsh_mask( \
3010 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3011 (__mmask8)-1, (int)(R)))
3013 #define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
3014 ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
3015 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3016 (__mmask8)(U), (int)(R)))
3018 #define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
3019 ((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
3020 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3021 (__mmask8)(U), (int)(R)))
3023 #define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
3024 ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
3025 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
3026 (__mmask8)(U), (int)(R)))
3028 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
3029 __m128h __B) {
3030 return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3031 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3032 _MM_FROUND_CUR_DIRECTION);
3035 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3036 _mm_mask_fcmul_sch(__m128h __W, __mmask8 __U, __m128h __A, __m128h __B) {
3037 return (__m128h)__builtin_ia32_vfcmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3038 (__v4sf)__W, (__mmask8)__U,
3039 _MM_FROUND_CUR_DIRECTION);
3042 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3043 _mm_maskz_fcmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3044 return (__m128h)__builtin_ia32_vfcmulcsh_mask(
3045 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3046 _MM_FROUND_CUR_DIRECTION);
3049 #define _mm_fcmul_round_sch(A, B, R) \
3050 ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3051 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3052 (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3054 #define _mm_mask_fcmul_round_sch(W, U, A, B, R) \
3055 ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3056 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3057 (__mmask8)(U), (int)(R)))
3059 #define _mm_maskz_fcmul_round_sch(U, A, B, R) \
3060 ((__m128h)__builtin_ia32_vfcmulcsh_mask( \
3061 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3062 (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3064 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmul_sch(__m128h __A,
3065 __m128h __B) {
3066 return (__m128h)__builtin_ia32_vfmulcsh_mask(
3067 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_undefined_ph(), (__mmask8)-1,
3068 _MM_FROUND_CUR_DIRECTION);
3071 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_fmul_sch(__m128h __W,
3072 __mmask8 __U,
3073 __m128h __A,
3074 __m128h __B) {
3075 return (__m128h)__builtin_ia32_vfmulcsh_mask((__v4sf)__A, (__v4sf)__B,
3076 (__v4sf)__W, (__mmask8)__U,
3077 _MM_FROUND_CUR_DIRECTION);
3080 static __inline__ __m128h __DEFAULT_FN_ATTRS128
3081 _mm_maskz_fmul_sch(__mmask8 __U, __m128h __A, __m128h __B) {
3082 return (__m128h)__builtin_ia32_vfmulcsh_mask(
3083 (__v4sf)__A, (__v4sf)__B, (__v4sf)_mm_setzero_ph(), (__mmask8)__U,
3084 _MM_FROUND_CUR_DIRECTION);
3087 #define _mm_fmul_round_sch(A, B, R) \
3088 ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3089 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3090 (__v4sf)(__m128h)_mm_undefined_ph(), (__mmask8)-1, (int)(R)))
3092 #define _mm_mask_fmul_round_sch(W, U, A, B, R) \
3093 ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3094 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(W), \
3095 (__mmask8)(U), (int)(R)))
3097 #define _mm_maskz_fmul_round_sch(U, A, B, R) \
3098 ((__m128h)__builtin_ia32_vfmulcsh_mask( \
3099 (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), \
3100 (__v4sf)(__m128h)_mm_setzero_ph(), (__mmask8)(U), (int)(R)))
3102 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmul_pch(__m512h __A,
3103 __m512h __B) {
3104 return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3105 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3106 _MM_FROUND_CUR_DIRECTION);
3109 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3110 _mm512_mask_fcmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3111 return (__m512h)__builtin_ia32_vfcmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3112 (__v16sf)__W, (__mmask16)__U,
3113 _MM_FROUND_CUR_DIRECTION);
3116 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3117 _mm512_maskz_fcmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3118 return (__m512h)__builtin_ia32_vfcmulcph512_mask(
3119 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3120 _MM_FROUND_CUR_DIRECTION);
3123 #define _mm512_fcmul_round_pch(A, B, R) \
3124 ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3125 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3126 (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3128 #define _mm512_mask_fcmul_round_pch(W, U, A, B, R) \
3129 ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3130 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3131 (__mmask16)(U), (int)(R)))
3133 #define _mm512_maskz_fcmul_round_pch(U, A, B, R) \
3134 ((__m512h)__builtin_ia32_vfcmulcph512_mask( \
3135 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3136 (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3138 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmul_pch(__m512h __A,
3139 __m512h __B) {
3140 return (__m512h)__builtin_ia32_vfmulcph512_mask(
3141 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_undefined_ph(), (__mmask16)-1,
3142 _MM_FROUND_CUR_DIRECTION);
3145 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3146 _mm512_mask_fmul_pch(__m512h __W, __mmask16 __U, __m512h __A, __m512h __B) {
3147 return (__m512h)__builtin_ia32_vfmulcph512_mask((__v16sf)__A, (__v16sf)__B,
3148 (__v16sf)__W, (__mmask16)__U,
3149 _MM_FROUND_CUR_DIRECTION);
3152 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3153 _mm512_maskz_fmul_pch(__mmask16 __U, __m512h __A, __m512h __B) {
3154 return (__m512h)__builtin_ia32_vfmulcph512_mask(
3155 (__v16sf)__A, (__v16sf)__B, (__v16sf)_mm512_setzero_ph(), (__mmask16)__U,
3156 _MM_FROUND_CUR_DIRECTION);
3159 #define _mm512_fmul_round_pch(A, B, R) \
3160 ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3161 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3162 (__v16sf)(__m512h)_mm512_undefined_ph(), (__mmask16)-1, (int)(R)))
3164 #define _mm512_mask_fmul_round_pch(W, U, A, B, R) \
3165 ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3166 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(W), \
3167 (__mmask16)(U), (int)(R)))
3169 #define _mm512_maskz_fmul_round_pch(U, A, B, R) \
3170 ((__m512h)__builtin_ia32_vfmulcph512_mask( \
3171 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), \
3172 (__v16sf)(__m512h)_mm512_setzero_ph(), (__mmask16)(U), (int)(R)))
3174 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
3175 __m512h __B,
3176 __m512h __C) {
3177 return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3178 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
3179 _MM_FROUND_CUR_DIRECTION);
3182 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3183 _mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3184 return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
3185 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3186 _MM_FROUND_CUR_DIRECTION);
3189 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3190 _mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3191 return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
3192 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3193 _MM_FROUND_CUR_DIRECTION);
3196 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3197 _mm512_maskz_fcmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3198 return (__m512h)__builtin_ia32_vfcmaddcph512_maskz(
3199 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3200 _MM_FROUND_CUR_DIRECTION);
3203 #define _mm512_fcmadd_round_pch(A, B, C, R) \
3204 ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3205 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3206 (__mmask16)-1, (int)(R)))
3208 #define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
3209 ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
3210 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3211 (__mmask16)(U), (int)(R)))
3213 #define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
3214 ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
3215 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3216 (__mmask16)(U), (int)(R)))
3218 #define _mm512_maskz_fcmadd_round_pch(U, A, B, C, R) \
3219 ((__m512h)__builtin_ia32_vfcmaddcph512_maskz( \
3220 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3221 (__mmask16)(U), (int)(R)))
3223 static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
3224 __m512h __B,
3225 __m512h __C) {
3226 return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
3227 (__v16sf)__C, (__mmask16)-1,
3228 _MM_FROUND_CUR_DIRECTION);
3231 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3232 _mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
3233 return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
3234 (__v16sf)__C, (__mmask16)__U,
3235 _MM_FROUND_CUR_DIRECTION);
3238 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3239 _mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
3240 return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
3241 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3242 _MM_FROUND_CUR_DIRECTION);
3245 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3246 _mm512_maskz_fmadd_pch(__mmask16 __U, __m512h __A, __m512h __B, __m512h __C) {
3247 return (__m512h)__builtin_ia32_vfmaddcph512_maskz(
3248 (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
3249 _MM_FROUND_CUR_DIRECTION);
3252 #define _mm512_fmadd_round_pch(A, B, C, R) \
3253 ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3254 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3255 (__mmask16)-1, (int)(R)))
3257 #define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
3258 ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
3259 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3260 (__mmask16)(U), (int)(R)))
3262 #define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
3263 ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
3264 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3265 (__mmask16)(U), (int)(R)))
3267 #define _mm512_maskz_fmadd_round_pch(U, A, B, C, R) \
3268 ((__m512h)__builtin_ia32_vfmaddcph512_maskz( \
3269 (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
3270 (__mmask16)(U), (int)(R)))
3272 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3273 _mm512_reduce_add_ph(__m512h __W) {
3274 return __builtin_ia32_reduce_fadd_ph512(-0.0f16, __W);
3277 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3278 _mm512_reduce_mul_ph(__m512h __W) {
3279 return __builtin_ia32_reduce_fmul_ph512(1.0f16, __W);
3282 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3283 _mm512_reduce_max_ph(__m512h __V) {
3284 return __builtin_ia32_reduce_fmax_ph512(__V);
3287 static __inline__ _Float16 __DEFAULT_FN_ATTRS512
3288 _mm512_reduce_min_ph(__m512h __V) {
3289 return __builtin_ia32_reduce_fmin_ph512(__V);
3292 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3293 _mm512_mask_blend_ph(__mmask32 __U, __m512h __A, __m512h __W) {
3294 return (__m512h)__builtin_ia32_selectph_512((__mmask32)__U, (__v32hf)__W,
3295 (__v32hf)__A);
3298 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3299 _mm512_permutex2var_ph(__m512h __A, __m512i __I, __m512h __B) {
3300 return (__m512h)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
3301 (__v32hi)__B);
3304 static __inline__ __m512h __DEFAULT_FN_ATTRS512
3305 _mm512_permutexvar_ph(__m512i __A, __m512h __B) {
3306 return (__m512h)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
// The intrinsics below are aliases for the f*mul_*ch intrinsics above.
#define _mm512_mul_pch(A, B) _mm512_fmul_pch(A, B)
#define _mm512_mask_mul_pch(W, U, A, B) _mm512_mask_fmul_pch(W, U, A, B)
#define _mm512_maskz_mul_pch(U, A, B) _mm512_maskz_fmul_pch(U, A, B)
#define _mm512_mul_round_pch(A, B, R) _mm512_fmul_round_pch(A, B, R)
#define _mm512_mask_mul_round_pch(W, U, A, B, R)                               \
  _mm512_mask_fmul_round_pch(W, U, A, B, R)
#define _mm512_maskz_mul_round_pch(U, A, B, R)                                 \
  _mm512_maskz_fmul_round_pch(U, A, B, R)

#define _mm512_cmul_pch(A, B) _mm512_fcmul_pch(A, B)
#define _mm512_mask_cmul_pch(W, U, A, B) _mm512_mask_fcmul_pch(W, U, A, B)
#define _mm512_maskz_cmul_pch(U, A, B) _mm512_maskz_fcmul_pch(U, A, B)
#define _mm512_cmul_round_pch(A, B, R) _mm512_fcmul_round_pch(A, B, R)
#define _mm512_mask_cmul_round_pch(W, U, A, B, R)                              \
  _mm512_mask_fcmul_round_pch(W, U, A, B, R)
#define _mm512_maskz_cmul_round_pch(U, A, B, R)                                \
  _mm512_maskz_fcmul_round_pch(U, A, B, R)

#define _mm_mul_sch(A, B) _mm_fmul_sch(A, B)
#define _mm_mask_mul_sch(W, U, A, B) _mm_mask_fmul_sch(W, U, A, B)
#define _mm_maskz_mul_sch(U, A, B) _mm_maskz_fmul_sch(U, A, B)
#define _mm_mul_round_sch(A, B, R) _mm_fmul_round_sch(A, B, R)
#define _mm_mask_mul_round_sch(W, U, A, B, R)                                  \
  _mm_mask_fmul_round_sch(W, U, A, B, R)
#define _mm_maskz_mul_round_sch(U, A, B, R) _mm_maskz_fmul_round_sch(U, A, B, R)

#define _mm_cmul_sch(A, B) _mm_fcmul_sch(A, B)
#define _mm_mask_cmul_sch(W, U, A, B) _mm_mask_fcmul_sch(W, U, A, B)
#define _mm_maskz_cmul_sch(U, A, B) _mm_maskz_fcmul_sch(U, A, B)
#define _mm_cmul_round_sch(A, B, R) _mm_fcmul_round_sch(A, B, R)
#define _mm_mask_cmul_round_sch(W, U, A, B, R)                                 \
  _mm_mask_fcmul_round_sch(W, U, A, B, R)
#define _mm_maskz_cmul_round_sch(U, A, B, R)                                   \
  _mm_maskz_fcmul_round_sch(U, A, B, R)
3345 #undef __DEFAULT_FN_ATTRS128
3346 #undef __DEFAULT_FN_ATTRS256
3347 #undef __DEFAULT_FN_ATTRS512
3349 #endif
3350 #endif