1 /*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
11 "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
16 #ifndef __AVX10_2_512BF16INTRIN_H
17 #define __AVX10_2_512BF16INTRIN_H
19 /* Define the default attributes for the functions in this file. */
20 typedef __bf16 __m512bh_u
__attribute__((__vector_size__(64), __aligned__(1)));
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
                 __min_vector_width__(512)))
27 static __inline __m512bh __DEFAULT_FN_ATTRS512
_mm512_setzero_pbh(void) {
28 return __builtin_bit_cast(__m512bh
, _mm512_setzero_ps());
31 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_undefined_pbh(void) {
32 return (__m512bh
)__builtin_ia32_undef512();
35 static __inline __m512bh __DEFAULT_FN_ATTRS512
_mm512_set1_pbh(__bf16 bf
) {
36 return (__m512bh
)(__v32bf
){bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
,
37 bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
,
38 bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
, bf
};
41 static __inline __m512bh __DEFAULT_FN_ATTRS512
_mm512_set_pbh(
42 __bf16 bf1
, __bf16 bf2
, __bf16 bf3
, __bf16 bf4
, __bf16 bf5
, __bf16 bf6
,
43 __bf16 bf7
, __bf16 bf8
, __bf16 bf9
, __bf16 bf10
, __bf16 bf11
, __bf16 bf12
,
44 __bf16 bf13
, __bf16 bf14
, __bf16 bf15
, __bf16 bf16
, __bf16 bf17
,
45 __bf16 bf18
, __bf16 bf19
, __bf16 bf20
, __bf16 bf21
, __bf16 bf22
,
46 __bf16 bf23
, __bf16 bf24
, __bf16 bf25
, __bf16 bf26
, __bf16 bf27
,
47 __bf16 bf28
, __bf16 bf29
, __bf16 bf30
, __bf16 bf31
, __bf16 bf32
) {
48 return (__m512bh
)(__v32bf
){bf32
, bf31
, bf30
, bf29
, bf28
, bf27
, bf26
, bf25
,
49 bf24
, bf23
, bf22
, bf21
, bf20
, bf19
, bf18
, bf17
,
50 bf16
, bf15
, bf14
, bf13
, bf12
, bf11
, bf10
, bf9
,
51 bf8
, bf7
, bf6
, bf5
, bf4
, bf3
, bf2
, bf1
};
/* Forwards its 32 arguments to _mm512_set_pbh in reversed order. */
#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10,     \
                        bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19,  \
                        bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28,  \
                        bf29, bf30, bf31, bf32)                                \
  _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26),       \
                 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19),       \
                 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12),       \
                 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4),     \
                 (bf3), (bf2), (bf1))
64 static __inline__ __m512 __DEFAULT_FN_ATTRS512
65 _mm512_castpbf16_ps(__m512bh __a
) {
69 static __inline__ __m512d __DEFAULT_FN_ATTRS512
70 _mm512_castpbf16_pd(__m512bh __a
) {
74 static __inline__ __m512i __DEFAULT_FN_ATTRS512
75 _mm512_castpbf16_si512(__m512bh __a
) {
79 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_castps_pbh(__m512 __a
) {
83 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
84 _mm512_castpd_pbh(__m512d __a
) {
88 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
89 _mm512_castsi512_pbh(__m512i __a
) {
93 static __inline__ __m128bh __DEFAULT_FN_ATTRS512
94 _mm512_castpbf16512_pbh128(__m512bh __a
) {
95 return __builtin_shufflevector(__a
, __a
, 0, 1, 2, 3, 4, 5, 6, 7);
98 static __inline__ __m256bh __DEFAULT_FN_ATTRS512
99 _mm512_castpbf16512_pbh256(__m512bh __a
) {
100 return __builtin_shufflevector(__a
, __a
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
104 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
105 _mm512_castpbf16128_pbh512(__m128bh __a
) {
106 return __builtin_shufflevector(__a
, __a
, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
108 -1, -1, -1, -1, -1, -1, -1, -1, -1);
111 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
112 _mm512_castpbf16256_pbh512(__m256bh __a
) {
113 return __builtin_shufflevector(__a
, __a
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
114 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1);
118 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
119 _mm512_zextpbf16128_pbh512(__m128bh __a
) {
120 return __builtin_shufflevector(
121 __a
, (__v8bf
)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
122 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
125 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
126 _mm512_zextpbf16256_pbh512(__m256bh __a
) {
127 return __builtin_shufflevector(__a
, (__v16bf
)_mm256_setzero_pbh(), 0, 1, 2, 3,
128 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
129 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
133 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_abs_pbh(__m512bh __A
) {
134 return (__m512bh
)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
138 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
139 _mm512_load_pbh(void const *__p
) {
140 return *(const __m512bh
*)__p
;
143 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
144 _mm512_loadu_pbh(void const *__p
) {
147 } __attribute__((__packed__
, __may_alias__
));
148 return ((const struct __loadu_pbh
*)__p
)->__v
;
151 static __inline__
void __DEFAULT_FN_ATTRS512
_mm512_store_pbh(void *__P
,
153 *(__m512bh
*)__P
= __A
;
156 static __inline__
void __DEFAULT_FN_ATTRS512
_mm512_storeu_pbh(void *__P
,
158 struct __storeu_pbh
{
160 } __attribute__((__packed__
, __may_alias__
));
161 ((struct __storeu_pbh
*)__P
)->__v
= __A
;
164 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
165 _mm512_mask_blend_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __W
) {
166 return (__m512bh
)__builtin_ia32_selectpbf_512((__mmask32
)__U
, (__v32bf
)__W
,
170 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
171 _mm512_permutex2var_pbh(__m512bh __A
, __m512i __I
, __m512bh __B
) {
172 return (__m512bh
)__builtin_ia32_vpermi2varhi512((__v32hi
)__A
, (__v32hi
)__I
,
176 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
177 _mm512_permutexvar_pbh(__m512i __A
, __m512bh __B
) {
178 return (__m512bh
)__builtin_ia32_permvarhi512((__v32hi
)__B
, (__v32hi
)__A
);
181 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
182 _mm512_addne_pbh(__m512bh __A
, __m512bh __B
) {
183 return (__m512bh
)((__v32bf
)__A
+ (__v32bf
)__B
);
186 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
187 _mm512_mask_addne_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
188 return (__m512bh
)__builtin_ia32_selectpbf_512(
189 (__mmask32
)__U
, (__v32bf
)_mm512_addne_pbh(__A
, __B
), (__v32bf
)__W
);
192 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
193 _mm512_maskz_addne_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
194 return (__m512bh
)__builtin_ia32_selectpbf_512(
195 (__mmask32
)__U
, (__v32bf
)_mm512_addne_pbh(__A
, __B
),
196 (__v32bf
)_mm512_setzero_pbh());
199 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
200 _mm512_subne_pbh(__m512bh __A
, __m512bh __B
) {
201 return (__m512bh
)((__v32bf
)__A
- (__v32bf
)__B
);
204 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
205 _mm512_mask_subne_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
206 return (__m512bh
)__builtin_ia32_selectpbf_512(
207 (__mmask32
)__U
, (__v32bf
)_mm512_subne_pbh(__A
, __B
), (__v32bf
)__W
);
210 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
211 _mm512_maskz_subne_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
212 return (__m512bh
)__builtin_ia32_selectpbf_512(
213 (__mmask32
)__U
, (__v32bf
)_mm512_subne_pbh(__A
, __B
),
214 (__v32bf
)_mm512_setzero_pbh());
217 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
218 _mm512_mulne_pbh(__m512bh __A
, __m512bh __B
) {
219 return (__m512bh
)((__v32bf
)__A
* (__v32bf
)__B
);
222 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
223 _mm512_mask_mulne_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
224 return (__m512bh
)__builtin_ia32_selectpbf_512(
225 (__mmask32
)__U
, (__v32bf
)_mm512_mulne_pbh(__A
, __B
), (__v32bf
)__W
);
228 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
229 _mm512_maskz_mulne_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
230 return (__m512bh
)__builtin_ia32_selectpbf_512(
231 (__mmask32
)__U
, (__v32bf
)_mm512_mulne_pbh(__A
, __B
),
232 (__v32bf
)_mm512_setzero_pbh());
235 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
236 _mm512_divne_pbh(__m512bh __A
, __m512bh __B
) {
237 return (__m512bh
)((__v32bf
)__A
/ (__v32bf
)__B
);
240 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
241 _mm512_mask_divne_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
242 return (__m512bh
)__builtin_ia32_selectpbf_512(
243 (__mmask32
)__U
, (__v32bf
)_mm512_divne_pbh(__A
, __B
), (__v32bf
)__W
);
246 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
247 _mm512_maskz_divne_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
248 return (__m512bh
)__builtin_ia32_selectpbf_512(
249 (__mmask32
)__U
, (__v32bf
)_mm512_divne_pbh(__A
, __B
),
250 (__v32bf
)_mm512_setzero_pbh());
253 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_max_pbh(__m512bh __A
,
255 return (__m512bh
)__builtin_ia32_vmaxpbf16512((__v32bf
)__A
, (__v32bf
)__B
);
258 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
259 _mm512_mask_max_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
260 return (__m512bh
)__builtin_ia32_selectpbf_512(
261 (__mmask32
)__U
, (__v32bf
)_mm512_max_pbh(__A
, __B
), (__v32bf
)__W
);
264 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
265 _mm512_maskz_max_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
266 return (__m512bh
)__builtin_ia32_selectpbf_512(
267 (__mmask32
)__U
, (__v32bf
)_mm512_max_pbh(__A
, __B
),
268 (__v32bf
)_mm512_setzero_pbh());
271 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_min_pbh(__m512bh __A
,
273 return (__m512bh
)__builtin_ia32_vminpbf16512((__v32bf
)__A
, (__v32bf
)__B
);
276 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
277 _mm512_mask_min_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
278 return (__m512bh
)__builtin_ia32_selectpbf_512(
279 (__mmask32
)__U
, (__v32bf
)_mm512_min_pbh(__A
, __B
), (__v32bf
)__W
);
282 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
283 _mm512_maskz_min_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
284 return (__m512bh
)__builtin_ia32_selectpbf_512(
285 (__mmask32
)__U
, (__v32bf
)_mm512_min_pbh(__A
, __B
),
286 (__v32bf
)_mm512_setzero_pbh());
/* Compares __A and __B with predicate __P under an all-ones mask; yields a
 * __mmask32. */
#define _mm512_cmp_pbh_mask(__A, __B, __P)                                     \
  ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A),       \
                                               (__v32bf)(__m512bh)(__B),       \
                                               (int)(__P), (__mmask32) - 1))
/* Compares __A and __B with predicate __P, passing __U as the mask operand;
 * yields a __mmask32. */
#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P)                           \
  ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A),       \
                                               (__v32bf)(__m512bh)(__B),       \
                                               (int)(__P), (__mmask32)(__U)))
/* Classifies the elements of __A per imm, passing __U as the mask operand;
 * yields a __mmask32. */
#define _mm512_mask_fpclass_pbh_mask(__U, __A, imm)                            \
  ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask(                            \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))
/* Classifies the elements of __A per imm under an all-ones mask; yields a
 * __mmask32. */
#define _mm512_fpclass_pbh_mask(__A, imm)                                      \
  ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask(                            \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))
307 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
308 _mm512_scalef_pbh(__m512bh __A
, __m512bh __B
) {
309 return (__m512bh
)__builtin_ia32_vscalefpbf16512_mask(
310 (__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)_mm512_undefined_pbh(),
314 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_scalef_pbh(
315 __m512bh __W
, __mmask32 __U
, __m512bh __A
, __m512bh __B
) {
316 return (__m512bh
)__builtin_ia32_vscalefpbf16512_mask(
317 (__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__W
, (__mmask32
)__U
);
320 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
321 _mm512_maskz_scalef_pbh(__mmask32 __U
, __m512bh __A
, __m512bh __B
) {
322 return (__m512bh
)__builtin_ia32_vscalefpbf16512_mask(
323 (__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)_mm512_setzero_pbh(),
327 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_rcp_pbh(__m512bh __A
) {
328 return (__m512bh
)__builtin_ia32_vrcppbf16512_mask(
329 (__v32bf
)__A
, (__v32bf
)_mm512_undefined_pbh(), (__mmask32
)-1);
332 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
333 _mm512_mask_rcp_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
) {
334 return (__m512bh
)__builtin_ia32_vrcppbf16512_mask((__v32bf
)__A
, (__v32bf
)__W
,
338 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
339 _mm512_maskz_rcp_pbh(__mmask32 __U
, __m512bh __A
) {
340 return (__m512bh
)__builtin_ia32_vrcppbf16512_mask(
341 (__v32bf
)__A
, (__v32bf
)_mm512_setzero_pbh(), (__mmask32
)__U
);
344 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
345 _mm512_getexp_pbh(__m512bh __A
) {
346 return (__m512bh
)__builtin_ia32_vgetexppbf16512_mask(
347 (__v32bf
)__A
, (__v32bf
)_mm512_undefined_pbh(), (__mmask32
)-1);
350 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
351 _mm512_mask_getexp_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
) {
352 return (__m512bh
)__builtin_ia32_vgetexppbf16512_mask(
353 (__v32bf
)__A
, (__v32bf
)__W
, (__mmask32
)__U
);
356 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
357 _mm512_maskz_getexp_pbh(__mmask32 __U
, __m512bh __A
) {
358 return (__m512bh
)__builtin_ia32_vgetexppbf16512_mask(
359 (__v32bf
)__A
, (__v32bf
)_mm512_setzero_pbh(), (__mmask32
)__U
);
362 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
363 _mm512_rsqrt_pbh(__m512bh __A
) {
364 return (__m512bh
)__builtin_ia32_vrsqrtpbf16512_mask(
365 (__v32bf
)__A
, (__v32bf
)_mm512_undefined_pbh(), (__mmask32
)-1);
368 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
369 _mm512_mask_rsqrt_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
) {
370 return (__m512bh
)__builtin_ia32_vrsqrtpbf16512_mask(
371 (__v32bf
)__A
, (__v32bf
)__W
, (__mmask32
)__U
);
374 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
375 _mm512_maskz_rsqrt_pbh(__mmask32 __U
, __m512bh __A
) {
376 return (__m512bh
)__builtin_ia32_vrsqrtpbf16512_mask(
377 (__v32bf
)__A
, (__v32bf
)_mm512_setzero_pbh(), (__mmask32
)__U
);
/* Calls __builtin_ia32_vreducenepbf16512_mask on __A with immediate imm, an
 * undefined pass-through vector and an all-ones mask. */
#define _mm512_reducene_pbh(__A, imm)                                          \
  ((__m512bh)__builtin_ia32_vreducenepbf16512_mask(                            \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(),   \
      (__mmask32) - 1))
/* Calls __builtin_ia32_vreducenepbf16512_mask on __A with immediate imm,
 * __W as the pass-through vector and __U as the mask. */
#define _mm512_mask_reducene_pbh(__W, __U, __A, imm)                           \
  ((__m512bh)__builtin_ia32_vreducenepbf16512_mask(                            \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W),          \
      (__mmask32)(__U)))
/* Calls __builtin_ia32_vreducenepbf16512_mask on __A with immediate imm, a
 * zero pass-through vector and __U as the mask. */
#define _mm512_maskz_reducene_pbh(__U, __A, imm)                               \
  ((__m512bh)__builtin_ia32_vreducenepbf16512_mask(                            \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(),     \
      (__mmask32)(__U)))
/* Calls __builtin_ia32_vrndscalenepbf16_mask on __A with immediate imm, a
 * zero pass-through vector and an all-ones mask. */
#define _mm512_roundscalene_pbh(__A, imm)                                      \
  ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(),     \
      (__mmask32) - 1))
/* Calls __builtin_ia32_vrndscalenepbf16_mask on __A with immediate imm, __W
 * as the pass-through vector and __U as the mask. */
#define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm)                       \
  ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W),          \
      (__mmask32)(__U)))
/* Calls __builtin_ia32_vrndscalenepbf16_mask on __A with immediate imm, a
 * zero pass-through vector and __U as the mask. */
#define _mm512_maskz_roundscalene_pbh(__U, __A, imm)                           \
  ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(),     \
      (__mmask32)(__U)))
/* Calls __builtin_ia32_vgetmantpbf16512_mask on __A with the immediate
 * ((__C) << 2) | (__B), an undefined pass-through vector and an all-ones
 * mask. */
#define _mm512_getmant_pbh(__A, __B, __C)                                      \
  ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)),                   \
      (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
/* Calls __builtin_ia32_vgetmantpbf16512_mask on __A with the immediate
 * ((__C) << 2) | (__B), __W as the pass-through vector and __U as the mask. */
#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C)                       \
  ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)),                   \
      (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
/* Calls __builtin_ia32_vgetmantpbf16512_mask on __A with the immediate
 * ((__C) << 2) | (__B), a zero pass-through vector and __U as the mask. */
#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C)                           \
  ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask(                             \
      (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)),                   \
      (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
425 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_sqrt_pbh(__m512bh __A
) {
426 return (__m512bh
)__builtin_ia32_vsqrtnepbf16512((__v32bf
)__A
);
429 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
430 _mm512_mask_sqrt_pbh(__m512bh __W
, __mmask32 __U
, __m512bh __A
) {
431 return (__m512bh
)__builtin_ia32_selectpbf_512(
432 (__mmask32
)__U
, (__v32bf
)_mm512_sqrt_pbh(__A
), (__v32bf
)__W
);
435 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
436 _mm512_maskz_sqrt_pbh(__mmask32 __U
, __m512bh __A
) {
437 return (__m512bh
)__builtin_ia32_selectpbf_512((__mmask32
)__U
,
438 (__v32bf
)_mm512_sqrt_pbh(__A
),
439 (__v32bf
)_mm512_setzero_pbh());
442 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
443 _mm512_fmaddne_pbh(__m512bh __A
, __m512bh __B
, __m512bh __C
) {
444 return (__m512bh
)__builtin_ia32_vfmaddnepbh512((__v32bf
)__A
, (__v32bf
)__B
,
448 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmaddne_pbh(
449 __m512bh __A
, __mmask32 __U
, __m512bh __B
, __m512bh __C
) {
450 return (__m512bh
)__builtin_ia32_selectpbf_512(
452 _mm512_fmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
456 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask3_fmaddne_pbh(
457 __m512bh __A
, __m512bh __B
, __m512bh __C
, __mmask32 __U
) {
458 return (__m512bh
)__builtin_ia32_selectpbf_512(
460 _mm512_fmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
464 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_fmaddne_pbh(
465 __mmask32 __U
, __m512bh __A
, __m512bh __B
, __m512bh __C
) {
466 return (__m512bh
)__builtin_ia32_selectpbf_512(
468 _mm512_fmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
469 (__v32bf
)_mm512_setzero_pbh());
472 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
473 _mm512_fmsubne_pbh(__m512bh __A
, __m512bh __B
, __m512bh __C
) {
474 return (__m512bh
)__builtin_ia32_vfmaddnepbh512((__v32bf
)__A
, (__v32bf
)__B
,
478 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fmsubne_pbh(
479 __m512bh __A
, __mmask32 __U
, __m512bh __B
, __m512bh __C
) {
480 return (__m512bh
)__builtin_ia32_selectpbf_512(
482 _mm512_fmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
486 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask3_fmsubne_pbh(
487 __m512bh __A
, __m512bh __B
, __m512bh __C
, __mmask32 __U
) {
488 return (__m512bh
)__builtin_ia32_selectpbf_512(
490 _mm512_fmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
494 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_fmsubne_pbh(
495 __mmask32 __U
, __m512bh __A
, __m512bh __B
, __m512bh __C
) {
496 return (__m512bh
)__builtin_ia32_selectpbf_512(
498 _mm512_fmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
499 (__v32bf
)_mm512_setzero_pbh());
502 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
503 _mm512_fnmaddne_pbh(__m512bh __A
, __m512bh __B
, __m512bh __C
) {
504 return (__m512bh
)__builtin_ia32_vfmaddnepbh512((__v32bf
)__A
, -(__v32bf
)__B
,
508 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fnmaddne_pbh(
509 __m512bh __A
, __mmask32 __U
, __m512bh __B
, __m512bh __C
) {
510 return (__m512bh
)__builtin_ia32_selectpbf_512(
512 _mm512_fnmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
516 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmaddne_pbh(
517 __m512bh __A
, __m512bh __B
, __m512bh __C
, __mmask32 __U
) {
518 return (__m512bh
)__builtin_ia32_selectpbf_512(
520 _mm512_fnmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
524 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmaddne_pbh(
525 __mmask32 __U
, __m512bh __A
, __m512bh __B
, __m512bh __C
) {
526 return (__m512bh
)__builtin_ia32_selectpbf_512(
528 _mm512_fnmaddne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
529 (__v32bf
)_mm512_setzero_pbh());
532 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
533 _mm512_fnmsubne_pbh(__m512bh __A
, __m512bh __B
, __m512bh __C
) {
534 return (__m512bh
)__builtin_ia32_vfmaddnepbh512((__v32bf
)__A
, -(__v32bf
)__B
,
538 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask_fnmsubne_pbh(
539 __m512bh __A
, __mmask32 __U
, __m512bh __B
, __m512bh __C
) {
540 return (__m512bh
)__builtin_ia32_selectpbf_512(
542 _mm512_fnmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
546 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_mask3_fnmsubne_pbh(
547 __m512bh __A
, __m512bh __B
, __m512bh __C
, __mmask32 __U
) {
548 return (__m512bh
)__builtin_ia32_selectpbf_512(
550 _mm512_fnmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
554 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
_mm512_maskz_fnmsubne_pbh(
555 __mmask32 __U
, __m512bh __A
, __m512bh __B
, __m512bh __C
) {
556 return (__m512bh
)__builtin_ia32_selectpbf_512(
558 _mm512_fnmsubne_pbh((__v32bf
)__A
, (__v32bf
)__B
, (__v32bf
)__C
),
559 (__v32bf
)_mm512_setzero_pbh());
/* The attribute macro is private to this header; drop it once all intrinsics
 * are defined. */
#undef __DEFAULT_FN_ATTRS512