[SDAG] Use BatchAAResults for querying alias analysis (AA) results (#123934)
[llvm-project.git] / clang / lib / Headers / avx10_2_512bf16intrin.h
blob392b7ae770c5b5dd6d9cd9b2ff7d8d67fb03475b
1 /*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
8 */
9 #ifndef __IMMINTRIN_H
10 #error \
11 "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead."
12 #endif
14 #ifdef __SSE2__
16 #ifndef __AVX10_2_512BF16INTRIN_H
17 #define __AVX10_2_512BF16INTRIN_H
19 /* Define the default attributes for the functions in this file. */
20 typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
22 /* Define the default attributes for the functions in this file. */
23 #define __DEFAULT_FN_ATTRS512 \
24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \
25 __min_vector_width__(512)))
27 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
28 return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
31 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) {
32 return (__m512bh)__builtin_ia32_undef512();
35 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) {
36 return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
37 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf,
38 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf};
41 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh(
42 __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6,
43 __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12,
44 __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17,
45 __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22,
46 __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27,
47 __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) {
48 return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25,
49 bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17,
50 bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9,
51 bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1};
54 #define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \
55 bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \
56 bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \
57 bf29, bf30, bf31, bf32) \
58 _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \
59 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \
60 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \
61 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \
62 (bf3), (bf2), (bf1))
64 static __inline__ __m512 __DEFAULT_FN_ATTRS512
65 _mm512_castpbf16_ps(__m512bh __a) {
66 return (__m512)__a;
69 static __inline__ __m512d __DEFAULT_FN_ATTRS512
70 _mm512_castpbf16_pd(__m512bh __a) {
71 return (__m512d)__a;
74 static __inline__ __m512i __DEFAULT_FN_ATTRS512
75 _mm512_castpbf16_si512(__m512bh __a) {
76 return (__m512i)__a;
79 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) {
80 return (__m512bh)__a;
83 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
84 _mm512_castpd_pbh(__m512d __a) {
85 return (__m512bh)__a;
88 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
89 _mm512_castsi512_pbh(__m512i __a) {
90 return (__m512bh)__a;
93 static __inline__ __m128bh __DEFAULT_FN_ATTRS512
94 _mm512_castpbf16512_pbh128(__m512bh __a) {
95 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7);
98 static __inline__ __m256bh __DEFAULT_FN_ATTRS512
99 _mm512_castpbf16512_pbh256(__m512bh __a) {
100 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
101 12, 13, 14, 15);
104 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
105 _mm512_castpbf16128_pbh512(__m128bh __a) {
106 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1,
107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
108 -1, -1, -1, -1, -1, -1, -1, -1, -1);
111 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
112 _mm512_castpbf16256_pbh512(__m256bh __a) {
113 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
114 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
115 -1, -1, -1, -1, -1, -1, -1, -1);
118 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
119 _mm512_zextpbf16128_pbh512(__m128bh __a) {
120 return __builtin_shufflevector(
121 __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
122 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15);
125 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
126 _mm512_zextpbf16256_pbh512(__m256bh __a) {
127 return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3,
128 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
129 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
130 29, 30, 31);
133 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) {
134 return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF),
135 (__m512i)__A);
138 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
139 _mm512_load_pbh(void const *__p) {
140 return *(const __m512bh *)__p;
143 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
144 _mm512_loadu_pbh(void const *__p) {
145 struct __loadu_pbh {
146 __m512bh_u __v;
147 } __attribute__((__packed__, __may_alias__));
148 return ((const struct __loadu_pbh *)__p)->__v;
151 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P,
152 __m512bh __A) {
153 *(__m512bh *)__P = __A;
156 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P,
157 __m512bh __A) {
158 struct __storeu_pbh {
159 __m512bh_u __v;
160 } __attribute__((__packed__, __may_alias__));
161 ((struct __storeu_pbh *)__P)->__v = __A;
164 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
165 _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
166 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W,
167 (__v32bf)__A);
170 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
171 _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
172 return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
173 (__v32hi)__B);
176 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
177 _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) {
178 return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A);
181 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
182 _mm512_addne_pbh(__m512bh __A, __m512bh __B) {
183 return (__m512bh)((__v32bf)__A + (__v32bf)__B);
186 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
187 _mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
188 return (__m512bh)__builtin_ia32_selectpbf_512(
189 (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W);
192 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
193 _mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
194 return (__m512bh)__builtin_ia32_selectpbf_512(
195 (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B),
196 (__v32bf)_mm512_setzero_pbh());
199 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
200 _mm512_subne_pbh(__m512bh __A, __m512bh __B) {
201 return (__m512bh)((__v32bf)__A - (__v32bf)__B);
204 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
205 _mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
206 return (__m512bh)__builtin_ia32_selectpbf_512(
207 (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W);
210 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
211 _mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
212 return (__m512bh)__builtin_ia32_selectpbf_512(
213 (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B),
214 (__v32bf)_mm512_setzero_pbh());
217 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
218 _mm512_mulne_pbh(__m512bh __A, __m512bh __B) {
219 return (__m512bh)((__v32bf)__A * (__v32bf)__B);
222 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
223 _mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
224 return (__m512bh)__builtin_ia32_selectpbf_512(
225 (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W);
228 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
229 _mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
230 return (__m512bh)__builtin_ia32_selectpbf_512(
231 (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B),
232 (__v32bf)_mm512_setzero_pbh());
235 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
236 _mm512_divne_pbh(__m512bh __A, __m512bh __B) {
237 return (__m512bh)((__v32bf)__A / (__v32bf)__B);
240 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
241 _mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
242 return (__m512bh)__builtin_ia32_selectpbf_512(
243 (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W);
246 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
247 _mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
248 return (__m512bh)__builtin_ia32_selectpbf_512(
249 (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B),
250 (__v32bf)_mm512_setzero_pbh());
253 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A,
254 __m512bh __B) {
255 return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B);
258 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
259 _mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
260 return (__m512bh)__builtin_ia32_selectpbf_512(
261 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W);
264 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
265 _mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
266 return (__m512bh)__builtin_ia32_selectpbf_512(
267 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B),
268 (__v32bf)_mm512_setzero_pbh());
271 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A,
272 __m512bh __B) {
273 return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B);
276 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
277 _mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
278 return (__m512bh)__builtin_ia32_selectpbf_512(
279 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W);
282 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
283 _mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
284 return (__m512bh)__builtin_ia32_selectpbf_512(
285 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B),
286 (__v32bf)_mm512_setzero_pbh());
289 #define _mm512_cmp_pbh_mask(__A, __B, __P) \
290 ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
291 (__v32bf)(__m512bh)(__B), \
292 (int)(__P), (__mmask32) - 1))
294 #define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \
295 ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \
296 (__v32bf)(__m512bh)(__B), \
297 (int)(__P), (__mmask32)(__U)))
299 #define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \
300 ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
301 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U)))
303 #define _mm512_fpclass_pbh_mask(__A, imm) \
304 ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \
305 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1))
307 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
308 _mm512_scalef_pbh(__m512bh __A, __m512bh __B) {
309 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
310 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(),
311 (__mmask32)-1);
314 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh(
315 __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) {
316 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
317 (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U);
320 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
321 _mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) {
322 return (__m512bh)__builtin_ia32_vscalefpbf16512_mask(
323 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(),
324 (__mmask32)__U);
327 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) {
328 return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
329 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
332 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
333 _mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
334 return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W,
335 (__mmask32)__U);
338 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
339 _mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) {
340 return (__m512bh)__builtin_ia32_vrcppbf16512_mask(
341 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
344 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
345 _mm512_getexp_pbh(__m512bh __A) {
346 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
347 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
350 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
351 _mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
352 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
353 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
356 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
357 _mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) {
358 return (__m512bh)__builtin_ia32_vgetexppbf16512_mask(
359 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
362 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
363 _mm512_rsqrt_pbh(__m512bh __A) {
364 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
365 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1);
368 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
369 _mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
370 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
371 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U);
374 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
375 _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) {
376 return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask(
377 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U);
380 #define _mm512_reducene_pbh(__A, imm) \
381 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
382 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \
383 (__mmask32) - 1))
385 #define _mm512_mask_reducene_pbh(__W, __U, __A, imm) \
386 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
387 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
388 (__mmask32)(__U)))
390 #define _mm512_maskz_reducene_pbh(__U, __A, imm) \
391 ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \
392 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
393 (__mmask32)(__U)))
395 #define _mm512_roundscalene_pbh(__A, imm) \
396 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
397 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
398 (__mmask32) - 1))
400 #define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm) \
401 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
402 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \
403 (__mmask32)(__U)))
405 #define _mm512_maskz_roundscalene_pbh(__U, __A, imm) \
406 ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \
407 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \
408 (__mmask32)(__U)))
410 #define _mm512_getmant_pbh(__A, __B, __C) \
411 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
412 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
413 (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1))
415 #define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \
416 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
417 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
418 (__v32bf)(__m512bh)(__W), (__mmask32)(__U)))
420 #define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \
421 ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \
422 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \
423 (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U)))
425 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) {
426 return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A);
429 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
430 _mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) {
431 return (__m512bh)__builtin_ia32_selectpbf_512(
432 (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W);
435 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
436 _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) {
437 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U,
438 (__v32bf)_mm512_sqrt_pbh(__A),
439 (__v32bf)_mm512_setzero_pbh());
442 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
443 _mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
444 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
445 (__v32bf)__C);
448 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh(
449 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
450 return (__m512bh)__builtin_ia32_selectpbf_512(
451 (__mmask32)__U,
452 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
453 (__v32bf)__A);
456 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh(
457 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
458 return (__m512bh)__builtin_ia32_selectpbf_512(
459 (__mmask32)__U,
460 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
461 (__v32bf)__C);
464 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh(
465 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
466 return (__m512bh)__builtin_ia32_selectpbf_512(
467 (__mmask32)__U,
468 _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
469 (__v32bf)_mm512_setzero_pbh());
472 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
473 _mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
474 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B,
475 -(__v32bf)__C);
478 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh(
479 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
480 return (__m512bh)__builtin_ia32_selectpbf_512(
481 (__mmask32)__U,
482 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
483 (__v32bf)__A);
486 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh(
487 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
488 return (__m512bh)__builtin_ia32_selectpbf_512(
489 (__mmask32)__U,
490 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
491 (__v32bf)__C);
494 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh(
495 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
496 return (__m512bh)__builtin_ia32_selectpbf_512(
497 (__mmask32)__U,
498 _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
499 (__v32bf)_mm512_setzero_pbh());
502 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
503 _mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
504 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
505 (__v32bf)__C);
508 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh(
509 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
510 return (__m512bh)__builtin_ia32_selectpbf_512(
511 (__mmask32)__U,
512 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
513 (__v32bf)__A);
516 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh(
517 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
518 return (__m512bh)__builtin_ia32_selectpbf_512(
519 (__mmask32)__U,
520 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
521 (__v32bf)__C);
524 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh(
525 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
526 return (__m512bh)__builtin_ia32_selectpbf_512(
527 (__mmask32)__U,
528 _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
529 (__v32bf)_mm512_setzero_pbh());
532 static __inline__ __m512bh __DEFAULT_FN_ATTRS512
533 _mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) {
534 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B,
535 -(__v32bf)__C);
538 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh(
539 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) {
540 return (__m512bh)__builtin_ia32_selectpbf_512(
541 (__mmask32)__U,
542 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
543 (__v32bf)__A);
546 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh(
547 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) {
548 return (__m512bh)__builtin_ia32_selectpbf_512(
549 (__mmask32)__U,
550 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
551 (__v32bf)__C);
554 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh(
555 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) {
556 return (__m512bh)__builtin_ia32_selectpbf_512(
557 (__mmask32)__U,
558 _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C),
559 (__v32bf)_mm512_setzero_pbh());
562 #undef __DEFAULT_FN_ATTRS512
564 #endif
565 #endif