/*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
12 #endif // __IMMINTRIN_H
16 #ifndef __AVX10_2CONVERTINTRIN_H
17 #define __AVX10_2CONVERTINTRIN_H
/* Define the default attributes for the functions in this file. */
/* 128-bit variant: always inlined, no debug info, requires the
 * "avx10.2-256" target feature, minimum vector width 128. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(128)))
/* 256-bit variant of the default function attributes: always inlined, no
 * debug info, requires the "avx10.2-256" target feature, minimum vector
 * width 256. */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
                 __min_vector_width__(256)))
27 static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_cvtx2ps_ph(__m128 __A
,
29 return (__m128h
)__builtin_ia32_vcvt2ps2phx128_mask(
30 (__v4sf
)__A
, (__v4sf
)__B
, (__v8hf
)_mm_setzero_ph(), (__mmask8
)(-1));
33 static __inline__ __m128h __DEFAULT_FN_ATTRS128
34 _mm_mask_cvtx2ps_ph(__m128h __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
35 return (__m128h
)__builtin_ia32_vcvt2ps2phx128_mask(
36 (__v4sf
)__A
, (__v4sf
)__B
, (__v8hf
)__W
, (__mmask8
)__U
);
39 static __inline__ __m128h __DEFAULT_FN_ATTRS128
40 _mm_maskz_cvtx2ps_ph(__mmask8 __U
, __m128 __A
, __m128 __B
) {
41 return (__m128h
)__builtin_ia32_vcvt2ps2phx128_mask(
42 (__v4sf
)__A
, (__v4sf
)__B
, (__v8hf
)_mm_setzero_ph(), (__mmask8
)__U
);
45 static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_cvtx2ps_ph(__m256 __A
,
47 return (__m256h
)__builtin_ia32_vcvt2ps2phx256_mask(
48 (__v8sf
)__A
, (__v8sf
)__B
, (__v16hf
)_mm256_setzero_ph(), (__mmask16
)(-1),
49 _MM_FROUND_CUR_DIRECTION
);
52 static __inline__ __m256h __DEFAULT_FN_ATTRS256
53 _mm256_mask_cvtx2ps_ph(__m256h __W
, __mmask16 __U
, __m256 __A
, __m256 __B
) {
54 return (__m256h
)__builtin_ia32_vcvt2ps2phx256_mask(
55 (__v8sf
)__A
, (__v8sf
)__B
, (__v16hf
)__W
, (__mmask16
)__U
,
56 _MM_FROUND_CUR_DIRECTION
);
59 static __inline__ __m256h __DEFAULT_FN_ATTRS256
60 _mm256_maskz_cvtx2ps_ph(__mmask16 __U
, __m256 __A
, __m256 __B
) {
61 return (__m256h
)__builtin_ia32_vcvt2ps2phx256_mask(
62 (__v8sf
)__A
, (__v8sf
)__B
, (__v16hf
)_mm256_setzero_ph(), (__mmask16
)__U
,
63 _MM_FROUND_CUR_DIRECTION
);
/// Explicit-rounding form: forwards \a A, \a B and the rounding operand \a R
/// to __builtin_ia32_vcvt2ps2phx256_mask with an all-ones mask and an
/// undefined pass-through operand. A macro, so \a R reaches the builtin as a
/// constant expression.
#define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(),                \
      (__mmask16)(-1), (const int)(R)))
/// Masked explicit-rounding form: \a W is the pass-through operand, \a U the
/// write mask and \a R the rounding operand of
/// __builtin_ia32_vcvt2ps2phx256_mask.
#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R)))
/// Zero-masked explicit-rounding form: a zero vector is the pass-through
/// operand, \a U the write mask and \a R the rounding operand of
/// __builtin_ia32_vcvt2ps2phx256_mask.
#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()),                \
      (__mmask16)(U), (const int)(R)))
80 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiasph_bf8(__m128i __A
,
82 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_128_mask(
83 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)_mm_undefined_si128(), (__mmask8
)-1);
86 static __inline__ __m128i __DEFAULT_FN_ATTRS128
87 _mm_mask_cvtbiasph_bf8(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128h __B
) {
88 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_128_mask(
89 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
92 static __inline__ __m128i __DEFAULT_FN_ATTRS128
93 _mm_maskz_cvtbiasph_bf8(__mmask8 __U
, __m128i __A
, __m128h __B
) {
94 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_128_mask(
95 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
99 static __inline__ __m128i __DEFAULT_FN_ATTRS256
100 _mm256_cvtbiasph_bf8(__m256i __A
, __m256h __B
) {
101 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_256_mask(
102 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_undefined_si128(),
106 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbiasph_bf8(
107 __m128i __W
, __mmask16 __U
, __m256i __A
, __m256h __B
) {
108 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_256_mask(
109 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
112 static __inline__ __m128i __DEFAULT_FN_ATTRS256
113 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U
, __m256i __A
, __m256h __B
) {
114 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8_256_mask(
115 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
119 static __inline__ __m128i __DEFAULT_FN_ATTRS128
120 _mm_cvtbiassph_bf8(__m128i __A
, __m128h __B
) {
121 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
122 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)_mm_undefined_si128(), (__mmask8
)-1);
125 static __inline__ __m128i __DEFAULT_FN_ATTRS128
126 _mm_mask_cvtbiassph_bf8(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128h __B
) {
127 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
128 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
131 static __inline__ __m128i __DEFAULT_FN_ATTRS128
132 _mm_maskz_cvtbiassph_bf8(__mmask8 __U
, __m128i __A
, __m128h __B
) {
133 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
134 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
138 static __inline__ __m128i __DEFAULT_FN_ATTRS256
139 _mm256_cvtbiassph_bf8(__m256i __A
, __m256h __B
) {
140 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
141 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_undefined_si128(),
145 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbiassph_bf8(
146 __m128i __W
, __mmask16 __U
, __m256i __A
, __m256h __B
) {
147 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
148 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
151 static __inline__ __m128i __DEFAULT_FN_ATTRS256
152 _mm256_maskz_cvtbiassph_bf8(__mmask16 __U
, __m256i __A
, __m256h __B
) {
153 return (__m128i
)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
154 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
158 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtbiasph_hf8(__m128i __A
,
160 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_128_mask(
161 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)_mm_undefined_si128(), (__mmask8
)-1);
164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
165 _mm_mask_cvtbiasph_hf8(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128h __B
) {
166 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_128_mask(
167 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
170 static __inline__ __m128i __DEFAULT_FN_ATTRS128
171 _mm_maskz_cvtbiasph_hf8(__mmask8 __U
, __m128i __A
, __m128h __B
) {
172 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_128_mask(
173 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
177 static __inline__ __m128i __DEFAULT_FN_ATTRS256
178 _mm256_cvtbiasph_hf8(__m256i __A
, __m256h __B
) {
179 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_256_mask(
180 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_undefined_si128(),
184 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbiasph_hf8(
185 __m128i __W
, __mmask16 __U
, __m256i __A
, __m256h __B
) {
186 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_256_mask(
187 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
190 static __inline__ __m128i __DEFAULT_FN_ATTRS256
191 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U
, __m256i __A
, __m256h __B
) {
192 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8_256_mask(
193 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
198 _mm_cvtbiassph_hf8(__m128i __A
, __m128h __B
) {
199 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
200 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)_mm_undefined_si128(), (__mmask8
)-1);
203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
204 _mm_mask_cvtbiassph_hf8(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128h __B
) {
205 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
206 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
209 static __inline__ __m128i __DEFAULT_FN_ATTRS128
210 _mm_maskz_cvtbiassph_hf8(__mmask8 __U
, __m128i __A
, __m128h __B
) {
211 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
212 (__v16qi
)__A
, (__v8hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
216 static __inline__ __m128i __DEFAULT_FN_ATTRS256
217 _mm256_cvtbiassph_hf8(__m256i __A
, __m256h __B
) {
218 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
219 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_undefined_si128(),
223 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_mask_cvtbiassph_hf8(
224 __m128i __W
, __mmask16 __U
, __m256i __A
, __m256h __B
) {
225 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
226 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
229 static __inline__ __m128i __DEFAULT_FN_ATTRS256
230 _mm256_maskz_cvtbiassph_hf8(__mmask16 __U
, __m256i __A
, __m256h __B
) {
231 return (__m128i
)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
232 (__v32qi
)__A
, (__v16hf
)__B
, (__v16qi
)(__m128i
)_mm_setzero_si128(),
236 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvt2ph_bf8(__m128h __A
,
238 return (__m128i
)__builtin_ia32_vcvt2ph2bf8_128((__v8hf
)(__A
), (__v8hf
)(__B
));
241 static __inline__ __m128i __DEFAULT_FN_ATTRS128
242 _mm_mask_cvt2ph_bf8(__m128i __W
, __mmask16 __U
, __m128h __A
, __m128h __B
) {
243 return (__m128i
)__builtin_ia32_selectb_128(
244 (__mmask16
)__U
, (__v16qi
)_mm_cvt2ph_bf8(__A
, __B
), (__v16qi
)__W
);
247 static __inline__ __m128i __DEFAULT_FN_ATTRS128
248 _mm_maskz_cvt2ph_bf8(__mmask16 __U
, __m128h __A
, __m128h __B
) {
249 return (__m128i
)__builtin_ia32_selectb_128(
250 (__mmask16
)__U
, (__v16qi
)_mm_cvt2ph_bf8(__A
, __B
),
251 (__v16qi
)(__m128i
)_mm_setzero_si128());
254 static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvt2ph_bf8(__m256h __A
,
256 return (__m256i
)__builtin_ia32_vcvt2ph2bf8_256((__v16hf
)(__A
),
260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
261 _mm256_mask_cvt2ph_bf8(__m256i __W
, __mmask32 __U
, __m256h __A
, __m256h __B
) {
262 return (__m256i
)__builtin_ia32_selectb_256(
263 (__mmask16
)__U
, (__v32qi
)_mm256_cvt2ph_bf8(__A
, __B
), (__v32qi
)__W
);
266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
267 _mm256_maskz_cvt2ph_bf8(__mmask32 __U
, __m256h __A
, __m256h __B
) {
268 return (__m256i
)__builtin_ia32_selectb_256(
269 (__mmask16
)__U
, (__v32qi
)_mm256_cvt2ph_bf8(__A
, __B
),
270 (__v32qi
)(__m256i
)_mm256_setzero_si256());
273 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvts2ph_bf8(__m128h __A
,
275 return (__m128i
)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf
)(__A
), (__v8hf
)(__B
));
278 static __inline__ __m128i __DEFAULT_FN_ATTRS128
279 _mm_mask_cvts2ph_bf8(__m128i __W
, __mmask16 __U
, __m128h __A
, __m128h __B
) {
280 return (__m128i
)__builtin_ia32_selectb_128(
281 (__mmask16
)__U
, (__v16qi
)_mm_cvts2ph_bf8(__A
, __B
), (__v16qi
)__W
);
284 static __inline__ __m128i __DEFAULT_FN_ATTRS128
285 _mm_maskz_cvts2ph_bf8(__mmask16 __U
, __m128h __A
, __m128h __B
) {
286 return (__m128i
)__builtin_ia32_selectb_128(
287 (__mmask16
)__U
, (__v16qi
)_mm_cvts2ph_bf8(__A
, __B
),
288 (__v16qi
)(__m128i
)_mm_setzero_si128());
291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
292 _mm256_cvts2ph_bf8(__m256h __A
, __m256h __B
) {
293 return (__m256i
)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf
)(__A
),
297 static __inline__ __m256i __DEFAULT_FN_ATTRS256
298 _mm256_mask_cvts2ph_bf8(__m256i __W
, __mmask32 __U
, __m256h __A
, __m256h __B
) {
299 return (__m256i
)__builtin_ia32_selectb_256(
300 (__mmask16
)__U
, (__v32qi
)_mm256_cvts2ph_bf8(__A
, __B
), (__v32qi
)__W
);
303 static __inline__ __m256i __DEFAULT_FN_ATTRS256
304 _mm256_maskz_cvts2ph_bf8(__mmask32 __U
, __m256h __A
, __m256h __B
) {
305 return (__m256i
)__builtin_ia32_selectb_256(
306 (__mmask16
)__U
, (__v32qi
)_mm256_cvts2ph_bf8(__A
, __B
),
307 (__v32qi
)(__m256i
)_mm256_setzero_si256());
310 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvt2ph_hf8(__m128h __A
,
312 return (__m128i
)__builtin_ia32_vcvt2ph2hf8_128((__v8hf
)(__A
), (__v8hf
)(__B
));
315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
316 _mm_mask_cvt2ph_hf8(__m128i __W
, __mmask16 __U
, __m128h __A
, __m128h __B
) {
317 return (__m128i
)__builtin_ia32_selectb_128(
318 (__mmask16
)__U
, (__v16qi
)_mm_cvt2ph_hf8(__A
, __B
), (__v16qi
)__W
);
321 static __inline__ __m128i __DEFAULT_FN_ATTRS128
322 _mm_maskz_cvt2ph_hf8(__mmask16 __U
, __m128h __A
, __m128h __B
) {
323 return (__m128i
)__builtin_ia32_selectb_128(
324 (__mmask16
)__U
, (__v16qi
)_mm_cvt2ph_hf8(__A
, __B
),
325 (__v16qi
)(__m128i
)_mm_setzero_si128());
328 static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_cvt2ph_hf8(__m256h __A
,
330 return (__m256i
)__builtin_ia32_vcvt2ph2hf8_256((__v16hf
)(__A
),
334 static __inline__ __m256i __DEFAULT_FN_ATTRS256
335 _mm256_mask_cvt2ph_hf8(__m256i __W
, __mmask32 __U
, __m256h __A
, __m256h __B
) {
336 return (__m256i
)__builtin_ia32_selectb_256(
337 (__mmask16
)__U
, (__v32qi
)_mm256_cvt2ph_hf8(__A
, __B
), (__v32qi
)__W
);
340 static __inline__ __m256i __DEFAULT_FN_ATTRS256
341 _mm256_maskz_cvt2ph_hf8(__mmask32 __U
, __m256h __A
, __m256h __B
) {
342 return (__m256i
)__builtin_ia32_selectb_256(
343 (__mmask16
)__U
, (__v32qi
)_mm256_cvt2ph_hf8(__A
, __B
),
344 (__v32qi
)(__m256i
)_mm256_setzero_si256());
347 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvts2ph_hf8(__m128h __A
,
349 return (__m128i
)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf
)(__A
), (__v8hf
)(__B
));
352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
353 _mm_mask_cvts2ph_hf8(__m128i __W
, __mmask16 __U
, __m128h __A
, __m128h __B
) {
354 return (__m128i
)__builtin_ia32_selectb_128(
355 (__mmask16
)__U
, (__v16qi
)_mm_cvts2ph_hf8(__A
, __B
), (__v16qi
)__W
);
358 static __inline__ __m128i __DEFAULT_FN_ATTRS128
359 _mm_maskz_cvts2ph_hf8(__mmask16 __U
, __m128h __A
, __m128h __B
) {
360 return (__m128i
)__builtin_ia32_selectb_128(
361 (__mmask16
)__U
, (__v16qi
)_mm_cvts2ph_hf8(__A
, __B
),
362 (__v16qi
)(__m128i
)_mm_setzero_si128());
365 static __inline__ __m256i __DEFAULT_FN_ATTRS256
366 _mm256_cvts2ph_hf8(__m256h __A
, __m256h __B
) {
367 return (__m256i
)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf
)(__A
),
371 static __inline__ __m256i __DEFAULT_FN_ATTRS256
372 _mm256_mask_cvts2ph_hf8(__m256i __W
, __mmask32 __U
, __m256h __A
, __m256h __B
) {
373 return (__m256i
)__builtin_ia32_selectb_256(
374 (__mmask16
)__U
, (__v32qi
)_mm256_cvts2ph_hf8(__A
, __B
), (__v32qi
)__W
);
377 static __inline__ __m256i __DEFAULT_FN_ATTRS256
378 _mm256_maskz_cvts2ph_hf8(__mmask32 __U
, __m256h __A
, __m256h __B
) {
379 return (__m256i
)__builtin_ia32_selectb_256(
380 (__mmask16
)__U
, (__v32qi
)_mm256_cvts2ph_hf8(__A
, __B
),
381 (__v32qi
)(__m256i
)_mm256_setzero_si256());
384 static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_cvthf8(__m128i __A
) {
385 return (__m128h
)__builtin_ia32_vcvthf8_2ph128_mask(
386 (__v16qi
)__A
, (__v8hf
)(__m128h
)_mm_undefined_ph(), (__mmask8
)-1);
389 static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_cvthf8(__m128h __W
,
392 return (__m128h
)__builtin_ia32_vcvthf8_2ph128_mask(
393 (__v16qi
)__A
, (__v8hf
)(__m128h
)__W
, (__mmask8
)__U
);
396 static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_maskz_cvthf8(__mmask8 __U
,
398 return (__m128h
)__builtin_ia32_vcvthf8_2ph128_mask(
399 (__v16qi
)__A
, (__v8hf
)(__m128h
)_mm_setzero_ph(), (__mmask8
)__U
);
402 static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_cvthf8(__m128i __A
) {
403 return (__m256h
)__builtin_ia32_vcvthf8_2ph256_mask(
404 (__v16qi
)__A
, (__v16hf
)(__m256h
)_mm256_undefined_ph(), (__mmask16
)-1);
407 static __inline__ __m256h __DEFAULT_FN_ATTRS256
408 _mm256_mask_cvthf8(__m256h __W
, __mmask16 __U
, __m128i __A
) {
409 return (__m256h
)__builtin_ia32_vcvthf8_2ph256_mask(
410 (__v16qi
)__A
, (__v16hf
)(__m256h
)__W
, (__mmask16
)__U
);
413 static __inline__ __m256h __DEFAULT_FN_ATTRS256
414 _mm256_maskz_cvthf8(__mmask16 __U
, __m128i __A
) {
415 return (__m256h
)__builtin_ia32_vcvthf8_2ph256_mask(
416 (__v16qi
)__A
, (__v16hf
)(__m256h
)_mm256_setzero_ph(), (__mmask16
)__U
);
419 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtph_bf8(__m128h __A
) {
420 return (__m128i
)__builtin_ia32_vcvtph2bf8_128_mask(
421 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask8
)-1);
424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
425 _mm_mask_cvtph_bf8(__m128i __W
, __mmask8 __U
, __m128h __A
) {
426 return (__m128i
)__builtin_ia32_vcvtph2bf8_128_mask(
427 (__v8hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
430 static __inline__ __m128i __DEFAULT_FN_ATTRS128
431 _mm_maskz_cvtph_bf8(__mmask8 __U
, __m128h __A
) {
432 return (__m128i
)__builtin_ia32_vcvtph2bf8_128_mask(
433 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask8
)__U
);
436 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtph_bf8(__m256h __A
) {
437 return (__m128i
)__builtin_ia32_vcvtph2bf8_256_mask(
438 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask16
)-1);
441 static __inline__ __m128i __DEFAULT_FN_ATTRS256
442 _mm256_mask_cvtph_bf8(__m128i __W
, __mmask16 __U
, __m256h __A
) {
443 return (__m128i
)__builtin_ia32_vcvtph2bf8_256_mask(
444 (__v16hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
447 static __inline__ __m128i __DEFAULT_FN_ATTRS256
448 _mm256_maskz_cvtph_bf8(__mmask16 __U
, __m256h __A
) {
449 return (__m128i
)__builtin_ia32_vcvtph2bf8_256_mask(
450 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask16
)__U
);
453 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtsph_bf8(__m128h __A
) {
454 return (__m128i
)__builtin_ia32_vcvtph2bf8s_128_mask(
455 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask8
)-1);
458 static __inline__ __m128i __DEFAULT_FN_ATTRS128
459 _mm_mask_cvtsph_bf8(__m128i __W
, __mmask8 __U
, __m128h __A
) {
460 return (__m128i
)__builtin_ia32_vcvtph2bf8s_128_mask(
461 (__v8hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
464 static __inline__ __m128i __DEFAULT_FN_ATTRS128
465 _mm_maskz_cvtsph_bf8(__mmask8 __U
, __m128h __A
) {
466 return (__m128i
)__builtin_ia32_vcvtph2bf8s_128_mask(
467 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask8
)__U
);
470 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtsph_bf8(__m256h __A
) {
471 return (__m128i
)__builtin_ia32_vcvtph2bf8s_256_mask(
472 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask16
)-1);
475 static __inline__ __m128i __DEFAULT_FN_ATTRS256
476 _mm256_mask_cvtsph_bf8(__m128i __W
, __mmask16 __U
, __m256h __A
) {
477 return (__m128i
)__builtin_ia32_vcvtph2bf8s_256_mask(
478 (__v16hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
481 static __inline__ __m128i __DEFAULT_FN_ATTRS256
482 _mm256_maskz_cvtsph_bf8(__mmask16 __U
, __m256h __A
) {
483 return (__m128i
)__builtin_ia32_vcvtph2bf8s_256_mask(
484 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask16
)__U
);
487 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtph_hf8(__m128h __A
) {
488 return (__m128i
)__builtin_ia32_vcvtph2hf8_128_mask(
489 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask8
)-1);
492 static __inline__ __m128i __DEFAULT_FN_ATTRS128
493 _mm_mask_cvtph_hf8(__m128i __W
, __mmask8 __U
, __m128h __A
) {
494 return (__m128i
)__builtin_ia32_vcvtph2hf8_128_mask(
495 (__v8hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
499 _mm_maskz_cvtph_hf8(__mmask8 __U
, __m128h __A
) {
500 return (__m128i
)__builtin_ia32_vcvtph2hf8_128_mask(
501 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask8
)__U
);
504 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtph_hf8(__m256h __A
) {
505 return (__m128i
)__builtin_ia32_vcvtph2hf8_256_mask(
506 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask16
)-1);
509 static __inline__ __m128i __DEFAULT_FN_ATTRS256
510 _mm256_mask_cvtph_hf8(__m128i __W
, __mmask16 __U
, __m256h __A
) {
511 return (__m128i
)__builtin_ia32_vcvtph2hf8_256_mask(
512 (__v16hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
515 static __inline__ __m128i __DEFAULT_FN_ATTRS256
516 _mm256_maskz_cvtph_hf8(__mmask16 __U
, __m256h __A
) {
517 return (__m128i
)__builtin_ia32_vcvtph2hf8_256_mask(
518 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask16
)__U
);
521 static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_cvtsph_hf8(__m128h __A
) {
522 return (__m128i
)__builtin_ia32_vcvtph2hf8s_128_mask(
523 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask8
)-1);
526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
527 _mm_mask_cvtsph_hf8(__m128i __W
, __mmask8 __U
, __m128h __A
) {
528 return (__m128i
)__builtin_ia32_vcvtph2hf8s_128_mask(
529 (__v8hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask8
)__U
);
532 static __inline__ __m128i __DEFAULT_FN_ATTRS128
533 _mm_maskz_cvtsph_hf8(__mmask8 __U
, __m128h __A
) {
534 return (__m128i
)__builtin_ia32_vcvtph2hf8s_128_mask(
535 (__v8hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask8
)__U
);
538 static __inline__ __m128i __DEFAULT_FN_ATTRS256
_mm256_cvtsph_hf8(__m256h __A
) {
539 return (__m128i
)__builtin_ia32_vcvtph2hf8s_256_mask(
540 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_undefined_si128(), (__mmask16
)-1);
543 static __inline__ __m128i __DEFAULT_FN_ATTRS256
544 _mm256_mask_cvtsph_hf8(__m128i __W
, __mmask16 __U
, __m256h __A
) {
545 return (__m128i
)__builtin_ia32_vcvtph2hf8s_256_mask(
546 (__v16hf
)__A
, (__v16qi
)(__m128i
)__W
, (__mmask16
)__U
);
549 static __inline__ __m128i __DEFAULT_FN_ATTRS256
550 _mm256_maskz_cvtsph_hf8(__mmask16 __U
, __m256h __A
) {
551 return (__m128i
)__builtin_ia32_vcvtph2hf8s_256_mask(
552 (__v16hf
)__A
, (__v16qi
)(__m128i
)_mm_setzero_si128(), (__mmask16
)__U
);
555 static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_cvtbf8_ph(__m128i __A
) {
556 return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A
), 8));
559 static __inline__ __m128h __DEFAULT_FN_ATTRS128
560 _mm_mask_cvtbf8_ph(__m128h __S
, __mmask8 __U
, __m128i __A
) {
561 return _mm_castsi128_ph(
562 _mm_mask_slli_epi16((__m128i
)__S
, __U
, _mm_cvtepi8_epi16(__A
), 8));
565 static __inline__ __m128h __DEFAULT_FN_ATTRS128
566 _mm_maskz_cvtbf8_ph(__mmask8 __U
, __m128i __A
) {
567 return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U
, __A
), 8));
570 static __inline__ __m256h __DEFAULT_FN_ATTRS256
_mm256_cvtbf8_ph(__m128i __A
) {
571 return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A
), 8));
574 static __inline__ __m256h __DEFAULT_FN_ATTRS256
575 _mm256_mask_cvtbf8_ph(__m256h __S
, __mmask16 __U
, __m128i __A
) {
576 return _mm256_castsi256_ph(
577 _mm256_mask_slli_epi16((__m256i
)__S
, __U
, _mm256_cvtepi8_epi16(__A
), 8));
580 static __inline__ __m256h __DEFAULT_FN_ATTRS256
581 _mm256_maskz_cvtbf8_ph(__mmask16 __U
, __m128i __A
) {
582 return _mm256_castsi256_ph(
583 _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U
, __A
), 8));
/* The attribute helper macros are private to this header; remove them so
 * they are not visible after it has been included. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
589 #endif // __AVX10_2CONVERTINTRIN_H