[ControlHeightReduction] Add assert to avoid underflow (#116339)
[llvm-project.git] / clang / lib / Headers / avx10_2convertintrin.h
blob134adb2850c8defe97bdfa827bf0bca1a2708951
/*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9 #ifndef __IMMINTRIN_H
10 #error \
11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
12 #endif // __IMMINTRIN_H
14 #ifdef __SSE2__
16 #ifndef __AVX10_2CONVERTINTRIN_H
17 #define __AVX10_2CONVERTINTRIN_H
/* Attribute set applied to the 128-bit intrinsics in this file: always
 * inlined, no debug info, "avx10.2-256" target, minimum vector width 128. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx10.2-256"), __min_vector_width__(128)))
/* Attribute set applied to the 256-bit intrinsics in this file: always
 * inlined, no debug info, "avx10.2-256" target, minimum vector width 256. */
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx10.2-256"), __min_vector_width__(256)))
27 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
28 __m128 __B) {
29 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
30 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
33 static __inline__ __m128h __DEFAULT_FN_ATTRS128
34 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) {
35 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
36 (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U);
39 static __inline__ __m128h __DEFAULT_FN_ATTRS128
40 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
41 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
42 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
45 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
46 __m256 __B) {
47 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
48 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
49 _MM_FROUND_CUR_DIRECTION);
52 static __inline__ __m256h __DEFAULT_FN_ATTRS256
53 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
54 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
55 (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
56 _MM_FROUND_CUR_DIRECTION);
59 static __inline__ __m256h __DEFAULT_FN_ATTRS256
60 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
61 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
62 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
63 _MM_FROUND_CUR_DIRECTION);
/* Unmasked form with an explicit rounding argument R: expands to
 * __builtin_ia32_vcvt2ps2phx256_mask on A and B with an undefined
 * pass-through vector and every mask bit set. */
#define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A),                                                             \
      (__v8sf)(B),                                                             \
      (__v16hf)_mm256_undefined_ph(),                                          \
      (__mmask16)(-1),                                                         \
      (const int)(R)))
/* Masked form with an explicit rounding argument R: expands to
 * __builtin_ia32_vcvt2ps2phx256_mask on A and B with W as the pass-through
 * vector and U as the mask. */
#define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A),                                                             \
      (__v8sf)(B),                                                             \
      (__v16hf)(W),                                                            \
      (__mmask16)(U),                                                          \
      (const int)(R)))
/* Zero-masked form with an explicit rounding argument R: expands to
 * __builtin_ia32_vcvt2ps2phx256_mask on A and B with an all-zero
 * pass-through vector and U as the mask. */
#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
  ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
      (__v8sf)(A),                                                             \
      (__v8sf)(B),                                                             \
      (__v16hf)(_mm256_setzero_ph()),                                          \
      (__mmask16)(U),                                                          \
      (const int)(R)))
80 static __inline__ __m128i __DEFAULT_FN_ATTRS128
81 _mm_cvtbiasph_pbf8(__m128i __A, __m128h __B) {
82 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
83 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
86 static __inline__ __m128i __DEFAULT_FN_ATTRS128
87 _mm_mask_cvtbiasph_pbf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
88 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
89 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
92 static __inline__ __m128i __DEFAULT_FN_ATTRS128
93 _mm_maskz_cvtbiasph_pbf8(__mmask8 __U, __m128i __A, __m128h __B) {
94 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
95 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
96 (__mmask8)__U);
99 static __inline__ __m128i __DEFAULT_FN_ATTRS256
100 _mm256_cvtbiasph_pbf8(__m256i __A, __m256h __B) {
101 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
102 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
103 (__mmask16)-1);
106 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_pbf8(
107 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
108 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
109 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
112 static __inline__ __m128i __DEFAULT_FN_ATTRS256
113 _mm256_maskz_cvtbiasph_pbf8(__mmask16 __U, __m256i __A, __m256h __B) {
114 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
115 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
116 (__mmask16)__U);
119 static __inline__ __m128i __DEFAULT_FN_ATTRS128
120 _mm_cvtbiassph_pbf8(__m128i __A, __m128h __B) {
121 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
122 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
125 static __inline__ __m128i __DEFAULT_FN_ATTRS128
126 _mm_mask_cvtbiassph_pbf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
127 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
128 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
131 static __inline__ __m128i __DEFAULT_FN_ATTRS128
132 _mm_maskz_cvtbiassph_pbf8(__mmask8 __U, __m128i __A, __m128h __B) {
133 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
134 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
135 (__mmask8)__U);
138 static __inline__ __m128i __DEFAULT_FN_ATTRS256
139 _mm256_cvtbiassph_pbf8(__m256i __A, __m256h __B) {
140 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
141 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
142 (__mmask16)-1);
145 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_pbf8(
146 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
147 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
148 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
151 static __inline__ __m128i __DEFAULT_FN_ATTRS256
152 _mm256_maskz_cvtbiassph_pbf8(__mmask16 __U, __m256i __A, __m256h __B) {
153 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
154 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
155 (__mmask16)__U);
158 static __inline__ __m128i __DEFAULT_FN_ATTRS128
159 _mm_cvtbiasph_phf8(__m128i __A, __m128h __B) {
160 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
161 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
165 _mm_mask_cvtbiasph_phf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
166 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
167 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
170 static __inline__ __m128i __DEFAULT_FN_ATTRS128
171 _mm_maskz_cvtbiasph_phf8(__mmask8 __U, __m128i __A, __m128h __B) {
172 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
173 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
174 (__mmask8)__U);
177 static __inline__ __m128i __DEFAULT_FN_ATTRS256
178 _mm256_cvtbiasph_phf8(__m256i __A, __m256h __B) {
179 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
180 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
181 (__mmask16)-1);
184 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_phf8(
185 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
186 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
187 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
190 static __inline__ __m128i __DEFAULT_FN_ATTRS256
191 _mm256_maskz_cvtbiasph_phf8(__mmask16 __U, __m256i __A, __m256h __B) {
192 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
193 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
194 (__mmask16)__U);
197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
198 _mm_cvtbiassph_phf8(__m128i __A, __m128h __B) {
199 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
200 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
204 _mm_mask_cvtbiassph_phf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
205 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
206 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
209 static __inline__ __m128i __DEFAULT_FN_ATTRS128
210 _mm_maskz_cvtbiassph_phf8(__mmask8 __U, __m128i __A, __m128h __B) {
211 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
212 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
213 (__mmask8)__U);
216 static __inline__ __m128i __DEFAULT_FN_ATTRS256
217 _mm256_cvtbiassph_phf8(__m256i __A, __m256h __B) {
218 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
219 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
220 (__mmask16)-1);
223 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_phf8(
224 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
225 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
226 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
229 static __inline__ __m128i __DEFAULT_FN_ATTRS256
230 _mm256_maskz_cvtbiassph_phf8(__mmask16 __U, __m256i __A, __m256h __B) {
231 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
232 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
233 (__mmask16)__U);
236 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtne2ph_pbf8(__m128h __A,
237 __m128h __B) {
238 return (__m128i)__builtin_ia32_vcvtne2ph2bf8_128((__v8hf)(__A),
239 (__v8hf)(__B));
242 static __inline__ __m128i __DEFAULT_FN_ATTRS128
243 _mm_mask_cvtne2ph_pbf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
244 return (__m128i)__builtin_ia32_selectb_128(
245 (__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B), (__v16qi)__W);
248 static __inline__ __m128i __DEFAULT_FN_ATTRS128
249 _mm_maskz_cvtne2ph_pbf8(__mmask16 __U, __m128h __A, __m128h __B) {
250 return (__m128i)__builtin_ia32_selectb_128(
251 (__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B),
252 (__v16qi)(__m128i)_mm_setzero_si128());
255 static __inline__ __m256i __DEFAULT_FN_ATTRS256
256 _mm256_cvtne2ph_pbf8(__m256h __A, __m256h __B) {
257 return (__m256i)__builtin_ia32_vcvtne2ph2bf8_256((__v16hf)(__A),
258 (__v16hf)(__B));
261 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ph_pbf8(
262 __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
263 return (__m256i)__builtin_ia32_selectb_256(
264 (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B), (__v32qi)__W);
267 static __inline__ __m256i __DEFAULT_FN_ATTRS256
268 _mm256_maskz_cvtne2ph_pbf8(__mmask32 __U, __m256h __A, __m256h __B) {
269 return (__m256i)__builtin_ia32_selectb_256(
270 (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B),
271 (__v32qi)(__m256i)_mm256_setzero_si256());
274 static __inline__ __m128i __DEFAULT_FN_ATTRS128
275 _mm_cvtnes2ph_pbf8(__m128h __A, __m128h __B) {
276 return (__m128i)__builtin_ia32_vcvtne2ph2bf8s_128((__v8hf)(__A),
277 (__v8hf)(__B));
280 static __inline__ __m128i __DEFAULT_FN_ATTRS128
281 _mm_mask_cvtnes2ph_pbf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
282 return (__m128i)__builtin_ia32_selectb_128(
283 (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B), (__v16qi)__W);
286 static __inline__ __m128i __DEFAULT_FN_ATTRS128
287 _mm_maskz_cvtnes2ph_pbf8(__mmask16 __U, __m128h __A, __m128h __B) {
288 return (__m128i)__builtin_ia32_selectb_128(
289 (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B),
290 (__v16qi)(__m128i)_mm_setzero_si128());
293 static __inline__ __m256i __DEFAULT_FN_ATTRS256
294 _mm256_cvtnes2ph_pbf8(__m256h __A, __m256h __B) {
295 return (__m256i)__builtin_ia32_vcvtne2ph2bf8s_256((__v16hf)(__A),
296 (__v16hf)(__B));
299 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtnes2ph_pbf8(
300 __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
301 return (__m256i)__builtin_ia32_selectb_256(
302 (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B), (__v32qi)__W);
305 static __inline__ __m256i __DEFAULT_FN_ATTRS256
306 _mm256_maskz_cvtnes2ph_pbf8(__mmask32 __U, __m256h __A, __m256h __B) {
307 return (__m256i)__builtin_ia32_selectb_256(
308 (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B),
309 (__v32qi)(__m256i)_mm256_setzero_si256());
312 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtne2ph_phf8(__m128h __A,
313 __m128h __B) {
314 return (__m128i)__builtin_ia32_vcvtne2ph2hf8_128((__v8hf)(__A),
315 (__v8hf)(__B));
318 static __inline__ __m128i __DEFAULT_FN_ATTRS128
319 _mm_mask_cvtne2ph_phf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
320 return (__m128i)__builtin_ia32_selectb_128(
321 (__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B), (__v16qi)__W);
324 static __inline__ __m128i __DEFAULT_FN_ATTRS128
325 _mm_maskz_cvtne2ph_phf8(__mmask16 __U, __m128h __A, __m128h __B) {
326 return (__m128i)__builtin_ia32_selectb_128(
327 (__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B),
328 (__v16qi)(__m128i)_mm_setzero_si128());
331 static __inline__ __m256i __DEFAULT_FN_ATTRS256
332 _mm256_cvtne2ph_phf8(__m256h __A, __m256h __B) {
333 return (__m256i)__builtin_ia32_vcvtne2ph2hf8_256((__v16hf)(__A),
334 (__v16hf)(__B));
337 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ph_phf8(
338 __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
339 return (__m256i)__builtin_ia32_selectb_256(
340 (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B), (__v32qi)__W);
343 static __inline__ __m256i __DEFAULT_FN_ATTRS256
344 _mm256_maskz_cvtne2ph_phf8(__mmask32 __U, __m256h __A, __m256h __B) {
345 return (__m256i)__builtin_ia32_selectb_256(
346 (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B),
347 (__v32qi)(__m256i)_mm256_setzero_si256());
350 static __inline__ __m128i __DEFAULT_FN_ATTRS128
351 _mm_cvtnes2ph_phf8(__m128h __A, __m128h __B) {
352 return (__m128i)__builtin_ia32_vcvtne2ph2hf8s_128((__v8hf)(__A),
353 (__v8hf)(__B));
356 static __inline__ __m128i __DEFAULT_FN_ATTRS128
357 _mm_mask_cvtnes2ph_phf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
358 return (__m128i)__builtin_ia32_selectb_128(
359 (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B), (__v16qi)__W);
362 static __inline__ __m128i __DEFAULT_FN_ATTRS128
363 _mm_maskz_cvtnes2ph_phf8(__mmask16 __U, __m128h __A, __m128h __B) {
364 return (__m128i)__builtin_ia32_selectb_128(
365 (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B),
366 (__v16qi)(__m128i)_mm_setzero_si128());
369 static __inline__ __m256i __DEFAULT_FN_ATTRS256
370 _mm256_cvtnes2ph_phf8(__m256h __A, __m256h __B) {
371 return (__m256i)__builtin_ia32_vcvtne2ph2hf8s_256((__v16hf)(__A),
372 (__v16hf)(__B));
375 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtnes2ph_phf8(
376 __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
377 return (__m256i)__builtin_ia32_selectb_256(
378 (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B), (__v32qi)__W);
381 static __inline__ __m256i __DEFAULT_FN_ATTRS256
382 _mm256_maskz_cvtnes2ph_phf8(__mmask32 __U, __m256h __A, __m256h __B) {
383 return (__m256i)__builtin_ia32_selectb_256(
384 (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B),
385 (__v32qi)(__m256i)_mm256_setzero_si256());
388 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtnehf8_ph(__m128i __A) {
389 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
390 (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1);
393 static __inline__ __m128h __DEFAULT_FN_ATTRS128
394 _mm_mask_cvtnehf8_ph(__m128h __W, __mmask8 __U, __m128i __A) {
395 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
396 (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U);
399 static __inline__ __m128h __DEFAULT_FN_ATTRS128
400 _mm_maskz_cvtnehf8_ph(__mmask8 __U, __m128i __A) {
401 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
402 (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U);
405 static __inline__ __m256h __DEFAULT_FN_ATTRS256
406 _mm256_cvtnehf8_ph(__m128i __A) {
407 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
408 (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1);
411 static __inline__ __m256h __DEFAULT_FN_ATTRS256
412 _mm256_mask_cvtnehf8_ph(__m256h __W, __mmask16 __U, __m128i __A) {
413 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
414 (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U);
417 static __inline__ __m256h __DEFAULT_FN_ATTRS256
418 _mm256_maskz_cvtnehf8_ph(__mmask16 __U, __m128i __A) {
419 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
420 (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U);
423 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtneph_pbf8(__m128h __A) {
424 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
425 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
428 static __inline__ __m128i __DEFAULT_FN_ATTRS128
429 _mm_mask_cvtneph_pbf8(__m128i __W, __mmask8 __U, __m128h __A) {
430 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
431 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
434 static __inline__ __m128i __DEFAULT_FN_ATTRS128
435 _mm_maskz_cvtneph_pbf8(__mmask8 __U, __m128h __A) {
436 return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask(
437 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
440 static __inline__ __m128i __DEFAULT_FN_ATTRS256
441 _mm256_cvtneph_pbf8(__m256h __A) {
442 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
443 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
446 static __inline__ __m128i __DEFAULT_FN_ATTRS256
447 _mm256_mask_cvtneph_pbf8(__m128i __W, __mmask16 __U, __m256h __A) {
448 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
449 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
452 static __inline__ __m128i __DEFAULT_FN_ATTRS256
453 _mm256_maskz_cvtneph_pbf8(__mmask16 __U, __m256h __A) {
454 return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask(
455 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
458 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtnesph_pbf8(__m128h __A) {
459 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
460 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
463 static __inline__ __m128i __DEFAULT_FN_ATTRS128
464 _mm_mask_cvtnesph_pbf8(__m128i __W, __mmask8 __U, __m128h __A) {
465 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
466 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
469 static __inline__ __m128i __DEFAULT_FN_ATTRS128
470 _mm_maskz_cvtnesph_pbf8(__mmask8 __U, __m128h __A) {
471 return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask(
472 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
475 static __inline__ __m128i __DEFAULT_FN_ATTRS256
476 _mm256_cvtnesph_pbf8(__m256h __A) {
477 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
478 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
481 static __inline__ __m128i __DEFAULT_FN_ATTRS256
482 _mm256_mask_cvtnesph_pbf8(__m128i __W, __mmask16 __U, __m256h __A) {
483 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
484 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
487 static __inline__ __m128i __DEFAULT_FN_ATTRS256
488 _mm256_maskz_cvtnesph_pbf8(__mmask16 __U, __m256h __A) {
489 return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask(
490 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
493 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtneph_phf8(__m128h __A) {
494 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
495 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
499 _mm_mask_cvtneph_phf8(__m128i __W, __mmask8 __U, __m128h __A) {
500 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
501 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
504 static __inline__ __m128i __DEFAULT_FN_ATTRS128
505 _mm_maskz_cvtneph_phf8(__mmask8 __U, __m128h __A) {
506 return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask(
507 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
510 static __inline__ __m128i __DEFAULT_FN_ATTRS256
511 _mm256_cvtneph_phf8(__m256h __A) {
512 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
513 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
516 static __inline__ __m128i __DEFAULT_FN_ATTRS256
517 _mm256_mask_cvtneph_phf8(__m128i __W, __mmask16 __U, __m256h __A) {
518 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
519 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
522 static __inline__ __m128i __DEFAULT_FN_ATTRS256
523 _mm256_maskz_cvtneph_phf8(__mmask16 __U, __m256h __A) {
524 return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask(
525 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
528 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtnesph_phf8(__m128h __A) {
529 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
530 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
533 static __inline__ __m128i __DEFAULT_FN_ATTRS128
534 _mm_mask_cvtnesph_phf8(__m128i __W, __mmask8 __U, __m128h __A) {
535 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
536 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
540 _mm_maskz_cvtnesph_phf8(__mmask8 __U, __m128h __A) {
541 return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask(
542 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
545 static __inline__ __m128i __DEFAULT_FN_ATTRS256
546 _mm256_cvtnesph_phf8(__m256h __A) {
547 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
548 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
551 static __inline__ __m128i __DEFAULT_FN_ATTRS256
552 _mm256_mask_cvtnesph_phf8(__m128i __W, __mmask16 __U, __m256h __A) {
553 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
554 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
557 static __inline__ __m128i __DEFAULT_FN_ATTRS256
558 _mm256_maskz_cvtnesph_phf8(__mmask16 __U, __m256h __A) {
559 return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask(
560 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
563 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpbf8_ph(__m128i __A) {
564 return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8));
567 static __inline__ __m128h __DEFAULT_FN_ATTRS128
568 _mm_mask_cvtpbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) {
569 return _mm_castsi128_ph(
570 _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8));
573 static __inline__ __m128h __DEFAULT_FN_ATTRS128
574 _mm_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
575 return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8));
578 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
579 return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8));
582 static __inline__ __m256h __DEFAULT_FN_ATTRS256
583 _mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
584 return _mm256_castsi256_ph(
585 _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
588 static __inline__ __m256h __DEFAULT_FN_ATTRS256
589 _mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
590 return _mm256_castsi256_ph(
591 _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
594 #undef __DEFAULT_FN_ATTRS128
595 #undef __DEFAULT_FN_ATTRS256
597 #endif // __AVX10_2CONVERTINTRIN_H
598 #endif // __SSE2__