/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error                                                                         \
    "Never use <avx10_2satcvtintrin.h> directly; include <immintrin.h> instead."
#endif // __IMMINTRIN_H
#ifndef __AVX10_2SATCVTINTRIN_H
#define __AVX10_2SATCVTINTRIN_H
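
/* The intrinsics below implement the AVX10.2 saturating integer conversions
 * (SATCVT). Each family converts packed BF16, FP16, or FP32 elements to
 * signed (_epi8) or unsigned (_epu8) 8-bit integers with saturation, writing
 * each result into its source element's lane of the destination, so write
 * masking is per source element: _mask_ forms merge masked-off lanes from W,
 * _maskz_ forms zero them. An illustrative sketch (the input value and the
 * 0x0F mask are arbitrary):
 *
 *   __m128h x = _mm_set1_ph((_Float16)300.0f); // out of range, clamps to 127
 *   __m128i r = _mm_maskz_ipcvtph_epi8(0x0F, x); // upper four lanes zeroed
 */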
#define _mm_ipcvtnebf16_epi8(A)                                                \
  ((__m128i)__builtin_ia32_vcvtnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epi8(W, U, A)                                     \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epi8(U, A)                                       \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtnebf16_epi8(A),        \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epi8(A)                                             \
  ((__m256i)__builtin_ia32_vcvtnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epi8(W, U, A)                                  \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A),    \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epi8(U, A)                                    \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtnebf16_epi8(A),    \
                                       (__v16hi)_mm256_setzero_si256()))
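
/* Unsigned-saturation BF16 conversions: results clamp to [0, 255], so
 * negative inputs produce 0. */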
#define _mm_ipcvtnebf16_epu8(A)                                                \
  ((__m128i)__builtin_ia32_vcvtnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvtnebf16_epu8(W, U, A)                                     \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvtnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvtnebf16_epu8(U, A)                                       \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvtnebf16_epu8(A),        \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvtnebf16_epu8(A)                                             \
  ((__m256i)__builtin_ia32_vcvtnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvtnebf16_epu8(W, U, A)                                  \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A),    \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvtnebf16_epu8(U, A)                                    \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvtnebf16_epu8(A),    \
                                       (__v16hi)_mm256_setzero_si256()))
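
/* FP16 sources: 8 elements per 128-bit vector, 16 per 256-bit vector. The
 * plain 256-bit forms convert using the current rounding mode
 * (_MM_FROUND_CUR_DIRECTION); the _round variants take an explicit
 * rounding/SAE immediate R instead. */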
#define _mm_ipcvtph_epi8(A)                                                    \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epi8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A),            \
                                              (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epi8(U, A)                                           \
  ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epi8(A)                                                 \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epi8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
                                              (__v16hu)(W), (__mmask16)(U),    \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epi8(U, A)                                        \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epi8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)-1, (const int)R))

#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
                                              (__v16hu)_mm256_setzero_si256(), \
                                              (__mmask16)(U), (const int)R))
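
/* FP16-to-unsigned conversions; same shape as the signed family above, with
 * saturation to [0, 255]. */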
#define _mm_ipcvtph_epu8(A)                                                    \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtph_epu8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A),           \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtph_epu8(U, A)                                           \
  ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtph_epu8(A)                                                 \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtph_epu8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A),          \
                                               (__v16hu)(W), (__mmask16)(U),   \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtph_epu8(U, A)                                        \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundph_epu8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)R))

#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)R))
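
/* FP32 sources: 4 elements per 128-bit vector, 8 per 256-bit vector, masked
 * by a per-element __mmask8 in both widths. */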
#define _mm_ipcvtps_epi8(A)                                                    \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epi8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A),             \
                                              (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epi8(U, A)                                           \
  ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epi8(A)                                                 \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epi8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
                                              (__v8su)(W), (__mmask8)(U),      \
                                              _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epi8(U, A)                                        \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epi8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
                                              (__v8su)_mm256_setzero_si256(),  \
                                              (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
                                              (__v8su)_mm256_setzero_si256(),  \
                                              (__mmask8)(U), (const int)R))
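
/* FP32-to-unsigned conversions, saturating to [0, 255]. */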
#define _mm_ipcvtps_epu8(A)                                                    \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvtps_epu8(W, U, A)                                         \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A),            \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvtps_epu8(U, A)                                           \
  ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvtps_epu8(A)                                                 \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvtps_epu8(W, U, A)                                      \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)(W), (__mmask8)(U),     \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvtps_epu8(U, A)                                        \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvt_roundps_epu8(A, R)                                        \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R)                               \
  ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)R))
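
/* The ipcvtt* families below are the truncating forms: as with other cvtt*
 * intrinsics, conversion rounds toward zero. Their _round variants accept an
 * SAE immediate (typically _MM_FROUND_NO_EXC) rather than a rounding mode. */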
#define _mm_ipcvttnebf16_epi8(A)                                               \
  ((__m128i)__builtin_ia32_vcvttnebf162ibs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epi8(W, U, A)                                    \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epi8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epi8(U, A)                                      \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttnebf16_epi8(A),       \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epi8(A)                                            \
  ((__m256i)__builtin_ia32_vcvttnebf162ibs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epi8(W, U, A)                                 \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A),   \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epi8(U, A)                                   \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttnebf16_epi8(A),   \
                                       (__v16hi)_mm256_setzero_si256()))
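
/* Truncating BF16-to-unsigned conversions. */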
#define _mm_ipcvttnebf16_epu8(A)                                               \
  ((__m128i)__builtin_ia32_vcvttnebf162iubs128((__v8bf)(__m128bh)(A)))

#define _mm_mask_ipcvttnebf16_epu8(W, U, A)                                    \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_ipcvttnebf16_epu8(A), (__v8hi)(__m128i)(W)))

#define _mm_maskz_ipcvttnebf16_epu8(U, A)                                      \
  ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U),                          \
                                       (__v8hi)_mm_ipcvttnebf16_epu8(A),       \
                                       (__v8hi)_mm_setzero_si128()))

#define _mm256_ipcvttnebf16_epu8(A)                                            \
  ((__m256i)__builtin_ia32_vcvttnebf162iubs256((__v16bf)(__m256bh)(A)))

#define _mm256_mask_ipcvttnebf16_epu8(W, U, A)                                 \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A),   \
                                       (__v16hi)(__m256i)(W)))

#define _mm256_maskz_ipcvttnebf16_epu8(U, A)                                   \
  ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U),                         \
                                       (__v16hi)_mm256_ipcvttnebf16_epu8(A),   \
                                       (__v16hi)_mm256_setzero_si256()))
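
/* Truncating FP16-to-signed conversions. */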
#define _mm_ipcvttph_epi8(A)                                                   \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epi8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A),           \
                                               (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epi8(U, A)                                          \
  ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epi8(A)                                                \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epi8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A),          \
                                               (__v16hu)(W), (__mmask16)(U),   \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epi8(U, A)                                       \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epi8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)R))

#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)R))
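
/* Truncating FP16-to-unsigned conversions. */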
#define _mm_ipcvttph_epu8(A)                                                   \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttph_epu8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A),          \
                                                (__v8hu)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttph_epu8(U, A)                                          \
  ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
      (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttph_epu8(A)                                                \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttph_epu8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A),         \
                                                (__v16hu)(W), (__mmask16)(U),  \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttph_epu8(U, A)                                       \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()),                \
      (__mmask16)(U), _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundph_epu8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
      (const int)R))

#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U),  \
      (const int)R))
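
/* Truncating FP32-to-signed conversions. */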
#define _mm_ipcvttps_epi8(A)                                                   \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epi8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A),            \
                                               (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epi8(U, A)                                          \
  ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epi8(A)                                                \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epi8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)(W), (__mmask8)(U),     \
                                               _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epi8(U, A)                                       \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epi8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)-1, (const int)R))

#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A),            \
                                               (__v8su)_mm256_setzero_si256(), \
                                               (__mmask8)(U), (const int)R))
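
/* Truncating FP32-to-unsigned conversions. */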
#define _mm_ipcvttps_epu8(A)                                                   \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask(                               \
      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))

#define _mm_mask_ipcvttps_epu8(W, U, A)                                        \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A),           \
                                                (__v4su)(W), (__mmask8)(U)))

#define _mm_maskz_ipcvttps_epu8(U, A)                                          \
  ((__m128i)__builtin_ia32_vcvttps2iubs128_mask(                               \
      (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U)))

#define _mm256_ipcvttps_epu8(A)                                                \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_mask_ipcvttps_epu8(W, U, A)                                     \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A),           \
                                                (__v8su)(W), (__mmask8)(U),    \
                                                _MM_FROUND_CUR_DIRECTION))

#define _mm256_maskz_ipcvttps_epu8(U, A)                                       \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U),    \
      _MM_FROUND_CUR_DIRECTION))

#define _mm256_ipcvtt_roundps_epu8(A, R)                                       \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
      (const int)R))

#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R)                            \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R))

#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R)                              \
  ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U),      \
      (const int)R))
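
/* Rounding-control sketch (illustrative; the 0x55 mask and 1.5f input are
 * arbitrary): convert FP32 values with round-to-nearest-even and exceptions
 * suppressed, keeping even-indexed lanes only:
 *
 *   __m256 v = _mm256_set1_ps(1.5f); // rounds to 2 under nearest-even
 *   __m256i r = _mm256_maskz_ipcvt_roundps_epi8(
 *       0x55, v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 */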
#endif // __AVX10_2SATCVTINTRIN_H