/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLINTRIN_H
15 #define __AVX512VLINTRIN_H
/* Default attributes applied to the intrinsics defined in this file: always
 * inlined, no debug info, compiled for the "avx512vl" target, with a minimum
 * vector width of 128 or 256 bits respectively. */
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
/* Small helper vector types: 4 bytes of short, 4 bytes of char and
 * 2 bytes of char. */
typedef short __v2hi __attribute__((__vector_size__(4)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef char __v2qi __attribute__((__vector_size__(2)));
/* Integer Compare */

/* 128-bit epi32 comparison shorthands: each forwards to _mm_cmp_epi32_mask or
 * _mm_mask_cmp_epi32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
    _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit epi32 comparison shorthands: each forwards to _mm256_cmp_epi32_mask
 * or _mm256_mask_cmp_epi32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
    _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
    _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit epu32 comparison shorthands: each forwards to _mm_cmp_epu32_mask or
 * _mm_mask_cmp_epu32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
    _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit epu32 comparison shorthands: each forwards to _mm256_cmp_epu32_mask
 * or _mm256_mask_cmp_epu32_mask with the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
    _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit epi64 comparison shorthands: each forwards to _mm_cmp_epi64_mask or
 * _mm_mask_cmp_epi64_mask with the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
    _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit epi64 comparison shorthands: each forwards to _mm256_cmp_epi64_mask
 * or _mm256_mask_cmp_epi64_mask with the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* 128-bit epu64 comparison shorthands: each forwards to _mm_cmp_epu64_mask or
 * _mm_mask_cmp_epu64_mask with the matching _MM_CMPINT_* predicate. */
#define _mm_cmpeq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
    _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
/* 256-bit epu64 comparison shorthands: each forwards to _mm256_cmp_epu64_mask
 * or _mm256_mask_cmp_epu64_mask with the matching _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
    _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
226 static __inline__ __m256i __DEFAULT_FN_ATTRS256
227 _mm256_mask_add_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
229 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
230 (__v8si
)_mm256_add_epi32(__A
, __B
),
234 static __inline__ __m256i __DEFAULT_FN_ATTRS256
235 _mm256_maskz_add_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
237 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
238 (__v8si
)_mm256_add_epi32(__A
, __B
),
239 (__v8si
)_mm256_setzero_si256());
242 static __inline__ __m256i __DEFAULT_FN_ATTRS256
243 _mm256_mask_add_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
245 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
246 (__v4di
)_mm256_add_epi64(__A
, __B
),
250 static __inline__ __m256i __DEFAULT_FN_ATTRS256
251 _mm256_maskz_add_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
253 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
254 (__v4di
)_mm256_add_epi64(__A
, __B
),
255 (__v4di
)_mm256_setzero_si256());
258 static __inline__ __m256i __DEFAULT_FN_ATTRS256
259 _mm256_mask_sub_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
261 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
262 (__v8si
)_mm256_sub_epi32(__A
, __B
),
266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
267 _mm256_maskz_sub_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
269 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
270 (__v8si
)_mm256_sub_epi32(__A
, __B
),
271 (__v8si
)_mm256_setzero_si256());
274 static __inline__ __m256i __DEFAULT_FN_ATTRS256
275 _mm256_mask_sub_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
277 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
278 (__v4di
)_mm256_sub_epi64(__A
, __B
),
282 static __inline__ __m256i __DEFAULT_FN_ATTRS256
283 _mm256_maskz_sub_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
285 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
286 (__v4di
)_mm256_sub_epi64(__A
, __B
),
287 (__v4di
)_mm256_setzero_si256());
290 static __inline__ __m128i __DEFAULT_FN_ATTRS128
291 _mm_mask_add_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
293 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
294 (__v4si
)_mm_add_epi32(__A
, __B
),
298 static __inline__ __m128i __DEFAULT_FN_ATTRS128
299 _mm_maskz_add_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
301 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
302 (__v4si
)_mm_add_epi32(__A
, __B
),
303 (__v4si
)_mm_setzero_si128());
306 static __inline__ __m128i __DEFAULT_FN_ATTRS128
307 _mm_mask_add_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
309 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
310 (__v2di
)_mm_add_epi64(__A
, __B
),
314 static __inline__ __m128i __DEFAULT_FN_ATTRS128
315 _mm_maskz_add_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
317 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
318 (__v2di
)_mm_add_epi64(__A
, __B
),
319 (__v2di
)_mm_setzero_si128());
322 static __inline__ __m128i __DEFAULT_FN_ATTRS128
323 _mm_mask_sub_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
325 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
326 (__v4si
)_mm_sub_epi32(__A
, __B
),
330 static __inline__ __m128i __DEFAULT_FN_ATTRS128
331 _mm_maskz_sub_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
333 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
334 (__v4si
)_mm_sub_epi32(__A
, __B
),
335 (__v4si
)_mm_setzero_si128());
338 static __inline__ __m128i __DEFAULT_FN_ATTRS128
339 _mm_mask_sub_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
341 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
342 (__v2di
)_mm_sub_epi64(__A
, __B
),
346 static __inline__ __m128i __DEFAULT_FN_ATTRS128
347 _mm_maskz_sub_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
349 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
350 (__v2di
)_mm_sub_epi64(__A
, __B
),
351 (__v2di
)_mm_setzero_si128());
354 static __inline__ __m256i __DEFAULT_FN_ATTRS256
355 _mm256_mask_mul_epi32(__m256i __W
, __mmask8 __M
, __m256i __X
, __m256i __Y
)
357 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
358 (__v4di
)_mm256_mul_epi32(__X
, __Y
),
362 static __inline__ __m256i __DEFAULT_FN_ATTRS256
363 _mm256_maskz_mul_epi32(__mmask8 __M
, __m256i __X
, __m256i __Y
)
365 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
366 (__v4di
)_mm256_mul_epi32(__X
, __Y
),
367 (__v4di
)_mm256_setzero_si256());
370 static __inline__ __m128i __DEFAULT_FN_ATTRS128
371 _mm_mask_mul_epi32(__m128i __W
, __mmask8 __M
, __m128i __X
, __m128i __Y
)
373 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
374 (__v2di
)_mm_mul_epi32(__X
, __Y
),
378 static __inline__ __m128i __DEFAULT_FN_ATTRS128
379 _mm_maskz_mul_epi32(__mmask8 __M
, __m128i __X
, __m128i __Y
)
381 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
382 (__v2di
)_mm_mul_epi32(__X
, __Y
),
383 (__v2di
)_mm_setzero_si128());
386 static __inline__ __m256i __DEFAULT_FN_ATTRS256
387 _mm256_mask_mul_epu32(__m256i __W
, __mmask8 __M
, __m256i __X
, __m256i __Y
)
389 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
390 (__v4di
)_mm256_mul_epu32(__X
, __Y
),
394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
395 _mm256_maskz_mul_epu32(__mmask8 __M
, __m256i __X
, __m256i __Y
)
397 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
398 (__v4di
)_mm256_mul_epu32(__X
, __Y
),
399 (__v4di
)_mm256_setzero_si256());
402 static __inline__ __m128i __DEFAULT_FN_ATTRS128
403 _mm_mask_mul_epu32(__m128i __W
, __mmask8 __M
, __m128i __X
, __m128i __Y
)
405 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
406 (__v2di
)_mm_mul_epu32(__X
, __Y
),
410 static __inline__ __m128i __DEFAULT_FN_ATTRS128
411 _mm_maskz_mul_epu32(__mmask8 __M
, __m128i __X
, __m128i __Y
)
413 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
414 (__v2di
)_mm_mul_epu32(__X
, __Y
),
415 (__v2di
)_mm_setzero_si128());
418 static __inline__ __m256i __DEFAULT_FN_ATTRS256
419 _mm256_maskz_mullo_epi32(__mmask8 __M
, __m256i __A
, __m256i __B
)
421 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
422 (__v8si
)_mm256_mullo_epi32(__A
, __B
),
423 (__v8si
)_mm256_setzero_si256());
426 static __inline__ __m256i __DEFAULT_FN_ATTRS256
427 _mm256_mask_mullo_epi32(__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
)
429 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
430 (__v8si
)_mm256_mullo_epi32(__A
, __B
),
434 static __inline__ __m128i __DEFAULT_FN_ATTRS128
435 _mm_maskz_mullo_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
)
437 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
438 (__v4si
)_mm_mullo_epi32(__A
, __B
),
439 (__v4si
)_mm_setzero_si128());
442 static __inline__ __m128i __DEFAULT_FN_ATTRS128
443 _mm_mask_mullo_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
445 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
446 (__v4si
)_mm_mullo_epi32(__A
, __B
),
450 static __inline__ __m256i __DEFAULT_FN_ATTRS256
451 _mm256_and_epi32(__m256i __a
, __m256i __b
)
453 return (__m256i
)((__v8su
)__a
& (__v8su
)__b
);
456 static __inline__ __m256i __DEFAULT_FN_ATTRS256
457 _mm256_mask_and_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
459 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
460 (__v8si
)_mm256_and_epi32(__A
, __B
),
464 static __inline__ __m256i __DEFAULT_FN_ATTRS256
465 _mm256_maskz_and_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
467 return (__m256i
)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
470 static __inline__ __m128i __DEFAULT_FN_ATTRS128
471 _mm_and_epi32(__m128i __a
, __m128i __b
)
473 return (__m128i
)((__v4su
)__a
& (__v4su
)__b
);
476 static __inline__ __m128i __DEFAULT_FN_ATTRS128
477 _mm_mask_and_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
479 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
480 (__v4si
)_mm_and_epi32(__A
, __B
),
484 static __inline__ __m128i __DEFAULT_FN_ATTRS128
485 _mm_maskz_and_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
487 return (__m128i
)_mm_mask_and_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
490 static __inline__ __m256i __DEFAULT_FN_ATTRS256
491 _mm256_andnot_epi32(__m256i __A
, __m256i __B
)
493 return (__m256i
)(~(__v8su
)__A
& (__v8su
)__B
);
496 static __inline__ __m256i __DEFAULT_FN_ATTRS256
497 _mm256_mask_andnot_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
499 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
500 (__v8si
)_mm256_andnot_epi32(__A
, __B
),
504 static __inline__ __m256i __DEFAULT_FN_ATTRS256
505 _mm256_maskz_andnot_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
507 return (__m256i
)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
511 static __inline__ __m128i __DEFAULT_FN_ATTRS128
512 _mm_andnot_epi32(__m128i __A
, __m128i __B
)
514 return (__m128i
)(~(__v4su
)__A
& (__v4su
)__B
);
517 static __inline__ __m128i __DEFAULT_FN_ATTRS128
518 _mm_mask_andnot_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
520 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
521 (__v4si
)_mm_andnot_epi32(__A
, __B
),
525 static __inline__ __m128i __DEFAULT_FN_ATTRS128
526 _mm_maskz_andnot_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
528 return (__m128i
)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
531 static __inline__ __m256i __DEFAULT_FN_ATTRS256
532 _mm256_or_epi32(__m256i __a
, __m256i __b
)
534 return (__m256i
)((__v8su
)__a
| (__v8su
)__b
);
537 static __inline__ __m256i __DEFAULT_FN_ATTRS256
538 _mm256_mask_or_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
540 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
541 (__v8si
)_mm256_or_epi32(__A
, __B
),
545 static __inline__ __m256i __DEFAULT_FN_ATTRS256
546 _mm256_maskz_or_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
548 return (__m256i
)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
551 static __inline__ __m128i __DEFAULT_FN_ATTRS128
552 _mm_or_epi32(__m128i __a
, __m128i __b
)
554 return (__m128i
)((__v4su
)__a
| (__v4su
)__b
);
557 static __inline__ __m128i __DEFAULT_FN_ATTRS128
558 _mm_mask_or_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
560 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
561 (__v4si
)_mm_or_epi32(__A
, __B
),
565 static __inline__ __m128i __DEFAULT_FN_ATTRS128
566 _mm_maskz_or_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
568 return (__m128i
)_mm_mask_or_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
571 static __inline__ __m256i __DEFAULT_FN_ATTRS256
572 _mm256_xor_epi32(__m256i __a
, __m256i __b
)
574 return (__m256i
)((__v8su
)__a
^ (__v8su
)__b
);
577 static __inline__ __m256i __DEFAULT_FN_ATTRS256
578 _mm256_mask_xor_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
580 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
581 (__v8si
)_mm256_xor_epi32(__A
, __B
),
585 static __inline__ __m256i __DEFAULT_FN_ATTRS256
586 _mm256_maskz_xor_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
588 return (__m256i
)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U
, __A
, __B
);
591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
592 _mm_xor_epi32(__m128i __a
, __m128i __b
)
594 return (__m128i
)((__v4su
)__a
^ (__v4su
)__b
);
597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
598 _mm_mask_xor_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
600 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
601 (__v4si
)_mm_xor_epi32(__A
, __B
),
605 static __inline__ __m128i __DEFAULT_FN_ATTRS128
606 _mm_maskz_xor_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
608 return (__m128i
)_mm_mask_xor_epi32(_mm_setzero_si128(), __U
, __A
, __B
);
611 static __inline__ __m256i __DEFAULT_FN_ATTRS256
612 _mm256_and_epi64(__m256i __a
, __m256i __b
)
614 return (__m256i
)((__v4du
)__a
& (__v4du
)__b
);
617 static __inline__ __m256i __DEFAULT_FN_ATTRS256
618 _mm256_mask_and_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
620 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
621 (__v4di
)_mm256_and_epi64(__A
, __B
),
625 static __inline__ __m256i __DEFAULT_FN_ATTRS256
626 _mm256_maskz_and_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
628 return (__m256i
)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
631 static __inline__ __m128i __DEFAULT_FN_ATTRS128
632 _mm_and_epi64(__m128i __a
, __m128i __b
)
634 return (__m128i
)((__v2du
)__a
& (__v2du
)__b
);
637 static __inline__ __m128i __DEFAULT_FN_ATTRS128
638 _mm_mask_and_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
640 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
641 (__v2di
)_mm_and_epi64(__A
, __B
),
645 static __inline__ __m128i __DEFAULT_FN_ATTRS128
646 _mm_maskz_and_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
648 return (__m128i
)_mm_mask_and_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
651 static __inline__ __m256i __DEFAULT_FN_ATTRS256
652 _mm256_andnot_epi64(__m256i __A
, __m256i __B
)
654 return (__m256i
)(~(__v4du
)__A
& (__v4du
)__B
);
657 static __inline__ __m256i __DEFAULT_FN_ATTRS256
658 _mm256_mask_andnot_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
660 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
661 (__v4di
)_mm256_andnot_epi64(__A
, __B
),
665 static __inline__ __m256i __DEFAULT_FN_ATTRS256
666 _mm256_maskz_andnot_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
668 return (__m256i
)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
672 static __inline__ __m128i __DEFAULT_FN_ATTRS128
673 _mm_andnot_epi64(__m128i __A
, __m128i __B
)
675 return (__m128i
)(~(__v2du
)__A
& (__v2du
)__B
);
678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
679 _mm_mask_andnot_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
681 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
682 (__v2di
)_mm_andnot_epi64(__A
, __B
),
686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
687 _mm_maskz_andnot_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
689 return (__m128i
)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
692 static __inline__ __m256i __DEFAULT_FN_ATTRS256
693 _mm256_or_epi64(__m256i __a
, __m256i __b
)
695 return (__m256i
)((__v4du
)__a
| (__v4du
)__b
);
698 static __inline__ __m256i __DEFAULT_FN_ATTRS256
699 _mm256_mask_or_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
701 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
702 (__v4di
)_mm256_or_epi64(__A
, __B
),
706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
707 _mm256_maskz_or_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
709 return (__m256i
)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
712 static __inline__ __m128i __DEFAULT_FN_ATTRS128
713 _mm_or_epi64(__m128i __a
, __m128i __b
)
715 return (__m128i
)((__v2du
)__a
| (__v2du
)__b
);
718 static __inline__ __m128i __DEFAULT_FN_ATTRS128
719 _mm_mask_or_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
721 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
722 (__v2di
)_mm_or_epi64(__A
, __B
),
726 static __inline__ __m128i __DEFAULT_FN_ATTRS128
727 _mm_maskz_or_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
729 return (__m128i
)_mm_mask_or_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
732 static __inline__ __m256i __DEFAULT_FN_ATTRS256
733 _mm256_xor_epi64(__m256i __a
, __m256i __b
)
735 return (__m256i
)((__v4du
)__a
^ (__v4du
)__b
);
738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
739 _mm256_mask_xor_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
741 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
742 (__v4di
)_mm256_xor_epi64(__A
, __B
),
746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
747 _mm256_maskz_xor_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
749 return (__m256i
)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U
, __A
, __B
);
752 static __inline__ __m128i __DEFAULT_FN_ATTRS128
753 _mm_xor_epi64(__m128i __a
, __m128i __b
)
755 return (__m128i
)((__v2du
)__a
^ (__v2du
)__b
);
758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
759 _mm_mask_xor_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
,
762 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
763 (__v2di
)_mm_xor_epi64(__A
, __B
),
767 static __inline__ __m128i __DEFAULT_FN_ATTRS128
768 _mm_maskz_xor_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
770 return (__m128i
)_mm_mask_xor_epi64(_mm_setzero_si128(), __U
, __A
, __B
);
/* Generic 32-bit integer compares. `p` is the comparison predicate passed to
 * the builtin as an int. The unmasked forms pass an all-ones incoming mask;
 * the masked forms pass the caller's mask `m`. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic 64-bit integer compares. `p` is the comparison predicate passed to
 * the builtin as an int. The unmasked forms pass an all-ones incoming mask;
 * the masked forms pass the caller's mask `m`. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* Generic floating-point compares producing a mask. `p` is the comparison
 * predicate passed to the builtin as an int. The unmasked forms pass an
 * all-ones incoming mask; the masked forms pass the caller's mask `m`. */
#define _mm256_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm256_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_ps_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))

#define _mm_cmp_pd_mask(a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_pd_mask(m, a, b, p)  \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
893 static __inline__ __m128d __DEFAULT_FN_ATTRS128
894 _mm_mask_fmadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
896 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
897 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
903 static __inline__ __m128d __DEFAULT_FN_ATTRS128
904 _mm_mask3_fmadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
906 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
907 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
913 static __inline__ __m128d __DEFAULT_FN_ATTRS128
914 _mm_maskz_fmadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
916 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
917 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
920 (__v2df
)_mm_setzero_pd());
923 static __inline__ __m128d __DEFAULT_FN_ATTRS128
924 _mm_mask_fmsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
926 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
927 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
/* _mm_maskz_fmsub_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(__A, ...), closing with _mm_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
933 static __inline__ __m128d __DEFAULT_FN_ATTRS128
934 _mm_maskz_fmsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
936 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
937 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
940 (__v2df
)_mm_setzero_pd());
/* _mm_mask3_fnmadd_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(-__A, ...), i.e. the first operand is negated; the remaining operands are on lines missing from this excerpt. */
943 static __inline__ __m128d __DEFAULT_FN_ATTRS128
944 _mm_mask3_fnmadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
946 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
947 __builtin_ia32_vfmaddpd (-(__v2df
) __A
,
/* _mm_maskz_fnmadd_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(-__A, ...), closing with _mm_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
953 static __inline__ __m128d __DEFAULT_FN_ATTRS128
954 _mm_maskz_fnmadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
956 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
957 __builtin_ia32_vfmaddpd (-(__v2df
) __A
,
960 (__v2df
)_mm_setzero_pd());
/* _mm_maskz_fnmsub_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(-__A, ...), closing with _mm_setzero_pd() as the last argument; the operands in between (and any negation on them) are missing from this excerpt. */
963 static __inline__ __m128d __DEFAULT_FN_ATTRS128
964 _mm_maskz_fnmsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
966 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
967 __builtin_ia32_vfmaddpd (-(__v2df
) __A
,
970 (__v2df
)_mm_setzero_pd());
/* _mm256_mask_fmadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); the remaining operands are on lines missing from this excerpt. */
973 static __inline__ __m256d __DEFAULT_FN_ATTRS256
974 _mm256_mask_fmadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
976 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
977 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm256_mask3_fmadd_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); the remaining operands are on lines missing from this excerpt. */
983 static __inline__ __m256d __DEFAULT_FN_ATTRS256
984 _mm256_mask3_fmadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
986 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
987 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm256_maskz_fmadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
993 static __inline__ __m256d __DEFAULT_FN_ATTRS256
994 _mm256_maskz_fmadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
996 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
997 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
1000 (__v4df
)_mm256_setzero_pd());
/* _mm256_mask_fmsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1003 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1004 _mm256_mask_fmsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1006 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1007 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm256_maskz_fmsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1013 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1014 _mm256_maskz_fmsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1016 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1017 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
1020 (__v4df
)_mm256_setzero_pd());
/* _mm256_mask3_fnmadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(-__A, ...), i.e. the first operand is negated; the remaining operands are on lines missing from this excerpt. */
1023 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1024 _mm256_mask3_fnmadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1026 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1027 __builtin_ia32_vfmaddpd256 (-(__v4df
) __A
,
/* _mm256_maskz_fnmadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(-__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1033 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1034 _mm256_maskz_fnmadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1036 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1037 __builtin_ia32_vfmaddpd256 (-(__v4df
) __A
,
1040 (__v4df
)_mm256_setzero_pd());
/* _mm256_maskz_fnmsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(-__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between (and any negation on them) are missing from this excerpt. */
1043 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1044 _mm256_maskz_fnmsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1046 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1047 __builtin_ia32_vfmaddpd256 (-(__v4df
) __A
,
1050 (__v4df
)_mm256_setzero_pd());
/* _mm_mask_fmadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); the remaining operands are on lines missing from this excerpt. */
1053 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1054 _mm_mask_fmadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1056 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1057 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm_mask3_fmadd_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); the remaining operands are on lines missing from this excerpt. */
1063 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1064 _mm_mask3_fmadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1066 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1067 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm_maskz_fmadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1073 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1074 _mm_maskz_fmadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1076 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1077 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
1080 (__v4sf
)_mm_setzero_ps());
/* _mm_mask_fmsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1083 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1084 _mm_mask_fmsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1086 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1087 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm_maskz_fmsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1093 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1094 _mm_maskz_fmsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1096 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1097 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
1100 (__v4sf
)_mm_setzero_ps());
/* _mm_mask3_fnmadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(-__A, ...), i.e. the first operand is negated; the remaining operands are on lines missing from this excerpt. */
1103 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1104 _mm_mask3_fnmadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1106 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1107 __builtin_ia32_vfmaddps (-(__v4sf
) __A
,
/* _mm_maskz_fnmadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(-__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1113 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1114 _mm_maskz_fnmadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1116 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1117 __builtin_ia32_vfmaddps (-(__v4sf
) __A
,
1120 (__v4sf
)_mm_setzero_ps());
/* _mm_maskz_fnmsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(-__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between (and any negation on them) are missing from this excerpt. */
1123 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1124 _mm_maskz_fnmsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1126 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1127 __builtin_ia32_vfmaddps (-(__v4sf
) __A
,
1130 (__v4sf
)_mm_setzero_ps());
/* _mm256_mask_fmadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); the remaining operands are on lines missing from this excerpt. */
1133 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1134 _mm256_mask_fmadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1136 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1137 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm256_mask3_fmadd_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); the remaining operands are on lines missing from this excerpt. */
1143 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1144 _mm256_mask3_fmadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1146 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1147 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm256_maskz_fmadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1153 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1154 _mm256_maskz_fmadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1156 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1157 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
1160 (__v8sf
)_mm256_setzero_ps());
/* _mm256_mask_fmsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1163 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1164 _mm256_mask_fmsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1166 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1167 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm256_maskz_fmsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1173 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1174 _mm256_maskz_fmsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1176 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1177 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
1180 (__v8sf
)_mm256_setzero_ps());
/* _mm256_mask3_fnmadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(-__A, ...), i.e. the first operand is negated; the remaining operands are on lines missing from this excerpt. */
1183 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1184 _mm256_mask3_fnmadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1186 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1187 __builtin_ia32_vfmaddps256 (-(__v8sf
) __A
,
/* _mm256_maskz_fnmadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(-__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1193 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1194 _mm256_maskz_fnmadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1196 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1197 __builtin_ia32_vfmaddps256 (-(__v8sf
) __A
,
1200 (__v8sf
)_mm256_setzero_ps());
/* _mm256_maskz_fnmsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(-__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between (and any negation on them) are missing from this excerpt. */
1203 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1204 _mm256_maskz_fnmsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1206 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1207 __builtin_ia32_vfmaddps256 (-(__v8sf
) __A
,
1210 (__v8sf
)_mm256_setzero_ps());
/* _mm_mask_fmaddsub_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...); the remaining operands are on lines missing from this excerpt. */
1213 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1214 _mm_mask_fmaddsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1216 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1217 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
/* _mm_mask3_fmaddsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...); the remaining operands are on lines missing from this excerpt. */
1223 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1224 _mm_mask3_fmaddsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1226 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1227 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
/* _mm_maskz_fmaddsub_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...), closing with _mm_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1233 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1234 _mm_maskz_fmaddsub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1236 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1237 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
1240 (__v2df
)_mm_setzero_pd());
/* _mm_mask_fmsubadd_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1243 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1244 _mm_mask_fmsubadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1246 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1247 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
/* _mm_maskz_fmsubadd_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...), closing with _mm_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1253 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254 _mm_maskz_fmsubadd_pd(__mmask8 __U
, __m128d __A
, __m128d __B
, __m128d __C
)
1256 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1257 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
1260 (__v2df
)_mm_setzero_pd());
/* _mm256_mask_fmaddsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...); the remaining operands are on lines missing from this excerpt. */
1263 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1264 _mm256_mask_fmaddsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1266 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1267 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
/* _mm256_mask3_fmaddsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...); the remaining operands are on lines missing from this excerpt. */
1273 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1274 _mm256_mask3_fmaddsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1276 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1277 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
/* _mm256_maskz_fmaddsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1283 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1284 _mm256_maskz_fmaddsub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1286 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1287 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
1290 (__v4df
)_mm256_setzero_pd());
/* _mm256_mask_fmsubadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1293 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1294 _mm256_mask_fmsubadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1296 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1297 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
/* _mm256_maskz_fmsubadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...), closing with _mm256_setzero_pd() as the last argument; the operands in between are missing from this excerpt. */
1303 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1304 _mm256_maskz_fmsubadd_pd(__mmask8 __U
, __m256d __A
, __m256d __B
, __m256d __C
)
1306 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1307 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
1310 (__v4df
)_mm256_setzero_pd());
/* _mm_mask_fmaddsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...); the remaining operands are on lines missing from this excerpt. */
1313 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1314 _mm_mask_fmaddsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1316 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1317 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
/* _mm_mask3_fmaddsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...); the remaining operands are on lines missing from this excerpt. */
1323 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1324 _mm_mask3_fmaddsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1326 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1327 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
/* _mm_maskz_fmaddsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1333 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1334 _mm_maskz_fmaddsub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1336 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1337 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
1340 (__v4sf
)_mm_setzero_ps());
/* _mm_mask_fmsubadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1343 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1344 _mm_mask_fmsubadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1346 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1347 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
/* _mm_maskz_fmsubadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...), closing with _mm_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1353 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354 _mm_maskz_fmsubadd_ps(__mmask8 __U
, __m128 __A
, __m128 __B
, __m128 __C
)
1356 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1357 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
1360 (__v4sf
)_mm_setzero_ps());
/* _mm256_mask_fmaddsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...). NOTE(review): the parameter list breaks off after __B; the final parameter and the remaining call operands are on lines missing from this excerpt. */
1363 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364 _mm256_mask_fmaddsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
,
1367 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1368 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
/* _mm256_mask3_fmaddsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...); the remaining operands are on lines missing from this excerpt. */
1374 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1375 _mm256_mask3_fmaddsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1377 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1378 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
/* _mm256_maskz_fmaddsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1384 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1385 _mm256_maskz_fmaddsub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1387 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1388 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
1391 (__v8sf
)_mm256_setzero_ps());
/* _mm256_mask_fmsubadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1394 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1395 _mm256_mask_fmsubadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1397 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1398 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
/* _mm256_maskz_fmsubadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...), closing with _mm256_setzero_ps() as the last argument; the operands in between are missing from this excerpt. */
1404 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1405 _mm256_maskz_fmsubadd_ps(__mmask8 __U
, __m256 __A
, __m256 __B
, __m256 __C
)
1407 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1408 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
1411 (__v8sf
)_mm256_setzero_ps());
/* _mm_mask3_fmsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1414 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1415 _mm_mask3_fmsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1417 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1418 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
/* _mm256_mask3_fmsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1424 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1425 _mm256_mask3_fmsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1427 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1428 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm_mask3_fmsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1434 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1435 _mm_mask3_fmsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1437 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1438 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm256_mask3_fmsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1444 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1445 _mm256_mask3_fmsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1447 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1448 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm_mask3_fmsubadd_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddsubpd(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1454 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1455 _mm_mask3_fmsubadd_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1457 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1458 __builtin_ia32_vfmaddsubpd ((__v2df
) __A
,
/* _mm256_mask3_fmsubadd_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddsubpd256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1464 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1465 _mm256_mask3_fmsubadd_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1467 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1468 __builtin_ia32_vfmaddsubpd256 ((__v4df
) __A
,
/* _mm_mask3_fmsubadd_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddsubps(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1474 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1475 _mm_mask3_fmsubadd_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1477 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1478 __builtin_ia32_vfmaddsubps ((__v4sf
) __A
,
/* _mm256_mask3_fmsubadd_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddsubps256(__A, ...); the remaining operands (and any negation applied to them) are on lines missing from this excerpt. */
1484 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1485 _mm256_mask3_fmsubadd_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1487 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1488 __builtin_ia32_vfmaddsubps256 ((__v8sf
) __A
,
/* _mm_mask_fnmadd_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1494 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1495 _mm_mask_fnmadd_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1497 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1498 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
/* _mm256_mask_fnmadd_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1504 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1505 _mm256_mask_fnmadd_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1507 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1508 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm_mask_fnmadd_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1514 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1515 _mm_mask_fnmadd_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1517 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1518 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm256_mask_fnmadd_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1524 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1525 _mm256_mask_fnmadd_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1527 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1528 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm_mask_fnmsub_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1534 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1535 _mm_mask_fnmsub_pd(__m128d __A
, __mmask8 __U
, __m128d __B
, __m128d __C
)
1537 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1538 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
/* _mm_mask3_fnmsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_128 result (mask __U) over __builtin_ia32_vfmaddpd(__A, ...); __A is passed un-negated, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1544 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1545 _mm_mask3_fnmsub_pd(__m128d __A
, __m128d __B
, __m128d __C
, __mmask8 __U
)
1547 return (__m128d
) __builtin_ia32_selectpd_128((__mmask8
) __U
,
1548 __builtin_ia32_vfmaddpd ((__v2df
) __A
,
/* _mm256_mask_fnmsub_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1554 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555 _mm256_mask_fnmsub_pd(__m256d __A
, __mmask8 __U
, __m256d __B
, __m256d __C
)
1557 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1558 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm256_mask3_fnmsub_pd: mask __U is the last parameter here. Returns the __builtin_ia32_selectpd_256 result (mask __U) over __builtin_ia32_vfmaddpd256(__A, ...); __A is passed un-negated, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1564 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1565 _mm256_mask3_fnmsub_pd(__m256d __A
, __m256d __B
, __m256d __C
, __mmask8 __U
)
1567 return (__m256d
) __builtin_ia32_selectpd_256((__mmask8
) __U
,
1568 __builtin_ia32_vfmaddpd256 ((__v4df
) __A
,
/* _mm_mask_fnmsub_ps: returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1574 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1575 _mm_mask_fnmsub_ps(__m128 __A
, __mmask8 __U
, __m128 __B
, __m128 __C
)
1577 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1578 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm_mask3_fnmsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_128 result (mask __U) over __builtin_ia32_vfmaddps(__A, ...); __A is passed un-negated, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1584 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1585 _mm_mask3_fnmsub_ps(__m128 __A
, __m128 __B
, __m128 __C
, __mmask8 __U
)
1587 return (__m128
) __builtin_ia32_selectps_128((__mmask8
) __U
,
1588 __builtin_ia32_vfmaddps ((__v4sf
) __A
,
/* _mm256_mask_fnmsub_ps: returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); __A is passed un-negated here, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1594 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1595 _mm256_mask_fnmsub_ps(__m256 __A
, __mmask8 __U
, __m256 __B
, __m256 __C
)
1597 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1598 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm256_mask3_fnmsub_ps: mask __U is the last parameter here. Returns the __builtin_ia32_selectps_256 result (mask __U) over __builtin_ia32_vfmaddps256(__A, ...); __A is passed un-negated, and the remaining operands (with any negation) are on lines missing from this excerpt. */
1604 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1605 _mm256_mask3_fnmsub_ps(__m256 __A
, __m256 __B
, __m256 __C
, __mmask8 __U
)
1607 return (__m256
) __builtin_ia32_selectps_256((__mmask8
) __U
,
1608 __builtin_ia32_vfmaddps256 ((__v8sf
) __A
,
/* _mm_mask_add_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) with _mm_add_pd(__A, __B) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1614 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1615 _mm_mask_add_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
1616 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
1617 (__v2df
)_mm_add_pd(__A
, __B
),
/* _mm_maskz_add_pd: returns __builtin_ia32_selectpd_128(__U, _mm_add_pd(__A, __B), _mm_setzero_pd()), with the vector arguments cast to __v2df and the result cast to __m128d. */
1621 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1622 _mm_maskz_add_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
1623 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
1624 (__v2df
)_mm_add_pd(__A
, __B
),
1625 (__v2df
)_mm_setzero_pd());
/* _mm256_mask_add_pd: returns the __builtin_ia32_selectpd_256 result (mask __U) with _mm256_add_pd(__A, __B) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1628 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1629 _mm256_mask_add_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
1630 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
1631 (__v4df
)_mm256_add_pd(__A
, __B
),
/* _mm256_maskz_add_pd: returns __builtin_ia32_selectpd_256(__U, _mm256_add_pd(__A, __B), _mm256_setzero_pd()), with the vector arguments cast to __v4df and the result cast to __m256d. */
1635 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1636 _mm256_maskz_add_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
1637 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
1638 (__v4df
)_mm256_add_pd(__A
, __B
),
1639 (__v4df
)_mm256_setzero_pd());
/* _mm_mask_add_ps: returns the __builtin_ia32_selectps_128 result (mask __U) with _mm_add_ps(__A, __B) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1642 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1643 _mm_mask_add_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
1644 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
1645 (__v4sf
)_mm_add_ps(__A
, __B
),
/* _mm_maskz_add_ps: returns __builtin_ia32_selectps_128(__U, _mm_add_ps(__A, __B), _mm_setzero_ps()), with the vector arguments cast to __v4sf and the result cast to __m128. */
1649 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650 _mm_maskz_add_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
1651 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
1652 (__v4sf
)_mm_add_ps(__A
, __B
),
1653 (__v4sf
)_mm_setzero_ps());
/* _mm256_mask_add_ps: returns the __builtin_ia32_selectps_256 result (mask __U) with _mm256_add_ps(__A, __B) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1656 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1657 _mm256_mask_add_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
1658 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
1659 (__v8sf
)_mm256_add_ps(__A
, __B
),
/* _mm256_maskz_add_ps: returns __builtin_ia32_selectps_256(__U, _mm256_add_ps(__A, __B), _mm256_setzero_ps()), with the vector arguments cast to __v8sf and the result cast to __m256. */
1663 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1664 _mm256_maskz_add_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
1665 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
1666 (__v8sf
)_mm256_add_ps(__A
, __B
),
1667 (__v8sf
)_mm256_setzero_ps());
/* _mm_mask_blend_epi32: returns, cast to __m128i, a __builtin_ia32_selectd_128 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1670 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1671 _mm_mask_blend_epi32 (__mmask8 __U
, __m128i __A
, __m128i __W
) {
1672 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
/* _mm256_mask_blend_epi32: returns, cast to __m256i, a __builtin_ia32_selectd_256 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1677 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1678 _mm256_mask_blend_epi32 (__mmask8 __U
, __m256i __A
, __m256i __W
) {
1679 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
/* _mm_mask_blend_pd: returns, cast to __m128d, a __builtin_ia32_selectpd_128 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1684 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1685 _mm_mask_blend_pd (__mmask8 __U
, __m128d __A
, __m128d __W
) {
1686 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
/* _mm256_mask_blend_pd: returns, cast to __m256d, a __builtin_ia32_selectpd_256 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1691 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1692 _mm256_mask_blend_pd (__mmask8 __U
, __m256d __A
, __m256d __W
) {
1693 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
/* _mm_mask_blend_ps: returns, cast to __m128, a __builtin_ia32_selectps_128 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1698 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1699 _mm_mask_blend_ps (__mmask8 __U
, __m128 __A
, __m128 __W
) {
1700 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
/* _mm256_mask_blend_ps: returns, cast to __m256, a __builtin_ia32_selectps_256 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1705 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1706 _mm256_mask_blend_ps (__mmask8 __U
, __m256 __A
, __m256 __W
) {
1707 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
/* _mm_mask_blend_epi64: returns, cast to __m128i, a __builtin_ia32_selectq_128 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1712 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1713 _mm_mask_blend_epi64 (__mmask8 __U
, __m128i __A
, __m128i __W
) {
1714 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
/* _mm256_mask_blend_epi64: returns, cast to __m256i, a __builtin_ia32_selectq_256 call whose first argument is mask __U; how __A and __W are passed is on lines missing from this excerpt. */
1719 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1720 _mm256_mask_blend_epi64 (__mmask8 __U
, __m256i __A
, __m256i __W
) {
1721 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
/* _mm_mask_compress_pd: returns, cast to __m128d, a __builtin_ia32_compressdf128_mask call whose first argument is __A cast to __v2df; how __W and __U are passed is on lines missing from this excerpt. */
1726 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1727 _mm_mask_compress_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
1728 return (__m128d
) __builtin_ia32_compressdf128_mask ((__v2df
) __A
,
/* _mm_maskz_compress_pd: returns, cast to __m128d, a __builtin_ia32_compressdf128_mask call whose first argument is __A cast to __v2df; the remaining arguments (including how __U is passed) are on lines missing from this excerpt. */
1733 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1734 _mm_maskz_compress_pd (__mmask8 __U
, __m128d __A
) {
1735 return (__m128d
) __builtin_ia32_compressdf128_mask ((__v2df
) __A
,
/* _mm256_mask_compress_pd: returns, cast to __m256d, a __builtin_ia32_compressdf256_mask call whose first argument is __A cast to __v4df; how __W and __U are passed is on lines missing from this excerpt. */
1741 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1742 _mm256_mask_compress_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
1743 return (__m256d
) __builtin_ia32_compressdf256_mask ((__v4df
) __A
,
/* _mm256_maskz_compress_pd: returns, cast to __m256d, a __builtin_ia32_compressdf256_mask call on __A (cast to __v4df) followed by _mm256_setzero_pd(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1748 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1749 _mm256_maskz_compress_pd (__mmask8 __U
, __m256d __A
) {
1750 return (__m256d
) __builtin_ia32_compressdf256_mask ((__v4df
) __A
,
1752 _mm256_setzero_pd (),
/* _mm_mask_compress_epi64: returns, cast to __m128i, a __builtin_ia32_compressdi128_mask call whose first argument is __A cast to __v2di; how __W and __U are passed is on lines missing from this excerpt. */
1756 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1757 _mm_mask_compress_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
1758 return (__m128i
) __builtin_ia32_compressdi128_mask ((__v2di
) __A
,
/* _mm_maskz_compress_epi64: returns, cast to __m128i, a __builtin_ia32_compressdi128_mask call on __A (cast to __v2di) followed by _mm_setzero_si128(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1763 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764 _mm_maskz_compress_epi64 (__mmask8 __U
, __m128i __A
) {
1765 return (__m128i
) __builtin_ia32_compressdi128_mask ((__v2di
) __A
,
1767 _mm_setzero_si128 (),
/* _mm256_mask_compress_epi64: returns, cast to __m256i, a __builtin_ia32_compressdi256_mask call whose first argument is __A cast to __v4di; how __W and __U are passed is on lines missing from this excerpt. */
1771 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1772 _mm256_mask_compress_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
1773 return (__m256i
) __builtin_ia32_compressdi256_mask ((__v4di
) __A
,
/* _mm256_maskz_compress_epi64: returns, cast to __m256i, a __builtin_ia32_compressdi256_mask call on __A (cast to __v4di) followed by _mm256_setzero_si256(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1778 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1779 _mm256_maskz_compress_epi64 (__mmask8 __U
, __m256i __A
) {
1780 return (__m256i
) __builtin_ia32_compressdi256_mask ((__v4di
) __A
,
1782 _mm256_setzero_si256 (),
/* _mm_mask_compress_ps: returns, cast to __m128, a __builtin_ia32_compresssf128_mask call whose first argument is __A cast to __v4sf; how __W and __U are passed is on lines missing from this excerpt. */
1786 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1787 _mm_mask_compress_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
1788 return (__m128
) __builtin_ia32_compresssf128_mask ((__v4sf
) __A
,
/* _mm_maskz_compress_ps: returns, cast to __m128, a __builtin_ia32_compresssf128_mask call whose first argument is __A cast to __v4sf; the remaining arguments (including how __U is passed) are on lines missing from this excerpt. */
1793 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1794 _mm_maskz_compress_ps (__mmask8 __U
, __m128 __A
) {
1795 return (__m128
) __builtin_ia32_compresssf128_mask ((__v4sf
) __A
,
/* _mm256_mask_compress_ps: returns, cast to __m256, a __builtin_ia32_compresssf256_mask call whose first argument is __A cast to __v8sf; how __W and __U are passed is on lines missing from this excerpt. */
1801 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1802 _mm256_mask_compress_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
1803 return (__m256
) __builtin_ia32_compresssf256_mask ((__v8sf
) __A
,
/* _mm256_maskz_compress_ps: returns, cast to __m256, a __builtin_ia32_compresssf256_mask call on __A (cast to __v8sf) followed by _mm256_setzero_ps(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1808 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1809 _mm256_maskz_compress_ps (__mmask8 __U
, __m256 __A
) {
1810 return (__m256
) __builtin_ia32_compresssf256_mask ((__v8sf
) __A
,
1812 _mm256_setzero_ps (),
/* _mm_mask_compress_epi32: returns, cast to __m128i, a __builtin_ia32_compresssi128_mask call whose first argument is __A cast to __v4si; how __W and __U are passed is on lines missing from this excerpt. */
1816 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1817 _mm_mask_compress_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
1818 return (__m128i
) __builtin_ia32_compresssi128_mask ((__v4si
) __A
,
/* _mm_maskz_compress_epi32: returns, cast to __m128i, a __builtin_ia32_compresssi128_mask call on __A (cast to __v4si) followed by _mm_setzero_si128(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1823 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1824 _mm_maskz_compress_epi32 (__mmask8 __U
, __m128i __A
) {
1825 return (__m128i
) __builtin_ia32_compresssi128_mask ((__v4si
) __A
,
1827 _mm_setzero_si128 (),
/* _mm256_mask_compress_epi32: returns, cast to __m256i, a __builtin_ia32_compresssi256_mask call whose first argument is __A cast to __v8si; how __W and __U are passed is on lines missing from this excerpt. */
1831 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1832 _mm256_mask_compress_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
1833 return (__m256i
) __builtin_ia32_compresssi256_mask ((__v8si
) __A
,
/* _mm256_maskz_compress_epi32: returns, cast to __m256i, a __builtin_ia32_compresssi256_mask call on __A (cast to __v8si) followed by _mm256_setzero_si256(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1838 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1839 _mm256_maskz_compress_epi32 (__mmask8 __U
, __m256i __A
) {
1840 return (__m256i
) __builtin_ia32_compresssi256_mask ((__v8si
) __A
,
1842 _mm256_setzero_si256 (),
/* _mm_mask_compressstoreu_pd: void function that calls __builtin_ia32_compressstoredf128_mask with __P cast to a __v2df pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1846 static __inline__
void __DEFAULT_FN_ATTRS128
1847 _mm_mask_compressstoreu_pd (void *__P
, __mmask8 __U
, __m128d __A
) {
1848 __builtin_ia32_compressstoredf128_mask ((__v2df
*) __P
,
/* _mm256_mask_compressstoreu_pd: void function that calls __builtin_ia32_compressstoredf256_mask with __P cast to a __v4df pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1853 static __inline__
void __DEFAULT_FN_ATTRS256
1854 _mm256_mask_compressstoreu_pd (void *__P
, __mmask8 __U
, __m256d __A
) {
1855 __builtin_ia32_compressstoredf256_mask ((__v4df
*) __P
,
/* _mm_mask_compressstoreu_epi64: void function that calls __builtin_ia32_compressstoredi128_mask with __P cast to a __v2di pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1860 static __inline__
void __DEFAULT_FN_ATTRS128
1861 _mm_mask_compressstoreu_epi64 (void *__P
, __mmask8 __U
, __m128i __A
) {
1862 __builtin_ia32_compressstoredi128_mask ((__v2di
*) __P
,
/* _mm256_mask_compressstoreu_epi64: void function that calls __builtin_ia32_compressstoredi256_mask with __P cast to a __v4di pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1867 static __inline__
void __DEFAULT_FN_ATTRS256
1868 _mm256_mask_compressstoreu_epi64 (void *__P
, __mmask8 __U
, __m256i __A
) {
1869 __builtin_ia32_compressstoredi256_mask ((__v4di
*) __P
,
/* _mm_mask_compressstoreu_ps: void function that calls __builtin_ia32_compressstoresf128_mask with __P cast to a __v4sf pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1874 static __inline__
void __DEFAULT_FN_ATTRS128
1875 _mm_mask_compressstoreu_ps (void *__P
, __mmask8 __U
, __m128 __A
) {
1876 __builtin_ia32_compressstoresf128_mask ((__v4sf
*) __P
,
/* _mm256_mask_compressstoreu_ps: void function that calls __builtin_ia32_compressstoresf256_mask with __P cast to a __v8sf pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1881 static __inline__
void __DEFAULT_FN_ATTRS256
1882 _mm256_mask_compressstoreu_ps (void *__P
, __mmask8 __U
, __m256 __A
) {
1883 __builtin_ia32_compressstoresf256_mask ((__v8sf
*) __P
,
/* _mm_mask_compressstoreu_epi32: void function that calls __builtin_ia32_compressstoresi128_mask with __P cast to a __v4si pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1888 static __inline__
void __DEFAULT_FN_ATTRS128
1889 _mm_mask_compressstoreu_epi32 (void *__P
, __mmask8 __U
, __m128i __A
) {
1890 __builtin_ia32_compressstoresi128_mask ((__v4si
*) __P
,
/* _mm256_mask_compressstoreu_epi32: void function that calls __builtin_ia32_compressstoresi256_mask with __P cast to a __v8si pointer as its first argument; how __A and __U are passed is on lines missing from this excerpt. */
1895 static __inline__
void __DEFAULT_FN_ATTRS256
1896 _mm256_mask_compressstoreu_epi32 (void *__P
, __mmask8 __U
, __m256i __A
) {
1897 __builtin_ia32_compressstoresi256_mask ((__v8si
*) __P
,
/* _mm_mask_cvtepi32_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) with _mm_cvtepi32_pd(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1902 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1903 _mm_mask_cvtepi32_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
1904 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
) __U
,
1905 (__v2df
)_mm_cvtepi32_pd(__A
),
/* _mm_maskz_cvtepi32_pd: returns __builtin_ia32_selectpd_128(__U, _mm_cvtepi32_pd(__A), _mm_setzero_pd()), with the vector arguments cast to __v2df and the result cast to __m128d. */
1909 static __inline__ __m128d __DEFAULT_FN_ATTRS128
1910 _mm_maskz_cvtepi32_pd (__mmask8 __U
, __m128i __A
) {
1911 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
) __U
,
1912 (__v2df
)_mm_cvtepi32_pd(__A
),
1913 (__v2df
)_mm_setzero_pd());
/* _mm256_mask_cvtepi32_pd: takes a 128-bit integer source __A and returns the __builtin_ia32_selectpd_256 result (mask __U) with _mm256_cvtepi32_pd(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W; confirm). */
1916 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1917 _mm256_mask_cvtepi32_pd (__m256d __W
, __mmask8 __U
, __m128i __A
) {
1918 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
) __U
,
1919 (__v4df
)_mm256_cvtepi32_pd(__A
),
/* _mm256_maskz_cvtepi32_pd: returns __builtin_ia32_selectpd_256(__U, _mm256_cvtepi32_pd(__A), _mm256_setzero_pd()), with the vector arguments cast to __v4df and the result cast to __m256d; the source __A is an __m128i. */
1923 static __inline__ __m256d __DEFAULT_FN_ATTRS256
1924 _mm256_maskz_cvtepi32_pd (__mmask8 __U
, __m128i __A
) {
1925 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
) __U
,
1926 (__v4df
)_mm256_cvtepi32_pd(__A
),
1927 (__v4df
)_mm256_setzero_pd());
/* _mm_mask_cvtepi32_ps: returns the __builtin_ia32_selectps_128 result (mask __U) with _mm_cvtepi32_ps(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1930 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1931 _mm_mask_cvtepi32_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
1932 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
1933 (__v4sf
)_mm_cvtepi32_ps(__A
),
/* _mm_maskz_cvtepi32_ps: returns __builtin_ia32_selectps_128(__U, _mm_cvtepi32_ps(__A), _mm_setzero_ps()), with the vector arguments cast to __v4sf and the result cast to __m128. */
1937 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1938 _mm_maskz_cvtepi32_ps (__mmask8 __U
, __m128i __A
) {
1939 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
1940 (__v4sf
)_mm_cvtepi32_ps(__A
),
1941 (__v4sf
)_mm_setzero_ps());
/* _mm256_mask_cvtepi32_ps: returns the __builtin_ia32_selectps_256 result (mask __U) with _mm256_cvtepi32_ps(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
1944 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1945 _mm256_mask_cvtepi32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
) {
1946 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
1947 (__v8sf
)_mm256_cvtepi32_ps(__A
),
/* _mm256_maskz_cvtepi32_ps: returns __builtin_ia32_selectps_256(__U, _mm256_cvtepi32_ps(__A), _mm256_setzero_ps()), with the vector arguments cast to __v8sf and the result cast to __m256. */
1951 static __inline__ __m256 __DEFAULT_FN_ATTRS256
1952 _mm256_maskz_cvtepi32_ps (__mmask8 __U
, __m256i __A
) {
1953 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
1954 (__v8sf
)_mm256_cvtepi32_ps(__A
),
1955 (__v8sf
)_mm256_setzero_ps());
/* _mm_mask_cvtpd_epi32: returns, cast to __m128i, a __builtin_ia32_cvtpd2dq128_mask call whose first argument is __A cast to __v2df; how __W and __U are passed is on lines missing from this excerpt. */
1958 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1959 _mm_mask_cvtpd_epi32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
1960 return (__m128i
) __builtin_ia32_cvtpd2dq128_mask ((__v2df
) __A
,
/* _mm_maskz_cvtpd_epi32: returns, cast to __m128i, a __builtin_ia32_cvtpd2dq128_mask call on __A (cast to __v2df) followed by _mm_setzero_si128(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
1965 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1966 _mm_maskz_cvtpd_epi32 (__mmask8 __U
, __m128d __A
) {
1967 return (__m128i
) __builtin_ia32_cvtpd2dq128_mask ((__v2df
) __A
,
1969 _mm_setzero_si128 (),
/* _mm256_mask_cvtpd_epi32: 256-bit source, 128-bit result. Returns the __builtin_ia32_selectd_128 result (mask __U) with _mm256_cvtpd_epi32(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W; confirm). */
1973 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974 _mm256_mask_cvtpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
1975 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1976 (__v4si
)_mm256_cvtpd_epi32(__A
),
/* _mm256_maskz_cvtpd_epi32: 256-bit source, 128-bit result. Returns __builtin_ia32_selectd_128(__U, _mm256_cvtpd_epi32(__A), _mm_setzero_si128()), with the vector arguments cast to __v4si and the result cast to __m128i. */
1980 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1981 _mm256_maskz_cvtpd_epi32 (__mmask8 __U
, __m256d __A
) {
1982 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1983 (__v4si
)_mm256_cvtpd_epi32(__A
),
1984 (__v4si
)_mm_setzero_si128());
/* _mm_mask_cvtpd_ps: returns, cast to __m128, a __builtin_ia32_cvtpd2ps_mask call whose first argument is __A cast to __v2df; how __W and __U are passed is on lines missing from this excerpt. */
1987 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1988 _mm_mask_cvtpd_ps (__m128 __W
, __mmask8 __U
, __m128d __A
) {
1989 return (__m128
) __builtin_ia32_cvtpd2ps_mask ((__v2df
) __A
,
/* _mm_maskz_cvtpd_ps: returns, cast to __m128, a __builtin_ia32_cvtpd2ps_mask call whose first argument is __A cast to __v2df; the remaining arguments (including how __U is passed) are on lines missing from this excerpt. */
1994 static __inline__ __m128 __DEFAULT_FN_ATTRS128
1995 _mm_maskz_cvtpd_ps (__mmask8 __U
, __m128d __A
) {
1996 return (__m128
) __builtin_ia32_cvtpd2ps_mask ((__v2df
) __A
,
/* _mm256_mask_cvtpd_ps: 256-bit source, 128-bit result. Returns the __builtin_ia32_selectps_128 result (mask __U) with _mm256_cvtpd_ps(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W; confirm). */
2002 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2003 _mm256_mask_cvtpd_ps (__m128 __W
, __mmask8 __U
, __m256d __A
) {
2004 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2005 (__v4sf
)_mm256_cvtpd_ps(__A
),
/* _mm256_maskz_cvtpd_ps: 256-bit source, 128-bit result. Returns __builtin_ia32_selectps_128(__U, _mm256_cvtpd_ps(__A), _mm_setzero_ps()), with the vector arguments cast to __v4sf and the result cast to __m128. */
2009 static __inline__ __m128 __DEFAULT_FN_ATTRS256
2010 _mm256_maskz_cvtpd_ps (__mmask8 __U
, __m256d __A
) {
2011 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2012 (__v4sf
)_mm256_cvtpd_ps(__A
),
2013 (__v4sf
)_mm_setzero_ps());
/* _mm_cvtpd_epu32: unmasked entry point. Returns, cast to __m128i, a __builtin_ia32_cvtpd2udq128_mask call on __A (cast to __v2df) followed by _mm_setzero_si128(); the final (mask) argument is on a line missing from this excerpt. */
2016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017 _mm_cvtpd_epu32 (__m128d __A
) {
2018 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
2020 _mm_setzero_si128 (),
/* _mm_mask_cvtpd_epu32: returns, cast to __m128i, a __builtin_ia32_cvtpd2udq128_mask call whose first argument is __A cast to __v2df; how __W and __U are passed is on lines missing from this excerpt. */
2024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2025 _mm_mask_cvtpd_epu32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2026 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
/* _mm_maskz_cvtpd_epu32: returns, cast to __m128i, a __builtin_ia32_cvtpd2udq128_mask call on __A (cast to __v2df) followed by _mm_setzero_si128(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
2031 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2032 _mm_maskz_cvtpd_epu32 (__mmask8 __U
, __m128d __A
) {
2033 return (__m128i
) __builtin_ia32_cvtpd2udq128_mask ((__v2df
) __A
,
2035 _mm_setzero_si128 (),
/* _mm256_cvtpd_epu32: unmasked entry point, 256-bit source and 128-bit result. Returns, cast to __m128i, a __builtin_ia32_cvtpd2udq256_mask call on __A (cast to __v4df) followed by _mm_setzero_si128(); the final (mask) argument is on a line missing from this excerpt. */
2039 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040 _mm256_cvtpd_epu32 (__m256d __A
) {
2041 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
2043 _mm_setzero_si128 (),
/* _mm256_mask_cvtpd_epu32: 256-bit source, 128-bit result. Returns, cast to __m128i, a __builtin_ia32_cvtpd2udq256_mask call whose first argument is __A cast to __v4df; how __W and __U are passed is on lines missing from this excerpt. */
2047 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2048 _mm256_mask_cvtpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2049 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
/* _mm256_maskz_cvtpd_epu32: 256-bit source, 128-bit result. Returns, cast to __m128i, a __builtin_ia32_cvtpd2udq256_mask call on __A (cast to __v4df) followed by _mm_setzero_si128(); the final argument (presumably the mask __U; confirm) is on a line missing from this excerpt. */
2054 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2055 _mm256_maskz_cvtpd_epu32 (__mmask8 __U
, __m256d __A
) {
2056 return (__m128i
) __builtin_ia32_cvtpd2udq256_mask ((__v4df
) __A
,
2058 _mm_setzero_si128 (),
/* _mm_mask_cvtps_epi32: returns the __builtin_ia32_selectd_128 result (mask __U) with _mm_cvtps_epi32(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
2062 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063 _mm_mask_cvtps_epi32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2064 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2065 (__v4si
)_mm_cvtps_epi32(__A
),
/* _mm_maskz_cvtps_epi32: returns __builtin_ia32_selectd_128(__U, _mm_cvtps_epi32(__A), _mm_setzero_si128()), with the vector arguments cast to __v4si and the result cast to __m128i. */
2069 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2070 _mm_maskz_cvtps_epi32 (__mmask8 __U
, __m128 __A
) {
2071 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2072 (__v4si
)_mm_cvtps_epi32(__A
),
2073 (__v4si
)_mm_setzero_si128());
/* _mm256_mask_cvtps_epi32: returns the __builtin_ia32_selectd_256 result (mask __U) with _mm256_cvtps_epi32(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
2076 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2077 _mm256_mask_cvtps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2078 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2079 (__v8si
)_mm256_cvtps_epi32(__A
),
/* _mm256_maskz_cvtps_epi32: returns __builtin_ia32_selectd_256(__U, _mm256_cvtps_epi32(__A), _mm256_setzero_si256()), with the vector arguments cast to __v8si and the result cast to __m256i. */
2083 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2084 _mm256_maskz_cvtps_epi32 (__mmask8 __U
, __m256 __A
) {
2085 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2086 (__v8si
)_mm256_cvtps_epi32(__A
),
2087 (__v8si
)_mm256_setzero_si256());
/* _mm_mask_cvtps_pd: returns the __builtin_ia32_selectpd_128 result (mask __U) with _mm_cvtps_pd(__A) as its second argument; the third argument is on a line missing from this excerpt (presumably __W, the otherwise unused parameter; confirm). */
2090 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2091 _mm_mask_cvtps_pd (__m128d __W
, __mmask8 __U
, __m128 __A
) {
2092 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2093 (__v2df
)_mm_cvtps_pd(__A
),
2097 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2098 _mm_maskz_cvtps_pd (__mmask8 __U
, __m128 __A
) {
2099 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2100 (__v2df
)_mm_cvtps_pd(__A
),
2101 (__v2df
)_mm_setzero_pd());
2104 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2105 _mm256_mask_cvtps_pd (__m256d __W
, __mmask8 __U
, __m128 __A
) {
2106 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2107 (__v4df
)_mm256_cvtps_pd(__A
),
2111 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2112 _mm256_maskz_cvtps_pd (__mmask8 __U
, __m128 __A
) {
2113 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2114 (__v4df
)_mm256_cvtps_pd(__A
),
2115 (__v4df
)_mm256_setzero_pd());
2118 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2119 _mm_cvtps_epu32 (__m128 __A
) {
2120 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
2122 _mm_setzero_si128 (),
2126 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2127 _mm_mask_cvtps_epu32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2128 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
2133 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134 _mm_maskz_cvtps_epu32 (__mmask8 __U
, __m128 __A
) {
2135 return (__m128i
) __builtin_ia32_cvtps2udq128_mask ((__v4sf
) __A
,
2137 _mm_setzero_si128 (),
2141 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2142 _mm256_cvtps_epu32 (__m256 __A
) {
2143 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
2145 _mm256_setzero_si256 (),
2149 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150 _mm256_mask_cvtps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2151 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
2156 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2157 _mm256_maskz_cvtps_epu32 (__mmask8 __U
, __m256 __A
) {
2158 return (__m256i
) __builtin_ia32_cvtps2udq256_mask ((__v8sf
) __A
,
2160 _mm256_setzero_si256 (),
2164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2165 _mm_mask_cvttpd_epi32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2166 return (__m128i
) __builtin_ia32_cvttpd2dq128_mask ((__v2df
) __A
,
2171 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2172 _mm_maskz_cvttpd_epi32 (__mmask8 __U
, __m128d __A
) {
2173 return (__m128i
) __builtin_ia32_cvttpd2dq128_mask ((__v2df
) __A
,
2175 _mm_setzero_si128 (),
2179 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2180 _mm256_mask_cvttpd_epi32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2181 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2182 (__v4si
)_mm256_cvttpd_epi32(__A
),
2186 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2187 _mm256_maskz_cvttpd_epi32 (__mmask8 __U
, __m256d __A
) {
2188 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2189 (__v4si
)_mm256_cvttpd_epi32(__A
),
2190 (__v4si
)_mm_setzero_si128());
2193 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2194 _mm_cvttpd_epu32 (__m128d __A
) {
2195 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
2197 _mm_setzero_si128 (),
2201 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202 _mm_mask_cvttpd_epu32 (__m128i __W
, __mmask8 __U
, __m128d __A
) {
2203 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
2208 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2209 _mm_maskz_cvttpd_epu32 (__mmask8 __U
, __m128d __A
) {
2210 return (__m128i
) __builtin_ia32_cvttpd2udq128_mask ((__v2df
) __A
,
2212 _mm_setzero_si128 (),
2216 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2217 _mm256_cvttpd_epu32 (__m256d __A
) {
2218 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
2220 _mm_setzero_si128 (),
2224 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2225 _mm256_mask_cvttpd_epu32 (__m128i __W
, __mmask8 __U
, __m256d __A
) {
2226 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
2231 static __inline__ __m128i __DEFAULT_FN_ATTRS256
2232 _mm256_maskz_cvttpd_epu32 (__mmask8 __U
, __m256d __A
) {
2233 return (__m128i
) __builtin_ia32_cvttpd2udq256_mask ((__v4df
) __A
,
2235 _mm_setzero_si128 (),
2239 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2240 _mm_mask_cvttps_epi32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2241 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2242 (__v4si
)_mm_cvttps_epi32(__A
),
2246 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2247 _mm_maskz_cvttps_epi32 (__mmask8 __U
, __m128 __A
) {
2248 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2249 (__v4si
)_mm_cvttps_epi32(__A
),
2250 (__v4si
)_mm_setzero_si128());
2253 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254 _mm256_mask_cvttps_epi32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2255 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2256 (__v8si
)_mm256_cvttps_epi32(__A
),
2260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2261 _mm256_maskz_cvttps_epi32 (__mmask8 __U
, __m256 __A
) {
2262 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2263 (__v8si
)_mm256_cvttps_epi32(__A
),
2264 (__v8si
)_mm256_setzero_si256());
2267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2268 _mm_cvttps_epu32 (__m128 __A
) {
2269 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
2271 _mm_setzero_si128 (),
2275 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2276 _mm_mask_cvttps_epu32 (__m128i __W
, __mmask8 __U
, __m128 __A
) {
2277 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
2282 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2283 _mm_maskz_cvttps_epu32 (__mmask8 __U
, __m128 __A
) {
2284 return (__m128i
) __builtin_ia32_cvttps2udq128_mask ((__v4sf
) __A
,
2286 _mm_setzero_si128 (),
2290 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2291 _mm256_cvttps_epu32 (__m256 __A
) {
2292 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
2294 _mm256_setzero_si256 (),
2298 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299 _mm256_mask_cvttps_epu32 (__m256i __W
, __mmask8 __U
, __m256 __A
) {
2300 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
2305 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2306 _mm256_maskz_cvttps_epu32 (__mmask8 __U
, __m256 __A
) {
2307 return (__m256i
) __builtin_ia32_cvttps2udq256_mask ((__v8sf
) __A
,
2309 _mm256_setzero_si256 (),
2313 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314 _mm_cvtepu32_pd (__m128i __A
) {
2315 return (__m128d
) __builtin_convertvector(
2316 __builtin_shufflevector((__v4su
)__A
, (__v4su
)__A
, 0, 1), __v2df
);
2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320 _mm_mask_cvtepu32_pd (__m128d __W
, __mmask8 __U
, __m128i __A
) {
2321 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
) __U
,
2322 (__v2df
)_mm_cvtepu32_pd(__A
),
2326 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2327 _mm_maskz_cvtepu32_pd (__mmask8 __U
, __m128i __A
) {
2328 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
) __U
,
2329 (__v2df
)_mm_cvtepu32_pd(__A
),
2330 (__v2df
)_mm_setzero_pd());
2333 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2334 _mm256_cvtepu32_pd (__m128i __A
) {
2335 return (__m256d
)__builtin_convertvector((__v4su
)__A
, __v4df
);
2338 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2339 _mm256_mask_cvtepu32_pd (__m256d __W
, __mmask8 __U
, __m128i __A
) {
2340 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
) __U
,
2341 (__v4df
)_mm256_cvtepu32_pd(__A
),
2345 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2346 _mm256_maskz_cvtepu32_pd (__mmask8 __U
, __m128i __A
) {
2347 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
) __U
,
2348 (__v4df
)_mm256_cvtepu32_pd(__A
),
2349 (__v4df
)_mm256_setzero_pd());
2352 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2353 _mm_cvtepu32_ps (__m128i __A
) {
2354 return (__m128
)__builtin_convertvector((__v4su
)__A
, __v4sf
);
2357 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2358 _mm_mask_cvtepu32_ps (__m128 __W
, __mmask8 __U
, __m128i __A
) {
2359 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2360 (__v4sf
)_mm_cvtepu32_ps(__A
),
2364 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2365 _mm_maskz_cvtepu32_ps (__mmask8 __U
, __m128i __A
) {
2366 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2367 (__v4sf
)_mm_cvtepu32_ps(__A
),
2368 (__v4sf
)_mm_setzero_ps());
2371 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2372 _mm256_cvtepu32_ps (__m256i __A
) {
2373 return (__m256
)__builtin_convertvector((__v8su
)__A
, __v8sf
);
2376 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2377 _mm256_mask_cvtepu32_ps (__m256 __W
, __mmask8 __U
, __m256i __A
) {
2378 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2379 (__v8sf
)_mm256_cvtepu32_ps(__A
),
2383 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2384 _mm256_maskz_cvtepu32_ps (__mmask8 __U
, __m256i __A
) {
2385 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2386 (__v8sf
)_mm256_cvtepu32_ps(__A
),
2387 (__v8sf
)_mm256_setzero_ps());
2390 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2391 _mm_mask_div_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
2392 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2393 (__v2df
)_mm_div_pd(__A
, __B
),
2397 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2398 _mm_maskz_div_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
2399 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2400 (__v2df
)_mm_div_pd(__A
, __B
),
2401 (__v2df
)_mm_setzero_pd());
2404 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2405 _mm256_mask_div_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
2406 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2407 (__v4df
)_mm256_div_pd(__A
, __B
),
2411 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2412 _mm256_maskz_div_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
2413 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2414 (__v4df
)_mm256_div_pd(__A
, __B
),
2415 (__v4df
)_mm256_setzero_pd());
2418 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2419 _mm_mask_div_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
2420 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2421 (__v4sf
)_mm_div_ps(__A
, __B
),
2425 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2426 _mm_maskz_div_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
2427 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2428 (__v4sf
)_mm_div_ps(__A
, __B
),
2429 (__v4sf
)_mm_setzero_ps());
2432 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2433 _mm256_mask_div_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
2434 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2435 (__v8sf
)_mm256_div_ps(__A
, __B
),
2439 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2440 _mm256_maskz_div_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
2441 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2442 (__v8sf
)_mm256_div_ps(__A
, __B
),
2443 (__v8sf
)_mm256_setzero_ps());
2446 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2447 _mm_mask_expand_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
2448 return (__m128d
) __builtin_ia32_expanddf128_mask ((__v2df
) __A
,
2453 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2454 _mm_maskz_expand_pd (__mmask8 __U
, __m128d __A
) {
2455 return (__m128d
) __builtin_ia32_expanddf128_mask ((__v2df
) __A
,
2461 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2462 _mm256_mask_expand_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
2463 return (__m256d
) __builtin_ia32_expanddf256_mask ((__v4df
) __A
,
2468 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2469 _mm256_maskz_expand_pd (__mmask8 __U
, __m256d __A
) {
2470 return (__m256d
) __builtin_ia32_expanddf256_mask ((__v4df
) __A
,
2472 _mm256_setzero_pd (),
2476 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2477 _mm_mask_expand_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2478 return (__m128i
) __builtin_ia32_expanddi128_mask ((__v2di
) __A
,
2483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2484 _mm_maskz_expand_epi64 (__mmask8 __U
, __m128i __A
) {
2485 return (__m128i
) __builtin_ia32_expanddi128_mask ((__v2di
) __A
,
2487 _mm_setzero_si128 (),
2491 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2492 _mm256_mask_expand_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
2493 return (__m256i
) __builtin_ia32_expanddi256_mask ((__v4di
) __A
,
2498 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2499 _mm256_maskz_expand_epi64 (__mmask8 __U
, __m256i __A
) {
2500 return (__m256i
) __builtin_ia32_expanddi256_mask ((__v4di
) __A
,
2502 _mm256_setzero_si256 (),
2506 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2507 _mm_mask_expandloadu_pd (__m128d __W
, __mmask8 __U
, void const *__P
) {
2508 return (__m128d
) __builtin_ia32_expandloaddf128_mask ((const __v2df
*) __P
,
2514 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2515 _mm_maskz_expandloadu_pd (__mmask8 __U
, void const *__P
) {
2516 return (__m128d
) __builtin_ia32_expandloaddf128_mask ((const __v2df
*) __P
,
2523 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2524 _mm256_mask_expandloadu_pd (__m256d __W
, __mmask8 __U
, void const *__P
) {
2525 return (__m256d
) __builtin_ia32_expandloaddf256_mask ((const __v4df
*) __P
,
2531 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2532 _mm256_maskz_expandloadu_pd (__mmask8 __U
, void const *__P
) {
2533 return (__m256d
) __builtin_ia32_expandloaddf256_mask ((const __v4df
*) __P
,
2535 _mm256_setzero_pd (),
2540 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2541 _mm_mask_expandloadu_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
) {
2542 return (__m128i
) __builtin_ia32_expandloaddi128_mask ((const __v2di
*) __P
,
2548 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2549 _mm_maskz_expandloadu_epi64 (__mmask8 __U
, void const *__P
) {
2550 return (__m128i
) __builtin_ia32_expandloaddi128_mask ((const __v2di
*) __P
,
2552 _mm_setzero_si128 (),
2557 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2558 _mm256_mask_expandloadu_epi64 (__m256i __W
, __mmask8 __U
,
2560 return (__m256i
) __builtin_ia32_expandloaddi256_mask ((const __v4di
*) __P
,
2566 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2567 _mm256_maskz_expandloadu_epi64 (__mmask8 __U
, void const *__P
) {
2568 return (__m256i
) __builtin_ia32_expandloaddi256_mask ((const __v4di
*) __P
,
2570 _mm256_setzero_si256 (),
2575 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2576 _mm_mask_expandloadu_ps (__m128 __W
, __mmask8 __U
, void const *__P
) {
2577 return (__m128
) __builtin_ia32_expandloadsf128_mask ((const __v4sf
*) __P
,
2582 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2583 _mm_maskz_expandloadu_ps (__mmask8 __U
, void const *__P
) {
2584 return (__m128
) __builtin_ia32_expandloadsf128_mask ((const __v4sf
*) __P
,
2591 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2592 _mm256_mask_expandloadu_ps (__m256 __W
, __mmask8 __U
, void const *__P
) {
2593 return (__m256
) __builtin_ia32_expandloadsf256_mask ((const __v8sf
*) __P
,
2598 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2599 _mm256_maskz_expandloadu_ps (__mmask8 __U
, void const *__P
) {
2600 return (__m256
) __builtin_ia32_expandloadsf256_mask ((const __v8sf
*) __P
,
2602 _mm256_setzero_ps (),
2607 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2608 _mm_mask_expandloadu_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
) {
2609 return (__m128i
) __builtin_ia32_expandloadsi128_mask ((const __v4si
*) __P
,
2615 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616 _mm_maskz_expandloadu_epi32 (__mmask8 __U
, void const *__P
) {
2617 return (__m128i
) __builtin_ia32_expandloadsi128_mask ((const __v4si
*) __P
,
2619 _mm_setzero_si128 (),
2623 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2624 _mm256_mask_expandloadu_epi32 (__m256i __W
, __mmask8 __U
,
2626 return (__m256i
) __builtin_ia32_expandloadsi256_mask ((const __v8si
*) __P
,
2632 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2633 _mm256_maskz_expandloadu_epi32 (__mmask8 __U
, void const *__P
) {
2634 return (__m256i
) __builtin_ia32_expandloadsi256_mask ((const __v8si
*) __P
,
2636 _mm256_setzero_si256 (),
2641 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2642 _mm_mask_expand_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
2643 return (__m128
) __builtin_ia32_expandsf128_mask ((__v4sf
) __A
,
2648 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2649 _mm_maskz_expand_ps (__mmask8 __U
, __m128 __A
) {
2650 return (__m128
) __builtin_ia32_expandsf128_mask ((__v4sf
) __A
,
2656 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2657 _mm256_mask_expand_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
2658 return (__m256
) __builtin_ia32_expandsf256_mask ((__v8sf
) __A
,
2663 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2664 _mm256_maskz_expand_ps (__mmask8 __U
, __m256 __A
) {
2665 return (__m256
) __builtin_ia32_expandsf256_mask ((__v8sf
) __A
,
2667 _mm256_setzero_ps (),
2671 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2672 _mm_mask_expand_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2673 return (__m128i
) __builtin_ia32_expandsi128_mask ((__v4si
) __A
,
2678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2679 _mm_maskz_expand_epi32 (__mmask8 __U
, __m128i __A
) {
2680 return (__m128i
) __builtin_ia32_expandsi128_mask ((__v4si
) __A
,
2682 _mm_setzero_si128 (),
2686 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2687 _mm256_mask_expand_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
2688 return (__m256i
) __builtin_ia32_expandsi256_mask ((__v8si
) __A
,
2693 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694 _mm256_maskz_expand_epi32 (__mmask8 __U
, __m256i __A
) {
2695 return (__m256i
) __builtin_ia32_expandsi256_mask ((__v8si
) __A
,
2697 _mm256_setzero_si256 (),
2701 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2702 _mm_getexp_pd (__m128d __A
) {
2703 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
2709 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2710 _mm_mask_getexp_pd (__m128d __W
, __mmask8 __U
, __m128d __A
) {
2711 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
2716 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2717 _mm_maskz_getexp_pd (__mmask8 __U
, __m128d __A
) {
2718 return (__m128d
) __builtin_ia32_getexppd128_mask ((__v2df
) __A
,
2724 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2725 _mm256_getexp_pd (__m256d __A
) {
2726 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
2728 _mm256_setzero_pd (),
2732 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2733 _mm256_mask_getexp_pd (__m256d __W
, __mmask8 __U
, __m256d __A
) {
2734 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
2739 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2740 _mm256_maskz_getexp_pd (__mmask8 __U
, __m256d __A
) {
2741 return (__m256d
) __builtin_ia32_getexppd256_mask ((__v4df
) __A
,
2743 _mm256_setzero_pd (),
2747 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748 _mm_getexp_ps (__m128 __A
) {
2749 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
2755 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2756 _mm_mask_getexp_ps (__m128 __W
, __mmask8 __U
, __m128 __A
) {
2757 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
2762 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2763 _mm_maskz_getexp_ps (__mmask8 __U
, __m128 __A
) {
2764 return (__m128
) __builtin_ia32_getexpps128_mask ((__v4sf
) __A
,
2770 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2771 _mm256_getexp_ps (__m256 __A
) {
2772 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
2774 _mm256_setzero_ps (),
2778 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2779 _mm256_mask_getexp_ps (__m256 __W
, __mmask8 __U
, __m256 __A
) {
2780 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
2785 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2786 _mm256_maskz_getexp_ps (__mmask8 __U
, __m256 __A
) {
2787 return (__m256
) __builtin_ia32_getexpps256_mask ((__v8sf
) __A
,
2789 _mm256_setzero_ps (),
2793 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2794 _mm_mask_max_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
2795 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2796 (__v2df
)_mm_max_pd(__A
, __B
),
2800 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2801 _mm_maskz_max_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
2802 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2803 (__v2df
)_mm_max_pd(__A
, __B
),
2804 (__v2df
)_mm_setzero_pd());
2807 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2808 _mm256_mask_max_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
2809 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2810 (__v4df
)_mm256_max_pd(__A
, __B
),
2814 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2815 _mm256_maskz_max_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
2816 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2817 (__v4df
)_mm256_max_pd(__A
, __B
),
2818 (__v4df
)_mm256_setzero_pd());
2821 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2822 _mm_mask_max_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
2823 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2824 (__v4sf
)_mm_max_ps(__A
, __B
),
2828 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2829 _mm_maskz_max_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
2830 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2831 (__v4sf
)_mm_max_ps(__A
, __B
),
2832 (__v4sf
)_mm_setzero_ps());
2835 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2836 _mm256_mask_max_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
2837 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2838 (__v8sf
)_mm256_max_ps(__A
, __B
),
2842 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2843 _mm256_maskz_max_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
2844 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2845 (__v8sf
)_mm256_max_ps(__A
, __B
),
2846 (__v8sf
)_mm256_setzero_ps());
2849 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2850 _mm_mask_min_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
2851 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2852 (__v2df
)_mm_min_pd(__A
, __B
),
2856 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2857 _mm_maskz_min_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
2858 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2859 (__v2df
)_mm_min_pd(__A
, __B
),
2860 (__v2df
)_mm_setzero_pd());
2863 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2864 _mm256_mask_min_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
2865 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2866 (__v4df
)_mm256_min_pd(__A
, __B
),
2870 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2871 _mm256_maskz_min_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
2872 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2873 (__v4df
)_mm256_min_pd(__A
, __B
),
2874 (__v4df
)_mm256_setzero_pd());
2877 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2878 _mm_mask_min_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
2879 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2880 (__v4sf
)_mm_min_ps(__A
, __B
),
2884 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2885 _mm_maskz_min_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
2886 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2887 (__v4sf
)_mm_min_ps(__A
, __B
),
2888 (__v4sf
)_mm_setzero_ps());
2891 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2892 _mm256_mask_min_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
2893 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2894 (__v8sf
)_mm256_min_ps(__A
, __B
),
2898 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2899 _mm256_maskz_min_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
2900 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2901 (__v8sf
)_mm256_min_ps(__A
, __B
),
2902 (__v8sf
)_mm256_setzero_ps());
2905 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2906 _mm_mask_mul_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
2907 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2908 (__v2df
)_mm_mul_pd(__A
, __B
),
2912 static __inline__ __m128d __DEFAULT_FN_ATTRS128
2913 _mm_maskz_mul_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
2914 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
2915 (__v2df
)_mm_mul_pd(__A
, __B
),
2916 (__v2df
)_mm_setzero_pd());
2919 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2920 _mm256_mask_mul_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
2921 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2922 (__v4df
)_mm256_mul_pd(__A
, __B
),
2926 static __inline__ __m256d __DEFAULT_FN_ATTRS256
2927 _mm256_maskz_mul_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
2928 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
2929 (__v4df
)_mm256_mul_pd(__A
, __B
),
2930 (__v4df
)_mm256_setzero_pd());
2933 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2934 _mm_mask_mul_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
2935 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2936 (__v4sf
)_mm_mul_ps(__A
, __B
),
2940 static __inline__ __m128 __DEFAULT_FN_ATTRS128
2941 _mm_maskz_mul_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
2942 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
2943 (__v4sf
)_mm_mul_ps(__A
, __B
),
2944 (__v4sf
)_mm_setzero_ps());
2947 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2948 _mm256_mask_mul_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
2949 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2950 (__v8sf
)_mm256_mul_ps(__A
, __B
),
2954 static __inline__ __m256 __DEFAULT_FN_ATTRS256
2955 _mm256_maskz_mul_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
2956 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
2957 (__v8sf
)_mm256_mul_ps(__A
, __B
),
2958 (__v8sf
)_mm256_setzero_ps());
2961 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2962 _mm_mask_abs_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
) {
2963 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2964 (__v4si
)_mm_abs_epi32(__A
),
2968 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2969 _mm_maskz_abs_epi32(__mmask8 __U
, __m128i __A
) {
2970 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
2971 (__v4si
)_mm_abs_epi32(__A
),
2972 (__v4si
)_mm_setzero_si128());
2975 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2976 _mm256_mask_abs_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
) {
2977 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2978 (__v8si
)_mm256_abs_epi32(__A
),
2982 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2983 _mm256_maskz_abs_epi32(__mmask8 __U
, __m256i __A
) {
2984 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
2985 (__v8si
)_mm256_abs_epi32(__A
),
2986 (__v8si
)_mm256_setzero_si256());
2989 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2990 _mm_abs_epi64 (__m128i __A
) {
2991 return (__m128i
)__builtin_elementwise_abs((__v2di
)__A
);
2994 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2995 _mm_mask_abs_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
) {
2996 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
2997 (__v2di
)_mm_abs_epi64(__A
),
3001 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3002 _mm_maskz_abs_epi64 (__mmask8 __U
, __m128i __A
) {
3003 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
3004 (__v2di
)_mm_abs_epi64(__A
),
3005 (__v2di
)_mm_setzero_si128());
3008 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3009 _mm256_abs_epi64 (__m256i __A
) {
3010 return (__m256i
)__builtin_elementwise_abs((__v4di
)__A
);
3013 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3014 _mm256_mask_abs_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
) {
3015 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
3016 (__v4di
)_mm256_abs_epi64(__A
),
3020 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3021 _mm256_maskz_abs_epi64 (__mmask8 __U
, __m256i __A
) {
3022 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
3023 (__v4di
)_mm256_abs_epi64(__A
),
3024 (__v4di
)_mm256_setzero_si256());
3027 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3028 _mm_maskz_max_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
3029 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3030 (__v4si
)_mm_max_epi32(__A
, __B
),
3031 (__v4si
)_mm_setzero_si128());
3034 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3035 _mm_mask_max_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3036 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3037 (__v4si
)_mm_max_epi32(__A
, __B
),
3041 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042 _mm256_maskz_max_epi32(__mmask8 __M
, __m256i __A
, __m256i __B
) {
3043 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3044 (__v8si
)_mm256_max_epi32(__A
, __B
),
3045 (__v8si
)_mm256_setzero_si256());
3048 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049 _mm256_mask_max_epi32(__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3050 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3051 (__v8si
)_mm256_max_epi32(__A
, __B
),
3055 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3056 _mm_max_epi64 (__m128i __A
, __m128i __B
) {
3057 return (__m128i
)__builtin_elementwise_max((__v2di
)__A
, (__v2di
)__B
);
3060 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3061 _mm_maskz_max_epi64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3062 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3063 (__v2di
)_mm_max_epi64(__A
, __B
),
3064 (__v2di
)_mm_setzero_si128());
3067 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3068 _mm_mask_max_epi64 (__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3069 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3070 (__v2di
)_mm_max_epi64(__A
, __B
),
3074 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3075 _mm256_max_epi64 (__m256i __A
, __m256i __B
) {
3076 return (__m256i
)__builtin_elementwise_max((__v4di
)__A
, (__v4di
)__B
);
3079 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3080 _mm256_maskz_max_epi64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3081 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3082 (__v4di
)_mm256_max_epi64(__A
, __B
),
3083 (__v4di
)_mm256_setzero_si256());
3086 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3087 _mm256_mask_max_epi64 (__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3088 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3089 (__v4di
)_mm256_max_epi64(__A
, __B
),
3093 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3094 _mm_maskz_max_epu32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
3095 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3096 (__v4si
)_mm_max_epu32(__A
, __B
),
3097 (__v4si
)_mm_setzero_si128());
3100 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3101 _mm_mask_max_epu32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3102 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3103 (__v4si
)_mm_max_epu32(__A
, __B
),
3107 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108 _mm256_maskz_max_epu32(__mmask8 __M
, __m256i __A
, __m256i __B
) {
3109 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3110 (__v8si
)_mm256_max_epu32(__A
, __B
),
3111 (__v8si
)_mm256_setzero_si256());
3114 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115 _mm256_mask_max_epu32(__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3116 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3117 (__v8si
)_mm256_max_epu32(__A
, __B
),
3121 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3122 _mm_max_epu64 (__m128i __A
, __m128i __B
) {
3123 return (__m128i
)__builtin_elementwise_max((__v2du
)__A
, (__v2du
)__B
);
3126 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3127 _mm_maskz_max_epu64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3128 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3129 (__v2di
)_mm_max_epu64(__A
, __B
),
3130 (__v2di
)_mm_setzero_si128());
3133 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3134 _mm_mask_max_epu64 (__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3135 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3136 (__v2di
)_mm_max_epu64(__A
, __B
),
3140 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3141 _mm256_max_epu64 (__m256i __A
, __m256i __B
) {
3142 return (__m256i
)__builtin_elementwise_max((__v4du
)__A
, (__v4du
)__B
);
3145 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3146 _mm256_maskz_max_epu64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3147 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3148 (__v4di
)_mm256_max_epu64(__A
, __B
),
3149 (__v4di
)_mm256_setzero_si256());
3152 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3153 _mm256_mask_max_epu64 (__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3154 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3155 (__v4di
)_mm256_max_epu64(__A
, __B
),
3159 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3160 _mm_maskz_min_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
3161 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3162 (__v4si
)_mm_min_epi32(__A
, __B
),
3163 (__v4si
)_mm_setzero_si128());
3166 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3167 _mm_mask_min_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3168 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3169 (__v4si
)_mm_min_epi32(__A
, __B
),
3173 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174 _mm256_maskz_min_epi32(__mmask8 __M
, __m256i __A
, __m256i __B
) {
3175 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3176 (__v8si
)_mm256_min_epi32(__A
, __B
),
3177 (__v8si
)_mm256_setzero_si256());
3180 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181 _mm256_mask_min_epi32(__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3182 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3183 (__v8si
)_mm256_min_epi32(__A
, __B
),
3187 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3188 _mm_min_epi64 (__m128i __A
, __m128i __B
) {
3189 return (__m128i
)__builtin_elementwise_min((__v2di
)__A
, (__v2di
)__B
);
3192 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3193 _mm_mask_min_epi64 (__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3194 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3195 (__v2di
)_mm_min_epi64(__A
, __B
),
3199 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3200 _mm_maskz_min_epi64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3201 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3202 (__v2di
)_mm_min_epi64(__A
, __B
),
3203 (__v2di
)_mm_setzero_si128());
3206 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3207 _mm256_min_epi64 (__m256i __A
, __m256i __B
) {
3208 return (__m256i
)__builtin_elementwise_min((__v4di
)__A
, (__v4di
)__B
);
3211 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3212 _mm256_mask_min_epi64 (__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3213 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3214 (__v4di
)_mm256_min_epi64(__A
, __B
),
3218 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3219 _mm256_maskz_min_epi64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3220 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3221 (__v4di
)_mm256_min_epi64(__A
, __B
),
3222 (__v4di
)_mm256_setzero_si256());
3225 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3226 _mm_maskz_min_epu32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
3227 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3228 (__v4si
)_mm_min_epu32(__A
, __B
),
3229 (__v4si
)_mm_setzero_si128());
3232 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3233 _mm_mask_min_epu32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3234 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
3235 (__v4si
)_mm_min_epu32(__A
, __B
),
3239 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240 _mm256_maskz_min_epu32(__mmask8 __M
, __m256i __A
, __m256i __B
) {
3241 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3242 (__v8si
)_mm256_min_epu32(__A
, __B
),
3243 (__v8si
)_mm256_setzero_si256());
3246 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247 _mm256_mask_min_epu32(__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3248 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
3249 (__v8si
)_mm256_min_epu32(__A
, __B
),
3253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3254 _mm_min_epu64 (__m128i __A
, __m128i __B
) {
3255 return (__m128i
)__builtin_elementwise_min((__v2du
)__A
, (__v2du
)__B
);
3258 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3259 _mm_mask_min_epu64 (__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
) {
3260 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3261 (__v2di
)_mm_min_epu64(__A
, __B
),
3265 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3266 _mm_maskz_min_epu64 (__mmask8 __M
, __m128i __A
, __m128i __B
) {
3267 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__M
,
3268 (__v2di
)_mm_min_epu64(__A
, __B
),
3269 (__v2di
)_mm_setzero_si128());
3272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3273 _mm256_min_epu64 (__m256i __A
, __m256i __B
) {
3274 return (__m256i
)__builtin_elementwise_min((__v4du
)__A
, (__v4du
)__B
);
3277 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3278 _mm256_mask_min_epu64 (__m256i __W
, __mmask8 __M
, __m256i __A
, __m256i __B
) {
3279 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3280 (__v4di
)_mm256_min_epu64(__A
, __B
),
3284 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3285 _mm256_maskz_min_epu64 (__mmask8 __M
, __m256i __A
, __m256i __B
) {
3286 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
3287 (__v4di
)_mm256_min_epu64(__A
, __B
),
3288 (__v4di
)_mm256_setzero_si256());
/* roundscale (packed double) macro family.  Each macro forwards, in order:
 * the source vector A, the immediate imm cast to int, a pass-through vector
 * (zero for the plain and maskz forms, W for the mask form) and a write mask
 * (all ones for the plain form, U otherwise) to the rndscalepd builtin.
 * These are macros rather than functions so imm stays a constant expression. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))

#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))

#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))

#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))

#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))

#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
/* roundscale (packed float) macro family.  Each macro forwards, in order:
 * the source vector A, the immediate imm cast to int, a pass-through vector
 * (zero for the plain and maskz forms, W for the mask form) and a write mask
 * (all ones for the plain form, U otherwise) to the rndscaleps builtin. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))

#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))

#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))

#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))

#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))

#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3365 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3366 _mm_scalef_pd (__m128d __A
, __m128d __B
) {
3367 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3374 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3375 _mm_mask_scalef_pd (__m128d __W
, __mmask8 __U
, __m128d __A
,
3377 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3383 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3384 _mm_maskz_scalef_pd (__mmask8 __U
, __m128d __A
, __m128d __B
) {
3385 return (__m128d
) __builtin_ia32_scalefpd128_mask ((__v2df
) __A
,
3392 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3393 _mm256_scalef_pd (__m256d __A
, __m256d __B
) {
3394 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3397 _mm256_setzero_pd (),
3401 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3402 _mm256_mask_scalef_pd (__m256d __W
, __mmask8 __U
, __m256d __A
,
3404 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3410 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3411 _mm256_maskz_scalef_pd (__mmask8 __U
, __m256d __A
, __m256d __B
) {
3412 return (__m256d
) __builtin_ia32_scalefpd256_mask ((__v4df
) __A
,
3415 _mm256_setzero_pd (),
3419 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3420 _mm_scalef_ps (__m128 __A
, __m128 __B
) {
3421 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3428 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3429 _mm_mask_scalef_ps (__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3430 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3436 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3437 _mm_maskz_scalef_ps (__mmask8 __U
, __m128 __A
, __m128 __B
) {
3438 return (__m128
) __builtin_ia32_scalefps128_mask ((__v4sf
) __A
,
3445 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3446 _mm256_scalef_ps (__m256 __A
, __m256 __B
) {
3447 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3450 _mm256_setzero_ps (),
3454 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3455 _mm256_mask_scalef_ps (__m256 __W
, __mmask8 __U
, __m256 __A
,
3457 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3463 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3464 _mm256_maskz_scalef_ps (__mmask8 __U
, __m256 __A
, __m256 __B
) {
3465 return (__m256
) __builtin_ia32_scalefps256_mask ((__v8sf
) __A
,
3468 _mm256_setzero_ps (),
/* Scatter macros taking an __v2di/__v4di index vector.  Every macro forwards
 * (addr, mask, index, v1, scale) to the matching scatterdiv builtin; the
 * unmasked forms pass an all-ones mask ((__mmask8)-1).  They are macros so
 * that scale reaches the builtin as a constant expression. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

/* The 256-bit index forms below take a 128-bit data vector (v1 is cast to
 * __v4sf / __v4si), matching the four 64-bit indices. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))
/* Scatter macros taking an __v4si/__v8si index vector.  Every macro forwards
 * (addr, mask, index, v1, scale) to the matching scattersiv builtin; the
 * unmasked forms pass an all-ones mask ((__mmask8)-1).  They are macros so
 * that scale reaches the builtin as a constant expression. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), (int)(scale))

#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), (int)(scale))

/* The 256-bit data forms below still take a 128-bit index vector (index is
 * cast to __v4si). */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), (int)(scale))

#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), (int)(scale))

#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
                               (int)(scale))

#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), (int)(scale))

#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))

#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
                               (int)(scale))

#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))

#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), (int)(scale))
3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3633 _mm_mask_sqrt_pd(__m128d __W
, __mmask8 __U
, __m128d __A
) {
3634 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
3635 (__v2df
)_mm_sqrt_pd(__A
),
3639 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3640 _mm_maskz_sqrt_pd(__mmask8 __U
, __m128d __A
) {
3641 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
3642 (__v2df
)_mm_sqrt_pd(__A
),
3643 (__v2df
)_mm_setzero_pd());
3646 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3647 _mm256_mask_sqrt_pd(__m256d __W
, __mmask8 __U
, __m256d __A
) {
3648 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
3649 (__v4df
)_mm256_sqrt_pd(__A
),
3653 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3654 _mm256_maskz_sqrt_pd(__mmask8 __U
, __m256d __A
) {
3655 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
3656 (__v4df
)_mm256_sqrt_pd(__A
),
3657 (__v4df
)_mm256_setzero_pd());
3660 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3661 _mm_mask_sqrt_ps(__m128 __W
, __mmask8 __U
, __m128 __A
) {
3662 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
3663 (__v4sf
)_mm_sqrt_ps(__A
),
3667 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3668 _mm_maskz_sqrt_ps(__mmask8 __U
, __m128 __A
) {
3669 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
3670 (__v4sf
)_mm_sqrt_ps(__A
),
3671 (__v4sf
)_mm_setzero_ps());
3674 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3675 _mm256_mask_sqrt_ps(__m256 __W
, __mmask8 __U
, __m256 __A
) {
3676 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
3677 (__v8sf
)_mm256_sqrt_ps(__A
),
3681 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3682 _mm256_maskz_sqrt_ps(__mmask8 __U
, __m256 __A
) {
3683 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
3684 (__v8sf
)_mm256_sqrt_ps(__A
),
3685 (__v8sf
)_mm256_setzero_ps());
3688 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3689 _mm_mask_sub_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
) {
3690 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
3691 (__v2df
)_mm_sub_pd(__A
, __B
),
3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3696 _mm_maskz_sub_pd(__mmask8 __U
, __m128d __A
, __m128d __B
) {
3697 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
3698 (__v2df
)_mm_sub_pd(__A
, __B
),
3699 (__v2df
)_mm_setzero_pd());
3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3703 _mm256_mask_sub_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
) {
3704 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
3705 (__v4df
)_mm256_sub_pd(__A
, __B
),
3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3710 _mm256_maskz_sub_pd(__mmask8 __U
, __m256d __A
, __m256d __B
) {
3711 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
3712 (__v4df
)_mm256_sub_pd(__A
, __B
),
3713 (__v4df
)_mm256_setzero_pd());
3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3717 _mm_mask_sub_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
) {
3718 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
3719 (__v4sf
)_mm_sub_ps(__A
, __B
),
3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3724 _mm_maskz_sub_ps(__mmask8 __U
, __m128 __A
, __m128 __B
) {
3725 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
3726 (__v4sf
)_mm_sub_ps(__A
, __B
),
3727 (__v4sf
)_mm_setzero_ps());
3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3731 _mm256_mask_sub_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
) {
3732 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
3733 (__v8sf
)_mm256_sub_ps(__A
, __B
),
3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3738 _mm256_maskz_sub_ps(__mmask8 __U
, __m256 __A
, __m256 __B
) {
3739 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
3740 (__v8sf
)_mm256_sub_ps(__A
, __B
),
3741 (__v8sf
)_mm256_setzero_ps());
3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3745 _mm_permutex2var_epi32(__m128i __A
, __m128i __I
, __m128i __B
) {
3746 return (__m128i
)__builtin_ia32_vpermi2vard128((__v4si
) __A
, (__v4si
)__I
,
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751 _mm_mask_permutex2var_epi32(__m128i __A
, __mmask8 __U
, __m128i __I
,
3753 return (__m128i
)__builtin_ia32_selectd_128(__U
,
3754 (__v4si
)_mm_permutex2var_epi32(__A
, __I
, __B
),
3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759 _mm_mask2_permutex2var_epi32(__m128i __A
, __m128i __I
, __mmask8 __U
,
3761 return (__m128i
)__builtin_ia32_selectd_128(__U
,
3762 (__v4si
)_mm_permutex2var_epi32(__A
, __I
, __B
),
3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3767 _mm_maskz_permutex2var_epi32(__mmask8 __U
, __m128i __A
, __m128i __I
,
3769 return (__m128i
)__builtin_ia32_selectd_128(__U
,
3770 (__v4si
)_mm_permutex2var_epi32(__A
, __I
, __B
),
3771 (__v4si
)_mm_setzero_si128());
3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3775 _mm256_permutex2var_epi32(__m256i __A
, __m256i __I
, __m256i __B
) {
3776 return (__m256i
)__builtin_ia32_vpermi2vard256((__v8si
)__A
, (__v8si
) __I
,
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781 _mm256_mask_permutex2var_epi32(__m256i __A
, __mmask8 __U
, __m256i __I
,
3783 return (__m256i
)__builtin_ia32_selectd_256(__U
,
3784 (__v8si
)_mm256_permutex2var_epi32(__A
, __I
, __B
),
3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789 _mm256_mask2_permutex2var_epi32(__m256i __A
, __m256i __I
, __mmask8 __U
,
3791 return (__m256i
)__builtin_ia32_selectd_256(__U
,
3792 (__v8si
)_mm256_permutex2var_epi32(__A
, __I
, __B
),
3796 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U
, __m256i __A
, __m256i __I
,
3799 return (__m256i
)__builtin_ia32_selectd_256(__U
,
3800 (__v8si
)_mm256_permutex2var_epi32(__A
, __I
, __B
),
3801 (__v8si
)_mm256_setzero_si256());
3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3805 _mm_permutex2var_pd(__m128d __A
, __m128i __I
, __m128d __B
) {
3806 return (__m128d
)__builtin_ia32_vpermi2varpd128((__v2df
)__A
, (__v2di
)__I
,
3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811 _mm_mask_permutex2var_pd(__m128d __A
, __mmask8 __U
, __m128i __I
, __m128d __B
) {
3812 return (__m128d
)__builtin_ia32_selectpd_128(__U
,
3813 (__v2df
)_mm_permutex2var_pd(__A
, __I
, __B
),
3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3818 _mm_mask2_permutex2var_pd(__m128d __A
, __m128i __I
, __mmask8 __U
, __m128d __B
) {
3819 return (__m128d
)__builtin_ia32_selectpd_128(__U
,
3820 (__v2df
)_mm_permutex2var_pd(__A
, __I
, __B
),
3821 (__v2df
)(__m128d
)__I
);
3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825 _mm_maskz_permutex2var_pd(__mmask8 __U
, __m128d __A
, __m128i __I
, __m128d __B
) {
3826 return (__m128d
)__builtin_ia32_selectpd_128(__U
,
3827 (__v2df
)_mm_permutex2var_pd(__A
, __I
, __B
),
3828 (__v2df
)_mm_setzero_pd());
3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3832 _mm256_permutex2var_pd(__m256d __A
, __m256i __I
, __m256d __B
) {
3833 return (__m256d
)__builtin_ia32_vpermi2varpd256((__v4df
)__A
, (__v4di
)__I
,
3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838 _mm256_mask_permutex2var_pd(__m256d __A
, __mmask8 __U
, __m256i __I
,
3840 return (__m256d
)__builtin_ia32_selectpd_256(__U
,
3841 (__v4df
)_mm256_permutex2var_pd(__A
, __I
, __B
),
3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846 _mm256_mask2_permutex2var_pd(__m256d __A
, __m256i __I
, __mmask8 __U
,
3848 return (__m256d
)__builtin_ia32_selectpd_256(__U
,
3849 (__v4df
)_mm256_permutex2var_pd(__A
, __I
, __B
),
3850 (__v4df
)(__m256d
)__I
);
3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3854 _mm256_maskz_permutex2var_pd(__mmask8 __U
, __m256d __A
, __m256i __I
,
3856 return (__m256d
)__builtin_ia32_selectpd_256(__U
,
3857 (__v4df
)_mm256_permutex2var_pd(__A
, __I
, __B
),
3858 (__v4df
)_mm256_setzero_pd());
3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3862 _mm_permutex2var_ps(__m128 __A
, __m128i __I
, __m128 __B
) {
3863 return (__m128
)__builtin_ia32_vpermi2varps128((__v4sf
)__A
, (__v4si
)__I
,
3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868 _mm_mask_permutex2var_ps(__m128 __A
, __mmask8 __U
, __m128i __I
, __m128 __B
) {
3869 return (__m128
)__builtin_ia32_selectps_128(__U
,
3870 (__v4sf
)_mm_permutex2var_ps(__A
, __I
, __B
),
3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3875 _mm_mask2_permutex2var_ps(__m128 __A
, __m128i __I
, __mmask8 __U
, __m128 __B
) {
3876 return (__m128
)__builtin_ia32_selectps_128(__U
,
3877 (__v4sf
)_mm_permutex2var_ps(__A
, __I
, __B
),
3878 (__v4sf
)(__m128
)__I
);
3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882 _mm_maskz_permutex2var_ps(__mmask8 __U
, __m128 __A
, __m128i __I
, __m128 __B
) {
3883 return (__m128
)__builtin_ia32_selectps_128(__U
,
3884 (__v4sf
)_mm_permutex2var_ps(__A
, __I
, __B
),
3885 (__v4sf
)_mm_setzero_ps());
3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3889 _mm256_permutex2var_ps(__m256 __A
, __m256i __I
, __m256 __B
) {
3890 return (__m256
)__builtin_ia32_vpermi2varps256((__v8sf
)__A
, (__v8si
)__I
,
3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895 _mm256_mask_permutex2var_ps(__m256 __A
, __mmask8 __U
, __m256i __I
, __m256 __B
) {
3896 return (__m256
)__builtin_ia32_selectps_256(__U
,
3897 (__v8sf
)_mm256_permutex2var_ps(__A
, __I
, __B
),
3901 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3902 _mm256_mask2_permutex2var_ps(__m256 __A
, __m256i __I
, __mmask8 __U
,
3904 return (__m256
)__builtin_ia32_selectps_256(__U
,
3905 (__v8sf
)_mm256_permutex2var_ps(__A
, __I
, __B
),
3906 (__v8sf
)(__m256
)__I
);
3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910 _mm256_maskz_permutex2var_ps(__mmask8 __U
, __m256 __A
, __m256i __I
,
3912 return (__m256
)__builtin_ia32_selectps_256(__U
,
3913 (__v8sf
)_mm256_permutex2var_ps(__A
, __I
, __B
),
3914 (__v8sf
)_mm256_setzero_ps());
3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3918 _mm_permutex2var_epi64(__m128i __A
, __m128i __I
, __m128i __B
) {
3919 return (__m128i
)__builtin_ia32_vpermi2varq128((__v2di
)__A
, (__v2di
)__I
,
3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924 _mm_mask_permutex2var_epi64(__m128i __A
, __mmask8 __U
, __m128i __I
,
3926 return (__m128i
)__builtin_ia32_selectq_128(__U
,
3927 (__v2di
)_mm_permutex2var_epi64(__A
, __I
, __B
),
3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932 _mm_mask2_permutex2var_epi64(__m128i __A
, __m128i __I
, __mmask8 __U
,
3934 return (__m128i
)__builtin_ia32_selectq_128(__U
,
3935 (__v2di
)_mm_permutex2var_epi64(__A
, __I
, __B
),
3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3940 _mm_maskz_permutex2var_epi64(__mmask8 __U
, __m128i __A
, __m128i __I
,
3942 return (__m128i
)__builtin_ia32_selectq_128(__U
,
3943 (__v2di
)_mm_permutex2var_epi64(__A
, __I
, __B
),
3944 (__v2di
)_mm_setzero_si128());
3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3949 _mm256_permutex2var_epi64(__m256i __A
, __m256i __I
, __m256i __B
) {
3950 return (__m256i
)__builtin_ia32_vpermi2varq256((__v4di
)__A
, (__v4di
) __I
,
3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955 _mm256_mask_permutex2var_epi64(__m256i __A
, __mmask8 __U
, __m256i __I
,
3957 return (__m256i
)__builtin_ia32_selectq_256(__U
,
3958 (__v4di
)_mm256_permutex2var_epi64(__A
, __I
, __B
),
3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963 _mm256_mask2_permutex2var_epi64(__m256i __A
, __m256i __I
, __mmask8 __U
,
3965 return (__m256i
)__builtin_ia32_selectq_256(__U
,
3966 (__v4di
)_mm256_permutex2var_epi64(__A
, __I
, __B
),
3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U
, __m256i __A
, __m256i __I
,
3973 return (__m256i
)__builtin_ia32_selectq_256(__U
,
3974 (__v4di
)_mm256_permutex2var_epi64(__A
, __I
, __B
),
3975 (__v4di
)_mm256_setzero_si256());
3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3979 _mm_mask_cvtepi8_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
)
3981 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
3982 (__v4si
)_mm_cvtepi8_epi32(__A
),
3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U
, __m128i __A
)
3989 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
3990 (__v4si
)_mm_cvtepi8_epi32(__A
),
3991 (__v4si
)_mm_setzero_si128());
3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3995 _mm256_mask_cvtepi8_epi32 (__m256i __W
, __mmask8 __U
, __m128i __A
)
3997 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
3998 (__v8si
)_mm256_cvtepi8_epi32(__A
),
4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U
, __m128i __A
)
4005 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4006 (__v8si
)_mm256_cvtepi8_epi32(__A
),
4007 (__v8si
)_mm256_setzero_si256());
4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4011 _mm_mask_cvtepi8_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
)
4013 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4014 (__v2di
)_mm_cvtepi8_epi64(__A
),
4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U
, __m128i __A
)
4021 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4022 (__v2di
)_mm_cvtepi8_epi64(__A
),
4023 (__v2di
)_mm_setzero_si128());
4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4027 _mm256_mask_cvtepi8_epi64(__m256i __W
, __mmask8 __U
, __m128i __A
)
4029 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4030 (__v4di
)_mm256_cvtepi8_epi64(__A
),
4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U
, __m128i __A
)
4037 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4038 (__v4di
)_mm256_cvtepi8_epi64(__A
),
4039 (__v4di
)_mm256_setzero_si256());
4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4043 _mm_mask_cvtepi32_epi64(__m128i __W
, __mmask8 __U
, __m128i __X
)
4045 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4046 (__v2di
)_mm_cvtepi32_epi64(__X
),
4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4051 _mm_maskz_cvtepi32_epi64(__mmask8 __U
, __m128i __X
)
4053 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4054 (__v2di
)_mm_cvtepi32_epi64(__X
),
4055 (__v2di
)_mm_setzero_si128());
4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4059 _mm256_mask_cvtepi32_epi64(__m256i __W
, __mmask8 __U
, __m128i __X
)
4061 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4062 (__v4di
)_mm256_cvtepi32_epi64(__X
),
4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U
, __m128i __X
)
4069 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4070 (__v4di
)_mm256_cvtepi32_epi64(__X
),
4071 (__v4di
)_mm256_setzero_si256());
4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4075 _mm_mask_cvtepi16_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
)
4077 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4078 (__v4si
)_mm_cvtepi16_epi32(__A
),
4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U
, __m128i __A
)
4085 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4086 (__v4si
)_mm_cvtepi16_epi32(__A
),
4087 (__v4si
)_mm_setzero_si128());
4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4091 _mm256_mask_cvtepi16_epi32(__m256i __W
, __mmask8 __U
, __m128i __A
)
4093 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4094 (__v8si
)_mm256_cvtepi16_epi32(__A
),
4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U
, __m128i __A
)
4101 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4102 (__v8si
)_mm256_cvtepi16_epi32(__A
),
4103 (__v8si
)_mm256_setzero_si256());
4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4107 _mm_mask_cvtepi16_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
)
4109 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4110 (__v2di
)_mm_cvtepi16_epi64(__A
),
4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U
, __m128i __A
)
4117 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4118 (__v2di
)_mm_cvtepi16_epi64(__A
),
4119 (__v2di
)_mm_setzero_si128());
4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4123 _mm256_mask_cvtepi16_epi64(__m256i __W
, __mmask8 __U
, __m128i __A
)
4125 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4126 (__v4di
)_mm256_cvtepi16_epi64(__A
),
4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4131 _mm256_maskz_cvtepi16_epi64(__mmask8 __U
, __m128i __A
)
4133 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4134 (__v4di
)_mm256_cvtepi16_epi64(__A
),
4135 (__v4di
)_mm256_setzero_si256());
4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4140 _mm_mask_cvtepu8_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
)
4142 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4143 (__v4si
)_mm_cvtepu8_epi32(__A
),
4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U
, __m128i __A
)
4150 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4151 (__v4si
)_mm_cvtepu8_epi32(__A
),
4152 (__v4si
)_mm_setzero_si128());
4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4156 _mm256_mask_cvtepu8_epi32(__m256i __W
, __mmask8 __U
, __m128i __A
)
4158 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4159 (__v8si
)_mm256_cvtepu8_epi32(__A
),
4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U
, __m128i __A
)
4166 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4167 (__v8si
)_mm256_cvtepu8_epi32(__A
),
4168 (__v8si
)_mm256_setzero_si256());
4171 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4172 _mm_mask_cvtepu8_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
)
4174 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4175 (__v2di
)_mm_cvtepu8_epi64(__A
),
4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U
, __m128i __A
)
4182 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4183 (__v2di
)_mm_cvtepu8_epi64(__A
),
4184 (__v2di
)_mm_setzero_si128());
4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4188 _mm256_mask_cvtepu8_epi64(__m256i __W
, __mmask8 __U
, __m128i __A
)
4190 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4191 (__v4di
)_mm256_cvtepu8_epi64(__A
),
4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U
, __m128i __A
)
4198 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4199 (__v4di
)_mm256_cvtepu8_epi64(__A
),
4200 (__v4di
)_mm256_setzero_si256());
4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4204 _mm_mask_cvtepu32_epi64(__m128i __W
, __mmask8 __U
, __m128i __X
)
4206 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4207 (__v2di
)_mm_cvtepu32_epi64(__X
),
4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U
, __m128i __X
)
4214 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4215 (__v2di
)_mm_cvtepu32_epi64(__X
),
4216 (__v2di
)_mm_setzero_si128());
4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4220 _mm256_mask_cvtepu32_epi64(__m256i __W
, __mmask8 __U
, __m128i __X
)
4222 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4223 (__v4di
)_mm256_cvtepu32_epi64(__X
),
4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U
, __m128i __X
)
4230 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4231 (__v4di
)_mm256_cvtepu32_epi64(__X
),
4232 (__v4di
)_mm256_setzero_si256());
4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4236 _mm_mask_cvtepu16_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
)
4238 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4239 (__v4si
)_mm_cvtepu16_epi32(__A
),
4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U
, __m128i __A
)
4246 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4247 (__v4si
)_mm_cvtepu16_epi32(__A
),
4248 (__v4si
)_mm_setzero_si128());
4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4252 _mm256_mask_cvtepu16_epi32(__m256i __W
, __mmask8 __U
, __m128i __A
)
4254 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4255 (__v8si
)_mm256_cvtepu16_epi32(__A
),
4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U
, __m128i __A
)
4262 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4263 (__v8si
)_mm256_cvtepu16_epi32(__A
),
4264 (__v8si
)_mm256_setzero_si256());
4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4268 _mm_mask_cvtepu16_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
)
4270 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4271 (__v2di
)_mm_cvtepu16_epi64(__A
),
4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U
, __m128i __A
)
4278 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4279 (__v2di
)_mm_cvtepu16_epi64(__A
),
4280 (__v2di
)_mm_setzero_si128());
4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4284 _mm256_mask_cvtepu16_epi64(__m256i __W
, __mmask8 __U
, __m128i __A
)
4286 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4287 (__v4di
)_mm256_cvtepu16_epi64(__A
),
4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U
, __m128i __A
)
4294 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4295 (__v4di
)_mm256_cvtepu16_epi64(__A
),
4296 (__v4di
)_mm256_setzero_si256());
/* Expands to __builtin_ia32_prold128 on A (viewed as __v4si) with B cast to
 * int; the result is cast back to __m128i. */
#define _mm_rol_epi32(A, B) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectd_128 picks between
 * _mm_rol_epi32(A, B) and the fallback W under mask U. */
#define _mm_mask_rol_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_rol_epi32((A), (B)), (__v4si)(__m128i)(W)))
/* Zero-masking form: __builtin_ia32_selectd_128 picks between
 * _mm_rol_epi32(A, B) and _mm_setzero_si128() under mask U. */
#define _mm_maskz_rol_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_rol_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
/* Expands to __builtin_ia32_prold256 on A (viewed as __v8si) with B cast to
 * int; the result is cast back to __m256i. */
#define _mm256_rol_epi32(A, B) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectd_256 picks between
 * _mm256_rol_epi32(A, B) and the fallback W under mask U. */
#define _mm256_mask_rol_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
/* Zero-masking form: __builtin_ia32_selectd_256 picks between
 * _mm256_rol_epi32(A, B) and _mm256_setzero_si256() under mask U. */
#define _mm256_maskz_rol_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_rol_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
/* Expands to __builtin_ia32_prolq128 on A (viewed as __v2di) with B cast to
 * int; the result is cast back to __m128i. */
#define _mm_rol_epi64(A, B) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectq_128 picks between
 * _mm_rol_epi64(A, B) and the fallback W under mask U. */
#define _mm_mask_rol_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_rol_epi64((A), (B)), (__v2di)(__m128i)(W)))
/* Zero-masking form: __builtin_ia32_selectq_128 picks between
 * _mm_rol_epi64(A, B) and _mm_setzero_si128() under mask U. */
#define _mm_maskz_rol_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_rol_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
/* Expands to __builtin_ia32_prolq256 on A (viewed as __v4di) with B cast to
 * int; the result is cast back to __m256i. */
#define _mm256_rol_epi64(A, B) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectq_256 picks between
 * _mm256_rol_epi64(A, B) and the fallback W under mask U. */
#define _mm256_mask_rol_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
/* Zero-masking form: __builtin_ia32_selectq_256 picks between
 * _mm256_rol_epi64(A, B) and _mm256_setzero_si256() under mask U. */
#define _mm256_maskz_rol_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_rol_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4353 _mm_rolv_epi32 (__m128i __A
, __m128i __B
)
4355 return (__m128i
)__builtin_ia32_prolvd128((__v4si
)__A
, (__v4si
)__B
);
4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359 _mm_mask_rolv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4361 return (__m128i
)__builtin_ia32_selectd_128(__U
,
4362 (__v4si
)_mm_rolv_epi32(__A
, __B
),
4366 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367 _mm_maskz_rolv_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
4369 return (__m128i
)__builtin_ia32_selectd_128(__U
,
4370 (__v4si
)_mm_rolv_epi32(__A
, __B
),
4371 (__v4si
)_mm_setzero_si128());
4374 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4375 _mm256_rolv_epi32 (__m256i __A
, __m256i __B
)
4377 return (__m256i
)__builtin_ia32_prolvd256((__v8si
)__A
, (__v8si
)__B
);
4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381 _mm256_mask_rolv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
4383 return (__m256i
)__builtin_ia32_selectd_256(__U
,
4384 (__v8si
)_mm256_rolv_epi32(__A
, __B
),
4388 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389 _mm256_maskz_rolv_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
4391 return (__m256i
)__builtin_ia32_selectd_256(__U
,
4392 (__v8si
)_mm256_rolv_epi32(__A
, __B
),
4393 (__v8si
)_mm256_setzero_si256());
4396 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4397 _mm_rolv_epi64 (__m128i __A
, __m128i __B
)
4399 return (__m128i
)__builtin_ia32_prolvq128((__v2di
)__A
, (__v2di
)__B
);
4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403 _mm_mask_rolv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4405 return (__m128i
)__builtin_ia32_selectq_128(__U
,
4406 (__v2di
)_mm_rolv_epi64(__A
, __B
),
4410 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411 _mm_maskz_rolv_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
4413 return (__m128i
)__builtin_ia32_selectq_128(__U
,
4414 (__v2di
)_mm_rolv_epi64(__A
, __B
),
4415 (__v2di
)_mm_setzero_si128());
4418 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4419 _mm256_rolv_epi64 (__m256i __A
, __m256i __B
)
4421 return (__m256i
)__builtin_ia32_prolvq256((__v4di
)__A
, (__v4di
)__B
);
4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425 _mm256_mask_rolv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
4427 return (__m256i
)__builtin_ia32_selectq_256(__U
,
4428 (__v4di
)_mm256_rolv_epi64(__A
, __B
),
4432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433 _mm256_maskz_rolv_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
4435 return (__m256i
)__builtin_ia32_selectq_256(__U
,
4436 (__v4di
)_mm256_rolv_epi64(__A
, __B
),
4437 (__v4di
)_mm256_setzero_si256());
/* Expands to __builtin_ia32_prord128 on A (viewed as __v4si) with B cast to
 * int; the result is cast back to __m128i. */
#define _mm_ror_epi32(A, B) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectd_128 picks between
 * _mm_ror_epi32(A, B) and the fallback W under mask U. */
#define _mm_mask_ror_epi32(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_ror_epi32((A), (B)), (__v4si)(__m128i)(W)))
/* Zero-masking form: __builtin_ia32_selectd_128 picks between
 * _mm_ror_epi32(A, B) and _mm_setzero_si128() under mask U. */
#define _mm_maskz_ror_epi32(U, A, B) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), (__v4si)_mm_ror_epi32((A), (B)), \
      (__v4si)_mm_setzero_si128()))
/* Expands to __builtin_ia32_prord256 on A (viewed as __v8si) with B cast to
 * int; the result is cast back to __m256i. */
#define _mm256_ror_epi32(A, B) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectd_256 picks between
 * _mm256_ror_epi32(A, B) and the fallback W under mask U. */
#define _mm256_mask_ror_epi32(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)(__m256i)(W)))
/* Zero-masking form: __builtin_ia32_selectd_256 picks between
 * _mm256_ror_epi32(A, B) and _mm256_setzero_si256() under mask U. */
#define _mm256_maskz_ror_epi32(U, A, B) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), (__v8si)_mm256_ror_epi32((A), (B)), \
      (__v8si)_mm256_setzero_si256()))
/* Expands to __builtin_ia32_prorq128 on A (viewed as __v2di) with B cast to
 * int; the result is cast back to __m128i. */
#define _mm_ror_epi64(A, B) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectq_128 picks between
 * _mm_ror_epi64(A, B) and the fallback W under mask U. */
#define _mm_mask_ror_epi64(W, U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_ror_epi64((A), (B)), (__v2di)(__m128i)(W)))
/* Zero-masking form: __builtin_ia32_selectq_128 picks between
 * _mm_ror_epi64(A, B) and _mm_setzero_si128() under mask U. */
#define _mm_maskz_ror_epi64(U, A, B) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), (__v2di)_mm_ror_epi64((A), (B)), \
      (__v2di)_mm_setzero_si128()))
/* Expands to __builtin_ia32_prorq256 on A (viewed as __v4di) with B cast to
 * int; the result is cast back to __m256i. */
#define _mm256_ror_epi64(A, B) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(A), (int)(B)))
/* Merge-masking form: __builtin_ia32_selectq_256 picks between
 * _mm256_ror_epi64(A, B) and the fallback W under mask U. */
#define _mm256_mask_ror_epi64(W, U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)(__m256i)(W)))
/* Zero-masking form: __builtin_ia32_selectq_256 picks between
 * _mm256_ror_epi64(A, B) and _mm256_setzero_si256() under mask U. */
#define _mm256_maskz_ror_epi64(U, A, B) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), (__v4di)_mm256_ror_epi64((A), (B)), \
      (__v4di)_mm256_setzero_si256()))
4492 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4493 _mm_mask_sll_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4495 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4496 (__v4si
)_mm_sll_epi32(__A
, __B
),
4500 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4501 _mm_maskz_sll_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
4503 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4504 (__v4si
)_mm_sll_epi32(__A
, __B
),
4505 (__v4si
)_mm_setzero_si128());
4508 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4509 _mm256_mask_sll_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
4511 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4512 (__v8si
)_mm256_sll_epi32(__A
, __B
),
4516 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4517 _mm256_maskz_sll_epi32(__mmask8 __U
, __m256i __A
, __m128i __B
)
4519 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4520 (__v8si
)_mm256_sll_epi32(__A
, __B
),
4521 (__v8si
)_mm256_setzero_si256());
4524 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4525 _mm_mask_slli_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
4527 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4528 (__v4si
)_mm_slli_epi32(__A
, (int)__B
),
4532 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4533 _mm_maskz_slli_epi32(__mmask8 __U
, __m128i __A
, unsigned int __B
)
4535 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4536 (__v4si
)_mm_slli_epi32(__A
, (int)__B
),
4537 (__v4si
)_mm_setzero_si128());
4540 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4541 _mm256_mask_slli_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, unsigned int __B
)
4543 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4544 (__v8si
)_mm256_slli_epi32(__A
, (int)__B
),
4548 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4549 _mm256_maskz_slli_epi32(__mmask8 __U
, __m256i __A
, unsigned int __B
)
4551 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4552 (__v8si
)_mm256_slli_epi32(__A
, (int)__B
),
4553 (__v8si
)_mm256_setzero_si256());
4556 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4557 _mm_mask_sll_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4559 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4560 (__v2di
)_mm_sll_epi64(__A
, __B
),
4564 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4565 _mm_maskz_sll_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
4567 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4568 (__v2di
)_mm_sll_epi64(__A
, __B
),
4569 (__v2di
)_mm_setzero_si128());
4572 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4573 _mm256_mask_sll_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
4575 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4576 (__v4di
)_mm256_sll_epi64(__A
, __B
),
4580 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4581 _mm256_maskz_sll_epi64(__mmask8 __U
, __m256i __A
, __m128i __B
)
4583 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4584 (__v4di
)_mm256_sll_epi64(__A
, __B
),
4585 (__v4di
)_mm256_setzero_si256());
4588 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4589 _mm_mask_slli_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
4591 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4592 (__v2di
)_mm_slli_epi64(__A
, (int)__B
),
4596 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4597 _mm_maskz_slli_epi64(__mmask8 __U
, __m128i __A
, unsigned int __B
)
4599 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4600 (__v2di
)_mm_slli_epi64(__A
, (int)__B
),
4601 (__v2di
)_mm_setzero_si128());
4604 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4605 _mm256_mask_slli_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, unsigned int __B
)
4607 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4608 (__v4di
)_mm256_slli_epi64(__A
, (int)__B
),
4612 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4613 _mm256_maskz_slli_epi64(__mmask8 __U
, __m256i __A
, unsigned int __B
)
4615 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4616 (__v4di
)_mm256_slli_epi64(__A
, (int)__B
),
4617 (__v4di
)_mm256_setzero_si256());
4620 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4621 _mm_rorv_epi32 (__m128i __A
, __m128i __B
)
4623 return (__m128i
)__builtin_ia32_prorvd128((__v4si
)__A
, (__v4si
)__B
);
4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627 _mm_mask_rorv_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4629 return (__m128i
)__builtin_ia32_selectd_128(__U
,
4630 (__v4si
)_mm_rorv_epi32(__A
, __B
),
4634 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635 _mm_maskz_rorv_epi32 (__mmask8 __U
, __m128i __A
, __m128i __B
)
4637 return (__m128i
)__builtin_ia32_selectd_128(__U
,
4638 (__v4si
)_mm_rorv_epi32(__A
, __B
),
4639 (__v4si
)_mm_setzero_si128());
4642 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4643 _mm256_rorv_epi32 (__m256i __A
, __m256i __B
)
4645 return (__m256i
)__builtin_ia32_prorvd256((__v8si
)__A
, (__v8si
)__B
);
4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649 _mm256_mask_rorv_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
4651 return (__m256i
)__builtin_ia32_selectd_256(__U
,
4652 (__v8si
)_mm256_rorv_epi32(__A
, __B
),
4656 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657 _mm256_maskz_rorv_epi32 (__mmask8 __U
, __m256i __A
, __m256i __B
)
4659 return (__m256i
)__builtin_ia32_selectd_256(__U
,
4660 (__v8si
)_mm256_rorv_epi32(__A
, __B
),
4661 (__v8si
)_mm256_setzero_si256());
4664 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4665 _mm_rorv_epi64 (__m128i __A
, __m128i __B
)
4667 return (__m128i
)__builtin_ia32_prorvq128((__v2di
)__A
, (__v2di
)__B
);
4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671 _mm_mask_rorv_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4673 return (__m128i
)__builtin_ia32_selectq_128(__U
,
4674 (__v2di
)_mm_rorv_epi64(__A
, __B
),
4678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679 _mm_maskz_rorv_epi64 (__mmask8 __U
, __m128i __A
, __m128i __B
)
4681 return (__m128i
)__builtin_ia32_selectq_128(__U
,
4682 (__v2di
)_mm_rorv_epi64(__A
, __B
),
4683 (__v2di
)_mm_setzero_si128());
4686 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4687 _mm256_rorv_epi64 (__m256i __A
, __m256i __B
)
4689 return (__m256i
)__builtin_ia32_prorvq256((__v4di
)__A
, (__v4di
)__B
);
4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693 _mm256_mask_rorv_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
4695 return (__m256i
)__builtin_ia32_selectq_256(__U
,
4696 (__v4di
)_mm256_rorv_epi64(__A
, __B
),
4700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701 _mm256_maskz_rorv_epi64 (__mmask8 __U
, __m256i __A
, __m256i __B
)
4703 return (__m256i
)__builtin_ia32_selectq_256(__U
,
4704 (__v4di
)_mm256_rorv_epi64(__A
, __B
),
4705 (__v4di
)_mm256_setzero_si256());
4708 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4709 _mm_mask_sllv_epi64(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
4711 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4712 (__v2di
)_mm_sllv_epi64(__X
, __Y
),
4716 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4717 _mm_maskz_sllv_epi64(__mmask8 __U
, __m128i __X
, __m128i __Y
)
4719 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4720 (__v2di
)_mm_sllv_epi64(__X
, __Y
),
4721 (__v2di
)_mm_setzero_si128());
4724 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4725 _mm256_mask_sllv_epi64(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
4727 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4728 (__v4di
)_mm256_sllv_epi64(__X
, __Y
),
4732 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4733 _mm256_maskz_sllv_epi64(__mmask8 __U
, __m256i __X
, __m256i __Y
)
4735 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4736 (__v4di
)_mm256_sllv_epi64(__X
, __Y
),
4737 (__v4di
)_mm256_setzero_si256());
4740 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4741 _mm_mask_sllv_epi32(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
4743 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4744 (__v4si
)_mm_sllv_epi32(__X
, __Y
),
4748 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4749 _mm_maskz_sllv_epi32(__mmask8 __U
, __m128i __X
, __m128i __Y
)
4751 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4752 (__v4si
)_mm_sllv_epi32(__X
, __Y
),
4753 (__v4si
)_mm_setzero_si128());
4756 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4757 _mm256_mask_sllv_epi32(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
4759 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4760 (__v8si
)_mm256_sllv_epi32(__X
, __Y
),
4764 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4765 _mm256_maskz_sllv_epi32(__mmask8 __U
, __m256i __X
, __m256i __Y
)
4767 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4768 (__v8si
)_mm256_sllv_epi32(__X
, __Y
),
4769 (__v8si
)_mm256_setzero_si256());
4772 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4773 _mm_mask_srlv_epi64(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
4775 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4776 (__v2di
)_mm_srlv_epi64(__X
, __Y
),
4780 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4781 _mm_maskz_srlv_epi64(__mmask8 __U
, __m128i __X
, __m128i __Y
)
4783 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4784 (__v2di
)_mm_srlv_epi64(__X
, __Y
),
4785 (__v2di
)_mm_setzero_si128());
4788 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4789 _mm256_mask_srlv_epi64(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
4791 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4792 (__v4di
)_mm256_srlv_epi64(__X
, __Y
),
4796 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4797 _mm256_maskz_srlv_epi64(__mmask8 __U
, __m256i __X
, __m256i __Y
)
4799 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4800 (__v4di
)_mm256_srlv_epi64(__X
, __Y
),
4801 (__v4di
)_mm256_setzero_si256());
4804 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805 _mm_mask_srlv_epi32(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
4807 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4808 (__v4si
)_mm_srlv_epi32(__X
, __Y
),
4812 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4813 _mm_maskz_srlv_epi32(__mmask8 __U
, __m128i __X
, __m128i __Y
)
4815 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4816 (__v4si
)_mm_srlv_epi32(__X
, __Y
),
4817 (__v4si
)_mm_setzero_si128());
4820 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821 _mm256_mask_srlv_epi32(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
4823 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4824 (__v8si
)_mm256_srlv_epi32(__X
, __Y
),
4828 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4829 _mm256_maskz_srlv_epi32(__mmask8 __U
, __m256i __X
, __m256i __Y
)
4831 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4832 (__v8si
)_mm256_srlv_epi32(__X
, __Y
),
4833 (__v8si
)_mm256_setzero_si256());
4836 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837 _mm_mask_srl_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4839 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4840 (__v4si
)_mm_srl_epi32(__A
, __B
),
4844 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4845 _mm_maskz_srl_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
4847 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4848 (__v4si
)_mm_srl_epi32(__A
, __B
),
4849 (__v4si
)_mm_setzero_si128());
4852 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4853 _mm256_mask_srl_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
4855 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4856 (__v8si
)_mm256_srl_epi32(__A
, __B
),
4860 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4861 _mm256_maskz_srl_epi32(__mmask8 __U
, __m256i __A
, __m128i __B
)
4863 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4864 (__v8si
)_mm256_srl_epi32(__A
, __B
),
4865 (__v8si
)_mm256_setzero_si256());
4868 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4869 _mm_mask_srli_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
4871 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4872 (__v4si
)_mm_srli_epi32(__A
, (int)__B
),
4876 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4877 _mm_maskz_srli_epi32(__mmask8 __U
, __m128i __A
, unsigned int __B
)
4879 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4880 (__v4si
)_mm_srli_epi32(__A
, (int)__B
),
4881 (__v4si
)_mm_setzero_si128());
4884 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4885 _mm256_mask_srli_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, unsigned int __B
)
4887 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4888 (__v8si
)_mm256_srli_epi32(__A
, (int)__B
),
4892 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4893 _mm256_maskz_srli_epi32(__mmask8 __U
, __m256i __A
, unsigned int __B
)
4895 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4896 (__v8si
)_mm256_srli_epi32(__A
, (int)__B
),
4897 (__v8si
)_mm256_setzero_si256());
4900 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4901 _mm_mask_srl_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
4903 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4904 (__v2di
)_mm_srl_epi64(__A
, __B
),
4908 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4909 _mm_maskz_srl_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
4911 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4912 (__v2di
)_mm_srl_epi64(__A
, __B
),
4913 (__v2di
)_mm_setzero_si128());
4916 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4917 _mm256_mask_srl_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
4919 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4920 (__v4di
)_mm256_srl_epi64(__A
, __B
),
4924 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4925 _mm256_maskz_srl_epi64(__mmask8 __U
, __m256i __A
, __m128i __B
)
4927 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4928 (__v4di
)_mm256_srl_epi64(__A
, __B
),
4929 (__v4di
)_mm256_setzero_si256());
4932 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4933 _mm_mask_srli_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
4935 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4936 (__v2di
)_mm_srli_epi64(__A
, (int)__B
),
4940 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4941 _mm_maskz_srli_epi64(__mmask8 __U
, __m128i __A
, unsigned int __B
)
4943 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
4944 (__v2di
)_mm_srli_epi64(__A
, (int)__B
),
4945 (__v2di
)_mm_setzero_si128());
4948 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4949 _mm256_mask_srli_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, unsigned int __B
)
4951 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4952 (__v4di
)_mm256_srli_epi64(__A
, (int)__B
),
4956 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4957 _mm256_maskz_srli_epi64(__mmask8 __U
, __m256i __A
, unsigned int __B
)
4959 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
4960 (__v4di
)_mm256_srli_epi64(__A
, (int)__B
),
4961 (__v4di
)_mm256_setzero_si256());
4964 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4965 _mm_mask_srav_epi32(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
4967 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4968 (__v4si
)_mm_srav_epi32(__X
, __Y
),
4972 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4973 _mm_maskz_srav_epi32(__mmask8 __U
, __m128i __X
, __m128i __Y
)
4975 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
4976 (__v4si
)_mm_srav_epi32(__X
, __Y
),
4977 (__v4si
)_mm_setzero_si128());
4980 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4981 _mm256_mask_srav_epi32(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
4983 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4984 (__v8si
)_mm256_srav_epi32(__X
, __Y
),
4988 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4989 _mm256_maskz_srav_epi32(__mmask8 __U
, __m256i __X
, __m256i __Y
)
4991 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
4992 (__v8si
)_mm256_srav_epi32(__X
, __Y
),
4993 (__v8si
)_mm256_setzero_si256());
4996 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4997 _mm_srav_epi64(__m128i __X
, __m128i __Y
)
4999 return (__m128i
)__builtin_ia32_psravq128((__v2di
)__X
, (__v2di
)__Y
);
5002 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003 _mm_mask_srav_epi64(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
)
5005 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
5006 (__v2di
)_mm_srav_epi64(__X
, __Y
),
5010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011 _mm_maskz_srav_epi64(__mmask8 __U
, __m128i __X
, __m128i __Y
)
5013 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
5014 (__v2di
)_mm_srav_epi64(__X
, __Y
),
5015 (__v2di
)_mm_setzero_si128());
5018 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5019 _mm256_srav_epi64(__m256i __X
, __m256i __Y
)
5021 return (__m256i
)__builtin_ia32_psravq256((__v4di
)__X
, (__v4di
) __Y
);
5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025 _mm256_mask_srav_epi64(__m256i __W
, __mmask8 __U
, __m256i __X
, __m256i __Y
)
5027 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
5028 (__v4di
)_mm256_srav_epi64(__X
, __Y
),
5032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033 _mm256_maskz_srav_epi64 (__mmask8 __U
, __m256i __X
, __m256i __Y
)
5035 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
5036 (__v4di
)_mm256_srav_epi64(__X
, __Y
),
5037 (__v4di
)_mm256_setzero_si256());
5040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5041 _mm_mask_mov_epi32 (__m128i __W
, __mmask8 __U
, __m128i __A
)
5043 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
5048 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5049 _mm_maskz_mov_epi32 (__mmask8 __U
, __m128i __A
)
5051 return (__m128i
) __builtin_ia32_selectd_128 ((__mmask8
) __U
,
5053 (__v4si
) _mm_setzero_si128 ());
5057 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5058 _mm256_mask_mov_epi32 (__m256i __W
, __mmask8 __U
, __m256i __A
)
5060 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
5065 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5066 _mm256_maskz_mov_epi32 (__mmask8 __U
, __m256i __A
)
5068 return (__m256i
) __builtin_ia32_selectd_256 ((__mmask8
) __U
,
5070 (__v8si
) _mm256_setzero_si256 ());
5073 static __inline __m128i __DEFAULT_FN_ATTRS128
5074 _mm_load_epi32 (void const *__P
)
5076 return *(const __m128i
*) __P
;
5079 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5080 _mm_mask_load_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
)
5082 return (__m128i
) __builtin_ia32_movdqa32load128_mask ((const __v4si
*) __P
,
5088 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5089 _mm_maskz_load_epi32 (__mmask8 __U
, void const *__P
)
5091 return (__m128i
) __builtin_ia32_movdqa32load128_mask ((const __v4si
*) __P
,
5093 _mm_setzero_si128 (),
5098 static __inline __m256i __DEFAULT_FN_ATTRS256
5099 _mm256_load_epi32 (void const *__P
)
5101 return *(const __m256i
*) __P
;
5104 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5105 _mm256_mask_load_epi32 (__m256i __W
, __mmask8 __U
, void const *__P
)
5107 return (__m256i
) __builtin_ia32_movdqa32load256_mask ((const __v8si
*) __P
,
5113 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5114 _mm256_maskz_load_epi32 (__mmask8 __U
, void const *__P
)
5116 return (__m256i
) __builtin_ia32_movdqa32load256_mask ((const __v8si
*) __P
,
5118 _mm256_setzero_si256 (),
5123 static __inline
void __DEFAULT_FN_ATTRS128
5124 _mm_store_epi32 (void *__P
, __m128i __A
)
5126 *(__m128i
*) __P
= __A
;
5129 static __inline__
void __DEFAULT_FN_ATTRS128
5130 _mm_mask_store_epi32 (void *__P
, __mmask8 __U
, __m128i __A
)
5132 __builtin_ia32_movdqa32store128_mask ((__v4si
*) __P
,
5137 static __inline
void __DEFAULT_FN_ATTRS256
5138 _mm256_store_epi32 (void *__P
, __m256i __A
)
5140 *(__m256i
*) __P
= __A
;
5143 static __inline__
void __DEFAULT_FN_ATTRS256
5144 _mm256_mask_store_epi32 (void *__P
, __mmask8 __U
, __m256i __A
)
5146 __builtin_ia32_movdqa32store256_mask ((__v8si
*) __P
,
5151 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5152 _mm_mask_mov_epi64 (__m128i __W
, __mmask8 __U
, __m128i __A
)
5154 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
5159 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5160 _mm_maskz_mov_epi64 (__mmask8 __U
, __m128i __A
)
5162 return (__m128i
) __builtin_ia32_selectq_128 ((__mmask8
) __U
,
5164 (__v2di
) _mm_setzero_si128 ());
5167 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5168 _mm256_mask_mov_epi64 (__m256i __W
, __mmask8 __U
, __m256i __A
)
5170 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
5175 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5176 _mm256_maskz_mov_epi64 (__mmask8 __U
, __m256i __A
)
5178 return (__m256i
) __builtin_ia32_selectq_256 ((__mmask8
) __U
,
5180 (__v4di
) _mm256_setzero_si256 ());
5183 static __inline __m128i __DEFAULT_FN_ATTRS128
5184 _mm_load_epi64 (void const *__P
)
5186 return *(const __m128i
*) __P
;
5189 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5190 _mm_mask_load_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
)
5192 return (__m128i
) __builtin_ia32_movdqa64load128_mask ((const __v2di
*) __P
,
5198 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5199 _mm_maskz_load_epi64 (__mmask8 __U
, void const *__P
)
5201 return (__m128i
) __builtin_ia32_movdqa64load128_mask ((const __v2di
*) __P
,
5203 _mm_setzero_si128 (),
5208 static __inline __m256i __DEFAULT_FN_ATTRS256
5209 _mm256_load_epi64 (void const *__P
)
5211 return *(const __m256i
*) __P
;
5214 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5215 _mm256_mask_load_epi64 (__m256i __W
, __mmask8 __U
, void const *__P
)
5217 return (__m256i
) __builtin_ia32_movdqa64load256_mask ((const __v4di
*) __P
,
5223 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5224 _mm256_maskz_load_epi64 (__mmask8 __U
, void const *__P
)
5226 return (__m256i
) __builtin_ia32_movdqa64load256_mask ((const __v4di
*) __P
,
5228 _mm256_setzero_si256 (),
5233 static __inline
void __DEFAULT_FN_ATTRS128
5234 _mm_store_epi64 (void *__P
, __m128i __A
)
5236 *(__m128i
*) __P
= __A
;
5239 static __inline__
void __DEFAULT_FN_ATTRS128
5240 _mm_mask_store_epi64 (void *__P
, __mmask8 __U
, __m128i __A
)
5242 __builtin_ia32_movdqa64store128_mask ((__v2di
*) __P
,
5247 static __inline
void __DEFAULT_FN_ATTRS256
5248 _mm256_store_epi64 (void *__P
, __m256i __A
)
5250 *(__m256i
*) __P
= __A
;
5253 static __inline__
void __DEFAULT_FN_ATTRS256
5254 _mm256_mask_store_epi64 (void *__P
, __mmask8 __U
, __m256i __A
)
5256 __builtin_ia32_movdqa64store256_mask ((__v4di
*) __P
,
5261 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5262 _mm_mask_movedup_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
5264 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5265 (__v2df
)_mm_movedup_pd(__A
),
5269 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5270 _mm_maskz_movedup_pd (__mmask8 __U
, __m128d __A
)
5272 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5273 (__v2df
)_mm_movedup_pd(__A
),
5274 (__v2df
)_mm_setzero_pd());
5277 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5278 _mm256_mask_movedup_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
5280 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5281 (__v4df
)_mm256_movedup_pd(__A
),
5285 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5286 _mm256_maskz_movedup_pd (__mmask8 __U
, __m256d __A
)
5288 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5289 (__v4df
)_mm256_movedup_pd(__A
),
5290 (__v4df
)_mm256_setzero_pd());
5293 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5294 _mm_mask_set1_epi32(__m128i __O
, __mmask8 __M
, int __A
)
5296 return (__m128i
)__builtin_ia32_selectd_128(__M
,
5297 (__v4si
) _mm_set1_epi32(__A
),
5301 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5302 _mm_maskz_set1_epi32( __mmask8 __M
, int __A
)
5304 return (__m128i
)__builtin_ia32_selectd_128(__M
,
5305 (__v4si
) _mm_set1_epi32(__A
),
5306 (__v4si
)_mm_setzero_si128());
5309 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310 _mm256_mask_set1_epi32(__m256i __O
, __mmask8 __M
, int __A
)
5312 return (__m256i
)__builtin_ia32_selectd_256(__M
,
5313 (__v8si
) _mm256_set1_epi32(__A
),
5317 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5318 _mm256_maskz_set1_epi32( __mmask8 __M
, int __A
)
5320 return (__m256i
)__builtin_ia32_selectd_256(__M
,
5321 (__v8si
) _mm256_set1_epi32(__A
),
5322 (__v8si
)_mm256_setzero_si256());
5326 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5327 _mm_mask_set1_epi64 (__m128i __O
, __mmask8 __M
, long long __A
)
5329 return (__m128i
) __builtin_ia32_selectq_128(__M
,
5330 (__v2di
) _mm_set1_epi64x(__A
),
5334 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5335 _mm_maskz_set1_epi64 (__mmask8 __M
, long long __A
)
5337 return (__m128i
) __builtin_ia32_selectq_128(__M
,
5338 (__v2di
) _mm_set1_epi64x(__A
),
5339 (__v2di
) _mm_setzero_si128());
5342 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5343 _mm256_mask_set1_epi64 (__m256i __O
, __mmask8 __M
, long long __A
)
5345 return (__m256i
) __builtin_ia32_selectq_256(__M
,
5346 (__v4di
) _mm256_set1_epi64x(__A
),
5350 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5351 _mm256_maskz_set1_epi64 (__mmask8 __M
, long long __A
)
5353 return (__m256i
) __builtin_ia32_selectq_256(__M
,
5354 (__v4di
) _mm256_set1_epi64x(__A
),
5355 (__v4di
) _mm256_setzero_si256());
5358 #define _mm_fixupimm_pd(A, B, C, imm) \
5359 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5360 (__v2df)(__m128d)(B), \
5361 (__v2di)(__m128i)(C), (int)(imm), \
5364 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
5365 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5366 (__v2df)(__m128d)(B), \
5367 (__v2di)(__m128i)(C), (int)(imm), \
/* maskz form of the 128-bit double fixupimm: forwards A, B, C, the immediate
 * and the mask U, in that order, to __builtin_ia32_fixupimmpd128_maskz. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (__v2di)(__m128i)(C), \
      (int)(imm), (__mmask8)(U)))
5376 #define _mm256_fixupimm_pd(A, B, C, imm) \
5377 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5378 (__v4df)(__m256d)(B), \
5379 (__v4di)(__m256i)(C), (int)(imm), \
5382 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
5383 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5384 (__v4df)(__m256d)(B), \
5385 (__v4di)(__m256i)(C), (int)(imm), \
/* maskz form of the 256-bit double fixupimm: forwards A, B, C, the immediate
 * and the mask U, in that order, to __builtin_ia32_fixupimmpd256_maskz. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz( \
      (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \
      (int)(imm), (__mmask8)(U)))
5394 #define _mm_fixupimm_ps(A, B, C, imm) \
5395 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5396 (__v4sf)(__m128)(B), \
5397 (__v4si)(__m128i)(C), (int)(imm), \
5400 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
5401 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5402 (__v4sf)(__m128)(B), \
5403 (__v4si)(__m128i)(C), (int)(imm), \
5406 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
5407 ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5408 (__v4sf)(__m128)(B), \
5409 (__v4si)(__m128i)(C), (int)(imm), \
5412 #define _mm256_fixupimm_ps(A, B, C, imm) \
5413 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5414 (__v8sf)(__m256)(B), \
5415 (__v8si)(__m256i)(C), (int)(imm), \
5418 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
5419 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5420 (__v8sf)(__m256)(B), \
5421 (__v8si)(__m256i)(C), (int)(imm), \
5424 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
5425 ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5426 (__v8sf)(__m256)(B), \
5427 (__v8si)(__m256i)(C), (int)(imm), \
5430 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5431 _mm_mask_load_pd (__m128d __W
, __mmask8 __U
, void const *__P
)
5433 return (__m128d
) __builtin_ia32_loadapd128_mask ((const __v2df
*) __P
,
5438 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5439 _mm_maskz_load_pd (__mmask8 __U
, void const *__P
)
5441 return (__m128d
) __builtin_ia32_loadapd128_mask ((const __v2df
*) __P
,
5447 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5448 _mm256_mask_load_pd (__m256d __W
, __mmask8 __U
, void const *__P
)
5450 return (__m256d
) __builtin_ia32_loadapd256_mask ((const __v4df
*) __P
,
5455 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5456 _mm256_maskz_load_pd (__mmask8 __U
, void const *__P
)
5458 return (__m256d
) __builtin_ia32_loadapd256_mask ((const __v4df
*) __P
,
5460 _mm256_setzero_pd (),
5464 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5465 _mm_mask_load_ps (__m128 __W
, __mmask8 __U
, void const *__P
)
5467 return (__m128
) __builtin_ia32_loadaps128_mask ((const __v4sf
*) __P
,
5472 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5473 _mm_maskz_load_ps (__mmask8 __U
, void const *__P
)
5475 return (__m128
) __builtin_ia32_loadaps128_mask ((const __v4sf
*) __P
,
5481 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5482 _mm256_mask_load_ps (__m256 __W
, __mmask8 __U
, void const *__P
)
5484 return (__m256
) __builtin_ia32_loadaps256_mask ((const __v8sf
*) __P
,
5489 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5490 _mm256_maskz_load_ps (__mmask8 __U
, void const *__P
)
5492 return (__m256
) __builtin_ia32_loadaps256_mask ((const __v8sf
*) __P
,
5494 _mm256_setzero_ps (),
5498 static __inline __m128i __DEFAULT_FN_ATTRS128
5499 _mm_loadu_epi64 (void const *__P
)
5501 struct __loadu_epi64
{
5503 } __attribute__((__packed__
, __may_alias__
));
5504 return ((const struct __loadu_epi64
*)__P
)->__v
;
5507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5508 _mm_mask_loadu_epi64 (__m128i __W
, __mmask8 __U
, void const *__P
)
5510 return (__m128i
) __builtin_ia32_loaddqudi128_mask ((const __v2di
*) __P
,
5515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5516 _mm_maskz_loadu_epi64 (__mmask8 __U
, void const *__P
)
5518 return (__m128i
) __builtin_ia32_loaddqudi128_mask ((const __v2di
*) __P
,
5520 _mm_setzero_si128 (),
5524 static __inline __m256i __DEFAULT_FN_ATTRS256
5525 _mm256_loadu_epi64 (void const *__P
)
5527 struct __loadu_epi64
{
5529 } __attribute__((__packed__
, __may_alias__
));
5530 return ((const struct __loadu_epi64
*)__P
)->__v
;
5533 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5534 _mm256_mask_loadu_epi64 (__m256i __W
, __mmask8 __U
, void const *__P
)
5536 return (__m256i
) __builtin_ia32_loaddqudi256_mask ((const __v4di
*) __P
,
5541 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5542 _mm256_maskz_loadu_epi64 (__mmask8 __U
, void const *__P
)
5544 return (__m256i
) __builtin_ia32_loaddqudi256_mask ((const __v4di
*) __P
,
5546 _mm256_setzero_si256 (),
5550 static __inline __m128i __DEFAULT_FN_ATTRS128
5551 _mm_loadu_epi32 (void const *__P
)
5553 struct __loadu_epi32
{
5555 } __attribute__((__packed__
, __may_alias__
));
5556 return ((const struct __loadu_epi32
*)__P
)->__v
;
5559 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5560 _mm_mask_loadu_epi32 (__m128i __W
, __mmask8 __U
, void const *__P
)
5562 return (__m128i
) __builtin_ia32_loaddqusi128_mask ((const __v4si
*) __P
,
5567 static __inline__ __m128i __DEFAULT_FN_ATTRS128
5568 _mm_maskz_loadu_epi32 (__mmask8 __U
, void const *__P
)
5570 return (__m128i
) __builtin_ia32_loaddqusi128_mask ((const __v4si
*) __P
,
5572 _mm_setzero_si128 (),
5576 static __inline __m256i __DEFAULT_FN_ATTRS256
5577 _mm256_loadu_epi32 (void const *__P
)
5579 struct __loadu_epi32
{
5581 } __attribute__((__packed__
, __may_alias__
));
5582 return ((const struct __loadu_epi32
*)__P
)->__v
;
5585 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5586 _mm256_mask_loadu_epi32 (__m256i __W
, __mmask8 __U
, void const *__P
)
5588 return (__m256i
) __builtin_ia32_loaddqusi256_mask ((const __v8si
*) __P
,
5593 static __inline__ __m256i __DEFAULT_FN_ATTRS256
5594 _mm256_maskz_loadu_epi32 (__mmask8 __U
, void const *__P
)
5596 return (__m256i
) __builtin_ia32_loaddqusi256_mask ((const __v8si
*) __P
,
5598 _mm256_setzero_si256 (),
5602 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603 _mm_mask_loadu_pd (__m128d __W
, __mmask8 __U
, void const *__P
)
5605 return (__m128d
) __builtin_ia32_loadupd128_mask ((const __v2df
*) __P
,
5610 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611 _mm_maskz_loadu_pd (__mmask8 __U
, void const *__P
)
5613 return (__m128d
) __builtin_ia32_loadupd128_mask ((const __v2df
*) __P
,
5619 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5620 _mm256_mask_loadu_pd (__m256d __W
, __mmask8 __U
, void const *__P
)
5622 return (__m256d
) __builtin_ia32_loadupd256_mask ((const __v4df
*) __P
,
5627 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5628 _mm256_maskz_loadu_pd (__mmask8 __U
, void const *__P
)
5630 return (__m256d
) __builtin_ia32_loadupd256_mask ((const __v4df
*) __P
,
5632 _mm256_setzero_pd (),
5636 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5637 _mm_mask_loadu_ps (__m128 __W
, __mmask8 __U
, void const *__P
)
5639 return (__m128
) __builtin_ia32_loadups128_mask ((const __v4sf
*) __P
,
5644 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5645 _mm_maskz_loadu_ps (__mmask8 __U
, void const *__P
)
5647 return (__m128
) __builtin_ia32_loadups128_mask ((const __v4sf
*) __P
,
5653 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5654 _mm256_mask_loadu_ps (__m256 __W
, __mmask8 __U
, void const *__P
)
5656 return (__m256
) __builtin_ia32_loadups256_mask ((const __v8sf
*) __P
,
5661 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5662 _mm256_maskz_loadu_ps (__mmask8 __U
, void const *__P
)
5664 return (__m256
) __builtin_ia32_loadups256_mask ((const __v8sf
*) __P
,
5666 _mm256_setzero_ps (),
5670 static __inline__
void __DEFAULT_FN_ATTRS128
5671 _mm_mask_store_pd (void *__P
, __mmask8 __U
, __m128d __A
)
5673 __builtin_ia32_storeapd128_mask ((__v2df
*) __P
,
5678 static __inline__
void __DEFAULT_FN_ATTRS256
5679 _mm256_mask_store_pd (void *__P
, __mmask8 __U
, __m256d __A
)
5681 __builtin_ia32_storeapd256_mask ((__v4df
*) __P
,
5686 static __inline__
void __DEFAULT_FN_ATTRS128
5687 _mm_mask_store_ps (void *__P
, __mmask8 __U
, __m128 __A
)
5689 __builtin_ia32_storeaps128_mask ((__v4sf
*) __P
,
5694 static __inline__
void __DEFAULT_FN_ATTRS256
5695 _mm256_mask_store_ps (void *__P
, __mmask8 __U
, __m256 __A
)
5697 __builtin_ia32_storeaps256_mask ((__v8sf
*) __P
,
5702 static __inline
void __DEFAULT_FN_ATTRS128
5703 _mm_storeu_epi64 (void *__P
, __m128i __A
)
5705 struct __storeu_epi64
{
5707 } __attribute__((__packed__
, __may_alias__
));
5708 ((struct __storeu_epi64
*)__P
)->__v
= __A
;
5711 static __inline__
void __DEFAULT_FN_ATTRS128
5712 _mm_mask_storeu_epi64 (void *__P
, __mmask8 __U
, __m128i __A
)
5714 __builtin_ia32_storedqudi128_mask ((__v2di
*) __P
,
5719 static __inline
void __DEFAULT_FN_ATTRS256
5720 _mm256_storeu_epi64 (void *__P
, __m256i __A
)
5722 struct __storeu_epi64
{
5724 } __attribute__((__packed__
, __may_alias__
));
5725 ((struct __storeu_epi64
*)__P
)->__v
= __A
;
5728 static __inline__
void __DEFAULT_FN_ATTRS256
5729 _mm256_mask_storeu_epi64 (void *__P
, __mmask8 __U
, __m256i __A
)
5731 __builtin_ia32_storedqudi256_mask ((__v4di
*) __P
,
5736 static __inline
void __DEFAULT_FN_ATTRS128
5737 _mm_storeu_epi32 (void *__P
, __m128i __A
)
5739 struct __storeu_epi32
{
5741 } __attribute__((__packed__
, __may_alias__
));
5742 ((struct __storeu_epi32
*)__P
)->__v
= __A
;
5745 static __inline__
void __DEFAULT_FN_ATTRS128
5746 _mm_mask_storeu_epi32 (void *__P
, __mmask8 __U
, __m128i __A
)
5748 __builtin_ia32_storedqusi128_mask ((__v4si
*) __P
,
5753 static __inline
void __DEFAULT_FN_ATTRS256
5754 _mm256_storeu_epi32 (void *__P
, __m256i __A
)
5756 struct __storeu_epi32
{
5758 } __attribute__((__packed__
, __may_alias__
));
5759 ((struct __storeu_epi32
*)__P
)->__v
= __A
;
5762 static __inline__
void __DEFAULT_FN_ATTRS256
5763 _mm256_mask_storeu_epi32 (void *__P
, __mmask8 __U
, __m256i __A
)
5765 __builtin_ia32_storedqusi256_mask ((__v8si
*) __P
,
5770 static __inline__
void __DEFAULT_FN_ATTRS128
5771 _mm_mask_storeu_pd (void *__P
, __mmask8 __U
, __m128d __A
)
5773 __builtin_ia32_storeupd128_mask ((__v2df
*) __P
,
5778 static __inline__
void __DEFAULT_FN_ATTRS256
5779 _mm256_mask_storeu_pd (void *__P
, __mmask8 __U
, __m256d __A
)
5781 __builtin_ia32_storeupd256_mask ((__v4df
*) __P
,
5786 static __inline__
void __DEFAULT_FN_ATTRS128
5787 _mm_mask_storeu_ps (void *__P
, __mmask8 __U
, __m128 __A
)
5789 __builtin_ia32_storeups128_mask ((__v4sf
*) __P
,
5794 static __inline__
void __DEFAULT_FN_ATTRS256
5795 _mm256_mask_storeu_ps (void *__P
, __mmask8 __U
, __m256 __A
)
5797 __builtin_ia32_storeups256_mask ((__v8sf
*) __P
,
5803 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5804 _mm_mask_unpackhi_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
)
5806 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5807 (__v2df
)_mm_unpackhi_pd(__A
, __B
),
5811 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5812 _mm_maskz_unpackhi_pd(__mmask8 __U
, __m128d __A
, __m128d __B
)
5814 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5815 (__v2df
)_mm_unpackhi_pd(__A
, __B
),
5816 (__v2df
)_mm_setzero_pd());
5819 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5820 _mm256_mask_unpackhi_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
)
5822 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5823 (__v4df
)_mm256_unpackhi_pd(__A
, __B
),
5827 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5828 _mm256_maskz_unpackhi_pd(__mmask8 __U
, __m256d __A
, __m256d __B
)
5830 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5831 (__v4df
)_mm256_unpackhi_pd(__A
, __B
),
5832 (__v4df
)_mm256_setzero_pd());
5835 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5836 _mm_mask_unpackhi_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
)
5838 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
5839 (__v4sf
)_mm_unpackhi_ps(__A
, __B
),
5843 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5844 _mm_maskz_unpackhi_ps(__mmask8 __U
, __m128 __A
, __m128 __B
)
5846 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
5847 (__v4sf
)_mm_unpackhi_ps(__A
, __B
),
5848 (__v4sf
)_mm_setzero_ps());
5851 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5852 _mm256_mask_unpackhi_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
)
5854 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
5855 (__v8sf
)_mm256_unpackhi_ps(__A
, __B
),
5859 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5860 _mm256_maskz_unpackhi_ps(__mmask8 __U
, __m256 __A
, __m256 __B
)
5862 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
5863 (__v8sf
)_mm256_unpackhi_ps(__A
, __B
),
5864 (__v8sf
)_mm256_setzero_ps());
5867 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5868 _mm_mask_unpacklo_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128d __B
)
5870 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5871 (__v2df
)_mm_unpacklo_pd(__A
, __B
),
5875 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5876 _mm_maskz_unpacklo_pd(__mmask8 __U
, __m128d __A
, __m128d __B
)
5878 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
5879 (__v2df
)_mm_unpacklo_pd(__A
, __B
),
5880 (__v2df
)_mm_setzero_pd());
5883 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5884 _mm256_mask_unpacklo_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256d __B
)
5886 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5887 (__v4df
)_mm256_unpacklo_pd(__A
, __B
),
5891 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5892 _mm256_maskz_unpacklo_pd(__mmask8 __U
, __m256d __A
, __m256d __B
)
5894 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
5895 (__v4df
)_mm256_unpacklo_pd(__A
, __B
),
5896 (__v4df
)_mm256_setzero_pd());
5899 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5900 _mm_mask_unpacklo_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128 __B
)
5902 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
5903 (__v4sf
)_mm_unpacklo_ps(__A
, __B
),
5907 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5908 _mm_maskz_unpacklo_ps(__mmask8 __U
, __m128 __A
, __m128 __B
)
5910 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
5911 (__v4sf
)_mm_unpacklo_ps(__A
, __B
),
5912 (__v4sf
)_mm_setzero_ps());
5915 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5916 _mm256_mask_unpacklo_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256 __B
)
5918 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
5919 (__v8sf
)_mm256_unpacklo_ps(__A
, __B
),
5923 static __inline__ __m256 __DEFAULT_FN_ATTRS256
5924 _mm256_maskz_unpacklo_ps(__mmask8 __U
, __m256 __A
, __m256 __B
)
5926 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
5927 (__v8sf
)_mm256_unpacklo_ps(__A
, __B
),
5928 (__v8sf
)_mm256_setzero_ps());
5931 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5932 _mm_rcp14_pd (__m128d __A
)
5934 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
5940 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5941 _mm_mask_rcp14_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
5943 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
5948 static __inline__ __m128d __DEFAULT_FN_ATTRS128
5949 _mm_maskz_rcp14_pd (__mmask8 __U
, __m128d __A
)
5951 return (__m128d
) __builtin_ia32_rcp14pd128_mask ((__v2df
) __A
,
5957 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5958 _mm256_rcp14_pd (__m256d __A
)
5960 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
5962 _mm256_setzero_pd (),
5966 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5967 _mm256_mask_rcp14_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
5969 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
5974 static __inline__ __m256d __DEFAULT_FN_ATTRS256
5975 _mm256_maskz_rcp14_pd (__mmask8 __U
, __m256d __A
)
5977 return (__m256d
) __builtin_ia32_rcp14pd256_mask ((__v4df
) __A
,
5979 _mm256_setzero_pd (),
5983 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5984 _mm_rcp14_ps (__m128 __A
)
5986 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
5992 static __inline__ __m128 __DEFAULT_FN_ATTRS128
5993 _mm_mask_rcp14_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
5995 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
6000 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6001 _mm_maskz_rcp14_ps (__mmask8 __U
, __m128 __A
)
6003 return (__m128
) __builtin_ia32_rcp14ps128_mask ((__v4sf
) __A
,
6009 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6010 _mm256_rcp14_ps (__m256 __A
)
6012 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6014 _mm256_setzero_ps (),
6018 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6019 _mm256_mask_rcp14_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
6021 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6026 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6027 _mm256_maskz_rcp14_ps (__mmask8 __U
, __m256 __A
)
6029 return (__m256
) __builtin_ia32_rcp14ps256_mask ((__v8sf
) __A
,
6031 _mm256_setzero_ps (),
/* Applies _mm_permute_pd((X), (C)) and passes the result to
 * __builtin_ia32_selectpd_128 with U as the selector and W as the other
 * operand. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), (__v2df)(__m128d)(W)))
/* Applies _mm_permute_pd((X), (C)) and passes the result to
 * __builtin_ia32_selectpd_128 with U as the selector and an all-zero vector
 * as the other operand. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), (__v2df)_mm_permute_pd((X), (C)), \
      (__v2df)_mm_setzero_pd()))
/* Applies _mm256_permute_pd((X), (C)) and passes the result to
 * __builtin_ia32_selectpd_256 with U as the selector and W as the other
 * operand. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
/* Applies _mm256_permute_pd((X), (C)) and passes the result to
 * __builtin_ia32_selectpd_256 with U as the selector and an all-zero vector
 * as the other operand. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), (__v4df)_mm256_permute_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
/* Applies _mm_permute_ps((X), (C)) and passes the result to
 * __builtin_ia32_selectps_128 with U as the selector and W as the other
 * operand. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), (__v4sf)(__m128)(W)))
/* Applies _mm_permute_ps((X), (C)) and passes the result to
 * __builtin_ia32_selectps_128 with U as the selector and an all-zero vector
 * as the other operand. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), (__v4sf)_mm_permute_ps((X), (C)), \
      (__v4sf)_mm_setzero_ps()))
/* Applies _mm256_permute_ps((X), (C)) and passes the result to
 * __builtin_ia32_selectps_256 with U as the selector and W as the other
 * operand. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)(__m256)(W)))
/* Applies _mm256_permute_ps((X), (C)) and passes the result to
 * __builtin_ia32_selectps_256 with U as the selector and an all-zero vector
 * as the other operand. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), (__v8sf)_mm256_permute_ps((X), (C)), \
      (__v8sf)_mm256_setzero_ps()))
6075 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6076 _mm_mask_permutevar_pd(__m128d __W
, __mmask8 __U
, __m128d __A
, __m128i __C
)
6078 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6079 (__v2df
)_mm_permutevar_pd(__A
, __C
),
6083 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6084 _mm_maskz_permutevar_pd(__mmask8 __U
, __m128d __A
, __m128i __C
)
6086 return (__m128d
)__builtin_ia32_selectpd_128((__mmask8
)__U
,
6087 (__v2df
)_mm_permutevar_pd(__A
, __C
),
6088 (__v2df
)_mm_setzero_pd());
6091 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6092 _mm256_mask_permutevar_pd(__m256d __W
, __mmask8 __U
, __m256d __A
, __m256i __C
)
6094 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6095 (__v4df
)_mm256_permutevar_pd(__A
, __C
),
6099 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6100 _mm256_maskz_permutevar_pd(__mmask8 __U
, __m256d __A
, __m256i __C
)
6102 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
6103 (__v4df
)_mm256_permutevar_pd(__A
, __C
),
6104 (__v4df
)_mm256_setzero_pd());
6107 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6108 _mm_mask_permutevar_ps(__m128 __W
, __mmask8 __U
, __m128 __A
, __m128i __C
)
6110 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6111 (__v4sf
)_mm_permutevar_ps(__A
, __C
),
6115 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6116 _mm_maskz_permutevar_ps(__mmask8 __U
, __m128 __A
, __m128i __C
)
6118 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
6119 (__v4sf
)_mm_permutevar_ps(__A
, __C
),
6120 (__v4sf
)_mm_setzero_ps());
6123 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6124 _mm256_mask_permutevar_ps(__m256 __W
, __mmask8 __U
, __m256 __A
, __m256i __C
)
6126 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6127 (__v8sf
)_mm256_permutevar_ps(__A
, __C
),
6131 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6132 _mm256_maskz_permutevar_ps(__mmask8 __U
, __m256 __A
, __m256i __C
)
6134 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
6135 (__v8sf
)_mm256_permutevar_ps(__A
, __C
),
6136 (__v8sf
)_mm256_setzero_ps());
6139 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6140 _mm_test_epi32_mask (__m128i __A
, __m128i __B
)
6142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146 _mm_mask_test_epi32_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6148 return _mm_mask_cmpneq_epi32_mask (__U
, _mm_and_si128 (__A
, __B
),
6149 _mm_setzero_si128());
6152 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6153 _mm256_test_epi32_mask (__m256i __A
, __m256i __B
)
6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A
, __B
),
6156 _mm256_setzero_si256());
6159 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6160 _mm256_mask_test_epi32_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6162 return _mm256_mask_cmpneq_epi32_mask (__U
, _mm256_and_si256 (__A
, __B
),
6163 _mm256_setzero_si256());
6166 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6167 _mm_test_epi64_mask (__m128i __A
, __m128i __B
)
6169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173 _mm_mask_test_epi64_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6175 return _mm_mask_cmpneq_epi64_mask (__U
, _mm_and_si128 (__A
, __B
),
6176 _mm_setzero_si128());
6179 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6180 _mm256_test_epi64_mask (__m256i __A
, __m256i __B
)
6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A
, __B
),
6183 _mm256_setzero_si256());
6186 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6187 _mm256_mask_test_epi64_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6189 return _mm256_mask_cmpneq_epi64_mask (__U
, _mm256_and_si256 (__A
, __B
),
6190 _mm256_setzero_si256());
6193 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6194 _mm_testn_epi32_mask (__m128i __A
, __m128i __B
)
6196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200 _mm_mask_testn_epi32_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6202 return _mm_mask_cmpeq_epi32_mask (__U
, _mm_and_si128 (__A
, __B
),
6203 _mm_setzero_si128());
6206 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6207 _mm256_testn_epi32_mask (__m256i __A
, __m256i __B
)
6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A
, __B
),
6210 _mm256_setzero_si256());
6213 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6214 _mm256_mask_testn_epi32_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6216 return _mm256_mask_cmpeq_epi32_mask (__U
, _mm256_and_si256 (__A
, __B
),
6217 _mm256_setzero_si256());
6220 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6221 _mm_testn_epi64_mask (__m128i __A
, __m128i __B
)
6223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227 _mm_mask_testn_epi64_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
6229 return _mm_mask_cmpeq_epi64_mask (__U
, _mm_and_si128 (__A
, __B
),
6230 _mm_setzero_si128());
6233 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6234 _mm256_testn_epi64_mask (__m256i __A
, __m256i __B
)
6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A
, __B
),
6237 _mm256_setzero_si256());
6240 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6241 _mm256_mask_testn_epi64_mask (__mmask8 __U
, __m256i __A
, __m256i __B
)
6243 return _mm256_mask_cmpeq_epi64_mask (__U
, _mm256_and_si256 (__A
, __B
),
6244 _mm256_setzero_si256());
6247 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6248 _mm_mask_unpackhi_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6250 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6251 (__v4si
)_mm_unpackhi_epi32(__A
, __B
),
6255 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6256 _mm_maskz_unpackhi_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
6258 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6259 (__v4si
)_mm_unpackhi_epi32(__A
, __B
),
6260 (__v4si
)_mm_setzero_si128());
6263 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6264 _mm256_mask_unpackhi_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6266 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6267 (__v8si
)_mm256_unpackhi_epi32(__A
, __B
),
6271 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6272 _mm256_maskz_unpackhi_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
6274 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6275 (__v8si
)_mm256_unpackhi_epi32(__A
, __B
),
6276 (__v8si
)_mm256_setzero_si256());
6279 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6280 _mm_mask_unpackhi_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6282 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6283 (__v2di
)_mm_unpackhi_epi64(__A
, __B
),
6287 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6288 _mm_maskz_unpackhi_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
6290 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6291 (__v2di
)_mm_unpackhi_epi64(__A
, __B
),
6292 (__v2di
)_mm_setzero_si128());
6295 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6296 _mm256_mask_unpackhi_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6298 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6299 (__v4di
)_mm256_unpackhi_epi64(__A
, __B
),
6303 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6304 _mm256_maskz_unpackhi_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
6306 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6307 (__v4di
)_mm256_unpackhi_epi64(__A
, __B
),
6308 (__v4di
)_mm256_setzero_si256());
6311 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6312 _mm_mask_unpacklo_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6314 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6315 (__v4si
)_mm_unpacklo_epi32(__A
, __B
),
6319 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6320 _mm_maskz_unpacklo_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
6322 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6323 (__v4si
)_mm_unpacklo_epi32(__A
, __B
),
6324 (__v4si
)_mm_setzero_si128());
6327 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6328 _mm256_mask_unpacklo_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6330 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6331 (__v8si
)_mm256_unpacklo_epi32(__A
, __B
),
6335 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6336 _mm256_maskz_unpacklo_epi32(__mmask8 __U
, __m256i __A
, __m256i __B
)
6338 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6339 (__v8si
)_mm256_unpacklo_epi32(__A
, __B
),
6340 (__v8si
)_mm256_setzero_si256());
6343 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6344 _mm_mask_unpacklo_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6346 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6347 (__v2di
)_mm_unpacklo_epi64(__A
, __B
),
6351 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6352 _mm_maskz_unpacklo_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
6354 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
,
6355 (__v2di
)_mm_unpacklo_epi64(__A
, __B
),
6356 (__v2di
)_mm_setzero_si128());
6359 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6360 _mm256_mask_unpacklo_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
)
6362 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6363 (__v4di
)_mm256_unpacklo_epi64(__A
, __B
),
6367 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6368 _mm256_maskz_unpacklo_epi64(__mmask8 __U
, __m256i __A
, __m256i __B
)
6370 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
,
6371 (__v4di
)_mm256_unpacklo_epi64(__A
, __B
),
6372 (__v4di
)_mm256_setzero_si256());
6375 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6376 _mm_mask_sra_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6378 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6379 (__v4si
)_mm_sra_epi32(__A
, __B
),
6383 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6384 _mm_maskz_sra_epi32(__mmask8 __U
, __m128i __A
, __m128i __B
)
6386 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6387 (__v4si
)_mm_sra_epi32(__A
, __B
),
6388 (__v4si
)_mm_setzero_si128());
6391 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6392 _mm256_mask_sra_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
6394 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6395 (__v8si
)_mm256_sra_epi32(__A
, __B
),
6399 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6400 _mm256_maskz_sra_epi32(__mmask8 __U
, __m256i __A
, __m128i __B
)
6402 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6403 (__v8si
)_mm256_sra_epi32(__A
, __B
),
6404 (__v8si
)_mm256_setzero_si256());
6407 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6408 _mm_mask_srai_epi32(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
6410 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6411 (__v4si
)_mm_srai_epi32(__A
, (int)__B
),
6415 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6416 _mm_maskz_srai_epi32(__mmask8 __U
, __m128i __A
, unsigned int __B
)
6418 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
6419 (__v4si
)_mm_srai_epi32(__A
, (int)__B
),
6420 (__v4si
)_mm_setzero_si128());
6423 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6424 _mm256_mask_srai_epi32(__m256i __W
, __mmask8 __U
, __m256i __A
, unsigned int __B
)
6426 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6427 (__v8si
)_mm256_srai_epi32(__A
, (int)__B
),
6431 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6432 _mm256_maskz_srai_epi32(__mmask8 __U
, __m256i __A
, unsigned int __B
)
6434 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
6435 (__v8si
)_mm256_srai_epi32(__A
, (int)__B
),
6436 (__v8si
)_mm256_setzero_si256());
6439 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6440 _mm_sra_epi64(__m128i __A
, __m128i __B
)
6442 return (__m128i
)__builtin_ia32_psraq128((__v2di
)__A
, (__v2di
)__B
);
6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446 _mm_mask_sra_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
6448 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
, \
6449 (__v2di
)_mm_sra_epi64(__A
, __B
), \
6453 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454 _mm_maskz_sra_epi64(__mmask8 __U
, __m128i __A
, __m128i __B
)
6456 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
, \
6457 (__v2di
)_mm_sra_epi64(__A
, __B
), \
6458 (__v2di
)_mm_setzero_si128());
6461 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6462 _mm256_sra_epi64(__m256i __A
, __m128i __B
)
6464 return (__m256i
)__builtin_ia32_psraq256((__v4di
) __A
, (__v2di
) __B
);
6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468 _mm256_mask_sra_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
, __m128i __B
)
6470 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
, \
6471 (__v4di
)_mm256_sra_epi64(__A
, __B
), \
6475 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476 _mm256_maskz_sra_epi64(__mmask8 __U
, __m256i __A
, __m128i __B
)
6478 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
, \
6479 (__v4di
)_mm256_sra_epi64(__A
, __B
), \
6480 (__v4di
)_mm256_setzero_si256());
6483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6484 _mm_srai_epi64(__m128i __A
, unsigned int __imm
)
6486 return (__m128i
)__builtin_ia32_psraqi128((__v2di
)__A
, (int)__imm
);
6489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490 _mm_mask_srai_epi64(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __imm
)
6492 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
, \
6493 (__v2di
)_mm_srai_epi64(__A
, __imm
), \
6497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498 _mm_maskz_srai_epi64(__mmask8 __U
, __m128i __A
, unsigned int __imm
)
6500 return (__m128i
)__builtin_ia32_selectq_128((__mmask8
)__U
, \
6501 (__v2di
)_mm_srai_epi64(__A
, __imm
), \
6502 (__v2di
)_mm_setzero_si128());
6505 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6506 _mm256_srai_epi64(__m256i __A
, unsigned int __imm
)
6508 return (__m256i
)__builtin_ia32_psraqi256((__v4di
)__A
, (int)__imm
);
6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512 _mm256_mask_srai_epi64(__m256i __W
, __mmask8 __U
, __m256i __A
,
6515 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
, \
6516 (__v4di
)_mm256_srai_epi64(__A
, __imm
), \
6520 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6521 _mm256_maskz_srai_epi64(__mmask8 __U
, __m256i __A
, unsigned int __imm
)
6523 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__U
, \
6524 (__v4di
)_mm256_srai_epi64(__A
, __imm
), \
6525 (__v4di
)_mm256_setzero_si256());
/* 128-bit ternary-logic wrappers on 32-bit lanes.  A, B and C are the three
 * vector inputs and imm is handed to the builtin as an unsigned char.  The
 * unmasked form passes an all-ones mask; the mask/maskz forms pass U and use
 * the _mask / _maskz builtin respectively. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))

#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
/* 256-bit ternary-logic wrappers on 32-bit lanes.  A, B and C are the three
 * vector inputs and imm is handed to the builtin as an unsigned char.  The
 * unmasked form passes an all-ones mask; the mask/maskz forms pass U and use
 * the _mask / _maskz builtin respectively. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
/* 128-bit ternary-logic wrappers on 64-bit lanes.  A, B and C are the three
 * vector inputs and imm is handed to the builtin as an unsigned char.  The
 * unmasked form passes an all-ones mask; the mask/maskz forms pass U and use
 * the _mask / _maskz builtin respectively. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))

#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))

#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
/* 256-bit ternary-logic wrappers on 64-bit lanes.  A, B and C are the three
 * vector inputs and imm is handed to the builtin as an unsigned char.  The
 * unmasked form passes an all-ones mask; the mask/maskz forms pass U and use
 * the _mask / _maskz builtin respectively. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)-1))

#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), \
                                             (unsigned char)(imm), \
                                             (__mmask8)(U)))

#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), \
                                              (unsigned char)(imm), \
                                              (__mmask8)(U)))
/* Shuffle of A and B through the shuf_f32x4_256 builtin, controlled by imm
 * (passed as int).  The mask form selects per float lane between the shuffle
 * result and W using U; the maskz form selects between it and zero. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), \
                                         (int)(imm)))

#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))

#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
/* Shuffle of A and B through the shuf_f64x2_256 builtin, controlled by imm
 * (passed as int).  The mask form selects per double lane between the shuffle
 * result and W using U; the maskz form selects between it and zero. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), \
                                          (int)(imm)))

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
      (__v4df)_mm256_setzero_pd()))
/* Shuffle of A and B through the shuf_i32x4_256 builtin, controlled by imm
 * (passed as int).  The mask form selects per 32-bit lane between the shuffle
 * result and W using U; the maskz form selects between it and zero. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), \
                                          (int)(imm)))

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
/* Shuffle of A and B through the shuf_i64x2_256 builtin, controlled by imm
 * (passed as int).  The mask form selects per 64-bit lane between the shuffle
 * result and W using U; the maskz form selects between it and zero. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), \
                                          (int)(imm)))

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
      (__v4di)_mm256_setzero_pd()))
/* Masked wrappers around _mm_shuffle_pd(A, B, M): the mask form selects per
 * double lane between the shuffle result and W using U; the maskz form
 * selects between it and zero. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)(__m128d)(W)))

#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128( \
      (__mmask8)(U), \
      (__v2df)_mm_shuffle_pd((A), (B), (M)), \
      (__v2df)_mm_setzero_pd()))
/* Masked wrappers around _mm256_shuffle_pd(A, B, M): the mask form selects
 * per double lane between the shuffle result and W using U; the maskz form
 * selects between it and zero. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
      (__v4df)_mm256_setzero_pd()))
/* Masked wrappers around _mm_shuffle_ps(A, B, M): the mask form selects per
 * float lane between the shuffle result and W using U; the maskz form
 * selects between it and zero. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)(__m128)(W)))

#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128( \
      (__mmask8)(U), \
      (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
      (__v4sf)_mm_setzero_ps()))
/* Masked wrappers around _mm256_shuffle_ps(A, B, M): the mask form selects
 * per float lane between the shuffle result and W using U; the maskz form
 * selects between it and zero. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)(__m256)(W)))

#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
      (__v8sf)_mm256_setzero_ps()))
6685 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6686 _mm_rsqrt14_pd (__m128d __A
)
6688 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
6694 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6695 _mm_mask_rsqrt14_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
6697 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
6702 static __inline__ __m128d __DEFAULT_FN_ATTRS128
6703 _mm_maskz_rsqrt14_pd (__mmask8 __U
, __m128d __A
)
6705 return (__m128d
) __builtin_ia32_rsqrt14pd128_mask ((__v2df
) __A
,
6711 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6712 _mm256_rsqrt14_pd (__m256d __A
)
6714 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
6716 _mm256_setzero_pd (),
6720 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6721 _mm256_mask_rsqrt14_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
6723 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
6728 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6729 _mm256_maskz_rsqrt14_pd (__mmask8 __U
, __m256d __A
)
6731 return (__m256d
) __builtin_ia32_rsqrt14pd256_mask ((__v4df
) __A
,
6733 _mm256_setzero_pd (),
6737 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6738 _mm_rsqrt14_ps (__m128 __A
)
6740 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
6746 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6747 _mm_mask_rsqrt14_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
6749 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
6754 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6755 _mm_maskz_rsqrt14_ps (__mmask8 __U
, __m128 __A
)
6757 return (__m128
) __builtin_ia32_rsqrt14ps128_mask ((__v4sf
) __A
,
6763 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6764 _mm256_rsqrt14_ps (__m256 __A
)
6766 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
6768 _mm256_setzero_ps (),
6772 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6773 _mm256_mask_rsqrt14_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
6775 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
6780 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6781 _mm256_maskz_rsqrt14_ps (__mmask8 __U
, __m256 __A
)
6783 return (__m256
) __builtin_ia32_rsqrt14ps256_mask ((__v8sf
) __A
,
6785 _mm256_setzero_ps (),
6789 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6790 _mm256_broadcast_f32x4(__m128 __A
)
6792 return (__m256
)__builtin_shufflevector((__v4sf
)__A
, (__v4sf
)__A
,
6793 0, 1, 2, 3, 0, 1, 2, 3);
6796 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6797 _mm256_mask_broadcast_f32x4(__m256 __O
, __mmask8 __M
, __m128 __A
)
6799 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__M
,
6800 (__v8sf
)_mm256_broadcast_f32x4(__A
),
6804 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6805 _mm256_maskz_broadcast_f32x4 (__mmask8 __M
, __m128 __A
)
6807 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__M
,
6808 (__v8sf
)_mm256_broadcast_f32x4(__A
),
6809 (__v8sf
)_mm256_setzero_ps());
6812 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6813 _mm256_broadcast_i32x4(__m128i __A
)
6815 return (__m256i
)__builtin_shufflevector((__v4si
)__A
, (__v4si
)__A
,
6816 0, 1, 2, 3, 0, 1, 2, 3);
6819 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6820 _mm256_mask_broadcast_i32x4(__m256i __O
, __mmask8 __M
, __m128i __A
)
6822 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
6823 (__v8si
)_mm256_broadcast_i32x4(__A
),
6827 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6828 _mm256_maskz_broadcast_i32x4(__mmask8 __M
, __m128i __A
)
6830 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
6831 (__v8si
)_mm256_broadcast_i32x4(__A
),
6832 (__v8si
)_mm256_setzero_si256());
6835 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6836 _mm256_mask_broadcastsd_pd (__m256d __O
, __mmask8 __M
, __m128d __A
)
6838 return (__m256d
)__builtin_ia32_selectpd_256(__M
,
6839 (__v4df
) _mm256_broadcastsd_pd(__A
),
6843 static __inline__ __m256d __DEFAULT_FN_ATTRS256
6844 _mm256_maskz_broadcastsd_pd (__mmask8 __M
, __m128d __A
)
6846 return (__m256d
)__builtin_ia32_selectpd_256(__M
,
6847 (__v4df
) _mm256_broadcastsd_pd(__A
),
6848 (__v4df
) _mm256_setzero_pd());
6851 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6852 _mm_mask_broadcastss_ps (__m128 __O
, __mmask8 __M
, __m128 __A
)
6854 return (__m128
)__builtin_ia32_selectps_128(__M
,
6855 (__v4sf
) _mm_broadcastss_ps(__A
),
6859 static __inline__ __m128 __DEFAULT_FN_ATTRS128
6860 _mm_maskz_broadcastss_ps (__mmask8 __M
, __m128 __A
)
6862 return (__m128
)__builtin_ia32_selectps_128(__M
,
6863 (__v4sf
) _mm_broadcastss_ps(__A
),
6864 (__v4sf
) _mm_setzero_ps());
6867 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6868 _mm256_mask_broadcastss_ps (__m256 __O
, __mmask8 __M
, __m128 __A
)
6870 return (__m256
)__builtin_ia32_selectps_256(__M
,
6871 (__v8sf
) _mm256_broadcastss_ps(__A
),
6875 static __inline__ __m256 __DEFAULT_FN_ATTRS256
6876 _mm256_maskz_broadcastss_ps (__mmask8 __M
, __m128 __A
)
6878 return (__m256
)__builtin_ia32_selectps_256(__M
,
6879 (__v8sf
) _mm256_broadcastss_ps(__A
),
6880 (__v8sf
) _mm256_setzero_ps());
6883 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6884 _mm_mask_broadcastd_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
6886 return (__m128i
)__builtin_ia32_selectd_128(__M
,
6887 (__v4si
) _mm_broadcastd_epi32(__A
),
6891 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6892 _mm_maskz_broadcastd_epi32 (__mmask8 __M
, __m128i __A
)
6894 return (__m128i
)__builtin_ia32_selectd_128(__M
,
6895 (__v4si
) _mm_broadcastd_epi32(__A
),
6896 (__v4si
) _mm_setzero_si128());
6899 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6900 _mm256_mask_broadcastd_epi32 (__m256i __O
, __mmask8 __M
, __m128i __A
)
6902 return (__m256i
)__builtin_ia32_selectd_256(__M
,
6903 (__v8si
) _mm256_broadcastd_epi32(__A
),
6907 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6908 _mm256_maskz_broadcastd_epi32 (__mmask8 __M
, __m128i __A
)
6910 return (__m256i
)__builtin_ia32_selectd_256(__M
,
6911 (__v8si
) _mm256_broadcastd_epi32(__A
),
6912 (__v8si
) _mm256_setzero_si256());
6915 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6916 _mm_mask_broadcastq_epi64 (__m128i __O
, __mmask8 __M
, __m128i __A
)
6918 return (__m128i
)__builtin_ia32_selectq_128(__M
,
6919 (__v2di
) _mm_broadcastq_epi64(__A
),
6923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6924 _mm_maskz_broadcastq_epi64 (__mmask8 __M
, __m128i __A
)
6926 return (__m128i
)__builtin_ia32_selectq_128(__M
,
6927 (__v2di
) _mm_broadcastq_epi64(__A
),
6928 (__v2di
) _mm_setzero_si128());
6931 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6932 _mm256_mask_broadcastq_epi64 (__m256i __O
, __mmask8 __M
, __m128i __A
)
6934 return (__m256i
)__builtin_ia32_selectq_256(__M
,
6935 (__v4di
) _mm256_broadcastq_epi64(__A
),
6939 static __inline__ __m256i __DEFAULT_FN_ATTRS256
6940 _mm256_maskz_broadcastq_epi64 (__mmask8 __M
, __m128i __A
)
6942 return (__m256i
)__builtin_ia32_selectq_256(__M
,
6943 (__v4di
) _mm256_broadcastq_epi64(__A
),
6944 (__v4di
) _mm256_setzero_si256());
6947 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6948 _mm_cvtsepi32_epi8 (__m128i __A
)
6950 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
6951 (__v16qi
)_mm_undefined_si128(),
6955 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6956 _mm_mask_cvtsepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
6958 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
6959 (__v16qi
) __O
, __M
);
6962 static __inline__ __m128i __DEFAULT_FN_ATTRS128
6963 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M
, __m128i __A
)
6965 return (__m128i
) __builtin_ia32_pmovsdb128_mask ((__v4si
) __A
,
6966 (__v16qi
) _mm_setzero_si128 (),
6970 static __inline__
void __DEFAULT_FN_ATTRS128
6971 _mm_mask_cvtsepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
6973 __builtin_ia32_pmovsdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
6976 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6977 _mm256_cvtsepi32_epi8 (__m256i __A
)
6979 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
6980 (__v16qi
)_mm_undefined_si128(),
6984 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6985 _mm256_mask_cvtsepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
6987 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
6988 (__v16qi
) __O
, __M
);
6991 static __inline__ __m128i __DEFAULT_FN_ATTRS256
6992 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M
, __m256i __A
)
6994 return (__m128i
) __builtin_ia32_pmovsdb256_mask ((__v8si
) __A
,
6995 (__v16qi
) _mm_setzero_si128 (),
6999 static __inline__
void __DEFAULT_FN_ATTRS256
7000 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7002 __builtin_ia32_pmovsdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
7005 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7006 _mm_cvtsepi32_epi16 (__m128i __A
)
7008 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7009 (__v8hi
)_mm_setzero_si128 (),
7013 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7014 _mm_mask_cvtsepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7016 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7021 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7022 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M
, __m128i __A
)
7024 return (__m128i
) __builtin_ia32_pmovsdw128_mask ((__v4si
) __A
,
7025 (__v8hi
) _mm_setzero_si128 (),
7029 static __inline__
void __DEFAULT_FN_ATTRS128
7030 _mm_mask_cvtsepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7032 __builtin_ia32_pmovsdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
7035 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7036 _mm256_cvtsepi32_epi16 (__m256i __A
)
7038 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7039 (__v8hi
)_mm_undefined_si128(),
7043 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7044 _mm256_mask_cvtsepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7046 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7050 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7051 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M
, __m256i __A
)
7053 return (__m128i
) __builtin_ia32_pmovsdw256_mask ((__v8si
) __A
,
7054 (__v8hi
) _mm_setzero_si128 (),
7058 static __inline__
void __DEFAULT_FN_ATTRS256
7059 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7061 __builtin_ia32_pmovsdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
7064 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7065 _mm_cvtsepi64_epi8 (__m128i __A
)
7067 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7068 (__v16qi
)_mm_undefined_si128(),
7072 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7073 _mm_mask_cvtsepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7075 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7076 (__v16qi
) __O
, __M
);
7079 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7080 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M
, __m128i __A
)
7082 return (__m128i
) __builtin_ia32_pmovsqb128_mask ((__v2di
) __A
,
7083 (__v16qi
) _mm_setzero_si128 (),
7087 static __inline__
void __DEFAULT_FN_ATTRS128
7088 _mm_mask_cvtsepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7090 __builtin_ia32_pmovsqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
7093 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7094 _mm256_cvtsepi64_epi8 (__m256i __A
)
7096 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7097 (__v16qi
)_mm_undefined_si128(),
7101 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7102 _mm256_mask_cvtsepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7104 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7105 (__v16qi
) __O
, __M
);
7108 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7109 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M
, __m256i __A
)
7111 return (__m128i
) __builtin_ia32_pmovsqb256_mask ((__v4di
) __A
,
7112 (__v16qi
) _mm_setzero_si128 (),
7116 static __inline__
void __DEFAULT_FN_ATTRS256
7117 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7119 __builtin_ia32_pmovsqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
7122 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7123 _mm_cvtsepi64_epi32 (__m128i __A
)
7125 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7126 (__v4si
)_mm_undefined_si128(),
7130 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7131 _mm_mask_cvtsepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7133 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7137 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7138 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M
, __m128i __A
)
7140 return (__m128i
) __builtin_ia32_pmovsqd128_mask ((__v2di
) __A
,
7141 (__v4si
) _mm_setzero_si128 (),
7145 static __inline__
void __DEFAULT_FN_ATTRS128
7146 _mm_mask_cvtsepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
7148 __builtin_ia32_pmovsqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
7151 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7152 _mm256_cvtsepi64_epi32 (__m256i __A
)
7154 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7155 (__v4si
)_mm_undefined_si128(),
7159 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7160 _mm256_mask_cvtsepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7162 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7167 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7168 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M
, __m256i __A
)
7170 return (__m128i
) __builtin_ia32_pmovsqd256_mask ((__v4di
) __A
,
7171 (__v4si
) _mm_setzero_si128 (),
7175 static __inline__
void __DEFAULT_FN_ATTRS256
7176 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
7178 __builtin_ia32_pmovsqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
7181 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7182 _mm_cvtsepi64_epi16 (__m128i __A
)
7184 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7185 (__v8hi
)_mm_undefined_si128(),
7189 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7190 _mm_mask_cvtsepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7192 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7196 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7197 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M
, __m128i __A
)
7199 return (__m128i
) __builtin_ia32_pmovsqw128_mask ((__v2di
) __A
,
7200 (__v8hi
) _mm_setzero_si128 (),
7204 static __inline__
void __DEFAULT_FN_ATTRS128
7205 _mm_mask_cvtsepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7207 __builtin_ia32_pmovsqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
7210 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7211 _mm256_cvtsepi64_epi16 (__m256i __A
)
7213 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7214 (__v8hi
)_mm_undefined_si128(),
7218 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7219 _mm256_mask_cvtsepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7221 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7225 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7226 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M
, __m256i __A
)
7228 return (__m128i
) __builtin_ia32_pmovsqw256_mask ((__v4di
) __A
,
7229 (__v8hi
) _mm_setzero_si128 (),
7233 static __inline__
void __DEFAULT_FN_ATTRS256
7234 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7236 __builtin_ia32_pmovsqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
7239 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7240 _mm_cvtusepi32_epi8 (__m128i __A
)
7242 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7243 (__v16qi
)_mm_undefined_si128(),
7247 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7248 _mm_mask_cvtusepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7250 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7255 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7256 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M
, __m128i __A
)
7258 return (__m128i
) __builtin_ia32_pmovusdb128_mask ((__v4si
) __A
,
7259 (__v16qi
) _mm_setzero_si128 (),
7263 static __inline__
void __DEFAULT_FN_ATTRS128
7264 _mm_mask_cvtusepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7266 __builtin_ia32_pmovusdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
7269 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7270 _mm256_cvtusepi32_epi8 (__m256i __A
)
7272 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7273 (__v16qi
)_mm_undefined_si128(),
7277 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7278 _mm256_mask_cvtusepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7280 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7285 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7286 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M
, __m256i __A
)
7288 return (__m128i
) __builtin_ia32_pmovusdb256_mask ((__v8si
) __A
,
7289 (__v16qi
) _mm_setzero_si128 (),
7293 static __inline__
void __DEFAULT_FN_ATTRS256
7294 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7296 __builtin_ia32_pmovusdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
7299 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7300 _mm_cvtusepi32_epi16 (__m128i __A
)
7302 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7303 (__v8hi
)_mm_undefined_si128(),
7307 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7308 _mm_mask_cvtusepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7310 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7314 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7315 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M
, __m128i __A
)
7317 return (__m128i
) __builtin_ia32_pmovusdw128_mask ((__v4si
) __A
,
7318 (__v8hi
) _mm_setzero_si128 (),
7322 static __inline__
void __DEFAULT_FN_ATTRS128
7323 _mm_mask_cvtusepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7325 __builtin_ia32_pmovusdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
7328 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7329 _mm256_cvtusepi32_epi16 (__m256i __A
)
7331 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7332 (__v8hi
) _mm_undefined_si128(),
7336 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7337 _mm256_mask_cvtusepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7339 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7343 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7344 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M
, __m256i __A
)
7346 return (__m128i
) __builtin_ia32_pmovusdw256_mask ((__v8si
) __A
,
7347 (__v8hi
) _mm_setzero_si128 (),
7351 static __inline__
void __DEFAULT_FN_ATTRS256
7352 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7354 __builtin_ia32_pmovusdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
7357 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7358 _mm_cvtusepi64_epi8 (__m128i __A
)
7360 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
7361 (__v16qi
)_mm_undefined_si128(),
7365 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7366 _mm_mask_cvtusepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7368 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
7373 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7374 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M
, __m128i __A
)
7376 return (__m128i
) __builtin_ia32_pmovusqb128_mask ((__v2di
) __A
,
7377 (__v16qi
) _mm_setzero_si128 (),
7381 static __inline__
void __DEFAULT_FN_ATTRS128
7382 _mm_mask_cvtusepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7384 __builtin_ia32_pmovusqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
7387 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7388 _mm256_cvtusepi64_epi8 (__m256i __A
)
7390 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
7391 (__v16qi
)_mm_undefined_si128(),
7395 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7396 _mm256_mask_cvtusepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7398 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
7403 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7404 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M
, __m256i __A
)
7406 return (__m128i
) __builtin_ia32_pmovusqb256_mask ((__v4di
) __A
,
7407 (__v16qi
) _mm_setzero_si128 (),
7411 static __inline__
void __DEFAULT_FN_ATTRS256
7412 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7414 __builtin_ia32_pmovusqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
7417 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7418 _mm_cvtusepi64_epi32 (__m128i __A
)
7420 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
7421 (__v4si
)_mm_undefined_si128(),
7425 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7426 _mm_mask_cvtusepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7428 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
7432 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7433 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M
, __m128i __A
)
7435 return (__m128i
) __builtin_ia32_pmovusqd128_mask ((__v2di
) __A
,
7436 (__v4si
) _mm_setzero_si128 (),
7440 static __inline__
void __DEFAULT_FN_ATTRS128
7441 _mm_mask_cvtusepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
7443 __builtin_ia32_pmovusqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
7446 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7447 _mm256_cvtusepi64_epi32 (__m256i __A
)
7449 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
7450 (__v4si
)_mm_undefined_si128(),
7454 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7455 _mm256_mask_cvtusepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7457 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
7461 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7462 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M
, __m256i __A
)
7464 return (__m128i
) __builtin_ia32_pmovusqd256_mask ((__v4di
) __A
,
7465 (__v4si
) _mm_setzero_si128 (),
7469 static __inline__
void __DEFAULT_FN_ATTRS256
7470 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
7472 __builtin_ia32_pmovusqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
7475 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7476 _mm_cvtusepi64_epi16 (__m128i __A
)
7478 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
7479 (__v8hi
)_mm_undefined_si128(),
7483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7484 _mm_mask_cvtusepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7486 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
7490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7491 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M
, __m128i __A
)
7493 return (__m128i
) __builtin_ia32_pmovusqw128_mask ((__v2di
) __A
,
7494 (__v8hi
) _mm_setzero_si128 (),
7498 static __inline__
void __DEFAULT_FN_ATTRS128
7499 _mm_mask_cvtusepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7501 __builtin_ia32_pmovusqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
7504 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7505 _mm256_cvtusepi64_epi16 (__m256i __A
)
7507 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
7508 (__v8hi
)_mm_undefined_si128(),
7512 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7513 _mm256_mask_cvtusepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7515 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
7519 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7520 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M
, __m256i __A
)
7522 return (__m128i
) __builtin_ia32_pmovusqw256_mask ((__v4di
) __A
,
7523 (__v8hi
) _mm_setzero_si128 (),
7527 static __inline__
void __DEFAULT_FN_ATTRS256
7528 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7530 __builtin_ia32_pmovusqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
7533 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7534 _mm_cvtepi32_epi8 (__m128i __A
)
7536 return (__m128i
)__builtin_shufflevector(
7537 __builtin_convertvector((__v4si
)__A
, __v4qi
), (__v4qi
){0, 0, 0, 0}, 0, 1,
7538 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7541 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7542 _mm_mask_cvtepi32_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7544 return (__m128i
) __builtin_ia32_pmovdb128_mask ((__v4si
) __A
,
7545 (__v16qi
) __O
, __M
);
7548 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7549 _mm_maskz_cvtepi32_epi8 (__mmask8 __M
, __m128i __A
)
7551 return (__m128i
) __builtin_ia32_pmovdb128_mask ((__v4si
) __A
,
7553 _mm_setzero_si128 (),
7557 static __inline__
void __DEFAULT_FN_ATTRS128
7558 _mm_mask_cvtepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7560 __builtin_ia32_pmovdb128mem_mask ((__v16qi
*) __P
, (__v4si
) __A
, __M
);
7563 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7564 _mm256_cvtepi32_epi8 (__m256i __A
)
7566 return (__m128i
)__builtin_shufflevector(
7567 __builtin_convertvector((__v8si
)__A
, __v8qi
),
7568 (__v8qi
){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7572 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7573 _mm256_mask_cvtepi32_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7575 return (__m128i
) __builtin_ia32_pmovdb256_mask ((__v8si
) __A
,
7576 (__v16qi
) __O
, __M
);
7579 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7580 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M
, __m256i __A
)
7582 return (__m128i
) __builtin_ia32_pmovdb256_mask ((__v8si
) __A
,
7583 (__v16qi
) _mm_setzero_si128 (),
7587 static __inline__
void __DEFAULT_FN_ATTRS256
7588 _mm256_mask_cvtepi32_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7590 __builtin_ia32_pmovdb256mem_mask ((__v16qi
*) __P
, (__v8si
) __A
, __M
);
7593 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7594 _mm_cvtepi32_epi16 (__m128i __A
)
7596 return (__m128i
)__builtin_shufflevector(
7597 __builtin_convertvector((__v4si
)__A
, __v4hi
), (__v4hi
){0, 0, 0, 0}, 0, 1,
7601 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7602 _mm_mask_cvtepi32_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7604 return (__m128i
) __builtin_ia32_pmovdw128_mask ((__v4si
) __A
,
7608 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7609 _mm_maskz_cvtepi32_epi16 (__mmask8 __M
, __m128i __A
)
7611 return (__m128i
) __builtin_ia32_pmovdw128_mask ((__v4si
) __A
,
7612 (__v8hi
) _mm_setzero_si128 (),
7616 static __inline__
void __DEFAULT_FN_ATTRS128
7617 _mm_mask_cvtepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7619 __builtin_ia32_pmovdw128mem_mask ((__v8hi
*) __P
, (__v4si
) __A
, __M
);
7622 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7623 _mm256_cvtepi32_epi16 (__m256i __A
)
7625 return (__m128i
)__builtin_convertvector((__v8si
)__A
, __v8hi
);
7628 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7629 _mm256_mask_cvtepi32_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7631 return (__m128i
) __builtin_ia32_pmovdw256_mask ((__v8si
) __A
,
7635 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7636 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M
, __m256i __A
)
7638 return (__m128i
) __builtin_ia32_pmovdw256_mask ((__v8si
) __A
,
7639 (__v8hi
) _mm_setzero_si128 (),
7643 static __inline__
void __DEFAULT_FN_ATTRS256
7644 _mm256_mask_cvtepi32_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7646 __builtin_ia32_pmovdw256mem_mask ((__v8hi
*) __P
, (__v8si
) __A
, __M
);
7649 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7650 _mm_cvtepi64_epi8 (__m128i __A
)
7652 return (__m128i
)__builtin_shufflevector(
7653 __builtin_convertvector((__v2di
)__A
, __v2qi
), (__v2qi
){0, 0}, 0, 1, 2, 3,
7654 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7657 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7658 _mm_mask_cvtepi64_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7660 return (__m128i
) __builtin_ia32_pmovqb128_mask ((__v2di
) __A
,
7661 (__v16qi
) __O
, __M
);
7664 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7665 _mm_maskz_cvtepi64_epi8 (__mmask8 __M
, __m128i __A
)
7667 return (__m128i
) __builtin_ia32_pmovqb128_mask ((__v2di
) __A
,
7668 (__v16qi
) _mm_setzero_si128 (),
7672 static __inline__
void __DEFAULT_FN_ATTRS128
7673 _mm_mask_cvtepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
7675 __builtin_ia32_pmovqb128mem_mask ((__v16qi
*) __P
, (__v2di
) __A
, __M
);
7678 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7679 _mm256_cvtepi64_epi8 (__m256i __A
)
7681 return (__m128i
)__builtin_shufflevector(
7682 __builtin_convertvector((__v4di
)__A
, __v4qi
), (__v4qi
){0, 0, 0, 0}, 0, 1,
7683 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7686 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7687 _mm256_mask_cvtepi64_epi8 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7689 return (__m128i
) __builtin_ia32_pmovqb256_mask ((__v4di
) __A
,
7690 (__v16qi
) __O
, __M
);
7693 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7694 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M
, __m256i __A
)
7696 return (__m128i
) __builtin_ia32_pmovqb256_mask ((__v4di
) __A
,
7697 (__v16qi
) _mm_setzero_si128 (),
7701 static __inline__
void __DEFAULT_FN_ATTRS256
7702 _mm256_mask_cvtepi64_storeu_epi8 (void * __P
, __mmask8 __M
, __m256i __A
)
7704 __builtin_ia32_pmovqb256mem_mask ((__v16qi
*) __P
, (__v4di
) __A
, __M
);
7707 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7708 _mm_cvtepi64_epi32 (__m128i __A
)
7710 return (__m128i
)__builtin_shufflevector(
7711 __builtin_convertvector((__v2di
)__A
, __v2si
), (__v2si
){0, 0}, 0, 1, 2, 3);
7714 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7715 _mm_mask_cvtepi64_epi32 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7717 return (__m128i
) __builtin_ia32_pmovqd128_mask ((__v2di
) __A
,
7721 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7722 _mm_maskz_cvtepi64_epi32 (__mmask8 __M
, __m128i __A
)
7724 return (__m128i
) __builtin_ia32_pmovqd128_mask ((__v2di
) __A
,
7725 (__v4si
) _mm_setzero_si128 (),
7729 static __inline__
void __DEFAULT_FN_ATTRS128
7730 _mm_mask_cvtepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m128i __A
)
7732 __builtin_ia32_pmovqd128mem_mask ((__v4si
*) __P
, (__v2di
) __A
, __M
);
7735 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7736 _mm256_cvtepi64_epi32 (__m256i __A
)
7738 return (__m128i
)__builtin_convertvector((__v4di
)__A
, __v4si
);
7741 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7742 _mm256_mask_cvtepi64_epi32 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7744 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
7745 (__v4si
)_mm256_cvtepi64_epi32(__A
),
7749 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7750 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M
, __m256i __A
)
7752 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__M
,
7753 (__v4si
)_mm256_cvtepi64_epi32(__A
),
7754 (__v4si
)_mm_setzero_si128());
7757 static __inline__
void __DEFAULT_FN_ATTRS256
7758 _mm256_mask_cvtepi64_storeu_epi32 (void * __P
, __mmask8 __M
, __m256i __A
)
7760 __builtin_ia32_pmovqd256mem_mask ((__v4si
*) __P
, (__v4di
) __A
, __M
);
7763 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7764 _mm_cvtepi64_epi16 (__m128i __A
)
7766 return (__m128i
)__builtin_shufflevector(
7767 __builtin_convertvector((__v2di
)__A
, __v2hi
), (__v2hi
){0, 0}, 0, 1, 2, 3,
7771 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7772 _mm_mask_cvtepi64_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
7774 return (__m128i
) __builtin_ia32_pmovqw128_mask ((__v2di
) __A
,
7779 static __inline__ __m128i __DEFAULT_FN_ATTRS128
7780 _mm_maskz_cvtepi64_epi16 (__mmask8 __M
, __m128i __A
)
7782 return (__m128i
) __builtin_ia32_pmovqw128_mask ((__v2di
) __A
,
7783 (__v8hi
) _mm_setzero_si128 (),
7787 static __inline__
void __DEFAULT_FN_ATTRS128
7788 _mm_mask_cvtepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m128i __A
)
7790 __builtin_ia32_pmovqw128mem_mask ((__v8hi
*) __P
, (__v2di
) __A
, __M
);
7793 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7794 _mm256_cvtepi64_epi16 (__m256i __A
)
7796 return (__m128i
)__builtin_shufflevector(
7797 __builtin_convertvector((__v4di
)__A
, __v4hi
), (__v4hi
){0, 0, 0, 0}, 0, 1,
7801 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7802 _mm256_mask_cvtepi64_epi16 (__m128i __O
, __mmask8 __M
, __m256i __A
)
7804 return (__m128i
) __builtin_ia32_pmovqw256_mask ((__v4di
) __A
,
7808 static __inline__ __m128i __DEFAULT_FN_ATTRS256
7809 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M
, __m256i __A
)
7811 return (__m128i
) __builtin_ia32_pmovqw256_mask ((__v4di
) __A
,
7812 (__v8hi
) _mm_setzero_si128 (),
7816 static __inline__
void __DEFAULT_FN_ATTRS256
7817 _mm256_mask_cvtepi64_storeu_epi16 (void * __P
, __mmask8 __M
, __m256i __A
)
7819 __builtin_ia32_pmovqw256mem_mask ((__v8hi
*) __P
, (__v4di
) __A
, __M
);
/* Unmasked extract: A (as __v8sf) and the immediate imm go to
 * __builtin_ia32_extractf32x4_256_mask with an undefined pass-through
 * operand and an all-ones mask. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
/* Merge-masked extract: same builtin as the unmasked form, with W as the
 * pass-through operand and U as the lane mask. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
/* Zero-masked extract: same builtin as the unmasked form, with a zero vector
 * as the pass-through operand and U as the lane mask. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
/* Unmasked extract: A (as __v8si) and the immediate imm go to
 * __builtin_ia32_extracti32x4_256_mask with an undefined pass-through
 * operand and an all-ones mask. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
/* Merge-masked extract: same builtin as the unmasked form, with W as the
 * pass-through operand and U as the lane mask. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
/* Zero-masked extract: same builtin as the unmasked form, with a zero vector
 * as the pass-through operand and U as the lane mask. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
/* Forwards A (as __v8sf), B (as __v4sf) and the immediate imm to
 * __builtin_ia32_insertf32x4_256; yields a __m256. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256( \
      (__v8sf)(__m256)(A), (__v4sf)(__m128)(B), (int)(imm)))
/* Merge-masked insert: hands U, the _mm256_insertf32x4(A, B, imm) result and
 * W to __builtin_ia32_selectps_256. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)(__m256)(W)))
/* Zero-masked insert: hands U, the _mm256_insertf32x4(A, B, imm) result and
 * a zero vector to __builtin_ia32_selectps_256. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256( \
      (__mmask8)(U), \
      (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
      (__v8sf)_mm256_setzero_ps()))
/* Forwards A (as __v8si), B (as __v4si) and the immediate imm to
 * __builtin_ia32_inserti32x4_256; yields a __m256i. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm)))
/* Merge-masked insert: hands U, the _mm256_inserti32x4(A, B, imm) result and
 * W to __builtin_ia32_selectd_256. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
/* Zero-masked insert: hands U, the _mm256_inserti32x4(A, B, imm) result and
 * a zero vector to __builtin_ia32_selectd_256. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
/* Unmasked getmant on A (as __v2df). The two selector arguments are packed
 * into one immediate as ((C) << 2) | (B); the pass-through operand is a zero
 * vector and the mask is all ones. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
/* Merge-masked getmant on A (as __v2df): immediate is ((C) << 2) | (B), W is
 * the pass-through operand and U the lane mask. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
/* Zero-masked getmant on A (as __v2df): immediate is ((C) << 2) | (B), a
 * zero vector is the pass-through operand and U the lane mask. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
/* Unmasked getmant on A (as __v4df). The two selector arguments are packed
 * into one immediate as ((C) << 2) | (B); the pass-through operand is a zero
 * vector and the mask is all ones. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
/* Merge-masked getmant on A (as __v4df): immediate is ((C) << 2) | (B), W is
 * the pass-through operand and U the lane mask. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
/* Zero-masked getmant on A (as __v4df): immediate is ((C) << 2) | (B), a
 * zero vector is the pass-through operand and U the lane mask. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
/* Unmasked getmant on A (as __v4sf). The two selector arguments are packed
 * into one immediate as ((C) << 2) | (B); the pass-through operand is a zero
 * vector and the mask is all ones. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1))
/* Merge-masked getmant on A (as __v4sf): immediate is ((C) << 2) | (B), W is
 * the pass-through operand and U the lane mask. */
#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))
/* Zero-masked getmant on A (as __v4sf): immediate is ((C) << 2) | (B), a
 * zero vector is the pass-through operand and U the lane mask. */
#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
/* Unmasked getmant on A (as __v8sf). The two selector arguments are packed
 * into one immediate as ((C) << 2) | (B); the pass-through operand is a zero
 * vector and the mask is all ones. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1))
/* Merge-masked getmant on A (as __v8sf): immediate is ((C) << 2) | (B), W is
 * the pass-through operand and U the lane mask. */
#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))
/* Zero-masked getmant on A (as __v8sf): immediate is ((C) << 2) | (B), a
 * zero vector is the pass-through operand and U the lane mask. */
#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
/* Masked gather: forwards v1_old (as __v2df), addr, index (as __v2di), mask
 * and scale to __builtin_ia32_gather3div2df; yields a __m128d. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v2di), addr, index (as __v2di), mask
 * and scale to __builtin_ia32_gather3div2di; yields a __m128i. */
#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4df), addr, index (as __v4di), mask
 * and scale to __builtin_ia32_gather3div4df; yields a __m256d. */
#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4di), addr, index (as __v4di), mask
 * and scale to __builtin_ia32_gather3div4di; yields a __m256i. */
#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4sf), addr, index (as __v2di), mask
 * and scale to __builtin_ia32_gather3div4sf; yields a __m128. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4si), addr, index (as __v2di), mask
 * and scale to __builtin_ia32_gather3div4si; yields a __m128i. */
#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v2di)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather with a 256-bit index: forwards v1_old (as __v4sf), addr,
 * index (as __v4di), mask and scale to __builtin_ia32_gather3div8sf. The
 * result is a 128-bit __m128. */
#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather with a 256-bit index: forwards v1_old (as __v4si), addr,
 * index (as __v4di), mask and scale to __builtin_ia32_gather3div8si. The
 * result is a 128-bit __m128i. */
#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4di)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v2df), addr, index (as __v4si), mask
 * and scale to __builtin_ia32_gather3siv2df; yields a __m128d. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df( \
      (__v2df)(__m128d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v2di), addr, index (as __v4si), mask
 * and scale to __builtin_ia32_gather3siv2di; yields a __m128i. */
#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di( \
      (__v2di)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather with a 128-bit index: forwards v1_old (as __v4df), addr,
 * index (as __v4si), mask and scale to __builtin_ia32_gather3siv4df; yields
 * a __m256d. */
#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df( \
      (__v4df)(__m256d)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather with a 128-bit index: forwards v1_old (as __v4di), addr,
 * index (as __v4si), mask and scale to __builtin_ia32_gather3siv4di; yields
 * a __m256i. */
#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di( \
      (__v4di)(__m256i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4sf), addr, index (as __v4si), mask
 * and scale to __builtin_ia32_gather3siv4sf; yields a __m128. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf( \
      (__v4sf)(__m128)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v4si), addr, index (as __v4si), mask
 * and scale to __builtin_ia32_gather3siv4si; yields a __m128i. */
#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si( \
      (__v4si)(__m128i)(v1_old), (void const *)(addr), \
      (__v4si)(__m128i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v8sf), addr, index (as __v8si), mask
 * and scale to __builtin_ia32_gather3siv8sf; yields a __m256. */
#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf( \
      (__v8sf)(__m256)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Masked gather: forwards v1_old (as __v8si), addr, index (as __v8si), mask
 * and scale to __builtin_ia32_gather3siv8si; yields a __m256i. */
#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si( \
      (__v8si)(__m256i)(v1_old), (void const *)(addr), \
      (__v8si)(__m256i)(index), (__mmask8)(mask), (int)(scale)))
/* Forwards X (as __v4df) and the immediate C to __builtin_ia32_permdf256;
 * yields a __m256d. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256( \
      (__v4df)(__m256d)(X), (int)(C)))
/* Merge-masked permute: hands U, the _mm256_permutex_pd(X, C) result and W
 * to __builtin_ia32_selectpd_256. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))
/* Zero-masked permute: hands U, the _mm256_permutex_pd(X, C) result and a
 * zero vector to __builtin_ia32_selectpd_256. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256( \
      (__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
/* Forwards X (as __v4di) and the immediate C to __builtin_ia32_permdi256;
 * yields a __m256i. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256( \
      (__v4di)(__m256i)(X), (int)(C)))
/* Merge-masked permute: hands U, the _mm256_permutex_epi64(X, C) result and
 * W to __builtin_ia32_selectq_256. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))
/* Zero-masked permute: hands U, the _mm256_permutex_epi64(X, C) result and a
 * zero vector to __builtin_ia32_selectq_256. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
8080 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8081 _mm256_permutexvar_pd (__m256i __X
, __m256d __Y
)
8083 return (__m256d
)__builtin_ia32_permvardf256((__v4df
)__Y
, (__v4di
)__X
);
8086 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8087 _mm256_mask_permutexvar_pd (__m256d __W
, __mmask8 __U
, __m256i __X
,
8090 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
8091 (__v4df
)_mm256_permutexvar_pd(__X
, __Y
),
8095 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8096 _mm256_maskz_permutexvar_pd (__mmask8 __U
, __m256i __X
, __m256d __Y
)
8098 return (__m256d
)__builtin_ia32_selectpd_256((__mmask8
)__U
,
8099 (__v4df
)_mm256_permutexvar_pd(__X
, __Y
),
8100 (__v4df
)_mm256_setzero_pd());
8103 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8104 _mm256_permutexvar_epi64 ( __m256i __X
, __m256i __Y
)
8106 return (__m256i
)__builtin_ia32_permvardi256((__v4di
) __Y
, (__v4di
) __X
);
8109 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8110 _mm256_maskz_permutexvar_epi64 (__mmask8 __M
, __m256i __X
, __m256i __Y
)
8112 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
8113 (__v4di
)_mm256_permutexvar_epi64(__X
, __Y
),
8114 (__v4di
)_mm256_setzero_si256());
8117 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8118 _mm256_mask_permutexvar_epi64 (__m256i __W
, __mmask8 __M
, __m256i __X
,
8121 return (__m256i
)__builtin_ia32_selectq_256((__mmask8
)__M
,
8122 (__v4di
)_mm256_permutexvar_epi64(__X
, __Y
),
/* Alias for _mm256_permutevar8x32_ps with the two arguments swapped. */
#define _mm256_permutexvar_ps(A, B) \
  _mm256_permutevar8x32_ps((B), (A))
8128 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8129 _mm256_mask_permutexvar_ps(__m256 __W
, __mmask8 __U
, __m256i __X
, __m256 __Y
)
8131 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8132 (__v8sf
)_mm256_permutexvar_ps(__X
, __Y
),
8136 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8137 _mm256_maskz_permutexvar_ps(__mmask8 __U
, __m256i __X
, __m256 __Y
)
8139 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8140 (__v8sf
)_mm256_permutexvar_ps(__X
, __Y
),
8141 (__v8sf
)_mm256_setzero_ps());
/* Alias for _mm256_permutevar8x32_epi32 with the two arguments swapped. */
#define _mm256_permutexvar_epi32(A, B) \
  _mm256_permutevar8x32_epi32((B), (A))
8146 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8147 _mm256_mask_permutexvar_epi32(__m256i __W
, __mmask8 __M
, __m256i __X
,
8150 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
8151 (__v8si
)_mm256_permutexvar_epi32(__X
, __Y
),
8155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
8156 _mm256_maskz_permutexvar_epi32(__mmask8 __M
, __m256i __X
, __m256i __Y
)
8158 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__M
,
8159 (__v8si
)_mm256_permutexvar_epi32(__X
, __Y
),
8160 (__v8si
)_mm256_setzero_si256());
/* Forwards A and B (both as __v4si) and the immediate imm to
 * __builtin_ia32_alignd128; yields a __m128i. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128( \
      (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (int)(imm)))
/* Merge-masked align: hands U, the _mm_alignr_epi32(A, B, imm) result and W
 * to __builtin_ia32_selectd_128. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))
/* Zero-masked align: hands U, the _mm_alignr_epi32(A, B, imm) result and a
 * zero vector to __builtin_ia32_selectd_128. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
/* Forwards A and B (both as __v8si) and the immediate imm to
 * __builtin_ia32_alignd256; yields a __m256i. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256( \
      (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (int)(imm)))
/* Merge-masked align: hands U, the _mm256_alignr_epi32(A, B, imm) result and
 * W to __builtin_ia32_selectd_256. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))
/* Zero-masked align: hands U, the _mm256_alignr_epi32(A, B, imm) result and
 * a zero vector to __builtin_ia32_selectd_256. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
/* Forwards A and B (both as __v2di) and the immediate imm to
 * __builtin_ia32_alignq128; yields a __m128i. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128( \
      (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (int)(imm)))
/* Merge-masked align: hands U, the _mm_alignr_epi64(A, B, imm) result and W
 * to __builtin_ia32_selectq_128. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)))
/* Zero-masked align: hands U, the _mm_alignr_epi64(A, B, imm) result and a
 * zero vector to __builtin_ia32_selectq_128. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128( \
      (__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_si128()))
/* Forwards A and B (both as __v4di) and the immediate imm to
 * __builtin_ia32_alignq256; yields a __m256i. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256( \
      (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (int)(imm)))
/* Merge-masked align: hands U, the _mm256_alignr_epi64(A, B, imm) result and
 * W to __builtin_ia32_selectq_256. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))
/* Zero-masked align: hands U, the _mm256_alignr_epi64(A, B, imm) result and
 * a zero vector to __builtin_ia32_selectq_256. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256( \
      (__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
8219 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8220 _mm_mask_movehdup_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
8222 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8223 (__v4sf
)_mm_movehdup_ps(__A
),
8227 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8228 _mm_maskz_movehdup_ps (__mmask8 __U
, __m128 __A
)
8230 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8231 (__v4sf
)_mm_movehdup_ps(__A
),
8232 (__v4sf
)_mm_setzero_ps());
8235 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8236 _mm256_mask_movehdup_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
8238 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8239 (__v8sf
)_mm256_movehdup_ps(__A
),
8243 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8244 _mm256_maskz_movehdup_ps (__mmask8 __U
, __m256 __A
)
8246 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8247 (__v8sf
)_mm256_movehdup_ps(__A
),
8248 (__v8sf
)_mm256_setzero_ps());
8251 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8252 _mm_mask_moveldup_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
8254 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8255 (__v4sf
)_mm_moveldup_ps(__A
),
8259 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8260 _mm_maskz_moveldup_ps (__mmask8 __U
, __m128 __A
)
8262 return (__m128
)__builtin_ia32_selectps_128((__mmask8
)__U
,
8263 (__v4sf
)_mm_moveldup_ps(__A
),
8264 (__v4sf
)_mm_setzero_ps());
8267 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8268 _mm256_mask_moveldup_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
8270 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8271 (__v8sf
)_mm256_moveldup_ps(__A
),
8275 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8276 _mm256_maskz_moveldup_ps (__mmask8 __U
, __m256 __A
)
8278 return (__m256
)__builtin_ia32_selectps_256((__mmask8
)__U
,
8279 (__v8sf
)_mm256_moveldup_ps(__A
),
8280 (__v8sf
)_mm256_setzero_ps());
/* Merge-masked shuffle: hands U, the _mm256_shuffle_epi32(A, I) result and W
 * to __builtin_ia32_selectd_256. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)))
/* Zero-masked shuffle: hands U, the _mm256_shuffle_epi32(A, I) result and a
 * zero vector to __builtin_ia32_selectd_256. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()))
/* Merge-masked shuffle: hands U, the _mm_shuffle_epi32(A, I) result and W to
 * __builtin_ia32_selectd_128. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)))
/* Zero-masked shuffle: hands U, the _mm_shuffle_epi32(A, I) result and a
 * zero vector to __builtin_ia32_selectd_128. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128( \
      (__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()))
8303 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8304 _mm_mask_mov_pd (__m128d __W
, __mmask8 __U
, __m128d __A
)
8306 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
8311 static __inline__ __m128d __DEFAULT_FN_ATTRS128
8312 _mm_maskz_mov_pd (__mmask8 __U
, __m128d __A
)
8314 return (__m128d
) __builtin_ia32_selectpd_128 ((__mmask8
) __U
,
8316 (__v2df
) _mm_setzero_pd ());
8319 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8320 _mm256_mask_mov_pd (__m256d __W
, __mmask8 __U
, __m256d __A
)
8322 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
8327 static __inline__ __m256d __DEFAULT_FN_ATTRS256
8328 _mm256_maskz_mov_pd (__mmask8 __U
, __m256d __A
)
8330 return (__m256d
) __builtin_ia32_selectpd_256 ((__mmask8
) __U
,
8332 (__v4df
) _mm256_setzero_pd ());
8335 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8336 _mm_mask_mov_ps (__m128 __W
, __mmask8 __U
, __m128 __A
)
8338 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
8343 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8344 _mm_maskz_mov_ps (__mmask8 __U
, __m128 __A
)
8346 return (__m128
) __builtin_ia32_selectps_128 ((__mmask8
) __U
,
8348 (__v4sf
) _mm_setzero_ps ());
8351 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8352 _mm256_mask_mov_ps (__m256 __W
, __mmask8 __U
, __m256 __A
)
8354 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
8359 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8360 _mm256_maskz_mov_ps (__mmask8 __U
, __m256 __A
)
8362 return (__m256
) __builtin_ia32_selectps_256 ((__mmask8
) __U
,
8364 (__v8sf
) _mm256_setzero_ps ());
8367 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8368 _mm_mask_cvtph_ps (__m128 __W
, __mmask8 __U
, __m128i __A
)
8370 return (__m128
) __builtin_ia32_vcvtph2ps_mask ((__v8hi
) __A
,
8375 static __inline__ __m128 __DEFAULT_FN_ATTRS128
8376 _mm_maskz_cvtph_ps (__mmask8 __U
, __m128i __A
)
8378 return (__m128
) __builtin_ia32_vcvtph2ps_mask ((__v8hi
) __A
,
8384 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8385 _mm256_mask_cvtph_ps (__m256 __W
, __mmask8 __U
, __m128i __A
)
8387 return (__m256
) __builtin_ia32_vcvtph2ps256_mask ((__v8hi
) __A
,
8392 static __inline__ __m256 __DEFAULT_FN_ATTRS256
8393 _mm256_maskz_cvtph_ps (__mmask8 __U
, __m128i __A
)
8395 return (__m256
) __builtin_ia32_vcvtph2ps256_mask ((__v8hi
) __A
,
8397 _mm256_setzero_ps (),
/* Merge-masking single-precision to half-precision conversion of a 128-bit
 * source. A is the source, I the (int) control operand passed to the builtin,
 * W the pass-through vector and U the write mask. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
                                          (__mmask8)(U)))
/* Zero-masking single-precision to half-precision conversion of a 128-bit
 * source. A is the source, I the (int) control operand passed to the builtin
 * and U the write mask; an all-zero vector is the pass-through operand. */
#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U)))
/* Alternate spellings: the cvtps_ph names expand directly to the matching
 * cvt_roundps_ph macros and therefore take the same arguments. */
#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
/* Merge-masking single-precision to half-precision conversion of a 256-bit
 * source into a 128-bit result. A is the source, I the (int) control operand
 * passed to the builtin, W the pass-through vector and U the write mask. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
                                             (__mmask8)(U)))
/* Zero-masking single-precision to half-precision conversion of a 256-bit
 * source into a 128-bit result. A is the source, I the (int) control operand
 * passed to the builtin and U the write mask; an all-zero vector is the
 * pass-through operand. */
#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U)))
/* Alternate spellings: the 256-bit cvtps_ph names expand directly to the
 * matching cvt_roundps_ph macros and therefore take the same arguments. */
#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8428 #undef __DEFAULT_FN_ATTRS128
8429 #undef __DEFAULT_FN_ATTRS256
8431 #endif /* __AVX512VLINTRIN_H */