/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
11 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLBWINTRIN_H
15 #define __AVX512VLBWINTRIN_H
17 /* Define the default attributes for the functions in this file. */
18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
/* 128-bit byte compares with predicate p. The unmasked forms pass an all-ones
 * mask to the builtin; the _mask_ forms pass the caller's mask m. */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* 256-bit byte compares with predicate p. The unmasked forms pass an all-ones
 * mask to the builtin; the _mask_ forms pass the caller's mask m. */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)(m)))
/* 128-bit 16-bit-element compares with predicate p. The unmasked forms pass an
 * all-ones mask to the builtin; the _mask_ forms pass the caller's mask m. */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* 256-bit 16-bit-element compares with predicate p. The unmasked forms pass an
 * all-ones mask to the builtin; the _mask_ forms pass the caller's mask m. */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* Fixed-predicate shorthands over _mm_cmp_epi8_mask and
 * _mm_mask_cmp_epi8_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
    _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
    _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm256_cmp_epi8_mask and
 * _mm256_mask_cmp_epi8_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
    _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
    _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm_cmp_epu8_mask and
 * _mm_mask_cmp_epu8_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
    _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
    _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm256_cmp_epu8_mask and
 * _mm256_mask_cmp_epu8_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
    _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
    _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm_cmp_epi16_mask and
 * _mm_mask_cmp_epi16_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
    _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
    _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm256_cmp_epi16_mask and
 * _mm256_mask_cmp_epi16_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
    _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
    _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm_cmp_epu16_mask and
 * _mm_mask_cmp_epu16_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
    _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
    _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands over _mm256_cmp_epu16_mask and
 * _mm256_mask_cmp_epu16_mask; each only supplies the _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
    _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
    _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
303 static __inline__ __m256i __DEFAULT_FN_ATTRS256
304 _mm256_mask_add_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
){
305 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
306 (__v32qi
)_mm256_add_epi8(__A
, __B
),
310 static __inline__ __m256i __DEFAULT_FN_ATTRS256
311 _mm256_maskz_add_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
312 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
313 (__v32qi
)_mm256_add_epi8(__A
, __B
),
314 (__v32qi
)_mm256_setzero_si256());
317 static __inline__ __m256i __DEFAULT_FN_ATTRS256
318 _mm256_mask_add_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
319 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
320 (__v16hi
)_mm256_add_epi16(__A
, __B
),
324 static __inline__ __m256i __DEFAULT_FN_ATTRS256
325 _mm256_maskz_add_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
326 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
327 (__v16hi
)_mm256_add_epi16(__A
, __B
),
328 (__v16hi
)_mm256_setzero_si256());
331 static __inline__ __m256i __DEFAULT_FN_ATTRS256
332 _mm256_mask_sub_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
333 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
334 (__v32qi
)_mm256_sub_epi8(__A
, __B
),
338 static __inline__ __m256i __DEFAULT_FN_ATTRS256
339 _mm256_maskz_sub_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
340 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
341 (__v32qi
)_mm256_sub_epi8(__A
, __B
),
342 (__v32qi
)_mm256_setzero_si256());
345 static __inline__ __m256i __DEFAULT_FN_ATTRS256
346 _mm256_mask_sub_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
347 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
348 (__v16hi
)_mm256_sub_epi16(__A
, __B
),
352 static __inline__ __m256i __DEFAULT_FN_ATTRS256
353 _mm256_maskz_sub_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
354 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
355 (__v16hi
)_mm256_sub_epi16(__A
, __B
),
356 (__v16hi
)_mm256_setzero_si256());
359 static __inline__ __m128i __DEFAULT_FN_ATTRS128
360 _mm_mask_add_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
361 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
362 (__v16qi
)_mm_add_epi8(__A
, __B
),
366 static __inline__ __m128i __DEFAULT_FN_ATTRS128
367 _mm_maskz_add_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
368 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
369 (__v16qi
)_mm_add_epi8(__A
, __B
),
370 (__v16qi
)_mm_setzero_si128());
373 static __inline__ __m128i __DEFAULT_FN_ATTRS128
374 _mm_mask_add_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
375 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
376 (__v8hi
)_mm_add_epi16(__A
, __B
),
380 static __inline__ __m128i __DEFAULT_FN_ATTRS128
381 _mm_maskz_add_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
382 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
383 (__v8hi
)_mm_add_epi16(__A
, __B
),
384 (__v8hi
)_mm_setzero_si128());
387 static __inline__ __m128i __DEFAULT_FN_ATTRS128
388 _mm_mask_sub_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
389 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
390 (__v16qi
)_mm_sub_epi8(__A
, __B
),
394 static __inline__ __m128i __DEFAULT_FN_ATTRS128
395 _mm_maskz_sub_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
396 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
397 (__v16qi
)_mm_sub_epi8(__A
, __B
),
398 (__v16qi
)_mm_setzero_si128());
401 static __inline__ __m128i __DEFAULT_FN_ATTRS128
402 _mm_mask_sub_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
403 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
404 (__v8hi
)_mm_sub_epi16(__A
, __B
),
408 static __inline__ __m128i __DEFAULT_FN_ATTRS128
409 _mm_maskz_sub_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
410 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
411 (__v8hi
)_mm_sub_epi16(__A
, __B
),
412 (__v8hi
)_mm_setzero_si128());
415 static __inline__ __m256i __DEFAULT_FN_ATTRS256
416 _mm256_mask_mullo_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
417 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
418 (__v16hi
)_mm256_mullo_epi16(__A
, __B
),
422 static __inline__ __m256i __DEFAULT_FN_ATTRS256
423 _mm256_maskz_mullo_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
424 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
425 (__v16hi
)_mm256_mullo_epi16(__A
, __B
),
426 (__v16hi
)_mm256_setzero_si256());
429 static __inline__ __m128i __DEFAULT_FN_ATTRS128
430 _mm_mask_mullo_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
431 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
432 (__v8hi
)_mm_mullo_epi16(__A
, __B
),
436 static __inline__ __m128i __DEFAULT_FN_ATTRS128
437 _mm_maskz_mullo_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
438 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
439 (__v8hi
)_mm_mullo_epi16(__A
, __B
),
440 (__v8hi
)_mm_setzero_si128());
443 static __inline__ __m128i __DEFAULT_FN_ATTRS128
444 _mm_mask_blend_epi8 (__mmask16 __U
, __m128i __A
, __m128i __W
)
446 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
451 static __inline__ __m256i __DEFAULT_FN_ATTRS256
452 _mm256_mask_blend_epi8 (__mmask32 __U
, __m256i __A
, __m256i __W
)
454 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
460 _mm_mask_blend_epi16 (__mmask8 __U
, __m128i __A
, __m128i __W
)
462 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
467 static __inline__ __m256i __DEFAULT_FN_ATTRS256
468 _mm256_mask_blend_epi16 (__mmask16 __U
, __m256i __A
, __m256i __W
)
470 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
475 static __inline__ __m128i __DEFAULT_FN_ATTRS128
476 _mm_mask_abs_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
)
478 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
479 (__v16qi
)_mm_abs_epi8(__A
),
483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
484 _mm_maskz_abs_epi8(__mmask16 __U
, __m128i __A
)
486 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
487 (__v16qi
)_mm_abs_epi8(__A
),
488 (__v16qi
)_mm_setzero_si128());
491 static __inline__ __m256i __DEFAULT_FN_ATTRS256
492 _mm256_mask_abs_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
)
494 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
495 (__v32qi
)_mm256_abs_epi8(__A
),
499 static __inline__ __m256i __DEFAULT_FN_ATTRS256
500 _mm256_maskz_abs_epi8 (__mmask32 __U
, __m256i __A
)
502 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
503 (__v32qi
)_mm256_abs_epi8(__A
),
504 (__v32qi
)_mm256_setzero_si256());
507 static __inline__ __m128i __DEFAULT_FN_ATTRS128
508 _mm_mask_abs_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
510 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
511 (__v8hi
)_mm_abs_epi16(__A
),
515 static __inline__ __m128i __DEFAULT_FN_ATTRS128
516 _mm_maskz_abs_epi16(__mmask8 __U
, __m128i __A
)
518 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
519 (__v8hi
)_mm_abs_epi16(__A
),
520 (__v8hi
)_mm_setzero_si128());
523 static __inline__ __m256i __DEFAULT_FN_ATTRS256
524 _mm256_mask_abs_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
)
526 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
527 (__v16hi
)_mm256_abs_epi16(__A
),
531 static __inline__ __m256i __DEFAULT_FN_ATTRS256
532 _mm256_maskz_abs_epi16(__mmask16 __U
, __m256i __A
)
534 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
535 (__v16hi
)_mm256_abs_epi16(__A
),
536 (__v16hi
)_mm256_setzero_si256());
539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
540 _mm_maskz_packs_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
541 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
542 (__v8hi
)_mm_packs_epi32(__A
, __B
),
543 (__v8hi
)_mm_setzero_si128());
546 static __inline__ __m128i __DEFAULT_FN_ATTRS128
547 _mm_mask_packs_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
549 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
550 (__v8hi
)_mm_packs_epi32(__A
, __B
),
554 static __inline__ __m256i __DEFAULT_FN_ATTRS256
555 _mm256_maskz_packs_epi32(__mmask16 __M
, __m256i __A
, __m256i __B
)
557 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
558 (__v16hi
)_mm256_packs_epi32(__A
, __B
),
559 (__v16hi
)_mm256_setzero_si256());
562 static __inline__ __m256i __DEFAULT_FN_ATTRS256
563 _mm256_mask_packs_epi32(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
565 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
566 (__v16hi
)_mm256_packs_epi32(__A
, __B
),
570 static __inline__ __m128i __DEFAULT_FN_ATTRS128
571 _mm_maskz_packs_epi16(__mmask16 __M
, __m128i __A
, __m128i __B
)
573 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
574 (__v16qi
)_mm_packs_epi16(__A
, __B
),
575 (__v16qi
)_mm_setzero_si128());
578 static __inline__ __m128i __DEFAULT_FN_ATTRS128
579 _mm_mask_packs_epi16(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
581 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
582 (__v16qi
)_mm_packs_epi16(__A
, __B
),
586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
587 _mm256_maskz_packs_epi16(__mmask32 __M
, __m256i __A
, __m256i __B
)
589 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
590 (__v32qi
)_mm256_packs_epi16(__A
, __B
),
591 (__v32qi
)_mm256_setzero_si256());
594 static __inline__ __m256i __DEFAULT_FN_ATTRS256
595 _mm256_mask_packs_epi16(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
597 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
598 (__v32qi
)_mm256_packs_epi16(__A
, __B
),
602 static __inline__ __m128i __DEFAULT_FN_ATTRS128
603 _mm_maskz_packus_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
)
605 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
606 (__v8hi
)_mm_packus_epi32(__A
, __B
),
607 (__v8hi
)_mm_setzero_si128());
610 static __inline__ __m128i __DEFAULT_FN_ATTRS128
611 _mm_mask_packus_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
613 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
614 (__v8hi
)_mm_packus_epi32(__A
, __B
),
618 static __inline__ __m256i __DEFAULT_FN_ATTRS256
619 _mm256_maskz_packus_epi32(__mmask16 __M
, __m256i __A
, __m256i __B
)
621 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
622 (__v16hi
)_mm256_packus_epi32(__A
, __B
),
623 (__v16hi
)_mm256_setzero_si256());
626 static __inline__ __m256i __DEFAULT_FN_ATTRS256
627 _mm256_mask_packus_epi32(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
629 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
630 (__v16hi
)_mm256_packus_epi32(__A
, __B
),
634 static __inline__ __m128i __DEFAULT_FN_ATTRS128
635 _mm_maskz_packus_epi16(__mmask16 __M
, __m128i __A
, __m128i __B
)
637 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
638 (__v16qi
)_mm_packus_epi16(__A
, __B
),
639 (__v16qi
)_mm_setzero_si128());
642 static __inline__ __m128i __DEFAULT_FN_ATTRS128
643 _mm_mask_packus_epi16(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
645 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
646 (__v16qi
)_mm_packus_epi16(__A
, __B
),
650 static __inline__ __m256i __DEFAULT_FN_ATTRS256
651 _mm256_maskz_packus_epi16(__mmask32 __M
, __m256i __A
, __m256i __B
)
653 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
654 (__v32qi
)_mm256_packus_epi16(__A
, __B
),
655 (__v32qi
)_mm256_setzero_si256());
658 static __inline__ __m256i __DEFAULT_FN_ATTRS256
659 _mm256_mask_packus_epi16(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
661 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
662 (__v32qi
)_mm256_packus_epi16(__A
, __B
),
666 static __inline__ __m128i __DEFAULT_FN_ATTRS128
667 _mm_mask_adds_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
669 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
670 (__v16qi
)_mm_adds_epi8(__A
, __B
),
674 static __inline__ __m128i __DEFAULT_FN_ATTRS128
675 _mm_maskz_adds_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
677 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
678 (__v16qi
)_mm_adds_epi8(__A
, __B
),
679 (__v16qi
)_mm_setzero_si128());
682 static __inline__ __m256i __DEFAULT_FN_ATTRS256
683 _mm256_mask_adds_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
685 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
686 (__v32qi
)_mm256_adds_epi8(__A
, __B
),
690 static __inline__ __m256i __DEFAULT_FN_ATTRS256
691 _mm256_maskz_adds_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
693 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
694 (__v32qi
)_mm256_adds_epi8(__A
, __B
),
695 (__v32qi
)_mm256_setzero_si256());
698 static __inline__ __m128i __DEFAULT_FN_ATTRS128
699 _mm_mask_adds_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
701 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
702 (__v8hi
)_mm_adds_epi16(__A
, __B
),
706 static __inline__ __m128i __DEFAULT_FN_ATTRS128
707 _mm_maskz_adds_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
709 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
710 (__v8hi
)_mm_adds_epi16(__A
, __B
),
711 (__v8hi
)_mm_setzero_si128());
714 static __inline__ __m256i __DEFAULT_FN_ATTRS256
715 _mm256_mask_adds_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
717 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
718 (__v16hi
)_mm256_adds_epi16(__A
, __B
),
722 static __inline__ __m256i __DEFAULT_FN_ATTRS256
723 _mm256_maskz_adds_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
725 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
726 (__v16hi
)_mm256_adds_epi16(__A
, __B
),
727 (__v16hi
)_mm256_setzero_si256());
730 static __inline__ __m128i __DEFAULT_FN_ATTRS128
731 _mm_mask_adds_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
733 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
734 (__v16qi
)_mm_adds_epu8(__A
, __B
),
738 static __inline__ __m128i __DEFAULT_FN_ATTRS128
739 _mm_maskz_adds_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
741 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
742 (__v16qi
)_mm_adds_epu8(__A
, __B
),
743 (__v16qi
)_mm_setzero_si128());
746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
747 _mm256_mask_adds_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
749 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
750 (__v32qi
)_mm256_adds_epu8(__A
, __B
),
754 static __inline__ __m256i __DEFAULT_FN_ATTRS256
755 _mm256_maskz_adds_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
757 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
758 (__v32qi
)_mm256_adds_epu8(__A
, __B
),
759 (__v32qi
)_mm256_setzero_si256());
762 static __inline__ __m128i __DEFAULT_FN_ATTRS128
763 _mm_mask_adds_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
765 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
766 (__v8hi
)_mm_adds_epu16(__A
, __B
),
770 static __inline__ __m128i __DEFAULT_FN_ATTRS128
771 _mm_maskz_adds_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
773 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
774 (__v8hi
)_mm_adds_epu16(__A
, __B
),
775 (__v8hi
)_mm_setzero_si128());
778 static __inline__ __m256i __DEFAULT_FN_ATTRS256
779 _mm256_mask_adds_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
781 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
782 (__v16hi
)_mm256_adds_epu16(__A
, __B
),
786 static __inline__ __m256i __DEFAULT_FN_ATTRS256
787 _mm256_maskz_adds_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
789 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
790 (__v16hi
)_mm256_adds_epu16(__A
, __B
),
791 (__v16hi
)_mm256_setzero_si256());
794 static __inline__ __m128i __DEFAULT_FN_ATTRS128
795 _mm_mask_avg_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
797 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
798 (__v16qi
)_mm_avg_epu8(__A
, __B
),
802 static __inline__ __m128i __DEFAULT_FN_ATTRS128
803 _mm_maskz_avg_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
805 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
806 (__v16qi
)_mm_avg_epu8(__A
, __B
),
807 (__v16qi
)_mm_setzero_si128());
810 static __inline__ __m256i __DEFAULT_FN_ATTRS256
811 _mm256_mask_avg_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
813 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
814 (__v32qi
)_mm256_avg_epu8(__A
, __B
),
818 static __inline__ __m256i __DEFAULT_FN_ATTRS256
819 _mm256_maskz_avg_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
821 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
822 (__v32qi
)_mm256_avg_epu8(__A
, __B
),
823 (__v32qi
)_mm256_setzero_si256());
826 static __inline__ __m128i __DEFAULT_FN_ATTRS128
827 _mm_mask_avg_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
829 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
830 (__v8hi
)_mm_avg_epu16(__A
, __B
),
834 static __inline__ __m128i __DEFAULT_FN_ATTRS128
835 _mm_maskz_avg_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
837 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
838 (__v8hi
)_mm_avg_epu16(__A
, __B
),
839 (__v8hi
)_mm_setzero_si128());
842 static __inline__ __m256i __DEFAULT_FN_ATTRS256
843 _mm256_mask_avg_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
845 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
846 (__v16hi
)_mm256_avg_epu16(__A
, __B
),
850 static __inline__ __m256i __DEFAULT_FN_ATTRS256
851 _mm256_maskz_avg_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
853 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
854 (__v16hi
)_mm256_avg_epu16(__A
, __B
),
855 (__v16hi
)_mm256_setzero_si256());
858 static __inline__ __m128i __DEFAULT_FN_ATTRS128
859 _mm_maskz_max_epi8(__mmask16 __M
, __m128i __A
, __m128i __B
)
861 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
862 (__v16qi
)_mm_max_epi8(__A
, __B
),
863 (__v16qi
)_mm_setzero_si128());
866 static __inline__ __m128i __DEFAULT_FN_ATTRS128
867 _mm_mask_max_epi8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
869 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
870 (__v16qi
)_mm_max_epi8(__A
, __B
),
874 static __inline__ __m256i __DEFAULT_FN_ATTRS256
875 _mm256_maskz_max_epi8(__mmask32 __M
, __m256i __A
, __m256i __B
)
877 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
878 (__v32qi
)_mm256_max_epi8(__A
, __B
),
879 (__v32qi
)_mm256_setzero_si256());
882 static __inline__ __m256i __DEFAULT_FN_ATTRS256
883 _mm256_mask_max_epi8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
885 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
886 (__v32qi
)_mm256_max_epi8(__A
, __B
),
890 static __inline__ __m128i __DEFAULT_FN_ATTRS128
891 _mm_maskz_max_epi16(__mmask8 __M
, __m128i __A
, __m128i __B
)
893 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
894 (__v8hi
)_mm_max_epi16(__A
, __B
),
895 (__v8hi
)_mm_setzero_si128());
898 static __inline__ __m128i __DEFAULT_FN_ATTRS128
899 _mm_mask_max_epi16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
901 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
902 (__v8hi
)_mm_max_epi16(__A
, __B
),
906 static __inline__ __m256i __DEFAULT_FN_ATTRS256
907 _mm256_maskz_max_epi16(__mmask16 __M
, __m256i __A
, __m256i __B
)
909 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
910 (__v16hi
)_mm256_max_epi16(__A
, __B
),
911 (__v16hi
)_mm256_setzero_si256());
914 static __inline__ __m256i __DEFAULT_FN_ATTRS256
915 _mm256_mask_max_epi16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
917 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
918 (__v16hi
)_mm256_max_epi16(__A
, __B
),
922 static __inline__ __m128i __DEFAULT_FN_ATTRS128
923 _mm_maskz_max_epu8(__mmask16 __M
, __m128i __A
, __m128i __B
)
925 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
926 (__v16qi
)_mm_max_epu8(__A
, __B
),
927 (__v16qi
)_mm_setzero_si128());
930 static __inline__ __m128i __DEFAULT_FN_ATTRS128
931 _mm_mask_max_epu8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
933 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
934 (__v16qi
)_mm_max_epu8(__A
, __B
),
938 static __inline__ __m256i __DEFAULT_FN_ATTRS256
939 _mm256_maskz_max_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
941 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
942 (__v32qi
)_mm256_max_epu8(__A
, __B
),
943 (__v32qi
)_mm256_setzero_si256());
946 static __inline__ __m256i __DEFAULT_FN_ATTRS256
947 _mm256_mask_max_epu8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
949 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
950 (__v32qi
)_mm256_max_epu8(__A
, __B
),
954 static __inline__ __m128i __DEFAULT_FN_ATTRS128
955 _mm_maskz_max_epu16(__mmask8 __M
, __m128i __A
, __m128i __B
)
957 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
958 (__v8hi
)_mm_max_epu16(__A
, __B
),
959 (__v8hi
)_mm_setzero_si128());
962 static __inline__ __m128i __DEFAULT_FN_ATTRS128
963 _mm_mask_max_epu16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
965 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
966 (__v8hi
)_mm_max_epu16(__A
, __B
),
970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
971 _mm256_maskz_max_epu16(__mmask16 __M
, __m256i __A
, __m256i __B
)
973 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
974 (__v16hi
)_mm256_max_epu16(__A
, __B
),
975 (__v16hi
)_mm256_setzero_si256());
978 static __inline__ __m256i __DEFAULT_FN_ATTRS256
979 _mm256_mask_max_epu16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
981 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
982 (__v16hi
)_mm256_max_epu16(__A
, __B
),
986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
987 _mm_maskz_min_epi8(__mmask16 __M
, __m128i __A
, __m128i __B
)
989 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
990 (__v16qi
)_mm_min_epi8(__A
, __B
),
991 (__v16qi
)_mm_setzero_si128());
994 static __inline__ __m128i __DEFAULT_FN_ATTRS128
995 _mm_mask_min_epi8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
997 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
998 (__v16qi
)_mm_min_epi8(__A
, __B
),
1002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1003 _mm256_maskz_min_epi8(__mmask32 __M
, __m256i __A
, __m256i __B
)
1005 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1006 (__v32qi
)_mm256_min_epi8(__A
, __B
),
1007 (__v32qi
)_mm256_setzero_si256());
1010 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1011 _mm256_mask_min_epi8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
1013 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1014 (__v32qi
)_mm256_min_epi8(__A
, __B
),
1018 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1019 _mm_maskz_min_epi16(__mmask8 __M
, __m128i __A
, __m128i __B
)
1021 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1022 (__v8hi
)_mm_min_epi16(__A
, __B
),
1023 (__v8hi
)_mm_setzero_si128());
1026 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1027 _mm_mask_min_epi16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
1029 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1030 (__v8hi
)_mm_min_epi16(__A
, __B
),
1034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1035 _mm256_maskz_min_epi16(__mmask16 __M
, __m256i __A
, __m256i __B
)
1037 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1038 (__v16hi
)_mm256_min_epi16(__A
, __B
),
1039 (__v16hi
)_mm256_setzero_si256());
1042 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1043 _mm256_mask_min_epi16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
1045 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1046 (__v16hi
)_mm256_min_epi16(__A
, __B
),
1050 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1051 _mm_maskz_min_epu8(__mmask16 __M
, __m128i __A
, __m128i __B
)
1053 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1054 (__v16qi
)_mm_min_epu8(__A
, __B
),
1055 (__v16qi
)_mm_setzero_si128());
1058 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1059 _mm_mask_min_epu8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
1061 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1062 (__v16qi
)_mm_min_epu8(__A
, __B
),
1066 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1067 _mm256_maskz_min_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1069 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1070 (__v32qi
)_mm256_min_epu8(__A
, __B
),
1071 (__v32qi
)_mm256_setzero_si256());
1074 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1075 _mm256_mask_min_epu8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
1077 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1078 (__v32qi
)_mm256_min_epu8(__A
, __B
),
1082 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1083 _mm_maskz_min_epu16(__mmask8 __M
, __m128i __A
, __m128i __B
)
1085 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1086 (__v8hi
)_mm_min_epu16(__A
, __B
),
1087 (__v8hi
)_mm_setzero_si128());
1090 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1091 _mm_mask_min_epu16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
1093 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1094 (__v8hi
)_mm_min_epu16(__A
, __B
),
1098 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1099 _mm256_maskz_min_epu16(__mmask16 __M
, __m256i __A
, __m256i __B
)
1101 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1102 (__v16hi
)_mm256_min_epu16(__A
, __B
),
1103 (__v16hi
)_mm256_setzero_si256());
1106 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1107 _mm256_mask_min_epu16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
1109 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1110 (__v16hi
)_mm256_min_epu16(__A
, __B
),
1114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1115 _mm_mask_shuffle_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1117 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1118 (__v16qi
)_mm_shuffle_epi8(__A
, __B
),
1122 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1123 _mm_maskz_shuffle_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1125 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1126 (__v16qi
)_mm_shuffle_epi8(__A
, __B
),
1127 (__v16qi
)_mm_setzero_si128());
1130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1131 _mm256_mask_shuffle_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1133 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1134 (__v32qi
)_mm256_shuffle_epi8(__A
, __B
),
1138 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1139 _mm256_maskz_shuffle_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1141 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1142 (__v32qi
)_mm256_shuffle_epi8(__A
, __B
),
1143 (__v32qi
)_mm256_setzero_si256());
1146 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1147 _mm_mask_subs_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1149 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1150 (__v16qi
)_mm_subs_epi8(__A
, __B
),
1154 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1155 _mm_maskz_subs_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1157 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1158 (__v16qi
)_mm_subs_epi8(__A
, __B
),
1159 (__v16qi
)_mm_setzero_si128());
1162 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1163 _mm256_mask_subs_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1165 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1166 (__v32qi
)_mm256_subs_epi8(__A
, __B
),
1170 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1171 _mm256_maskz_subs_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1173 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1174 (__v32qi
)_mm256_subs_epi8(__A
, __B
),
1175 (__v32qi
)_mm256_setzero_si256());
1178 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1179 _mm_mask_subs_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1181 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1182 (__v8hi
)_mm_subs_epi16(__A
, __B
),
1186 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1187 _mm_maskz_subs_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1189 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1190 (__v8hi
)_mm_subs_epi16(__A
, __B
),
1191 (__v8hi
)_mm_setzero_si128());
1194 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1195 _mm256_mask_subs_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1197 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1198 (__v16hi
)_mm256_subs_epi16(__A
, __B
),
1202 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1203 _mm256_maskz_subs_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1205 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1206 (__v16hi
)_mm256_subs_epi16(__A
, __B
),
1207 (__v16hi
)_mm256_setzero_si256());
1210 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1211 _mm_mask_subs_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1213 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1214 (__v16qi
)_mm_subs_epu8(__A
, __B
),
1218 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1219 _mm_maskz_subs_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1221 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1222 (__v16qi
)_mm_subs_epu8(__A
, __B
),
1223 (__v16qi
)_mm_setzero_si128());
1226 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1227 _mm256_mask_subs_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1229 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1230 (__v32qi
)_mm256_subs_epu8(__A
, __B
),
1234 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1235 _mm256_maskz_subs_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1237 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1238 (__v32qi
)_mm256_subs_epu8(__A
, __B
),
1239 (__v32qi
)_mm256_setzero_si256());
1242 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1243 _mm_mask_subs_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1245 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1246 (__v8hi
)_mm_subs_epu16(__A
, __B
),
1250 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1251 _mm_maskz_subs_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1253 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1254 (__v8hi
)_mm_subs_epu16(__A
, __B
),
1255 (__v8hi
)_mm_setzero_si128());
1258 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1259 _mm256_mask_subs_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
,
1261 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1262 (__v16hi
)_mm256_subs_epu16(__A
, __B
),
1266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1267 _mm256_maskz_subs_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1269 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1270 (__v16hi
)_mm256_subs_epu16(__A
, __B
),
1271 (__v16hi
)_mm256_setzero_si256());
1274 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1275 _mm_permutex2var_epi16(__m128i __A
, __m128i __I
, __m128i __B
)
1277 return (__m128i
)__builtin_ia32_vpermi2varhi128((__v8hi
)__A
, (__v8hi
)__I
,
1281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1282 _mm_mask_permutex2var_epi16(__m128i __A
, __mmask8 __U
, __m128i __I
,
1285 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1286 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1290 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1291 _mm_mask2_permutex2var_epi16(__m128i __A
, __m128i __I
, __mmask8 __U
,
1294 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1295 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1299 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1300 _mm_maskz_permutex2var_epi16 (__mmask8 __U
, __m128i __A
, __m128i __I
,
1303 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1304 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1305 (__v8hi
)_mm_setzero_si128());
1308 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1309 _mm256_permutex2var_epi16(__m256i __A
, __m256i __I
, __m256i __B
)
1311 return (__m256i
)__builtin_ia32_vpermi2varhi256((__v16hi
)__A
, (__v16hi
)__I
,
1315 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1316 _mm256_mask_permutex2var_epi16(__m256i __A
, __mmask16 __U
, __m256i __I
,
1319 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1320 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1324 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1325 _mm256_mask2_permutex2var_epi16(__m256i __A
, __m256i __I
, __mmask16 __U
,
1328 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1329 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1333 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1334 _mm256_maskz_permutex2var_epi16 (__mmask16 __U
, __m256i __A
, __m256i __I
,
1337 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1338 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1339 (__v16hi
)_mm256_setzero_si256());
1342 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1343 _mm_mask_maddubs_epi16(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
1344 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1345 (__v8hi
)_mm_maddubs_epi16(__X
, __Y
),
1349 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1350 _mm_maskz_maddubs_epi16(__mmask8 __U
, __m128i __X
, __m128i __Y
) {
1351 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1352 (__v8hi
)_mm_maddubs_epi16(__X
, __Y
),
1353 (__v8hi
)_mm_setzero_si128());
1356 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1357 _mm256_mask_maddubs_epi16(__m256i __W
, __mmask16 __U
, __m256i __X
,
1359 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1360 (__v16hi
)_mm256_maddubs_epi16(__X
, __Y
),
1364 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1365 _mm256_maskz_maddubs_epi16(__mmask16 __U
, __m256i __X
, __m256i __Y
) {
1366 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1367 (__v16hi
)_mm256_maddubs_epi16(__X
, __Y
),
1368 (__v16hi
)_mm256_setzero_si256());
1371 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1372 _mm_mask_madd_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1373 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1374 (__v4si
)_mm_madd_epi16(__A
, __B
),
1378 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1379 _mm_maskz_madd_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1380 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1381 (__v4si
)_mm_madd_epi16(__A
, __B
),
1382 (__v4si
)_mm_setzero_si128());
1385 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1386 _mm256_mask_madd_epi16(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
) {
1387 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
1388 (__v8si
)_mm256_madd_epi16(__A
, __B
),
1392 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1393 _mm256_maskz_madd_epi16(__mmask8 __U
, __m256i __A
, __m256i __B
) {
1394 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
1395 (__v8si
)_mm256_madd_epi16(__A
, __B
),
1396 (__v8si
)_mm256_setzero_si256());
1399 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1400 _mm_cvtsepi16_epi8 (__m128i __A
) {
1401 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1402 (__v16qi
) _mm_setzero_si128(),
1406 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1407 _mm_mask_cvtsepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1408 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1413 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1414 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1415 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1416 (__v16qi
) _mm_setzero_si128(),
1420 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1421 _mm256_cvtsepi16_epi8 (__m256i __A
) {
1422 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1423 (__v16qi
) _mm_setzero_si128(),
1427 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1428 _mm256_mask_cvtsepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1429 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1434 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1435 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1436 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1437 (__v16qi
) _mm_setzero_si128(),
1441 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1442 _mm_cvtusepi16_epi8 (__m128i __A
) {
1443 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1444 (__v16qi
) _mm_setzero_si128(),
1448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1449 _mm_mask_cvtusepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1450 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1455 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1456 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1457 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1458 (__v16qi
) _mm_setzero_si128(),
1462 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1463 _mm256_cvtusepi16_epi8 (__m256i __A
) {
1464 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1465 (__v16qi
) _mm_setzero_si128(),
1469 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1470 _mm256_mask_cvtusepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1471 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1476 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1477 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1478 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1479 (__v16qi
) _mm_setzero_si128(),
1483 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1484 _mm_cvtepi16_epi8 (__m128i __A
) {
1485 return (__m128i
)__builtin_shufflevector(
1486 __builtin_convertvector((__v8hi
)__A
, __v8qi
),
1487 (__v8qi
){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1491 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1492 _mm_mask_cvtepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1493 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1499 _mm_maskz_cvtepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1500 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1501 (__v16qi
) _mm_setzero_si128(),
1505 static __inline__
void __DEFAULT_FN_ATTRS128
1506 _mm_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1508 __builtin_ia32_pmovwb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1512 static __inline__
void __DEFAULT_FN_ATTRS128
1513 _mm_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1515 __builtin_ia32_pmovswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1518 static __inline__
void __DEFAULT_FN_ATTRS128
1519 _mm_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1521 __builtin_ia32_pmovuswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1524 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1525 _mm256_cvtepi16_epi8 (__m256i __A
) {
1526 return (__m128i
)__builtin_convertvector((__v16hi
) __A
, __v16qi
);
1529 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1530 _mm256_mask_cvtepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1531 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1532 (__v16qi
)_mm256_cvtepi16_epi8(__A
),
1536 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1537 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1538 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1539 (__v16qi
)_mm256_cvtepi16_epi8(__A
),
1540 (__v16qi
)_mm_setzero_si128());
1543 static __inline__
void __DEFAULT_FN_ATTRS256
1544 _mm256_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1546 __builtin_ia32_pmovwb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1549 static __inline__
void __DEFAULT_FN_ATTRS256
1550 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1552 __builtin_ia32_pmovswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1555 static __inline__
void __DEFAULT_FN_ATTRS256
1556 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1558 __builtin_ia32_pmovuswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1561 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1562 _mm_mask_mulhrs_epi16(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
1563 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1564 (__v8hi
)_mm_mulhrs_epi16(__X
, __Y
),
1568 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1569 _mm_maskz_mulhrs_epi16(__mmask8 __U
, __m128i __X
, __m128i __Y
) {
1570 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1571 (__v8hi
)_mm_mulhrs_epi16(__X
, __Y
),
1572 (__v8hi
)_mm_setzero_si128());
1575 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1576 _mm256_mask_mulhrs_epi16(__m256i __W
, __mmask16 __U
, __m256i __X
, __m256i __Y
) {
1577 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1578 (__v16hi
)_mm256_mulhrs_epi16(__X
, __Y
),
1582 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1583 _mm256_maskz_mulhrs_epi16(__mmask16 __U
, __m256i __X
, __m256i __Y
) {
1584 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1585 (__v16hi
)_mm256_mulhrs_epi16(__X
, __Y
),
1586 (__v16hi
)_mm256_setzero_si256());
1589 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1590 _mm_mask_mulhi_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1591 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1592 (__v8hi
)_mm_mulhi_epu16(__A
, __B
),
1596 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1597 _mm_maskz_mulhi_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1598 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1599 (__v8hi
)_mm_mulhi_epu16(__A
, __B
),
1600 (__v8hi
)_mm_setzero_si128());
1603 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1604 _mm256_mask_mulhi_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1605 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1606 (__v16hi
)_mm256_mulhi_epu16(__A
, __B
),
1610 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1611 _mm256_maskz_mulhi_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1612 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1613 (__v16hi
)_mm256_mulhi_epu16(__A
, __B
),
1614 (__v16hi
)_mm256_setzero_si256());
1617 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1618 _mm_mask_mulhi_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1619 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1620 (__v8hi
)_mm_mulhi_epi16(__A
, __B
),
1624 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1625 _mm_maskz_mulhi_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1626 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1627 (__v8hi
)_mm_mulhi_epi16(__A
, __B
),
1628 (__v8hi
)_mm_setzero_si128());
1631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1632 _mm256_mask_mulhi_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1633 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1634 (__v16hi
)_mm256_mulhi_epi16(__A
, __B
),
1638 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1639 _mm256_maskz_mulhi_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1640 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1641 (__v16hi
)_mm256_mulhi_epi16(__A
, __B
),
1642 (__v16hi
)_mm256_setzero_si256());
1645 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1646 _mm_mask_unpackhi_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
1647 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1648 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
1652 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1653 _mm_maskz_unpackhi_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
1654 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1655 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
1656 (__v16qi
)_mm_setzero_si128());
1659 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1660 _mm256_mask_unpackhi_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
1661 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1662 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
1666 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1667 _mm256_maskz_unpackhi_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
1668 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1669 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
1670 (__v32qi
)_mm256_setzero_si256());
1673 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1674 _mm_mask_unpackhi_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1675 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1676 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
1680 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1681 _mm_maskz_unpackhi_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1682 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1683 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
1684 (__v8hi
) _mm_setzero_si128());
1687 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1688 _mm256_mask_unpackhi_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1689 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1690 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
1694 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1695 _mm256_maskz_unpackhi_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1696 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1697 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
1698 (__v16hi
)_mm256_setzero_si256());
1701 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1702 _mm_mask_unpacklo_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
1703 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1704 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
1708 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1709 _mm_maskz_unpacklo_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
1710 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1711 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
1712 (__v16qi
)_mm_setzero_si128());
1715 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1716 _mm256_mask_unpacklo_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
1717 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1718 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
1722 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1723 _mm256_maskz_unpacklo_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
1724 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1725 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
1726 (__v32qi
)_mm256_setzero_si256());
1729 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1730 _mm_mask_unpacklo_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1731 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1732 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
1736 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1737 _mm_maskz_unpacklo_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1738 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1739 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
1740 (__v8hi
) _mm_setzero_si128());
1743 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1744 _mm256_mask_unpacklo_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1745 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1746 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
1750 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1751 _mm256_maskz_unpacklo_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1752 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1753 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
1754 (__v16hi
)_mm256_setzero_si256());
1757 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1758 _mm_mask_cvtepi8_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
1760 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1761 (__v8hi
)_mm_cvtepi8_epi16(__A
),
1765 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1766 _mm_maskz_cvtepi8_epi16(__mmask8 __U
, __m128i __A
)
1768 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1769 (__v8hi
)_mm_cvtepi8_epi16(__A
),
1770 (__v8hi
)_mm_setzero_si128());
1773 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1774 _mm256_mask_cvtepi8_epi16(__m256i __W
, __mmask16 __U
, __m128i __A
)
1776 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1777 (__v16hi
)_mm256_cvtepi8_epi16(__A
),
1781 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1782 _mm256_maskz_cvtepi8_epi16(__mmask16 __U
, __m128i __A
)
1784 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1785 (__v16hi
)_mm256_cvtepi8_epi16(__A
),
1786 (__v16hi
)_mm256_setzero_si256());
1790 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1791 _mm_mask_cvtepu8_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
1793 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1794 (__v8hi
)_mm_cvtepu8_epi16(__A
),
1798 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1799 _mm_maskz_cvtepu8_epi16(__mmask8 __U
, __m128i __A
)
1801 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1802 (__v8hi
)_mm_cvtepu8_epi16(__A
),
1803 (__v8hi
)_mm_setzero_si128());
1806 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1807 _mm256_mask_cvtepu8_epi16(__m256i __W
, __mmask16 __U
, __m128i __A
)
1809 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1810 (__v16hi
)_mm256_cvtepu8_epi16(__A
),
1814 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1815 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U
, __m128i __A
)
1817 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1818 (__v16hi
)_mm256_cvtepu8_epi16(__A
),
1819 (__v16hi
)_mm256_setzero_si256());
1823 #define _mm_mask_shufflehi_epi16(W, U, A, imm) \
1824 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
1825 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
1826 (__v8hi)(__m128i)(W)))
1828 #define _mm_maskz_shufflehi_epi16(U, A, imm) \
1829 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
1830 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \
1831 (__v8hi)_mm_setzero_si128()))
1833 #define _mm256_mask_shufflehi_epi16(W, U, A, imm) \
1834 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
1835 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
1836 (__v16hi)(__m256i)(W)))
1838 #define _mm256_maskz_shufflehi_epi16(U, A, imm) \
1839 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
1840 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \
1841 (__v16hi)_mm256_setzero_si256()))
1843 #define _mm_mask_shufflelo_epi16(W, U, A, imm) \
1844 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
1845 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
1846 (__v8hi)(__m128i)(W)))
/* Zero-masked _mm_shufflelo_epi16: lanes selected by U come from the shuffle
 * of A by imm, the others are zero. */
#define _mm_maskz_shufflelo_epi16(U, A, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
      (__v8hi)_mm_setzero_si128()))
/* Merge-masked _mm256_shufflelo_epi16: lanes selected by U come from the
 * shuffle of A by imm, the others from W. */
#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)(__m256i)(W)))
/* Zero-masked _mm256_shufflelo_epi16: lanes selected by U come from the
 * shuffle of A by imm, the others are zero. */
#define _mm256_maskz_shufflelo_epi16(U, A, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_shufflelo_epi16((A), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
1865 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1866 _mm256_sllv_epi16(__m256i __A
, __m256i __B
)
1868 return (__m256i
)__builtin_ia32_psllv16hi((__v16hi
)__A
, (__v16hi
)__B
);
1871 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872 _mm256_mask_sllv_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1874 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1875 (__v16hi
)_mm256_sllv_epi16(__A
, __B
),
1879 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1880 _mm256_maskz_sllv_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1882 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1883 (__v16hi
)_mm256_sllv_epi16(__A
, __B
),
1884 (__v16hi
)_mm256_setzero_si256());
1887 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1888 _mm_sllv_epi16(__m128i __A
, __m128i __B
)
1890 return (__m128i
)__builtin_ia32_psllv8hi((__v8hi
)__A
, (__v8hi
)__B
);
1893 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894 _mm_mask_sllv_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1896 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1897 (__v8hi
)_mm_sllv_epi16(__A
, __B
),
1901 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1902 _mm_maskz_sllv_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1904 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1905 (__v8hi
)_mm_sllv_epi16(__A
, __B
),
1906 (__v8hi
)_mm_setzero_si128());
1909 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1910 _mm_mask_sll_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1912 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1913 (__v8hi
)_mm_sll_epi16(__A
, __B
),
1917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1918 _mm_maskz_sll_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1920 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1921 (__v8hi
)_mm_sll_epi16(__A
, __B
),
1922 (__v8hi
)_mm_setzero_si128());
1925 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1926 _mm256_mask_sll_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
1928 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1929 (__v16hi
)_mm256_sll_epi16(__A
, __B
),
1933 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1934 _mm256_maskz_sll_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
1936 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1937 (__v16hi
)_mm256_sll_epi16(__A
, __B
),
1938 (__v16hi
)_mm256_setzero_si256());
1941 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1942 _mm_mask_slli_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
1944 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1945 (__v8hi
)_mm_slli_epi16(__A
, (int)__B
),
1949 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1950 _mm_maskz_slli_epi16 (__mmask8 __U
, __m128i __A
, unsigned int __B
)
1952 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1953 (__v8hi
)_mm_slli_epi16(__A
, (int)__B
),
1954 (__v8hi
)_mm_setzero_si128());
1957 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1958 _mm256_mask_slli_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
,
1961 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1962 (__v16hi
)_mm256_slli_epi16(__A
, (int)__B
),
1966 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1967 _mm256_maskz_slli_epi16(__mmask16 __U
, __m256i __A
, unsigned int __B
)
1969 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1970 (__v16hi
)_mm256_slli_epi16(__A
, (int)__B
),
1971 (__v16hi
)_mm256_setzero_si256());
1974 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1975 _mm256_srlv_epi16(__m256i __A
, __m256i __B
)
1977 return (__m256i
)__builtin_ia32_psrlv16hi((__v16hi
)__A
, (__v16hi
)__B
);
1980 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981 _mm256_mask_srlv_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1983 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1984 (__v16hi
)_mm256_srlv_epi16(__A
, __B
),
1988 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1989 _mm256_maskz_srlv_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1991 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1992 (__v16hi
)_mm256_srlv_epi16(__A
, __B
),
1993 (__v16hi
)_mm256_setzero_si256());
1996 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1997 _mm_srlv_epi16(__m128i __A
, __m128i __B
)
1999 return (__m128i
)__builtin_ia32_psrlv8hi((__v8hi
)__A
, (__v8hi
)__B
);
2002 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003 _mm_mask_srlv_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2005 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2006 (__v8hi
)_mm_srlv_epi16(__A
, __B
),
2010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2011 _mm_maskz_srlv_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2013 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2014 (__v8hi
)_mm_srlv_epi16(__A
, __B
),
2015 (__v8hi
)_mm_setzero_si128());
2018 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2019 _mm256_srav_epi16(__m256i __A
, __m256i __B
)
2021 return (__m256i
)__builtin_ia32_psrav16hi((__v16hi
)__A
, (__v16hi
)__B
);
2024 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025 _mm256_mask_srav_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
2027 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2028 (__v16hi
)_mm256_srav_epi16(__A
, __B
),
2032 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2033 _mm256_maskz_srav_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
2035 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2036 (__v16hi
)_mm256_srav_epi16(__A
, __B
),
2037 (__v16hi
)_mm256_setzero_si256());
2040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2041 _mm_srav_epi16(__m128i __A
, __m128i __B
)
2043 return (__m128i
)__builtin_ia32_psrav8hi((__v8hi
)__A
, (__v8hi
)__B
);
2046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047 _mm_mask_srav_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2049 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2050 (__v8hi
)_mm_srav_epi16(__A
, __B
),
2054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2055 _mm_maskz_srav_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2057 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2058 (__v8hi
)_mm_srav_epi16(__A
, __B
),
2059 (__v8hi
)_mm_setzero_si128());
2062 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063 _mm_mask_sra_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2065 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2066 (__v8hi
)_mm_sra_epi16(__A
, __B
),
2070 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2071 _mm_maskz_sra_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2073 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2074 (__v8hi
)_mm_sra_epi16(__A
, __B
),
2075 (__v8hi
)_mm_setzero_si128());
2078 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2079 _mm256_mask_sra_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
2081 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2082 (__v16hi
)_mm256_sra_epi16(__A
, __B
),
2086 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2087 _mm256_maskz_sra_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
2089 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2090 (__v16hi
)_mm256_sra_epi16(__A
, __B
),
2091 (__v16hi
)_mm256_setzero_si256());
2094 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2095 _mm_mask_srai_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
2097 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2098 (__v8hi
)_mm_srai_epi16(__A
, (int)__B
),
2102 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2103 _mm_maskz_srai_epi16(__mmask8 __U
, __m128i __A
, unsigned int __B
)
2105 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2106 (__v8hi
)_mm_srai_epi16(__A
, (int)__B
),
2107 (__v8hi
)_mm_setzero_si128());
2110 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2111 _mm256_mask_srai_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
,
2114 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2115 (__v16hi
)_mm256_srai_epi16(__A
, (int)__B
),
2119 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2120 _mm256_maskz_srai_epi16(__mmask16 __U
, __m256i __A
, unsigned int __B
)
2122 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2123 (__v16hi
)_mm256_srai_epi16(__A
, (int)__B
),
2124 (__v16hi
)_mm256_setzero_si256());
2127 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2128 _mm_mask_srl_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2130 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2131 (__v8hi
)_mm_srl_epi16(__A
, __B
),
2135 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2136 _mm_maskz_srl_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2138 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2139 (__v8hi
)_mm_srl_epi16(__A
, __B
),
2140 (__v8hi
)_mm_setzero_si128());
2143 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2144 _mm256_mask_srl_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
2146 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2147 (__v16hi
)_mm256_srl_epi16(__A
, __B
),
2151 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2152 _mm256_maskz_srl_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
2154 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2155 (__v16hi
)_mm256_srl_epi16(__A
, __B
),
2156 (__v16hi
)_mm256_setzero_si256());
2159 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2160 _mm_mask_srli_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, int __B
)
2162 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2163 (__v8hi
)_mm_srli_epi16(__A
, __B
),
2167 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2168 _mm_maskz_srli_epi16 (__mmask8 __U
, __m128i __A
, int __B
)
2170 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2171 (__v8hi
)_mm_srli_epi16(__A
, __B
),
2172 (__v8hi
)_mm_setzero_si128());
2175 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2176 _mm256_mask_srli_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, int __B
)
2178 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2179 (__v16hi
)_mm256_srli_epi16(__A
, __B
),
2183 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2184 _mm256_maskz_srli_epi16(__mmask16 __U
, __m256i __A
, int __B
)
2186 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2187 (__v16hi
)_mm256_srli_epi16(__A
, __B
),
2188 (__v16hi
)_mm256_setzero_si256());
2191 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2192 _mm_mask_mov_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
)
2194 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2199 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2200 _mm_maskz_mov_epi16 (__mmask8 __U
, __m128i __A
)
2202 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2204 (__v8hi
) _mm_setzero_si128 ());
2207 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2208 _mm256_mask_mov_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
)
2210 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2215 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2216 _mm256_maskz_mov_epi16 (__mmask16 __U
, __m256i __A
)
2218 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2220 (__v16hi
) _mm256_setzero_si256 ());
2223 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2224 _mm_mask_mov_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
)
2226 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2231 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2232 _mm_maskz_mov_epi8 (__mmask16 __U
, __m128i __A
)
2234 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2236 (__v16qi
) _mm_setzero_si128 ());
2239 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2240 _mm256_mask_mov_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
)
2242 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2247 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2248 _mm256_maskz_mov_epi8 (__mmask32 __U
, __m256i __A
)
2250 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2252 (__v32qi
) _mm256_setzero_si256 ());
2256 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2257 _mm_mask_set1_epi8 (__m128i __O
, __mmask16 __M
, char __A
)
2259 return (__m128i
) __builtin_ia32_selectb_128(__M
,
2260 (__v16qi
) _mm_set1_epi8(__A
),
2264 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2265 _mm_maskz_set1_epi8 (__mmask16 __M
, char __A
)
2267 return (__m128i
) __builtin_ia32_selectb_128(__M
,
2268 (__v16qi
) _mm_set1_epi8(__A
),
2269 (__v16qi
) _mm_setzero_si128());
2272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2273 _mm256_mask_set1_epi8 (__m256i __O
, __mmask32 __M
, char __A
)
2275 return (__m256i
) __builtin_ia32_selectb_256(__M
,
2276 (__v32qi
) _mm256_set1_epi8(__A
),
2280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2281 _mm256_maskz_set1_epi8 (__mmask32 __M
, char __A
)
2283 return (__m256i
) __builtin_ia32_selectb_256(__M
,
2284 (__v32qi
) _mm256_set1_epi8(__A
),
2285 (__v32qi
) _mm256_setzero_si256());
2288 static __inline __m128i __DEFAULT_FN_ATTRS128
2289 _mm_loadu_epi16 (void const *__P
)
2291 struct __loadu_epi16
{
2293 } __attribute__((__packed__
, __may_alias__
));
2294 return ((const struct __loadu_epi16
*)__P
)->__v
;
2297 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2298 _mm_mask_loadu_epi16 (__m128i __W
, __mmask8 __U
, void const *__P
)
2300 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((const __v8hi
*) __P
,
2305 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2306 _mm_maskz_loadu_epi16 (__mmask8 __U
, void const *__P
)
2308 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((const __v8hi
*) __P
,
2310 _mm_setzero_si128 (),
2314 static __inline __m256i __DEFAULT_FN_ATTRS256
2315 _mm256_loadu_epi16 (void const *__P
)
2317 struct __loadu_epi16
{
2319 } __attribute__((__packed__
, __may_alias__
));
2320 return ((const struct __loadu_epi16
*)__P
)->__v
;
2323 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2324 _mm256_mask_loadu_epi16 (__m256i __W
, __mmask16 __U
, void const *__P
)
2326 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((const __v16hi
*) __P
,
2331 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2332 _mm256_maskz_loadu_epi16 (__mmask16 __U
, void const *__P
)
2334 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((const __v16hi
*) __P
,
2336 _mm256_setzero_si256 (),
2340 static __inline __m128i __DEFAULT_FN_ATTRS128
2341 _mm_loadu_epi8 (void const *__P
)
2343 struct __loadu_epi8
{
2345 } __attribute__((__packed__
, __may_alias__
));
2346 return ((const struct __loadu_epi8
*)__P
)->__v
;
2349 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2350 _mm_mask_loadu_epi8 (__m128i __W
, __mmask16 __U
, void const *__P
)
2352 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((const __v16qi
*) __P
,
2357 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2358 _mm_maskz_loadu_epi8 (__mmask16 __U
, void const *__P
)
2360 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((const __v16qi
*) __P
,
2362 _mm_setzero_si128 (),
2366 static __inline __m256i __DEFAULT_FN_ATTRS256
2367 _mm256_loadu_epi8 (void const *__P
)
2369 struct __loadu_epi8
{
2371 } __attribute__((__packed__
, __may_alias__
));
2372 return ((const struct __loadu_epi8
*)__P
)->__v
;
2375 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2376 _mm256_mask_loadu_epi8 (__m256i __W
, __mmask32 __U
, void const *__P
)
2378 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((const __v32qi
*) __P
,
2383 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2384 _mm256_maskz_loadu_epi8 (__mmask32 __U
, void const *__P
)
2386 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((const __v32qi
*) __P
,
2388 _mm256_setzero_si256 (),
2392 static __inline
void __DEFAULT_FN_ATTRS128
2393 _mm_storeu_epi16 (void *__P
, __m128i __A
)
2395 struct __storeu_epi16
{
2397 } __attribute__((__packed__
, __may_alias__
));
2398 ((struct __storeu_epi16
*)__P
)->__v
= __A
;
2401 static __inline__
void __DEFAULT_FN_ATTRS128
2402 _mm_mask_storeu_epi16 (void *__P
, __mmask8 __U
, __m128i __A
)
2404 __builtin_ia32_storedquhi128_mask ((__v8hi
*) __P
,
2409 static __inline
void __DEFAULT_FN_ATTRS256
2410 _mm256_storeu_epi16 (void *__P
, __m256i __A
)
2412 struct __storeu_epi16
{
2414 } __attribute__((__packed__
, __may_alias__
));
2415 ((struct __storeu_epi16
*)__P
)->__v
= __A
;
2418 static __inline__
void __DEFAULT_FN_ATTRS256
2419 _mm256_mask_storeu_epi16 (void *__P
, __mmask16 __U
, __m256i __A
)
2421 __builtin_ia32_storedquhi256_mask ((__v16hi
*) __P
,
2426 static __inline
void __DEFAULT_FN_ATTRS128
2427 _mm_storeu_epi8 (void *__P
, __m128i __A
)
2429 struct __storeu_epi8
{
2431 } __attribute__((__packed__
, __may_alias__
));
2432 ((struct __storeu_epi8
*)__P
)->__v
= __A
;
2435 static __inline__
void __DEFAULT_FN_ATTRS128
2436 _mm_mask_storeu_epi8 (void *__P
, __mmask16 __U
, __m128i __A
)
2438 __builtin_ia32_storedquqi128_mask ((__v16qi
*) __P
,
2443 static __inline
void __DEFAULT_FN_ATTRS256
2444 _mm256_storeu_epi8 (void *__P
, __m256i __A
)
2446 struct __storeu_epi8
{
2448 } __attribute__((__packed__
, __may_alias__
));
2449 ((struct __storeu_epi8
*)__P
)->__v
= __A
;
2452 static __inline__
void __DEFAULT_FN_ATTRS256
2453 _mm256_mask_storeu_epi8 (void *__P
, __mmask32 __U
, __m256i __A
)
2455 __builtin_ia32_storedquqi256_mask ((__v32qi
*) __P
,
2460 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2461 _mm_test_epi8_mask (__m128i __A
, __m128i __B
)
2463 return _mm_cmpneq_epi8_mask (_mm_and_si128(__A
, __B
), _mm_setzero_si128());
2466 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467 _mm_mask_test_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
2469 return _mm_mask_cmpneq_epi8_mask (__U
, _mm_and_si128 (__A
, __B
),
2470 _mm_setzero_si128());
2473 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2474 _mm256_test_epi8_mask (__m256i __A
, __m256i __B
)
2476 return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A
, __B
),
2477 _mm256_setzero_si256());
2480 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2481 _mm256_mask_test_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
2483 return _mm256_mask_cmpneq_epi8_mask (__U
, _mm256_and_si256(__A
, __B
),
2484 _mm256_setzero_si256());
2487 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2488 _mm_test_epi16_mask (__m128i __A
, __m128i __B
)
2490 return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2493 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494 _mm_mask_test_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
2496 return _mm_mask_cmpneq_epi16_mask (__U
, _mm_and_si128 (__A
, __B
),
2497 _mm_setzero_si128());
2500 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2501 _mm256_test_epi16_mask (__m256i __A
, __m256i __B
)
2503 return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A
, __B
),
2504 _mm256_setzero_si256 ());
2507 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2508 _mm256_mask_test_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
2510 return _mm256_mask_cmpneq_epi16_mask (__U
, _mm256_and_si256(__A
, __B
),
2511 _mm256_setzero_si256());
2514 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2515 _mm_testn_epi8_mask (__m128i __A
, __m128i __B
)
2517 return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2520 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521 _mm_mask_testn_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
2523 return _mm_mask_cmpeq_epi8_mask (__U
, _mm_and_si128 (__A
, __B
),
2524 _mm_setzero_si128());
2527 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2528 _mm256_testn_epi8_mask (__m256i __A
, __m256i __B
)
2530 return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A
, __B
),
2531 _mm256_setzero_si256());
2534 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2535 _mm256_mask_testn_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
2537 return _mm256_mask_cmpeq_epi8_mask (__U
, _mm256_and_si256 (__A
, __B
),
2538 _mm256_setzero_si256());
2541 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2542 _mm_testn_epi16_mask (__m128i __A
, __m128i __B
)
2544 return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2547 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548 _mm_mask_testn_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
2550 return _mm_mask_cmpeq_epi16_mask (__U
, _mm_and_si128(__A
, __B
), _mm_setzero_si128());
2553 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2554 _mm256_testn_epi16_mask (__m256i __A
, __m256i __B
)
2556 return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A
, __B
),
2557 _mm256_setzero_si256());
2560 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2561 _mm256_mask_testn_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
2563 return _mm256_mask_cmpeq_epi16_mask (__U
, _mm256_and_si256 (__A
, __B
),
2564 _mm256_setzero_si256());
2567 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2568 _mm_movepi8_mask (__m128i __A
)
2570 return (__mmask16
) __builtin_ia32_cvtb2mask128 ((__v16qi
) __A
);
2573 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2574 _mm256_movepi8_mask (__m256i __A
)
2576 return (__mmask32
) __builtin_ia32_cvtb2mask256 ((__v32qi
) __A
);
2579 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2580 _mm_movepi16_mask (__m128i __A
)
2582 return (__mmask8
) __builtin_ia32_cvtw2mask128 ((__v8hi
) __A
);
2585 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2586 _mm256_movepi16_mask (__m256i __A
)
2588 return (__mmask16
) __builtin_ia32_cvtw2mask256 ((__v16hi
) __A
);
2591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2592 _mm_movm_epi8 (__mmask16 __A
)
2594 return (__m128i
) __builtin_ia32_cvtmask2b128 (__A
);
2597 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2598 _mm256_movm_epi8 (__mmask32 __A
)
2600 return (__m256i
) __builtin_ia32_cvtmask2b256 (__A
);
2603 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2604 _mm_movm_epi16 (__mmask8 __A
)
2606 return (__m128i
) __builtin_ia32_cvtmask2w128 (__A
);
2609 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2610 _mm256_movm_epi16 (__mmask16 __A
)
2612 return (__m256i
) __builtin_ia32_cvtmask2w256 (__A
);
2615 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2616 _mm_mask_broadcastb_epi8 (__m128i __O
, __mmask16 __M
, __m128i __A
)
2618 return (__m128i
)__builtin_ia32_selectb_128(__M
,
2619 (__v16qi
) _mm_broadcastb_epi8(__A
),
2623 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2624 _mm_maskz_broadcastb_epi8 (__mmask16 __M
, __m128i __A
)
2626 return (__m128i
)__builtin_ia32_selectb_128(__M
,
2627 (__v16qi
) _mm_broadcastb_epi8(__A
),
2628 (__v16qi
) _mm_setzero_si128());
2631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2632 _mm256_mask_broadcastb_epi8 (__m256i __O
, __mmask32 __M
, __m128i __A
)
2634 return (__m256i
)__builtin_ia32_selectb_256(__M
,
2635 (__v32qi
) _mm256_broadcastb_epi8(__A
),
2639 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2640 _mm256_maskz_broadcastb_epi8 (__mmask32 __M
, __m128i __A
)
2642 return (__m256i
)__builtin_ia32_selectb_256(__M
,
2643 (__v32qi
) _mm256_broadcastb_epi8(__A
),
2644 (__v32qi
) _mm256_setzero_si256());
2647 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2648 _mm_mask_broadcastw_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
2650 return (__m128i
)__builtin_ia32_selectw_128(__M
,
2651 (__v8hi
) _mm_broadcastw_epi16(__A
),
2655 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2656 _mm_maskz_broadcastw_epi16 (__mmask8 __M
, __m128i __A
)
2658 return (__m128i
)__builtin_ia32_selectw_128(__M
,
2659 (__v8hi
) _mm_broadcastw_epi16(__A
),
2660 (__v8hi
) _mm_setzero_si128());
2663 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2664 _mm256_mask_broadcastw_epi16 (__m256i __O
, __mmask16 __M
, __m128i __A
)
2666 return (__m256i
)__builtin_ia32_selectw_256(__M
,
2667 (__v16hi
) _mm256_broadcastw_epi16(__A
),
2671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2672 _mm256_maskz_broadcastw_epi16 (__mmask16 __M
, __m128i __A
)
2674 return (__m256i
)__builtin_ia32_selectw_256(__M
,
2675 (__v16hi
) _mm256_broadcastw_epi16(__A
),
2676 (__v16hi
) _mm256_setzero_si256());
2679 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2680 _mm256_mask_set1_epi16 (__m256i __O
, __mmask16 __M
, short __A
)
2682 return (__m256i
) __builtin_ia32_selectw_256 (__M
,
2683 (__v16hi
) _mm256_set1_epi16(__A
),
2687 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2688 _mm256_maskz_set1_epi16 (__mmask16 __M
, short __A
)
2690 return (__m256i
) __builtin_ia32_selectw_256(__M
,
2691 (__v16hi
)_mm256_set1_epi16(__A
),
2692 (__v16hi
) _mm256_setzero_si256());
2695 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2696 _mm_mask_set1_epi16 (__m128i __O
, __mmask8 __M
, short __A
)
2698 return (__m128i
) __builtin_ia32_selectw_128(__M
,
2699 (__v8hi
) _mm_set1_epi16(__A
),
2703 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2704 _mm_maskz_set1_epi16 (__mmask8 __M
, short __A
)
2706 return (__m128i
) __builtin_ia32_selectw_128(__M
,
2707 (__v8hi
) _mm_set1_epi16(__A
),
2708 (__v8hi
) _mm_setzero_si128());
2711 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2712 _mm_permutexvar_epi16 (__m128i __A
, __m128i __B
)
2714 return (__m128i
)__builtin_ia32_permvarhi128((__v8hi
) __B
, (__v8hi
) __A
);
2717 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718 _mm_maskz_permutexvar_epi16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
2720 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
2721 (__v8hi
)_mm_permutexvar_epi16(__A
, __B
),
2722 (__v8hi
) _mm_setzero_si128());
2725 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2726 _mm_mask_permutexvar_epi16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
2729 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
2730 (__v8hi
)_mm_permutexvar_epi16(__A
, __B
),
2734 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2735 _mm256_permutexvar_epi16 (__m256i __A
, __m256i __B
)
2737 return (__m256i
)__builtin_ia32_permvarhi256((__v16hi
) __B
, (__v16hi
) __A
);
2740 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741 _mm256_maskz_permutexvar_epi16 (__mmask16 __M
, __m256i __A
,
2744 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
2745 (__v16hi
)_mm256_permutexvar_epi16(__A
, __B
),
2746 (__v16hi
)_mm256_setzero_si256());
2749 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2750 _mm256_mask_permutexvar_epi16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
2753 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
2754 (__v16hi
)_mm256_permutexvar_epi16(__A
, __B
),
/* Merge-masked _mm_alignr_epi8: byte lanes selected by U come from
 * _mm_alignr_epi8(A, B, N), the others from W. */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)(__m128i)(W)))
/* Zero-masked _mm_alignr_epi8: byte lanes selected by U come from
 * _mm_alignr_epi8(A, B, N), the others are zero. */
#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)_mm_setzero_si128()))
/* Merge-masked _mm256_alignr_epi8: byte lanes selected by U come from
 * _mm256_alignr_epi8(A, B, N), the others from W. */
#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)(__m256i)(W)))
/* Zero-masked _mm256_alignr_epi8: byte lanes selected by U come from
 * _mm256_alignr_epi8(A, B, N), the others are zero. */
#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)_mm256_setzero_si256()))
/* Forwards A and B (as 16 byte lanes) and the immediate imm to the
 * dbpsadbw128 builtin. */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128( \
      (__v16qi)(__m128i)(A), \
      (__v16qi)(__m128i)(B), \
      (int)(imm)))
/* Merge-masked _mm_dbsad_epu8: 16-bit lanes selected by U come from
 * _mm_dbsad_epu8(A, B, imm), the others from W. */
#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))
/* Zero-masked _mm_dbsad_epu8: 16-bit lanes selected by U come from
 * _mm_dbsad_epu8(A, B, imm), the others are zero. */
#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))
/* Forwards A and B (as 32 byte lanes) and the immediate imm to the
 * dbpsadbw256 builtin. */
#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256( \
      (__v32qi)(__m256i)(A), \
      (__v32qi)(__m256i)(B), \
      (int)(imm)))
/* Merge-masked _mm256_dbsad_epu8: 16-bit lanes selected by U come from
 * _mm256_dbsad_epu8(A, B, imm), the others from W. */
#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))
/* Zero-masked _mm256_dbsad_epu8: 16-bit lanes selected by U come from
 * _mm256_dbsad_epu8(A, B, imm), the others are zero. */
#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
2806 static __inline__
short __DEFAULT_FN_ATTRS128
2807 _mm_reduce_add_epi16(__m128i __W
) {
2808 return __builtin_reduce_add((__v8hi
)__W
);
2811 static __inline__
short __DEFAULT_FN_ATTRS128
2812 _mm_reduce_mul_epi16(__m128i __W
) {
2813 return __builtin_reduce_mul((__v8hi
)__W
);
2816 static __inline__
short __DEFAULT_FN_ATTRS128
2817 _mm_reduce_and_epi16(__m128i __W
) {
2818 return __builtin_reduce_and((__v8hi
)__W
);
2821 static __inline__
short __DEFAULT_FN_ATTRS128
2822 _mm_reduce_or_epi16(__m128i __W
) {
2823 return __builtin_reduce_or((__v8hi
)__W
);
2826 static __inline__
short __DEFAULT_FN_ATTRS128
2827 _mm_mask_reduce_add_epi16( __mmask8 __M
, __m128i __W
) {
2828 __W
= _mm_maskz_mov_epi16(__M
, __W
);
2829 return __builtin_reduce_add((__v8hi
)__W
);
2832 static __inline__
short __DEFAULT_FN_ATTRS128
2833 _mm_mask_reduce_mul_epi16( __mmask8 __M
, __m128i __W
) {
2834 __W
= _mm_mask_mov_epi16(_mm_set1_epi16(1), __M
, __W
);
2835 return __builtin_reduce_mul((__v8hi
)__W
);
2838 static __inline__
short __DEFAULT_FN_ATTRS128
2839 _mm_mask_reduce_and_epi16( __mmask8 __M
, __m128i __W
) {
2840 __W
= _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M
, __W
);
2841 return __builtin_reduce_and((__v8hi
)__W
);
2844 static __inline__
short __DEFAULT_FN_ATTRS128
2845 _mm_mask_reduce_or_epi16(__mmask8 __M
, __m128i __W
) {
2846 __W
= _mm_maskz_mov_epi16(__M
, __W
);
2847 return __builtin_reduce_or((__v8hi
)__W
);
2850 static __inline__
short __DEFAULT_FN_ATTRS128
2851 _mm_reduce_max_epi16(__m128i __V
) {
2852 return __builtin_reduce_max((__v8hi
)__V
);
2855 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2856 _mm_reduce_max_epu16(__m128i __V
) {
2857 return __builtin_reduce_max((__v8hu
)__V
);
2860 static __inline__
short __DEFAULT_FN_ATTRS128
2861 _mm_reduce_min_epi16(__m128i __V
) {
2862 return __builtin_reduce_min((__v8hi
)__V
);
2865 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2866 _mm_reduce_min_epu16(__m128i __V
) {
2867 return __builtin_reduce_min((__v8hu
)__V
);
2870 static __inline__
short __DEFAULT_FN_ATTRS128
2871 _mm_mask_reduce_max_epi16(__mmask16 __M
, __m128i __V
) {
2872 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M
, __V
);
2873 return __builtin_reduce_max((__v8hi
)__V
);
2876 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2877 _mm_mask_reduce_max_epu16(__mmask16 __M
, __m128i __V
) {
2878 __V
= _mm_maskz_mov_epi16(__M
, __V
);
2879 return __builtin_reduce_max((__v8hu
)__V
);
2882 static __inline__
short __DEFAULT_FN_ATTRS128
2883 _mm_mask_reduce_min_epi16(__mmask16 __M
, __m128i __V
) {
2884 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M
, __V
);
2885 return __builtin_reduce_min((__v8hi
)__V
);
2888 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2889 _mm_mask_reduce_min_epu16(__mmask16 __M
, __m128i __V
) {
2890 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M
, __V
);
2891 return __builtin_reduce_min((__v8hu
)__V
);
2894 static __inline__
short __DEFAULT_FN_ATTRS256
2895 _mm256_reduce_add_epi16(__m256i __W
) {
2896 return __builtin_reduce_add((__v16hi
)__W
);
2899 static __inline__
short __DEFAULT_FN_ATTRS256
2900 _mm256_reduce_mul_epi16(__m256i __W
) {
2901 return __builtin_reduce_mul((__v16hi
)__W
);
2904 static __inline__
short __DEFAULT_FN_ATTRS256
2905 _mm256_reduce_and_epi16(__m256i __W
) {
2906 return __builtin_reduce_and((__v16hi
)__W
);
2909 static __inline__
short __DEFAULT_FN_ATTRS256
2910 _mm256_reduce_or_epi16(__m256i __W
) {
2911 return __builtin_reduce_or((__v16hi
)__W
);
2914 static __inline__
short __DEFAULT_FN_ATTRS256
2915 _mm256_mask_reduce_add_epi16( __mmask16 __M
, __m256i __W
) {
2916 __W
= _mm256_maskz_mov_epi16(__M
, __W
);
2917 return __builtin_reduce_add((__v16hi
)__W
);
2920 static __inline__
short __DEFAULT_FN_ATTRS256
2921 _mm256_mask_reduce_mul_epi16( __mmask16 __M
, __m256i __W
) {
2922 __W
= _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M
, __W
);
2923 return __builtin_reduce_mul((__v16hi
)__W
);
2926 static __inline__
short __DEFAULT_FN_ATTRS256
2927 _mm256_mask_reduce_and_epi16( __mmask16 __M
, __m256i __W
) {
2928 __W
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M
, __W
);
2929 return __builtin_reduce_and((__v16hi
)__W
);
2932 static __inline__
short __DEFAULT_FN_ATTRS256
2933 _mm256_mask_reduce_or_epi16(__mmask16 __M
, __m256i __W
) {
2934 __W
= _mm256_maskz_mov_epi16(__M
, __W
);
2935 return __builtin_reduce_or((__v16hi
)__W
);
2938 static __inline__
short __DEFAULT_FN_ATTRS256
2939 _mm256_reduce_max_epi16(__m256i __V
) {
2940 return __builtin_reduce_max((__v16hi
)__V
);
2943 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2944 _mm256_reduce_max_epu16(__m256i __V
) {
2945 return __builtin_reduce_max((__v16hu
)__V
);
2948 static __inline__
short __DEFAULT_FN_ATTRS256
2949 _mm256_reduce_min_epi16(__m256i __V
) {
2950 return __builtin_reduce_min((__v16hi
)__V
);
2953 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2954 _mm256_reduce_min_epu16(__m256i __V
) {
2955 return __builtin_reduce_min((__v16hu
)__V
);
2958 static __inline__
short __DEFAULT_FN_ATTRS256
2959 _mm256_mask_reduce_max_epi16(__mmask16 __M
, __m256i __V
) {
2960 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M
, __V
);
2961 return __builtin_reduce_max((__v16hi
)__V
);
2964 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2965 _mm256_mask_reduce_max_epu16(__mmask16 __M
, __m256i __V
) {
2966 __V
= _mm256_maskz_mov_epi16(__M
, __V
);
2967 return __builtin_reduce_max((__v16hu
)__V
);
2970 static __inline__
short __DEFAULT_FN_ATTRS256
2971 _mm256_mask_reduce_min_epi16(__mmask16 __M
, __m256i __V
) {
2972 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M
, __V
);
2973 return __builtin_reduce_min((__v16hi
)__V
);
2976 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2977 _mm256_mask_reduce_min_epu16(__mmask16 __M
, __m256i __V
) {
2978 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M
, __V
);
2979 return __builtin_reduce_min((__v16hu
)__V
);
2982 static __inline__
signed char __DEFAULT_FN_ATTRS128
2983 _mm_reduce_add_epi8(__m128i __W
) {
2984 return __builtin_reduce_add((__v16qs
)__W
);
2987 static __inline__
signed char __DEFAULT_FN_ATTRS128
2988 _mm_reduce_mul_epi8(__m128i __W
) {
2989 return __builtin_reduce_mul((__v16qs
)__W
);
2992 static __inline__
signed char __DEFAULT_FN_ATTRS128
2993 _mm_reduce_and_epi8(__m128i __W
) {
2994 return __builtin_reduce_and((__v16qs
)__W
);
2997 static __inline__
signed char __DEFAULT_FN_ATTRS128
2998 _mm_reduce_or_epi8(__m128i __W
) {
2999 return __builtin_reduce_or((__v16qs
)__W
);
3002 static __inline__
signed char __DEFAULT_FN_ATTRS128
3003 _mm_mask_reduce_add_epi8(__mmask16 __M
, __m128i __W
) {
3004 __W
= _mm_maskz_mov_epi8(__M
, __W
);
3005 return __builtin_reduce_add((__v16qs
)__W
);
3008 static __inline__
signed char __DEFAULT_FN_ATTRS128
3009 _mm_mask_reduce_mul_epi8(__mmask16 __M
, __m128i __W
) {
3010 __W
= _mm_mask_mov_epi8(_mm_set1_epi8(1), __M
, __W
);
3011 return __builtin_reduce_mul((__v16qs
)__W
);
3014 static __inline__
signed char __DEFAULT_FN_ATTRS128
3015 _mm_mask_reduce_and_epi8(__mmask16 __M
, __m128i __W
) {
3016 __W
= _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M
, __W
);
3017 return __builtin_reduce_and((__v16qs
)__W
);
3020 static __inline__
signed char __DEFAULT_FN_ATTRS128
3021 _mm_mask_reduce_or_epi8(__mmask16 __M
, __m128i __W
) {
3022 __W
= _mm_maskz_mov_epi8(__M
, __W
);
3023 return __builtin_reduce_or((__v16qs
)__W
);
3026 static __inline__
signed char __DEFAULT_FN_ATTRS128
3027 _mm_reduce_max_epi8(__m128i __V
) {
3028 return __builtin_reduce_max((__v16qs
)__V
);
3031 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3032 _mm_reduce_max_epu8(__m128i __V
) {
3033 return __builtin_reduce_max((__v16qu
)__V
);
3036 static __inline__
signed char __DEFAULT_FN_ATTRS128
3037 _mm_reduce_min_epi8(__m128i __V
) {
3038 return __builtin_reduce_min((__v16qs
)__V
);
3041 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3042 _mm_reduce_min_epu8(__m128i __V
) {
3043 return __builtin_reduce_min((__v16qu
)__V
);
3046 static __inline__
signed char __DEFAULT_FN_ATTRS128
3047 _mm_mask_reduce_max_epi8(__mmask16 __M
, __m128i __V
) {
3048 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M
, __V
);
3049 return __builtin_reduce_max((__v16qs
)__V
);
3052 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3053 _mm_mask_reduce_max_epu8(__mmask16 __M
, __m128i __V
) {
3054 __V
= _mm_maskz_mov_epi8(__M
, __V
);
3055 return __builtin_reduce_max((__v16qu
)__V
);
3058 static __inline__
signed char __DEFAULT_FN_ATTRS128
3059 _mm_mask_reduce_min_epi8(__mmask16 __M
, __m128i __V
) {
3060 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(127), __M
, __V
);
3061 return __builtin_reduce_min((__v16qs
)__V
);
3064 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3065 _mm_mask_reduce_min_epu8(__mmask16 __M
, __m128i __V
) {
3066 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M
, __V
);
3067 return __builtin_reduce_min((__v16qu
)__V
);
3070 static __inline__
signed char __DEFAULT_FN_ATTRS256
3071 _mm256_reduce_add_epi8(__m256i __W
) {
3072 return __builtin_reduce_add((__v32qs
)__W
);
3075 static __inline__
signed char __DEFAULT_FN_ATTRS256
3076 _mm256_reduce_mul_epi8(__m256i __W
) {
3077 return __builtin_reduce_mul((__v32qs
)__W
);
3080 static __inline__
signed char __DEFAULT_FN_ATTRS256
3081 _mm256_reduce_and_epi8(__m256i __W
) {
3082 return __builtin_reduce_and((__v32qs
)__W
);
3085 static __inline__
signed char __DEFAULT_FN_ATTRS256
3086 _mm256_reduce_or_epi8(__m256i __W
) {
3087 return __builtin_reduce_or((__v32qs
)__W
);
3090 static __inline__
signed char __DEFAULT_FN_ATTRS256
3091 _mm256_mask_reduce_add_epi8(__mmask32 __M
, __m256i __W
) {
3092 __W
= _mm256_maskz_mov_epi8(__M
, __W
);
3093 return __builtin_reduce_add((__v32qs
)__W
);
3096 static __inline__
signed char __DEFAULT_FN_ATTRS256
3097 _mm256_mask_reduce_mul_epi8(__mmask32 __M
, __m256i __W
) {
3098 __W
= _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M
, __W
);
3099 return __builtin_reduce_mul((__v32qs
)__W
);
3102 static __inline__
signed char __DEFAULT_FN_ATTRS256
3103 _mm256_mask_reduce_and_epi8(__mmask32 __M
, __m256i __W
) {
3104 __W
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M
, __W
);
3105 return __builtin_reduce_and((__v32qs
)__W
);
3108 static __inline__
signed char __DEFAULT_FN_ATTRS256
3109 _mm256_mask_reduce_or_epi8(__mmask32 __M
, __m256i __W
) {
3110 __W
= _mm256_maskz_mov_epi8(__M
, __W
);
3111 return __builtin_reduce_or((__v32qs
)__W
);
3114 static __inline__
signed char __DEFAULT_FN_ATTRS256
3115 _mm256_reduce_max_epi8(__m256i __V
) {
3116 return __builtin_reduce_max((__v32qs
)__V
);
3119 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3120 _mm256_reduce_max_epu8(__m256i __V
) {
3121 return __builtin_reduce_max((__v32qu
)__V
);
3124 static __inline__
signed char __DEFAULT_FN_ATTRS256
3125 _mm256_reduce_min_epi8(__m256i __V
) {
3126 return __builtin_reduce_min((__v32qs
)__V
);
3129 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3130 _mm256_reduce_min_epu8(__m256i __V
) {
3131 return __builtin_reduce_min((__v32qu
)__V
);
3134 static __inline__
signed char __DEFAULT_FN_ATTRS256
3135 _mm256_mask_reduce_max_epi8(__mmask32 __M
, __m256i __V
) {
3136 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M
, __V
);
3137 return __builtin_reduce_max((__v32qs
)__V
);
3140 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3141 _mm256_mask_reduce_max_epu8(__mmask32 __M
, __m256i __V
) {
3142 __V
= _mm256_maskz_mov_epi8(__M
, __V
);
3143 return __builtin_reduce_max((__v32qu
)__V
);
3146 static __inline__
signed char __DEFAULT_FN_ATTRS256
3147 _mm256_mask_reduce_min_epi8(__mmask32 __M
, __m256i __V
) {
3148 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M
, __V
);
3149 return __builtin_reduce_min((__v32qs
)__V
);
3152 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3153 _mm256_mask_reduce_min_epu8(__mmask32 __M
, __m256i __V
) {
3154 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M
, __V
);
3155 return __builtin_reduce_min((__v32qu
)__V
);
3158 #undef __DEFAULT_FN_ATTRS128
3159 #undef __DEFAULT_FN_ATTRS256
3161 #endif /* __AVX512VLBWINTRIN_H */