/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
11 #error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLBWINTRIN_H
15 #define __AVX512VLBWINTRIN_H
/* Default attributes for the functions in this file.  Both sets request
 * __always_inline__ and __nodebug__ and the target feature string
 * "avx512vl,avx512bw,no-evex512"; they differ only in the
 * __min_vector_width__ value (128 or 256). */
#define __DEFAULT_FN_ATTRS128 \
  __attribute__(( \
      __always_inline__, \
      __nodebug__, \
      __target__("avx512vl,avx512bw,no-evex512"), \
      __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__(( \
      __always_inline__, \
      __nodebug__, \
      __target__("avx512vl,avx512bw,no-evex512"), \
      __min_vector_width__(256)))
/* 128-bit byte comparisons yielding a __mmask16.  p is the comparison
 * predicate (one of the _MM_CMPINT_* constants).  The plain forms pass an
 * all-ones input mask to the builtin; the _mask_ forms pass m instead.
 * The epi8 forms call the cmpb builtin, the epu8 forms the ucmpb builtin. */
#define _mm_cmp_epi8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \
                                          (__v16qi)(__m128i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm_cmp_epu8_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \
                                           (__v16qi)(__m128i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* 256-bit byte comparisons yielding a __mmask32.  p is the comparison
 * predicate (one of the _MM_CMPINT_* constants).  The plain forms pass an
 * all-ones input mask to the builtin; the _mask_ forms pass m instead.
 * The epi8 forms call the cmpb builtin, the epu8 forms the ucmpb builtin. */
#define _mm256_cmp_epi8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)-1))

#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \
                                          (__v32qi)(__m256i)(b), (int)(p), \
                                          (__mmask32)(m)))

#define _mm256_cmp_epu8_mask(a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)-1))

#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \
  ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \
                                           (__v32qi)(__m256i)(b), (int)(p), \
                                           (__mmask32)(m)))
/* 128-bit 16-bit-element comparisons yielding a __mmask8.  p is the
 * comparison predicate (one of the _MM_CMPINT_* constants).  The plain forms
 * pass an all-ones input mask to the builtin; the _mask_ forms pass m instead.
 * The epi16 forms call the cmpw builtin, the epu16 forms the ucmpw builtin. */
#define _mm_cmp_epi16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \
                                         (__v8hi)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))

#define _mm_cmp_epu16_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \
                                          (__v8hi)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
/* 256-bit 16-bit-element comparisons yielding a __mmask16.  p is the
 * comparison predicate (one of the _MM_CMPINT_* constants).  The plain forms
 * pass an all-ones input mask to the builtin; the _mask_ forms pass m instead.
 * The epi16 forms call the cmpw builtin, the epu16 forms the ucmpw builtin. */
#define _mm256_cmp_epi16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)-1))

#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \
                                          (__v16hi)(__m256i)(b), (int)(p), \
                                          (__mmask16)(m)))

#define _mm256_cmp_epu16_mask(a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)-1))

#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \
  ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \
                                           (__v16hi)(__m256i)(b), (int)(p), \
                                           (__mmask16)(m)))
/* Fixed-predicate shorthands for _mm_cmp_epi8_mask: each macro forwards its
 * operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi8_mask(A, B) \
  _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm_mask_cmp_epi8_mask; k is passed through as the
 * input mask. */
#define _mm_mask_cmpeq_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epi8_mask(k, A, B) \
  _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epi8_mask: each macro forwards
 * its operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi8_mask(A, B) \
  _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm256_mask_cmp_epi8_mask; k is passed through as the
 * input mask. */
#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \
  _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epu8_mask: each macro forwards its
 * operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu8_mask(A, B) \
  _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm_mask_cmp_epu8_mask; k is passed through as the
 * input mask. */
#define _mm_mask_cmpeq_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epu8_mask(k, A, B) \
  _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epu8_mask: each macro forwards
 * its operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu8_mask(A, B) \
  _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm256_mask_cmp_epu8_mask; k is passed through as the
 * input mask. */
#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \
  _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epi16_mask: each macro forwards its
 * operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm_cmpeq_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi16_mask(A, B) \
  _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm_mask_cmp_epi16_mask; k is passed through as the
 * input mask. */
#define _mm_mask_cmpeq_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epi16_mask(k, A, B) \
  _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epi16_mask: each macro forwards
 * its operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi16_mask(A, B) \
  _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm256_mask_cmp_epi16_mask; k is passed through as the
 * input mask. */
#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \
  _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm_cmp_epu16_mask: each macro forwards its
 * operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm_cmpeq_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu16_mask(A, B) \
  _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm_mask_cmp_epu16_mask; k is passed through as the
 * input mask. */
#define _mm_mask_cmpeq_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpge_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpgt_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmple_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmplt_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmpneq_epu16_mask(k, A, B) \
  _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
/* Fixed-predicate shorthands for _mm256_cmp_epu16_mask: each macro forwards
 * its operands unchanged and supplies the matching _MM_CMPINT_* constant. */
#define _mm256_cmpeq_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu16_mask(A, B) \
  _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)

/* Same shorthands for _mm256_mask_cmp_epu16_mask; k is passed through as the
 * input mask. */
#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpge_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmple_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmplt_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
  _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
309 static __inline__ __m256i __DEFAULT_FN_ATTRS256
310 _mm256_mask_add_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
){
311 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
312 (__v32qi
)_mm256_add_epi8(__A
, __B
),
316 static __inline__ __m256i __DEFAULT_FN_ATTRS256
317 _mm256_maskz_add_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
318 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
319 (__v32qi
)_mm256_add_epi8(__A
, __B
),
320 (__v32qi
)_mm256_setzero_si256());
323 static __inline__ __m256i __DEFAULT_FN_ATTRS256
324 _mm256_mask_add_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
325 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
326 (__v16hi
)_mm256_add_epi16(__A
, __B
),
330 static __inline__ __m256i __DEFAULT_FN_ATTRS256
331 _mm256_maskz_add_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
332 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
333 (__v16hi
)_mm256_add_epi16(__A
, __B
),
334 (__v16hi
)_mm256_setzero_si256());
337 static __inline__ __m256i __DEFAULT_FN_ATTRS256
338 _mm256_mask_sub_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
339 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
340 (__v32qi
)_mm256_sub_epi8(__A
, __B
),
344 static __inline__ __m256i __DEFAULT_FN_ATTRS256
345 _mm256_maskz_sub_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
346 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
347 (__v32qi
)_mm256_sub_epi8(__A
, __B
),
348 (__v32qi
)_mm256_setzero_si256());
351 static __inline__ __m256i __DEFAULT_FN_ATTRS256
352 _mm256_mask_sub_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
353 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
354 (__v16hi
)_mm256_sub_epi16(__A
, __B
),
358 static __inline__ __m256i __DEFAULT_FN_ATTRS256
359 _mm256_maskz_sub_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
360 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
361 (__v16hi
)_mm256_sub_epi16(__A
, __B
),
362 (__v16hi
)_mm256_setzero_si256());
365 static __inline__ __m128i __DEFAULT_FN_ATTRS128
366 _mm_mask_add_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
367 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
368 (__v16qi
)_mm_add_epi8(__A
, __B
),
372 static __inline__ __m128i __DEFAULT_FN_ATTRS128
373 _mm_maskz_add_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
374 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
375 (__v16qi
)_mm_add_epi8(__A
, __B
),
376 (__v16qi
)_mm_setzero_si128());
379 static __inline__ __m128i __DEFAULT_FN_ATTRS128
380 _mm_mask_add_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
381 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
382 (__v8hi
)_mm_add_epi16(__A
, __B
),
386 static __inline__ __m128i __DEFAULT_FN_ATTRS128
387 _mm_maskz_add_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
388 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
389 (__v8hi
)_mm_add_epi16(__A
, __B
),
390 (__v8hi
)_mm_setzero_si128());
393 static __inline__ __m128i __DEFAULT_FN_ATTRS128
394 _mm_mask_sub_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
395 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
396 (__v16qi
)_mm_sub_epi8(__A
, __B
),
400 static __inline__ __m128i __DEFAULT_FN_ATTRS128
401 _mm_maskz_sub_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
402 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
403 (__v16qi
)_mm_sub_epi8(__A
, __B
),
404 (__v16qi
)_mm_setzero_si128());
407 static __inline__ __m128i __DEFAULT_FN_ATTRS128
408 _mm_mask_sub_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
409 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
410 (__v8hi
)_mm_sub_epi16(__A
, __B
),
414 static __inline__ __m128i __DEFAULT_FN_ATTRS128
415 _mm_maskz_sub_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
416 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
417 (__v8hi
)_mm_sub_epi16(__A
, __B
),
418 (__v8hi
)_mm_setzero_si128());
421 static __inline__ __m256i __DEFAULT_FN_ATTRS256
422 _mm256_mask_mullo_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
423 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
424 (__v16hi
)_mm256_mullo_epi16(__A
, __B
),
428 static __inline__ __m256i __DEFAULT_FN_ATTRS256
429 _mm256_maskz_mullo_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
430 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
431 (__v16hi
)_mm256_mullo_epi16(__A
, __B
),
432 (__v16hi
)_mm256_setzero_si256());
435 static __inline__ __m128i __DEFAULT_FN_ATTRS128
436 _mm_mask_mullo_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
437 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
438 (__v8hi
)_mm_mullo_epi16(__A
, __B
),
442 static __inline__ __m128i __DEFAULT_FN_ATTRS128
443 _mm_maskz_mullo_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
444 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
445 (__v8hi
)_mm_mullo_epi16(__A
, __B
),
446 (__v8hi
)_mm_setzero_si128());
449 static __inline__ __m128i __DEFAULT_FN_ATTRS128
450 _mm_mask_blend_epi8 (__mmask16 __U
, __m128i __A
, __m128i __W
)
452 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
457 static __inline__ __m256i __DEFAULT_FN_ATTRS256
458 _mm256_mask_blend_epi8 (__mmask32 __U
, __m256i __A
, __m256i __W
)
460 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
465 static __inline__ __m128i __DEFAULT_FN_ATTRS128
466 _mm_mask_blend_epi16 (__mmask8 __U
, __m128i __A
, __m128i __W
)
468 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
473 static __inline__ __m256i __DEFAULT_FN_ATTRS256
474 _mm256_mask_blend_epi16 (__mmask16 __U
, __m256i __A
, __m256i __W
)
476 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
481 static __inline__ __m128i __DEFAULT_FN_ATTRS128
482 _mm_mask_abs_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
)
484 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
485 (__v16qi
)_mm_abs_epi8(__A
),
489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
490 _mm_maskz_abs_epi8(__mmask16 __U
, __m128i __A
)
492 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
493 (__v16qi
)_mm_abs_epi8(__A
),
494 (__v16qi
)_mm_setzero_si128());
497 static __inline__ __m256i __DEFAULT_FN_ATTRS256
498 _mm256_mask_abs_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
)
500 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
501 (__v32qi
)_mm256_abs_epi8(__A
),
505 static __inline__ __m256i __DEFAULT_FN_ATTRS256
506 _mm256_maskz_abs_epi8 (__mmask32 __U
, __m256i __A
)
508 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
509 (__v32qi
)_mm256_abs_epi8(__A
),
510 (__v32qi
)_mm256_setzero_si256());
513 static __inline__ __m128i __DEFAULT_FN_ATTRS128
514 _mm_mask_abs_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
516 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
517 (__v8hi
)_mm_abs_epi16(__A
),
521 static __inline__ __m128i __DEFAULT_FN_ATTRS128
522 _mm_maskz_abs_epi16(__mmask8 __U
, __m128i __A
)
524 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
525 (__v8hi
)_mm_abs_epi16(__A
),
526 (__v8hi
)_mm_setzero_si128());
529 static __inline__ __m256i __DEFAULT_FN_ATTRS256
530 _mm256_mask_abs_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
)
532 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
533 (__v16hi
)_mm256_abs_epi16(__A
),
537 static __inline__ __m256i __DEFAULT_FN_ATTRS256
538 _mm256_maskz_abs_epi16(__mmask16 __U
, __m256i __A
)
540 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
541 (__v16hi
)_mm256_abs_epi16(__A
),
542 (__v16hi
)_mm256_setzero_si256());
545 static __inline__ __m128i __DEFAULT_FN_ATTRS128
546 _mm_maskz_packs_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
) {
547 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
548 (__v8hi
)_mm_packs_epi32(__A
, __B
),
549 (__v8hi
)_mm_setzero_si128());
552 static __inline__ __m128i __DEFAULT_FN_ATTRS128
553 _mm_mask_packs_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
555 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
556 (__v8hi
)_mm_packs_epi32(__A
, __B
),
560 static __inline__ __m256i __DEFAULT_FN_ATTRS256
561 _mm256_maskz_packs_epi32(__mmask16 __M
, __m256i __A
, __m256i __B
)
563 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
564 (__v16hi
)_mm256_packs_epi32(__A
, __B
),
565 (__v16hi
)_mm256_setzero_si256());
568 static __inline__ __m256i __DEFAULT_FN_ATTRS256
569 _mm256_mask_packs_epi32(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
571 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
572 (__v16hi
)_mm256_packs_epi32(__A
, __B
),
576 static __inline__ __m128i __DEFAULT_FN_ATTRS128
577 _mm_maskz_packs_epi16(__mmask16 __M
, __m128i __A
, __m128i __B
)
579 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
580 (__v16qi
)_mm_packs_epi16(__A
, __B
),
581 (__v16qi
)_mm_setzero_si128());
584 static __inline__ __m128i __DEFAULT_FN_ATTRS128
585 _mm_mask_packs_epi16(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
587 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
588 (__v16qi
)_mm_packs_epi16(__A
, __B
),
592 static __inline__ __m256i __DEFAULT_FN_ATTRS256
593 _mm256_maskz_packs_epi16(__mmask32 __M
, __m256i __A
, __m256i __B
)
595 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
596 (__v32qi
)_mm256_packs_epi16(__A
, __B
),
597 (__v32qi
)_mm256_setzero_si256());
600 static __inline__ __m256i __DEFAULT_FN_ATTRS256
601 _mm256_mask_packs_epi16(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
603 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
604 (__v32qi
)_mm256_packs_epi16(__A
, __B
),
608 static __inline__ __m128i __DEFAULT_FN_ATTRS128
609 _mm_maskz_packus_epi32(__mmask8 __M
, __m128i __A
, __m128i __B
)
611 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
612 (__v8hi
)_mm_packus_epi32(__A
, __B
),
613 (__v8hi
)_mm_setzero_si128());
616 static __inline__ __m128i __DEFAULT_FN_ATTRS128
617 _mm_mask_packus_epi32(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
619 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
620 (__v8hi
)_mm_packus_epi32(__A
, __B
),
624 static __inline__ __m256i __DEFAULT_FN_ATTRS256
625 _mm256_maskz_packus_epi32(__mmask16 __M
, __m256i __A
, __m256i __B
)
627 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
628 (__v16hi
)_mm256_packus_epi32(__A
, __B
),
629 (__v16hi
)_mm256_setzero_si256());
632 static __inline__ __m256i __DEFAULT_FN_ATTRS256
633 _mm256_mask_packus_epi32(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
635 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
636 (__v16hi
)_mm256_packus_epi32(__A
, __B
),
640 static __inline__ __m128i __DEFAULT_FN_ATTRS128
641 _mm_maskz_packus_epi16(__mmask16 __M
, __m128i __A
, __m128i __B
)
643 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
644 (__v16qi
)_mm_packus_epi16(__A
, __B
),
645 (__v16qi
)_mm_setzero_si128());
648 static __inline__ __m128i __DEFAULT_FN_ATTRS128
649 _mm_mask_packus_epi16(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
651 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
652 (__v16qi
)_mm_packus_epi16(__A
, __B
),
656 static __inline__ __m256i __DEFAULT_FN_ATTRS256
657 _mm256_maskz_packus_epi16(__mmask32 __M
, __m256i __A
, __m256i __B
)
659 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
660 (__v32qi
)_mm256_packus_epi16(__A
, __B
),
661 (__v32qi
)_mm256_setzero_si256());
664 static __inline__ __m256i __DEFAULT_FN_ATTRS256
665 _mm256_mask_packus_epi16(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
667 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
668 (__v32qi
)_mm256_packus_epi16(__A
, __B
),
672 static __inline__ __m128i __DEFAULT_FN_ATTRS128
673 _mm_mask_adds_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
675 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
676 (__v16qi
)_mm_adds_epi8(__A
, __B
),
680 static __inline__ __m128i __DEFAULT_FN_ATTRS128
681 _mm_maskz_adds_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
683 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
684 (__v16qi
)_mm_adds_epi8(__A
, __B
),
685 (__v16qi
)_mm_setzero_si128());
688 static __inline__ __m256i __DEFAULT_FN_ATTRS256
689 _mm256_mask_adds_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
691 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
692 (__v32qi
)_mm256_adds_epi8(__A
, __B
),
696 static __inline__ __m256i __DEFAULT_FN_ATTRS256
697 _mm256_maskz_adds_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
699 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
700 (__v32qi
)_mm256_adds_epi8(__A
, __B
),
701 (__v32qi
)_mm256_setzero_si256());
704 static __inline__ __m128i __DEFAULT_FN_ATTRS128
705 _mm_mask_adds_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
707 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
708 (__v8hi
)_mm_adds_epi16(__A
, __B
),
712 static __inline__ __m128i __DEFAULT_FN_ATTRS128
713 _mm_maskz_adds_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
715 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
716 (__v8hi
)_mm_adds_epi16(__A
, __B
),
717 (__v8hi
)_mm_setzero_si128());
720 static __inline__ __m256i __DEFAULT_FN_ATTRS256
721 _mm256_mask_adds_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
723 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
724 (__v16hi
)_mm256_adds_epi16(__A
, __B
),
728 static __inline__ __m256i __DEFAULT_FN_ATTRS256
729 _mm256_maskz_adds_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
731 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
732 (__v16hi
)_mm256_adds_epi16(__A
, __B
),
733 (__v16hi
)_mm256_setzero_si256());
736 static __inline__ __m128i __DEFAULT_FN_ATTRS128
737 _mm_mask_adds_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
739 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
740 (__v16qi
)_mm_adds_epu8(__A
, __B
),
744 static __inline__ __m128i __DEFAULT_FN_ATTRS128
745 _mm_maskz_adds_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
747 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
748 (__v16qi
)_mm_adds_epu8(__A
, __B
),
749 (__v16qi
)_mm_setzero_si128());
752 static __inline__ __m256i __DEFAULT_FN_ATTRS256
753 _mm256_mask_adds_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
755 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
756 (__v32qi
)_mm256_adds_epu8(__A
, __B
),
760 static __inline__ __m256i __DEFAULT_FN_ATTRS256
761 _mm256_maskz_adds_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
763 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
764 (__v32qi
)_mm256_adds_epu8(__A
, __B
),
765 (__v32qi
)_mm256_setzero_si256());
768 static __inline__ __m128i __DEFAULT_FN_ATTRS128
769 _mm_mask_adds_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
771 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
772 (__v8hi
)_mm_adds_epu16(__A
, __B
),
776 static __inline__ __m128i __DEFAULT_FN_ATTRS128
777 _mm_maskz_adds_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
779 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
780 (__v8hi
)_mm_adds_epu16(__A
, __B
),
781 (__v8hi
)_mm_setzero_si128());
784 static __inline__ __m256i __DEFAULT_FN_ATTRS256
785 _mm256_mask_adds_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
787 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
788 (__v16hi
)_mm256_adds_epu16(__A
, __B
),
792 static __inline__ __m256i __DEFAULT_FN_ATTRS256
793 _mm256_maskz_adds_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
795 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
796 (__v16hi
)_mm256_adds_epu16(__A
, __B
),
797 (__v16hi
)_mm256_setzero_si256());
800 static __inline__ __m128i __DEFAULT_FN_ATTRS128
801 _mm_mask_avg_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
803 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
804 (__v16qi
)_mm_avg_epu8(__A
, __B
),
808 static __inline__ __m128i __DEFAULT_FN_ATTRS128
809 _mm_maskz_avg_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
811 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
812 (__v16qi
)_mm_avg_epu8(__A
, __B
),
813 (__v16qi
)_mm_setzero_si128());
816 static __inline__ __m256i __DEFAULT_FN_ATTRS256
817 _mm256_mask_avg_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
819 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
820 (__v32qi
)_mm256_avg_epu8(__A
, __B
),
824 static __inline__ __m256i __DEFAULT_FN_ATTRS256
825 _mm256_maskz_avg_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
827 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
828 (__v32qi
)_mm256_avg_epu8(__A
, __B
),
829 (__v32qi
)_mm256_setzero_si256());
832 static __inline__ __m128i __DEFAULT_FN_ATTRS128
833 _mm_mask_avg_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
835 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
836 (__v8hi
)_mm_avg_epu16(__A
, __B
),
840 static __inline__ __m128i __DEFAULT_FN_ATTRS128
841 _mm_maskz_avg_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
843 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
844 (__v8hi
)_mm_avg_epu16(__A
, __B
),
845 (__v8hi
)_mm_setzero_si128());
848 static __inline__ __m256i __DEFAULT_FN_ATTRS256
849 _mm256_mask_avg_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
851 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
852 (__v16hi
)_mm256_avg_epu16(__A
, __B
),
856 static __inline__ __m256i __DEFAULT_FN_ATTRS256
857 _mm256_maskz_avg_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
859 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
860 (__v16hi
)_mm256_avg_epu16(__A
, __B
),
861 (__v16hi
)_mm256_setzero_si256());
864 static __inline__ __m128i __DEFAULT_FN_ATTRS128
865 _mm_maskz_max_epi8(__mmask16 __M
, __m128i __A
, __m128i __B
)
867 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
868 (__v16qi
)_mm_max_epi8(__A
, __B
),
869 (__v16qi
)_mm_setzero_si128());
872 static __inline__ __m128i __DEFAULT_FN_ATTRS128
873 _mm_mask_max_epi8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
875 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
876 (__v16qi
)_mm_max_epi8(__A
, __B
),
880 static __inline__ __m256i __DEFAULT_FN_ATTRS256
881 _mm256_maskz_max_epi8(__mmask32 __M
, __m256i __A
, __m256i __B
)
883 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
884 (__v32qi
)_mm256_max_epi8(__A
, __B
),
885 (__v32qi
)_mm256_setzero_si256());
888 static __inline__ __m256i __DEFAULT_FN_ATTRS256
889 _mm256_mask_max_epi8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
891 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
892 (__v32qi
)_mm256_max_epi8(__A
, __B
),
896 static __inline__ __m128i __DEFAULT_FN_ATTRS128
897 _mm_maskz_max_epi16(__mmask8 __M
, __m128i __A
, __m128i __B
)
899 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
900 (__v8hi
)_mm_max_epi16(__A
, __B
),
901 (__v8hi
)_mm_setzero_si128());
904 static __inline__ __m128i __DEFAULT_FN_ATTRS128
905 _mm_mask_max_epi16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
907 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
908 (__v8hi
)_mm_max_epi16(__A
, __B
),
912 static __inline__ __m256i __DEFAULT_FN_ATTRS256
913 _mm256_maskz_max_epi16(__mmask16 __M
, __m256i __A
, __m256i __B
)
915 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
916 (__v16hi
)_mm256_max_epi16(__A
, __B
),
917 (__v16hi
)_mm256_setzero_si256());
920 static __inline__ __m256i __DEFAULT_FN_ATTRS256
921 _mm256_mask_max_epi16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
923 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
924 (__v16hi
)_mm256_max_epi16(__A
, __B
),
928 static __inline__ __m128i __DEFAULT_FN_ATTRS128
929 _mm_maskz_max_epu8(__mmask16 __M
, __m128i __A
, __m128i __B
)
931 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
932 (__v16qi
)_mm_max_epu8(__A
, __B
),
933 (__v16qi
)_mm_setzero_si128());
936 static __inline__ __m128i __DEFAULT_FN_ATTRS128
937 _mm_mask_max_epu8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
939 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
940 (__v16qi
)_mm_max_epu8(__A
, __B
),
944 static __inline__ __m256i __DEFAULT_FN_ATTRS256
945 _mm256_maskz_max_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
947 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
948 (__v32qi
)_mm256_max_epu8(__A
, __B
),
949 (__v32qi
)_mm256_setzero_si256());
952 static __inline__ __m256i __DEFAULT_FN_ATTRS256
953 _mm256_mask_max_epu8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
955 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
956 (__v32qi
)_mm256_max_epu8(__A
, __B
),
960 static __inline__ __m128i __DEFAULT_FN_ATTRS128
961 _mm_maskz_max_epu16(__mmask8 __M
, __m128i __A
, __m128i __B
)
963 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
964 (__v8hi
)_mm_max_epu16(__A
, __B
),
965 (__v8hi
)_mm_setzero_si128());
968 static __inline__ __m128i __DEFAULT_FN_ATTRS128
969 _mm_mask_max_epu16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
971 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
972 (__v8hi
)_mm_max_epu16(__A
, __B
),
976 static __inline__ __m256i __DEFAULT_FN_ATTRS256
977 _mm256_maskz_max_epu16(__mmask16 __M
, __m256i __A
, __m256i __B
)
979 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
980 (__v16hi
)_mm256_max_epu16(__A
, __B
),
981 (__v16hi
)_mm256_setzero_si256());
984 static __inline__ __m256i __DEFAULT_FN_ATTRS256
985 _mm256_mask_max_epu16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
987 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
988 (__v16hi
)_mm256_max_epu16(__A
, __B
),
992 static __inline__ __m128i __DEFAULT_FN_ATTRS128
993 _mm_maskz_min_epi8(__mmask16 __M
, __m128i __A
, __m128i __B
)
995 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
996 (__v16qi
)_mm_min_epi8(__A
, __B
),
997 (__v16qi
)_mm_setzero_si128());
1000 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1001 _mm_mask_min_epi8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
1003 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1004 (__v16qi
)_mm_min_epi8(__A
, __B
),
1008 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1009 _mm256_maskz_min_epi8(__mmask32 __M
, __m256i __A
, __m256i __B
)
1011 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1012 (__v32qi
)_mm256_min_epi8(__A
, __B
),
1013 (__v32qi
)_mm256_setzero_si256());
1016 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1017 _mm256_mask_min_epi8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
1019 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1020 (__v32qi
)_mm256_min_epi8(__A
, __B
),
1024 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1025 _mm_maskz_min_epi16(__mmask8 __M
, __m128i __A
, __m128i __B
)
1027 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1028 (__v8hi
)_mm_min_epi16(__A
, __B
),
1029 (__v8hi
)_mm_setzero_si128());
1032 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1033 _mm_mask_min_epi16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
1035 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1036 (__v8hi
)_mm_min_epi16(__A
, __B
),
1040 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1041 _mm256_maskz_min_epi16(__mmask16 __M
, __m256i __A
, __m256i __B
)
1043 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1044 (__v16hi
)_mm256_min_epi16(__A
, __B
),
1045 (__v16hi
)_mm256_setzero_si256());
1048 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1049 _mm256_mask_min_epi16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
1051 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1052 (__v16hi
)_mm256_min_epi16(__A
, __B
),
1056 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1057 _mm_maskz_min_epu8(__mmask16 __M
, __m128i __A
, __m128i __B
)
1059 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1060 (__v16qi
)_mm_min_epu8(__A
, __B
),
1061 (__v16qi
)_mm_setzero_si128());
1064 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1065 _mm_mask_min_epu8(__m128i __W
, __mmask16 __M
, __m128i __A
, __m128i __B
)
1067 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1068 (__v16qi
)_mm_min_epu8(__A
, __B
),
1072 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1073 _mm256_maskz_min_epu8 (__mmask32 __M
, __m256i __A
, __m256i __B
)
1075 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1076 (__v32qi
)_mm256_min_epu8(__A
, __B
),
1077 (__v32qi
)_mm256_setzero_si256());
1080 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1081 _mm256_mask_min_epu8(__m256i __W
, __mmask32 __M
, __m256i __A
, __m256i __B
)
1083 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__M
,
1084 (__v32qi
)_mm256_min_epu8(__A
, __B
),
1088 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1089 _mm_maskz_min_epu16(__mmask8 __M
, __m128i __A
, __m128i __B
)
1091 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1092 (__v8hi
)_mm_min_epu16(__A
, __B
),
1093 (__v8hi
)_mm_setzero_si128());
1096 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1097 _mm_mask_min_epu16(__m128i __W
, __mmask8 __M
, __m128i __A
, __m128i __B
)
1099 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
1100 (__v8hi
)_mm_min_epu16(__A
, __B
),
1104 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1105 _mm256_maskz_min_epu16(__mmask16 __M
, __m256i __A
, __m256i __B
)
1107 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1108 (__v16hi
)_mm256_min_epu16(__A
, __B
),
1109 (__v16hi
)_mm256_setzero_si256());
1112 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1113 _mm256_mask_min_epu16(__m256i __W
, __mmask16 __M
, __m256i __A
, __m256i __B
)
1115 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
1116 (__v16hi
)_mm256_min_epu16(__A
, __B
),
1120 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1121 _mm_mask_shuffle_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1123 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1124 (__v16qi
)_mm_shuffle_epi8(__A
, __B
),
1128 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1129 _mm_maskz_shuffle_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1131 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1132 (__v16qi
)_mm_shuffle_epi8(__A
, __B
),
1133 (__v16qi
)_mm_setzero_si128());
1136 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1137 _mm256_mask_shuffle_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1139 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1140 (__v32qi
)_mm256_shuffle_epi8(__A
, __B
),
1144 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1145 _mm256_maskz_shuffle_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1147 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1148 (__v32qi
)_mm256_shuffle_epi8(__A
, __B
),
1149 (__v32qi
)_mm256_setzero_si256());
1152 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1153 _mm_mask_subs_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1155 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1156 (__v16qi
)_mm_subs_epi8(__A
, __B
),
1160 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1161 _mm_maskz_subs_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1163 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1164 (__v16qi
)_mm_subs_epi8(__A
, __B
),
1165 (__v16qi
)_mm_setzero_si128());
1168 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1169 _mm256_mask_subs_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1171 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1172 (__v32qi
)_mm256_subs_epi8(__A
, __B
),
1176 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1177 _mm256_maskz_subs_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1179 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1180 (__v32qi
)_mm256_subs_epi8(__A
, __B
),
1181 (__v32qi
)_mm256_setzero_si256());
1184 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1185 _mm_mask_subs_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1187 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1188 (__v8hi
)_mm_subs_epi16(__A
, __B
),
1192 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1193 _mm_maskz_subs_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1195 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1196 (__v8hi
)_mm_subs_epi16(__A
, __B
),
1197 (__v8hi
)_mm_setzero_si128());
1200 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1201 _mm256_mask_subs_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1203 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1204 (__v16hi
)_mm256_subs_epi16(__A
, __B
),
1208 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1209 _mm256_maskz_subs_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1211 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1212 (__v16hi
)_mm256_subs_epi16(__A
, __B
),
1213 (__v16hi
)_mm256_setzero_si256());
1216 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1217 _mm_mask_subs_epu8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
)
1219 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1220 (__v16qi
)_mm_subs_epu8(__A
, __B
),
1224 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1225 _mm_maskz_subs_epu8(__mmask16 __U
, __m128i __A
, __m128i __B
)
1227 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1228 (__v16qi
)_mm_subs_epu8(__A
, __B
),
1229 (__v16qi
)_mm_setzero_si128());
1232 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1233 _mm256_mask_subs_epu8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
)
1235 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1236 (__v32qi
)_mm256_subs_epu8(__A
, __B
),
1240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1241 _mm256_maskz_subs_epu8(__mmask32 __U
, __m256i __A
, __m256i __B
)
1243 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1244 (__v32qi
)_mm256_subs_epu8(__A
, __B
),
1245 (__v32qi
)_mm256_setzero_si256());
1248 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1249 _mm_mask_subs_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1251 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1252 (__v8hi
)_mm_subs_epu16(__A
, __B
),
1256 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1257 _mm_maskz_subs_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1259 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1260 (__v8hi
)_mm_subs_epu16(__A
, __B
),
1261 (__v8hi
)_mm_setzero_si128());
1264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1265 _mm256_mask_subs_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
,
1267 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1268 (__v16hi
)_mm256_subs_epu16(__A
, __B
),
1272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1273 _mm256_maskz_subs_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1275 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1276 (__v16hi
)_mm256_subs_epu16(__A
, __B
),
1277 (__v16hi
)_mm256_setzero_si256());
1280 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1281 _mm_permutex2var_epi16(__m128i __A
, __m128i __I
, __m128i __B
)
1283 return (__m128i
)__builtin_ia32_vpermi2varhi128((__v8hi
)__A
, (__v8hi
)__I
,
1287 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1288 _mm_mask_permutex2var_epi16(__m128i __A
, __mmask8 __U
, __m128i __I
,
1291 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1292 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1297 _mm_mask2_permutex2var_epi16(__m128i __A
, __m128i __I
, __mmask8 __U
,
1300 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1301 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1305 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1306 _mm_maskz_permutex2var_epi16 (__mmask8 __U
, __m128i __A
, __m128i __I
,
1309 return (__m128i
)__builtin_ia32_selectw_128(__U
,
1310 (__v8hi
)_mm_permutex2var_epi16(__A
, __I
, __B
),
1311 (__v8hi
)_mm_setzero_si128());
1314 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1315 _mm256_permutex2var_epi16(__m256i __A
, __m256i __I
, __m256i __B
)
1317 return (__m256i
)__builtin_ia32_vpermi2varhi256((__v16hi
)__A
, (__v16hi
)__I
,
1321 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1322 _mm256_mask_permutex2var_epi16(__m256i __A
, __mmask16 __U
, __m256i __I
,
1325 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1326 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1330 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1331 _mm256_mask2_permutex2var_epi16(__m256i __A
, __m256i __I
, __mmask16 __U
,
1334 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1335 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1339 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1340 _mm256_maskz_permutex2var_epi16 (__mmask16 __U
, __m256i __A
, __m256i __I
,
1343 return (__m256i
)__builtin_ia32_selectw_256(__U
,
1344 (__v16hi
)_mm256_permutex2var_epi16(__A
, __I
, __B
),
1345 (__v16hi
)_mm256_setzero_si256());
1348 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1349 _mm_mask_maddubs_epi16(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
1350 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1351 (__v8hi
)_mm_maddubs_epi16(__X
, __Y
),
1355 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1356 _mm_maskz_maddubs_epi16(__mmask8 __U
, __m128i __X
, __m128i __Y
) {
1357 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1358 (__v8hi
)_mm_maddubs_epi16(__X
, __Y
),
1359 (__v8hi
)_mm_setzero_si128());
1362 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1363 _mm256_mask_maddubs_epi16(__m256i __W
, __mmask16 __U
, __m256i __X
,
1365 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1366 (__v16hi
)_mm256_maddubs_epi16(__X
, __Y
),
1370 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1371 _mm256_maskz_maddubs_epi16(__mmask16 __U
, __m256i __X
, __m256i __Y
) {
1372 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1373 (__v16hi
)_mm256_maddubs_epi16(__X
, __Y
),
1374 (__v16hi
)_mm256_setzero_si256());
1377 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1378 _mm_mask_madd_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1379 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1380 (__v4si
)_mm_madd_epi16(__A
, __B
),
1384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1385 _mm_maskz_madd_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1386 return (__m128i
)__builtin_ia32_selectd_128((__mmask8
)__U
,
1387 (__v4si
)_mm_madd_epi16(__A
, __B
),
1388 (__v4si
)_mm_setzero_si128());
1391 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1392 _mm256_mask_madd_epi16(__m256i __W
, __mmask8 __U
, __m256i __A
, __m256i __B
) {
1393 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
1394 (__v8si
)_mm256_madd_epi16(__A
, __B
),
1398 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1399 _mm256_maskz_madd_epi16(__mmask8 __U
, __m256i __A
, __m256i __B
) {
1400 return (__m256i
)__builtin_ia32_selectd_256((__mmask8
)__U
,
1401 (__v8si
)_mm256_madd_epi16(__A
, __B
),
1402 (__v8si
)_mm256_setzero_si256());
1405 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1406 _mm_cvtsepi16_epi8 (__m128i __A
) {
1407 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1408 (__v16qi
) _mm_setzero_si128(),
1412 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1413 _mm_mask_cvtsepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1414 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1419 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1420 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1421 return (__m128i
) __builtin_ia32_pmovswb128_mask ((__v8hi
) __A
,
1422 (__v16qi
) _mm_setzero_si128(),
1426 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1427 _mm256_cvtsepi16_epi8 (__m256i __A
) {
1428 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1429 (__v16qi
) _mm_setzero_si128(),
1433 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1434 _mm256_mask_cvtsepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1435 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1440 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1441 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1442 return (__m128i
) __builtin_ia32_pmovswb256_mask ((__v16hi
) __A
,
1443 (__v16qi
) _mm_setzero_si128(),
1447 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1448 _mm_cvtusepi16_epi8 (__m128i __A
) {
1449 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1450 (__v16qi
) _mm_setzero_si128(),
1454 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1455 _mm_mask_cvtusepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1456 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1461 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1462 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1463 return (__m128i
) __builtin_ia32_pmovuswb128_mask ((__v8hi
) __A
,
1464 (__v16qi
) _mm_setzero_si128(),
1468 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1469 _mm256_cvtusepi16_epi8 (__m256i __A
) {
1470 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1471 (__v16qi
) _mm_setzero_si128(),
1475 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1476 _mm256_mask_cvtusepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1477 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1482 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1483 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1484 return (__m128i
) __builtin_ia32_pmovuswb256_mask ((__v16hi
) __A
,
1485 (__v16qi
) _mm_setzero_si128(),
1489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1490 _mm_cvtepi16_epi8 (__m128i __A
) {
1491 return (__m128i
)__builtin_shufflevector(
1492 __builtin_convertvector((__v8hi
)__A
, __v8qi
),
1493 (__v8qi
){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
1497 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1498 _mm_mask_cvtepi16_epi8 (__m128i __O
, __mmask8 __M
, __m128i __A
) {
1499 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1504 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1505 _mm_maskz_cvtepi16_epi8 (__mmask8 __M
, __m128i __A
) {
1506 return (__m128i
) __builtin_ia32_pmovwb128_mask ((__v8hi
) __A
,
1507 (__v16qi
) _mm_setzero_si128(),
1511 static __inline__
void __DEFAULT_FN_ATTRS128
1512 _mm_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1514 __builtin_ia32_pmovwb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1518 static __inline__
void __DEFAULT_FN_ATTRS128
1519 _mm_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1521 __builtin_ia32_pmovswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1524 static __inline__
void __DEFAULT_FN_ATTRS128
1525 _mm_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask8 __M
, __m128i __A
)
1527 __builtin_ia32_pmovuswb128mem_mask ((__v16qi
*) __P
, (__v8hi
) __A
, __M
);
1530 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1531 _mm256_cvtepi16_epi8 (__m256i __A
) {
1532 return (__m128i
)__builtin_convertvector((__v16hi
) __A
, __v16qi
);
1535 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1536 _mm256_mask_cvtepi16_epi8 (__m128i __O
, __mmask16 __M
, __m256i __A
) {
1537 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1538 (__v16qi
)_mm256_cvtepi16_epi8(__A
),
1542 static __inline__ __m128i __DEFAULT_FN_ATTRS256
1543 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M
, __m256i __A
) {
1544 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__M
,
1545 (__v16qi
)_mm256_cvtepi16_epi8(__A
),
1546 (__v16qi
)_mm_setzero_si128());
1549 static __inline__
void __DEFAULT_FN_ATTRS256
1550 _mm256_mask_cvtepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1552 __builtin_ia32_pmovwb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1555 static __inline__
void __DEFAULT_FN_ATTRS256
1556 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1558 __builtin_ia32_pmovswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1561 static __inline__
void __DEFAULT_FN_ATTRS256
1562 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P
, __mmask16 __M
, __m256i __A
)
1564 __builtin_ia32_pmovuswb256mem_mask ((__v16qi
*) __P
, (__v16hi
) __A
, __M
);
1567 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1568 _mm_mask_mulhrs_epi16(__m128i __W
, __mmask8 __U
, __m128i __X
, __m128i __Y
) {
1569 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1570 (__v8hi
)_mm_mulhrs_epi16(__X
, __Y
),
1574 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1575 _mm_maskz_mulhrs_epi16(__mmask8 __U
, __m128i __X
, __m128i __Y
) {
1576 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1577 (__v8hi
)_mm_mulhrs_epi16(__X
, __Y
),
1578 (__v8hi
)_mm_setzero_si128());
1581 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1582 _mm256_mask_mulhrs_epi16(__m256i __W
, __mmask16 __U
, __m256i __X
, __m256i __Y
) {
1583 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1584 (__v16hi
)_mm256_mulhrs_epi16(__X
, __Y
),
1588 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1589 _mm256_maskz_mulhrs_epi16(__mmask16 __U
, __m256i __X
, __m256i __Y
) {
1590 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1591 (__v16hi
)_mm256_mulhrs_epi16(__X
, __Y
),
1592 (__v16hi
)_mm256_setzero_si256());
1595 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1596 _mm_mask_mulhi_epu16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1597 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1598 (__v8hi
)_mm_mulhi_epu16(__A
, __B
),
1602 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1603 _mm_maskz_mulhi_epu16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1604 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1605 (__v8hi
)_mm_mulhi_epu16(__A
, __B
),
1606 (__v8hi
)_mm_setzero_si128());
1609 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1610 _mm256_mask_mulhi_epu16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1611 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1612 (__v16hi
)_mm256_mulhi_epu16(__A
, __B
),
1616 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1617 _mm256_maskz_mulhi_epu16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1618 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1619 (__v16hi
)_mm256_mulhi_epu16(__A
, __B
),
1620 (__v16hi
)_mm256_setzero_si256());
1623 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1624 _mm_mask_mulhi_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1625 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1626 (__v8hi
)_mm_mulhi_epi16(__A
, __B
),
1630 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1631 _mm_maskz_mulhi_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1632 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1633 (__v8hi
)_mm_mulhi_epi16(__A
, __B
),
1634 (__v8hi
)_mm_setzero_si128());
1637 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1638 _mm256_mask_mulhi_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1639 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1640 (__v16hi
)_mm256_mulhi_epi16(__A
, __B
),
1644 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1645 _mm256_maskz_mulhi_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1646 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1647 (__v16hi
)_mm256_mulhi_epi16(__A
, __B
),
1648 (__v16hi
)_mm256_setzero_si256());
1651 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1652 _mm_mask_unpackhi_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
1653 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1654 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
1658 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1659 _mm_maskz_unpackhi_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
1660 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1661 (__v16qi
)_mm_unpackhi_epi8(__A
, __B
),
1662 (__v16qi
)_mm_setzero_si128());
1665 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1666 _mm256_mask_unpackhi_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
1667 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1668 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
1672 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1673 _mm256_maskz_unpackhi_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
1674 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1675 (__v32qi
)_mm256_unpackhi_epi8(__A
, __B
),
1676 (__v32qi
)_mm256_setzero_si256());
1679 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1680 _mm_mask_unpackhi_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1681 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1682 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
1686 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1687 _mm_maskz_unpackhi_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1688 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1689 (__v8hi
)_mm_unpackhi_epi16(__A
, __B
),
1690 (__v8hi
) _mm_setzero_si128());
1693 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1694 _mm256_mask_unpackhi_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1695 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1696 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
1700 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1701 _mm256_maskz_unpackhi_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1702 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1703 (__v16hi
)_mm256_unpackhi_epi16(__A
, __B
),
1704 (__v16hi
)_mm256_setzero_si256());
1707 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1708 _mm_mask_unpacklo_epi8(__m128i __W
, __mmask16 __U
, __m128i __A
, __m128i __B
) {
1709 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1710 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
1714 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1715 _mm_maskz_unpacklo_epi8(__mmask16 __U
, __m128i __A
, __m128i __B
) {
1716 return (__m128i
)__builtin_ia32_selectb_128((__mmask16
)__U
,
1717 (__v16qi
)_mm_unpacklo_epi8(__A
, __B
),
1718 (__v16qi
)_mm_setzero_si128());
1721 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1722 _mm256_mask_unpacklo_epi8(__m256i __W
, __mmask32 __U
, __m256i __A
, __m256i __B
) {
1723 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1724 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
1728 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1729 _mm256_maskz_unpacklo_epi8(__mmask32 __U
, __m256i __A
, __m256i __B
) {
1730 return (__m256i
)__builtin_ia32_selectb_256((__mmask32
)__U
,
1731 (__v32qi
)_mm256_unpacklo_epi8(__A
, __B
),
1732 (__v32qi
)_mm256_setzero_si256());
1735 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1736 _mm_mask_unpacklo_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
) {
1737 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1738 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
1742 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1743 _mm_maskz_unpacklo_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
) {
1744 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1745 (__v8hi
)_mm_unpacklo_epi16(__A
, __B
),
1746 (__v8hi
) _mm_setzero_si128());
1749 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1750 _mm256_mask_unpacklo_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
) {
1751 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1752 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
1756 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1757 _mm256_maskz_unpacklo_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
) {
1758 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1759 (__v16hi
)_mm256_unpacklo_epi16(__A
, __B
),
1760 (__v16hi
)_mm256_setzero_si256());
1763 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1764 _mm_mask_cvtepi8_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
1766 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1767 (__v8hi
)_mm_cvtepi8_epi16(__A
),
1771 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1772 _mm_maskz_cvtepi8_epi16(__mmask8 __U
, __m128i __A
)
1774 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1775 (__v8hi
)_mm_cvtepi8_epi16(__A
),
1776 (__v8hi
)_mm_setzero_si128());
1779 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1780 _mm256_mask_cvtepi8_epi16(__m256i __W
, __mmask16 __U
, __m128i __A
)
1782 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1783 (__v16hi
)_mm256_cvtepi8_epi16(__A
),
1787 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1788 _mm256_maskz_cvtepi8_epi16(__mmask16 __U
, __m128i __A
)
1790 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1791 (__v16hi
)_mm256_cvtepi8_epi16(__A
),
1792 (__v16hi
)_mm256_setzero_si256());
1796 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1797 _mm_mask_cvtepu8_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
)
1799 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1800 (__v8hi
)_mm_cvtepu8_epi16(__A
),
1804 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1805 _mm_maskz_cvtepu8_epi16(__mmask8 __U
, __m128i __A
)
1807 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1808 (__v8hi
)_mm_cvtepu8_epi16(__A
),
1809 (__v8hi
)_mm_setzero_si128());
1812 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1813 _mm256_mask_cvtepu8_epi16(__m256i __W
, __mmask16 __U
, __m128i __A
)
1815 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1816 (__v16hi
)_mm256_cvtepu8_epi16(__A
),
1820 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1821 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U
, __m128i __A
)
1823 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1824 (__v16hi
)_mm256_cvtepu8_epi16(__A
),
1825 (__v16hi
)_mm256_setzero_si256());
/* Merge-masking _mm_shufflehi_epi16: 16-bit lanes with their U bit set take
 * _mm_shufflehi_epi16(A, imm); the remaining lanes are copied from W. */
#define _mm_mask_shufflehi_epi16(W, U, A, imm)                                 \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_shufflehi_epi16((A), (imm)),                  \
      (__v8hi)(__m128i)(W)))
/* Zero-masking _mm_shufflehi_epi16: 16-bit lanes with their U bit set take
 * _mm_shufflehi_epi16(A, imm); the remaining lanes are zeroed. */
#define _mm_maskz_shufflehi_epi16(U, A, imm)                                   \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_shufflehi_epi16((A), (imm)),                  \
      (__v8hi)_mm_setzero_si128()))
/* Merge-masking _mm256_shufflehi_epi16: 16-bit lanes with their U bit set
 * take _mm256_shufflehi_epi16(A, imm); the others are copied from W. */
#define _mm256_mask_shufflehi_epi16(W, U, A, imm)                              \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_shufflehi_epi16((A), (imm)),             \
      (__v16hi)(__m256i)(W)))
/* Zero-masking _mm256_shufflehi_epi16: 16-bit lanes with their U bit set
 * take _mm256_shufflehi_epi16(A, imm); the remaining lanes are zeroed. */
#define _mm256_maskz_shufflehi_epi16(U, A, imm)                                \
  ((__m256i)__builtin_ia32_selectw_256(                                        \
      (__mmask16)(U), (__v16hi)_mm256_shufflehi_epi16((A), (imm)),             \
      (__v16hi)_mm256_setzero_si256()))
/* Merge-masking _mm_shufflelo_epi16: 16-bit lanes with their U bit set take
 * _mm_shufflelo_epi16(A, imm); the remaining lanes are copied from W. */
#define _mm_mask_shufflelo_epi16(W, U, A, imm)                                 \
  ((__m128i)__builtin_ia32_selectw_128(                                        \
      (__mmask8)(U), (__v8hi)_mm_shufflelo_epi16((A), (imm)),                  \
      (__v8hi)(__m128i)(W)))
1854 #define _mm_maskz_shufflelo_epi16(U, A, imm) \
1855 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \
1856 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \
1857 (__v8hi)_mm_setzero_si128()))
1859 #define _mm256_mask_shufflelo_epi16(W, U, A, imm) \
1860 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
1861 (__v16hi)_mm256_shufflelo_epi16((A), \
1863 (__v16hi)(__m256i)(W)))
1865 #define _mm256_maskz_shufflelo_epi16(U, A, imm) \
1866 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \
1867 (__v16hi)_mm256_shufflelo_epi16((A), \
1869 (__v16hi)_mm256_setzero_si256()))
1871 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1872 _mm256_sllv_epi16(__m256i __A
, __m256i __B
)
1874 return (__m256i
)__builtin_ia32_psllv16hi((__v16hi
)__A
, (__v16hi
)__B
);
1877 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1878 _mm256_mask_sllv_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1880 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1881 (__v16hi
)_mm256_sllv_epi16(__A
, __B
),
1885 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1886 _mm256_maskz_sllv_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1888 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1889 (__v16hi
)_mm256_sllv_epi16(__A
, __B
),
1890 (__v16hi
)_mm256_setzero_si256());
1893 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1894 _mm_sllv_epi16(__m128i __A
, __m128i __B
)
1896 return (__m128i
)__builtin_ia32_psllv8hi((__v8hi
)__A
, (__v8hi
)__B
);
1899 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1900 _mm_mask_sllv_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1902 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1903 (__v8hi
)_mm_sllv_epi16(__A
, __B
),
1907 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1908 _mm_maskz_sllv_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
1910 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1911 (__v8hi
)_mm_sllv_epi16(__A
, __B
),
1912 (__v8hi
)_mm_setzero_si128());
1915 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1916 _mm_mask_sll_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
1918 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1919 (__v8hi
)_mm_sll_epi16(__A
, __B
),
1923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1924 _mm_maskz_sll_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
1926 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1927 (__v8hi
)_mm_sll_epi16(__A
, __B
),
1928 (__v8hi
)_mm_setzero_si128());
1931 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1932 _mm256_mask_sll_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
1934 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1935 (__v16hi
)_mm256_sll_epi16(__A
, __B
),
1939 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1940 _mm256_maskz_sll_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
1942 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1943 (__v16hi
)_mm256_sll_epi16(__A
, __B
),
1944 (__v16hi
)_mm256_setzero_si256());
1947 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1948 _mm_mask_slli_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
1950 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1951 (__v8hi
)_mm_slli_epi16(__A
, (int)__B
),
1955 static __inline__ __m128i __DEFAULT_FN_ATTRS128
1956 _mm_maskz_slli_epi16 (__mmask8 __U
, __m128i __A
, unsigned int __B
)
1958 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
1959 (__v8hi
)_mm_slli_epi16(__A
, (int)__B
),
1960 (__v8hi
)_mm_setzero_si128());
1963 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1964 _mm256_mask_slli_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
,
1967 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1968 (__v16hi
)_mm256_slli_epi16(__A
, (int)__B
),
1972 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1973 _mm256_maskz_slli_epi16(__mmask16 __U
, __m256i __A
, unsigned int __B
)
1975 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1976 (__v16hi
)_mm256_slli_epi16(__A
, (int)__B
),
1977 (__v16hi
)_mm256_setzero_si256());
1980 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1981 _mm256_srlv_epi16(__m256i __A
, __m256i __B
)
1983 return (__m256i
)__builtin_ia32_psrlv16hi((__v16hi
)__A
, (__v16hi
)__B
);
1986 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1987 _mm256_mask_srlv_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
1989 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1990 (__v16hi
)_mm256_srlv_epi16(__A
, __B
),
1994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
1995 _mm256_maskz_srlv_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
1997 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
1998 (__v16hi
)_mm256_srlv_epi16(__A
, __B
),
1999 (__v16hi
)_mm256_setzero_si256());
2002 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2003 _mm_srlv_epi16(__m128i __A
, __m128i __B
)
2005 return (__m128i
)__builtin_ia32_psrlv8hi((__v8hi
)__A
, (__v8hi
)__B
);
2008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2009 _mm_mask_srlv_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2011 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2012 (__v8hi
)_mm_srlv_epi16(__A
, __B
),
2016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017 _mm_maskz_srlv_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2019 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2020 (__v8hi
)_mm_srlv_epi16(__A
, __B
),
2021 (__v8hi
)_mm_setzero_si128());
2024 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2025 _mm256_srav_epi16(__m256i __A
, __m256i __B
)
2027 return (__m256i
)__builtin_ia32_psrav16hi((__v16hi
)__A
, (__v16hi
)__B
);
2030 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2031 _mm256_mask_srav_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m256i __B
)
2033 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2034 (__v16hi
)_mm256_srav_epi16(__A
, __B
),
2038 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2039 _mm256_maskz_srav_epi16(__mmask16 __U
, __m256i __A
, __m256i __B
)
2041 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2042 (__v16hi
)_mm256_srav_epi16(__A
, __B
),
2043 (__v16hi
)_mm256_setzero_si256());
2046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2047 _mm_srav_epi16(__m128i __A
, __m128i __B
)
2049 return (__m128i
)__builtin_ia32_psrav8hi((__v8hi
)__A
, (__v8hi
)__B
);
2052 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2053 _mm_mask_srav_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2055 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2056 (__v8hi
)_mm_srav_epi16(__A
, __B
),
2060 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2061 _mm_maskz_srav_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2063 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2064 (__v8hi
)_mm_srav_epi16(__A
, __B
),
2065 (__v8hi
)_mm_setzero_si128());
2068 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2069 _mm_mask_sra_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2071 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2072 (__v8hi
)_mm_sra_epi16(__A
, __B
),
2076 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2077 _mm_maskz_sra_epi16(__mmask8 __U
, __m128i __A
, __m128i __B
)
2079 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2080 (__v8hi
)_mm_sra_epi16(__A
, __B
),
2081 (__v8hi
)_mm_setzero_si128());
2084 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2085 _mm256_mask_sra_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
2087 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2088 (__v16hi
)_mm256_sra_epi16(__A
, __B
),
2092 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2093 _mm256_maskz_sra_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
2095 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2096 (__v16hi
)_mm256_sra_epi16(__A
, __B
),
2097 (__v16hi
)_mm256_setzero_si256());
2100 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2101 _mm_mask_srai_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, unsigned int __B
)
2103 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2104 (__v8hi
)_mm_srai_epi16(__A
, (int)__B
),
2108 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2109 _mm_maskz_srai_epi16(__mmask8 __U
, __m128i __A
, unsigned int __B
)
2111 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2112 (__v8hi
)_mm_srai_epi16(__A
, (int)__B
),
2113 (__v8hi
)_mm_setzero_si128());
2116 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2117 _mm256_mask_srai_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
,
2120 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2121 (__v16hi
)_mm256_srai_epi16(__A
, (int)__B
),
2125 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2126 _mm256_maskz_srai_epi16(__mmask16 __U
, __m256i __A
, unsigned int __B
)
2128 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2129 (__v16hi
)_mm256_srai_epi16(__A
, (int)__B
),
2130 (__v16hi
)_mm256_setzero_si256());
2133 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2134 _mm_mask_srl_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, __m128i __B
)
2136 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2137 (__v8hi
)_mm_srl_epi16(__A
, __B
),
2141 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2142 _mm_maskz_srl_epi16 (__mmask8 __U
, __m128i __A
, __m128i __B
)
2144 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2145 (__v8hi
)_mm_srl_epi16(__A
, __B
),
2146 (__v8hi
)_mm_setzero_si128());
2149 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150 _mm256_mask_srl_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, __m128i __B
)
2152 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2153 (__v16hi
)_mm256_srl_epi16(__A
, __B
),
2157 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2158 _mm256_maskz_srl_epi16(__mmask16 __U
, __m256i __A
, __m128i __B
)
2160 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2161 (__v16hi
)_mm256_srl_epi16(__A
, __B
),
2162 (__v16hi
)_mm256_setzero_si256());
2165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2166 _mm_mask_srli_epi16(__m128i __W
, __mmask8 __U
, __m128i __A
, int __B
)
2168 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2169 (__v8hi
)_mm_srli_epi16(__A
, __B
),
2173 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2174 _mm_maskz_srli_epi16 (__mmask8 __U
, __m128i __A
, int __B
)
2176 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__U
,
2177 (__v8hi
)_mm_srli_epi16(__A
, __B
),
2178 (__v8hi
)_mm_setzero_si128());
2181 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2182 _mm256_mask_srli_epi16(__m256i __W
, __mmask16 __U
, __m256i __A
, int __B
)
2184 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2185 (__v16hi
)_mm256_srli_epi16(__A
, __B
),
2189 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2190 _mm256_maskz_srli_epi16(__mmask16 __U
, __m256i __A
, int __B
)
2192 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__U
,
2193 (__v16hi
)_mm256_srli_epi16(__A
, __B
),
2194 (__v16hi
)_mm256_setzero_si256());
2197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2198 _mm_mask_mov_epi16 (__m128i __W
, __mmask8 __U
, __m128i __A
)
2200 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2205 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2206 _mm_maskz_mov_epi16 (__mmask8 __U
, __m128i __A
)
2208 return (__m128i
) __builtin_ia32_selectw_128 ((__mmask8
) __U
,
2210 (__v8hi
) _mm_setzero_si128 ());
2213 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2214 _mm256_mask_mov_epi16 (__m256i __W
, __mmask16 __U
, __m256i __A
)
2216 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2221 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2222 _mm256_maskz_mov_epi16 (__mmask16 __U
, __m256i __A
)
2224 return (__m256i
) __builtin_ia32_selectw_256 ((__mmask16
) __U
,
2226 (__v16hi
) _mm256_setzero_si256 ());
2229 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2230 _mm_mask_mov_epi8 (__m128i __W
, __mmask16 __U
, __m128i __A
)
2232 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2237 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2238 _mm_maskz_mov_epi8 (__mmask16 __U
, __m128i __A
)
2240 return (__m128i
) __builtin_ia32_selectb_128 ((__mmask16
) __U
,
2242 (__v16qi
) _mm_setzero_si128 ());
2245 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2246 _mm256_mask_mov_epi8 (__m256i __W
, __mmask32 __U
, __m256i __A
)
2248 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2253 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254 _mm256_maskz_mov_epi8 (__mmask32 __U
, __m256i __A
)
2256 return (__m256i
) __builtin_ia32_selectb_256 ((__mmask32
) __U
,
2258 (__v32qi
) _mm256_setzero_si256 ());
2262 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2263 _mm_mask_set1_epi8 (__m128i __O
, __mmask16 __M
, char __A
)
2265 return (__m128i
) __builtin_ia32_selectb_128(__M
,
2266 (__v16qi
) _mm_set1_epi8(__A
),
2270 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2271 _mm_maskz_set1_epi8 (__mmask16 __M
, char __A
)
2273 return (__m128i
) __builtin_ia32_selectb_128(__M
,
2274 (__v16qi
) _mm_set1_epi8(__A
),
2275 (__v16qi
) _mm_setzero_si128());
2278 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2279 _mm256_mask_set1_epi8 (__m256i __O
, __mmask32 __M
, char __A
)
2281 return (__m256i
) __builtin_ia32_selectb_256(__M
,
2282 (__v32qi
) _mm256_set1_epi8(__A
),
2286 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2287 _mm256_maskz_set1_epi8 (__mmask32 __M
, char __A
)
2289 return (__m256i
) __builtin_ia32_selectb_256(__M
,
2290 (__v32qi
) _mm256_set1_epi8(__A
),
2291 (__v32qi
) _mm256_setzero_si256());
2294 static __inline __m128i __DEFAULT_FN_ATTRS128
2295 _mm_loadu_epi16 (void const *__P
)
2297 struct __loadu_epi16
{
2299 } __attribute__((__packed__
, __may_alias__
));
2300 return ((const struct __loadu_epi16
*)__P
)->__v
;
2303 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2304 _mm_mask_loadu_epi16 (__m128i __W
, __mmask8 __U
, void const *__P
)
2306 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((const __v8hi
*) __P
,
2311 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2312 _mm_maskz_loadu_epi16 (__mmask8 __U
, void const *__P
)
2314 return (__m128i
) __builtin_ia32_loaddquhi128_mask ((const __v8hi
*) __P
,
2316 _mm_setzero_si128 (),
2320 static __inline __m256i __DEFAULT_FN_ATTRS256
2321 _mm256_loadu_epi16 (void const *__P
)
2323 struct __loadu_epi16
{
2325 } __attribute__((__packed__
, __may_alias__
));
2326 return ((const struct __loadu_epi16
*)__P
)->__v
;
2329 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2330 _mm256_mask_loadu_epi16 (__m256i __W
, __mmask16 __U
, void const *__P
)
2332 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((const __v16hi
*) __P
,
2337 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2338 _mm256_maskz_loadu_epi16 (__mmask16 __U
, void const *__P
)
2340 return (__m256i
) __builtin_ia32_loaddquhi256_mask ((const __v16hi
*) __P
,
2342 _mm256_setzero_si256 (),
2346 static __inline __m128i __DEFAULT_FN_ATTRS128
2347 _mm_loadu_epi8 (void const *__P
)
2349 struct __loadu_epi8
{
2351 } __attribute__((__packed__
, __may_alias__
));
2352 return ((const struct __loadu_epi8
*)__P
)->__v
;
2355 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2356 _mm_mask_loadu_epi8 (__m128i __W
, __mmask16 __U
, void const *__P
)
2358 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((const __v16qi
*) __P
,
2363 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2364 _mm_maskz_loadu_epi8 (__mmask16 __U
, void const *__P
)
2366 return (__m128i
) __builtin_ia32_loaddquqi128_mask ((const __v16qi
*) __P
,
2368 _mm_setzero_si128 (),
2372 static __inline __m256i __DEFAULT_FN_ATTRS256
2373 _mm256_loadu_epi8 (void const *__P
)
2375 struct __loadu_epi8
{
2377 } __attribute__((__packed__
, __may_alias__
));
2378 return ((const struct __loadu_epi8
*)__P
)->__v
;
2381 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2382 _mm256_mask_loadu_epi8 (__m256i __W
, __mmask32 __U
, void const *__P
)
2384 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((const __v32qi
*) __P
,
2389 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2390 _mm256_maskz_loadu_epi8 (__mmask32 __U
, void const *__P
)
2392 return (__m256i
) __builtin_ia32_loaddquqi256_mask ((const __v32qi
*) __P
,
2394 _mm256_setzero_si256 (),
2398 static __inline
void __DEFAULT_FN_ATTRS128
2399 _mm_storeu_epi16 (void *__P
, __m128i __A
)
2401 struct __storeu_epi16
{
2403 } __attribute__((__packed__
, __may_alias__
));
2404 ((struct __storeu_epi16
*)__P
)->__v
= __A
;
2407 static __inline__
void __DEFAULT_FN_ATTRS128
2408 _mm_mask_storeu_epi16 (void *__P
, __mmask8 __U
, __m128i __A
)
2410 __builtin_ia32_storedquhi128_mask ((__v8hi
*) __P
,
2415 static __inline
void __DEFAULT_FN_ATTRS256
2416 _mm256_storeu_epi16 (void *__P
, __m256i __A
)
2418 struct __storeu_epi16
{
2420 } __attribute__((__packed__
, __may_alias__
));
2421 ((struct __storeu_epi16
*)__P
)->__v
= __A
;
2424 static __inline__
void __DEFAULT_FN_ATTRS256
2425 _mm256_mask_storeu_epi16 (void *__P
, __mmask16 __U
, __m256i __A
)
2427 __builtin_ia32_storedquhi256_mask ((__v16hi
*) __P
,
2432 static __inline
void __DEFAULT_FN_ATTRS128
2433 _mm_storeu_epi8 (void *__P
, __m128i __A
)
2435 struct __storeu_epi8
{
2437 } __attribute__((__packed__
, __may_alias__
));
2438 ((struct __storeu_epi8
*)__P
)->__v
= __A
;
2441 static __inline__
void __DEFAULT_FN_ATTRS128
2442 _mm_mask_storeu_epi8 (void *__P
, __mmask16 __U
, __m128i __A
)
2444 __builtin_ia32_storedquqi128_mask ((__v16qi
*) __P
,
2449 static __inline
void __DEFAULT_FN_ATTRS256
2450 _mm256_storeu_epi8 (void *__P
, __m256i __A
)
2452 struct __storeu_epi8
{
2454 } __attribute__((__packed__
, __may_alias__
));
2455 ((struct __storeu_epi8
*)__P
)->__v
= __A
;
2458 static __inline__
void __DEFAULT_FN_ATTRS256
2459 _mm256_mask_storeu_epi8 (void *__P
, __mmask32 __U
, __m256i __A
)
2461 __builtin_ia32_storedquqi256_mask ((__v32qi
*) __P
,
2466 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2467 _mm_test_epi8_mask (__m128i __A
, __m128i __B
)
2469 return _mm_cmpneq_epi8_mask (_mm_and_si128(__A
, __B
), _mm_setzero_si128());
2472 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2473 _mm_mask_test_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
2475 return _mm_mask_cmpneq_epi8_mask (__U
, _mm_and_si128 (__A
, __B
),
2476 _mm_setzero_si128());
2479 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2480 _mm256_test_epi8_mask (__m256i __A
, __m256i __B
)
2482 return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A
, __B
),
2483 _mm256_setzero_si256());
2486 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2487 _mm256_mask_test_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
2489 return _mm256_mask_cmpneq_epi8_mask (__U
, _mm256_and_si256(__A
, __B
),
2490 _mm256_setzero_si256());
2493 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2494 _mm_test_epi16_mask (__m128i __A
, __m128i __B
)
2496 return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2499 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2500 _mm_mask_test_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
2502 return _mm_mask_cmpneq_epi16_mask (__U
, _mm_and_si128 (__A
, __B
),
2503 _mm_setzero_si128());
2506 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2507 _mm256_test_epi16_mask (__m256i __A
, __m256i __B
)
2509 return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A
, __B
),
2510 _mm256_setzero_si256 ());
2513 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2514 _mm256_mask_test_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
2516 return _mm256_mask_cmpneq_epi16_mask (__U
, _mm256_and_si256(__A
, __B
),
2517 _mm256_setzero_si256());
2520 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2521 _mm_testn_epi8_mask (__m128i __A
, __m128i __B
)
2523 return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2526 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2527 _mm_mask_testn_epi8_mask (__mmask16 __U
, __m128i __A
, __m128i __B
)
2529 return _mm_mask_cmpeq_epi8_mask (__U
, _mm_and_si128 (__A
, __B
),
2530 _mm_setzero_si128());
2533 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2534 _mm256_testn_epi8_mask (__m256i __A
, __m256i __B
)
2536 return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A
, __B
),
2537 _mm256_setzero_si256());
2540 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2541 _mm256_mask_testn_epi8_mask (__mmask32 __U
, __m256i __A
, __m256i __B
)
2543 return _mm256_mask_cmpeq_epi8_mask (__U
, _mm256_and_si256 (__A
, __B
),
2544 _mm256_setzero_si256());
2547 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2548 _mm_testn_epi16_mask (__m128i __A
, __m128i __B
)
2550 return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A
, __B
), _mm_setzero_si128());
2553 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2554 _mm_mask_testn_epi16_mask (__mmask8 __U
, __m128i __A
, __m128i __B
)
2556 return _mm_mask_cmpeq_epi16_mask (__U
, _mm_and_si128(__A
, __B
), _mm_setzero_si128());
2559 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2560 _mm256_testn_epi16_mask (__m256i __A
, __m256i __B
)
2562 return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A
, __B
),
2563 _mm256_setzero_si256());
2566 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2567 _mm256_mask_testn_epi16_mask (__mmask16 __U
, __m256i __A
, __m256i __B
)
2569 return _mm256_mask_cmpeq_epi16_mask (__U
, _mm256_and_si256 (__A
, __B
),
2570 _mm256_setzero_si256());
2573 static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
2574 _mm_movepi8_mask (__m128i __A
)
2576 return (__mmask16
) __builtin_ia32_cvtb2mask128 ((__v16qi
) __A
);
2579 static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
2580 _mm256_movepi8_mask (__m256i __A
)
2582 return (__mmask32
) __builtin_ia32_cvtb2mask256 ((__v32qi
) __A
);
2585 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
2586 _mm_movepi16_mask (__m128i __A
)
2588 return (__mmask8
) __builtin_ia32_cvtw2mask128 ((__v8hi
) __A
);
2591 static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
2592 _mm256_movepi16_mask (__m256i __A
)
2594 return (__mmask16
) __builtin_ia32_cvtw2mask256 ((__v16hi
) __A
);
2597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2598 _mm_movm_epi8 (__mmask16 __A
)
2600 return (__m128i
) __builtin_ia32_cvtmask2b128 (__A
);
2603 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2604 _mm256_movm_epi8 (__mmask32 __A
)
2606 return (__m256i
) __builtin_ia32_cvtmask2b256 (__A
);
2609 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2610 _mm_movm_epi16 (__mmask8 __A
)
2612 return (__m128i
) __builtin_ia32_cvtmask2w128 (__A
);
2615 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2616 _mm256_movm_epi16 (__mmask16 __A
)
2618 return (__m256i
) __builtin_ia32_cvtmask2w256 (__A
);
2621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2622 _mm_mask_broadcastb_epi8 (__m128i __O
, __mmask16 __M
, __m128i __A
)
2624 return (__m128i
)__builtin_ia32_selectb_128(__M
,
2625 (__v16qi
) _mm_broadcastb_epi8(__A
),
2629 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2630 _mm_maskz_broadcastb_epi8 (__mmask16 __M
, __m128i __A
)
2632 return (__m128i
)__builtin_ia32_selectb_128(__M
,
2633 (__v16qi
) _mm_broadcastb_epi8(__A
),
2634 (__v16qi
) _mm_setzero_si128());
2637 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2638 _mm256_mask_broadcastb_epi8 (__m256i __O
, __mmask32 __M
, __m128i __A
)
2640 return (__m256i
)__builtin_ia32_selectb_256(__M
,
2641 (__v32qi
) _mm256_broadcastb_epi8(__A
),
2645 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2646 _mm256_maskz_broadcastb_epi8 (__mmask32 __M
, __m128i __A
)
2648 return (__m256i
)__builtin_ia32_selectb_256(__M
,
2649 (__v32qi
) _mm256_broadcastb_epi8(__A
),
2650 (__v32qi
) _mm256_setzero_si256());
2653 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2654 _mm_mask_broadcastw_epi16 (__m128i __O
, __mmask8 __M
, __m128i __A
)
2656 return (__m128i
)__builtin_ia32_selectw_128(__M
,
2657 (__v8hi
) _mm_broadcastw_epi16(__A
),
2661 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2662 _mm_maskz_broadcastw_epi16 (__mmask8 __M
, __m128i __A
)
2664 return (__m128i
)__builtin_ia32_selectw_128(__M
,
2665 (__v8hi
) _mm_broadcastw_epi16(__A
),
2666 (__v8hi
) _mm_setzero_si128());
2669 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2670 _mm256_mask_broadcastw_epi16 (__m256i __O
, __mmask16 __M
, __m128i __A
)
2672 return (__m256i
)__builtin_ia32_selectw_256(__M
,
2673 (__v16hi
) _mm256_broadcastw_epi16(__A
),
2677 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2678 _mm256_maskz_broadcastw_epi16 (__mmask16 __M
, __m128i __A
)
2680 return (__m256i
)__builtin_ia32_selectw_256(__M
,
2681 (__v16hi
) _mm256_broadcastw_epi16(__A
),
2682 (__v16hi
) _mm256_setzero_si256());
2685 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2686 _mm256_mask_set1_epi16 (__m256i __O
, __mmask16 __M
, short __A
)
2688 return (__m256i
) __builtin_ia32_selectw_256 (__M
,
2689 (__v16hi
) _mm256_set1_epi16(__A
),
2693 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2694 _mm256_maskz_set1_epi16 (__mmask16 __M
, short __A
)
2696 return (__m256i
) __builtin_ia32_selectw_256(__M
,
2697 (__v16hi
)_mm256_set1_epi16(__A
),
2698 (__v16hi
) _mm256_setzero_si256());
2701 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2702 _mm_mask_set1_epi16 (__m128i __O
, __mmask8 __M
, short __A
)
2704 return (__m128i
) __builtin_ia32_selectw_128(__M
,
2705 (__v8hi
) _mm_set1_epi16(__A
),
2709 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2710 _mm_maskz_set1_epi16 (__mmask8 __M
, short __A
)
2712 return (__m128i
) __builtin_ia32_selectw_128(__M
,
2713 (__v8hi
) _mm_set1_epi16(__A
),
2714 (__v8hi
) _mm_setzero_si128());
2717 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2718 _mm_permutexvar_epi16 (__m128i __A
, __m128i __B
)
2720 return (__m128i
)__builtin_ia32_permvarhi128((__v8hi
) __B
, (__v8hi
) __A
);
2723 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2724 _mm_maskz_permutexvar_epi16 (__mmask8 __M
, __m128i __A
, __m128i __B
)
2726 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
2727 (__v8hi
)_mm_permutexvar_epi16(__A
, __B
),
2728 (__v8hi
) _mm_setzero_si128());
2731 static __inline__ __m128i __DEFAULT_FN_ATTRS128
2732 _mm_mask_permutexvar_epi16 (__m128i __W
, __mmask8 __M
, __m128i __A
,
2735 return (__m128i
)__builtin_ia32_selectw_128((__mmask8
)__M
,
2736 (__v8hi
)_mm_permutexvar_epi16(__A
, __B
),
2740 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2741 _mm256_permutexvar_epi16 (__m256i __A
, __m256i __B
)
2743 return (__m256i
)__builtin_ia32_permvarhi256((__v16hi
) __B
, (__v16hi
) __A
);
2746 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2747 _mm256_maskz_permutexvar_epi16 (__mmask16 __M
, __m256i __A
,
2750 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
2751 (__v16hi
)_mm256_permutexvar_epi16(__A
, __B
),
2752 (__v16hi
)_mm256_setzero_si256());
2755 static __inline__ __m256i __DEFAULT_FN_ATTRS256
2756 _mm256_mask_permutexvar_epi16 (__m256i __W
, __mmask16 __M
, __m256i __A
,
2759 return (__m256i
)__builtin_ia32_selectw_256((__mmask16
)__M
,
2760 (__v16hi
)_mm256_permutexvar_epi16(__A
, __B
),
/* Masked forms of the alignr intrinsics: the unmasked alignr of (A, B, N)
 * is evaluated and merged per byte under mask U with W (mask form) or with
 * zero (maskz form). */
#define _mm_mask_alignr_epi8(W, U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)(__m128i)(W)))

#define _mm_maskz_alignr_epi8(U, A, B, N) \
  ((__m128i)__builtin_ia32_selectb_128( \
      (__mmask16)(U), \
      (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \
      (__v16qi)_mm_setzero_si128()))

#define _mm256_mask_alignr_epi8(W, U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)(__m256i)(W)))

#define _mm256_maskz_alignr_epi8(U, A, B, N) \
  ((__m256i)__builtin_ia32_selectb_256( \
      (__mmask32)(U), \
      (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \
      (__v32qi)_mm256_setzero_si256()))
/* dbsad wrappers.  The unmasked macros forward the byte vectors A and B and
 * the immediate imm to the dbpsadbw builtin; the masked macros merge that
 * result per 16-bit element under mask U with W (mask form) or with zero
 * (maskz form). */
#define _mm_dbsad_epu8(A, B, imm) \
  ((__m128i)__builtin_ia32_dbpsadbw128( \
      (__v16qi)(__m128i)(A), (__v16qi)(__m128i)(B), (int)(imm)))

#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)(__m128i)(W)))

#define _mm_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectw_128( \
      (__mmask8)(U), \
      (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \
      (__v8hi)_mm_setzero_si128()))

#define _mm256_dbsad_epu8(A, B, imm) \
  ((__m256i)__builtin_ia32_dbpsadbw256( \
      (__v32qi)(__m256i)(A), (__v32qi)(__m256i)(B), (int)(imm)))

#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)(__m256i)(W)))

#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectw_256( \
      (__mmask16)(U), \
      (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
      (__v16hi)_mm256_setzero_si256()))
2812 static __inline__
short __DEFAULT_FN_ATTRS128
2813 _mm_reduce_add_epi16(__m128i __W
) {
2814 return __builtin_reduce_add((__v8hi
)__W
);
2817 static __inline__
short __DEFAULT_FN_ATTRS128
2818 _mm_reduce_mul_epi16(__m128i __W
) {
2819 return __builtin_reduce_mul((__v8hi
)__W
);
2822 static __inline__
short __DEFAULT_FN_ATTRS128
2823 _mm_reduce_and_epi16(__m128i __W
) {
2824 return __builtin_reduce_and((__v8hi
)__W
);
2827 static __inline__
short __DEFAULT_FN_ATTRS128
2828 _mm_reduce_or_epi16(__m128i __W
) {
2829 return __builtin_reduce_or((__v8hi
)__W
);
2832 static __inline__
short __DEFAULT_FN_ATTRS128
2833 _mm_mask_reduce_add_epi16( __mmask8 __M
, __m128i __W
) {
2834 __W
= _mm_maskz_mov_epi16(__M
, __W
);
2835 return __builtin_reduce_add((__v8hi
)__W
);
2838 static __inline__
short __DEFAULT_FN_ATTRS128
2839 _mm_mask_reduce_mul_epi16( __mmask8 __M
, __m128i __W
) {
2840 __W
= _mm_mask_mov_epi16(_mm_set1_epi16(1), __M
, __W
);
2841 return __builtin_reduce_mul((__v8hi
)__W
);
2844 static __inline__
short __DEFAULT_FN_ATTRS128
2845 _mm_mask_reduce_and_epi16( __mmask8 __M
, __m128i __W
) {
2846 __W
= _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M
, __W
);
2847 return __builtin_reduce_and((__v8hi
)__W
);
2850 static __inline__
short __DEFAULT_FN_ATTRS128
2851 _mm_mask_reduce_or_epi16(__mmask8 __M
, __m128i __W
) {
2852 __W
= _mm_maskz_mov_epi16(__M
, __W
);
2853 return __builtin_reduce_or((__v8hi
)__W
);
2856 static __inline__
short __DEFAULT_FN_ATTRS128
2857 _mm_reduce_max_epi16(__m128i __V
) {
2858 return __builtin_reduce_max((__v8hi
)__V
);
2861 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2862 _mm_reduce_max_epu16(__m128i __V
) {
2863 return __builtin_reduce_max((__v8hu
)__V
);
2866 static __inline__
short __DEFAULT_FN_ATTRS128
2867 _mm_reduce_min_epi16(__m128i __V
) {
2868 return __builtin_reduce_min((__v8hi
)__V
);
2871 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2872 _mm_reduce_min_epu16(__m128i __V
) {
2873 return __builtin_reduce_min((__v8hu
)__V
);
2876 static __inline__
short __DEFAULT_FN_ATTRS128
2877 _mm_mask_reduce_max_epi16(__mmask16 __M
, __m128i __V
) {
2878 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M
, __V
);
2879 return __builtin_reduce_max((__v8hi
)__V
);
2882 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2883 _mm_mask_reduce_max_epu16(__mmask16 __M
, __m128i __V
) {
2884 __V
= _mm_maskz_mov_epi16(__M
, __V
);
2885 return __builtin_reduce_max((__v8hu
)__V
);
2888 static __inline__
short __DEFAULT_FN_ATTRS128
2889 _mm_mask_reduce_min_epi16(__mmask16 __M
, __m128i __V
) {
2890 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M
, __V
);
2891 return __builtin_reduce_min((__v8hi
)__V
);
2894 static __inline__
unsigned short __DEFAULT_FN_ATTRS128
2895 _mm_mask_reduce_min_epu16(__mmask16 __M
, __m128i __V
) {
2896 __V
= _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M
, __V
);
2897 return __builtin_reduce_min((__v8hu
)__V
);
2900 static __inline__
short __DEFAULT_FN_ATTRS256
2901 _mm256_reduce_add_epi16(__m256i __W
) {
2902 return __builtin_reduce_add((__v16hi
)__W
);
2905 static __inline__
short __DEFAULT_FN_ATTRS256
2906 _mm256_reduce_mul_epi16(__m256i __W
) {
2907 return __builtin_reduce_mul((__v16hi
)__W
);
2910 static __inline__
short __DEFAULT_FN_ATTRS256
2911 _mm256_reduce_and_epi16(__m256i __W
) {
2912 return __builtin_reduce_and((__v16hi
)__W
);
2915 static __inline__
short __DEFAULT_FN_ATTRS256
2916 _mm256_reduce_or_epi16(__m256i __W
) {
2917 return __builtin_reduce_or((__v16hi
)__W
);
2920 static __inline__
short __DEFAULT_FN_ATTRS256
2921 _mm256_mask_reduce_add_epi16( __mmask16 __M
, __m256i __W
) {
2922 __W
= _mm256_maskz_mov_epi16(__M
, __W
);
2923 return __builtin_reduce_add((__v16hi
)__W
);
2926 static __inline__
short __DEFAULT_FN_ATTRS256
2927 _mm256_mask_reduce_mul_epi16( __mmask16 __M
, __m256i __W
) {
2928 __W
= _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M
, __W
);
2929 return __builtin_reduce_mul((__v16hi
)__W
);
2932 static __inline__
short __DEFAULT_FN_ATTRS256
2933 _mm256_mask_reduce_and_epi16( __mmask16 __M
, __m256i __W
) {
2934 __W
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M
, __W
);
2935 return __builtin_reduce_and((__v16hi
)__W
);
2938 static __inline__
short __DEFAULT_FN_ATTRS256
2939 _mm256_mask_reduce_or_epi16(__mmask16 __M
, __m256i __W
) {
2940 __W
= _mm256_maskz_mov_epi16(__M
, __W
);
2941 return __builtin_reduce_or((__v16hi
)__W
);
2944 static __inline__
short __DEFAULT_FN_ATTRS256
2945 _mm256_reduce_max_epi16(__m256i __V
) {
2946 return __builtin_reduce_max((__v16hi
)__V
);
2949 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2950 _mm256_reduce_max_epu16(__m256i __V
) {
2951 return __builtin_reduce_max((__v16hu
)__V
);
2954 static __inline__
short __DEFAULT_FN_ATTRS256
2955 _mm256_reduce_min_epi16(__m256i __V
) {
2956 return __builtin_reduce_min((__v16hi
)__V
);
2959 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2960 _mm256_reduce_min_epu16(__m256i __V
) {
2961 return __builtin_reduce_min((__v16hu
)__V
);
2964 static __inline__
short __DEFAULT_FN_ATTRS256
2965 _mm256_mask_reduce_max_epi16(__mmask16 __M
, __m256i __V
) {
2966 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M
, __V
);
2967 return __builtin_reduce_max((__v16hi
)__V
);
2970 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2971 _mm256_mask_reduce_max_epu16(__mmask16 __M
, __m256i __V
) {
2972 __V
= _mm256_maskz_mov_epi16(__M
, __V
);
2973 return __builtin_reduce_max((__v16hu
)__V
);
2976 static __inline__
short __DEFAULT_FN_ATTRS256
2977 _mm256_mask_reduce_min_epi16(__mmask16 __M
, __m256i __V
) {
2978 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M
, __V
);
2979 return __builtin_reduce_min((__v16hi
)__V
);
2982 static __inline__
unsigned short __DEFAULT_FN_ATTRS256
2983 _mm256_mask_reduce_min_epu16(__mmask16 __M
, __m256i __V
) {
2984 __V
= _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M
, __V
);
2985 return __builtin_reduce_min((__v16hu
)__V
);
2988 static __inline__
signed char __DEFAULT_FN_ATTRS128
2989 _mm_reduce_add_epi8(__m128i __W
) {
2990 return __builtin_reduce_add((__v16qs
)__W
);
2993 static __inline__
signed char __DEFAULT_FN_ATTRS128
2994 _mm_reduce_mul_epi8(__m128i __W
) {
2995 return __builtin_reduce_mul((__v16qs
)__W
);
2998 static __inline__
signed char __DEFAULT_FN_ATTRS128
2999 _mm_reduce_and_epi8(__m128i __W
) {
3000 return __builtin_reduce_and((__v16qs
)__W
);
3003 static __inline__
signed char __DEFAULT_FN_ATTRS128
3004 _mm_reduce_or_epi8(__m128i __W
) {
3005 return __builtin_reduce_or((__v16qs
)__W
);
3008 static __inline__
signed char __DEFAULT_FN_ATTRS128
3009 _mm_mask_reduce_add_epi8(__mmask16 __M
, __m128i __W
) {
3010 __W
= _mm_maskz_mov_epi8(__M
, __W
);
3011 return __builtin_reduce_add((__v16qs
)__W
);
3014 static __inline__
signed char __DEFAULT_FN_ATTRS128
3015 _mm_mask_reduce_mul_epi8(__mmask16 __M
, __m128i __W
) {
3016 __W
= _mm_mask_mov_epi8(_mm_set1_epi8(1), __M
, __W
);
3017 return __builtin_reduce_mul((__v16qs
)__W
);
3020 static __inline__
signed char __DEFAULT_FN_ATTRS128
3021 _mm_mask_reduce_and_epi8(__mmask16 __M
, __m128i __W
) {
3022 __W
= _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M
, __W
);
3023 return __builtin_reduce_and((__v16qs
)__W
);
3026 static __inline__
signed char __DEFAULT_FN_ATTRS128
3027 _mm_mask_reduce_or_epi8(__mmask16 __M
, __m128i __W
) {
3028 __W
= _mm_maskz_mov_epi8(__M
, __W
);
3029 return __builtin_reduce_or((__v16qs
)__W
);
3032 static __inline__
signed char __DEFAULT_FN_ATTRS128
3033 _mm_reduce_max_epi8(__m128i __V
) {
3034 return __builtin_reduce_max((__v16qs
)__V
);
3037 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3038 _mm_reduce_max_epu8(__m128i __V
) {
3039 return __builtin_reduce_max((__v16qu
)__V
);
3042 static __inline__
signed char __DEFAULT_FN_ATTRS128
3043 _mm_reduce_min_epi8(__m128i __V
) {
3044 return __builtin_reduce_min((__v16qs
)__V
);
3047 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3048 _mm_reduce_min_epu8(__m128i __V
) {
3049 return __builtin_reduce_min((__v16qu
)__V
);
3052 static __inline__
signed char __DEFAULT_FN_ATTRS128
3053 _mm_mask_reduce_max_epi8(__mmask16 __M
, __m128i __V
) {
3054 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M
, __V
);
3055 return __builtin_reduce_max((__v16qs
)__V
);
3058 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3059 _mm_mask_reduce_max_epu8(__mmask16 __M
, __m128i __V
) {
3060 __V
= _mm_maskz_mov_epi8(__M
, __V
);
3061 return __builtin_reduce_max((__v16qu
)__V
);
3064 static __inline__
signed char __DEFAULT_FN_ATTRS128
3065 _mm_mask_reduce_min_epi8(__mmask16 __M
, __m128i __V
) {
3066 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(127), __M
, __V
);
3067 return __builtin_reduce_min((__v16qs
)__V
);
3070 static __inline__
unsigned char __DEFAULT_FN_ATTRS128
3071 _mm_mask_reduce_min_epu8(__mmask16 __M
, __m128i __V
) {
3072 __V
= _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M
, __V
);
3073 return __builtin_reduce_min((__v16qu
)__V
);
3076 static __inline__
signed char __DEFAULT_FN_ATTRS256
3077 _mm256_reduce_add_epi8(__m256i __W
) {
3078 return __builtin_reduce_add((__v32qs
)__W
);
3081 static __inline__
signed char __DEFAULT_FN_ATTRS256
3082 _mm256_reduce_mul_epi8(__m256i __W
) {
3083 return __builtin_reduce_mul((__v32qs
)__W
);
3086 static __inline__
signed char __DEFAULT_FN_ATTRS256
3087 _mm256_reduce_and_epi8(__m256i __W
) {
3088 return __builtin_reduce_and((__v32qs
)__W
);
3091 static __inline__
signed char __DEFAULT_FN_ATTRS256
3092 _mm256_reduce_or_epi8(__m256i __W
) {
3093 return __builtin_reduce_or((__v32qs
)__W
);
3096 static __inline__
signed char __DEFAULT_FN_ATTRS256
3097 _mm256_mask_reduce_add_epi8(__mmask32 __M
, __m256i __W
) {
3098 __W
= _mm256_maskz_mov_epi8(__M
, __W
);
3099 return __builtin_reduce_add((__v32qs
)__W
);
3102 static __inline__
signed char __DEFAULT_FN_ATTRS256
3103 _mm256_mask_reduce_mul_epi8(__mmask32 __M
, __m256i __W
) {
3104 __W
= _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M
, __W
);
3105 return __builtin_reduce_mul((__v32qs
)__W
);
3108 static __inline__
signed char __DEFAULT_FN_ATTRS256
3109 _mm256_mask_reduce_and_epi8(__mmask32 __M
, __m256i __W
) {
3110 __W
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M
, __W
);
3111 return __builtin_reduce_and((__v32qs
)__W
);
3114 static __inline__
signed char __DEFAULT_FN_ATTRS256
3115 _mm256_mask_reduce_or_epi8(__mmask32 __M
, __m256i __W
) {
3116 __W
= _mm256_maskz_mov_epi8(__M
, __W
);
3117 return __builtin_reduce_or((__v32qs
)__W
);
3120 static __inline__
signed char __DEFAULT_FN_ATTRS256
3121 _mm256_reduce_max_epi8(__m256i __V
) {
3122 return __builtin_reduce_max((__v32qs
)__V
);
3125 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3126 _mm256_reduce_max_epu8(__m256i __V
) {
3127 return __builtin_reduce_max((__v32qu
)__V
);
3130 static __inline__
signed char __DEFAULT_FN_ATTRS256
3131 _mm256_reduce_min_epi8(__m256i __V
) {
3132 return __builtin_reduce_min((__v32qs
)__V
);
3135 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3136 _mm256_reduce_min_epu8(__m256i __V
) {
3137 return __builtin_reduce_min((__v32qu
)__V
);
3140 static __inline__
signed char __DEFAULT_FN_ATTRS256
3141 _mm256_mask_reduce_max_epi8(__mmask32 __M
, __m256i __V
) {
3142 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M
, __V
);
3143 return __builtin_reduce_max((__v32qs
)__V
);
3146 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3147 _mm256_mask_reduce_max_epu8(__mmask32 __M
, __m256i __V
) {
3148 __V
= _mm256_maskz_mov_epi8(__M
, __V
);
3149 return __builtin_reduce_max((__v32qu
)__V
);
3152 static __inline__
signed char __DEFAULT_FN_ATTRS256
3153 _mm256_mask_reduce_min_epi8(__mmask32 __M
, __m256i __V
) {
3154 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M
, __V
);
3155 return __builtin_reduce_min((__v32qs
)__V
);
3158 static __inline__
unsigned char __DEFAULT_FN_ATTRS256
3159 _mm256_mask_reduce_min_epu8(__mmask32 __M
, __m256i __V
) {
3160 __V
= _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M
, __V
);
3161 return __builtin_reduce_min((__v32qu
)__V
);
3164 #undef __DEFAULT_FN_ATTRS128
3165 #undef __DEFAULT_FN_ATTRS256
3167 #endif /* __AVX512VLBWINTRIN_H */