/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#error                                                                         \
    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif
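
/* Illustrative sketch of the rewriting suggested above (assuming GNU C
   vector extensions, which both GCC and Clang provide; the helper name is
   arbitrary): an element-wise 32-bit integer add needs no intrinsic at all.

     typedef int v4si_example __attribute__((vector_size(16)));

     static inline v4si_example add_epi32_example(v4si_example a,
                                                  v4si_example b) {
       return a + b;  // portable across x86, POWER, and other targets
     }
*/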

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__ppc64__) &&                                                      \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>
#include <tmmintrin.h>

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08
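
/* Illustrative use of the rounding-control macros above: the composite
   macros simply OR a rounding direction with an exception-control flag, so
   these two calls request the same floor operation (assuming _mm_set_pd is
   available from the companion <emmintrin.h> wrapper):

     __m128d __v = _mm_set_pd(2.5, -1.5);
     __m128d __a = _mm_round_pd(__v, _MM_FROUND_FLOOR);
     __m128d __b =
         _mm_round_pd(__v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC);
*/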

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_pd(__m128d __A, int __rounding) {
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v2df)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v2df)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v2df)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v2df)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v2df)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128d)__r;
}
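
/* Illustrative use of _mm_round_pd above: passing _MM_FROUND_NO_EXC makes
   the helper save the FPSCR exception-enable bits, round with traps
   disabled, and restore the enables afterwards, mirroring the "suppress
   exceptions" behaviour of the x86 rounding immediate (example assumes
   _mm_set_pd from the companion <emmintrin.h> wrapper):

     __m128d __x = _mm_set_pd(3.7, -0.2);
     __m128d __n =
         _mm_round_pd(__x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
*/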

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
  __B = _mm_round_pd(__B, __rounding);
  __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
  return (__m128d)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ps(__m128 __A, int __rounding) {
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v4sf)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v4sf)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v4sf)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v4sf)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v4sf)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
  __B = _mm_round_ps(__B, __rounding);
  __v4sf __r = (__v4sf)__A;
  __r[0] = ((__v4sf)__B)[0];
  return (__m128)__r;
}

#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
  __v16qi __result = (__v16qi)__A;

  __result[__N & 0xf] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
  __v4si __result = (__v4si)__A;

  __result[__N & 3] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
  __v2di __result = (__v2di)__A;

  __result[__N & 1] = __D;

  return (__m128i)__result;
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
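
/* Illustrative use of _mm_extract_ps above: the intrinsic returns the raw
   32-bit pattern of the selected float as an int, so recovering the float
   value takes a bit-cast (example assumes _mm_set_ps from the companion
   <xmmintrin.h> wrapper):

     __m128 __v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);
     int __bits = _mm_extract_ps(__v, 2);  // bit pattern of 3.0f
     float __f;
     __builtin_memcpy(&__f, &__bits, sizeof(__f));
*/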

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qi __charmask = vec_splats((signed char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}
#endif
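
/* Illustrative use of _mm_blend_epi16 above: bit i of the immediate selects
   16-bit lane i of the second operand, which the vec_gb/vec_unpackh sequence
   expands into the per-lane select mask consumed by vec_sel (example assumes
   _mm_set1_epi16 from the companion <emmintrin.h> wrapper):

     __m128i __a = _mm_set1_epi16(0);
     __m128i __b = _mm_set1_epi16(-1);
     __m128i __r = _mm_blend_epi16(__a, __b, 0x05); // lanes 0 and 2 from __b
*/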

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  };
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}

#ifdef _ARCH_PWR8
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
#endif

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testz_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
  return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testnzc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
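
/* Illustrative use of the test intrinsics above: only the integer results
   are reproduced here; unlike x86 PTEST, no condition flags are set (example
   assumes _mm_set1_epi32 from the companion <emmintrin.h> wrapper):

     __m128i __mask = _mm_set1_epi32(0x0000ffff);
     __m128i __val = _mm_set1_epi32(0x00ff0000);
     int __disjoint = _mm_testz_si128(__mask, __val); // 1: (__mask & __val) == 0
     int __covered = _mm_testc_si128(__mask, __val);  // 0: __val has bits
                                                      //    outside __mask
*/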

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi16(__m128i __A) {
  return (__m128i)vec_unpackh((__v16qi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi32(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi32(__m128i __A) {
  return (__m128i)vec_unpackh((__v8hi)__A);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  return (__m128i)vec_unpackh((__v4si)__A);
}
#endif

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
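
/* Illustrative use of _mm_minpos_epu16 above: lane 0 of the result holds the
   minimum 16-bit value and lane 1 its index; the remaining lanes are zero
   (example assumes _mm_set_epi16 and _mm_extract_epi16 from the companion
   <emmintrin.h> wrapper):

     __m128i __v = _mm_set_epi16(9, 8, 7, 3, 6, 5, 4, 10);
     __m128i __m = _mm_minpos_epu16(__v);
     int __min = _mm_extract_epi16(__m, 0); // 3
     int __idx = _mm_extract_epi16(__m, 1); // 4
*/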

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
}

#ifdef _ARCH_PWR8
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
}
#endif

#else
#include_next <smmintrin.h>
#endif /* defined(__ppc64__) &&
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* SMMINTRIN_H_ */