/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC ------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics!  */
#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#error                                                                         \
    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif
#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__ppc64__) &&                                                      \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>
#include <tmmintrin.h>

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08
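/* Illustrative sketch (not part of the original header): the composite
   macros simply OR a rounding direction with an exception-control flag, so
   _MM_FROUND_FLOOR is (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC) and
   _MM_FROUND_NEARBYINT is (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC).
   A rounding argument such as

     (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC)

   asks _mm_round_pd/_mm_round_ps below to truncate without raising
   floating-point exceptions.  */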
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_pd(__m128d __A, int __rounding) {
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v2df)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v2df)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v2df)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v2df)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v2df)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128d)__r;
}
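/* Illustrative usage sketch (not part of the original header), assuming
   _mm_set_pd from emmintrin.h is available:

     __m128d __x = _mm_set_pd(2.5, -1.5);
     __m128d __y = _mm_round_pd(__x, _MM_FROUND_TO_NEAREST_INT |
                                         _MM_FROUND_NO_EXC);
     // Round-to-nearest-even: 2.5 -> 2.0 and -1.5 -> -2.0; because
     // _MM_FROUND_NO_EXC is set, the FPSCR exception enables are saved,
     // cleared, and restored around the operation.
 */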
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
  __B = _mm_round_pd(__B, __rounding);
  __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
  return (__m128d)__r;
}
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ps(__m128 __A, int __rounding) {
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode.  */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
    __fpscr_save.__fr = __builtin_mffsl();
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v4sf)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    __builtin_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v4sf)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v4sf)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v4sf)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v4sf)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed.
     */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions.  */
    __fpscr_save.__fr = __builtin_mffsl();
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128)__r;
}
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
  __B = _mm_round_ps(__B, __rounding);
  __v4sf __r = (__v4sf)__A;
  __r[0] = ((__v4sf)__B)[0];
  return (__m128)__r;
}
#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
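/* Illustrative usage sketch (not part of the original header), assuming
   _mm_set_ps from xmmintrin.h is available; these macros are thin wrappers
   around the _mm_round_* functions above:

     __m128 __x = _mm_set_ps(2.7f, -2.7f, 0.5f, 1.5f);
     __m128 __f = _mm_floor_ps(__x); // 2.7 -> 2.0, -2.7 -> -3.0, 0.5 -> 0.0, 1.5 -> 1.0
     __m128 __c = _mm_ceil_ps(__x);  // 2.7 -> 3.0, -2.7 -> -2.0, 0.5 -> 1.0, 1.5 -> 2.0
 */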
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
  __v16qi __result = (__v16qi)__A;

  __result[__N & 0xf] = __D;

  return (__m128i)__result;
}
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
  __v4si __result = (__v4si)__A;

  __result[__N & 3] = __D;

  return (__m128i)__result;
}
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
  __v2di __result = (__v2di)__A;

  __result[__N & 1] = __D;

  return (__m128i)__result;
}
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
extern __inline long long
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qi __charmask = vec_splats((signed char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh(__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}
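/* Illustrative sketch (not part of the original header): each bit i of
   __imm8 selects 16-bit element i from __B (bit set) or __A (bit clear).
   With hypothetical vectors __a and __b:

     __m128i __r = _mm_blend_epi16(__a, __b, 0x0F);
     // Elements 0-3 of __r come from __b, elements 4-7 from __a.
 */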
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
  };
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}
extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}
extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
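/* Illustrative sketch (not part of the original header): the variable blend
   intrinsics select on the sign (most significant) bit of each mask element.
   With hypothetical __m128d values __a, __b and mask __m:

     __m128d __r = _mm_blendv_pd(__a, __b, __m);
     // Each lane of __r takes __b when the corresponding mask element has
     // its sign bit set, and __a otherwise.
 */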
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testz_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
}
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
  return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
}
extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testnzc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags.  */
  return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
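/* Illustrative usage sketch (not part of the original header), assuming
   _mm_setzero_si128 and _mm_set1_epi8 from emmintrin.h are available:

     __m128i __zero = _mm_setzero_si128();
     __m128i __ones = _mm_set1_epi8((char)0xFF);
     int __z = _mm_test_all_zeros(__zero, __ones); // 1: (__zero & __ones) == 0
     int __o = _mm_test_all_ones(__ones);          // 1: every bit is set
 */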
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
}
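/* Illustrative sketch (not part of the original header): _mm_mullo_epi32
   keeps the low 32 bits of each of the four products, while _mm_mul_epi32
   multiplies only elements 0 and 2 and widens the two results to signed
   64 bits.  With a hypothetical vector __v holding {a, b, c, d}:

     __m128i __lo   = _mm_mullo_epi32(__v, __v); // {a*a, b*b, c*c, d*d}, low 32 bits
     __m128i __wide = _mm_mul_epi32(__v, __v);   // {(long long)a*a, (long long)c*c}
 */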
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi16(__m128i __A) {
  return (__m128i)vec_unpackh((__v16qi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi32(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  return (__m128i)vec_unpackh((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi32(__m128i __A) {
  return (__m128i)vec_unpackh((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  return (__m128i)vec_unpackh((__v4si)__A);
}
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else /* __BIG_ENDIAN__.  */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__.  */
  return __A;
}
/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively.  */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return __r.__m;
}
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
}

#else
#include_next <smmintrin.h>
#endif /* defined(__ppc64__) &&
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* SMMINTRIN_H_ */