/*===---- smmintrin.h - Implementation of SSE4 intrinsics on PowerPC -------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */

/* Implemented from the specification included in the Intel C++ Compiler
   User Guide and Reference, version 9.0.

   NOTE: This is NOT a complete implementation of the SSE4 intrinsics! */

#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64/powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets. */
#error                                                                         \
    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_

#if defined(__powerpc64__) &&                                                  \
    (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX))

#include <altivec.h>
#include <tmmintrin.h>

/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_ZERO 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_NEG_INF 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

#define _MM_FROUND_NINT (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_FLOOR (_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_CEIL (_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_TRUNC (_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_RINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)

#define _MM_FROUND_RAISE_EXC 0x00
#define _MM_FROUND_NO_EXC 0x08

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_pd(__m128d __A, int __rounding) {
  __v2df __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode. */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_ppc_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v2df)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : : "wa"(__r));
    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v2df)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v2df)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v2df)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v2df)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions. */
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128d)__r;
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_sd(__m128d __A, __m128d __B, int __rounding) {
  __B = _mm_round_pd(__B, __rounding);
  __v2df __r = {((__v2df)__B)[0], ((__v2df)__A)[1]};
  return (__m128d)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ps(__m128 __A, int __rounding) {
  __v4sf __r;
  union {
    double __fr;
    long long __fpscr;
  } __enables_save, __fpscr_save;

  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Save enabled exceptions, disable all exceptions,
       and preserve the rounding mode. */
#ifdef _ARCH_PWR9
    __asm__("mffsce %0" : "=f"(__fpscr_save.__fr));
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __enables_save.__fpscr = __fpscr_save.__fpscr & 0xf8;
    __fpscr_save.__fpscr &= ~0xf8;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
#endif
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : "+wa"(__A));
  }

  switch (__rounding) {
  case _MM_FROUND_TO_NEAREST_INT:
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __attribute__((fallthrough));
  case _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC:
    __builtin_ppc_set_fpscr_rn(0b00);
    /* Insert an artificial "read/write" reference to the variable
       read below, to ensure the compiler does not schedule
       a read/use of the variable before the FPSCR is modified, above.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : "+wa"(__A));

    __r = vec_rint((__v4sf)__A);

    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : : "wa"(__r));
    __builtin_ppc_set_fpscr_rn(__fpscr_save.__fpscr);
    break;
  case _MM_FROUND_TO_NEG_INF:
  case _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC:
    __r = vec_floor((__v4sf)__A);
    break;
  case _MM_FROUND_TO_POS_INF:
  case _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC:
    __r = vec_ceil((__v4sf)__A);
    break;
  case _MM_FROUND_TO_ZERO:
  case _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC:
    __r = vec_trunc((__v4sf)__A);
    break;
  case _MM_FROUND_CUR_DIRECTION:
    __r = vec_rint((__v4sf)__A);
    break;
  }
  if (__rounding & _MM_FROUND_NO_EXC) {
    /* Insert an artificial "read" reference to the variable written
       above, to ensure the compiler does not schedule the computation
       of the value after the manipulation of the FPSCR, below.
       This can be removed if and when GCC PR102783 is fixed. */
    __asm__("" : : "wa"(__r));
    /* Restore enabled exceptions. */
#ifdef _ARCH_PWR9
    __fpscr_save.__fr = __builtin_ppc_mffsl();
#else
    __fpscr_save.__fr = __builtin_ppc_mffs();
    __fpscr_save.__fpscr &= 0x70007f0ffL;
#endif
    __fpscr_save.__fpscr |= __enables_save.__fpscr;
    __builtin_ppc_mtfsf(0b00000011, __fpscr_save.__fr);
  }
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_round_ss(__m128 __A, __m128 __B, int __rounding) {
  __B = _mm_round_ps(__B, __rounding);
  __v4sf __r = (__v4sf)__A;
  __r[0] = ((__v4sf)__B)[0];
  return (__m128)__r;
}

#define _mm_ceil_pd(V) _mm_round_pd((V), _MM_FROUND_CEIL)
#define _mm_ceil_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
#define _mm_floor_sd(D, V) _mm_round_sd((D), (V), _MM_FROUND_FLOOR)

#define _mm_ceil_ps(V) _mm_round_ps((V), _MM_FROUND_CEIL)
#define _mm_ceil_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_CEIL)

#define _mm_floor_ps(V) _mm_round_ps((V), _MM_FROUND_FLOOR)
#define _mm_floor_ss(D, V) _mm_round_ss((D), (V), _MM_FROUND_FLOOR)
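
/* Illustrative usage sketch, not part of the original header: the
   convenience macros above simply combine a rounding direction with an
   exception policy.  Assuming the SSE2 wrapper (emmintrin.h) is also in
   use for _mm_set_pd, a caller might write:

     __m128d __v = _mm_set_pd(2.5, -1.5);
     __m128d __down = _mm_floor_pd(__v);   // same as _MM_FROUND_FLOOR
     __m128d __near =
         _mm_round_pd(__v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);

   where _MM_FROUND_NO_EXC makes _mm_round_pd save, disable, and restore
   the FPSCR exception enables around the operation, as implemented above. */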

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi8(__m128i const __A, int const __D, int const __N) {
  __v16qi __result = (__v16qi)__A;

  __result[__N & 0xf] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi32(__m128i const __A, int const __D, int const __N) {
  __v4si __result = (__v4si)__A;

  __result[__N & 3] = __D;

  return (__m128i)__result;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) {
  __v2di __result = (__v2di)__A;

  __result[__N & 1] = __D;

  return (__m128i)__result;
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi8(__m128i __X, const int __N) {
  return (unsigned char)((__v16qi)__X)[__N & 15];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi32(__m128i __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_epi64(__m128i __X, const int __N) {
  return ((__v2di)__X)[__N & 1];
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_extract_ps(__m128 __X, const int __N) {
  return ((__v4si)__X)[__N & 3];
}
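
/* Illustrative round trip, not part of the original header.  Assuming the
   SSE2 wrapper (emmintrin.h) is also in use for _mm_set1_epi32:

     __m128i __v = _mm_set1_epi32(0);
     __v = _mm_insert_epi32(__v, 42, 2);
     int __lane2 = _mm_extract_epi32(__v, 2);   // 42

   The index operand is masked (here with & 3), matching the element
   selection of the Intel intrinsics. */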

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_epi16(__m128i __A, __m128i __B, const int __imm8) {
  __v16qu __charmask = vec_splats((unsigned char)__imm8);
  __charmask = vec_gb(__charmask);
  __v8hu __shortmask = (__v8hu)vec_unpackh((__v16qi)__charmask);
#ifdef __BIG_ENDIAN__
  __shortmask = vec_reve(__shortmask);
#endif
  return (__m128i)vec_sel((__v8hu)__A, (__v8hu)__B, __shortmask);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_epi8(__m128i __A, __m128i __B, __m128i __mask) {
#ifdef _ARCH_PWR10
  return (__m128i)vec_blendv((__v16qi)__A, (__v16qi)__B, (__v16qu)__mask);
#else
  const __v16qu __seven = vec_splats((unsigned char)0x07);
  __v16qu __lmask = vec_sra((__v16qu)__mask, __seven);
  return (__m128i)vec_sel((__v16qi)__A, (__v16qi)__B, __lmask);
#endif
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_ps(__m128 __A, __m128 __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128)__r;
}

extern __inline __m128
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_ps(__m128 __A, __m128 __B, __m128 __mask) {
#ifdef _ARCH_PWR10
  return (__m128)vec_blendv((__v4sf)__A, (__v4sf)__B, (__v4su)__mask);
#else
  const __v4si __zero = {0};
  const __vector __bool int __boolmask = vec_cmplt((__v4si)__mask, __zero);
  return (__m128)vec_sel((__v4su)__A, (__v4su)__B, (__v4su)__boolmask);
#endif
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blend_pd(__m128d __A, __m128d __B, const int __imm8) {
  __v16qu __pcv[] = {
      {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      {16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15},
      {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31},
      {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}};
  __v16qu __r = vec_perm((__v16qu)__A, (__v16qu)__B, __pcv[__imm8]);
  return (__m128d)__r;
}

extern __inline __m128d
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_blendv_pd(__m128d __A, __m128d __B, __m128d __mask) {
#ifdef _ARCH_PWR10
  return (__m128d)vec_blendv((__v2df)__A, (__v2df)__B, (__v2du)__mask);
#else
  const __v2di __zero = {0};
  const __vector __bool long long __boolmask =
      vec_cmplt((__v2di)__mask, __zero);
  return (__m128d)vec_sel((__v2du)__A, (__v2du)__B, (__v2du)__boolmask);
#endif
}
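
/* Illustrative sketch, not part of the original header: the variable
   blends select each element from __B when the most significant bit of
   the corresponding mask element is set.  Assuming the SSE2 wrapper
   (emmintrin.h) is also in use for the set/setr helpers:

     __m128i __a = _mm_set1_epi8(1);
     __m128i __b = _mm_set1_epi8(2);
     __m128i __m = _mm_setr_epi32(0, -1, 0, -1);
     __m128i __c = _mm_blendv_epi8(__a, __b, __m);
     // bytes 4-7 and 12-15 of __c come from __b, the rest from __a */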

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testz_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  const __v16qu __zero = {0};
  return vec_all_eq(vec_and((__v16qu)__A, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  const __v16qu __zero = {0};
  const __v16qu __notA = vec_nor((__v16qu)__A, (__v16qu)__A);
  return vec_all_eq(vec_and((__v16qu)__notA, (__v16qu)__B), __zero);
}

extern __inline int
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_testnzc_si128(__m128i __A, __m128i __B) {
  /* Note: This implementation does NOT set "zero" or "carry" flags. */
  return _mm_testz_si128(__A, __B) == 0 && _mm_testc_si128(__A, __B) == 0;
}

#define _mm_test_all_zeros(M, V) _mm_testz_si128((M), (V))

#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))

#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
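
/* Illustrative sketch, not part of the original header: as the notes in
   the functions above say, these predicates return a plain int (1 or 0)
   rather than setting x86 ZF/CF flags.  Assuming the SSE2 wrapper
   (emmintrin.h) is also in use for _mm_set1_epi32:

     __m128i __mask = _mm_set1_epi32(0xff);
     __m128i __data = _mm_set1_epi32(0x100);
     int __disjoint = _mm_test_all_zeros(__mask, __data);   // 1, no common bits */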

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpeq_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpeq((__v2di)__X, (__v2di)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_min_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_min((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi8(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v16qi)__X, (__v16qi)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu16(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v8hu)__X, (__v8hu)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4si)__X, (__v4si)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_max_epu32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_max((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mullo_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mul((__v4su)__X, (__v4su)__Y);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_mul_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_mule((__v4si)__X, (__v4si)__Y);
}
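
/* Illustrative sketch, not part of the original header: _mm_mullo_epi32
   keeps the low 32 bits of every 32x32 product, while _mm_mul_epi32 forms
   sign-extended 64-bit products from the even-numbered elements.  Assuming
   the SSE2 wrapper (emmintrin.h) is also in use for _mm_set1_epi32:

     __m128i __x = _mm_set1_epi32(-3);
     __m128i __y = _mm_set1_epi32(100000);
     __m128i __lo = _mm_mullo_epi32(__x, __y);   // four lanes of -300000
     __m128i __wide = _mm_mul_epi32(__x, __y);   // two 64-bit lanes of -300000 */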

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi16(__m128i __A) {
  return (__m128i)vec_unpackh((__v16qi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi32(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  return (__m128i)vec_unpackh((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi8_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v16qi)__A);
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi32(__m128i __A) {
  return (__m128i)vec_unpackh((__v8hi)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi16_epi64(__m128i __A) {
  __A = (__m128i)vec_unpackh((__v8hi)__A);
  return (__m128i)vec_unpackh((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepi32_epi64(__m128i __A) {
  return (__m128i)vec_unpackh((__v4si)__A);
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi16(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi32(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu8_epi64(__m128i __A) {
  const __v16qu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v16qu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v8hu)__A, (__v8hu)__zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v16qu)__A);
  __A = (__m128i)vec_mergeh((__v8hu)__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi32(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu16_epi64(__m128i __A) {
  const __v8hu __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v8hu)__A, __zero);
  __A = (__m128i)vec_mergeh((__v4su)__A, (__v4su)__zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v8hu)__A);
  __A = (__m128i)vec_mergeh((__v4su)__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cvtepu32_epi64(__m128i __A) {
  const __v4su __zero = {0};
#ifdef __LITTLE_ENDIAN__
  __A = (__m128i)vec_mergeh((__v4su)__A, __zero);
#else /* __BIG_ENDIAN__. */
  __A = (__m128i)vec_mergeh(__zero, (__v4su)__A);
#endif /* __BIG_ENDIAN__. */
  return __A;
}
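
/* Illustrative sketch, not part of the original header: the _mm_cvtepu*
   conversions zero-extend, while the _mm_cvtepi* conversions above
   sign-extend.  Assuming the SSE2 wrapper (emmintrin.h) is also in use
   for _mm_set1_epi8:

     __m128i __bytes = _mm_set1_epi8((char)0xff);
     __m128i __zext = _mm_cvtepu8_epi32(__bytes);   // four lanes of 255
     __m128i __sext = _mm_cvtepi8_epi32(__bytes);   // four lanes of -1 */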

/* Return horizontal packed word minimum and its index in bits [15:0]
   and bits [18:16] respectively. */
extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_minpos_epu16(__m128i __A) {
  union __u {
    __m128i __m;
    __v8hu __uh;
  };
  union __u __u = {.__m = __A}, __r = {.__m = {0}};
  unsigned short __ridx = 0;
  unsigned short __rmin = __u.__uh[__ridx];
  unsigned long __i;
  for (__i = 1; __i < 8; __i++) {
    if (__u.__uh[__i] < __rmin) {
      __rmin = __u.__uh[__i];
      __ridx = __i;
    }
  }
  __r.__uh[0] = __rmin;
  __r.__uh[1] = __ridx;
  return (__m128i)__r.__m;
}
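
/* Illustrative sketch, not part of the original header: the minimum value
   lands in the low halfword of the result and its index in the next
   halfword.  Assuming the SSE2 wrapper (emmintrin.h) is also in use for
   _mm_setr_epi16 and _mm_extract_epi16:

     __m128i __v = _mm_setr_epi16(9, 4, 7, 4, 8, 6, 5, 3);
     __m128i __mp = _mm_minpos_epu16(__v);
     int __min = _mm_extract_epi16(__mp, 0);   // 3
     int __idx = _mm_extract_epi16(__mp, 1);   // 7 */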

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_packus_epi32(__m128i __X, __m128i __Y) {
  return (__m128i)vec_packsu((__v4si)__X, (__v4si)__Y);
}
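
/* Illustrative note, not part of the original header: the pack saturates
   each signed 32-bit input into the unsigned 16-bit range, so negative
   values clamp to 0 and values above 65535 clamp to 65535.  Assuming the
   SSE2 wrapper (emmintrin.h) is also in use for _mm_setr_epi32:

     __m128i __x = _mm_setr_epi32(-5, 70000, 12, 65535);
     __m128i __p = _mm_packus_epi32(__x, __x);
     // low four halfwords of __p: 0, 65535, 12, 65535 */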

extern __inline __m128i
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _mm_cmpgt_epi64(__m128i __X, __m128i __Y) {
  return (__m128i)vec_cmpgt((__v2di)__X, (__v2di)__Y);
}

#else
#include_next <smmintrin.h>
#endif /* defined(__powerpc64__) &&                                            \
        * (defined(__linux__) || defined(__FreeBSD__) || defined(_AIX)) */

#endif /* SMMINTRIN_H_ */