1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
17 #include <pmmintrin.h>
19 /* Define the default attributes for the functions in this file. */
20 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
21 #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
23 /// Computes the absolute value of each of the packed 8-bit signed
24 /// integers in the source operand and stores the 8-bit unsigned integer
25 /// results in the destination.
27 /// \headerfile <x86intrin.h>
29 /// This intrinsic corresponds to the \c PABSB instruction.
32 /// A 64-bit vector of [8 x i8].
33 /// \returns A 64-bit integer vector containing the absolute values of the
34 /// elements in the operand.
35 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
36 _mm_abs_pi8(__m64 __a
)
38 return (__m64
)__builtin_ia32_pabsb((__v8qi
)__a
);
41 /// Computes the absolute value of each of the packed 8-bit signed
42 /// integers in the source operand and stores the 8-bit unsigned integer
43 /// results in the destination.
45 /// \headerfile <x86intrin.h>
47 /// This intrinsic corresponds to the \c VPABSB instruction.
50 /// A 128-bit vector of [16 x i8].
51 /// \returns A 128-bit integer vector containing the absolute values of the
52 /// elements in the operand.
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
54 _mm_abs_epi8(__m128i __a
)
56 return (__m128i
)__builtin_elementwise_abs((__v16qs
)__a
);
59 /// Computes the absolute value of each of the packed 16-bit signed
60 /// integers in the source operand and stores the 16-bit unsigned integer
61 /// results in the destination.
63 /// \headerfile <x86intrin.h>
65 /// This intrinsic corresponds to the \c PABSW instruction.
68 /// A 64-bit vector of [4 x i16].
69 /// \returns A 64-bit integer vector containing the absolute values of the
70 /// elements in the operand.
71 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
72 _mm_abs_pi16(__m64 __a
)
74 return (__m64
)__builtin_ia32_pabsw((__v4hi
)__a
);
77 /// Computes the absolute value of each of the packed 16-bit signed
78 /// integers in the source operand and stores the 16-bit unsigned integer
79 /// results in the destination.
81 /// \headerfile <x86intrin.h>
83 /// This intrinsic corresponds to the \c VPABSW instruction.
86 /// A 128-bit vector of [8 x i16].
87 /// \returns A 128-bit integer vector containing the absolute values of the
88 /// elements in the operand.
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
90 _mm_abs_epi16(__m128i __a
)
92 return (__m128i
)__builtin_elementwise_abs((__v8hi
)__a
);
95 /// Computes the absolute value of each of the packed 32-bit signed
96 /// integers in the source operand and stores the 32-bit unsigned integer
97 /// results in the destination.
99 /// \headerfile <x86intrin.h>
101 /// This intrinsic corresponds to the \c PABSD instruction.
104 /// A 64-bit vector of [2 x i32].
105 /// \returns A 64-bit integer vector containing the absolute values of the
106 /// elements in the operand.
107 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
108 _mm_abs_pi32(__m64 __a
)
110 return (__m64
)__builtin_ia32_pabsd((__v2si
)__a
);
113 /// Computes the absolute value of each of the packed 32-bit signed
114 /// integers in the source operand and stores the 32-bit unsigned integer
115 /// results in the destination.
117 /// \headerfile <x86intrin.h>
119 /// This intrinsic corresponds to the \c VPABSD instruction.
122 /// A 128-bit vector of [4 x i32].
123 /// \returns A 128-bit integer vector containing the absolute values of the
124 /// elements in the operand.
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
126 _mm_abs_epi32(__m128i __a
)
128 return (__m128i
)__builtin_elementwise_abs((__v4si
)__a
);
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
177 /// Horizontally adds the adjacent pairs of values contained in 2 packed
178 /// 128-bit vectors of [8 x i16].
180 /// \headerfile <x86intrin.h>
182 /// This intrinsic corresponds to the \c VPHADDW instruction.
185 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
186 /// horizontal sums of the values are stored in the lower bits of the
189 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
190 /// horizontal sums of the values are stored in the upper bits of the
192 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
194 static __inline__ __m128i __DEFAULT_FN_ATTRS
195 _mm_hadd_epi16(__m128i __a
, __m128i __b
)
197 return (__m128i
)__builtin_ia32_phaddw128((__v8hi
)__a
, (__v8hi
)__b
);
200 /// Horizontally adds the adjacent pairs of values contained in 2 packed
201 /// 128-bit vectors of [4 x i32].
203 /// \headerfile <x86intrin.h>
205 /// This intrinsic corresponds to the \c VPHADDD instruction.
208 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
209 /// horizontal sums of the values are stored in the lower bits of the
212 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
213 /// horizontal sums of the values are stored in the upper bits of the
215 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
217 static __inline__ __m128i __DEFAULT_FN_ATTRS
218 _mm_hadd_epi32(__m128i __a
, __m128i __b
)
220 return (__m128i
)__builtin_ia32_phaddd128((__v4si
)__a
, (__v4si
)__b
);
223 /// Horizontally adds the adjacent pairs of values contained in 2 packed
224 /// 64-bit vectors of [4 x i16].
226 /// \headerfile <x86intrin.h>
228 /// This intrinsic corresponds to the \c PHADDW instruction.
231 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
232 /// horizontal sums of the values are stored in the lower bits of the
235 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
236 /// horizontal sums of the values are stored in the upper bits of the
238 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
240 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
241 _mm_hadd_pi16(__m64 __a
, __m64 __b
)
243 return (__m64
)__builtin_ia32_phaddw((__v4hi
)__a
, (__v4hi
)__b
);
246 /// Horizontally adds the adjacent pairs of values contained in 2 packed
247 /// 64-bit vectors of [2 x i32].
249 /// \headerfile <x86intrin.h>
251 /// This intrinsic corresponds to the \c PHADDD instruction.
254 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
255 /// horizontal sums of the values are stored in the lower bits of the
258 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
259 /// horizontal sums of the values are stored in the upper bits of the
261 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
263 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
264 _mm_hadd_pi32(__m64 __a
, __m64 __b
)
266 return (__m64
)__builtin_ia32_phaddd((__v2si
)__a
, (__v2si
)__b
);
269 /// Horizontally adds the adjacent pairs of values contained in 2 packed
270 /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
271 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
274 /// \headerfile <x86intrin.h>
276 /// This intrinsic corresponds to the \c VPHADDSW instruction.
279 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
280 /// horizontal sums of the values are stored in the lower bits of the
283 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
284 /// horizontal sums of the values are stored in the upper bits of the
286 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
287 /// sums of both operands.
288 static __inline__ __m128i __DEFAULT_FN_ATTRS
289 _mm_hadds_epi16(__m128i __a
, __m128i __b
)
291 return (__m128i
)__builtin_ia32_phaddsw128((__v8hi
)__a
, (__v8hi
)__b
);
294 /// Horizontally adds the adjacent pairs of values contained in 2 packed
295 /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
296 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
299 /// \headerfile <x86intrin.h>
301 /// This intrinsic corresponds to the \c PHADDSW instruction.
304 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
305 /// horizontal sums of the values are stored in the lower bits of the
308 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
309 /// horizontal sums of the values are stored in the upper bits of the
311 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
312 /// sums of both operands.
313 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
314 _mm_hadds_pi16(__m64 __a
, __m64 __b
)
316 return (__m64
)__builtin_ia32_phaddsw((__v4hi
)__a
, (__v4hi
)__b
);
319 /// Horizontally subtracts the adjacent pairs of values contained in 2
320 /// packed 128-bit vectors of [8 x i16].
322 /// \headerfile <x86intrin.h>
324 /// This intrinsic corresponds to the \c VPHSUBW instruction.
327 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
328 /// horizontal differences between the values are stored in the lower bits of
331 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
332 /// horizontal differences between the values are stored in the upper bits of
334 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
335 /// of both operands.
336 static __inline__ __m128i __DEFAULT_FN_ATTRS
337 _mm_hsub_epi16(__m128i __a
, __m128i __b
)
339 return (__m128i
)__builtin_ia32_phsubw128((__v8hi
)__a
, (__v8hi
)__b
);
342 /// Horizontally subtracts the adjacent pairs of values contained in 2
343 /// packed 128-bit vectors of [4 x i32].
345 /// \headerfile <x86intrin.h>
347 /// This intrinsic corresponds to the \c VPHSUBD instruction.
350 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
351 /// horizontal differences between the values are stored in the lower bits of
354 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
355 /// horizontal differences between the values are stored in the upper bits of
357 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
358 /// of both operands.
359 static __inline__ __m128i __DEFAULT_FN_ATTRS
360 _mm_hsub_epi32(__m128i __a
, __m128i __b
)
362 return (__m128i
)__builtin_ia32_phsubd128((__v4si
)__a
, (__v4si
)__b
);
365 /// Horizontally subtracts the adjacent pairs of values contained in 2
366 /// packed 64-bit vectors of [4 x i16].
368 /// \headerfile <x86intrin.h>
370 /// This intrinsic corresponds to the \c PHSUBW instruction.
373 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
374 /// horizontal differences between the values are stored in the lower bits of
377 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
378 /// horizontal differences between the values are stored in the upper bits of
380 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
381 /// of both operands.
382 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
383 _mm_hsub_pi16(__m64 __a
, __m64 __b
)
385 return (__m64
)__builtin_ia32_phsubw((__v4hi
)__a
, (__v4hi
)__b
);
388 /// Horizontally subtracts the adjacent pairs of values contained in 2
389 /// packed 64-bit vectors of [2 x i32].
391 /// \headerfile <x86intrin.h>
393 /// This intrinsic corresponds to the \c PHSUBD instruction.
396 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
397 /// horizontal differences between the values are stored in the lower bits of
400 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
401 /// horizontal differences between the values are stored in the upper bits of
403 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
404 /// of both operands.
405 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
406 _mm_hsub_pi32(__m64 __a
, __m64 __b
)
408 return (__m64
)__builtin_ia32_phsubd((__v2si
)__a
, (__v2si
)__b
);
411 /// Horizontally subtracts the adjacent pairs of values contained in 2
412 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
413 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
414 /// saturated to 0x8000.
416 /// \headerfile <x86intrin.h>
418 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
421 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
422 /// horizontal differences between the values are stored in the lower bits of
425 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
426 /// horizontal differences between the values are stored in the upper bits of
428 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
429 /// differences of both operands.
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
431 _mm_hsubs_epi16(__m128i __a
, __m128i __b
)
433 return (__m128i
)__builtin_ia32_phsubsw128((__v8hi
)__a
, (__v8hi
)__b
);
436 /// Horizontally subtracts the adjacent pairs of values contained in 2
437 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
438 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
439 /// saturated to 0x8000.
441 /// \headerfile <x86intrin.h>
443 /// This intrinsic corresponds to the \c PHSUBSW instruction.
446 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
447 /// horizontal differences between the values are stored in the lower bits of
450 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
451 /// horizontal differences between the values are stored in the upper bits of
453 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
454 /// differences of both operands.
455 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
456 _mm_hsubs_pi16(__m64 __a
, __m64 __b
)
458 return (__m64
)__builtin_ia32_phsubsw((__v4hi
)__a
, (__v4hi
)__b
);
461 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
462 /// values contained in the first source operand and packed 8-bit signed
463 /// integer values contained in the second source operand, adds pairs of
464 /// contiguous products with signed saturation, and writes the 16-bit sums to
465 /// the corresponding bits in the destination.
467 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
468 /// both operands are multiplied, and the sum of both results is written to
469 /// bits [15:0] of the destination.
471 /// \headerfile <x86intrin.h>
473 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
476 /// A 128-bit integer vector containing the first source operand.
478 /// A 128-bit integer vector containing the second source operand.
479 /// \returns A 128-bit integer vector containing the sums of products of both
481 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
482 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
483 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
484 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
485 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
486 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
487 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
488 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
489 static __inline__ __m128i __DEFAULT_FN_ATTRS
490 _mm_maddubs_epi16(__m128i __a
, __m128i __b
)
492 return (__m128i
)__builtin_ia32_pmaddubsw128((__v16qi
)__a
, (__v16qi
)__b
);
495 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
496 /// values contained in the first source operand and packed 8-bit signed
497 /// integer values contained in the second source operand, adds pairs of
498 /// contiguous products with signed saturation, and writes the 16-bit sums to
499 /// the corresponding bits in the destination.
501 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
502 /// both operands are multiplied, and the sum of both results is written to
503 /// bits [15:0] of the destination.
505 /// \headerfile <x86intrin.h>
507 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
510 /// A 64-bit integer vector containing the first source operand.
512 /// A 64-bit integer vector containing the second source operand.
513 /// \returns A 64-bit integer vector containing the sums of products of both
515 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
516 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
517 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
518 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
519 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
520 _mm_maddubs_pi16(__m64 __a
, __m64 __b
)
522 return (__m64
)__builtin_ia32_pmaddubsw((__v8qi
)__a
, (__v8qi
)__b
);
525 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
526 /// products to the 18 most significant bits by right-shifting, rounds the
527 /// truncated value by adding 1, and writes bits [16:1] to the destination.
529 /// \headerfile <x86intrin.h>
531 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
534 /// A 128-bit vector of [8 x i16] containing one of the source operands.
536 /// A 128-bit vector of [8 x i16] containing one of the source operands.
537 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
538 /// products of both operands.
539 static __inline__ __m128i __DEFAULT_FN_ATTRS
540 _mm_mulhrs_epi16(__m128i __a
, __m128i __b
)
542 return (__m128i
)__builtin_ia32_pmulhrsw128((__v8hi
)__a
, (__v8hi
)__b
);
545 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546 /// products to the 18 most significant bits by right-shifting, rounds the
547 /// truncated value by adding 1, and writes bits [16:1] to the destination.
549 /// \headerfile <x86intrin.h>
551 /// This intrinsic corresponds to the \c PMULHRSW instruction.
554 /// A 64-bit vector of [4 x i16] containing one of the source operands.
556 /// A 64-bit vector of [4 x i16] containing one of the source operands.
557 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
558 /// products of both operands.
559 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
560 _mm_mulhrs_pi16(__m64 __a
, __m64 __b
)
562 return (__m64
)__builtin_ia32_pmulhrsw((__v4hi
)__a
, (__v4hi
)__b
);
565 /// Copies the 8-bit integers from a 128-bit integer vector to the
566 /// destination or clears 8-bit values in the destination, as specified by
567 /// the second source operand.
569 /// \headerfile <x86intrin.h>
571 /// This intrinsic corresponds to the \c VPSHUFB instruction.
574 /// A 128-bit integer vector containing the values to be copied.
576 /// A 128-bit integer vector containing control bytes corresponding to
577 /// positions in the destination:
579 /// 1: Clear the corresponding byte in the destination. \n
580 /// 0: Copy the selected source byte to the corresponding byte in the
582 /// Bits [6:4] Reserved. \n
583 /// Bits [3:0] select the source byte to be copied.
584 /// \returns A 128-bit integer vector containing the copied or cleared values.
585 static __inline__ __m128i __DEFAULT_FN_ATTRS
586 _mm_shuffle_epi8(__m128i __a
, __m128i __b
)
588 return (__m128i
)__builtin_ia32_pshufb128((__v16qi
)__a
, (__v16qi
)__b
);
591 /// Copies the 8-bit integers from a 64-bit integer vector to the
592 /// destination or clears 8-bit values in the destination, as specified by
593 /// the second source operand.
595 /// \headerfile <x86intrin.h>
597 /// This intrinsic corresponds to the \c PSHUFB instruction.
600 /// A 64-bit integer vector containing the values to be copied.
602 /// A 64-bit integer vector containing control bytes corresponding to
603 /// positions in the destination:
605 /// 1: Clear the corresponding byte in the destination. \n
606 /// 0: Copy the selected source byte to the corresponding byte in the
608 /// Bits [3:0] select the source byte to be copied.
609 /// \returns A 64-bit integer vector containing the copied or cleared values.
610 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
611 _mm_shuffle_pi8(__m64 __a
, __m64 __b
)
613 return (__m64
)__builtin_ia32_pshufb((__v8qi
)__a
, (__v8qi
)__b
);
616 /// For each 8-bit integer in the first source operand, perform one of
617 /// the following actions as specified by the second source operand.
619 /// If the byte in the second source is negative, calculate the two's
620 /// complement of the corresponding byte in the first source, and write that
621 /// value to the destination. If the byte in the second source is positive,
622 /// copy the corresponding byte from the first source to the destination. If
623 /// the byte in the second source is zero, clear the corresponding byte in
626 /// \headerfile <x86intrin.h>
628 /// This intrinsic corresponds to the \c VPSIGNB instruction.
631 /// A 128-bit integer vector containing the values to be copied.
633 /// A 128-bit integer vector containing control bytes corresponding to
634 /// positions in the destination.
635 /// \returns A 128-bit integer vector containing the resultant values.
636 static __inline__ __m128i __DEFAULT_FN_ATTRS
637 _mm_sign_epi8(__m128i __a
, __m128i __b
)
639 return (__m128i
)__builtin_ia32_psignb128((__v16qi
)__a
, (__v16qi
)__b
);
642 /// For each 16-bit integer in the first source operand, perform one of
643 /// the following actions as specified by the second source operand.
645 /// If the word in the second source is negative, calculate the two's
646 /// complement of the corresponding word in the first source, and write that
647 /// value to the destination. If the word in the second source is positive,
648 /// copy the corresponding word from the first source to the destination. If
649 /// the word in the second source is zero, clear the corresponding word in
652 /// \headerfile <x86intrin.h>
654 /// This intrinsic corresponds to the \c VPSIGNW instruction.
657 /// A 128-bit integer vector containing the values to be copied.
659 /// A 128-bit integer vector containing control words corresponding to
660 /// positions in the destination.
661 /// \returns A 128-bit integer vector containing the resultant values.
662 static __inline__ __m128i __DEFAULT_FN_ATTRS
663 _mm_sign_epi16(__m128i __a
, __m128i __b
)
665 return (__m128i
)__builtin_ia32_psignw128((__v8hi
)__a
, (__v8hi
)__b
);
668 /// For each 32-bit integer in the first source operand, perform one of
669 /// the following actions as specified by the second source operand.
671 /// If the doubleword in the second source is negative, calculate the two's
672 /// complement of the corresponding word in the first source, and write that
673 /// value to the destination. If the doubleword in the second source is
674 /// positive, copy the corresponding word from the first source to the
675 /// destination. If the doubleword in the second source is zero, clear the
676 /// corresponding word in the destination.
678 /// \headerfile <x86intrin.h>
680 /// This intrinsic corresponds to the \c VPSIGND instruction.
683 /// A 128-bit integer vector containing the values to be copied.
685 /// A 128-bit integer vector containing control doublewords corresponding to
686 /// positions in the destination.
687 /// \returns A 128-bit integer vector containing the resultant values.
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
689 _mm_sign_epi32(__m128i __a
, __m128i __b
)
691 return (__m128i
)__builtin_ia32_psignd128((__v4si
)__a
, (__v4si
)__b
);
694 /// For each 8-bit integer in the first source operand, perform one of
695 /// the following actions as specified by the second source operand.
697 /// If the byte in the second source is negative, calculate the two's
698 /// complement of the corresponding byte in the first source, and write that
699 /// value to the destination. If the byte in the second source is positive,
700 /// copy the corresponding byte from the first source to the destination. If
701 /// the byte in the second source is zero, clear the corresponding byte in
704 /// \headerfile <x86intrin.h>
706 /// This intrinsic corresponds to the \c PSIGNB instruction.
709 /// A 64-bit integer vector containing the values to be copied.
711 /// A 64-bit integer vector containing control bytes corresponding to
712 /// positions in the destination.
713 /// \returns A 64-bit integer vector containing the resultant values.
714 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
715 _mm_sign_pi8(__m64 __a
, __m64 __b
)
717 return (__m64
)__builtin_ia32_psignb((__v8qi
)__a
, (__v8qi
)__b
);
720 /// For each 16-bit integer in the first source operand, perform one of
721 /// the following actions as specified by the second source operand.
723 /// If the word in the second source is negative, calculate the two's
724 /// complement of the corresponding word in the first source, and write that
725 /// value to the destination. If the word in the second source is positive,
726 /// copy the corresponding word from the first source to the destination. If
727 /// the word in the second source is zero, clear the corresponding word in
730 /// \headerfile <x86intrin.h>
732 /// This intrinsic corresponds to the \c PSIGNW instruction.
735 /// A 64-bit integer vector containing the values to be copied.
737 /// A 64-bit integer vector containing control words corresponding to
738 /// positions in the destination.
739 /// \returns A 64-bit integer vector containing the resultant values.
740 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
741 _mm_sign_pi16(__m64 __a
, __m64 __b
)
743 return (__m64
)__builtin_ia32_psignw((__v4hi
)__a
, (__v4hi
)__b
);
746 /// For each 32-bit integer in the first source operand, perform one of
747 /// the following actions as specified by the second source operand.
749 /// If the doubleword in the second source is negative, calculate the two's
750 /// complement of the corresponding doubleword in the first source, and
751 /// write that value to the destination. If the doubleword in the second
752 /// source is positive, copy the corresponding doubleword from the first
753 /// source to the destination. If the doubleword in the second source is
754 /// zero, clear the corresponding doubleword in the destination.
756 /// \headerfile <x86intrin.h>
758 /// This intrinsic corresponds to the \c PSIGND instruction.
761 /// A 64-bit integer vector containing the values to be copied.
763 /// A 64-bit integer vector containing two control doublewords corresponding
764 /// to positions in the destination.
765 /// \returns A 64-bit integer vector containing the resultant values.
766 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
767 _mm_sign_pi32(__m64 __a
, __m64 __b
)
769 return (__m64
)__builtin_ia32_psignd((__v2si
)__a
, (__v2si
)__b
);
772 #undef __DEFAULT_FN_ATTRS
773 #undef __DEFAULT_FN_ATTRS_MMX
775 #endif /* __TMMINTRIN_H */