[docs] Fix build-docs.sh
[llvm-project.git] / clang / lib / Headers / tmmintrin.h
blobcb9be2349de5a72ddd2e926f09ef1a94c04a819f
1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
8 */
10 #ifndef __TMMINTRIN_H
11 #define __TMMINTRIN_H
13 #if !defined(__i386__) && !defined(__x86_64__)
14 #error "This header is only meant to be used on x86 and x64 architecture"
15 #endif
17 #include <pmmintrin.h>
/* Attribute sets shared by the intrinsics defined in this header. Both force
 * inlining, suppress debug info and set a minimum vector width of 64 bits.
 * __DEFAULT_FN_ATTRS enables the "ssse3" target feature and is used by the
 * 128-bit (__m128i) intrinsics; __DEFAULT_FN_ATTRS_MMX enables "mmx,ssse3"
 * and is used by the 64-bit (__m64) intrinsics. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),          \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"),      \
                 __min_vector_width__(64)))
23 /// Computes the absolute value of each of the packed 8-bit signed
24 /// integers in the source operand and stores the 8-bit unsigned integer
25 /// results in the destination.
26 ///
27 /// \headerfile <x86intrin.h>
28 ///
29 /// This intrinsic corresponds to the \c PABSB instruction.
30 ///
31 /// \param __a
32 /// A 64-bit vector of [8 x i8].
33 /// \returns A 64-bit integer vector containing the absolute values of the
34 /// elements in the operand.
35 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
36 _mm_abs_pi8(__m64 __a)
38 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
41 /// Computes the absolute value of each of the packed 8-bit signed
42 /// integers in the source operand and stores the 8-bit unsigned integer
43 /// results in the destination.
44 ///
45 /// \headerfile <x86intrin.h>
46 ///
47 /// This intrinsic corresponds to the \c VPABSB instruction.
48 ///
49 /// \param __a
50 /// A 128-bit vector of [16 x i8].
51 /// \returns A 128-bit integer vector containing the absolute values of the
52 /// elements in the operand.
53 static __inline__ __m128i __DEFAULT_FN_ATTRS
54 _mm_abs_epi8(__m128i __a)
56 return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
59 /// Computes the absolute value of each of the packed 16-bit signed
60 /// integers in the source operand and stores the 16-bit unsigned integer
61 /// results in the destination.
62 ///
63 /// \headerfile <x86intrin.h>
64 ///
65 /// This intrinsic corresponds to the \c PABSW instruction.
66 ///
67 /// \param __a
68 /// A 64-bit vector of [4 x i16].
69 /// \returns A 64-bit integer vector containing the absolute values of the
70 /// elements in the operand.
71 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
72 _mm_abs_pi16(__m64 __a)
74 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
77 /// Computes the absolute value of each of the packed 16-bit signed
78 /// integers in the source operand and stores the 16-bit unsigned integer
79 /// results in the destination.
80 ///
81 /// \headerfile <x86intrin.h>
82 ///
83 /// This intrinsic corresponds to the \c VPABSW instruction.
84 ///
85 /// \param __a
86 /// A 128-bit vector of [8 x i16].
87 /// \returns A 128-bit integer vector containing the absolute values of the
88 /// elements in the operand.
89 static __inline__ __m128i __DEFAULT_FN_ATTRS
90 _mm_abs_epi16(__m128i __a)
92 return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
95 /// Computes the absolute value of each of the packed 32-bit signed
96 /// integers in the source operand and stores the 32-bit unsigned integer
97 /// results in the destination.
98 ///
99 /// \headerfile <x86intrin.h>
101 /// This intrinsic corresponds to the \c PABSD instruction.
103 /// \param __a
104 /// A 64-bit vector of [2 x i32].
105 /// \returns A 64-bit integer vector containing the absolute values of the
106 /// elements in the operand.
107 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
108 _mm_abs_pi32(__m64 __a)
110 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
113 /// Computes the absolute value of each of the packed 32-bit signed
114 /// integers in the source operand and stores the 32-bit unsigned integer
115 /// results in the destination.
117 /// \headerfile <x86intrin.h>
119 /// This intrinsic corresponds to the \c VPABSD instruction.
121 /// \param __a
122 /// A 128-bit vector of [4 x i32].
123 /// \returns A 128-bit integer vector containing the absolute values of the
124 /// elements in the operand.
125 static __inline__ __m128i __DEFAULT_FN_ATTRS
126 _mm_abs_epi32(__m128i __a)
128 return (__m128i)__builtin_elementwise_abs((__v4si)__a);
/// Joins two 128-bit integer vectors into one wide value and shifts that
///    value right by an immediate number of bytes.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes to shift right by.
/// \returns A 128-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_epi8(a, b, n)                                               \
  ((__m128i)__builtin_ia32_palignr128(                                         \
      (__v16qi)(__m128i)(a), (__v16qi)(__m128i)(b), (n)))
/// Joins two 64-bit integer vectors into one wide value and shifts that
///    value right by an immediate number of bytes.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand giving the number of bytes to shift right by.
/// \returns A 64-bit integer vector containing the concatenated,
///    right-shifted value.
#define _mm_alignr_pi8(a, b, n)                                                \
  ((__m64)__builtin_ia32_palignr(                                              \
      (__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
177 /// Horizontally adds the adjacent pairs of values contained in 2 packed
178 /// 128-bit vectors of [8 x i16].
180 /// \headerfile <x86intrin.h>
182 /// This intrinsic corresponds to the \c VPHADDW instruction.
184 /// \param __a
185 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
186 /// horizontal sums of the values are stored in the lower bits of the
187 /// destination.
188 /// \param __b
189 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
190 /// horizontal sums of the values are stored in the upper bits of the
191 /// destination.
192 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
193 /// both operands.
194 static __inline__ __m128i __DEFAULT_FN_ATTRS
195 _mm_hadd_epi16(__m128i __a, __m128i __b)
197 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
200 /// Horizontally adds the adjacent pairs of values contained in 2 packed
201 /// 128-bit vectors of [4 x i32].
203 /// \headerfile <x86intrin.h>
205 /// This intrinsic corresponds to the \c VPHADDD instruction.
207 /// \param __a
208 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
209 /// horizontal sums of the values are stored in the lower bits of the
210 /// destination.
211 /// \param __b
212 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
213 /// horizontal sums of the values are stored in the upper bits of the
214 /// destination.
215 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
216 /// both operands.
217 static __inline__ __m128i __DEFAULT_FN_ATTRS
218 _mm_hadd_epi32(__m128i __a, __m128i __b)
220 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
223 /// Horizontally adds the adjacent pairs of values contained in 2 packed
224 /// 64-bit vectors of [4 x i16].
226 /// \headerfile <x86intrin.h>
228 /// This intrinsic corresponds to the \c PHADDW instruction.
230 /// \param __a
231 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
232 /// horizontal sums of the values are stored in the lower bits of the
233 /// destination.
234 /// \param __b
235 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
236 /// horizontal sums of the values are stored in the upper bits of the
237 /// destination.
238 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
239 /// operands.
240 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
241 _mm_hadd_pi16(__m64 __a, __m64 __b)
243 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
246 /// Horizontally adds the adjacent pairs of values contained in 2 packed
247 /// 64-bit vectors of [2 x i32].
249 /// \headerfile <x86intrin.h>
251 /// This intrinsic corresponds to the \c PHADDD instruction.
253 /// \param __a
254 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
255 /// horizontal sums of the values are stored in the lower bits of the
256 /// destination.
257 /// \param __b
258 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
259 /// horizontal sums of the values are stored in the upper bits of the
260 /// destination.
261 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
262 /// operands.
263 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
264 _mm_hadd_pi32(__m64 __a, __m64 __b)
266 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
269 /// Horizontally adds the adjacent pairs of values contained in 2 packed
270 /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
271 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
272 /// 0x8000.
274 /// \headerfile <x86intrin.h>
276 /// This intrinsic corresponds to the \c VPHADDSW instruction.
278 /// \param __a
279 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
280 /// horizontal sums of the values are stored in the lower bits of the
281 /// destination.
282 /// \param __b
283 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
284 /// horizontal sums of the values are stored in the upper bits of the
285 /// destination.
286 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
287 /// sums of both operands.
288 static __inline__ __m128i __DEFAULT_FN_ATTRS
289 _mm_hadds_epi16(__m128i __a, __m128i __b)
291 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
294 /// Horizontally adds the adjacent pairs of values contained in 2 packed
295 /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
296 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
297 /// 0x8000.
299 /// \headerfile <x86intrin.h>
301 /// This intrinsic corresponds to the \c PHADDSW instruction.
303 /// \param __a
304 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
305 /// horizontal sums of the values are stored in the lower bits of the
306 /// destination.
307 /// \param __b
308 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
309 /// horizontal sums of the values are stored in the upper bits of the
310 /// destination.
311 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
312 /// sums of both operands.
313 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
314 _mm_hadds_pi16(__m64 __a, __m64 __b)
316 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
319 /// Horizontally subtracts the adjacent pairs of values contained in 2
320 /// packed 128-bit vectors of [8 x i16].
322 /// \headerfile <x86intrin.h>
324 /// This intrinsic corresponds to the \c VPHSUBW instruction.
326 /// \param __a
327 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
328 /// horizontal differences between the values are stored in the lower bits of
329 /// the destination.
330 /// \param __b
331 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
332 /// horizontal differences between the values are stored in the upper bits of
333 /// the destination.
334 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
335 /// of both operands.
336 static __inline__ __m128i __DEFAULT_FN_ATTRS
337 _mm_hsub_epi16(__m128i __a, __m128i __b)
339 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
342 /// Horizontally subtracts the adjacent pairs of values contained in 2
343 /// packed 128-bit vectors of [4 x i32].
345 /// \headerfile <x86intrin.h>
347 /// This intrinsic corresponds to the \c VPHSUBD instruction.
349 /// \param __a
350 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
351 /// horizontal differences between the values are stored in the lower bits of
352 /// the destination.
353 /// \param __b
354 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
355 /// horizontal differences between the values are stored in the upper bits of
356 /// the destination.
357 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
358 /// of both operands.
359 static __inline__ __m128i __DEFAULT_FN_ATTRS
360 _mm_hsub_epi32(__m128i __a, __m128i __b)
362 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
365 /// Horizontally subtracts the adjacent pairs of values contained in 2
366 /// packed 64-bit vectors of [4 x i16].
368 /// \headerfile <x86intrin.h>
370 /// This intrinsic corresponds to the \c PHSUBW instruction.
372 /// \param __a
373 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
374 /// horizontal differences between the values are stored in the lower bits of
375 /// the destination.
376 /// \param __b
377 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
378 /// horizontal differences between the values are stored in the upper bits of
379 /// the destination.
380 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
381 /// of both operands.
382 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
383 _mm_hsub_pi16(__m64 __a, __m64 __b)
385 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
388 /// Horizontally subtracts the adjacent pairs of values contained in 2
389 /// packed 64-bit vectors of [2 x i32].
391 /// \headerfile <x86intrin.h>
393 /// This intrinsic corresponds to the \c PHSUBD instruction.
395 /// \param __a
396 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
397 /// horizontal differences between the values are stored in the lower bits of
398 /// the destination.
399 /// \param __b
400 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
401 /// horizontal differences between the values are stored in the upper bits of
402 /// the destination.
403 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
404 /// of both operands.
405 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
406 _mm_hsub_pi32(__m64 __a, __m64 __b)
408 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
411 /// Horizontally subtracts the adjacent pairs of values contained in 2
412 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
413 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
414 /// saturated to 0x8000.
416 /// \headerfile <x86intrin.h>
418 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
420 /// \param __a
421 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
422 /// horizontal differences between the values are stored in the lower bits of
423 /// the destination.
424 /// \param __b
425 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
426 /// horizontal differences between the values are stored in the upper bits of
427 /// the destination.
428 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
429 /// differences of both operands.
430 static __inline__ __m128i __DEFAULT_FN_ATTRS
431 _mm_hsubs_epi16(__m128i __a, __m128i __b)
433 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
436 /// Horizontally subtracts the adjacent pairs of values contained in 2
437 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
438 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
439 /// saturated to 0x8000.
441 /// \headerfile <x86intrin.h>
443 /// This intrinsic corresponds to the \c PHSUBSW instruction.
445 /// \param __a
446 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
447 /// horizontal differences between the values are stored in the lower bits of
448 /// the destination.
449 /// \param __b
450 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
451 /// horizontal differences between the values are stored in the upper bits of
452 /// the destination.
453 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
454 /// differences of both operands.
455 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
456 _mm_hsubs_pi16(__m64 __a, __m64 __b)
458 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
461 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
462 /// values contained in the first source operand and packed 8-bit signed
463 /// integer values contained in the second source operand, adds pairs of
464 /// contiguous products with signed saturation, and writes the 16-bit sums to
465 /// the corresponding bits in the destination.
467 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
468 /// both operands are multiplied, and the sum of both results is written to
469 /// bits [15:0] of the destination.
471 /// \headerfile <x86intrin.h>
473 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
475 /// \param __a
476 /// A 128-bit integer vector containing the first source operand.
477 /// \param __b
478 /// A 128-bit integer vector containing the second source operand.
479 /// \returns A 128-bit integer vector containing the sums of products of both
480 /// operands: \n
481 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
482 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
483 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
484 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
485 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
486 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
487 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
488 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
489 static __inline__ __m128i __DEFAULT_FN_ATTRS
490 _mm_maddubs_epi16(__m128i __a, __m128i __b)
492 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
495 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
496 /// values contained in the first source operand and packed 8-bit signed
497 /// integer values contained in the second source operand, adds pairs of
498 /// contiguous products with signed saturation, and writes the 16-bit sums to
499 /// the corresponding bits in the destination.
501 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
502 /// both operands are multiplied, and the sum of both results is written to
503 /// bits [15:0] of the destination.
505 /// \headerfile <x86intrin.h>
507 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
509 /// \param __a
510 /// A 64-bit integer vector containing the first source operand.
511 /// \param __b
512 /// A 64-bit integer vector containing the second source operand.
513 /// \returns A 64-bit integer vector containing the sums of products of both
514 /// operands: \n
515 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
516 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
517 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
518 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
519 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
520 _mm_maddubs_pi16(__m64 __a, __m64 __b)
522 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
525 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
526 /// products to the 18 most significant bits by right-shifting, rounds the
527 /// truncated value by adding 1, and writes bits [16:1] to the destination.
529 /// \headerfile <x86intrin.h>
531 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
533 /// \param __a
534 /// A 128-bit vector of [8 x i16] containing one of the source operands.
535 /// \param __b
536 /// A 128-bit vector of [8 x i16] containing one of the source operands.
537 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
538 /// products of both operands.
539 static __inline__ __m128i __DEFAULT_FN_ATTRS
540 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
542 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
545 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
546 /// products to the 18 most significant bits by right-shifting, rounds the
547 /// truncated value by adding 1, and writes bits [16:1] to the destination.
549 /// \headerfile <x86intrin.h>
551 /// This intrinsic corresponds to the \c PMULHRSW instruction.
553 /// \param __a
554 /// A 64-bit vector of [4 x i16] containing one of the source operands.
555 /// \param __b
556 /// A 64-bit vector of [4 x i16] containing one of the source operands.
557 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
558 /// products of both operands.
559 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
560 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
562 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
565 /// Copies the 8-bit integers from a 128-bit integer vector to the
566 /// destination or clears 8-bit values in the destination, as specified by
567 /// the second source operand.
569 /// \headerfile <x86intrin.h>
571 /// This intrinsic corresponds to the \c VPSHUFB instruction.
573 /// \param __a
574 /// A 128-bit integer vector containing the values to be copied.
575 /// \param __b
576 /// A 128-bit integer vector containing control bytes corresponding to
577 /// positions in the destination:
578 /// Bit 7: \n
579 /// 1: Clear the corresponding byte in the destination. \n
580 /// 0: Copy the selected source byte to the corresponding byte in the
581 /// destination. \n
582 /// Bits [6:4] Reserved. \n
583 /// Bits [3:0] select the source byte to be copied.
584 /// \returns A 128-bit integer vector containing the copied or cleared values.
585 static __inline__ __m128i __DEFAULT_FN_ATTRS
586 _mm_shuffle_epi8(__m128i __a, __m128i __b)
588 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
591 /// Copies the 8-bit integers from a 64-bit integer vector to the
592 /// destination or clears 8-bit values in the destination, as specified by
593 /// the second source operand.
595 /// \headerfile <x86intrin.h>
597 /// This intrinsic corresponds to the \c PSHUFB instruction.
599 /// \param __a
600 /// A 64-bit integer vector containing the values to be copied.
601 /// \param __b
602 /// A 64-bit integer vector containing control bytes corresponding to
603 /// positions in the destination:
604 /// Bit 7: \n
605 /// 1: Clear the corresponding byte in the destination. \n
606 /// 0: Copy the selected source byte to the corresponding byte in the
607 /// destination. \n
608 /// Bits [3:0] select the source byte to be copied.
609 /// \returns A 64-bit integer vector containing the copied or cleared values.
610 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
611 _mm_shuffle_pi8(__m64 __a, __m64 __b)
613 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
616 /// For each 8-bit integer in the first source operand, perform one of
617 /// the following actions as specified by the second source operand.
619 /// If the byte in the second source is negative, calculate the two's
620 /// complement of the corresponding byte in the first source, and write that
621 /// value to the destination. If the byte in the second source is positive,
622 /// copy the corresponding byte from the first source to the destination. If
623 /// the byte in the second source is zero, clear the corresponding byte in
624 /// the destination.
626 /// \headerfile <x86intrin.h>
628 /// This intrinsic corresponds to the \c VPSIGNB instruction.
630 /// \param __a
631 /// A 128-bit integer vector containing the values to be copied.
632 /// \param __b
633 /// A 128-bit integer vector containing control bytes corresponding to
634 /// positions in the destination.
635 /// \returns A 128-bit integer vector containing the resultant values.
636 static __inline__ __m128i __DEFAULT_FN_ATTRS
637 _mm_sign_epi8(__m128i __a, __m128i __b)
639 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
642 /// For each 16-bit integer in the first source operand, perform one of
643 /// the following actions as specified by the second source operand.
645 /// If the word in the second source is negative, calculate the two's
646 /// complement of the corresponding word in the first source, and write that
647 /// value to the destination. If the word in the second source is positive,
648 /// copy the corresponding word from the first source to the destination. If
649 /// the word in the second source is zero, clear the corresponding word in
650 /// the destination.
652 /// \headerfile <x86intrin.h>
654 /// This intrinsic corresponds to the \c VPSIGNW instruction.
656 /// \param __a
657 /// A 128-bit integer vector containing the values to be copied.
658 /// \param __b
659 /// A 128-bit integer vector containing control words corresponding to
660 /// positions in the destination.
661 /// \returns A 128-bit integer vector containing the resultant values.
662 static __inline__ __m128i __DEFAULT_FN_ATTRS
663 _mm_sign_epi16(__m128i __a, __m128i __b)
665 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
668 /// For each 32-bit integer in the first source operand, perform one of
669 /// the following actions as specified by the second source operand.
671 /// If the doubleword in the second source is negative, calculate the two's
672 /// complement of the corresponding word in the first source, and write that
673 /// value to the destination. If the doubleword in the second source is
674 /// positive, copy the corresponding word from the first source to the
675 /// destination. If the doubleword in the second source is zero, clear the
676 /// corresponding word in the destination.
678 /// \headerfile <x86intrin.h>
680 /// This intrinsic corresponds to the \c VPSIGND instruction.
682 /// \param __a
683 /// A 128-bit integer vector containing the values to be copied.
684 /// \param __b
685 /// A 128-bit integer vector containing control doublewords corresponding to
686 /// positions in the destination.
687 /// \returns A 128-bit integer vector containing the resultant values.
688 static __inline__ __m128i __DEFAULT_FN_ATTRS
689 _mm_sign_epi32(__m128i __a, __m128i __b)
691 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
694 /// For each 8-bit integer in the first source operand, perform one of
695 /// the following actions as specified by the second source operand.
697 /// If the byte in the second source is negative, calculate the two's
698 /// complement of the corresponding byte in the first source, and write that
699 /// value to the destination. If the byte in the second source is positive,
700 /// copy the corresponding byte from the first source to the destination. If
701 /// the byte in the second source is zero, clear the corresponding byte in
702 /// the destination.
704 /// \headerfile <x86intrin.h>
706 /// This intrinsic corresponds to the \c PSIGNB instruction.
708 /// \param __a
709 /// A 64-bit integer vector containing the values to be copied.
710 /// \param __b
711 /// A 64-bit integer vector containing control bytes corresponding to
712 /// positions in the destination.
713 /// \returns A 64-bit integer vector containing the resultant values.
714 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
715 _mm_sign_pi8(__m64 __a, __m64 __b)
717 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
720 /// For each 16-bit integer in the first source operand, perform one of
721 /// the following actions as specified by the second source operand.
723 /// If the word in the second source is negative, calculate the two's
724 /// complement of the corresponding word in the first source, and write that
725 /// value to the destination. If the word in the second source is positive,
726 /// copy the corresponding word from the first source to the destination. If
727 /// the word in the second source is zero, clear the corresponding word in
728 /// the destination.
730 /// \headerfile <x86intrin.h>
732 /// This intrinsic corresponds to the \c PSIGNW instruction.
734 /// \param __a
735 /// A 64-bit integer vector containing the values to be copied.
736 /// \param __b
737 /// A 64-bit integer vector containing control words corresponding to
738 /// positions in the destination.
739 /// \returns A 64-bit integer vector containing the resultant values.
740 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
741 _mm_sign_pi16(__m64 __a, __m64 __b)
743 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
746 /// For each 32-bit integer in the first source operand, perform one of
747 /// the following actions as specified by the second source operand.
749 /// If the doubleword in the second source is negative, calculate the two's
750 /// complement of the corresponding doubleword in the first source, and
751 /// write that value to the destination. If the doubleword in the second
752 /// source is positive, copy the corresponding doubleword from the first
753 /// source to the destination. If the doubleword in the second source is
754 /// zero, clear the corresponding doubleword in the destination.
756 /// \headerfile <x86intrin.h>
758 /// This intrinsic corresponds to the \c PSIGND instruction.
760 /// \param __a
761 /// A 64-bit integer vector containing the values to be copied.
762 /// \param __b
763 /// A 64-bit integer vector containing two control doublewords corresponding
764 /// to positions in the destination.
765 /// \returns A 64-bit integer vector containing the resultant values.
766 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
767 _mm_sign_pi32(__m64 __a, __m64 __b)
769 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
772 #undef __DEFAULT_FN_ATTRS
773 #undef __DEFAULT_FN_ATTRS_MMX
775 #endif /* __TMMINTRIN_H */