lib/Header: Fix Visual Studio builds try #2
[llvm-project.git] / clang / lib / Headers / tmmintrin.h
blob35533e115c7d26d46266f63b34833d57c283a6f8
1 /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *===-----------------------------------------------------------------------===
8 */
10 #ifndef __TMMINTRIN_H
11 #define __TMMINTRIN_H
13 #include <pmmintrin.h>
/* Define the default attributes for the functions in this file.
 *
 * Both variants force inlining, request no debug info, and set a minimum
 * vector width of 64. __DEFAULT_FN_ATTRS enables the "ssse3" target feature
 * (used by the 128-bit intrinsics); __DEFAULT_FN_ATTRS_MMX enables
 * "mmx,ssse3" (used by the 64-bit __m64 intrinsics). */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
19 /// Computes the absolute value of each of the packed 8-bit signed
20 /// integers in the source operand and stores the 8-bit unsigned integer
21 /// results in the destination.
22 ///
23 /// \headerfile <x86intrin.h>
24 ///
25 /// This intrinsic corresponds to the \c PABSB instruction.
26 ///
27 /// \param __a
28 /// A 64-bit vector of [8 x i8].
29 /// \returns A 64-bit integer vector containing the absolute values of the
30 /// elements in the operand.
31 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
32 _mm_abs_pi8(__m64 __a)
34 return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
37 /// Computes the absolute value of each of the packed 8-bit signed
38 /// integers in the source operand and stores the 8-bit unsigned integer
39 /// results in the destination.
40 ///
41 /// \headerfile <x86intrin.h>
42 ///
43 /// This intrinsic corresponds to the \c VPABSB instruction.
44 ///
45 /// \param __a
46 /// A 128-bit vector of [16 x i8].
47 /// \returns A 128-bit integer vector containing the absolute values of the
48 /// elements in the operand.
49 static __inline__ __m128i __DEFAULT_FN_ATTRS
50 _mm_abs_epi8(__m128i __a)
52 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
55 /// Computes the absolute value of each of the packed 16-bit signed
56 /// integers in the source operand and stores the 16-bit unsigned integer
57 /// results in the destination.
58 ///
59 /// \headerfile <x86intrin.h>
60 ///
61 /// This intrinsic corresponds to the \c PABSW instruction.
62 ///
63 /// \param __a
64 /// A 64-bit vector of [4 x i16].
65 /// \returns A 64-bit integer vector containing the absolute values of the
66 /// elements in the operand.
67 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
68 _mm_abs_pi16(__m64 __a)
70 return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
73 /// Computes the absolute value of each of the packed 16-bit signed
74 /// integers in the source operand and stores the 16-bit unsigned integer
75 /// results in the destination.
76 ///
77 /// \headerfile <x86intrin.h>
78 ///
79 /// This intrinsic corresponds to the \c VPABSW instruction.
80 ///
81 /// \param __a
82 /// A 128-bit vector of [8 x i16].
83 /// \returns A 128-bit integer vector containing the absolute values of the
84 /// elements in the operand.
85 static __inline__ __m128i __DEFAULT_FN_ATTRS
86 _mm_abs_epi16(__m128i __a)
88 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
91 /// Computes the absolute value of each of the packed 32-bit signed
92 /// integers in the source operand and stores the 32-bit unsigned integer
93 /// results in the destination.
94 ///
95 /// \headerfile <x86intrin.h>
96 ///
97 /// This intrinsic corresponds to the \c PABSD instruction.
98 ///
99 /// \param __a
100 /// A 64-bit vector of [2 x i32].
101 /// \returns A 64-bit integer vector containing the absolute values of the
102 /// elements in the operand.
103 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
104 _mm_abs_pi32(__m64 __a)
106 return (__m64)__builtin_ia32_pabsd((__v2si)__a);
109 /// Computes the absolute value of each of the packed 32-bit signed
110 /// integers in the source operand and stores the 32-bit unsigned integer
111 /// results in the destination.
113 /// \headerfile <x86intrin.h>
115 /// This intrinsic corresponds to the \c VPABSD instruction.
117 /// \param __a
118 /// A 128-bit vector of [4 x i32].
119 /// \returns A 128-bit integer vector containing the absolute values of the
120 /// elements in the operand.
121 static __inline__ __m128i __DEFAULT_FN_ATTRS
122 _mm_abs_epi32(__m128i __a)
124 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the cast result behaves as a single
   primary expression wherever the macro is used. */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the cast result behaves as a single
   primary expression wherever the macro is used. */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
173 /// Horizontally adds the adjacent pairs of values contained in 2 packed
174 /// 128-bit vectors of [8 x i16].
176 /// \headerfile <x86intrin.h>
178 /// This intrinsic corresponds to the \c VPHADDW instruction.
180 /// \param __a
181 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
182 /// horizontal sums of the values are stored in the lower bits of the
183 /// destination.
184 /// \param __b
185 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
186 /// horizontal sums of the values are stored in the upper bits of the
187 /// destination.
188 /// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
189 /// both operands.
190 static __inline__ __m128i __DEFAULT_FN_ATTRS
191 _mm_hadd_epi16(__m128i __a, __m128i __b)
193 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
196 /// Horizontally adds the adjacent pairs of values contained in 2 packed
197 /// 128-bit vectors of [4 x i32].
199 /// \headerfile <x86intrin.h>
201 /// This intrinsic corresponds to the \c VPHADDD instruction.
203 /// \param __a
204 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
205 /// horizontal sums of the values are stored in the lower bits of the
206 /// destination.
207 /// \param __b
208 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
209 /// horizontal sums of the values are stored in the upper bits of the
210 /// destination.
211 /// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
212 /// both operands.
213 static __inline__ __m128i __DEFAULT_FN_ATTRS
214 _mm_hadd_epi32(__m128i __a, __m128i __b)
216 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
219 /// Horizontally adds the adjacent pairs of values contained in 2 packed
220 /// 64-bit vectors of [4 x i16].
222 /// \headerfile <x86intrin.h>
224 /// This intrinsic corresponds to the \c PHADDW instruction.
226 /// \param __a
227 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
228 /// horizontal sums of the values are stored in the lower bits of the
229 /// destination.
230 /// \param __b
231 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
232 /// horizontal sums of the values are stored in the upper bits of the
233 /// destination.
234 /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
235 /// operands.
236 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
237 _mm_hadd_pi16(__m64 __a, __m64 __b)
239 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
242 /// Horizontally adds the adjacent pairs of values contained in 2 packed
243 /// 64-bit vectors of [2 x i32].
245 /// \headerfile <x86intrin.h>
247 /// This intrinsic corresponds to the \c PHADDD instruction.
249 /// \param __a
250 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
251 /// horizontal sums of the values are stored in the lower bits of the
252 /// destination.
253 /// \param __b
254 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
255 /// horizontal sums of the values are stored in the upper bits of the
256 /// destination.
257 /// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
258 /// operands.
259 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
260 _mm_hadd_pi32(__m64 __a, __m64 __b)
262 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
265 /// Horizontally adds the adjacent pairs of values contained in 2 packed
266 /// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
267 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
268 /// 0x8000.
270 /// \headerfile <x86intrin.h>
272 /// This intrinsic corresponds to the \c VPHADDSW instruction.
274 /// \param __a
275 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
276 /// horizontal sums of the values are stored in the lower bits of the
277 /// destination.
278 /// \param __b
279 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
280 /// horizontal sums of the values are stored in the upper bits of the
281 /// destination.
282 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
283 /// sums of both operands.
284 static __inline__ __m128i __DEFAULT_FN_ATTRS
285 _mm_hadds_epi16(__m128i __a, __m128i __b)
287 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
290 /// Horizontally adds the adjacent pairs of values contained in 2 packed
291 /// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
292 /// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
293 /// 0x8000.
295 /// \headerfile <x86intrin.h>
297 /// This intrinsic corresponds to the \c PHADDSW instruction.
299 /// \param __a
300 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
301 /// horizontal sums of the values are stored in the lower bits of the
302 /// destination.
303 /// \param __b
304 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
305 /// horizontal sums of the values are stored in the upper bits of the
306 /// destination.
307 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
308 /// sums of both operands.
309 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
310 _mm_hadds_pi16(__m64 __a, __m64 __b)
312 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
315 /// Horizontally subtracts the adjacent pairs of values contained in 2
316 /// packed 128-bit vectors of [8 x i16].
318 /// \headerfile <x86intrin.h>
320 /// This intrinsic corresponds to the \c VPHSUBW instruction.
322 /// \param __a
323 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
324 /// horizontal differences between the values are stored in the lower bits of
325 /// the destination.
326 /// \param __b
327 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
328 /// horizontal differences between the values are stored in the upper bits of
329 /// the destination.
330 /// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
331 /// of both operands.
332 static __inline__ __m128i __DEFAULT_FN_ATTRS
333 _mm_hsub_epi16(__m128i __a, __m128i __b)
335 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
338 /// Horizontally subtracts the adjacent pairs of values contained in 2
339 /// packed 128-bit vectors of [4 x i32].
341 /// \headerfile <x86intrin.h>
343 /// This intrinsic corresponds to the \c VPHSUBD instruction.
345 /// \param __a
346 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
347 /// horizontal differences between the values are stored in the lower bits of
348 /// the destination.
349 /// \param __b
350 /// A 128-bit vector of [4 x i32] containing one of the source operands. The
351 /// horizontal differences between the values are stored in the upper bits of
352 /// the destination.
353 /// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
354 /// of both operands.
355 static __inline__ __m128i __DEFAULT_FN_ATTRS
356 _mm_hsub_epi32(__m128i __a, __m128i __b)
358 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
361 /// Horizontally subtracts the adjacent pairs of values contained in 2
362 /// packed 64-bit vectors of [4 x i16].
364 /// \headerfile <x86intrin.h>
366 /// This intrinsic corresponds to the \c PHSUBW instruction.
368 /// \param __a
369 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
370 /// horizontal differences between the values are stored in the lower bits of
371 /// the destination.
372 /// \param __b
373 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
374 /// horizontal differences between the values are stored in the upper bits of
375 /// the destination.
376 /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
377 /// of both operands.
378 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
379 _mm_hsub_pi16(__m64 __a, __m64 __b)
381 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
384 /// Horizontally subtracts the adjacent pairs of values contained in 2
385 /// packed 64-bit vectors of [2 x i32].
387 /// \headerfile <x86intrin.h>
389 /// This intrinsic corresponds to the \c PHSUBD instruction.
391 /// \param __a
392 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
393 /// horizontal differences between the values are stored in the lower bits of
394 /// the destination.
395 /// \param __b
396 /// A 64-bit vector of [2 x i32] containing one of the source operands. The
397 /// horizontal differences between the values are stored in the upper bits of
398 /// the destination.
399 /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
400 /// of both operands.
401 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
402 _mm_hsub_pi32(__m64 __a, __m64 __b)
404 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
407 /// Horizontally subtracts the adjacent pairs of values contained in 2
408 /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
409 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
410 /// saturated to 0x8000.
412 /// \headerfile <x86intrin.h>
414 /// This intrinsic corresponds to the \c VPHSUBSW instruction.
416 /// \param __a
417 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
418 /// horizontal differences between the values are stored in the lower bits of
419 /// the destination.
420 /// \param __b
421 /// A 128-bit vector of [8 x i16] containing one of the source operands. The
422 /// horizontal differences between the values are stored in the upper bits of
423 /// the destination.
424 /// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
425 /// differences of both operands.
426 static __inline__ __m128i __DEFAULT_FN_ATTRS
427 _mm_hsubs_epi16(__m128i __a, __m128i __b)
429 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
432 /// Horizontally subtracts the adjacent pairs of values contained in 2
433 /// packed 64-bit vectors of [4 x i16]. Positive differences greater than
434 /// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
435 /// saturated to 0x8000.
437 /// \headerfile <x86intrin.h>
439 /// This intrinsic corresponds to the \c PHSUBSW instruction.
441 /// \param __a
442 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
443 /// horizontal differences between the values are stored in the lower bits of
444 /// the destination.
445 /// \param __b
446 /// A 64-bit vector of [4 x i16] containing one of the source operands. The
447 /// horizontal differences between the values are stored in the upper bits of
448 /// the destination.
449 /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
450 /// differences of both operands.
451 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
452 _mm_hsubs_pi16(__m64 __a, __m64 __b)
454 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
457 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
458 /// values contained in the first source operand and packed 8-bit signed
459 /// integer values contained in the second source operand, adds pairs of
460 /// contiguous products with signed saturation, and writes the 16-bit sums to
461 /// the corresponding bits in the destination.
463 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
464 /// both operands are multiplied, and the sum of both results is written to
465 /// bits [15:0] of the destination.
467 /// \headerfile <x86intrin.h>
469 /// This intrinsic corresponds to the \c VPMADDUBSW instruction.
471 /// \param __a
472 /// A 128-bit integer vector containing the first source operand.
473 /// \param __b
474 /// A 128-bit integer vector containing the second source operand.
475 /// \returns A 128-bit integer vector containing the sums of products of both
476 /// operands: \n
477 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
478 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
479 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
480 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
481 /// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
482 /// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
483 /// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
484 /// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
485 static __inline__ __m128i __DEFAULT_FN_ATTRS
486 _mm_maddubs_epi16(__m128i __a, __m128i __b)
488 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
491 /// Multiplies corresponding pairs of packed 8-bit unsigned integer
492 /// values contained in the first source operand and packed 8-bit signed
493 /// integer values contained in the second source operand, adds pairs of
494 /// contiguous products with signed saturation, and writes the 16-bit sums to
495 /// the corresponding bits in the destination.
497 /// For example, bits [7:0] of both operands are multiplied, bits [15:8] of
498 /// both operands are multiplied, and the sum of both results is written to
499 /// bits [15:0] of the destination.
501 /// \headerfile <x86intrin.h>
503 /// This intrinsic corresponds to the \c PMADDUBSW instruction.
505 /// \param __a
506 /// A 64-bit integer vector containing the first source operand.
507 /// \param __b
508 /// A 64-bit integer vector containing the second source operand.
509 /// \returns A 64-bit integer vector containing the sums of products of both
510 /// operands: \n
511 /// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
512 /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
513 /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
514 /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
515 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
516 _mm_maddubs_pi16(__m64 __a, __m64 __b)
518 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
521 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
522 /// products to the 18 most significant bits by right-shifting, rounds the
523 /// truncated value by adding 1, and writes bits [16:1] to the destination.
525 /// \headerfile <x86intrin.h>
527 /// This intrinsic corresponds to the \c VPMULHRSW instruction.
529 /// \param __a
530 /// A 128-bit vector of [8 x i16] containing one of the source operands.
531 /// \param __b
532 /// A 128-bit vector of [8 x i16] containing one of the source operands.
533 /// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
534 /// products of both operands.
535 static __inline__ __m128i __DEFAULT_FN_ATTRS
536 _mm_mulhrs_epi16(__m128i __a, __m128i __b)
538 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
541 /// Multiplies packed 16-bit signed integer values, truncates the 32-bit
542 /// products to the 18 most significant bits by right-shifting, rounds the
543 /// truncated value by adding 1, and writes bits [16:1] to the destination.
545 /// \headerfile <x86intrin.h>
547 /// This intrinsic corresponds to the \c PMULHRSW instruction.
549 /// \param __a
550 /// A 64-bit vector of [4 x i16] containing one of the source operands.
551 /// \param __b
552 /// A 64-bit vector of [4 x i16] containing one of the source operands.
553 /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
554 /// products of both operands.
555 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
556 _mm_mulhrs_pi16(__m64 __a, __m64 __b)
558 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
561 /// Copies the 8-bit integers from a 128-bit integer vector to the
562 /// destination or clears 8-bit values in the destination, as specified by
563 /// the second source operand.
565 /// \headerfile <x86intrin.h>
567 /// This intrinsic corresponds to the \c VPSHUFB instruction.
569 /// \param __a
570 /// A 128-bit integer vector containing the values to be copied.
571 /// \param __b
572 /// A 128-bit integer vector containing control bytes corresponding to
573 /// positions in the destination:
574 /// Bit 7: \n
575 /// 1: Clear the corresponding byte in the destination. \n
576 /// 0: Copy the selected source byte to the corresponding byte in the
577 /// destination. \n
578 /// Bits [6:4] Reserved. \n
579 /// Bits [3:0] select the source byte to be copied.
580 /// \returns A 128-bit integer vector containing the copied or cleared values.
581 static __inline__ __m128i __DEFAULT_FN_ATTRS
582 _mm_shuffle_epi8(__m128i __a, __m128i __b)
584 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
587 /// Copies the 8-bit integers from a 64-bit integer vector to the
588 /// destination or clears 8-bit values in the destination, as specified by
589 /// the second source operand.
591 /// \headerfile <x86intrin.h>
593 /// This intrinsic corresponds to the \c PSHUFB instruction.
595 /// \param __a
596 /// A 64-bit integer vector containing the values to be copied.
597 /// \param __b
598 /// A 64-bit integer vector containing control bytes corresponding to
599 /// positions in the destination:
600 /// Bit 7: \n
601 /// 1: Clear the corresponding byte in the destination. \n
602 /// 0: Copy the selected source byte to the corresponding byte in the
603 /// destination. \n
604 /// Bits [3:0] select the source byte to be copied.
605 /// \returns A 64-bit integer vector containing the copied or cleared values.
606 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
607 _mm_shuffle_pi8(__m64 __a, __m64 __b)
609 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
612 /// For each 8-bit integer in the first source operand, perform one of
613 /// the following actions as specified by the second source operand.
615 /// If the byte in the second source is negative, calculate the two's
616 /// complement of the corresponding byte in the first source, and write that
617 /// value to the destination. If the byte in the second source is positive,
618 /// copy the corresponding byte from the first source to the destination. If
619 /// the byte in the second source is zero, clear the corresponding byte in
620 /// the destination.
622 /// \headerfile <x86intrin.h>
624 /// This intrinsic corresponds to the \c VPSIGNB instruction.
626 /// \param __a
627 /// A 128-bit integer vector containing the values to be copied.
628 /// \param __b
629 /// A 128-bit integer vector containing control bytes corresponding to
630 /// positions in the destination.
631 /// \returns A 128-bit integer vector containing the resultant values.
632 static __inline__ __m128i __DEFAULT_FN_ATTRS
633 _mm_sign_epi8(__m128i __a, __m128i __b)
635 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
638 /// For each 16-bit integer in the first source operand, perform one of
639 /// the following actions as specified by the second source operand.
641 /// If the word in the second source is negative, calculate the two's
642 /// complement of the corresponding word in the first source, and write that
643 /// value to the destination. If the word in the second source is positive,
644 /// copy the corresponding word from the first source to the destination. If
645 /// the word in the second source is zero, clear the corresponding word in
646 /// the destination.
648 /// \headerfile <x86intrin.h>
650 /// This intrinsic corresponds to the \c VPSIGNW instruction.
652 /// \param __a
653 /// A 128-bit integer vector containing the values to be copied.
654 /// \param __b
655 /// A 128-bit integer vector containing control words corresponding to
656 /// positions in the destination.
657 /// \returns A 128-bit integer vector containing the resultant values.
658 static __inline__ __m128i __DEFAULT_FN_ATTRS
659 _mm_sign_epi16(__m128i __a, __m128i __b)
661 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
664 /// For each 32-bit integer in the first source operand, perform one of
665 /// the following actions as specified by the second source operand.
667 /// If the doubleword in the second source is negative, calculate the two's
668 /// complement of the corresponding word in the first source, and write that
669 /// value to the destination. If the doubleword in the second source is
670 /// positive, copy the corresponding word from the first source to the
671 /// destination. If the doubleword in the second source is zero, clear the
672 /// corresponding word in the destination.
674 /// \headerfile <x86intrin.h>
676 /// This intrinsic corresponds to the \c VPSIGND instruction.
678 /// \param __a
679 /// A 128-bit integer vector containing the values to be copied.
680 /// \param __b
681 /// A 128-bit integer vector containing control doublewords corresponding to
682 /// positions in the destination.
683 /// \returns A 128-bit integer vector containing the resultant values.
684 static __inline__ __m128i __DEFAULT_FN_ATTRS
685 _mm_sign_epi32(__m128i __a, __m128i __b)
687 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
690 /// For each 8-bit integer in the first source operand, perform one of
691 /// the following actions as specified by the second source operand.
693 /// If the byte in the second source is negative, calculate the two's
694 /// complement of the corresponding byte in the first source, and write that
695 /// value to the destination. If the byte in the second source is positive,
696 /// copy the corresponding byte from the first source to the destination. If
697 /// the byte in the second source is zero, clear the corresponding byte in
698 /// the destination.
700 /// \headerfile <x86intrin.h>
702 /// This intrinsic corresponds to the \c PSIGNB instruction.
704 /// \param __a
705 /// A 64-bit integer vector containing the values to be copied.
706 /// \param __b
707 /// A 64-bit integer vector containing control bytes corresponding to
708 /// positions in the destination.
709 /// \returns A 64-bit integer vector containing the resultant values.
710 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
711 _mm_sign_pi8(__m64 __a, __m64 __b)
713 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
716 /// For each 16-bit integer in the first source operand, perform one of
717 /// the following actions as specified by the second source operand.
719 /// If the word in the second source is negative, calculate the two's
720 /// complement of the corresponding word in the first source, and write that
721 /// value to the destination. If the word in the second source is positive,
722 /// copy the corresponding word from the first source to the destination. If
723 /// the word in the second source is zero, clear the corresponding word in
724 /// the destination.
726 /// \headerfile <x86intrin.h>
728 /// This intrinsic corresponds to the \c PSIGNW instruction.
730 /// \param __a
731 /// A 64-bit integer vector containing the values to be copied.
732 /// \param __b
733 /// A 64-bit integer vector containing control words corresponding to
734 /// positions in the destination.
735 /// \returns A 64-bit integer vector containing the resultant values.
736 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
737 _mm_sign_pi16(__m64 __a, __m64 __b)
739 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
742 /// For each 32-bit integer in the first source operand, perform one of
743 /// the following actions as specified by the second source operand.
745 /// If the doubleword in the second source is negative, calculate the two's
746 /// complement of the corresponding doubleword in the first source, and
747 /// write that value to the destination. If the doubleword in the second
748 /// source is positive, copy the corresponding doubleword from the first
749 /// source to the destination. If the doubleword in the second source is
750 /// zero, clear the corresponding doubleword in the destination.
752 /// \headerfile <x86intrin.h>
754 /// This intrinsic corresponds to the \c PSIGND instruction.
756 /// \param __a
757 /// A 64-bit integer vector containing the values to be copied.
758 /// \param __b
759 /// A 64-bit integer vector containing two control doublewords corresponding
760 /// to positions in the destination.
761 /// \returns A 64-bit integer vector containing the resultant values.
762 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
763 _mm_sign_pi32(__m64 __a, __m64 __b)
765 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
/* Drop the file-local attribute helper macros so they do not leak into code
   that includes this header. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX
771 #endif /* __TMMINTRIN_H */