clang/lib/Headers/mmintrin.h

   1 /*===---- mmintrin.h - MMX intrinsics --------------------------------------===
   2  *
   3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4  * See https://llvm.org/LICENSE.txt for license information.
   5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6  *
   7  *===-----------------------------------------------------------------------===
   8  */
   9
  10 #ifndef __MMINTRIN_H
  11 #define __MMINTRIN_H
  12
  13 #if !defined(__i386__) && !defined(__x86_64__)
  14 #error "This header is only meant to be used on x86 and x64 architecture"
  15 #endif
  16
  17 typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));
  18
  19 typedef long long __v1di __attribute__((__vector_size__(8)));
  20 typedef int __v2si __attribute__((__vector_size__(8)));
  21 typedef short __v4hi __attribute__((__vector_size__(8)));
  22 typedef char __v8qi __attribute__((__vector_size__(8)));
  23
  24 /* Unsigned types */
  25 typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8)));
  26 typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
  27 typedef unsigned short __v4hu __attribute__((__vector_size__(8)));
  28 typedef unsigned char __v8qu __attribute__((__vector_size__(8)));
  29
  30 /* We need an explicitly signed variant for char. Note that this shouldn't
  31  * appear in the interface though. */
  32 typedef signed char __v8qs __attribute__((__vector_size__(8)));
  33
  34 /* SSE/SSE2 types */
  35 typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
  36 typedef long long __v2di __attribute__ ((__vector_size__ (16)));
  37 typedef int __v4si __attribute__((__vector_size__(16)));
  38 typedef short __v8hi __attribute__((__vector_size__(16)));
  39 typedef char __v16qi __attribute__((__vector_size__(16)));
  40
  41 /* Define the default attributes for the functions in this file. */
  42 #if defined(__EVEX512__) && !defined(__AVX10_1_512__)
  43 #define __DEFAULT_FN_ATTRS_SSE2                                                \
  44   __attribute__((__always_inline__, __nodebug__,                               \
  45                  __target__("sse2,no-evex512"), __min_vector_width__(128)))
  46 #else
  47 #define __DEFAULT_FN_ATTRS_SSE2                                                \
  48   __attribute__((__always_inline__, __nodebug__, __target__("sse2"),           \
  49                  __min_vector_width__(128)))
  50 #endif
  51
  52 #if defined(__cplusplus) && (__cplusplus >= 201103L)
  53 #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr
  54 #else
  55 #define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2
  56 #endif
  57
  58 #define __trunc64(x)                                                           \
  59   (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0)
  60 #define __anyext128(x)                                                         \
  61   (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0,   \
  62                                     1, -1, -1)
  63
  64 /// Clears the MMX state by setting the state of the x87 stack registers
  65 ///    to empty.
  66 ///
  67 /// \headerfile <x86intrin.h>
  68 ///
  69 /// This intrinsic corresponds to the <c> EMMS </c> instruction.
  70 ///
  71 static __inline__ void __attribute__((__always_inline__, __nodebug__,
  72                                       __target__("mmx,no-evex512")))
  73 _mm_empty(void) {
  74   __builtin_ia32_emms();
  75 }
  76
  77 /// Constructs a 64-bit integer vector, setting the lower 32 bits to the
  78 ///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
  79 ///
  80 /// \headerfile <x86intrin.h>
  81 ///
  82 /// This intrinsic corresponds to the <c> MOVD </c> instruction.
  83 ///
  84 /// \param __i
  85 ///    A 32-bit integer value.
  86 /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
  87 ///    parameter. The upper 32 bits are set to 0.
  88 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
  89 _mm_cvtsi32_si64(int __i)
  90 {
  91     return __extension__ (__m64)(__v2si){__i, 0};
  92 }
  93
  94 /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
  95 ///    signed integer.
  96 ///
  97 /// \headerfile <x86intrin.h>
  98 ///
  99 /// This intrinsic corresponds to the <c> MOVD </c> instruction.
 100 ///
 101 /// \param __m
 102 ///    A 64-bit integer vector.
 103 /// \returns A 32-bit signed integer value containing the lower 32 bits of the
 104 ///    parameter.
 105 static __inline__ int __DEFAULT_FN_ATTRS_SSE2
 106 _mm_cvtsi64_si32(__m64 __m)
 107 {
 108     return ((__v2si)__m)[0];
 109 }
 110
 111 /// Casts a 64-bit signed integer value into a 64-bit integer vector.
 112 ///
 113 /// \headerfile <x86intrin.h>
 114 ///
 115 /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
 116 ///
 117 /// \param __i
 118 ///    A 64-bit signed integer.
 119 /// \returns A 64-bit integer vector containing the same bitwise pattern as the
 120 ///    parameter.
 121 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 122 _mm_cvtsi64_m64(long long __i)
 123 {
 124     return (__m64)__i;
 125 }
 126
 127 /// Casts a 64-bit integer vector into a 64-bit signed integer value.
 128 ///
 129 /// \headerfile <x86intrin.h>
 130 ///
 131 /// This intrinsic corresponds to the <c> MOVQ </c> instruction.
 132 ///
 133 /// \param __m
 134 ///    A 64-bit integer vector.
 135 /// \returns A 64-bit signed integer containing the same bitwise pattern as the
 136 ///    parameter.
 137 static __inline__ long long __DEFAULT_FN_ATTRS_SSE2
 138 _mm_cvtm64_si64(__m64 __m)
 139 {
 140     return (long long)__m;
 141 }
 142
 143 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
 144 ///    vector parameters of [4 x i16] into 8-bit signed integer values, and
 145 ///    constructs a 64-bit integer vector of [8 x i8] as the result.
 146 ///
 147 ///    Positive values greater than 0x7F are saturated to 0x7F. Negative values
 148 ///    less than 0x80 are saturated to 0x80.
 149 ///
 150 /// \headerfile <x86intrin.h>
 151 ///
 152 /// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
 153 ///
 154 /// \param __m1
 155 ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
 156 ///    written to the lower 32 bits of the result.
 157 /// \param __m2
 158 ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
 159 ///    written to the upper 32 bits of the result.
 160 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 161 ///    values.
 162 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 163 _mm_packs_pi16(__m64 __m1, __m64 __m2)
 164 {
 165     return __trunc64(__builtin_ia32_packsswb128(
 166         (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 167 }
 168
 169 /// Converts, with saturation, 32-bit signed integers from both 64-bit integer
 170 ///    vector parameters of [2 x i32] into 16-bit signed integer values, and
 171 ///    constructs a 64-bit integer vector of [4 x i16] as the result.
 172 ///
 173 ///    Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
 174 ///    values less than 0x8000 are saturated to 0x8000.
 175 ///
 176 /// \headerfile <x86intrin.h>
 177 ///
 178 /// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
 179 ///
 180 /// \param __m1
 181 ///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
 182 ///    written to the lower 32 bits of the result.
 183 /// \param __m2
 184 ///    A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
 185 ///    written to the upper 32 bits of the result.
 186 /// \returns A 64-bit integer vector of [4 x i16] containing the converted
 187 ///    values.
 188 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 189 _mm_packs_pi32(__m64 __m1, __m64 __m2)
 190 {
 191     return __trunc64(__builtin_ia32_packssdw128(
 192         (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){}));
 193 }
 194
 195 /// Converts, with saturation, 16-bit signed integers from both 64-bit integer
 196 ///    vector parameters of [4 x i16] into 8-bit unsigned integer values, and
 197 ///    constructs a 64-bit integer vector of [8 x i8] as the result.
 198 ///
 199 ///    Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
 200 ///    saturated to 0.
 201 ///
 202 /// \headerfile <x86intrin.h>
 203 ///
 204 /// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
 205 ///
 206 /// \param __m1
 207 ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
 208 ///    written to the lower 32 bits of the result.
 209 /// \param __m2
 210 ///    A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
 211 ///    written to the upper 32 bits of the result.
 212 /// \returns A 64-bit integer vector of [8 x i8] containing the converted
 213 ///    values.
 214 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 215 _mm_packs_pu16(__m64 __m1, __m64 __m2)
 216 {
 217     return __trunc64(__builtin_ia32_packuswb128(
 218         (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){}));
 219 }
 220
 221 /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
 222 ///    and interleaves them into a 64-bit integer vector of [8 x i8].
 223 ///
 224 /// \headerfile <x86intrin.h>
 225 ///
 226 /// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
 227 ///
 228 /// \param __m1
 229 ///    A 64-bit integer vector of [8 x i8]. \n
 230 ///    Bits [39:32] are written to bits [7:0] of the result. \n
 231 ///    Bits [47:40] are written to bits [23:16] of the result. \n
 232 ///    Bits [55:48] are written to bits [39:32] of the result. \n
 233 ///    Bits [63:56] are written to bits [55:48] of the result.
 234 /// \param __m2
 235 ///    A 64-bit integer vector of [8 x i8].
 236 ///    Bits [39:32] are written to bits [15:8] of the result. \n
 237 ///    Bits [47:40] are written to bits [31:24] of the result. \n
 238 ///    Bits [55:48] are written to bits [47:40] of the result. \n
 239 ///    Bits [63:56] are written to bits [63:56] of the result.
 240 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 241 ///    values.
 242 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 243 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
 244 {
 245     return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
 246                                           4, 12, 5, 13, 6, 14, 7, 15);
 247 }
 248
 249 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
 250 ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
 251 ///
 252 /// \headerfile <x86intrin.h>
 253 ///
 254 /// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
 255 ///
 256 /// \param __m1
 257 ///    A 64-bit integer vector of [4 x i16].
 258 ///    Bits [47:32] are written to bits [15:0] of the result. \n
 259 ///    Bits [63:48] are written to bits [47:32] of the result.
 260 /// \param __m2
 261 ///    A 64-bit integer vector of [4 x i16].
 262 ///    Bits [47:32] are written to bits [31:16] of the result. \n
 263 ///    Bits [63:48] are written to bits [63:48] of the result.
 264 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 265 ///    values.
 266 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 267 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
 268 {
 269     return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
 270                                           2, 6, 3, 7);
 271 }
 272
 273 /// Unpacks the upper 32 bits from two 64-bit integer vectors of
 274 ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
 275 ///
 276 /// \headerfile <x86intrin.h>
 277 ///
 278 /// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
 279 ///
 280 /// \param __m1
 281 ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
 282 ///    the lower 32 bits of the result.
 283 /// \param __m2
 284 ///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
 285 ///    the upper 32 bits of the result.
 286 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 287 ///    values.
 288 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 289 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
 290 {
 291     return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3);
 292 }
 293
 294 /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
 295 ///    and interleaves them into a 64-bit integer vector of [8 x i8].
 296 ///
 297 /// \headerfile <x86intrin.h>
 298 ///
 299 /// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
 300 ///
 301 /// \param __m1
 302 ///    A 64-bit integer vector of [8 x i8].
 303 ///    Bits [7:0] are written to bits [7:0] of the result. \n
 304 ///    Bits [15:8] are written to bits [23:16] of the result. \n
 305 ///    Bits [23:16] are written to bits [39:32] of the result. \n
 306 ///    Bits [31:24] are written to bits [55:48] of the result.
 307 /// \param __m2
 308 ///    A 64-bit integer vector of [8 x i8].
 309 ///    Bits [7:0] are written to bits [15:8] of the result. \n
 310 ///    Bits [15:8] are written to bits [31:24] of the result. \n
 311 ///    Bits [23:16] are written to bits [47:40] of the result. \n
 312 ///    Bits [31:24] are written to bits [63:56] of the result.
 313 /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
 314 ///    values.
 315 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 316 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
 317 {
 318     return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2,
 319                                           0, 8, 1, 9, 2, 10, 3, 11);
 320 }
 321
 322 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
 323 ///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
 324 ///
 325 /// \headerfile <x86intrin.h>
 326 ///
 327 /// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
 328 ///
 329 /// \param __m1
 330 ///    A 64-bit integer vector of [4 x i16].
 331 ///    Bits [15:0] are written to bits [15:0] of the result. \n
 332 ///    Bits [31:16] are written to bits [47:32] of the result.
 333 /// \param __m2
 334 ///    A 64-bit integer vector of [4 x i16].
 335 ///    Bits [15:0] are written to bits [31:16] of the result. \n
 336 ///    Bits [31:16] are written to bits [63:48] of the result.
 337 /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
 338 ///    values.
 339 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 340 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
 341 {
 342     return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2,
 343                                           0, 4, 1, 5);
 344 }
 345
 346 /// Unpacks the lower 32 bits from two 64-bit integer vectors of
 347 ///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
 348 ///
 349 /// \headerfile <x86intrin.h>
 350 ///
 351 /// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
 352 ///
 353 /// \param __m1
 354 ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
 355 ///    the lower 32 bits of the result.
 356 /// \param __m2
 357 ///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
 358 ///    the upper 32 bits of the result.
 359 /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
 360 ///    values.
 361 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 362 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
 363 {
 364     return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2);
 365 }
 366
 367 /// Adds each 8-bit integer element of the first 64-bit integer vector
 368 ///    of [8 x i8] to the corresponding 8-bit integer element of the second
 369 ///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
 370 ///    packed into a 64-bit integer vector of [8 x i8].
 371 ///
 372 /// \headerfile <x86intrin.h>
 373 ///
 374 /// This intrinsic corresponds to the <c> PADDB </c> instruction.
 375 ///
 376 /// \param __m1
 377 ///    A 64-bit integer vector of [8 x i8].
 378 /// \param __m2
 379 ///    A 64-bit integer vector of [8 x i8].
 380 /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
 381 ///    parameters.
 382 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 383 _mm_add_pi8(__m64 __m1, __m64 __m2)
 384 {
 385     return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2));
 386 }
 387
 388 /// Adds each 16-bit integer element of the first 64-bit integer vector
 389 ///    of [4 x i16] to the corresponding 16-bit integer element of the second
 390 ///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
 391 ///    packed into a 64-bit integer vector of [4 x i16].
 392 ///
 393 /// \headerfile <x86intrin.h>
 394 ///
 395 /// This intrinsic corresponds to the <c> PADDW </c> instruction.
 396 ///
 397 /// \param __m1
 398 ///    A 64-bit integer vector of [4 x i16].
 399 /// \param __m2
 400 ///    A 64-bit integer vector of [4 x i16].
 401 /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
 402 ///    parameters.
 403 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 404 _mm_add_pi16(__m64 __m1, __m64 __m2)
 405 {
 406     return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2));
 407 }
 408
 409 /// Adds each 32-bit integer element of the first 64-bit integer vector
 410 ///    of [2 x i32] to the corresponding 32-bit integer element of the second
 411 ///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
 412 ///    packed into a 64-bit integer vector of [2 x i32].
 413 ///
 414 /// \headerfile <x86intrin.h>
 415 ///
 416 /// This intrinsic corresponds to the <c> PADDD </c> instruction.
 417 ///
 418 /// \param __m1
 419 ///    A 64-bit integer vector of [2 x i32].
 420 /// \param __m2
 421 ///    A 64-bit integer vector of [2 x i32].
 422 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
 423 ///    parameters.
 424 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 425 _mm_add_pi32(__m64 __m1, __m64 __m2)
 426 {
 427     return (__m64)(((__v2su)__m1) + ((__v2su)__m2));
 428 }
 429
 430 /// Adds, with saturation, each 8-bit signed integer element of the first
 431 ///    64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
 432 ///    integer element of the second 64-bit integer vector of [8 x i8].
 433 ///
 434 ///    Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
 435 ///    less than 0x80 are saturated to 0x80. The results are packed into a
 436 ///    64-bit integer vector of [8 x i8].
 437 ///
 438 /// \headerfile <x86intrin.h>
 439 ///
 440 /// This intrinsic corresponds to the <c> PADDSB </c> instruction.
 441 ///
 442 /// \param __m1
 443 ///    A 64-bit integer vector of [8 x i8].
 444 /// \param __m2
 445 ///    A 64-bit integer vector of [8 x i8].
 446 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
 447 ///    of both parameters.
 448 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 449 _mm_adds_pi8(__m64 __m1, __m64 __m2)
 450 {
 451     return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2);
 452 }
 453
 454 /// Adds, with saturation, each 16-bit signed integer element of the first
 455 ///    64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
 456 ///    integer element of the second 64-bit integer vector of [4 x i16].
 457 ///
 458 ///    Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
 459 ///    less than 0x8000 are saturated to 0x8000. The results are packed into a
 460 ///    64-bit integer vector of [4 x i16].
 461 ///
 462 /// \headerfile <x86intrin.h>
 463 ///
 464 /// This intrinsic corresponds to the <c> PADDSW </c> instruction.
 465 ///
 466 /// \param __m1
 467 ///    A 64-bit integer vector of [4 x i16].
 468 /// \param __m2
 469 ///    A 64-bit integer vector of [4 x i16].
 470 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
 471 ///    of both parameters.
 472 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 473 _mm_adds_pi16(__m64 __m1, __m64 __m2)
 474 {
 475     return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2);
 476 }
 477
 478 /// Adds, with saturation, each 8-bit unsigned integer element of the first
 479 ///    64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
 480 ///    integer element of the second 64-bit integer vector of [8 x i8].
 481 ///
 482 ///    Sums greater than 0xFF are saturated to 0xFF. The results are packed
 483 ///    into a 64-bit integer vector of [8 x i8].
 484 ///
 485 /// \headerfile <x86intrin.h>
 486 ///
 487 /// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
 488 ///
 489 /// \param __m1
 490 ///    A 64-bit integer vector of [8 x i8].
 491 /// \param __m2
 492 ///    A 64-bit integer vector of [8 x i8].
 493 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 494 ///    unsigned sums of both parameters.
 495 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 496 _mm_adds_pu8(__m64 __m1, __m64 __m2)
 497 {
 498     return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2);
 499 }
 500
 501 /// Adds, with saturation, each 16-bit unsigned integer element of the first
 502 ///    64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
 503 ///    integer element of the second 64-bit integer vector of [4 x i16].
 504 ///
 505 ///    Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
 506 ///    into a 64-bit integer vector of [4 x i16].
 507 ///
 508 /// \headerfile <x86intrin.h>
 509 ///
 510 /// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
 511 ///
 512 /// \param __m1
 513 ///    A 64-bit integer vector of [4 x i16].
 514 /// \param __m2
 515 ///    A 64-bit integer vector of [4 x i16].
 516 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 517 ///    unsigned sums of both parameters.
 518 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 519 _mm_adds_pu16(__m64 __m1, __m64 __m2)
 520 {
 521     return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2);
 522 }
 523
 524 /// Subtracts each 8-bit integer element of the second 64-bit integer
 525 ///    vector of [8 x i8] from the corresponding 8-bit integer element of the
 526 ///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
 527 ///    are packed into a 64-bit integer vector of [8 x i8].
 528 ///
 529 /// \headerfile <x86intrin.h>
 530 ///
 531 /// This intrinsic corresponds to the <c> PSUBB </c> instruction.
 532 ///
 533 /// \param __m1
 534 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
 535 /// \param __m2
 536 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 537 /// \returns A 64-bit integer vector of [8 x i8] containing the differences of
 538 ///    both parameters.
 539 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 540 _mm_sub_pi8(__m64 __m1, __m64 __m2)
 541 {
 542     return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2));
 543 }
 544
 545 /// Subtracts each 16-bit integer element of the second 64-bit integer
 546 ///    vector of [4 x i16] from the corresponding 16-bit integer element of the
 547 ///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
 548 ///    results are packed into a 64-bit integer vector of [4 x i16].
 549 ///
 550 /// \headerfile <x86intrin.h>
 551 ///
 552 /// This intrinsic corresponds to the <c> PSUBW </c> instruction.
 553 ///
 554 /// \param __m1
 555 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
 556 /// \param __m2
 557 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 558 /// \returns A 64-bit integer vector of [4 x i16] containing the differences of
 559 ///    both parameters.
 560 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 561 _mm_sub_pi16(__m64 __m1, __m64 __m2)
 562 {
 563     return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2));
 564 }
 565
 566 /// Subtracts each 32-bit integer element of the second 64-bit integer
 567 ///    vector of [2 x i32] from the corresponding 32-bit integer element of the
 568 ///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
 569 ///    results are packed into a 64-bit integer vector of [2 x i32].
 570 ///
 571 /// \headerfile <x86intrin.h>
 572 ///
 573 /// This intrinsic corresponds to the <c> PSUBD </c> instruction.
 574 ///
 575 /// \param __m1
 576 ///    A 64-bit integer vector of [2 x i32] containing the minuends.
 577 /// \param __m2
 578 ///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
 579 /// \returns A 64-bit integer vector of [2 x i32] containing the differences of
 580 ///    both parameters.
 581 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 582 _mm_sub_pi32(__m64 __m1, __m64 __m2)
 583 {
 584     return (__m64)(((__v2su)__m1) - ((__v2su)__m2));
 585 }
 586
 587 /// Subtracts, with saturation, each 8-bit signed integer element of the second
 588 ///    64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
 589 ///    integer element of the first 64-bit integer vector of [8 x i8].
 590 ///
 591 ///    Positive results greater than 0x7F are saturated to 0x7F. Negative
 592 ///    results less than 0x80 are saturated to 0x80. The results are packed
 593 ///    into a 64-bit integer vector of [8 x i8].
 594 ///
 595 /// \headerfile <x86intrin.h>
 596 ///
 597 /// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
 598 ///
 599 /// \param __m1
 600 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
 601 /// \param __m2
 602 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 603 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 604 ///    differences of both parameters.
 605 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 606 _mm_subs_pi8(__m64 __m1, __m64 __m2)
 607 {
 608     return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2);
 609 }
 610
 611 /// Subtracts, with saturation, each 16-bit signed integer element of the
 612 ///    second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
 613 ///    signed integer element of the first 64-bit integer vector of [4 x i16].
 614 ///
 615 ///    Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
 616 ///    results less than 0x8000 are saturated to 0x8000. The results are packed
 617 ///    into a 64-bit integer vector of [4 x i16].
 618 ///
 619 /// \headerfile <x86intrin.h>
 620 ///
 621 /// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
 622 ///
 623 /// \param __m1
 624 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
 625 /// \param __m2
 626 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 627 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 628 ///    differences of both parameters.
 629 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 630 _mm_subs_pi16(__m64 __m1, __m64 __m2)
 631 {
 632     return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2);
 633 }
 634
 635 /// Subtracts each 8-bit unsigned integer element of the second 64-bit
 636 ///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
 637 ///    element of the first 64-bit integer vector of [8 x i8].
 638 ///
 639 ///    If an element of the first vector is less than the corresponding element
 640 ///    of the second vector, the result is saturated to 0. The results are
 641 ///    packed into a 64-bit integer vector of [8 x i8].
 642 ///
 643 /// \headerfile <x86intrin.h>
 644 ///
 645 /// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
 646 ///
 647 /// \param __m1
 648 ///    A 64-bit integer vector of [8 x i8] containing the minuends.
 649 /// \param __m2
 650 ///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
 651 /// \returns A 64-bit integer vector of [8 x i8] containing the saturated
 652 ///    differences of both parameters.
 653 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 654 _mm_subs_pu8(__m64 __m1, __m64 __m2)
 655 {
 656     return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2);
 657 }
 658
 659 /// Subtracts each 16-bit unsigned integer element of the second 64-bit
 660 ///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
 661 ///    integer element of the first 64-bit integer vector of [4 x i16].
 662 ///
 663 ///    If an element of the first vector is less than the corresponding element
 664 ///    of the second vector, the result is saturated to 0. The results are
 665 ///    packed into a 64-bit integer vector of [4 x i16].
 666 ///
 667 /// \headerfile <x86intrin.h>
 668 ///
 669 /// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
 670 ///
 671 /// \param __m1
 672 ///    A 64-bit integer vector of [4 x i16] containing the minuends.
 673 /// \param __m2
 674 ///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
 675 /// \returns A 64-bit integer vector of [4 x i16] containing the saturated
 676 ///    differences of both parameters.
 677 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 678 _mm_subs_pu16(__m64 __m1, __m64 __m2)
 679 {
 680     return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2);
 681 }
 682
 683 /// Multiplies each 16-bit signed integer element of the first 64-bit
 684 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
 685 ///    element of the second 64-bit integer vector of [4 x i16] and get four
 686 ///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
 687 ///    The lower 32 bits of these two sums are packed into a 64-bit integer
 688 ///    vector of [2 x i32].
 689 ///
 690 ///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
 691 ///    of both parameters are multiplied, and the sum of both results is written
 692 ///    to bits [31:0] of the result.
 693 ///
 694 /// \headerfile <x86intrin.h>
 695 ///
 696 /// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
 697 ///
 698 /// \param __m1
 699 ///    A 64-bit integer vector of [4 x i16].
 700 /// \param __m2
 701 ///    A 64-bit integer vector of [4 x i16].
 702 /// \returns A 64-bit integer vector of [2 x i32] containing the sums of
 703 ///    products of both parameters.
 704 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 705 _mm_madd_pi16(__m64 __m1, __m64 __m2)
 706 {
 707     return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1),
 708                                                (__v8hi)__anyext128(__m2)));
 709 }
 710
 711 /// Multiplies each 16-bit signed integer element of the first 64-bit
 712 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
 713 ///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
 714 ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
 715 ///
 716 /// \headerfile <x86intrin.h>
 717 ///
 718 /// This intrinsic corresponds to the <c> PMULHW </c> instruction.
 719 ///
 720 /// \param __m1
 721 ///    A 64-bit integer vector of [4 x i16].
 722 /// \param __m2
 723 ///    A 64-bit integer vector of [4 x i16].
 724 /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
 725 ///    of the products of both parameters.
 726 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 727 _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
 728 {
 729     return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1),
 730                                               (__v8hi)__anyext128(__m2)));
 731 }
 732
 733 /// Multiplies each 16-bit signed integer element of the first 64-bit
 734 ///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
 735 ///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
 736 ///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
 737 ///
 738 /// \headerfile <x86intrin.h>
 739 ///
 740 /// This intrinsic corresponds to the <c> PMULLW </c> instruction.
 741 ///
 742 /// \param __m1
 743 ///    A 64-bit integer vector of [4 x i16].
 744 /// \param __m2
 745 ///    A 64-bit integer vector of [4 x i16].
 746 /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
 747 ///    of the products of both parameters.
 748 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 749 _mm_mullo_pi16(__m64 __m1, __m64 __m2)
 750 {
 751     return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2));
 752 }
 753
 754 /// Left-shifts each 16-bit signed integer element of the first
 755 ///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
 756 ///    of bits specified by the second parameter, which is a 64-bit integer. The
 757 ///    lower 16 bits of the results are packed into a 64-bit integer vector of
 758 ///    [4 x i16].
 759 ///
 760 /// \headerfile <x86intrin.h>
 761 ///
 762 /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
 763 ///
 764 /// \param __m
 765 ///    A 64-bit integer vector of [4 x i16].
 766 /// \param __count
 767 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 768 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
 769 ///    values. If \a __count is greater or equal to 16, the result is set to all
 770 ///    0.
 771 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 772 _mm_sll_pi16(__m64 __m, __m64 __count)
 773 {
 774     return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m),
 775                                              (__v8hi)__anyext128(__count)));
 776 }
 777
 778 /// Left-shifts each 16-bit signed integer element of a 64-bit integer
 779 ///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
 780 ///    The lower 16 bits of the results are packed into a 64-bit integer vector
 781 ///    of [4 x i16].
 782 ///
 783 /// \headerfile <x86intrin.h>
 784 ///
 785 /// This intrinsic corresponds to the <c> PSLLW </c> instruction.
 786 ///
 787 /// \param __m
 788 ///    A 64-bit integer vector of [4 x i16].
 789 /// \param __count
 790 ///    A 32-bit integer value.
 791 /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
 792 ///    values. If \a __count is greater or equal to 16, the result is set to all
 793 ///    0.
 794 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 795 _mm_slli_pi16(__m64 __m, int __count)
 796 {
 797     return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m),
 798                                               __count));
 799 }
 800
 801 /// Left-shifts each 32-bit signed integer element of the first
 802 ///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
 803 ///    of bits specified by the second parameter, which is a 64-bit integer. The
 804 ///    lower 32 bits of the results are packed into a 64-bit integer vector of
 805 ///    [2 x i32].
 806 ///
 807 /// \headerfile <x86intrin.h>
 808 ///
 809 /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
 810 ///
 811 /// \param __m
 812 ///    A 64-bit integer vector of [2 x i32].
 813 /// \param __count
 814 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 815 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
 816 ///    values. If \a __count is greater or equal to 32, the result is set to all
 817 ///    0.
 818 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 819 _mm_sll_pi32(__m64 __m, __m64 __count)
 820 {
 821     return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m),
 822                                              (__v4si)__anyext128(__count)));
 823 }
 824
 825 /// Left-shifts each 32-bit signed integer element of a 64-bit integer
 826 ///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
 827 ///    The lower 32 bits of the results are packed into a 64-bit integer vector
 828 ///    of [2 x i32].
 829 ///
 830 /// \headerfile <x86intrin.h>
 831 ///
 832 /// This intrinsic corresponds to the <c> PSLLD </c> instruction.
 833 ///
 834 /// \param __m
 835 ///    A 64-bit integer vector of [2 x i32].
 836 /// \param __count
 837 ///    A 32-bit integer value.
 838 /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
 839 ///    values. If \a __count is greater or equal to 32, the result is set to all
 840 ///    0.
 841 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 842 _mm_slli_pi32(__m64 __m, int __count)
 843 {
 844     return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m),
 845                                               __count));
 846 }
 847
 848 /// Left-shifts the first 64-bit integer parameter by the number of bits
 849 ///    specified by the second 64-bit integer parameter. The lower 64 bits of
 850 ///    result are returned.
 851 ///
 852 /// \headerfile <x86intrin.h>
 853 ///
 854 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
 855 ///
 856 /// \param __m
 857 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 858 /// \param __count
 859 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 860 /// \returns A 64-bit integer vector containing the left-shifted value. If
 861 ///     \a __count is greater or equal to 64, the result is set to 0.
 862 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 863 _mm_sll_si64(__m64 __m, __m64 __count)
 864 {
 865     return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m),
 866                                              (__v2di)__anyext128(__count)));
 867 }
 868
 869 /// Left-shifts the first parameter, which is a 64-bit integer, by the
 870 ///    number of bits specified by the second parameter, which is a 32-bit
 871 ///    integer. The lower 64 bits of result are returned.
 872 ///
 873 /// \headerfile <x86intrin.h>
 874 ///
 875 /// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
 876 ///
 877 /// \param __m
 878 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 879 /// \param __count
 880 ///    A 32-bit integer value.
 881 /// \returns A 64-bit integer vector containing the left-shifted value. If
 882 ///     \a __count is greater or equal to 64, the result is set to 0.
 883 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 884 _mm_slli_si64(__m64 __m, int __count)
 885 {
 886     return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m),
 887                                               __count));
 888 }
 889
 890 /// Right-shifts each 16-bit integer element of the first parameter,
 891 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
 892 ///    specified by the second parameter, which is a 64-bit integer.
 893 ///
 894 ///    High-order bits are filled with the sign bit of the initial value of each
 895 ///    16-bit element. The 16-bit results are packed into a 64-bit integer
 896 ///    vector of [4 x i16].
 897 ///
 898 /// \headerfile <x86intrin.h>
 899 ///
 900 /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
 901 ///
 902 /// \param __m
 903 ///    A 64-bit integer vector of [4 x i16].
 904 /// \param __count
 905 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 906 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 907 ///    values.
 908 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 909 _mm_sra_pi16(__m64 __m, __m64 __count)
 910 {
 911     return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m),
 912                                              (__v8hi)__anyext128(__count)));
 913 }
 914
 915 /// Right-shifts each 16-bit integer element of a 64-bit integer vector
 916 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
 917 ///
 918 ///    High-order bits are filled with the sign bit of the initial value of each
 919 ///    16-bit element. The 16-bit results are packed into a 64-bit integer
 920 ///    vector of [4 x i16].
 921 ///
 922 /// \headerfile <x86intrin.h>
 923 ///
 924 /// This intrinsic corresponds to the <c> PSRAW </c> instruction.
 925 ///
 926 /// \param __m
 927 ///    A 64-bit integer vector of [4 x i16].
 928 /// \param __count
 929 ///    A 32-bit integer value.
 930 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
 931 ///    values.
 932 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 933 _mm_srai_pi16(__m64 __m, int __count)
 934 {
 935     return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m),
 936                                               __count));
 937 }
 938
 939 /// Right-shifts each 32-bit integer element of the first parameter,
 940 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
 941 ///    specified by the second parameter, which is a 64-bit integer.
 942 ///
 943 ///    High-order bits are filled with the sign bit of the initial value of each
 944 ///    32-bit element. The 32-bit results are packed into a 64-bit integer
 945 ///    vector of [2 x i32].
 946 ///
 947 /// \headerfile <x86intrin.h>
 948 ///
 949 /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
 950 ///
 951 /// \param __m
 952 ///    A 64-bit integer vector of [2 x i32].
 953 /// \param __count
 954 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
 955 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 956 ///    values.
 957 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 958 _mm_sra_pi32(__m64 __m, __m64 __count)
 959 {
 960     return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m),
 961                                              (__v4si)__anyext128(__count)));
 962 }
 963
 964 /// Right-shifts each 32-bit integer element of a 64-bit integer vector
 965 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
 966 ///
 967 ///    High-order bits are filled with the sign bit of the initial value of each
 968 ///    32-bit element. The 32-bit results are packed into a 64-bit integer
 969 ///    vector of [2 x i32].
 970 ///
 971 /// \headerfile <x86intrin.h>
 972 ///
 973 /// This intrinsic corresponds to the <c> PSRAD </c> instruction.
 974 ///
 975 /// \param __m
 976 ///    A 64-bit integer vector of [2 x i32].
 977 /// \param __count
 978 ///    A 32-bit integer value.
 979 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
 980 ///    values.
 981 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
 982 _mm_srai_pi32(__m64 __m, int __count)
 983 {
 984     return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m),
 985                                               __count));
 986 }
 987
 988 /// Right-shifts each 16-bit integer element of the first parameter,
 989 ///    which is a 64-bit integer vector of [4 x i16], by the number of bits
 990 ///    specified by the second parameter, which is a 64-bit integer.
 991 ///
 992 ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
 993 ///    integer vector of [4 x i16].
 994 ///
 995 /// \headerfile <x86intrin.h>
 996 ///
 997 /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
 998 ///
 999 /// \param __m
1000 ///    A 64-bit integer vector of [4 x i16].
1001 /// \param __count
1002 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1003 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
1004 ///    values.
1005 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1006 _mm_srl_pi16(__m64 __m, __m64 __count)
1007 {
1008     return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m),
1009                                              (__v8hi)__anyext128(__count)));
1010 }
1011
1012 /// Right-shifts each 16-bit integer element of a 64-bit integer vector
1013 ///    of [4 x i16] by the number of bits specified by a 32-bit integer.
1014 ///
1015 ///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
1016 ///    integer vector of [4 x i16].
1017 ///
1018 /// \headerfile <x86intrin.h>
1019 ///
1020 /// This intrinsic corresponds to the <c> PSRLW </c> instruction.
1021 ///
1022 /// \param __m
1023 ///    A 64-bit integer vector of [4 x i16].
1024 /// \param __count
1025 ///    A 32-bit integer value.
1026 /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
1027 ///    values.
1028 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1029 _mm_srli_pi16(__m64 __m, int __count)
1030 {
1031     return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m),
1032                                               __count));
1033 }
1034
1035 /// Right-shifts each 32-bit integer element of the first parameter,
1036 ///    which is a 64-bit integer vector of [2 x i32], by the number of bits
1037 ///    specified by the second parameter, which is a 64-bit integer.
1038 ///
1039 ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
1040 ///    integer vector of [2 x i32].
1041 ///
1042 /// \headerfile <x86intrin.h>
1043 ///
1044 /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1045 ///
1046 /// \param __m
1047 ///    A 64-bit integer vector of [2 x i32].
1048 /// \param __count
1049 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1050 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1051 ///    values.
1052 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1053 _mm_srl_pi32(__m64 __m, __m64 __count)
1054 {
1055     return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m),
1056                                              (__v4si)__anyext128(__count)));
1057 }
1058
1059 /// Right-shifts each 32-bit integer element of a 64-bit integer vector
1060 ///    of [2 x i32] by the number of bits specified by a 32-bit integer.
1061 ///
1062 ///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
1063 ///    integer vector of [2 x i32].
1064 ///
1065 /// \headerfile <x86intrin.h>
1066 ///
1067 /// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1068 ///
1069 /// \param __m
1070 ///    A 64-bit integer vector of [2 x i32].
1071 /// \param __count
1072 ///    A 32-bit integer value.
1073 /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1074 ///    values.
1075 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1076 _mm_srli_pi32(__m64 __m, int __count)
1077 {
1078     return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m),
1079                                               __count));
1080 }
1081
1082 /// Right-shifts the first 64-bit integer parameter by the number of bits
1083 ///    specified by the second 64-bit integer parameter.
1084 ///
1085 ///    High-order bits are cleared.
1086 ///
1087 /// \headerfile <x86intrin.h>
1088 ///
1089 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1090 ///
1091 /// \param __m
1092 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1093 /// \param __count
1094 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1095 /// \returns A 64-bit integer vector containing the right-shifted value.
1096 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1097 _mm_srl_si64(__m64 __m, __m64 __count)
1098 {
1099     return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m),
1100                                              (__v2di)__anyext128(__count)));
1101 }
1102
1103 /// Right-shifts the first parameter, which is a 64-bit integer, by the
1104 ///    number of bits specified by the second parameter, which is a 32-bit
1105 ///    integer.
1106 ///
1107 ///    High-order bits are cleared.
1108 ///
1109 /// \headerfile <x86intrin.h>
1110 ///
1111 /// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1112 ///
1113 /// \param __m
1114 ///    A 64-bit integer vector interpreted as a single 64-bit integer.
1115 /// \param __count
1116 ///    A 32-bit integer value.
1117 /// \returns A 64-bit integer vector containing the right-shifted value.
1118 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1119 _mm_srli_si64(__m64 __m, int __count)
1120 {
1121     return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m),
1122                                               __count));
1123 }
1124
1125 /// Performs a bitwise AND of two 64-bit integer vectors.
1126 ///
1127 /// \headerfile <x86intrin.h>
1128 ///
1129 /// This intrinsic corresponds to the <c> PAND </c> instruction.
1130 ///
1131 /// \param __m1
1132 ///    A 64-bit integer vector.
1133 /// \param __m2
1134 ///    A 64-bit integer vector.
1135 /// \returns A 64-bit integer vector containing the bitwise AND of both
1136 ///    parameters.
1137 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1138 _mm_and_si64(__m64 __m1, __m64 __m2)
1139 {
1140     return (__m64)(((__v1du)__m1) & ((__v1du)__m2));
1141 }
1142
1143 /// Performs a bitwise NOT of the first 64-bit integer vector, and then
1144 ///    performs a bitwise AND of the intermediate result and the second 64-bit
1145 ///    integer vector.
1146 ///
1147 /// \headerfile <x86intrin.h>
1148 ///
1149 /// This intrinsic corresponds to the <c> PANDN </c> instruction.
1150 ///
1151 /// \param __m1
1152 ///    A 64-bit integer vector. The one's complement of this parameter is used
1153 ///    in the bitwise AND.
1154 /// \param __m2
1155 ///    A 64-bit integer vector.
1156 /// \returns A 64-bit integer vector containing the bitwise AND of the second
1157 ///    parameter and the one's complement of the first parameter.
1158 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1159 _mm_andnot_si64(__m64 __m1, __m64 __m2)
1160 {
1161     return (__m64)(~((__v1du)__m1) & ((__v1du)__m2));
1162 }
1163
1164 /// Performs a bitwise OR of two 64-bit integer vectors.
1165 ///
1166 /// \headerfile <x86intrin.h>
1167 ///
1168 /// This intrinsic corresponds to the <c> POR </c> instruction.
1169 ///
1170 /// \param __m1
1171 ///    A 64-bit integer vector.
1172 /// \param __m2
1173 ///    A 64-bit integer vector.
1174 /// \returns A 64-bit integer vector containing the bitwise OR of both
1175 ///    parameters.
1176 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1177 _mm_or_si64(__m64 __m1, __m64 __m2)
1178 {
1179     return (__m64)(((__v1du)__m1) | ((__v1du)__m2));
1180 }
1181
1182 /// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1183 ///
1184 /// \headerfile <x86intrin.h>
1185 ///
1186 /// This intrinsic corresponds to the <c> PXOR </c> instruction.
1187 ///
1188 /// \param __m1
1189 ///    A 64-bit integer vector.
1190 /// \param __m2
1191 ///    A 64-bit integer vector.
1192 /// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1193 ///    parameters.
1194 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1195 _mm_xor_si64(__m64 __m1, __m64 __m2)
1196 {
1197     return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2));
1198 }
1199
1200 /// Compares the 8-bit integer elements of two 64-bit integer vectors of
1201 ///    [8 x i8] to determine if the element of the first vector is equal to the
1202 ///    corresponding element of the second vector.
1203 ///
1204 ///    Each comparison returns 0 for false, 0xFF for true.
1205 ///
1206 /// \headerfile <x86intrin.h>
1207 ///
1208 /// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1209 ///
1210 /// \param __m1
1211 ///    A 64-bit integer vector of [8 x i8].
1212 /// \param __m2
1213 ///    A 64-bit integer vector of [8 x i8].
1214 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1215 ///    results.
1216 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1217 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1218 {
1219     return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2));
1220 }
1221
1222 /// Compares the 16-bit integer elements of two 64-bit integer vectors of
1223 ///    [4 x i16] to determine if the element of the first vector is equal to the
1224 ///    corresponding element of the second vector.
1225 ///
1226 ///    Each comparison returns 0 for false, 0xFFFF for true.
1227 ///
1228 /// \headerfile <x86intrin.h>
1229 ///
1230 /// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1231 ///
1232 /// \param __m1
1233 ///    A 64-bit integer vector of [4 x i16].
1234 /// \param __m2
1235 ///    A 64-bit integer vector of [4 x i16].
1236 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1237 ///    results.
1238 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1239 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1240 {
1241     return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2));
1242 }
1243
1244 /// Compares the 32-bit integer elements of two 64-bit integer vectors of
1245 ///    [2 x i32] to determine if the element of the first vector is equal to the
1246 ///    corresponding element of the second vector.
1247 ///
1248 ///    Each comparison returns 0 for false, 0xFFFFFFFF for true.
1249 ///
1250 /// \headerfile <x86intrin.h>
1251 ///
1252 /// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1253 ///
1254 /// \param __m1
1255 ///    A 64-bit integer vector of [2 x i32].
1256 /// \param __m2
1257 ///    A 64-bit integer vector of [2 x i32].
1258 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1259 ///    results.
1260 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1261 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1262 {
1263     return (__m64)(((__v2si)__m1) == ((__v2si)__m2));
1264 }
1265
1266 /// Compares the 8-bit integer elements of two 64-bit integer vectors of
1267 ///    [8 x i8] to determine if the element of the first vector is greater than
1268 ///    the corresponding element of the second vector.
1269 ///
1270 ///    Each comparison returns 0 for false, 0xFF for true.
1271 ///
1272 /// \headerfile <x86intrin.h>
1273 ///
1274 /// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1275 ///
1276 /// \param __m1
1277 ///    A 64-bit integer vector of [8 x i8].
1278 /// \param __m2
1279 ///    A 64-bit integer vector of [8 x i8].
1280 /// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1281 ///    results.
1282 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1283 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1284 {
1285   /* This function always performs a signed comparison, but __v8qi is a char
1286      which may be signed or unsigned, so use __v8qs. */
1287     return (__m64)((__v8qs)__m1 > (__v8qs)__m2);
1288 }
1289
1290 /// Compares the 16-bit integer elements of two 64-bit integer vectors of
1291 ///    [4 x i16] to determine if the element of the first vector is greater than
1292 ///    the corresponding element of the second vector.
1293 ///
1294 ///    Each comparison returns 0 for false, 0xFFFF for true.
1295 ///
1296 /// \headerfile <x86intrin.h>
1297 ///
1298 /// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1299 ///
1300 /// \param __m1
1301 ///    A 64-bit integer vector of [4 x i16].
1302 /// \param __m2
1303 ///    A 64-bit integer vector of [4 x i16].
1304 /// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1305 ///    results.
1306 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1307 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1308 {
1309     return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
1310 }
1311
1312 /// Compares the 32-bit integer elements of two 64-bit integer vectors of
1313 ///    [2 x i32] to determine if the element of the first vector is greater than
1314 ///    the corresponding element of the second vector.
1315 ///
1316 ///    Each comparison returns 0 for false, 0xFFFFFFFF for true.
1317 ///
1318 /// \headerfile <x86intrin.h>
1319 ///
1320 /// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1321 ///
1322 /// \param __m1
1323 ///    A 64-bit integer vector of [2 x i32].
1324 /// \param __m2
1325 ///    A 64-bit integer vector of [2 x i32].
1326 /// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1327 ///    results.
1328 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
1329 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1330 {
1331     return (__m64)((__v2si)__m1 > (__v2si)__m2);
1332 }
1333
1334 /// Constructs a 64-bit integer vector initialized to zero.
1335 ///
1336 /// \headerfile <x86intrin.h>
1337 ///
1338 /// This intrinsic corresponds to the <c> PXOR </c> instruction.
1339 ///
1340 /// \returns An initialized 64-bit integer vector with all elements set to zero.
1341 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1342 _mm_setzero_si64(void) {
1343   return __extension__(__m64){0LL};
1344 }
1345
1346 /// Constructs a 64-bit integer vector initialized with the specified
1347 ///    32-bit integer values.
1348 ///
1349 /// \headerfile <x86intrin.h>
1350 ///
1351 /// This intrinsic is a utility function and does not correspond to a specific
1352 ///    instruction.
1353 ///
1354 /// \param __i1
1355 ///    A 32-bit integer value used to initialize the upper 32 bits of the
1356 ///    result.
1357 /// \param __i0
1358 ///    A 32-bit integer value used to initialize the lower 32 bits of the
1359 ///    result.
1360 /// \returns An initialized 64-bit integer vector.
1361 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1362 _mm_set_pi32(int __i1, int __i0) {
1363   return __extension__(__m64)(__v2si){__i0, __i1};
1364 }
1365
1366 /// Constructs a 64-bit integer vector initialized with the specified
1367 ///    16-bit integer values.
1368 ///
1369 /// \headerfile <x86intrin.h>
1370 ///
1371 /// This intrinsic is a utility function and does not correspond to a specific
1372 ///    instruction.
1373 ///
1374 /// \param __s3
1375 ///    A 16-bit integer value used to initialize bits [63:48] of the result.
1376 /// \param __s2
1377 ///    A 16-bit integer value used to initialize bits [47:32] of the result.
1378 /// \param __s1
1379 ///    A 16-bit integer value used to initialize bits [31:16] of the result.
1380 /// \param __s0
1381 ///    A 16-bit integer value used to initialize bits [15:0] of the result.
1382 /// \returns An initialized 64-bit integer vector.
1383 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1384 _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) {
1385   return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3};
1386 }
1387
1388 /// Constructs a 64-bit integer vector initialized with the specified
1389 ///    8-bit integer values.
1390 ///
1391 /// \headerfile <x86intrin.h>
1392 ///
1393 /// This intrinsic is a utility function and does not correspond to a specific
1394 ///    instruction.
1395 ///
1396 /// \param __b7
1397 ///    An 8-bit integer value used to initialize bits [63:56] of the result.
1398 /// \param __b6
1399 ///    An 8-bit integer value used to initialize bits [55:48] of the result.
1400 /// \param __b5
1401 ///    An 8-bit integer value used to initialize bits [47:40] of the result.
1402 /// \param __b4
1403 ///    An 8-bit integer value used to initialize bits [39:32] of the result.
1404 /// \param __b3
1405 ///    An 8-bit integer value used to initialize bits [31:24] of the result.
1406 /// \param __b2
1407 ///    An 8-bit integer value used to initialize bits [23:16] of the result.
1408 /// \param __b1
1409 ///    An 8-bit integer value used to initialize bits [15:8] of the result.
1410 /// \param __b0
1411 ///    An 8-bit integer value used to initialize bits [7:0] of the result.
1412 /// \returns An initialized 64-bit integer vector.
1413 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1414 _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1415             char __b1, char __b0) {
1416   return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3,
1417                                       __b4, __b5, __b6, __b7};
1418 }
1419
1420 /// Constructs a 64-bit integer vector of [2 x i32], with each of the
1421 ///    32-bit integer vector elements set to the specified 32-bit integer
1422 ///    value.
1423 ///
1424 /// \headerfile <x86intrin.h>
1425 ///
1426 /// This intrinsic is a utility function and does not correspond to a specific
1427 ///    instruction.
1428 ///
1429 /// \param __i
1430 ///    A 32-bit integer value used to initialize each vector element of the
1431 ///    result.
1432 /// \returns An initialized 64-bit integer vector of [2 x i32].
1433 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1434 _mm_set1_pi32(int __i) {
1435   return _mm_set_pi32(__i, __i);
1436 }
1437
1438 /// Constructs a 64-bit integer vector of [4 x i16], with each of the
1439 ///    16-bit integer vector elements set to the specified 16-bit integer
1440 ///    value.
1441 ///
1442 /// \headerfile <x86intrin.h>
1443 ///
1444 /// This intrinsic is a utility function and does not correspond to a specific
1445 ///    instruction.
1446 ///
1447 /// \param __w
1448 ///    A 16-bit integer value used to initialize each vector element of the
1449 ///    result.
1450 /// \returns An initialized 64-bit integer vector of [4 x i16].
1451 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1452 _mm_set1_pi16(short __w) {
1453   return _mm_set_pi16(__w, __w, __w, __w);
1454 }
1455
1456 /// Constructs a 64-bit integer vector of [8 x i8], with each of the
1457 ///    8-bit integer vector elements set to the specified 8-bit integer value.
1458 ///
1459 /// \headerfile <x86intrin.h>
1460 ///
1461 /// This intrinsic is a utility function and does not correspond to a specific
1462 ///    instruction.
1463 ///
1464 /// \param __b
1465 ///    An 8-bit integer value used to initialize each vector element of the
1466 ///    result.
1467 /// \returns An initialized 64-bit integer vector of [8 x i8].
1468 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1469 _mm_set1_pi8(char __b) {
1470   return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1471 }
1472
1473 /// Constructs a 64-bit integer vector, initialized in reverse order with
1474 ///    the specified 32-bit integer values.
1475 ///
1476 /// \headerfile <x86intrin.h>
1477 ///
1478 /// This intrinsic is a utility function and does not correspond to a specific
1479 ///    instruction.
1480 ///
1481 /// \param __i0
1482 ///    A 32-bit integer value used to initialize the lower 32 bits of the
1483 ///    result.
1484 /// \param __i1
1485 ///    A 32-bit integer value used to initialize the upper 32 bits of the
1486 ///    result.
1487 /// \returns An initialized 64-bit integer vector.
1488 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1489 _mm_setr_pi32(int __i0, int __i1) {
1490   return _mm_set_pi32(__i1, __i0);
1491 }
1492
1493 /// Constructs a 64-bit integer vector, initialized in reverse order with
1494 ///    the specified 16-bit integer values.
1495 ///
1496 /// \headerfile <x86intrin.h>
1497 ///
1498 /// This intrinsic is a utility function and does not correspond to a specific
1499 ///    instruction.
1500 ///
1501 /// \param __w0
1502 ///    A 16-bit integer value used to initialize bits [15:0] of the result.
1503 /// \param __w1
1504 ///    A 16-bit integer value used to initialize bits [31:16] of the result.
1505 /// \param __w2
1506 ///    A 16-bit integer value used to initialize bits [47:32] of the result.
1507 /// \param __w3
1508 ///    A 16-bit integer value used to initialize bits [63:48] of the result.
1509 /// \returns An initialized 64-bit integer vector.
1510 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1511 _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) {
1512   return _mm_set_pi16(__w3, __w2, __w1, __w0);
1513 }
1514
1515 /// Constructs a 64-bit integer vector, initialized in reverse order with
1516 ///    the specified 8-bit integer values.
1517 ///
1518 /// \headerfile <x86intrin.h>
1519 ///
1520 /// This intrinsic is a utility function and does not correspond to a specific
1521 ///    instruction.
1522 ///
1523 /// \param __b0
1524 ///    An 8-bit integer value used to initialize bits [7:0] of the result.
1525 /// \param __b1
1526 ///    An 8-bit integer value used to initialize bits [15:8] of the result.
1527 /// \param __b2
1528 ///    An 8-bit integer value used to initialize bits [23:16] of the result.
1529 /// \param __b3
1530 ///    An 8-bit integer value used to initialize bits [31:24] of the result.
1531 /// \param __b4
1532 ///    An 8-bit integer value used to initialize bits [39:32] of the result.
1533 /// \param __b5
1534 ///    An 8-bit integer value used to initialize bits [47:40] of the result.
1535 /// \param __b6
1536 ///    An 8-bit integer value used to initialize bits [55:48] of the result.
1537 /// \param __b7
1538 ///    An 8-bit integer value used to initialize bits [63:56] of the result.
1539 /// \returns An initialized 64-bit integer vector.
1540 static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
1541 _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1542              char __b6, char __b7) {
1543   return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1544 }
1545
1546 #undef __anyext128
1547 #undef __trunc64
1548 #undef __DEFAULT_FN_ATTRS_SSE2
1549
1550 /* Aliases for compatibility. */
1551 #define _m_empty _mm_empty
1552 #define _m_from_int _mm_cvtsi32_si64
1553 #define _m_from_int64 _mm_cvtsi64_m64
1554 #define _m_to_int _mm_cvtsi64_si32
1555 #define _m_to_int64 _mm_cvtm64_si64
1556 #define _m_packsswb _mm_packs_pi16
1557 #define _m_packssdw _mm_packs_pi32
1558 #define _m_packuswb _mm_packs_pu16
1559 #define _m_punpckhbw _mm_unpackhi_pi8
1560 #define _m_punpckhwd _mm_unpackhi_pi16
1561 #define _m_punpckhdq _mm_unpackhi_pi32
1562 #define _m_punpcklbw _mm_unpacklo_pi8
1563 #define _m_punpcklwd _mm_unpacklo_pi16
1564 #define _m_punpckldq _mm_unpacklo_pi32
1565 #define _m_paddb _mm_add_pi8
1566 #define _m_paddw _mm_add_pi16
1567 #define _m_paddd _mm_add_pi32
1568 #define _m_paddsb _mm_adds_pi8
1569 #define _m_paddsw _mm_adds_pi16
1570 #define _m_paddusb _mm_adds_pu8
1571 #define _m_paddusw _mm_adds_pu16
1572 #define _m_psubb _mm_sub_pi8
1573 #define _m_psubw _mm_sub_pi16
1574 #define _m_psubd _mm_sub_pi32
1575 #define _m_psubsb _mm_subs_pi8
1576 #define _m_psubsw _mm_subs_pi16
1577 #define _m_psubusb _mm_subs_pu8
1578 #define _m_psubusw _mm_subs_pu16
1579 #define _m_pmaddwd _mm_madd_pi16
1580 #define _m_pmulhw _mm_mulhi_pi16
1581 #define _m_pmullw _mm_mullo_pi16
1582 #define _m_psllw _mm_sll_pi16
1583 #define _m_psllwi _mm_slli_pi16
1584 #define _m_pslld _mm_sll_pi32
1585 #define _m_pslldi _mm_slli_pi32
1586 #define _m_psllq _mm_sll_si64
1587 #define _m_psllqi _mm_slli_si64
1588 #define _m_psraw _mm_sra_pi16
1589 #define _m_psrawi _mm_srai_pi16
1590 #define _m_psrad _mm_sra_pi32
1591 #define _m_psradi _mm_srai_pi32
1592 #define _m_psrlw _mm_srl_pi16
1593 #define _m_psrlwi _mm_srli_pi16
1594 #define _m_psrld _mm_srl_pi32
1595 #define _m_psrldi _mm_srli_pi32
1596 #define _m_psrlq _mm_srl_si64
1597 #define _m_psrlqi _mm_srli_si64
1598 #define _m_pand _mm_and_si64
1599 #define _m_pandn _mm_andnot_si64
1600 #define _m_por _mm_or_si64
1601 #define _m_pxor _mm_xor_si64
1602 #define _m_pcmpeqb _mm_cmpeq_pi8
1603 #define _m_pcmpeqw _mm_cmpeq_pi16
1604 #define _m_pcmpeqd _mm_cmpeq_pi32
1605 #define _m_pcmpgtb _mm_cmpgt_pi8
1606 #define _m_pcmpgtw _mm_cmpgt_pi16
1607 #define _m_pcmpgtd _mm_cmpgt_pi32
1608
1609 #endif /* __MMINTRIN_H */
1610