/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
/* As a special exception, if you include this header file into source
   files compiled by GCC, this header file does not by itself cause
   the resulting executable to be covered by the GNU General Public
   License.  This exception does not however invalidate any other
   reasons why the executable file might be covered by the GNU General
   Public License.  */
27 /* Implemented from the specification included in the Intel C++ Compiler
28 User Guide and Reference, version 8.0. */
30 #ifndef _MMINTRIN_H_INCLUDED
31 #define _MMINTRIN_H_INCLUDED
34 # error "MMX instruction set not enabled"
/* The vector type that user code is expected to name.  */
typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
/* Lane-typed vector types used only by the intrinsic implementations:
   V2SI, V4HI and V8QI machine modes respectively.  */
typedef int __v2si __attribute__ ((__mode__ (__V2SI__)));
typedef int __v4hi __attribute__ ((__mode__ (__V4HI__)));
typedef int __v8qi __attribute__ ((__mode__ (__V8QI__)));
44 /* Empty the multimedia state. */
48 __builtin_ia32_emms ();
57 /* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
59 _mm_cvtsi32_si64 (int __i
)
61 long long __tmp
= (unsigned int)__i
;
68 return _mm_cvtsi32_si64 (__i
);
72 /* Convert I to a __m64 object. */
74 _mm_cvtsi64x_si64 (long long __i
)
79 /* Convert I to a __m64 object. */
81 _mm_set_pi64x (long long __i
)
87 /* Convert the lower 32 bits of the __m64 object into an integer. */
89 _mm_cvtsi64_si32 (__m64 __i
)
91 long long __tmp
= (long long)__i
;
98 return _mm_cvtsi64_si32 (__i
);
102 /* Convert the lower 32 bits of the __m64 object into an integer. */
103 static __inline
long long
104 _mm_cvtsi64_si64x (__m64 __i
)
106 return (long long)__i
;
110 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
111 the result, and the four 16-bit values from M2 into the upper four 8-bit
112 values of the result, all with signed saturation. */
113 static __inline __m64
114 _mm_packs_pi16 (__m64 __m1
, __m64 __m2
)
116 return (__m64
) __builtin_ia32_packsswb ((__v4hi
)__m1
, (__v4hi
)__m2
);
119 static __inline __m64
120 _m_packsswb (__m64 __m1
, __m64 __m2
)
122 return _mm_packs_pi16 (__m1
, __m2
);
125 /* Pack the two 32-bit values from M1 in to the lower two 16-bit values of
126 the result, and the two 32-bit values from M2 into the upper two 16-bit
127 values of the result, all with signed saturation. */
128 static __inline __m64
129 _mm_packs_pi32 (__m64 __m1
, __m64 __m2
)
131 return (__m64
) __builtin_ia32_packssdw ((__v2si
)__m1
, (__v2si
)__m2
);
134 static __inline __m64
135 _m_packssdw (__m64 __m1
, __m64 __m2
)
137 return _mm_packs_pi32 (__m1
, __m2
);
140 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
141 the result, and the four 16-bit values from M2 into the upper four 8-bit
142 values of the result, all with unsigned saturation. */
143 static __inline __m64
144 _mm_packs_pu16 (__m64 __m1
, __m64 __m2
)
146 return (__m64
) __builtin_ia32_packuswb ((__v4hi
)__m1
, (__v4hi
)__m2
);
149 static __inline __m64
150 _m_packuswb (__m64 __m1
, __m64 __m2
)
152 return _mm_packs_pu16 (__m1
, __m2
);
155 /* Interleave the four 8-bit values from the high half of M1 with the four
156 8-bit values from the high half of M2. */
157 static __inline __m64
158 _mm_unpackhi_pi8 (__m64 __m1
, __m64 __m2
)
160 return (__m64
) __builtin_ia32_punpckhbw ((__v8qi
)__m1
, (__v8qi
)__m2
);
163 static __inline __m64
164 _m_punpckhbw (__m64 __m1
, __m64 __m2
)
166 return _mm_unpackhi_pi8 (__m1
, __m2
);
169 /* Interleave the two 16-bit values from the high half of M1 with the two
170 16-bit values from the high half of M2. */
171 static __inline __m64
172 _mm_unpackhi_pi16 (__m64 __m1
, __m64 __m2
)
174 return (__m64
) __builtin_ia32_punpckhwd ((__v4hi
)__m1
, (__v4hi
)__m2
);
177 static __inline __m64
178 _m_punpckhwd (__m64 __m1
, __m64 __m2
)
180 return _mm_unpackhi_pi16 (__m1
, __m2
);
183 /* Interleave the 32-bit value from the high half of M1 with the 32-bit
184 value from the high half of M2. */
185 static __inline __m64
186 _mm_unpackhi_pi32 (__m64 __m1
, __m64 __m2
)
188 return (__m64
) __builtin_ia32_punpckhdq ((__v2si
)__m1
, (__v2si
)__m2
);
191 static __inline __m64
192 _m_punpckhdq (__m64 __m1
, __m64 __m2
)
194 return _mm_unpackhi_pi32 (__m1
, __m2
);
197 /* Interleave the four 8-bit values from the low half of M1 with the four
198 8-bit values from the low half of M2. */
199 static __inline __m64
200 _mm_unpacklo_pi8 (__m64 __m1
, __m64 __m2
)
202 return (__m64
) __builtin_ia32_punpcklbw ((__v8qi
)__m1
, (__v8qi
)__m2
);
205 static __inline __m64
206 _m_punpcklbw (__m64 __m1
, __m64 __m2
)
208 return _mm_unpacklo_pi8 (__m1
, __m2
);
211 /* Interleave the two 16-bit values from the low half of M1 with the two
212 16-bit values from the low half of M2. */
213 static __inline __m64
214 _mm_unpacklo_pi16 (__m64 __m1
, __m64 __m2
)
216 return (__m64
) __builtin_ia32_punpcklwd ((__v4hi
)__m1
, (__v4hi
)__m2
);
219 static __inline __m64
220 _m_punpcklwd (__m64 __m1
, __m64 __m2
)
222 return _mm_unpacklo_pi16 (__m1
, __m2
);
225 /* Interleave the 32-bit value from the low half of M1 with the 32-bit
226 value from the low half of M2. */
227 static __inline __m64
228 _mm_unpacklo_pi32 (__m64 __m1
, __m64 __m2
)
230 return (__m64
) __builtin_ia32_punpckldq ((__v2si
)__m1
, (__v2si
)__m2
);
233 static __inline __m64
234 _m_punpckldq (__m64 __m1
, __m64 __m2
)
236 return _mm_unpacklo_pi32 (__m1
, __m2
);
239 /* Add the 8-bit values in M1 to the 8-bit values in M2. */
240 static __inline __m64
241 _mm_add_pi8 (__m64 __m1
, __m64 __m2
)
243 return (__m64
) __builtin_ia32_paddb ((__v8qi
)__m1
, (__v8qi
)__m2
);
246 static __inline __m64
247 _m_paddb (__m64 __m1
, __m64 __m2
)
249 return _mm_add_pi8 (__m1
, __m2
);
252 /* Add the 16-bit values in M1 to the 16-bit values in M2. */
253 static __inline __m64
254 _mm_add_pi16 (__m64 __m1
, __m64 __m2
)
256 return (__m64
) __builtin_ia32_paddw ((__v4hi
)__m1
, (__v4hi
)__m2
);
259 static __inline __m64
260 _m_paddw (__m64 __m1
, __m64 __m2
)
262 return _mm_add_pi16 (__m1
, __m2
);
265 /* Add the 32-bit values in M1 to the 32-bit values in M2. */
266 static __inline __m64
267 _mm_add_pi32 (__m64 __m1
, __m64 __m2
)
269 return (__m64
) __builtin_ia32_paddd ((__v2si
)__m1
, (__v2si
)__m2
);
272 static __inline __m64
273 _m_paddd (__m64 __m1
, __m64 __m2
)
275 return _mm_add_pi32 (__m1
, __m2
);
278 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
279 static __inline __m64
280 _mm_add_si64 (__m64 __m1
, __m64 __m2
)
282 return (__m64
) __builtin_ia32_paddq ((long long)__m1
, (long long)__m2
);
285 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
286 saturated arithmetic. */
287 static __inline __m64
288 _mm_adds_pi8 (__m64 __m1
, __m64 __m2
)
290 return (__m64
) __builtin_ia32_paddsb ((__v8qi
)__m1
, (__v8qi
)__m2
);
293 static __inline __m64
294 _m_paddsb (__m64 __m1
, __m64 __m2
)
296 return _mm_adds_pi8 (__m1
, __m2
);
299 /* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
300 saturated arithmetic. */
301 static __inline __m64
302 _mm_adds_pi16 (__m64 __m1
, __m64 __m2
)
304 return (__m64
) __builtin_ia32_paddsw ((__v4hi
)__m1
, (__v4hi
)__m2
);
307 static __inline __m64
308 _m_paddsw (__m64 __m1
, __m64 __m2
)
310 return _mm_adds_pi16 (__m1
, __m2
);
313 /* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
314 saturated arithmetic. */
315 static __inline __m64
316 _mm_adds_pu8 (__m64 __m1
, __m64 __m2
)
318 return (__m64
) __builtin_ia32_paddusb ((__v8qi
)__m1
, (__v8qi
)__m2
);
321 static __inline __m64
322 _m_paddusb (__m64 __m1
, __m64 __m2
)
324 return _mm_adds_pu8 (__m1
, __m2
);
327 /* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
328 saturated arithmetic. */
329 static __inline __m64
330 _mm_adds_pu16 (__m64 __m1
, __m64 __m2
)
332 return (__m64
) __builtin_ia32_paddusw ((__v4hi
)__m1
, (__v4hi
)__m2
);
335 static __inline __m64
336 _m_paddusw (__m64 __m1
, __m64 __m2
)
338 return _mm_adds_pu16 (__m1
, __m2
);
341 /* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
342 static __inline __m64
343 _mm_sub_pi8 (__m64 __m1
, __m64 __m2
)
345 return (__m64
) __builtin_ia32_psubb ((__v8qi
)__m1
, (__v8qi
)__m2
);
348 static __inline __m64
349 _m_psubb (__m64 __m1
, __m64 __m2
)
351 return _mm_sub_pi8 (__m1
, __m2
);
354 /* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
355 static __inline __m64
356 _mm_sub_pi16 (__m64 __m1
, __m64 __m2
)
358 return (__m64
) __builtin_ia32_psubw ((__v4hi
)__m1
, (__v4hi
)__m2
);
361 static __inline __m64
362 _m_psubw (__m64 __m1
, __m64 __m2
)
364 return _mm_sub_pi16 (__m1
, __m2
);
367 /* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
368 static __inline __m64
369 _mm_sub_pi32 (__m64 __m1
, __m64 __m2
)
371 return (__m64
) __builtin_ia32_psubd ((__v2si
)__m1
, (__v2si
)__m2
);
374 static __inline __m64
375 _m_psubd (__m64 __m1
, __m64 __m2
)
377 return _mm_sub_pi32 (__m1
, __m2
);
380 /* Add the 64-bit values in M1 to the 64-bit values in M2. */
381 static __inline __m64
382 _mm_sub_si64 (__m64 __m1
, __m64 __m2
)
384 return (__m64
) __builtin_ia32_psubq ((long long)__m1
, (long long)__m2
);
387 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
388 saturating arithmetic. */
389 static __inline __m64
390 _mm_subs_pi8 (__m64 __m1
, __m64 __m2
)
392 return (__m64
) __builtin_ia32_psubsb ((__v8qi
)__m1
, (__v8qi
)__m2
);
395 static __inline __m64
396 _m_psubsb (__m64 __m1
, __m64 __m2
)
398 return _mm_subs_pi8 (__m1
, __m2
);
401 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
402 signed saturating arithmetic. */
403 static __inline __m64
404 _mm_subs_pi16 (__m64 __m1
, __m64 __m2
)
406 return (__m64
) __builtin_ia32_psubsw ((__v4hi
)__m1
, (__v4hi
)__m2
);
409 static __inline __m64
410 _m_psubsw (__m64 __m1
, __m64 __m2
)
412 return _mm_subs_pi16 (__m1
, __m2
);
415 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
416 unsigned saturating arithmetic. */
417 static __inline __m64
418 _mm_subs_pu8 (__m64 __m1
, __m64 __m2
)
420 return (__m64
) __builtin_ia32_psubusb ((__v8qi
)__m1
, (__v8qi
)__m2
);
423 static __inline __m64
424 _m_psubusb (__m64 __m1
, __m64 __m2
)
426 return _mm_subs_pu8 (__m1
, __m2
);
429 /* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
430 unsigned saturating arithmetic. */
431 static __inline __m64
432 _mm_subs_pu16 (__m64 __m1
, __m64 __m2
)
434 return (__m64
) __builtin_ia32_psubusw ((__v4hi
)__m1
, (__v4hi
)__m2
);
437 static __inline __m64
438 _m_psubusw (__m64 __m1
, __m64 __m2
)
440 return _mm_subs_pu16 (__m1
, __m2
);
443 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
444 four 32-bit intermediate results, which are then summed by pairs to
445 produce two 32-bit results. */
446 static __inline __m64
447 _mm_madd_pi16 (__m64 __m1
, __m64 __m2
)
449 return (__m64
) __builtin_ia32_pmaddwd ((__v4hi
)__m1
, (__v4hi
)__m2
);
452 static __inline __m64
453 _m_pmaddwd (__m64 __m1
, __m64 __m2
)
455 return _mm_madd_pi16 (__m1
, __m2
);
458 /* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
459 M2 and produce the high 16 bits of the 32-bit results. */
460 static __inline __m64
461 _mm_mulhi_pi16 (__m64 __m1
, __m64 __m2
)
463 return (__m64
) __builtin_ia32_pmulhw ((__v4hi
)__m1
, (__v4hi
)__m2
);
466 static __inline __m64
467 _m_pmulhw (__m64 __m1
, __m64 __m2
)
469 return _mm_mulhi_pi16 (__m1
, __m2
);
472 /* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
473 the low 16 bits of the results. */
474 static __inline __m64
475 _mm_mullo_pi16 (__m64 __m1
, __m64 __m2
)
477 return (__m64
) __builtin_ia32_pmullw ((__v4hi
)__m1
, (__v4hi
)__m2
);
480 static __inline __m64
481 _m_pmullw (__m64 __m1
, __m64 __m2
)
483 return _mm_mullo_pi16 (__m1
, __m2
);
486 /* Shift four 16-bit values in M left by COUNT. */
487 static __inline __m64
488 _mm_sll_pi16 (__m64 __m
, __m64 __count
)
490 return (__m64
) __builtin_ia32_psllw ((__v4hi
)__m
, (long long)__count
);
493 static __inline __m64
494 _m_psllw (__m64 __m
, __m64 __count
)
496 return _mm_sll_pi16 (__m
, __count
);
499 static __inline __m64
500 _mm_slli_pi16 (__m64 __m
, int __count
)
502 return (__m64
) __builtin_ia32_psllw ((__v4hi
)__m
, __count
);
505 static __inline __m64
506 _m_psllwi (__m64 __m
, int __count
)
508 return _mm_slli_pi16 (__m
, __count
);
511 /* Shift two 32-bit values in M left by COUNT. */
512 static __inline __m64
513 _mm_sll_pi32 (__m64 __m
, __m64 __count
)
515 return (__m64
) __builtin_ia32_pslld ((__v2si
)__m
, (long long)__count
);
518 static __inline __m64
519 _m_pslld (__m64 __m
, __m64 __count
)
521 return _mm_sll_pi32 (__m
, __count
);
524 static __inline __m64
525 _mm_slli_pi32 (__m64 __m
, int __count
)
527 return (__m64
) __builtin_ia32_pslld ((__v2si
)__m
, __count
);
530 static __inline __m64
531 _m_pslldi (__m64 __m
, int __count
)
533 return _mm_slli_pi32 (__m
, __count
);
536 /* Shift the 64-bit value in M left by COUNT. */
537 static __inline __m64
538 _mm_sll_si64 (__m64 __m
, __m64 __count
)
540 return (__m64
) __builtin_ia32_psllq ((long long)__m
, (long long)__count
);
543 static __inline __m64
544 _m_psllq (__m64 __m
, __m64 __count
)
546 return _mm_sll_si64 (__m
, __count
);
549 static __inline __m64
550 _mm_slli_si64 (__m64 __m
, int __count
)
552 return (__m64
) __builtin_ia32_psllq ((long long)__m
, (long long)__count
);
555 static __inline __m64
556 _m_psllqi (__m64 __m
, int __count
)
558 return _mm_slli_si64 (__m
, __count
);
561 /* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
562 static __inline __m64
563 _mm_sra_pi16 (__m64 __m
, __m64 __count
)
565 return (__m64
) __builtin_ia32_psraw ((__v4hi
)__m
, (long long)__count
);
568 static __inline __m64
569 _m_psraw (__m64 __m
, __m64 __count
)
571 return _mm_sra_pi16 (__m
, __count
);
574 static __inline __m64
575 _mm_srai_pi16 (__m64 __m
, int __count
)
577 return (__m64
) __builtin_ia32_psraw ((__v4hi
)__m
, __count
);
580 static __inline __m64
581 _m_psrawi (__m64 __m
, int __count
)
583 return _mm_srai_pi16 (__m
, __count
);
586 /* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
587 static __inline __m64
588 _mm_sra_pi32 (__m64 __m
, __m64 __count
)
590 return (__m64
) __builtin_ia32_psrad ((__v2si
)__m
, (long long)__count
);
593 static __inline __m64
594 _m_psrad (__m64 __m
, __m64 __count
)
596 return _mm_sra_pi32 (__m
, __count
);
599 static __inline __m64
600 _mm_srai_pi32 (__m64 __m
, int __count
)
602 return (__m64
) __builtin_ia32_psrad ((__v2si
)__m
, __count
);
605 static __inline __m64
606 _m_psradi (__m64 __m
, int __count
)
608 return _mm_srai_pi32 (__m
, __count
);
611 /* Shift four 16-bit values in M right by COUNT; shift in zeros. */
612 static __inline __m64
613 _mm_srl_pi16 (__m64 __m
, __m64 __count
)
615 return (__m64
) __builtin_ia32_psrlw ((__v4hi
)__m
, (long long)__count
);
618 static __inline __m64
619 _m_psrlw (__m64 __m
, __m64 __count
)
621 return _mm_srl_pi16 (__m
, __count
);
624 static __inline __m64
625 _mm_srli_pi16 (__m64 __m
, int __count
)
627 return (__m64
) __builtin_ia32_psrlw ((__v4hi
)__m
, __count
);
630 static __inline __m64
631 _m_psrlwi (__m64 __m
, int __count
)
633 return _mm_srli_pi16 (__m
, __count
);
636 /* Shift two 32-bit values in M right by COUNT; shift in zeros. */
637 static __inline __m64
638 _mm_srl_pi32 (__m64 __m
, __m64 __count
)
640 return (__m64
) __builtin_ia32_psrld ((__v2si
)__m
, (long long)__count
);
643 static __inline __m64
644 _m_psrld (__m64 __m
, __m64 __count
)
646 return _mm_srl_pi32 (__m
, __count
);
649 static __inline __m64
650 _mm_srli_pi32 (__m64 __m
, int __count
)
652 return (__m64
) __builtin_ia32_psrld ((__v2si
)__m
, __count
);
655 static __inline __m64
656 _m_psrldi (__m64 __m
, int __count
)
658 return _mm_srli_pi32 (__m
, __count
);
661 /* Shift the 64-bit value in M left by COUNT; shift in zeros. */
662 static __inline __m64
663 _mm_srl_si64 (__m64 __m
, __m64 __count
)
665 return (__m64
) __builtin_ia32_psrlq ((long long)__m
, (long long)__count
);
668 static __inline __m64
669 _m_psrlq (__m64 __m
, __m64 __count
)
671 return _mm_srl_si64 (__m
, __count
);
674 static __inline __m64
675 _mm_srli_si64 (__m64 __m
, int __count
)
677 return (__m64
) __builtin_ia32_psrlq ((long long)__m
, (long long)__count
);
680 static __inline __m64
681 _m_psrlqi (__m64 __m
, int __count
)
683 return _mm_srli_si64 (__m
, __count
);
686 /* Bit-wise AND the 64-bit values in M1 and M2. */
687 static __inline __m64
688 _mm_and_si64 (__m64 __m1
, __m64 __m2
)
690 return (__m64
) __builtin_ia32_pand ((long long)__m1
, (long long)__m2
);
693 static __inline __m64
694 _m_pand (__m64 __m1
, __m64 __m2
)
696 return _mm_and_si64 (__m1
, __m2
);
699 /* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
700 64-bit value in M2. */
701 static __inline __m64
702 _mm_andnot_si64 (__m64 __m1
, __m64 __m2
)
704 return (__m64
) __builtin_ia32_pandn ((long long)__m1
, (long long)__m2
);
707 static __inline __m64
708 _m_pandn (__m64 __m1
, __m64 __m2
)
710 return _mm_andnot_si64 (__m1
, __m2
);
713 /* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
714 static __inline __m64
715 _mm_or_si64 (__m64 __m1
, __m64 __m2
)
717 return (__m64
)__builtin_ia32_por ((long long)__m1
, (long long)__m2
);
720 static __inline __m64
721 _m_por (__m64 __m1
, __m64 __m2
)
723 return _mm_or_si64 (__m1
, __m2
);
726 /* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
727 static __inline __m64
728 _mm_xor_si64 (__m64 __m1
, __m64 __m2
)
730 return (__m64
)__builtin_ia32_pxor ((long long)__m1
, (long long)__m2
);
733 static __inline __m64
734 _m_pxor (__m64 __m1
, __m64 __m2
)
736 return _mm_xor_si64 (__m1
, __m2
);
739 /* Compare eight 8-bit values. The result of the comparison is 0xFF if the
740 test is true and zero if false. */
741 static __inline __m64
742 _mm_cmpeq_pi8 (__m64 __m1
, __m64 __m2
)
744 return (__m64
) __builtin_ia32_pcmpeqb ((__v8qi
)__m1
, (__v8qi
)__m2
);
747 static __inline __m64
748 _m_pcmpeqb (__m64 __m1
, __m64 __m2
)
750 return _mm_cmpeq_pi8 (__m1
, __m2
);
753 static __inline __m64
754 _mm_cmpgt_pi8 (__m64 __m1
, __m64 __m2
)
756 return (__m64
) __builtin_ia32_pcmpgtb ((__v8qi
)__m1
, (__v8qi
)__m2
);
759 static __inline __m64
760 _m_pcmpgtb (__m64 __m1
, __m64 __m2
)
762 return _mm_cmpgt_pi8 (__m1
, __m2
);
765 /* Compare four 16-bit values. The result of the comparison is 0xFFFF if
766 the test is true and zero if false. */
767 static __inline __m64
768 _mm_cmpeq_pi16 (__m64 __m1
, __m64 __m2
)
770 return (__m64
) __builtin_ia32_pcmpeqw ((__v4hi
)__m1
, (__v4hi
)__m2
);
773 static __inline __m64
774 _m_pcmpeqw (__m64 __m1
, __m64 __m2
)
776 return _mm_cmpeq_pi16 (__m1
, __m2
);
779 static __inline __m64
780 _mm_cmpgt_pi16 (__m64 __m1
, __m64 __m2
)
782 return (__m64
) __builtin_ia32_pcmpgtw ((__v4hi
)__m1
, (__v4hi
)__m2
);
785 static __inline __m64
786 _m_pcmpgtw (__m64 __m1
, __m64 __m2
)
788 return _mm_cmpgt_pi16 (__m1
, __m2
);
791 /* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
792 the test is true and zero if false. */
793 static __inline __m64
794 _mm_cmpeq_pi32 (__m64 __m1
, __m64 __m2
)
796 return (__m64
) __builtin_ia32_pcmpeqd ((__v2si
)__m1
, (__v2si
)__m2
);
799 static __inline __m64
800 _m_pcmpeqd (__m64 __m1
, __m64 __m2
)
802 return _mm_cmpeq_pi32 (__m1
, __m2
);
805 static __inline __m64
806 _mm_cmpgt_pi32 (__m64 __m1
, __m64 __m2
)
808 return (__m64
) __builtin_ia32_pcmpgtd ((__v2si
)__m1
, (__v2si
)__m2
);
811 static __inline __m64
812 _m_pcmpgtd (__m64 __m1
, __m64 __m2
)
814 return _mm_cmpgt_pi32 (__m1
, __m2
);
817 /* Creates a 64-bit zero. */
818 static __inline __m64
819 _mm_setzero_si64 (void)
821 return (__m64
)__builtin_ia32_mmx_zero ();
824 /* Creates a vector of two 32-bit values; I0 is least significant. */
825 static __inline __m64
826 _mm_set_pi32 (int __i1
, int __i0
)
842 /* Creates a vector of four 16-bit values; W0 is least significant. */
843 static __inline __m64
844 _mm_set_pi16 (short __w3
, short __w2
, short __w1
, short __w0
)
846 unsigned int __i1
= (unsigned short)__w3
<< 16 | (unsigned short)__w2
;
847 unsigned int __i0
= (unsigned short)__w1
<< 16 | (unsigned short)__w0
;
848 return _mm_set_pi32 (__i1
, __i0
);
852 /* Creates a vector of eight 8-bit values; B0 is least significant. */
853 static __inline __m64
854 _mm_set_pi8 (char __b7
, char __b6
, char __b5
, char __b4
,
855 char __b3
, char __b2
, char __b1
, char __b0
)
857 unsigned int __i1
, __i0
;
859 __i1
= (unsigned char)__b7
;
860 __i1
= __i1
<< 8 | (unsigned char)__b6
;
861 __i1
= __i1
<< 8 | (unsigned char)__b5
;
862 __i1
= __i1
<< 8 | (unsigned char)__b4
;
864 __i0
= (unsigned char)__b3
;
865 __i0
= __i0
<< 8 | (unsigned char)__b2
;
866 __i0
= __i0
<< 8 | (unsigned char)__b1
;
867 __i0
= __i0
<< 8 | (unsigned char)__b0
;
869 return _mm_set_pi32 (__i1
, __i0
);
872 /* Similar, but with the arguments in reverse order. */
873 static __inline __m64
874 _mm_setr_pi32 (int __i0
, int __i1
)
876 return _mm_set_pi32 (__i1
, __i0
);
879 static __inline __m64
880 _mm_setr_pi16 (short __w0
, short __w1
, short __w2
, short __w3
)
882 return _mm_set_pi16 (__w3
, __w2
, __w1
, __w0
);
885 static __inline __m64
886 _mm_setr_pi8 (char __b0
, char __b1
, char __b2
, char __b3
,
887 char __b4
, char __b5
, char __b6
, char __b7
)
889 return _mm_set_pi8 (__b7
, __b6
, __b5
, __b4
, __b3
, __b2
, __b1
, __b0
);
892 /* Creates a vector of two 32-bit values, both elements containing I. */
893 static __inline __m64
894 _mm_set1_pi32 (int __i
)
896 return _mm_set_pi32 (__i
, __i
);
899 /* Creates a vector of four 16-bit values, all elements containing W. */
900 static __inline __m64
901 _mm_set1_pi16 (short __w
)
903 unsigned int __i
= (unsigned short)__w
<< 16 | (unsigned short)__w
;
904 return _mm_set1_pi32 (__i
);
907 /* Creates a vector of eight 8-bit values, all elements containing B. */
908 static __inline __m64
909 _mm_set1_pi8 (char __b
)
911 unsigned int __w
= (unsigned char)__b
<< 8 | (unsigned char)__b
;
912 unsigned int __i
= __w
<< 16 | __w
;
913 return _mm_set1_pi32 (__i
);
917 #endif /* _MMINTRIN_H_INCLUDED */