/* A program to test SSE4.1/SSE4.2 instructions.
   Revisions:  Nov.208     - wrote this file
               Apr.10.2010 - added PEXTR* tests
               Apr.16.2010 - added PINS*  tests

   To compile:
   gcc -m64 -g -O -Wall -o sse4-64 sse4-64.c
*/
15 #include "tests/malloc.h"
/* Basic types used throughout this test program. */

/* A 128-bit vector value, held as 16 raw bytes. */
typedef  unsigned char           V128[16];
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;

/* Minimal boolean type and its two values. */
typedef  unsigned char           Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
45 static void do64HLtoV128 ( /*OUT*/V128
* res
, ULong wHi
, ULong wLo
)
47 // try to sidestep strict-aliasing snafus by memcpying explicitly
48 UChar
* p
= (UChar
*)res
;
49 memcpy(&p
[8], (UChar
*)&wHi
, 8);
50 memcpy(&p
[0], (UChar
*)&wLo
, 8);
53 static UChar
randUChar ( void )
55 static UInt seed
= 80021;
56 seed
= 1103515245 * seed
+ 12345;
57 return (seed
>> 17) & 0xFF;
60 static ULong
randULong ( void )
64 for (i
= 0; i
< 8; i
++) {
65 r
= (r
<< 8) | (ULong
)(0xFF & randUChar());
70 static void randV128 ( V128
* v
)
73 for (i
= 0; i
< 16; i
++)
74 (*v
)[i
] = randUChar();
77 static void showV128 ( V128
* v
)
80 for (i
= 15; i
>= 0; i
--)
81 printf("%02x", (Int
)(*v
)[i
]);
84 static void showMaskedV128 ( V128
* v
, V128
* mask
)
87 for (i
= 15; i
>= 0; i
--)
88 printf("%02x", (Int
)( ((*v
)[i
]) & ((*mask
)[i
]) ));
/* Print one result line for an "immediate, 64-bit source, vector,
   vector" test.  The visible statements print rOrM, op (right-justified
   in 10 columns) and imm as "$imm", then src64 as 16 hex digits.
   NOTE(review): dst and res are not referenced in the lines visible
   here; the remainder of the body is not shown in this excerpt. */
91 static void showIGVV( char* rOrM
, char* op
, Int imm
,
92 ULong src64
, V128
* dst
, V128
* res
)
94 printf("%s %10s $%d ", rOrM
, op
, imm
);
95 printf("%016llx", src64
);
/* Print one result line for an "immediate, vector arg, 64-bit arg,
   64-bit result" test.  The visible statements print rOrM, op
   (right-justified in 10 columns) and imm as "$imm", then argR and res
   as 16 hex digits each.
   NOTE(review): argL is not referenced in the lines visible here; some
   body lines are not shown in this excerpt. */
103 static void showIAG ( char* rOrM
, char* op
, Int imm
,
104 V128
* argL
, ULong argR
, ULong res
)
106 printf("%s %10s $%d ", rOrM
, op
, imm
);
109 printf("%016llx", argR
);
111 printf("%016llx", res
);
/* Print one result line for an "immediate, arg, arg" test: rOrM, op
   (right-justified in 10 columns) and imm as "$imm", then rra->arg1 and
   rra->arg2 in full, then rra->res ANDed with *rmask.
   NOTE(review): any separators or trailing newline are on lines not
   shown in this excerpt. */
115 static void showIAA ( char* rOrM
, char* op
, Int imm
, RRArgs
* rra
, V128
* rmask
)
117 printf("%s %10s $%d ", rOrM
, op
, imm
);
118 showV128(&rra
->arg1
);
120 showV128(&rra
->arg2
);
122 showMaskedV128(&rra
->res
, rmask
);
/* Print one result line for an "arg, arg" test (no immediate): rOrM and
   op (right-justified in 10 columns), then rra->arg1 and rra->arg2 in
   full, then rra->res ANDed with *rmask.
   NOTE(review): any separators or trailing newline are on lines not
   shown in this excerpt. */
126 static void showAA ( char* rOrM
, char* op
, RRArgs
* rra
, V128
* rmask
)
128 printf("%s %10s ", rOrM
, op
);
129 showV128(&rra
->arg1
);
131 showV128(&rra
->arg2
);
133 showMaskedV128(&rra
->res
, rmask
);
137 /* Note: these are little endian. Hence first byte is the least
138 significant byte of lane zero. */
140 /* Mask for insns where all result bits are non-approximated. */
141 static V128 AllMask
= { 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF,
142 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
144 /* Mark for insns which produce approximated vector short results. */
145 __attribute__((unused
))
146 static V128 ApproxPS
= { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
147 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };
149 /* Mark for insns which produce approximated scalar short results. */
150 __attribute__((unused
))
151 static V128 ApproxSS
= { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
152 0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
154 static V128 fives
= { 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55,
155 0x55,0x55,0x55,0x55, 0x55,0x55,0x55,0x55 };
157 static V128 zeroes
= { 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,
158 0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00 };
/* Helpers that produce IEEE special values by floating-point division:
   1.0/0.0 for infinity and 0.0/0.0 for NaN; the "Neg" variants negate
   the corresponding "Pos" result. */
double mkPosInf ( void ) { return 1.0 / 0.0; }
double mkNegInf ( void ) { return -mkPosInf(); }
double mkPosNan ( void ) { return 0.0 / 0.0; }
double mkNegNan ( void ) { return -mkPosNan(); }
/* Read the MXCSR register.  The visible asm lowers %rsp by 8, stores
   MXCSR at (%rsp) with stmxcsr, and loads the 64-bit word at (%rsp)
   into w64.  The "if (0)" printf is disabled debug output.
   NOTE(review): the declaration of w64, the end of the asm statement
   (including whatever restores %rsp) and the return statement are not
   visible in this excerpt. */
165 __attribute__((noinline
))
166 UInt
get_mxcsr ( void )
169 __asm__
__volatile__(
170 "subq $8, %%rsp" "\n\t"
171 "stmxcsr (%%rsp)" "\n\t"
172 "movq (%%rsp), %0" "\n"
174 : /*OUT*/"=r"(w64
) : /*IN*/ : "memory","cc"
176 if (0) printf("get %08x\n", (UInt
)w64
);
/* Load the MXCSR register from w32.  The value is widened to 64 bits,
   and the visible asm lowers %rsp by 8, writes the widened value at
   (%rsp) and loads MXCSR from there with ldmxcsr.  The "if (0)" printf
   is disabled debug output.
   NOTE(review): the end of the asm statement (including whatever
   restores %rsp) is not visible in this excerpt. */
180 __attribute__((noinline
))
181 void set_mxcsr ( UInt w32
)
183 if (0) printf("set %08x\n", w32
);
184 ULong w64
= (ULong
)w32
;
185 __asm__
__volatile__(
186 "subq $8, %%rsp" "\n\t"
187 "movq %0, (%%rsp)" "\n\t"
188 "ldmxcsr (%%rsp)" "\n\t"
190 : /*OUT*/ : /*IN*/"r"(w64
) : "memory",/*"mxcsr",*/"cc"
194 UInt
get_sse_roundingmode ( void )
196 UInt w
= get_mxcsr();
197 return (w
>> 13) & 3;
/* Set the SSE rounding mode to m.  The visible statement asserts that m
   has no bits set outside its low two bits (i.e. m is 0..3).
   NOTE(review): the code that actually updates MXCSR is not visible in
   this excerpt. */
200 void set_sse_roundingmode ( UInt m
)
203 assert(0 == (m
& ~3));
/* Test "_opname $_imm, xmm, xmm" (register source).  _src is loaded into
   %xmm2 and _dst into %xmm11, the instruction is executed with _imm
   stringified into the assembly text (so _imm must be a literal), and
   %xmm11 is stored to _tmp.  The operands and result are then copied
   into rra and printed by showIAA with the "r" tag and AllMask.
   NOTE(review): the declarations of _tmp and rra and the enclosing
   braces are not visible in this excerpt. */
211 #define DO_imm_r_r(_opname, _imm, _src, _dst) \
214 __asm__ __volatile__( \
215 "movupd (%0), %%xmm2" "\n\t" \
216 "movupd (%1), %%xmm11" "\n\t" \
217 _opname " $" #_imm ", %%xmm2, %%xmm11" "\n\t" \
218 "movupd %%xmm11, (%2)" "\n" \
219 : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \
220 : "cc", "memory", "xmm2", "xmm11" \
223 memcpy(&rra.arg1, &(_src), sizeof(V128)); \
224 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
225 memcpy(&rra.res, &(_tmp), sizeof(V128)); \
226 showIAA("r", (_opname), (_imm), &rra, &AllMask); \
/* Test "_opname $_imm, mem, xmm" (memory source).  _src is copied into a
   buffer obtained from memalign16(), _dst is loaded into %xmm11, the
   instruction is executed with the buffer as its memory operand, and
   %xmm11 is stored to _tmp.  The operands and result are then copied
   into rra and printed by showIAA with the "m" tag and AllMask.
   NOTE(review): the result of memalign16() is not checked in the lines
   visible here, and the release of _srcM, the declarations of _tmp and
   rra, and the enclosing braces are not visible in this excerpt. */
229 #define DO_imm_m_r(_opname, _imm, _src, _dst) \
232 V128* _srcM = memalign16(sizeof(V128)); \
233 memcpy(_srcM, &(_src), sizeof(V128)); \
234 __asm__ __volatile__( \
235 "movupd (%1), %%xmm11" "\n\t" \
236 _opname " $" #_imm ", (%0), %%xmm11" "\n\t" \
237 "movupd %%xmm11, (%2)" "\n" \
238 : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \
239 : "cc", "memory", "xmm11" \
242 memcpy(&rra.arg1, &(_src), sizeof(V128)); \
243 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
244 memcpy(&rra.res, &(_tmp), sizeof(V128)); \
245 showIAA("m", (_opname), (_imm), &rra, &AllMask); \
/* Run the register-source form (DO_imm_r_r) and then the memory-source
   form (DO_imm_m_r) of the same instruction with the same immediate and
   operands. */
#define DO_imm_mandr_r(_opname, _imm, _src, _dst) \
   DO_imm_r_r( _opname, _imm, _src, _dst ) \
   DO_imm_m_r( _opname, _imm, _src, _dst )
/* Test "_opname xmm, xmm" (register source, no immediate).  _src is
   loaded into %xmm2 and _dst into %xmm11, the instruction is executed,
   and %xmm11 is stored to _tmp.  The operands and result are then
   copied into rra and printed by showAA with the "r" tag and AllMask.
   NOTE(review): the declarations of _tmp and rra and the enclosing
   braces are not visible in this excerpt. */
257 #define DO_r_r(_opname, _src, _dst) \
260 __asm__ __volatile__( \
261 "movupd (%0), %%xmm2" "\n\t" \
262 "movupd (%1), %%xmm11" "\n\t" \
263 _opname " %%xmm2, %%xmm11" "\n\t" \
264 "movupd %%xmm11, (%2)" "\n" \
265 : /*out*/ : /*in*/ "r"(&(_src)), "r"(&(_dst)), "r"(&(_tmp)) \
266 : "cc", "memory", "xmm2", "xmm11" \
269 memcpy(&rra.arg1, &(_src), sizeof(V128)); \
270 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
271 memcpy(&rra.res, &(_tmp), sizeof(V128)); \
272 showAA("r", (_opname), &rra, &AllMask); \
/* Test "_opname mem, xmm" (memory source, no immediate).  _src is copied
   into a buffer obtained from memalign16(), _dst is loaded into %xmm11,
   the instruction is executed with the buffer as its memory operand,
   and %xmm11 is stored to _tmp.  The operands and result are then
   copied into rra and printed by showAA with the "m" tag and AllMask.
   NOTE(review): the result of memalign16() is not checked in the lines
   visible here, and the release of _srcM, the declarations of _tmp and
   rra, and the enclosing braces are not visible in this excerpt. */
275 #define DO_m_r(_opname, _src, _dst) \
278 V128* _srcM = memalign16(sizeof(V128)); \
279 memcpy(_srcM, &(_src), sizeof(V128)); \
280 __asm__ __volatile__( \
281 "movupd (%1), %%xmm11" "\n\t" \
282 _opname " (%0), %%xmm11" "\n\t" \
283 "movupd %%xmm11, (%2)" "\n" \
284 : /*out*/ : /*in*/ "r"(_srcM), "r"(&(_dst)), "r"(&(_tmp)) \
285 : "cc", "memory", "xmm11" \
288 memcpy(&rra.arg1, &(_src), sizeof(V128)); \
289 memcpy(&rra.arg2, &(_dst), sizeof(V128)); \
290 memcpy(&rra.res, &(_tmp), sizeof(V128)); \
291 showAA("m", (_opname), &rra, &AllMask); \
/* Run the register-source form (DO_r_r) and then the memory-source form
   (DO_m_r) of the same instruction on the same operands. */
#define DO_mandr_r(_opname, _src, _dst) \
   DO_r_r(_opname, _src, _dst) \
   DO_m_r(_opname, _src, _dst)
/* Test "_opname $_imm, xmm, r11<_dstsuffix>" (vector to integer
   register).  _src is loaded into %xmm2, %r11 is preloaded from
   _scbefore (0x5555...), the instruction is executed, and %r11 is
   stored to _scafter.  showIAG then prints the source vector together
   with the "before" and "after" scalar values under the "r" tag.
   _dstsuffix is pasted directly after "%r11" in the assembly text.
   NOTE(review): the enclosing braces and part of the asm operand list
   are not visible in this excerpt. */
302 #define DO_imm_r_to_rscalar(_opname, _imm, _src, _dstsuffix) \
304 ULong _scbefore = 0x5555555555555555ULL; \
305 ULong _scafter = 0xAAAAAAAAAAAAAAAAULL; \
306 /* This assumes that gcc won't make any of %0, %1, %2 */ \
307 /* be r11. That should be ensured (cough, cough) */ \
308 /* by declaring r11 to be clobbered. */ \
309 __asm__ __volatile__( \
310 "movupd (%0), %%xmm2" "\n\t" \
311 "movq (%1), %%r11" "\n\t" \
312 _opname " $" #_imm ", %%xmm2, %%r11" _dstsuffix "\n\t" \
313 "movq %%r11, (%2)" "\n" \
315 : /*in*/ "r"(&(_src)), "r"(&(_scbefore)), "r"(&(_scafter)) \
316 : "cc", "memory", "xmm2", "r11" \
318 showIAG("r", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \
/* Test "_opname $_imm, xmm, mem" (vector to scalar in memory).  _src is
   loaded into %xmm2 and the instruction writes directly to _scafter,
   which starts out equal to _scbefore (0x5555...).  showIAG then prints
   the source vector together with the "before" and "after" scalar
   values under the "m" tag.
   NOTE(review): the enclosing braces and part of the asm operand list
   are not visible in this excerpt. */
321 #define DO_imm_r_to_mscalar(_opname, _imm, _src) \
323 ULong _scbefore = 0x5555555555555555ULL; \
324 ULong _scafter = _scbefore; \
325 __asm__ __volatile__( \
326 "movupd (%0), %%xmm2" "\n\t" \
327 _opname " $" #_imm ", %%xmm2, (%1)" "\n\t" \
329 : /*in*/ "r"(&(_src)), "r"(&(_scafter)) \
330 : "cc", "memory", "xmm2" \
332 showIAG("m", (_opname), (_imm), &(_src), (_scbefore), (_scafter)); \
/* Run the register-destination form (DO_imm_r_to_rscalar) and then the
   memory-destination form (DO_imm_r_to_mscalar) of the same
   instruction.  _dstsuffix is only used by the register form. */
#define DO_imm_r_to_mandrscalar(_opname, _imm, _src, _dstsuffix) \
   DO_imm_r_to_rscalar( _opname, _imm, _src, _dstsuffix ) \
   DO_imm_r_to_mscalar( _opname, _imm, _src )
/* Test "_opname $_imm, r11<_srcsuffix>, xmm" (integer register to
   vector).  dstv is initialised from 'fives' and res from 'zeroes';
   dstv is loaded into %xmm2, %r11 is loaded from src64 (the value of
   _src widened to ULong), the instruction is executed, and %xmm2 is
   stored to res.  showIGVV then prints the case under the "r" tag.
   _srcsuffix is pasted directly after "%r11" in the assembly text.
   NOTE(review): the declarations of dstv and res and the enclosing
   braces are not visible in this excerpt. */
346 #define DO_imm_rscalar_to_r(_opname, _imm, _src, _srcsuffix) \
350 ULong src64 = (ULong)(_src); \
351 memcpy(dstv, fives, sizeof(dstv)); \
352 memcpy(res, zeroes, sizeof(res)); \
353 /* This assumes that gcc won't make any of %0, %1, %2 */ \
354 /* be r11. That should be ensured (cough, cough) */ \
355 /* by declaring r11 to be clobbered. */ \
356 __asm__ __volatile__( \
357 "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \
358 "movq (%1), %%r11" "\n\t" /*src64*/ \
359 _opname " $" #_imm ", %%r11" _srcsuffix ", %%xmm2" "\n\t" \
360 "movupd %%xmm2, (%2)" "\n" /*res*/ \
362 : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \
363 : "cc", "memory", "xmm2", "r11" \
365 showIGVV("r", (_opname), (_imm), src64, &dstv, &res); \
/* Test "_opname $_imm, mem, xmm" (scalar in memory to vector).  dstv is
   initialised from 'fives' and res from 'zeroes'; dstv is loaded into
   %xmm2, the instruction is executed with src64 (the value of _src
   widened to ULong) as its memory operand, and %xmm2 is stored to res.
   showIGVV then prints the case under the "m" tag.
   NOTE(review): the declarations of dstv and res and the enclosing
   braces are not visible in this excerpt. */
367 #define DO_imm_mscalar_to_r(_opname, _imm, _src) \
371 ULong src64 = (ULong)(_src); \
372 memcpy(dstv, fives, sizeof(dstv)); \
373 memcpy(res, zeroes, sizeof(res)); \
374 __asm__ __volatile__( \
375 "movupd (%0), %%xmm2" "\n\t" /*dstv*/ \
376 _opname " $" #_imm ", (%1), %%xmm2" "\n\t" \
377 "movupd %%xmm2, (%2)" "\n" /*res*/ \
379 : /*in*/ "r"(&dstv), "r"(&src64), "r"(&res) \
380 : "cc", "memory", "xmm2" \
382 showIGVV("m", (_opname), (_imm), src64, &dstv, &res); \
/* Run the register-source form (DO_imm_rscalar_to_r) and then the
   memory-source form (DO_imm_mscalar_to_r) of the same instruction.
   _srcsuffix is the suffix appended to the source register name and is
   only used by the register form. */
#define DO_imm_mandrscalar_to_r(_opname, _imm, _src, _srcsuffix) \
   DO_imm_rscalar_to_r( _opname, _imm, _src, _srcsuffix ) \
   DO_imm_mscalar_to_r( _opname, _imm, _src )
/* Exercise BLENDPD.  Each of 10 loop iterations runs the register- and
   memory-source forms (DO_imm_mandr_r) with immediates 0 through 3 on
   src and dst.
   NOTE(review): the declarations of src, dst and i, and whatever fills
   src and dst on each iteration, are not visible in this excerpt. */
393 void test_BLENDPD ( void )
397 for (i
= 0; i
< 10; i
++) {
400 DO_imm_mandr_r("blendpd", 0, src
, dst
);
401 DO_imm_mandr_r("blendpd", 1, src
, dst
);
402 DO_imm_mandr_r("blendpd", 2, src
, dst
);
403 DO_imm_mandr_r("blendpd", 3, src
, dst
);
/* Exercise BLENDPS.  Each of 10 loop iterations runs the register- and
   memory-source forms (DO_imm_mandr_r) with immediates 0 through 15 on
   src and dst.
   NOTE(review): the declarations of src, dst and i, and whatever fills
   src and dst on each iteration, are not visible in this excerpt. */
407 void test_BLENDPS ( void )
411 for (i
= 0; i
< 10; i
++) {
414 DO_imm_mandr_r("blendps", 0, src
, dst
);
415 DO_imm_mandr_r("blendps", 1, src
, dst
);
416 DO_imm_mandr_r("blendps", 2, src
, dst
);
417 DO_imm_mandr_r("blendps", 3, src
, dst
);
418 DO_imm_mandr_r("blendps", 4, src
, dst
);
419 DO_imm_mandr_r("blendps", 5, src
, dst
);
420 DO_imm_mandr_r("blendps", 6, src
, dst
);
421 DO_imm_mandr_r("blendps", 7, src
, dst
);
422 DO_imm_mandr_r("blendps", 8, src
, dst
);
423 DO_imm_mandr_r("blendps", 9, src
, dst
);
424 DO_imm_mandr_r("blendps", 10, src
, dst
);
425 DO_imm_mandr_r("blendps", 11, src
, dst
);
426 DO_imm_mandr_r("blendps", 12, src
, dst
);
427 DO_imm_mandr_r("blendps", 13, src
, dst
);
428 DO_imm_mandr_r("blendps", 14, src
, dst
);
429 DO_imm_mandr_r("blendps", 15, src
, dst
);
433 void test_DPPD ( void )
437 *(double*)(&src
[0]) = 1.2345;
438 *(double*)(&src
[8]) = -6.78910;
439 *(double*)(&dst
[0]) = -11.121314;
440 *(double*)(&dst
[8]) = 15.161718;
441 DO_imm_mandr_r("dppd", 0, src
, dst
);
442 DO_imm_mandr_r("dppd", 1, src
, dst
);
443 DO_imm_mandr_r("dppd", 2, src
, dst
);
444 DO_imm_mandr_r("dppd", 3, src
, dst
);
445 DO_imm_mandr_r("dppd", 4, src
, dst
);
446 DO_imm_mandr_r("dppd", 5, src
, dst
);
447 DO_imm_mandr_r("dppd", 6, src
, dst
);
448 DO_imm_mandr_r("dppd", 7, src
, dst
);
449 DO_imm_mandr_r("dppd", 8, src
, dst
);
450 DO_imm_mandr_r("dppd", 9, src
, dst
);
451 DO_imm_mandr_r("dppd", 10, src
, dst
);
452 DO_imm_mandr_r("dppd", 11, src
, dst
);
453 DO_imm_mandr_r("dppd", 12, src
, dst
);
454 DO_imm_mandr_r("dppd", 13, src
, dst
);
455 DO_imm_mandr_r("dppd", 14, src
, dst
);
456 DO_imm_mandr_r("dppd", 15, src
, dst
);
457 DO_imm_mandr_r("dppd", 16, src
, dst
);
458 DO_imm_mandr_r("dppd", 17, src
, dst
);
459 DO_imm_mandr_r("dppd", 18, src
, dst
);
460 DO_imm_mandr_r("dppd", 19, src
, dst
);
461 DO_imm_mandr_r("dppd", 20, src
, dst
);
462 DO_imm_mandr_r("dppd", 21, src
, dst
);
463 DO_imm_mandr_r("dppd", 22, src
, dst
);
464 DO_imm_mandr_r("dppd", 23, src
, dst
);
465 DO_imm_mandr_r("dppd", 24, src
, dst
);
466 DO_imm_mandr_r("dppd", 25, src
, dst
);
467 DO_imm_mandr_r("dppd", 26, src
, dst
);
468 DO_imm_mandr_r("dppd", 27, src
, dst
);
469 DO_imm_mandr_r("dppd", 28, src
, dst
);
470 DO_imm_mandr_r("dppd", 29, src
, dst
);
471 DO_imm_mandr_r("dppd", 30, src
, dst
);
472 DO_imm_mandr_r("dppd", 31, src
, dst
);
473 DO_imm_mandr_r("dppd", 32, src
, dst
);
474 DO_imm_mandr_r("dppd", 33, src
, dst
);
475 DO_imm_mandr_r("dppd", 34, src
, dst
);
476 DO_imm_mandr_r("dppd", 35, src
, dst
);
477 DO_imm_mandr_r("dppd", 36, src
, dst
);
478 DO_imm_mandr_r("dppd", 37, src
, dst
);
479 DO_imm_mandr_r("dppd", 38, src
, dst
);
480 DO_imm_mandr_r("dppd", 39, src
, dst
);
481 DO_imm_mandr_r("dppd", 40, src
, dst
);
482 DO_imm_mandr_r("dppd", 41, src
, dst
);
483 DO_imm_mandr_r("dppd", 42, src
, dst
);
484 DO_imm_mandr_r("dppd", 43, src
, dst
);
485 DO_imm_mandr_r("dppd", 44, src
, dst
);
486 DO_imm_mandr_r("dppd", 45, src
, dst
);
487 DO_imm_mandr_r("dppd", 46, src
, dst
);
488 DO_imm_mandr_r("dppd", 47, src
, dst
);
489 DO_imm_mandr_r("dppd", 48, src
, dst
);
490 DO_imm_mandr_r("dppd", 49, src
, dst
);
491 DO_imm_mandr_r("dppd", 50, src
, dst
);
492 DO_imm_mandr_r("dppd", 51, src
, dst
);
493 DO_imm_mandr_r("dppd", 52, src
, dst
);
494 DO_imm_mandr_r("dppd", 53, src
, dst
);
495 DO_imm_mandr_r("dppd", 54, src
, dst
);
496 DO_imm_mandr_r("dppd", 55, src
, dst
);
497 DO_imm_mandr_r("dppd", 56, src
, dst
);
498 DO_imm_mandr_r("dppd", 57, src
, dst
);
499 DO_imm_mandr_r("dppd", 58, src
, dst
);
500 DO_imm_mandr_r("dppd", 59, src
, dst
);
501 DO_imm_mandr_r("dppd", 60, src
, dst
);
502 DO_imm_mandr_r("dppd", 61, src
, dst
);
503 DO_imm_mandr_r("dppd", 62, src
, dst
);
504 DO_imm_mandr_r("dppd", 63, src
, dst
);
505 DO_imm_mandr_r("dppd", 64, src
, dst
);
506 DO_imm_mandr_r("dppd", 65, src
, dst
);
507 DO_imm_mandr_r("dppd", 66, src
, dst
);
508 DO_imm_mandr_r("dppd", 67, src
, dst
);
509 DO_imm_mandr_r("dppd", 68, src
, dst
);
510 DO_imm_mandr_r("dppd", 69, src
, dst
);
511 DO_imm_mandr_r("dppd", 70, src
, dst
);
512 DO_imm_mandr_r("dppd", 71, src
, dst
);
513 DO_imm_mandr_r("dppd", 72, src
, dst
);
514 DO_imm_mandr_r("dppd", 73, src
, dst
);
515 DO_imm_mandr_r("dppd", 74, src
, dst
);
516 DO_imm_mandr_r("dppd", 75, src
, dst
);
517 DO_imm_mandr_r("dppd", 76, src
, dst
);
518 DO_imm_mandr_r("dppd", 77, src
, dst
);
519 DO_imm_mandr_r("dppd", 78, src
, dst
);
520 DO_imm_mandr_r("dppd", 79, src
, dst
);
521 DO_imm_mandr_r("dppd", 80, src
, dst
);
522 DO_imm_mandr_r("dppd", 81, src
, dst
);
523 DO_imm_mandr_r("dppd", 82, src
, dst
);
524 DO_imm_mandr_r("dppd", 83, src
, dst
);
525 DO_imm_mandr_r("dppd", 84, src
, dst
);
526 DO_imm_mandr_r("dppd", 85, src
, dst
);
527 DO_imm_mandr_r("dppd", 86, src
, dst
);
528 DO_imm_mandr_r("dppd", 87, src
, dst
);
529 DO_imm_mandr_r("dppd", 88, src
, dst
);
530 DO_imm_mandr_r("dppd", 89, src
, dst
);
531 DO_imm_mandr_r("dppd", 90, src
, dst
);
532 DO_imm_mandr_r("dppd", 91, src
, dst
);
533 DO_imm_mandr_r("dppd", 92, src
, dst
);
534 DO_imm_mandr_r("dppd", 93, src
, dst
);
535 DO_imm_mandr_r("dppd", 94, src
, dst
);
536 DO_imm_mandr_r("dppd", 95, src
, dst
);
537 DO_imm_mandr_r("dppd", 96, src
, dst
);
538 DO_imm_mandr_r("dppd", 97, src
, dst
);
539 DO_imm_mandr_r("dppd", 98, src
, dst
);
540 DO_imm_mandr_r("dppd", 99, src
, dst
);
541 DO_imm_mandr_r("dppd", 100, src
, dst
);
542 DO_imm_mandr_r("dppd", 101, src
, dst
);
543 DO_imm_mandr_r("dppd", 102, src
, dst
);
544 DO_imm_mandr_r("dppd", 103, src
, dst
);
545 DO_imm_mandr_r("dppd", 104, src
, dst
);
546 DO_imm_mandr_r("dppd", 105, src
, dst
);
547 DO_imm_mandr_r("dppd", 106, src
, dst
);
548 DO_imm_mandr_r("dppd", 107, src
, dst
);
549 DO_imm_mandr_r("dppd", 108, src
, dst
);
550 DO_imm_mandr_r("dppd", 109, src
, dst
);
551 DO_imm_mandr_r("dppd", 110, src
, dst
);
552 DO_imm_mandr_r("dppd", 111, src
, dst
);
553 DO_imm_mandr_r("dppd", 112, src
, dst
);
554 DO_imm_mandr_r("dppd", 113, src
, dst
);
555 DO_imm_mandr_r("dppd", 114, src
, dst
);
556 DO_imm_mandr_r("dppd", 115, src
, dst
);
557 DO_imm_mandr_r("dppd", 116, src
, dst
);
558 DO_imm_mandr_r("dppd", 117, src
, dst
);
559 DO_imm_mandr_r("dppd", 118, src
, dst
);
560 DO_imm_mandr_r("dppd", 119, src
, dst
);
561 DO_imm_mandr_r("dppd", 120, src
, dst
);
562 DO_imm_mandr_r("dppd", 121, src
, dst
);
563 DO_imm_mandr_r("dppd", 122, src
, dst
);
564 DO_imm_mandr_r("dppd", 123, src
, dst
);
565 DO_imm_mandr_r("dppd", 124, src
, dst
);
566 DO_imm_mandr_r("dppd", 125, src
, dst
);
567 DO_imm_mandr_r("dppd", 126, src
, dst
);
568 DO_imm_mandr_r("dppd", 127, src
, dst
);
569 DO_imm_mandr_r("dppd", 128, src
, dst
);
570 DO_imm_mandr_r("dppd", 129, src
, dst
);
571 DO_imm_mandr_r("dppd", 130, src
, dst
);
572 DO_imm_mandr_r("dppd", 131, src
, dst
);
573 DO_imm_mandr_r("dppd", 132, src
, dst
);
574 DO_imm_mandr_r("dppd", 133, src
, dst
);
575 DO_imm_mandr_r("dppd", 134, src
, dst
);
576 DO_imm_mandr_r("dppd", 135, src
, dst
);
577 DO_imm_mandr_r("dppd", 136, src
, dst
);
578 DO_imm_mandr_r("dppd", 137, src
, dst
);
579 DO_imm_mandr_r("dppd", 138, src
, dst
);
580 DO_imm_mandr_r("dppd", 139, src
, dst
);
581 DO_imm_mandr_r("dppd", 140, src
, dst
);
582 DO_imm_mandr_r("dppd", 141, src
, dst
);
583 DO_imm_mandr_r("dppd", 142, src
, dst
);
584 DO_imm_mandr_r("dppd", 143, src
, dst
);
585 DO_imm_mandr_r("dppd", 144, src
, dst
);
586 DO_imm_mandr_r("dppd", 145, src
, dst
);
587 DO_imm_mandr_r("dppd", 146, src
, dst
);
588 DO_imm_mandr_r("dppd", 147, src
, dst
);
589 DO_imm_mandr_r("dppd", 148, src
, dst
);
590 DO_imm_mandr_r("dppd", 149, src
, dst
);
591 DO_imm_mandr_r("dppd", 150, src
, dst
);
592 DO_imm_mandr_r("dppd", 151, src
, dst
);
593 DO_imm_mandr_r("dppd", 152, src
, dst
);
594 DO_imm_mandr_r("dppd", 153, src
, dst
);
595 DO_imm_mandr_r("dppd", 154, src
, dst
);
596 DO_imm_mandr_r("dppd", 155, src
, dst
);
597 DO_imm_mandr_r("dppd", 156, src
, dst
);
598 DO_imm_mandr_r("dppd", 157, src
, dst
);
599 DO_imm_mandr_r("dppd", 158, src
, dst
);
600 DO_imm_mandr_r("dppd", 159, src
, dst
);
601 DO_imm_mandr_r("dppd", 160, src
, dst
);
602 DO_imm_mandr_r("dppd", 161, src
, dst
);
603 DO_imm_mandr_r("dppd", 162, src
, dst
);
604 DO_imm_mandr_r("dppd", 163, src
, dst
);
605 DO_imm_mandr_r("dppd", 164, src
, dst
);
606 DO_imm_mandr_r("dppd", 165, src
, dst
);
607 DO_imm_mandr_r("dppd", 166, src
, dst
);
608 DO_imm_mandr_r("dppd", 167, src
, dst
);
609 DO_imm_mandr_r("dppd", 168, src
, dst
);
610 DO_imm_mandr_r("dppd", 169, src
, dst
);
611 DO_imm_mandr_r("dppd", 170, src
, dst
);
612 DO_imm_mandr_r("dppd", 171, src
, dst
);
613 DO_imm_mandr_r("dppd", 172, src
, dst
);
614 DO_imm_mandr_r("dppd", 173, src
, dst
);
615 DO_imm_mandr_r("dppd", 174, src
, dst
);
616 DO_imm_mandr_r("dppd", 175, src
, dst
);
617 DO_imm_mandr_r("dppd", 176, src
, dst
);
618 DO_imm_mandr_r("dppd", 177, src
, dst
);
619 DO_imm_mandr_r("dppd", 178, src
, dst
);
620 DO_imm_mandr_r("dppd", 179, src
, dst
);
621 DO_imm_mandr_r("dppd", 180, src
, dst
);
622 DO_imm_mandr_r("dppd", 181, src
, dst
);
623 DO_imm_mandr_r("dppd", 182, src
, dst
);
624 DO_imm_mandr_r("dppd", 183, src
, dst
);
625 DO_imm_mandr_r("dppd", 184, src
, dst
);
626 DO_imm_mandr_r("dppd", 185, src
, dst
);
627 DO_imm_mandr_r("dppd", 186, src
, dst
);
628 DO_imm_mandr_r("dppd", 187, src
, dst
);
629 DO_imm_mandr_r("dppd", 188, src
, dst
);
630 DO_imm_mandr_r("dppd", 189, src
, dst
);
631 DO_imm_mandr_r("dppd", 190, src
, dst
);
632 DO_imm_mandr_r("dppd", 191, src
, dst
);
633 DO_imm_mandr_r("dppd", 192, src
, dst
);
634 DO_imm_mandr_r("dppd", 193, src
, dst
);
635 DO_imm_mandr_r("dppd", 194, src
, dst
);
636 DO_imm_mandr_r("dppd", 195, src
, dst
);
637 DO_imm_mandr_r("dppd", 196, src
, dst
);
638 DO_imm_mandr_r("dppd", 197, src
, dst
);
639 DO_imm_mandr_r("dppd", 198, src
, dst
);
640 DO_imm_mandr_r("dppd", 199, src
, dst
);
641 DO_imm_mandr_r("dppd", 200, src
, dst
);
642 DO_imm_mandr_r("dppd", 201, src
, dst
);
643 DO_imm_mandr_r("dppd", 202, src
, dst
);
644 DO_imm_mandr_r("dppd", 203, src
, dst
);
645 DO_imm_mandr_r("dppd", 204, src
, dst
);
646 DO_imm_mandr_r("dppd", 205, src
, dst
);
647 DO_imm_mandr_r("dppd", 206, src
, dst
);
648 DO_imm_mandr_r("dppd", 207, src
, dst
);
649 DO_imm_mandr_r("dppd", 208, src
, dst
);
650 DO_imm_mandr_r("dppd", 209, src
, dst
);
651 DO_imm_mandr_r("dppd", 210, src
, dst
);
652 DO_imm_mandr_r("dppd", 211, src
, dst
);
653 DO_imm_mandr_r("dppd", 212, src
, dst
);
654 DO_imm_mandr_r("dppd", 213, src
, dst
);
655 DO_imm_mandr_r("dppd", 214, src
, dst
);
656 DO_imm_mandr_r("dppd", 215, src
, dst
);
657 DO_imm_mandr_r("dppd", 216, src
, dst
);
658 DO_imm_mandr_r("dppd", 217, src
, dst
);
659 DO_imm_mandr_r("dppd", 218, src
, dst
);
660 DO_imm_mandr_r("dppd", 219, src
, dst
);
661 DO_imm_mandr_r("dppd", 220, src
, dst
);
662 DO_imm_mandr_r("dppd", 221, src
, dst
);
663 DO_imm_mandr_r("dppd", 222, src
, dst
);
664 DO_imm_mandr_r("dppd", 223, src
, dst
);
665 DO_imm_mandr_r("dppd", 224, src
, dst
);
666 DO_imm_mandr_r("dppd", 225, src
, dst
);
667 DO_imm_mandr_r("dppd", 226, src
, dst
);
668 DO_imm_mandr_r("dppd", 227, src
, dst
);
669 DO_imm_mandr_r("dppd", 228, src
, dst
);
670 DO_imm_mandr_r("dppd", 229, src
, dst
);
671 DO_imm_mandr_r("dppd", 230, src
, dst
);
672 DO_imm_mandr_r("dppd", 231, src
, dst
);
673 DO_imm_mandr_r("dppd", 232, src
, dst
);
674 DO_imm_mandr_r("dppd", 233, src
, dst
);
675 DO_imm_mandr_r("dppd", 234, src
, dst
);
676 DO_imm_mandr_r("dppd", 235, src
, dst
);
677 DO_imm_mandr_r("dppd", 236, src
, dst
);
678 DO_imm_mandr_r("dppd", 237, src
, dst
);
679 DO_imm_mandr_r("dppd", 238, src
, dst
);
680 DO_imm_mandr_r("dppd", 239, src
, dst
);
681 DO_imm_mandr_r("dppd", 240, src
, dst
);
682 DO_imm_mandr_r("dppd", 241, src
, dst
);
683 DO_imm_mandr_r("dppd", 242, src
, dst
);
684 DO_imm_mandr_r("dppd", 243, src
, dst
);
685 DO_imm_mandr_r("dppd", 244, src
, dst
);
686 DO_imm_mandr_r("dppd", 245, src
, dst
);
687 DO_imm_mandr_r("dppd", 246, src
, dst
);
688 DO_imm_mandr_r("dppd", 247, src
, dst
);
689 DO_imm_mandr_r("dppd", 248, src
, dst
);
690 DO_imm_mandr_r("dppd", 249, src
, dst
);
691 DO_imm_mandr_r("dppd", 250, src
, dst
);
692 DO_imm_mandr_r("dppd", 251, src
, dst
);
693 DO_imm_mandr_r("dppd", 252, src
, dst
);
694 DO_imm_mandr_r("dppd", 253, src
, dst
);
695 DO_imm_mandr_r("dppd", 254, src
, dst
);
696 DO_imm_mandr_r("dppd", 255, src
, dst
);
700 void test_DPPS ( void )
704 *(float*)(&src
[0]) = 1.2;
705 *(float*)(&src
[4]) = -3.4;
706 *(float*)(&src
[8]) = -6.7;
707 *(float*)(&src
[12]) = 8.9;
708 *(float*)(&dst
[0]) = -10.11;
709 *(float*)(&dst
[4]) = 12.13;
710 *(float*)(&dst
[8]) = 14.15;
711 *(float*)(&dst
[12]) = -16.17;
712 DO_imm_mandr_r("dpps", 0, src
, dst
);
713 DO_imm_mandr_r("dpps", 1, src
, dst
);
714 DO_imm_mandr_r("dpps", 2, src
, dst
);
715 DO_imm_mandr_r("dpps", 3, src
, dst
);
716 DO_imm_mandr_r("dpps", 4, src
, dst
);
717 DO_imm_mandr_r("dpps", 5, src
, dst
);
718 DO_imm_mandr_r("dpps", 6, src
, dst
);
719 DO_imm_mandr_r("dpps", 7, src
, dst
);
720 DO_imm_mandr_r("dpps", 8, src
, dst
);
721 DO_imm_mandr_r("dpps", 9, src
, dst
);
722 DO_imm_mandr_r("dpps", 10, src
, dst
);
723 DO_imm_mandr_r("dpps", 11, src
, dst
);
724 DO_imm_mandr_r("dpps", 12, src
, dst
);
725 DO_imm_mandr_r("dpps", 13, src
, dst
);
726 DO_imm_mandr_r("dpps", 14, src
, dst
);
727 DO_imm_mandr_r("dpps", 15, src
, dst
);
728 DO_imm_mandr_r("dpps", 16, src
, dst
);
729 DO_imm_mandr_r("dpps", 17, src
, dst
);
730 DO_imm_mandr_r("dpps", 18, src
, dst
);
731 DO_imm_mandr_r("dpps", 19, src
, dst
);
732 DO_imm_mandr_r("dpps", 20, src
, dst
);
733 DO_imm_mandr_r("dpps", 21, src
, dst
);
734 DO_imm_mandr_r("dpps", 22, src
, dst
);
735 DO_imm_mandr_r("dpps", 23, src
, dst
);
736 DO_imm_mandr_r("dpps", 24, src
, dst
);
737 DO_imm_mandr_r("dpps", 25, src
, dst
);
738 DO_imm_mandr_r("dpps", 26, src
, dst
);
739 DO_imm_mandr_r("dpps", 27, src
, dst
);
740 DO_imm_mandr_r("dpps", 28, src
, dst
);
741 DO_imm_mandr_r("dpps", 29, src
, dst
);
742 DO_imm_mandr_r("dpps", 30, src
, dst
);
743 DO_imm_mandr_r("dpps", 31, src
, dst
);
744 DO_imm_mandr_r("dpps", 32, src
, dst
);
745 DO_imm_mandr_r("dpps", 33, src
, dst
);
746 DO_imm_mandr_r("dpps", 34, src
, dst
);
747 DO_imm_mandr_r("dpps", 35, src
, dst
);
748 DO_imm_mandr_r("dpps", 36, src
, dst
);
749 DO_imm_mandr_r("dpps", 37, src
, dst
);
750 DO_imm_mandr_r("dpps", 38, src
, dst
);
751 DO_imm_mandr_r("dpps", 39, src
, dst
);
752 DO_imm_mandr_r("dpps", 40, src
, dst
);
753 DO_imm_mandr_r("dpps", 41, src
, dst
);
754 DO_imm_mandr_r("dpps", 42, src
, dst
);
755 DO_imm_mandr_r("dpps", 43, src
, dst
);
756 DO_imm_mandr_r("dpps", 44, src
, dst
);
757 DO_imm_mandr_r("dpps", 45, src
, dst
);
758 DO_imm_mandr_r("dpps", 46, src
, dst
);
759 DO_imm_mandr_r("dpps", 47, src
, dst
);
760 DO_imm_mandr_r("dpps", 48, src
, dst
);
761 DO_imm_mandr_r("dpps", 49, src
, dst
);
762 DO_imm_mandr_r("dpps", 50, src
, dst
);
763 DO_imm_mandr_r("dpps", 51, src
, dst
);
764 DO_imm_mandr_r("dpps", 52, src
, dst
);
765 DO_imm_mandr_r("dpps", 53, src
, dst
);
766 DO_imm_mandr_r("dpps", 54, src
, dst
);
767 DO_imm_mandr_r("dpps", 55, src
, dst
);
768 DO_imm_mandr_r("dpps", 56, src
, dst
);
769 DO_imm_mandr_r("dpps", 57, src
, dst
);
770 DO_imm_mandr_r("dpps", 58, src
, dst
);
771 DO_imm_mandr_r("dpps", 59, src
, dst
);
772 DO_imm_mandr_r("dpps", 60, src
, dst
);
773 DO_imm_mandr_r("dpps", 61, src
, dst
);
774 DO_imm_mandr_r("dpps", 62, src
, dst
);
775 DO_imm_mandr_r("dpps", 63, src
, dst
);
776 DO_imm_mandr_r("dpps", 64, src
, dst
);
777 DO_imm_mandr_r("dpps", 65, src
, dst
);
778 DO_imm_mandr_r("dpps", 66, src
, dst
);
779 DO_imm_mandr_r("dpps", 67, src
, dst
);
780 DO_imm_mandr_r("dpps", 68, src
, dst
);
781 DO_imm_mandr_r("dpps", 69, src
, dst
);
782 DO_imm_mandr_r("dpps", 70, src
, dst
);
783 DO_imm_mandr_r("dpps", 71, src
, dst
);
784 DO_imm_mandr_r("dpps", 72, src
, dst
);
785 DO_imm_mandr_r("dpps", 73, src
, dst
);
786 DO_imm_mandr_r("dpps", 74, src
, dst
);
787 DO_imm_mandr_r("dpps", 75, src
, dst
);
788 DO_imm_mandr_r("dpps", 76, src
, dst
);
789 DO_imm_mandr_r("dpps", 77, src
, dst
);
790 DO_imm_mandr_r("dpps", 78, src
, dst
);
791 DO_imm_mandr_r("dpps", 79, src
, dst
);
792 DO_imm_mandr_r("dpps", 80, src
, dst
);
793 DO_imm_mandr_r("dpps", 81, src
, dst
);
794 DO_imm_mandr_r("dpps", 82, src
, dst
);
795 DO_imm_mandr_r("dpps", 83, src
, dst
);
796 DO_imm_mandr_r("dpps", 84, src
, dst
);
797 DO_imm_mandr_r("dpps", 85, src
, dst
);
798 DO_imm_mandr_r("dpps", 86, src
, dst
);
799 DO_imm_mandr_r("dpps", 87, src
, dst
);
800 DO_imm_mandr_r("dpps", 88, src
, dst
);
801 DO_imm_mandr_r("dpps", 89, src
, dst
);
802 DO_imm_mandr_r("dpps", 90, src
, dst
);
803 DO_imm_mandr_r("dpps", 91, src
, dst
);
804 DO_imm_mandr_r("dpps", 92, src
, dst
);
805 DO_imm_mandr_r("dpps", 93, src
, dst
);
806 DO_imm_mandr_r("dpps", 94, src
, dst
);
807 DO_imm_mandr_r("dpps", 95, src
, dst
);
808 DO_imm_mandr_r("dpps", 96, src
, dst
);
809 DO_imm_mandr_r("dpps", 97, src
, dst
);
810 DO_imm_mandr_r("dpps", 98, src
, dst
);
811 DO_imm_mandr_r("dpps", 99, src
, dst
);
812 DO_imm_mandr_r("dpps", 100, src
, dst
);
813 DO_imm_mandr_r("dpps", 101, src
, dst
);
814 DO_imm_mandr_r("dpps", 102, src
, dst
);
815 DO_imm_mandr_r("dpps", 103, src
, dst
);
816 DO_imm_mandr_r("dpps", 104, src
, dst
);
817 DO_imm_mandr_r("dpps", 105, src
, dst
);
818 DO_imm_mandr_r("dpps", 106, src
, dst
);
819 DO_imm_mandr_r("dpps", 107, src
, dst
);
820 DO_imm_mandr_r("dpps", 108, src
, dst
);
821 DO_imm_mandr_r("dpps", 109, src
, dst
);
822 DO_imm_mandr_r("dpps", 110, src
, dst
);
823 DO_imm_mandr_r("dpps", 111, src
, dst
);
824 DO_imm_mandr_r("dpps", 112, src
, dst
);
825 DO_imm_mandr_r("dpps", 113, src
, dst
);
826 DO_imm_mandr_r("dpps", 114, src
, dst
);
827 DO_imm_mandr_r("dpps", 115, src
, dst
);
828 DO_imm_mandr_r("dpps", 116, src
, dst
);
829 DO_imm_mandr_r("dpps", 117, src
, dst
);
830 DO_imm_mandr_r("dpps", 118, src
, dst
);
831 DO_imm_mandr_r("dpps", 119, src
, dst
);
832 DO_imm_mandr_r("dpps", 120, src
, dst
);
833 DO_imm_mandr_r("dpps", 121, src
, dst
);
834 DO_imm_mandr_r("dpps", 122, src
, dst
);
835 DO_imm_mandr_r("dpps", 123, src
, dst
);
836 DO_imm_mandr_r("dpps", 124, src
, dst
);
837 DO_imm_mandr_r("dpps", 125, src
, dst
);
838 DO_imm_mandr_r("dpps", 126, src
, dst
);
839 DO_imm_mandr_r("dpps", 127, src
, dst
);
840 DO_imm_mandr_r("dpps", 128, src
, dst
);
841 DO_imm_mandr_r("dpps", 129, src
, dst
);
842 DO_imm_mandr_r("dpps", 130, src
, dst
);
843 DO_imm_mandr_r("dpps", 131, src
, dst
);
844 DO_imm_mandr_r("dpps", 132, src
, dst
);
845 DO_imm_mandr_r("dpps", 133, src
, dst
);
846 DO_imm_mandr_r("dpps", 134, src
, dst
);
847 DO_imm_mandr_r("dpps", 135, src
, dst
);
848 DO_imm_mandr_r("dpps", 136, src
, dst
);
849 DO_imm_mandr_r("dpps", 137, src
, dst
);
850 DO_imm_mandr_r("dpps", 138, src
, dst
);
851 DO_imm_mandr_r("dpps", 139, src
, dst
);
852 DO_imm_mandr_r("dpps", 140, src
, dst
);
853 DO_imm_mandr_r("dpps", 141, src
, dst
);
854 DO_imm_mandr_r("dpps", 142, src
, dst
);
855 DO_imm_mandr_r("dpps", 143, src
, dst
);
856 DO_imm_mandr_r("dpps", 144, src
, dst
);
857 DO_imm_mandr_r("dpps", 145, src
, dst
);
858 DO_imm_mandr_r("dpps", 146, src
, dst
);
859 DO_imm_mandr_r("dpps", 147, src
, dst
);
860 DO_imm_mandr_r("dpps", 148, src
, dst
);
861 DO_imm_mandr_r("dpps", 149, src
, dst
);
862 DO_imm_mandr_r("dpps", 150, src
, dst
);
863 DO_imm_mandr_r("dpps", 151, src
, dst
);
864 DO_imm_mandr_r("dpps", 152, src
, dst
);
865 DO_imm_mandr_r("dpps", 153, src
, dst
);
866 DO_imm_mandr_r("dpps", 154, src
, dst
);
867 DO_imm_mandr_r("dpps", 155, src
, dst
);
868 DO_imm_mandr_r("dpps", 156, src
, dst
);
869 DO_imm_mandr_r("dpps", 157, src
, dst
);
870 DO_imm_mandr_r("dpps", 158, src
, dst
);
871 DO_imm_mandr_r("dpps", 159, src
, dst
);
872 DO_imm_mandr_r("dpps", 160, src
, dst
);
873 DO_imm_mandr_r("dpps", 161, src
, dst
);
874 DO_imm_mandr_r("dpps", 162, src
, dst
);
875 DO_imm_mandr_r("dpps", 163, src
, dst
);
876 DO_imm_mandr_r("dpps", 164, src
, dst
);
877 DO_imm_mandr_r("dpps", 165, src
, dst
);
878 DO_imm_mandr_r("dpps", 166, src
, dst
);
879 DO_imm_mandr_r("dpps", 167, src
, dst
);
880 DO_imm_mandr_r("dpps", 168, src
, dst
);
881 DO_imm_mandr_r("dpps", 169, src
, dst
);
882 DO_imm_mandr_r("dpps", 170, src
, dst
);
883 DO_imm_mandr_r("dpps", 171, src
, dst
);
884 DO_imm_mandr_r("dpps", 172, src
, dst
);
885 DO_imm_mandr_r("dpps", 173, src
, dst
);
886 DO_imm_mandr_r("dpps", 174, src
, dst
);
887 DO_imm_mandr_r("dpps", 175, src
, dst
);
888 DO_imm_mandr_r("dpps", 176, src
, dst
);
889 DO_imm_mandr_r("dpps", 177, src
, dst
);
890 DO_imm_mandr_r("dpps", 178, src
, dst
);
891 DO_imm_mandr_r("dpps", 179, src
, dst
);
892 DO_imm_mandr_r("dpps", 180, src
, dst
);
893 DO_imm_mandr_r("dpps", 181, src
, dst
);
894 DO_imm_mandr_r("dpps", 182, src
, dst
);
895 DO_imm_mandr_r("dpps", 183, src
, dst
);
896 DO_imm_mandr_r("dpps", 184, src
, dst
);
897 DO_imm_mandr_r("dpps", 185, src
, dst
);
898 DO_imm_mandr_r("dpps", 186, src
, dst
);
899 DO_imm_mandr_r("dpps", 187, src
, dst
);
900 DO_imm_mandr_r("dpps", 188, src
, dst
);
901 DO_imm_mandr_r("dpps", 189, src
, dst
);
902 DO_imm_mandr_r("dpps", 190, src
, dst
);
903 DO_imm_mandr_r("dpps", 191, src
, dst
);
904 DO_imm_mandr_r("dpps", 192, src
, dst
);
905 DO_imm_mandr_r("dpps", 193, src
, dst
);
906 DO_imm_mandr_r("dpps", 194, src
, dst
);
907 DO_imm_mandr_r("dpps", 195, src
, dst
);
908 DO_imm_mandr_r("dpps", 196, src
, dst
);
909 DO_imm_mandr_r("dpps", 197, src
, dst
);
910 DO_imm_mandr_r("dpps", 198, src
, dst
);
911 DO_imm_mandr_r("dpps", 199, src
, dst
);
912 DO_imm_mandr_r("dpps", 200, src
, dst
);
913 DO_imm_mandr_r("dpps", 201, src
, dst
);
914 DO_imm_mandr_r("dpps", 202, src
, dst
);
915 DO_imm_mandr_r("dpps", 203, src
, dst
);
916 DO_imm_mandr_r("dpps", 204, src
, dst
);
917 DO_imm_mandr_r("dpps", 205, src
, dst
);
918 DO_imm_mandr_r("dpps", 206, src
, dst
);
919 DO_imm_mandr_r("dpps", 207, src
, dst
);
920 DO_imm_mandr_r("dpps", 208, src
, dst
);
921 DO_imm_mandr_r("dpps", 209, src
, dst
);
922 DO_imm_mandr_r("dpps", 210, src
, dst
);
923 DO_imm_mandr_r("dpps", 211, src
, dst
);
924 DO_imm_mandr_r("dpps", 212, src
, dst
);
925 DO_imm_mandr_r("dpps", 213, src
, dst
);
926 DO_imm_mandr_r("dpps", 214, src
, dst
);
927 DO_imm_mandr_r("dpps", 215, src
, dst
);
928 DO_imm_mandr_r("dpps", 216, src
, dst
);
929 DO_imm_mandr_r("dpps", 217, src
, dst
);
930 DO_imm_mandr_r("dpps", 218, src
, dst
);
931 DO_imm_mandr_r("dpps", 219, src
, dst
);
932 DO_imm_mandr_r("dpps", 220, src
, dst
);
933 DO_imm_mandr_r("dpps", 221, src
, dst
);
934 DO_imm_mandr_r("dpps", 222, src
, dst
);
935 DO_imm_mandr_r("dpps", 223, src
, dst
);
936 DO_imm_mandr_r("dpps", 224, src
, dst
);
937 DO_imm_mandr_r("dpps", 225, src
, dst
);
938 DO_imm_mandr_r("dpps", 226, src
, dst
);
939 DO_imm_mandr_r("dpps", 227, src
, dst
);
940 DO_imm_mandr_r("dpps", 228, src
, dst
);
941 DO_imm_mandr_r("dpps", 229, src
, dst
);
942 DO_imm_mandr_r("dpps", 230, src
, dst
);
943 DO_imm_mandr_r("dpps", 231, src
, dst
);
944 DO_imm_mandr_r("dpps", 232, src
, dst
);
945 DO_imm_mandr_r("dpps", 233, src
, dst
);
946 DO_imm_mandr_r("dpps", 234, src
, dst
);
947 DO_imm_mandr_r("dpps", 235, src
, dst
);
948 DO_imm_mandr_r("dpps", 236, src
, dst
);
949 DO_imm_mandr_r("dpps", 237, src
, dst
);
950 DO_imm_mandr_r("dpps", 238, src
, dst
);
951 DO_imm_mandr_r("dpps", 239, src
, dst
);
952 DO_imm_mandr_r("dpps", 240, src
, dst
);
953 DO_imm_mandr_r("dpps", 241, src
, dst
);
954 DO_imm_mandr_r("dpps", 242, src
, dst
);
955 DO_imm_mandr_r("dpps", 243, src
, dst
);
956 DO_imm_mandr_r("dpps", 244, src
, dst
);
957 DO_imm_mandr_r("dpps", 245, src
, dst
);
958 DO_imm_mandr_r("dpps", 246, src
, dst
);
959 DO_imm_mandr_r("dpps", 247, src
, dst
);
960 DO_imm_mandr_r("dpps", 248, src
, dst
);
961 DO_imm_mandr_r("dpps", 249, src
, dst
);
962 DO_imm_mandr_r("dpps", 250, src
, dst
);
963 DO_imm_mandr_r("dpps", 251, src
, dst
);
964 DO_imm_mandr_r("dpps", 252, src
, dst
);
965 DO_imm_mandr_r("dpps", 253, src
, dst
);
966 DO_imm_mandr_r("dpps", 254, src
, dst
);
967 DO_imm_mandr_r("dpps", 255, src
, dst
);
/* Exercise "insertps" with every 8-bit immediate value.

   src and dst are first given four float values each, stored at byte
   offsets 0, 4, 8 and 12, so that all eight 32-bit lanes hold distinct,
   recognisable values.  DO_imm_mandr_r is then invoked once for each
   immediate from 0 to 255.  The immediates are written out literally
   instead of being generated by a loop -- presumably because
   DO_imm_mandr_r needs a compile-time constant; confirm against its
   definition.

   NOTE(review): the constants (1.2, -3.4, ...) are double literals that
   are converted to float by the assignment.
   NOTE(review): the stores go through a float pointer cast of &src[n] and
   &dst[n].  If src/dst are V128 byte arrays (declaration not visible
   here -- confirm), this is the kind of type punning that do64HLtoV128
   deliberately avoids by using memcpy; consider doing the same here. */
971 void test_INSERTPS ( void )
975 *(float*)(&src
[0]) = 1.2;
976 *(float*)(&src
[4]) = -3.4;
977 *(float*)(&src
[8]) = -6.7;
978 *(float*)(&src
[12]) = 8.9;
979 *(float*)(&dst
[0]) = -10.11;
980 *(float*)(&dst
[4]) = 12.13;
981 *(float*)(&dst
[8]) = 14.15;
982 *(float*)(&dst
[12]) = -16.17;
983 DO_imm_mandr_r("insertps", 0, src
, dst
);
984 DO_imm_mandr_r("insertps", 1, src
, dst
);
985 DO_imm_mandr_r("insertps", 2, src
, dst
);
986 DO_imm_mandr_r("insertps", 3, src
, dst
);
987 DO_imm_mandr_r("insertps", 4, src
, dst
);
988 DO_imm_mandr_r("insertps", 5, src
, dst
);
989 DO_imm_mandr_r("insertps", 6, src
, dst
);
990 DO_imm_mandr_r("insertps", 7, src
, dst
);
991 DO_imm_mandr_r("insertps", 8, src
, dst
);
992 DO_imm_mandr_r("insertps", 9, src
, dst
);
993 DO_imm_mandr_r("insertps", 10, src
, dst
);
994 DO_imm_mandr_r("insertps", 11, src
, dst
);
995 DO_imm_mandr_r("insertps", 12, src
, dst
);
996 DO_imm_mandr_r("insertps", 13, src
, dst
);
997 DO_imm_mandr_r("insertps", 14, src
, dst
);
998 DO_imm_mandr_r("insertps", 15, src
, dst
);
999 DO_imm_mandr_r("insertps", 16, src
, dst
);
1000 DO_imm_mandr_r("insertps", 17, src
, dst
);
1001 DO_imm_mandr_r("insertps", 18, src
, dst
);
1002 DO_imm_mandr_r("insertps", 19, src
, dst
);
1003 DO_imm_mandr_r("insertps", 20, src
, dst
);
1004 DO_imm_mandr_r("insertps", 21, src
, dst
);
1005 DO_imm_mandr_r("insertps", 22, src
, dst
);
1006 DO_imm_mandr_r("insertps", 23, src
, dst
);
1007 DO_imm_mandr_r("insertps", 24, src
, dst
);
1008 DO_imm_mandr_r("insertps", 25, src
, dst
);
1009 DO_imm_mandr_r("insertps", 26, src
, dst
);
1010 DO_imm_mandr_r("insertps", 27, src
, dst
);
1011 DO_imm_mandr_r("insertps", 28, src
, dst
);
1012 DO_imm_mandr_r("insertps", 29, src
, dst
);
1013 DO_imm_mandr_r("insertps", 30, src
, dst
);
1014 DO_imm_mandr_r("insertps", 31, src
, dst
);
1015 DO_imm_mandr_r("insertps", 32, src
, dst
);
1016 DO_imm_mandr_r("insertps", 33, src
, dst
);
1017 DO_imm_mandr_r("insertps", 34, src
, dst
);
1018 DO_imm_mandr_r("insertps", 35, src
, dst
);
1019 DO_imm_mandr_r("insertps", 36, src
, dst
);
1020 DO_imm_mandr_r("insertps", 37, src
, dst
);
1021 DO_imm_mandr_r("insertps", 38, src
, dst
);
1022 DO_imm_mandr_r("insertps", 39, src
, dst
);
1023 DO_imm_mandr_r("insertps", 40, src
, dst
);
1024 DO_imm_mandr_r("insertps", 41, src
, dst
);
1025 DO_imm_mandr_r("insertps", 42, src
, dst
);
1026 DO_imm_mandr_r("insertps", 43, src
, dst
);
1027 DO_imm_mandr_r("insertps", 44, src
, dst
);
1028 DO_imm_mandr_r("insertps", 45, src
, dst
);
1029 DO_imm_mandr_r("insertps", 46, src
, dst
);
1030 DO_imm_mandr_r("insertps", 47, src
, dst
);
1031 DO_imm_mandr_r("insertps", 48, src
, dst
);
1032 DO_imm_mandr_r("insertps", 49, src
, dst
);
1033 DO_imm_mandr_r("insertps", 50, src
, dst
);
1034 DO_imm_mandr_r("insertps", 51, src
, dst
);
1035 DO_imm_mandr_r("insertps", 52, src
, dst
);
1036 DO_imm_mandr_r("insertps", 53, src
, dst
);
1037 DO_imm_mandr_r("insertps", 54, src
, dst
);
1038 DO_imm_mandr_r("insertps", 55, src
, dst
);
1039 DO_imm_mandr_r("insertps", 56, src
, dst
);
1040 DO_imm_mandr_r("insertps", 57, src
, dst
);
1041 DO_imm_mandr_r("insertps", 58, src
, dst
);
1042 DO_imm_mandr_r("insertps", 59, src
, dst
);
1043 DO_imm_mandr_r("insertps", 60, src
, dst
);
1044 DO_imm_mandr_r("insertps", 61, src
, dst
);
1045 DO_imm_mandr_r("insertps", 62, src
, dst
);
1046 DO_imm_mandr_r("insertps", 63, src
, dst
);
1047 DO_imm_mandr_r("insertps", 64, src
, dst
);
1048 DO_imm_mandr_r("insertps", 65, src
, dst
);
1049 DO_imm_mandr_r("insertps", 66, src
, dst
);
1050 DO_imm_mandr_r("insertps", 67, src
, dst
);
1051 DO_imm_mandr_r("insertps", 68, src
, dst
);
1052 DO_imm_mandr_r("insertps", 69, src
, dst
);
1053 DO_imm_mandr_r("insertps", 70, src
, dst
);
1054 DO_imm_mandr_r("insertps", 71, src
, dst
);
1055 DO_imm_mandr_r("insertps", 72, src
, dst
);
1056 DO_imm_mandr_r("insertps", 73, src
, dst
);
1057 DO_imm_mandr_r("insertps", 74, src
, dst
);
1058 DO_imm_mandr_r("insertps", 75, src
, dst
);
1059 DO_imm_mandr_r("insertps", 76, src
, dst
);
1060 DO_imm_mandr_r("insertps", 77, src
, dst
);
1061 DO_imm_mandr_r("insertps", 78, src
, dst
);
1062 DO_imm_mandr_r("insertps", 79, src
, dst
);
1063 DO_imm_mandr_r("insertps", 80, src
, dst
);
1064 DO_imm_mandr_r("insertps", 81, src
, dst
);
1065 DO_imm_mandr_r("insertps", 82, src
, dst
);
1066 DO_imm_mandr_r("insertps", 83, src
, dst
);
1067 DO_imm_mandr_r("insertps", 84, src
, dst
);
1068 DO_imm_mandr_r("insertps", 85, src
, dst
);
1069 DO_imm_mandr_r("insertps", 86, src
, dst
);
1070 DO_imm_mandr_r("insertps", 87, src
, dst
);
1071 DO_imm_mandr_r("insertps", 88, src
, dst
);
1072 DO_imm_mandr_r("insertps", 89, src
, dst
);
1073 DO_imm_mandr_r("insertps", 90, src
, dst
);
1074 DO_imm_mandr_r("insertps", 91, src
, dst
);
1075 DO_imm_mandr_r("insertps", 92, src
, dst
);
1076 DO_imm_mandr_r("insertps", 93, src
, dst
);
1077 DO_imm_mandr_r("insertps", 94, src
, dst
);
1078 DO_imm_mandr_r("insertps", 95, src
, dst
);
1079 DO_imm_mandr_r("insertps", 96, src
, dst
);
1080 DO_imm_mandr_r("insertps", 97, src
, dst
);
1081 DO_imm_mandr_r("insertps", 98, src
, dst
);
1082 DO_imm_mandr_r("insertps", 99, src
, dst
);
1083 DO_imm_mandr_r("insertps", 100, src
, dst
);
1084 DO_imm_mandr_r("insertps", 101, src
, dst
);
1085 DO_imm_mandr_r("insertps", 102, src
, dst
);
1086 DO_imm_mandr_r("insertps", 103, src
, dst
);
1087 DO_imm_mandr_r("insertps", 104, src
, dst
);
1088 DO_imm_mandr_r("insertps", 105, src
, dst
);
1089 DO_imm_mandr_r("insertps", 106, src
, dst
);
1090 DO_imm_mandr_r("insertps", 107, src
, dst
);
1091 DO_imm_mandr_r("insertps", 108, src
, dst
);
1092 DO_imm_mandr_r("insertps", 109, src
, dst
);
1093 DO_imm_mandr_r("insertps", 110, src
, dst
);
1094 DO_imm_mandr_r("insertps", 111, src
, dst
);
1095 DO_imm_mandr_r("insertps", 112, src
, dst
);
1096 DO_imm_mandr_r("insertps", 113, src
, dst
);
1097 DO_imm_mandr_r("insertps", 114, src
, dst
);
1098 DO_imm_mandr_r("insertps", 115, src
, dst
);
1099 DO_imm_mandr_r("insertps", 116, src
, dst
);
1100 DO_imm_mandr_r("insertps", 117, src
, dst
);
1101 DO_imm_mandr_r("insertps", 118, src
, dst
);
1102 DO_imm_mandr_r("insertps", 119, src
, dst
);
1103 DO_imm_mandr_r("insertps", 120, src
, dst
);
1104 DO_imm_mandr_r("insertps", 121, src
, dst
);
1105 DO_imm_mandr_r("insertps", 122, src
, dst
);
1106 DO_imm_mandr_r("insertps", 123, src
, dst
);
1107 DO_imm_mandr_r("insertps", 124, src
, dst
);
1108 DO_imm_mandr_r("insertps", 125, src
, dst
);
1109 DO_imm_mandr_r("insertps", 126, src
, dst
);
1110 DO_imm_mandr_r("insertps", 127, src
, dst
);
1111 DO_imm_mandr_r("insertps", 128, src
, dst
);
1112 DO_imm_mandr_r("insertps", 129, src
, dst
);
1113 DO_imm_mandr_r("insertps", 130, src
, dst
);
1114 DO_imm_mandr_r("insertps", 131, src
, dst
);
1115 DO_imm_mandr_r("insertps", 132, src
, dst
);
1116 DO_imm_mandr_r("insertps", 133, src
, dst
);
1117 DO_imm_mandr_r("insertps", 134, src
, dst
);
1118 DO_imm_mandr_r("insertps", 135, src
, dst
);
1119 DO_imm_mandr_r("insertps", 136, src
, dst
);
1120 DO_imm_mandr_r("insertps", 137, src
, dst
);
1121 DO_imm_mandr_r("insertps", 138, src
, dst
);
1122 DO_imm_mandr_r("insertps", 139, src
, dst
);
1123 DO_imm_mandr_r("insertps", 140, src
, dst
);
1124 DO_imm_mandr_r("insertps", 141, src
, dst
);
1125 DO_imm_mandr_r("insertps", 142, src
, dst
);
1126 DO_imm_mandr_r("insertps", 143, src
, dst
);
1127 DO_imm_mandr_r("insertps", 144, src
, dst
);
1128 DO_imm_mandr_r("insertps", 145, src
, dst
);
1129 DO_imm_mandr_r("insertps", 146, src
, dst
);
1130 DO_imm_mandr_r("insertps", 147, src
, dst
);
1131 DO_imm_mandr_r("insertps", 148, src
, dst
);
1132 DO_imm_mandr_r("insertps", 149, src
, dst
);
1133 DO_imm_mandr_r("insertps", 150, src
, dst
);
1134 DO_imm_mandr_r("insertps", 151, src
, dst
);
1135 DO_imm_mandr_r("insertps", 152, src
, dst
);
1136 DO_imm_mandr_r("insertps", 153, src
, dst
);
1137 DO_imm_mandr_r("insertps", 154, src
, dst
);
1138 DO_imm_mandr_r("insertps", 155, src
, dst
);
1139 DO_imm_mandr_r("insertps", 156, src
, dst
);
1140 DO_imm_mandr_r("insertps", 157, src
, dst
);
1141 DO_imm_mandr_r("insertps", 158, src
, dst
);
1142 DO_imm_mandr_r("insertps", 159, src
, dst
);
1143 DO_imm_mandr_r("insertps", 160, src
, dst
);
1144 DO_imm_mandr_r("insertps", 161, src
, dst
);
1145 DO_imm_mandr_r("insertps", 162, src
, dst
);
1146 DO_imm_mandr_r("insertps", 163, src
, dst
);
1147 DO_imm_mandr_r("insertps", 164, src
, dst
);
1148 DO_imm_mandr_r("insertps", 165, src
, dst
);
1149 DO_imm_mandr_r("insertps", 166, src
, dst
);
1150 DO_imm_mandr_r("insertps", 167, src
, dst
);
1151 DO_imm_mandr_r("insertps", 168, src
, dst
);
1152 DO_imm_mandr_r("insertps", 169, src
, dst
);
1153 DO_imm_mandr_r("insertps", 170, src
, dst
);
1154 DO_imm_mandr_r("insertps", 171, src
, dst
);
1155 DO_imm_mandr_r("insertps", 172, src
, dst
);
1156 DO_imm_mandr_r("insertps", 173, src
, dst
);
1157 DO_imm_mandr_r("insertps", 174, src
, dst
);
1158 DO_imm_mandr_r("insertps", 175, src
, dst
);
1159 DO_imm_mandr_r("insertps", 176, src
, dst
);
1160 DO_imm_mandr_r("insertps", 177, src
, dst
);
1161 DO_imm_mandr_r("insertps", 178, src
, dst
);
1162 DO_imm_mandr_r("insertps", 179, src
, dst
);
1163 DO_imm_mandr_r("insertps", 180, src
, dst
);
1164 DO_imm_mandr_r("insertps", 181, src
, dst
);
1165 DO_imm_mandr_r("insertps", 182, src
, dst
);
1166 DO_imm_mandr_r("insertps", 183, src
, dst
);
1167 DO_imm_mandr_r("insertps", 184, src
, dst
);
1168 DO_imm_mandr_r("insertps", 185, src
, dst
);
1169 DO_imm_mandr_r("insertps", 186, src
, dst
);
1170 DO_imm_mandr_r("insertps", 187, src
, dst
);
1171 DO_imm_mandr_r("insertps", 188, src
, dst
);
1172 DO_imm_mandr_r("insertps", 189, src
, dst
);
1173 DO_imm_mandr_r("insertps", 190, src
, dst
);
1174 DO_imm_mandr_r("insertps", 191, src
, dst
);
1175 DO_imm_mandr_r("insertps", 192, src
, dst
);
1176 DO_imm_mandr_r("insertps", 193, src
, dst
);
1177 DO_imm_mandr_r("insertps", 194, src
, dst
);
1178 DO_imm_mandr_r("insertps", 195, src
, dst
);
1179 DO_imm_mandr_r("insertps", 196, src
, dst
);
1180 DO_imm_mandr_r("insertps", 197, src
, dst
);
1181 DO_imm_mandr_r("insertps", 198, src
, dst
);
1182 DO_imm_mandr_r("insertps", 199, src
, dst
);
1183 DO_imm_mandr_r("insertps", 200, src
, dst
);
1184 DO_imm_mandr_r("insertps", 201, src
, dst
);
1185 DO_imm_mandr_r("insertps", 202, src
, dst
);
1186 DO_imm_mandr_r("insertps", 203, src
, dst
);
1187 DO_imm_mandr_r("insertps", 204, src
, dst
);
1188 DO_imm_mandr_r("insertps", 205, src
, dst
);
1189 DO_imm_mandr_r("insertps", 206, src
, dst
);
1190 DO_imm_mandr_r("insertps", 207, src
, dst
);
1191 DO_imm_mandr_r("insertps", 208, src
, dst
);
1192 DO_imm_mandr_r("insertps", 209, src
, dst
);
1193 DO_imm_mandr_r("insertps", 210, src
, dst
);
1194 DO_imm_mandr_r("insertps", 211, src
, dst
);
1195 DO_imm_mandr_r("insertps", 212, src
, dst
);
1196 DO_imm_mandr_r("insertps", 213, src
, dst
);
1197 DO_imm_mandr_r("insertps", 214, src
, dst
);
1198 DO_imm_mandr_r("insertps", 215, src
, dst
);
1199 DO_imm_mandr_r("insertps", 216, src
, dst
);
1200 DO_imm_mandr_r("insertps", 217, src
, dst
);
1201 DO_imm_mandr_r("insertps", 218, src
, dst
);
1202 DO_imm_mandr_r("insertps", 219, src
, dst
);
1203 DO_imm_mandr_r("insertps", 220, src
, dst
);
1204 DO_imm_mandr_r("insertps", 221, src
, dst
);
1205 DO_imm_mandr_r("insertps", 222, src
, dst
);
1206 DO_imm_mandr_r("insertps", 223, src
, dst
);
1207 DO_imm_mandr_r("insertps", 224, src
, dst
);
1208 DO_imm_mandr_r("insertps", 225, src
, dst
);
1209 DO_imm_mandr_r("insertps", 226, src
, dst
);
1210 DO_imm_mandr_r("insertps", 227, src
, dst
);
1211 DO_imm_mandr_r("insertps", 228, src
, dst
);
1212 DO_imm_mandr_r("insertps", 229, src
, dst
);
1213 DO_imm_mandr_r("insertps", 230, src
, dst
);
1214 DO_imm_mandr_r("insertps", 231, src
, dst
);
1215 DO_imm_mandr_r("insertps", 232, src
, dst
);
1216 DO_imm_mandr_r("insertps", 233, src
, dst
);
1217 DO_imm_mandr_r("insertps", 234, src
, dst
);
1218 DO_imm_mandr_r("insertps", 235, src
, dst
);
1219 DO_imm_mandr_r("insertps", 236, src
, dst
);
1220 DO_imm_mandr_r("insertps", 237, src
, dst
);
1221 DO_imm_mandr_r("insertps", 238, src
, dst
);
1222 DO_imm_mandr_r("insertps", 239, src
, dst
);
1223 DO_imm_mandr_r("insertps", 240, src
, dst
);
1224 DO_imm_mandr_r("insertps", 241, src
, dst
);
1225 DO_imm_mandr_r("insertps", 242, src
, dst
);
1226 DO_imm_mandr_r("insertps", 243, src
, dst
);
1227 DO_imm_mandr_r("insertps", 244, src
, dst
);
1228 DO_imm_mandr_r("insertps", 245, src
, dst
);
1229 DO_imm_mandr_r("insertps", 246, src
, dst
);
1230 DO_imm_mandr_r("insertps", 247, src
, dst
);
1231 DO_imm_mandr_r("insertps", 248, src
, dst
);
1232 DO_imm_mandr_r("insertps", 249, src
, dst
);
1233 DO_imm_mandr_r("insertps", 250, src
, dst
);
1234 DO_imm_mandr_r("insertps", 251, src
, dst
);
1235 DO_imm_mandr_r("insertps", 252, src
, dst
);
1236 DO_imm_mandr_r("insertps", 253, src
, dst
);
1237 DO_imm_mandr_r("insertps", 254, src
, dst
);
1238 DO_imm_mandr_r("insertps", 255, src
, dst
);
/* Exercise "mpsadbw": a 50-iteration loop in which DO_imm_mandr_r is
   invoked with each immediate from 0 to 7.

   NOTE(review): the per-iteration initialisation of src and dst is not
   visible here -- presumably they are re-randomised (cf. randV128);
   confirm. */
1242 void test_MPSADBW ( void )
1246 for (i
= 0; i
< 50; i
++) {
1249 DO_imm_mandr_r("mpsadbw", 0, src
, dst
);
1250 DO_imm_mandr_r("mpsadbw", 1, src
, dst
);
1251 DO_imm_mandr_r("mpsadbw", 2, src
, dst
);
1252 DO_imm_mandr_r("mpsadbw", 3, src
, dst
);
1253 DO_imm_mandr_r("mpsadbw", 4, src
, dst
);
1254 DO_imm_mandr_r("mpsadbw", 5, src
, dst
);
1255 DO_imm_mandr_r("mpsadbw", 6, src
, dst
);
1256 DO_imm_mandr_r("mpsadbw", 7, src
, dst
);
/* Exercise "packusdw" over a 10-iteration loop.

   The operand setup shown here clears src and dst and then writes fixed
   byte patterns into bytes 0 and 1 of every 4-byte group, leaving bytes
   2 and 3 of each group zero.  DO_mandr_r is then invoked with the two
   operands.

   NOTE(review): the condition selecting this fixed-pattern setup (as
   opposed to any other initialisation) is not visible here -- confirm. */
1260 void test_PACKUSDW ( void )
1264 for (i
= 0; i
< 10; i
++) {
1269 memset(&src
, 0, sizeof(src
));
/* Size the clear of dst by dst itself, not by src. */
1270 memset(&dst
, 0, sizeof(dst
));
1271 src
[0] = 0x11; src
[1] = 0x22;
1272 src
[4] = 0x33; src
[5] = 0x44;
1273 src
[8] = 0x55; src
[9] = 0x66;
1274 src
[12] = 0x77; src
[13] = 0x88;
1275 dst
[0] = 0xaa; dst
[1] = 0xbb;
1276 dst
[4] = 0xcc; dst
[5] = 0xdd;
1277 dst
[8] = 0xee; dst
[9] = 0xff;
1278 dst
[12] = 0xa1; dst
[13] = 0xb2;
1280 DO_mandr_r("packusdw", src
, dst
);
/* Exercise "pblendw" with every 8-bit immediate value: DO_imm_mandr_r is
   invoked once for each immediate from 0 to 255, always with the same
   src and dst operands.  The immediates are written out literally
   instead of being generated by a loop -- presumably because
   DO_imm_mandr_r needs a compile-time constant; confirm against its
   definition.

   NOTE(review): the initialisation of src and dst is not visible here --
   confirm how they are filled before the first invocation. */
1284 void test_PBLENDW ( void )
1290 DO_imm_mandr_r("pblendw", 0, src
, dst
);
1291 DO_imm_mandr_r("pblendw", 1, src
, dst
);
1292 DO_imm_mandr_r("pblendw", 2, src
, dst
);
1293 DO_imm_mandr_r("pblendw", 3, src
, dst
);
1294 DO_imm_mandr_r("pblendw", 4, src
, dst
);
1295 DO_imm_mandr_r("pblendw", 5, src
, dst
);
1296 DO_imm_mandr_r("pblendw", 6, src
, dst
);
1297 DO_imm_mandr_r("pblendw", 7, src
, dst
);
1298 DO_imm_mandr_r("pblendw", 8, src
, dst
);
1299 DO_imm_mandr_r("pblendw", 9, src
, dst
);
1300 DO_imm_mandr_r("pblendw", 10, src
, dst
);
1301 DO_imm_mandr_r("pblendw", 11, src
, dst
);
1302 DO_imm_mandr_r("pblendw", 12, src
, dst
);
1303 DO_imm_mandr_r("pblendw", 13, src
, dst
);
1304 DO_imm_mandr_r("pblendw", 14, src
, dst
);
1305 DO_imm_mandr_r("pblendw", 15, src
, dst
);
1306 DO_imm_mandr_r("pblendw", 16, src
, dst
);
1307 DO_imm_mandr_r("pblendw", 17, src
, dst
);
1308 DO_imm_mandr_r("pblendw", 18, src
, dst
);
1309 DO_imm_mandr_r("pblendw", 19, src
, dst
);
1310 DO_imm_mandr_r("pblendw", 20, src
, dst
);
1311 DO_imm_mandr_r("pblendw", 21, src
, dst
);
1312 DO_imm_mandr_r("pblendw", 22, src
, dst
);
1313 DO_imm_mandr_r("pblendw", 23, src
, dst
);
1314 DO_imm_mandr_r("pblendw", 24, src
, dst
);
1315 DO_imm_mandr_r("pblendw", 25, src
, dst
);
1316 DO_imm_mandr_r("pblendw", 26, src
, dst
);
1317 DO_imm_mandr_r("pblendw", 27, src
, dst
);
1318 DO_imm_mandr_r("pblendw", 28, src
, dst
);
1319 DO_imm_mandr_r("pblendw", 29, src
, dst
);
1320 DO_imm_mandr_r("pblendw", 30, src
, dst
);
1321 DO_imm_mandr_r("pblendw", 31, src
, dst
);
1322 DO_imm_mandr_r("pblendw", 32, src
, dst
);
1323 DO_imm_mandr_r("pblendw", 33, src
, dst
);
1324 DO_imm_mandr_r("pblendw", 34, src
, dst
);
1325 DO_imm_mandr_r("pblendw", 35, src
, dst
);
1326 DO_imm_mandr_r("pblendw", 36, src
, dst
);
1327 DO_imm_mandr_r("pblendw", 37, src
, dst
);
1328 DO_imm_mandr_r("pblendw", 38, src
, dst
);
1329 DO_imm_mandr_r("pblendw", 39, src
, dst
);
1330 DO_imm_mandr_r("pblendw", 40, src
, dst
);
1331 DO_imm_mandr_r("pblendw", 41, src
, dst
);
1332 DO_imm_mandr_r("pblendw", 42, src
, dst
);
1333 DO_imm_mandr_r("pblendw", 43, src
, dst
);
1334 DO_imm_mandr_r("pblendw", 44, src
, dst
);
1335 DO_imm_mandr_r("pblendw", 45, src
, dst
);
1336 DO_imm_mandr_r("pblendw", 46, src
, dst
);
1337 DO_imm_mandr_r("pblendw", 47, src
, dst
);
1338 DO_imm_mandr_r("pblendw", 48, src
, dst
);
1339 DO_imm_mandr_r("pblendw", 49, src
, dst
);
1340 DO_imm_mandr_r("pblendw", 50, src
, dst
);
1341 DO_imm_mandr_r("pblendw", 51, src
, dst
);
1342 DO_imm_mandr_r("pblendw", 52, src
, dst
);
1343 DO_imm_mandr_r("pblendw", 53, src
, dst
);
1344 DO_imm_mandr_r("pblendw", 54, src
, dst
);
1345 DO_imm_mandr_r("pblendw", 55, src
, dst
);
1346 DO_imm_mandr_r("pblendw", 56, src
, dst
);
1347 DO_imm_mandr_r("pblendw", 57, src
, dst
);
1348 DO_imm_mandr_r("pblendw", 58, src
, dst
);
1349 DO_imm_mandr_r("pblendw", 59, src
, dst
);
1350 DO_imm_mandr_r("pblendw", 60, src
, dst
);
1351 DO_imm_mandr_r("pblendw", 61, src
, dst
);
1352 DO_imm_mandr_r("pblendw", 62, src
, dst
);
1353 DO_imm_mandr_r("pblendw", 63, src
, dst
);
1354 DO_imm_mandr_r("pblendw", 64, src
, dst
);
1355 DO_imm_mandr_r("pblendw", 65, src
, dst
);
1356 DO_imm_mandr_r("pblendw", 66, src
, dst
);
1357 DO_imm_mandr_r("pblendw", 67, src
, dst
);
1358 DO_imm_mandr_r("pblendw", 68, src
, dst
);
1359 DO_imm_mandr_r("pblendw", 69, src
, dst
);
1360 DO_imm_mandr_r("pblendw", 70, src
, dst
);
1361 DO_imm_mandr_r("pblendw", 71, src
, dst
);
1362 DO_imm_mandr_r("pblendw", 72, src
, dst
);
1363 DO_imm_mandr_r("pblendw", 73, src
, dst
);
1364 DO_imm_mandr_r("pblendw", 74, src
, dst
);
1365 DO_imm_mandr_r("pblendw", 75, src
, dst
);
1366 DO_imm_mandr_r("pblendw", 76, src
, dst
);
1367 DO_imm_mandr_r("pblendw", 77, src
, dst
);
1368 DO_imm_mandr_r("pblendw", 78, src
, dst
);
1369 DO_imm_mandr_r("pblendw", 79, src
, dst
);
1370 DO_imm_mandr_r("pblendw", 80, src
, dst
);
1371 DO_imm_mandr_r("pblendw", 81, src
, dst
);
1372 DO_imm_mandr_r("pblendw", 82, src
, dst
);
1373 DO_imm_mandr_r("pblendw", 83, src
, dst
);
1374 DO_imm_mandr_r("pblendw", 84, src
, dst
);
1375 DO_imm_mandr_r("pblendw", 85, src
, dst
);
1376 DO_imm_mandr_r("pblendw", 86, src
, dst
);
1377 DO_imm_mandr_r("pblendw", 87, src
, dst
);
1378 DO_imm_mandr_r("pblendw", 88, src
, dst
);
1379 DO_imm_mandr_r("pblendw", 89, src
, dst
);
1380 DO_imm_mandr_r("pblendw", 90, src
, dst
);
1381 DO_imm_mandr_r("pblendw", 91, src
, dst
);
1382 DO_imm_mandr_r("pblendw", 92, src
, dst
);
1383 DO_imm_mandr_r("pblendw", 93, src
, dst
);
1384 DO_imm_mandr_r("pblendw", 94, src
, dst
);
1385 DO_imm_mandr_r("pblendw", 95, src
, dst
);
1386 DO_imm_mandr_r("pblendw", 96, src
, dst
);
1387 DO_imm_mandr_r("pblendw", 97, src
, dst
);
1388 DO_imm_mandr_r("pblendw", 98, src
, dst
);
1389 DO_imm_mandr_r("pblendw", 99, src
, dst
);
1390 DO_imm_mandr_r("pblendw", 100, src
, dst
);
1391 DO_imm_mandr_r("pblendw", 101, src
, dst
);
1392 DO_imm_mandr_r("pblendw", 102, src
, dst
);
1393 DO_imm_mandr_r("pblendw", 103, src
, dst
);
1394 DO_imm_mandr_r("pblendw", 104, src
, dst
);
1395 DO_imm_mandr_r("pblendw", 105, src
, dst
);
1396 DO_imm_mandr_r("pblendw", 106, src
, dst
);
1397 DO_imm_mandr_r("pblendw", 107, src
, dst
);
1398 DO_imm_mandr_r("pblendw", 108, src
, dst
);
1399 DO_imm_mandr_r("pblendw", 109, src
, dst
);
1400 DO_imm_mandr_r("pblendw", 110, src
, dst
);
1401 DO_imm_mandr_r("pblendw", 111, src
, dst
);
1402 DO_imm_mandr_r("pblendw", 112, src
, dst
);
1403 DO_imm_mandr_r("pblendw", 113, src
, dst
);
1404 DO_imm_mandr_r("pblendw", 114, src
, dst
);
1405 DO_imm_mandr_r("pblendw", 115, src
, dst
);
1406 DO_imm_mandr_r("pblendw", 116, src
, dst
);
1407 DO_imm_mandr_r("pblendw", 117, src
, dst
);
1408 DO_imm_mandr_r("pblendw", 118, src
, dst
);
1409 DO_imm_mandr_r("pblendw", 119, src
, dst
);
1410 DO_imm_mandr_r("pblendw", 120, src
, dst
);
1411 DO_imm_mandr_r("pblendw", 121, src
, dst
);
1412 DO_imm_mandr_r("pblendw", 122, src
, dst
);
1413 DO_imm_mandr_r("pblendw", 123, src
, dst
);
1414 DO_imm_mandr_r("pblendw", 124, src
, dst
);
1415 DO_imm_mandr_r("pblendw", 125, src
, dst
);
1416 DO_imm_mandr_r("pblendw", 126, src
, dst
);
1417 DO_imm_mandr_r("pblendw", 127, src
, dst
);
1418 DO_imm_mandr_r("pblendw", 128, src
, dst
);
1419 DO_imm_mandr_r("pblendw", 129, src
, dst
);
1420 DO_imm_mandr_r("pblendw", 130, src
, dst
);
1421 DO_imm_mandr_r("pblendw", 131, src
, dst
);
1422 DO_imm_mandr_r("pblendw", 132, src
, dst
);
1423 DO_imm_mandr_r("pblendw", 133, src
, dst
);
1424 DO_imm_mandr_r("pblendw", 134, src
, dst
);
1425 DO_imm_mandr_r("pblendw", 135, src
, dst
);
1426 DO_imm_mandr_r("pblendw", 136, src
, dst
);
1427 DO_imm_mandr_r("pblendw", 137, src
, dst
);
1428 DO_imm_mandr_r("pblendw", 138, src
, dst
);
1429 DO_imm_mandr_r("pblendw", 139, src
, dst
);
1430 DO_imm_mandr_r("pblendw", 140, src
, dst
);
1431 DO_imm_mandr_r("pblendw", 141, src
, dst
);
1432 DO_imm_mandr_r("pblendw", 142, src
, dst
);
1433 DO_imm_mandr_r("pblendw", 143, src
, dst
);
1434 DO_imm_mandr_r("pblendw", 144, src
, dst
);
1435 DO_imm_mandr_r("pblendw", 145, src
, dst
);
1436 DO_imm_mandr_r("pblendw", 146, src
, dst
);
1437 DO_imm_mandr_r("pblendw", 147, src
, dst
);
1438 DO_imm_mandr_r("pblendw", 148, src
, dst
);
1439 DO_imm_mandr_r("pblendw", 149, src
, dst
);
1440 DO_imm_mandr_r("pblendw", 150, src
, dst
);
1441 DO_imm_mandr_r("pblendw", 151, src
, dst
);
1442 DO_imm_mandr_r("pblendw", 152, src
, dst
);
1443 DO_imm_mandr_r("pblendw", 153, src
, dst
);
1444 DO_imm_mandr_r("pblendw", 154, src
, dst
);
1445 DO_imm_mandr_r("pblendw", 155, src
, dst
);
1446 DO_imm_mandr_r("pblendw", 156, src
, dst
);
1447 DO_imm_mandr_r("pblendw", 157, src
, dst
);
1448 DO_imm_mandr_r("pblendw", 158, src
, dst
);
1449 DO_imm_mandr_r("pblendw", 159, src
, dst
);
1450 DO_imm_mandr_r("pblendw", 160, src
, dst
);
1451 DO_imm_mandr_r("pblendw", 161, src
, dst
);
1452 DO_imm_mandr_r("pblendw", 162, src
, dst
);
1453 DO_imm_mandr_r("pblendw", 163, src
, dst
);
1454 DO_imm_mandr_r("pblendw", 164, src
, dst
);
1455 DO_imm_mandr_r("pblendw", 165, src
, dst
);
1456 DO_imm_mandr_r("pblendw", 166, src
, dst
);
1457 DO_imm_mandr_r("pblendw", 167, src
, dst
);
1458 DO_imm_mandr_r("pblendw", 168, src
, dst
);
1459 DO_imm_mandr_r("pblendw", 169, src
, dst
);
1460 DO_imm_mandr_r("pblendw", 170, src
, dst
);
1461 DO_imm_mandr_r("pblendw", 171, src
, dst
);
1462 DO_imm_mandr_r("pblendw", 172, src
, dst
);
1463 DO_imm_mandr_r("pblendw", 173, src
, dst
);
1464 DO_imm_mandr_r("pblendw", 174, src
, dst
);
1465 DO_imm_mandr_r("pblendw", 175, src
, dst
);
1466 DO_imm_mandr_r("pblendw", 176, src
, dst
);
1467 DO_imm_mandr_r("pblendw", 177, src
, dst
);
1468 DO_imm_mandr_r("pblendw", 178, src
, dst
);
1469 DO_imm_mandr_r("pblendw", 179, src
, dst
);
1470 DO_imm_mandr_r("pblendw", 180, src
, dst
);
1471 DO_imm_mandr_r("pblendw", 181, src
, dst
);
1472 DO_imm_mandr_r("pblendw", 182, src
, dst
);
1473 DO_imm_mandr_r("pblendw", 183, src
, dst
);
1474 DO_imm_mandr_r("pblendw", 184, src
, dst
);
1475 DO_imm_mandr_r("pblendw", 185, src
, dst
);
1476 DO_imm_mandr_r("pblendw", 186, src
, dst
);
1477 DO_imm_mandr_r("pblendw", 187, src
, dst
);
1478 DO_imm_mandr_r("pblendw", 188, src
, dst
);
1479 DO_imm_mandr_r("pblendw", 189, src
, dst
);
1480 DO_imm_mandr_r("pblendw", 190, src
, dst
);
1481 DO_imm_mandr_r("pblendw", 191, src
, dst
);
1482 DO_imm_mandr_r("pblendw", 192, src
, dst
);
1483 DO_imm_mandr_r("pblendw", 193, src
, dst
);
1484 DO_imm_mandr_r("pblendw", 194, src
, dst
);
1485 DO_imm_mandr_r("pblendw", 195, src
, dst
);
1486 DO_imm_mandr_r("pblendw", 196, src
, dst
);
1487 DO_imm_mandr_r("pblendw", 197, src
, dst
);
1488 DO_imm_mandr_r("pblendw", 198, src
, dst
);
1489 DO_imm_mandr_r("pblendw", 199, src
, dst
);
1490 DO_imm_mandr_r("pblendw", 200, src
, dst
);
1491 DO_imm_mandr_r("pblendw", 201, src
, dst
);
1492 DO_imm_mandr_r("pblendw", 202, src
, dst
);
1493 DO_imm_mandr_r("pblendw", 203, src
, dst
);
1494 DO_imm_mandr_r("pblendw", 204, src
, dst
);
1495 DO_imm_mandr_r("pblendw", 205, src
, dst
);
1496 DO_imm_mandr_r("pblendw", 206, src
, dst
);
1497 DO_imm_mandr_r("pblendw", 207, src
, dst
);
1498 DO_imm_mandr_r("pblendw", 208, src
, dst
);
1499 DO_imm_mandr_r("pblendw", 209, src
, dst
);
1500 DO_imm_mandr_r("pblendw", 210, src
, dst
);
1501 DO_imm_mandr_r("pblendw", 211, src
, dst
);
1502 DO_imm_mandr_r("pblendw", 212, src
, dst
);
1503 DO_imm_mandr_r("pblendw", 213, src
, dst
);
1504 DO_imm_mandr_r("pblendw", 214, src
, dst
);
1505 DO_imm_mandr_r("pblendw", 215, src
, dst
);
1506 DO_imm_mandr_r("pblendw", 216, src
, dst
);
1507 DO_imm_mandr_r("pblendw", 217, src
, dst
);
1508 DO_imm_mandr_r("pblendw", 218, src
, dst
);
1509 DO_imm_mandr_r("pblendw", 219, src
, dst
);
1510 DO_imm_mandr_r("pblendw", 220, src
, dst
);
1511 DO_imm_mandr_r("pblendw", 221, src
, dst
);
1512 DO_imm_mandr_r("pblendw", 222, src
, dst
);
1513 DO_imm_mandr_r("pblendw", 223, src
, dst
);
1514 DO_imm_mandr_r("pblendw", 224, src
, dst
);
1515 DO_imm_mandr_r("pblendw", 225, src
, dst
);
1516 DO_imm_mandr_r("pblendw", 226, src
, dst
);
1517 DO_imm_mandr_r("pblendw", 227, src
, dst
);
1518 DO_imm_mandr_r("pblendw", 228, src
, dst
);
1519 DO_imm_mandr_r("pblendw", 229, src
, dst
);
1520 DO_imm_mandr_r("pblendw", 230, src
, dst
);
1521 DO_imm_mandr_r("pblendw", 231, src
, dst
);
1522 DO_imm_mandr_r("pblendw", 232, src
, dst
);
1523 DO_imm_mandr_r("pblendw", 233, src
, dst
);
1524 DO_imm_mandr_r("pblendw", 234, src
, dst
);
1525 DO_imm_mandr_r("pblendw", 235, src
, dst
);
1526 DO_imm_mandr_r("pblendw", 236, src
, dst
);
1527 DO_imm_mandr_r("pblendw", 237, src
, dst
);
1528 DO_imm_mandr_r("pblendw", 238, src
, dst
);
1529 DO_imm_mandr_r("pblendw", 239, src
, dst
);
1530 DO_imm_mandr_r("pblendw", 240, src
, dst
);
1531 DO_imm_mandr_r("pblendw", 241, src
, dst
);
1532 DO_imm_mandr_r("pblendw", 242, src
, dst
);
1533 DO_imm_mandr_r("pblendw", 243, src
, dst
);
1534 DO_imm_mandr_r("pblendw", 244, src
, dst
);
1535 DO_imm_mandr_r("pblendw", 245, src
, dst
);
1536 DO_imm_mandr_r("pblendw", 246, src
, dst
);
1537 DO_imm_mandr_r("pblendw", 247, src
, dst
);
1538 DO_imm_mandr_r("pblendw", 248, src
, dst
);
1539 DO_imm_mandr_r("pblendw", 249, src
, dst
);
1540 DO_imm_mandr_r("pblendw", 250, src
, dst
);
1541 DO_imm_mandr_r("pblendw", 251, src
, dst
);
1542 DO_imm_mandr_r("pblendw", 252, src
, dst
);
1543 DO_imm_mandr_r("pblendw", 253, src
, dst
);
1544 DO_imm_mandr_r("pblendw", 254, src
, dst
);
1545 DO_imm_mandr_r("pblendw", 255, src
, dst
);
/* Exercise "pcmpeqq" over a 10-iteration loop, invoking DO_mandr_r with
   src and dst.

   Two switch cases force a match in one 8-byte half of the operands:
   case 0 sets bytes 0..7 of both src and dst to 0x55, case 1 does the
   same for bytes 8..15.

   NOTE(review): the switch selector and the baseline initialisation of
   src and dst are not visible here -- presumably random data with a
   randomly chosen case; confirm. */
1550 void test_PCMPEQQ ( void )
1554 for (i
= 0; i
< 10; i
++) {
1558 case 0: memset(&src
[0], 0x55, 8);
1559 memset(&dst
[0], 0x55, 8); break;
1560 case 1: memset(&src
[8], 0x55, 8);
1561 memset(&dst
[8], 0x55, 8); break;
1565 DO_mandr_r("pcmpeqq", src
, dst
);
/* Exercise "pextrb": DO_imm_r_to_mandrscalar is invoked once for each
   immediate from 0 to 15, i.e. one invocation per byte index of a
   16-byte vector, always with src and the string "d".

   NOTE(review): the meaning of the "d" argument is defined by
   DO_imm_r_to_mandrscalar, which is not visible here -- presumably a
   register-name suffix selecting the 32-bit scalar form; confirm. */
1570 void test_PEXTRB ( void )
1574 DO_imm_r_to_mandrscalar("pextrb", 0, src
, "d");
1575 DO_imm_r_to_mandrscalar("pextrb", 1, src
, "d");
1576 DO_imm_r_to_mandrscalar("pextrb", 2, src
, "d");
1577 DO_imm_r_to_mandrscalar("pextrb", 3, src
, "d");
1578 DO_imm_r_to_mandrscalar("pextrb", 4, src
, "d");
1579 DO_imm_r_to_mandrscalar("pextrb", 5, src
, "d");
1580 DO_imm_r_to_mandrscalar("pextrb", 6, src
, "d");
1581 DO_imm_r_to_mandrscalar("pextrb", 7, src
, "d");
1582 DO_imm_r_to_mandrscalar("pextrb", 8, src
, "d");
1583 DO_imm_r_to_mandrscalar("pextrb", 9, src
, "d");
1584 DO_imm_r_to_mandrscalar("pextrb", 10, src
, "d");
1585 DO_imm_r_to_mandrscalar("pextrb", 11, src
, "d");
1586 DO_imm_r_to_mandrscalar("pextrb", 12, src
, "d");
1587 DO_imm_r_to_mandrscalar("pextrb", 13, src
, "d");
1588 DO_imm_r_to_mandrscalar("pextrb", 14, src
, "d");
1589 DO_imm_r_to_mandrscalar("pextrb", 15, src
, "d");
/* Exercise "pinsrb": DO_imm_mandrscalar_to_r is invoked once for each
   immediate from 0 to 15, i.e. one invocation per byte index of a
   16-byte vector, always with src and the string "d".

   NOTE(review): any operand setup performed between successive
   invocations is not visible here, and the meaning of "d" is defined by
   DO_imm_mandrscalar_to_r -- presumably a register-name suffix selecting
   the 32-bit scalar form; confirm. */
1592 void test_PINSRB ( void )
1596 DO_imm_mandrscalar_to_r("pinsrb", 0, src
, "d");
1598 DO_imm_mandrscalar_to_r("pinsrb", 1, src
, "d");
1600 DO_imm_mandrscalar_to_r("pinsrb", 2, src
, "d");
1602 DO_imm_mandrscalar_to_r("pinsrb", 3, src
, "d");
1604 DO_imm_mandrscalar_to_r("pinsrb", 4, src
, "d");
1606 DO_imm_mandrscalar_to_r("pinsrb", 5, src
, "d");
1608 DO_imm_mandrscalar_to_r("pinsrb", 6, src
, "d");
1610 DO_imm_mandrscalar_to_r("pinsrb", 7, src
, "d");
1612 DO_imm_mandrscalar_to_r("pinsrb", 8, src
, "d");
1614 DO_imm_mandrscalar_to_r("pinsrb", 9, src
, "d");
1616 DO_imm_mandrscalar_to_r("pinsrb", 10, src
, "d");
1618 DO_imm_mandrscalar_to_r("pinsrb", 11, src
, "d");
1620 DO_imm_mandrscalar_to_r("pinsrb", 12, src
, "d");
1622 DO_imm_mandrscalar_to_r("pinsrb", 13, src
, "d");
1624 DO_imm_mandrscalar_to_r("pinsrb", 14, src
, "d");
1626 DO_imm_mandrscalar_to_r("pinsrb", 15, src
, "d");
/* Exercise "pextrw": DO_imm_r_to_mandrscalar is invoked once for each
   immediate from 0 to 7, always with src and the string "d".

   NOTE(review): the meaning of "d" is defined by
   DO_imm_r_to_mandrscalar, which is not visible here -- confirm. */
1630 void test_PEXTRW ( void )
1634 DO_imm_r_to_mandrscalar("pextrw", 0, src
, "d");
1635 DO_imm_r_to_mandrscalar("pextrw", 1, src
, "d");
1636 DO_imm_r_to_mandrscalar("pextrw", 2, src
, "d");
1637 DO_imm_r_to_mandrscalar("pextrw", 3, src
, "d");
1638 DO_imm_r_to_mandrscalar("pextrw", 4, src
, "d");
1639 DO_imm_r_to_mandrscalar("pextrw", 5, src
, "d");
1640 DO_imm_r_to_mandrscalar("pextrw", 6, src
, "d");
1641 DO_imm_r_to_mandrscalar("pextrw", 7, src
, "d");
/* Exercise "pinsrw": DO_imm_mandrscalar_to_r is invoked once for each
   immediate from 0 to 7, always with src and the string "d".

   NOTE(review): any operand setup performed between successive
   invocations is not visible here; the meaning of "d" is defined by
   DO_imm_mandrscalar_to_r -- confirm. */
1644 void test_PINSRW ( void )
1648 DO_imm_mandrscalar_to_r("pinsrw", 0, src
, "d");
1650 DO_imm_mandrscalar_to_r("pinsrw", 1, src
, "d");
1652 DO_imm_mandrscalar_to_r("pinsrw", 2, src
, "d");
1654 DO_imm_mandrscalar_to_r("pinsrw", 3, src
, "d");
1656 DO_imm_mandrscalar_to_r("pinsrw", 4, src
, "d");
1658 DO_imm_mandrscalar_to_r("pinsrw", 5, src
, "d");
1660 DO_imm_mandrscalar_to_r("pinsrw", 6, src
, "d");
1662 DO_imm_mandrscalar_to_r("pinsrw", 7, src
, "d");
/* Exercise "pextrd": DO_imm_r_to_mandrscalar is invoked once for each
   immediate from 0 to 3, always with src and the string "d".

   NOTE(review): the meaning of "d" is defined by
   DO_imm_r_to_mandrscalar, which is not visible here -- confirm. */
1666 void test_PEXTRD ( void )
1670 DO_imm_r_to_mandrscalar("pextrd", 0, src
, "d");
1671 DO_imm_r_to_mandrscalar("pextrd", 1, src
, "d");
1672 DO_imm_r_to_mandrscalar("pextrd", 2, src
, "d");
1673 DO_imm_r_to_mandrscalar("pextrd", 3, src
, "d");
/* Exercise "pinsrd": DO_imm_mandrscalar_to_r is invoked once for each
   immediate from 0 to 3, always with src and the string "d".

   NOTE(review): any operand setup performed between successive
   invocations is not visible here; the meaning of "d" is defined by
   DO_imm_mandrscalar_to_r -- confirm. */
1676 void test_PINSRD ( void )
1680 DO_imm_mandrscalar_to_r("pinsrd", 0, src
, "d");
1682 DO_imm_mandrscalar_to_r("pinsrd", 1, src
, "d");
1684 DO_imm_mandrscalar_to_r("pinsrd", 2, src
, "d");
1686 DO_imm_mandrscalar_to_r("pinsrd", 3, src
, "d");
/* Exercise "pextrq": DO_imm_r_to_mandrscalar is invoked for immediates 0
   and 1 with src.  Unlike the byte/word/dword extract tests, the last
   argument is the empty string rather than "d".

   NOTE(review): presumably the empty suffix selects the full 64-bit
   scalar register -- confirm against DO_imm_r_to_mandrscalar. */
1690 void test_PEXTRQ ( void )
1694 DO_imm_r_to_mandrscalar("pextrq", 0, src
, "");
1695 DO_imm_r_to_mandrscalar("pextrq", 1, src
, "");
/* Exercise "pinsrq": DO_imm_mandrscalar_to_r is invoked for immediates 0
   and 1 with src.  Unlike the byte/word/dword insert tests, the last
   argument is the empty string rather than "d".

   NOTE(review): presumably the empty suffix selects the full 64-bit
   scalar register -- confirm against DO_imm_mandrscalar_to_r.  Any
   operand setup between the two invocations is not visible here. */
1698 void test_PINSRQ ( void )
1702 DO_imm_mandrscalar_to_r("pinsrq", 0, src
, "");
1704 DO_imm_mandrscalar_to_r("pinsrq", 1, src
, "");
/* Exercise "extractps": DO_imm_r_to_mandrscalar is invoked once for each
   immediate from 0 to 3, always with src and the string "d".

   NOTE(review): the meaning of "d" is defined by
   DO_imm_r_to_mandrscalar, which is not visible here -- confirm. */
1708 void test_EXTRACTPS ( void )
1712 DO_imm_r_to_mandrscalar("extractps", 0, src
, "d");
1713 DO_imm_r_to_mandrscalar("extractps", 1, src
, "d");
1714 DO_imm_r_to_mandrscalar("extractps", 2, src
, "d");
1715 DO_imm_r_to_mandrscalar("extractps", 3, src
, "d");
/* Exercise "phminposuw": a 20-iteration loop invoking DO_mandr_r with
   src and dst, plus one further invocation in which every byte of src is
   0x55 and every byte of dst is 0xAA (so all elements of src are equal).

   NOTE(review): the per-iteration initialisation inside the loop is not
   visible here -- presumably randomised (cf. randV128) -- and the fixed
   0x55/0xAA case presumably runs once after the loop; confirm. */
1719 void test_PHMINPOSUW ( void )
1723 for (i
= 0; i
< 20; i
++) {
1726 DO_mandr_r("phminposuw", src
, dst
);
1728 memset(src
, 0x55, sizeof(src
));
1729 memset(dst
, 0xAA, sizeof(dst
));
1730 DO_mandr_r("phminposuw", src
, dst
);
/* Runs DO_mandr_r("pmaxsb", src, dst) inside a 10-iteration loop. */
1733 void test_PMAXSB ( void )
1737 for (i
= 0; i
< 10; i
++) {
1740 DO_mandr_r("pmaxsb", src
, dst
);
/* Runs DO_mandr_r("pmaxsd", src, dst) inside a 10-iteration loop. */
1744 void test_PMAXSD ( void )
1748 for (i
= 0; i
< 10; i
++) {
1751 DO_mandr_r("pmaxsd", src
, dst
);
/* Runs DO_mandr_r("pmaxud", src, dst) inside a 10-iteration loop. */
1755 void test_PMAXUD ( void )
1759 for (i
= 0; i
< 10; i
++) {
1762 DO_mandr_r("pmaxud", src
, dst
);
/* Runs DO_mandr_r("pmaxuw", src, dst) inside a 10-iteration loop. */
1766 void test_PMAXUW ( void )
1770 for (i
= 0; i
< 10; i
++) {
1773 DO_mandr_r("pmaxuw", src
, dst
);
/* Runs DO_mandr_r("pminsb", src, dst) inside a 10-iteration loop. */
1777 void test_PMINSB ( void )
1781 for (i
= 0; i
< 10; i
++) {
1784 DO_mandr_r("pminsb", src
, dst
);
/* Runs DO_mandr_r("pminsd", src, dst) inside a 10-iteration loop. */
1788 void test_PMINSD ( void )
1792 for (i
= 0; i
< 10; i
++) {
1795 DO_mandr_r("pminsd", src
, dst
);
/* Runs DO_mandr_r("pminud", src, dst) inside a 10-iteration loop. */
1799 void test_PMINUD ( void )
1803 for (i
= 0; i
< 10; i
++) {
1806 DO_mandr_r("pminud", src
, dst
);
/* Runs DO_mandr_r("pminuw", src, dst) inside a 10-iteration loop. */
1810 void test_PMINUW ( void )
1814 for (i
= 0; i
< 10; i
++) {
1817 DO_mandr_r("pminuw", src
, dst
);
/* Runs DO_mandr_r("pmovsxbw", src, dst) inside a 10-iteration loop. */
1821 void test_PMOVSXBW ( void )
1825 for (i
= 0; i
< 10; i
++) {
1828 DO_mandr_r("pmovsxbw", src
, dst
);
/* Runs DO_mandr_r("pmovsxbd", src, dst) inside a 10-iteration loop. */
1832 void test_PMOVSXBD ( void )
1836 for (i
= 0; i
< 10; i
++) {
1839 DO_mandr_r("pmovsxbd", src
, dst
);
/* Runs DO_mandr_r("pmovsxbq", src, dst) inside a 10-iteration loop. */
1843 void test_PMOVSXBQ ( void )
1847 for (i
= 0; i
< 10; i
++) {
1850 DO_mandr_r("pmovsxbq", src
, dst
);
/* Runs DO_mandr_r("pmovsxwd", src, dst) inside a 10-iteration loop. */
1854 void test_PMOVSXWD ( void )
1858 for (i
= 0; i
< 10; i
++) {
1861 DO_mandr_r("pmovsxwd", src
, dst
);
/* Runs DO_mandr_r("pmovsxwq", src, dst) inside a 10-iteration loop. */
1865 void test_PMOVSXWQ ( void )
1869 for (i
= 0; i
< 10; i
++) {
1872 DO_mandr_r("pmovsxwq", src
, dst
);
/* Runs DO_mandr_r("pmovsxdq", src, dst) inside a 10-iteration loop. */
1876 void test_PMOVSXDQ ( void )
1880 for (i
= 0; i
< 10; i
++) {
1883 DO_mandr_r("pmovsxdq", src
, dst
);
/* Runs DO_mandr_r("pmovzxbw", src, dst) inside a 10-iteration loop. */
1887 void test_PMOVZXBW ( void )
1891 for (i
= 0; i
< 10; i
++) {
1894 DO_mandr_r("pmovzxbw", src
, dst
);
/* Runs DO_mandr_r("pmovzxbd", src, dst) inside a 10-iteration loop. */
1898 void test_PMOVZXBD ( void )
1902 for (i
= 0; i
< 10; i
++) {
1905 DO_mandr_r("pmovzxbd", src
, dst
);
/* Runs DO_mandr_r("pmovzxbq", src, dst) inside a 10-iteration loop. */
1909 void test_PMOVZXBQ ( void )
1913 for (i
= 0; i
< 10; i
++) {
1916 DO_mandr_r("pmovzxbq", src
, dst
);
/* Runs DO_mandr_r("pmovzxwd", src, dst) inside a 10-iteration loop. */
1920 void test_PMOVZXWD ( void )
1924 for (i
= 0; i
< 10; i
++) {
1927 DO_mandr_r("pmovzxwd", src
, dst
);
/* Runs DO_mandr_r("pmovzxwq", src, dst) inside a 10-iteration loop. */
1931 void test_PMOVZXWQ ( void )
1935 for (i
= 0; i
< 10; i
++) {
1938 DO_mandr_r("pmovzxwq", src
, dst
);
/* Runs DO_mandr_r("pmovzxdq", src, dst) inside a 10-iteration loop. */
1942 void test_PMOVZXDQ ( void )
1946 for (i
= 0; i
< 10; i
++) {
1949 DO_mandr_r("pmovzxdq", src
, dst
);
/* Runs DO_mandr_r("pmuldq", src, dst) inside a 10-iteration loop. */
1953 void test_PMULDQ ( void )
1957 for (i
= 0; i
< 10; i
++) {
1960 DO_mandr_r("pmuldq", src
, dst
);
/* Runs DO_mandr_r("pmulld", src, dst) inside a 10-iteration loop. */
1965 void test_PMULLD ( void )
1969 for (i
= 0; i
< 10; i
++) {
1972 DO_mandr_r("pmulld", src
, dst
);
/* Tests the 64-bit popcnt in register ("r") and memory ("m") source forms.
   For each of 10 iterations, block[0] is the source (forced to 0 on the
   first iteration, random afterwards) and block[1] the initial destination.
   The asm stores the destination register to block[2] and r12 to block[3];
   block[3] is printed masked with oszacp_mask (0x8D5 -- by its name,
   presumably the O/S/Z/A/C/P bits of RFLAGS).
   The asm explicitly loads %0 into rax, so "rax" is declared in the
   clobber lists; without it the compiler may assume rax survives the
   statement. */
1977 void test_POPCNTQ ( void )
1981 ULong oszacp_mask
= 0x8D5;
1982 for (i
= 0; i
< 10; i
++) {
1983 block
[0] = i
== 0 ? 0 : randULong();
1984 block
[1] = randULong();
1985 block
[2] = randULong();
1986 block
[3] = randULong();
1987 __asm__
__volatile__(
1988 "movq %0, %%rax" "\n\t"
1989 "movq 0(%%rax), %%rdi" "\n\t"
1990 "movq 8(%%rax), %%r11" "\n\t"
1991 #ifndef VGP_amd64_darwin
1992 "popcntq %%rdi, %%r11" "\n\t"
1994 "popcnt %%rdi, %%r11" "\n\t"
1996 "movq %%r11, 16(%%rax)" "\n\t"
1999 "movq %%r12, 24(%%rax)" "\n"
2001 : /*in*/"r"(&block
[0])
2002 : /*trash*/ "cc", "memory", "rax", "rdi", "r11", "r12"
2004 printf("r popcntq %016llx %016llx %016llx %016llx\n",
2005 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* memory-source form: popcnt reads block[0] directly through rax */
2007 block
[0] = i
== 0 ? 0 : randULong();
2008 block
[1] = randULong();
2009 block
[2] = randULong();
2010 block
[3] = randULong();
2011 __asm__
__volatile__(
2012 "movq %0, %%rax" "\n\t"
2013 "movq 8(%%rax), %%r11" "\n\t"
2014 #ifndef VGP_amd64_darwin
2015 "popcntq 0(%%rax), %%r11" "\n\t"
2017 "popcnt 0(%%rax), %%r11" "\n\t"
2019 "movq %%r11, 16(%%rax)" "\n\t"
2022 "movq %%r12, 24(%%rax)" "\n"
2024 : /*in*/"r"(&block
[0])
2025 : /*trash*/ "cc", "memory", "rax", "r11", "r12"
2027 printf("m popcntq %016llx %016llx %016llx %016llx\n",
2028 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* Tests the 32-bit popcnt in register ("r") and memory ("m") source forms.
   For each of 10 iterations, block[0] is the source (forced to 0 on the
   first iteration, random afterwards) and block[1] the initial destination.
   The asm stores the full r11 to block[2] and r12 to block[3]; block[3]
   is printed masked with oszacp_mask (0x8D5 -- by its name, presumably the
   O/S/Z/A/C/P bits of RFLAGS).
   The asm explicitly loads %0 into rax, so "rax" is declared in the
   clobber lists; without it the compiler may assume rax survives the
   statement. */
2033 void test_POPCNTL ( void )
2037 ULong oszacp_mask
= 0x8D5;
2038 for (i
= 0; i
< 10; i
++) {
2039 block
[0] = i
== 0 ? 0 : randULong();
2040 block
[1] = randULong();
2041 block
[2] = randULong();
2042 block
[3] = randULong();
2043 __asm__
__volatile__(
2044 "movq %0, %%rax" "\n\t"
2045 "movq 0(%%rax), %%rdi" "\n\t"
2046 "movq 8(%%rax), %%r11" "\n\t"
2047 #ifndef VGP_amd64_darwin
2048 "popcntl %%edi, %%r11d" "\n\t"
2050 "popcnt %%edi, %%r11d" "\n\t"
2052 "movq %%r11, 16(%%rax)" "\n\t"
2055 "movq %%r12, 24(%%rax)" "\n"
2057 : /*in*/"r"(&block
[0])
2058 : /*trash*/ "cc", "memory", "rax", "rdi", "r11", "r12"
2060 printf("r popcntl %016llx %016llx %016llx %016llx\n",
2061 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* memory-source form: popcnt reads block[0] directly through rax */
2063 block
[0] = i
== 0 ? 0 : randULong();
2064 block
[1] = randULong();
2065 block
[2] = randULong();
2066 block
[3] = randULong();
2067 __asm__
__volatile__(
2068 "movq %0, %%rax" "\n\t"
2069 "movq 8(%%rax), %%r11" "\n\t"
2070 #ifndef VGP_amd64_darwin
2071 "popcntl 0(%%rax), %%r11d" "\n\t"
2073 "popcnt 0(%%rax), %%r11d" "\n\t"
2075 "movq %%r11, 16(%%rax)" "\n\t"
2078 "movq %%r12, 24(%%rax)" "\n"
2080 : /*in*/"r"(&block
[0])
2081 : /*trash*/ "cc", "memory", "rax", "r11", "r12"
2083 printf("m popcntl %016llx %016llx %016llx %016llx\n",
2084 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* Tests the 16-bit popcnt in register ("r") and memory ("m") source forms.
   For each of 10 iterations, block[0] is the source (forced to 0 on the
   first iteration, random afterwards) and block[1] the initial destination.
   The asm stores the full r11 to block[2] and r12 to block[3]; block[3]
   is printed masked with oszacp_mask (0x8D5 -- by its name, presumably the
   O/S/Z/A/C/P bits of RFLAGS).
   The asm explicitly loads %0 into rax, so "rax" is declared in the
   clobber lists; without it the compiler may assume rax survives the
   statement. */
2089 void test_POPCNTW ( void )
2093 ULong oszacp_mask
= 0x8D5;
2094 for (i
= 0; i
< 10; i
++) {
2095 block
[0] = i
== 0 ? 0 : randULong();
2096 block
[1] = randULong();
2097 block
[2] = randULong();
2098 block
[3] = randULong();
2099 __asm__
__volatile__(
2100 "movq %0, %%rax" "\n\t"
2101 "movq 0(%%rax), %%rdi" "\n\t"
2102 "movq 8(%%rax), %%r11" "\n\t"
2103 #ifndef VGP_amd64_darwin
2104 "popcntw %%di, %%r11w" "\n\t"
2106 "popcnt %%di, %%r11w" "\n\t"
2108 "movq %%r11, 16(%%rax)" "\n\t"
2111 "movq %%r12, 24(%%rax)" "\n"
2113 : /*in*/"r"(&block
[0])
2114 : /*trash*/ "cc", "memory", "rax", "rdi", "r11", "r12"
2116 printf("r popcntw %016llx %016llx %016llx %016llx\n",
2117 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* memory-source form: popcnt reads block[0] directly through rax */
2119 block
[0] = i
== 0 ? 0 : randULong();
2120 block
[1] = randULong();
2121 block
[2] = randULong();
2122 block
[3] = randULong();
2123 __asm__
__volatile__(
2124 "movq %0, %%rax" "\n\t"
2125 "movq 8(%%rax), %%r11" "\n\t"
2126 #ifndef VGP_amd64_darwin
2127 "popcntw 0(%%rax), %%r11w" "\n\t"
2129 "popcnt 0(%%rax), %%r11w" "\n\t"
2131 "movq %%r11, 16(%%rax)" "\n\t"
2134 "movq %%r12, 24(%%rax)" "\n"
2136 : /*in*/"r"(&block
[0])
2137 : /*trash*/ "cc", "memory", "rax", "r11", "r12"
2139 printf("m popcntw %016llx %016llx %016llx %016llx\n",
2140 block
[0], block
[1], block
[2], block
[3] & oszacp_mask
);
/* Exercises "pcmpgtq" via DO_mandr_r in two phases: a 10-iteration loop,
   then every ordered pair (i, j) drawn from seven hand-picked 128-bit
   values in spec[0..6].  The spec values are built with do64HLtoV128
   (high 64 bits first, then low 64 bits) and sit at or near the extremes
   of the 64-bit range (0, 1, 0x7fff..., 0x8000..., 0xffff...). */
2145 void test_PCMPGTQ ( void )
2148 do64HLtoV128( &spec
[0], 0x0000000000000000ULL
, 0xffffffffffffffffULL
);
2149 do64HLtoV128( &spec
[1], 0x0000000000000001ULL
, 0xfffffffffffffffeULL
);
2150 do64HLtoV128( &spec
[2], 0x7fffffffffffffffULL
, 0x8000000000000001ULL
);
2151 do64HLtoV128( &spec
[3], 0x8000000000000000ULL
, 0x8000000000000000ULL
);
2152 do64HLtoV128( &spec
[4], 0x8000000000000001ULL
, 0x7fffffffffffffffULL
);
2153 do64HLtoV128( &spec
[5], 0xfffffffffffffffeULL
, 0x0000000000000001ULL
);
2154 do64HLtoV128( &spec
[6], 0xffffffffffffffffULL
, 0x0000000000000000ULL
);
2158 for (i
= 0; i
< 10; i
++) {
2161 DO_mandr_r("pcmpgtq", src
, dst
);
/* all 7x7 combinations of the special values: src = spec[i], dst = spec[j] */
2163 for (i
= 0; i
< 7; i
++) {
2164 for (j
= 0; j
< 7; j
++) {
2165 memcpy(&src
, &spec
[i
], 16);
2166 memcpy(&dst
, &spec
[j
], 16);
2167 DO_mandr_r("pcmpgtq", src
, dst
);
2172 /* ------------ ROUNDSD ------------ */
/* Helper: executes "roundsd $0" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2174 void do_ROUNDSD_000 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2177 __asm__
__volatile__(
2178 "movupd (%1), %%xmm11" "\n\t"
2179 "roundsd $0, (%0), %%xmm11" "\n\t"
2180 "movupd %%xmm11, (%1)" "\n"
2182 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2186 __asm__
__volatile__(
2187 "movupd (%1), %%xmm11" "\n\t"
2188 "movupd (%0), %%xmm2" "\n\t"
2189 "roundsd $0, %%xmm2, %%xmm11" "\n\t"
2190 "movupd %%xmm11, (%1)" "\n"
2192 : /*IN*/ "r"(src
), "r"(dst
)
2193 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundsd $1" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2198 void do_ROUNDSD_001 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2201 __asm__
__volatile__(
2202 "movupd (%1), %%xmm11" "\n\t"
2203 "roundsd $1, (%0), %%xmm11" "\n\t"
2204 "movupd %%xmm11, (%1)" "\n"
2206 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2210 __asm__
__volatile__(
2211 "movupd (%1), %%xmm11" "\n\t"
2212 "movupd (%0), %%xmm2" "\n\t"
2213 "roundsd $1, %%xmm2, %%xmm11" "\n\t"
2214 "movupd %%xmm11, (%1)" "\n"
2216 : /*IN*/ "r"(src
), "r"(dst
)
2217 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundsd $2" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2222 void do_ROUNDSD_010 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2225 __asm__
__volatile__(
2226 "movupd (%1), %%xmm11" "\n\t"
2227 "roundsd $2, (%0), %%xmm11" "\n\t"
2228 "movupd %%xmm11, (%1)" "\n"
2230 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2234 __asm__
__volatile__(
2235 "movupd (%1), %%xmm11" "\n\t"
2236 "movupd (%0), %%xmm2" "\n\t"
2237 "roundsd $2, %%xmm2, %%xmm11" "\n\t"
2238 "movupd %%xmm11, (%1)" "\n"
2240 : /*IN*/ "r"(src
), "r"(dst
)
2241 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundsd $3" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2246 void do_ROUNDSD_011 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2249 __asm__
__volatile__(
2250 "movupd (%1), %%xmm11" "\n\t"
2251 "roundsd $3, (%0), %%xmm11" "\n\t"
2252 "movupd %%xmm11, (%1)" "\n"
2254 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2258 __asm__
__volatile__(
2259 "movupd (%1), %%xmm11" "\n\t"
2260 "movupd (%0), %%xmm2" "\n\t"
2261 "roundsd $3, %%xmm2, %%xmm11" "\n\t"
2262 "movupd %%xmm11, (%1)" "\n"
2264 : /*IN*/ "r"(src
), "r"(dst
)
2265 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundsd $4" with *src as source and *dst as
   destination.  This helper is driven by test_ROUNDSD_w_mxcsr_rounding,
   so the $4 immediate presumably defers rounding control to MXCSR --
   confirm against the instruction reference.  The first asm statement
   uses the memory-operand form ((%0) == src); the second first loads src
   into xmm2 and uses the register form.  `mem` presumably selects between
   them -- the branch is not visible in this listing. */
2270 void do_ROUNDSD_1XX ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2273 __asm__
__volatile__(
2274 "movupd (%1), %%xmm11" "\n\t"
2275 "roundsd $4, (%0), %%xmm11" "\n\t"
2276 "movupd %%xmm11, (%1)" "\n"
2278 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2282 __asm__
__volatile__(
2283 "movupd (%1), %%xmm11" "\n\t"
2284 "movupd (%0), %%xmm2" "\n\t"
2285 "roundsd $4, %%xmm2, %%xmm11" "\n\t"
2286 "movupd %%xmm11, (%1)" "\n"
2288 : /*IN*/ "r"(src
), "r"(dst
)
2289 : /*TRASH*/ "xmm11","xmm2"
/* Drives do_ROUNDSD_000/001/010/011 over every entry of vals (which
   includes +/-Inf, +/-NaN and values just either side of +/-0.5).  For
   each entry and each helper, the low 8 bytes of src are set to vals[i],
   the helper is called once with mem=False ("r" output lines) and once
   with mem=True ("m" output lines), and the input plus the low double of
   dst are printed.
   NOTE(review): the result is read with *(double*)(&dst[0]); if dst is a
   V128 byte array this cast-based read is a strict-aliasing/alignment
   hazard, and a memcpy into a local double would be safer -- confirm. */
2294 void test_ROUNDSD_w_immediate_rounding ( void )
2300 vals
[i
++] = mkPosInf();
2301 vals
[i
++] = mkNegInf();
2302 vals
[i
++] = mkPosNan();
2303 vals
[i
++] = mkNegNan();
2308 vals
[i
++] = -0.50001;
2309 vals
[i
++] = -0.49999;
2314 vals
[i
++] = 0.49999;
2315 vals
[i
++] = 0.50001;
2322 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
/* immediate $0, register then memory form */
2327 memcpy(&src
[0], &vals
[i
], 8);
2328 do_ROUNDSD_000(False
/*reg*/, &src
, &dst
);
2329 printf("r roundsd_000 ");
2333 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
2338 memcpy(&src
[0], &vals
[i
], 8);
2339 do_ROUNDSD_000(True
/*mem*/, &src
, &dst
);
2340 printf("m roundsd_000 ");
2344 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
/* immediate $1, register then memory form */
2350 memcpy(&src
[0], &vals
[i
], 8);
2351 do_ROUNDSD_001(False
/*reg*/, &src
, &dst
);
2352 printf("r roundsd_001 ");
2356 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
2361 memcpy(&src
[0], &vals
[i
], 8);
2362 do_ROUNDSD_001(True
/*mem*/, &src
, &dst
);
2363 printf("m roundsd_001 ");
2367 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
/* immediate $2, register then memory form */
2373 memcpy(&src
[0], &vals
[i
], 8);
2374 do_ROUNDSD_010(False
/*reg*/, &src
, &dst
);
2375 printf("r roundsd_010 ");
2379 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
2384 memcpy(&src
[0], &vals
[i
], 8);
2385 do_ROUNDSD_010(True
/*mem*/, &src
, &dst
);
2386 printf("m roundsd_010 ");
2390 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
/* immediate $3, register then memory form */
2396 memcpy(&src
[0], &vals
[i
], 8);
2397 do_ROUNDSD_011(False
/*reg*/, &src
, &dst
);
2398 printf("r roundsd_011 ");
2402 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
2407 memcpy(&src
[0], &vals
[i
], 8);
2408 do_ROUNDSD_011(True
/*mem*/, &src
, &dst
);
2409 printf("m roundsd_011 ");
2413 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
/* Drives do_ROUNDSD_1XX over every entry of vals under each SSE rounding
   mode rm = 0..3 (set with set_sse_roundingmode).  The mode is asserted
   to be 0 on entry, and it is reset to 0 and re-asserted at the end.  For
   each (value, mode) pair the helper is called with mem=False ("r" lines)
   and mem=True ("m" lines), and the input plus the low double of dst are
   printed.
   NOTE(review): *(double*)(&dst[0]) is a cast-based read; if dst is a
   V128 byte array a memcpy into a double would avoid the aliasing
   hazard -- confirm. */
2418 void test_ROUNDSD_w_mxcsr_rounding ( void )
2425 vals
[i
++] = mkPosInf();
2426 vals
[i
++] = mkNegInf();
2427 vals
[i
++] = mkPosNan();
2428 vals
[i
++] = mkNegNan();
2433 vals
[i
++] = -0.50001;
2434 vals
[i
++] = -0.49999;
2439 vals
[i
++] = 0.49999;
2440 vals
[i
++] = 0.50001;
2447 rm
= get_sse_roundingmode();
2448 assert(rm
== 0); // 0 == RN == default
2450 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
2453 for (rm
= 0; rm
<= 3; rm
++) {
2454 set_sse_roundingmode(rm
);
2458 memcpy(&src
[0], &vals
[i
], 8);
2459 do_ROUNDSD_1XX(False
/*reg*/, &src
, &dst
);
2460 printf("r (rm=%u) roundsd_1XX ", rm
);
2464 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
2469 memcpy(&src
[0], &vals
[i
], 8);
2470 do_ROUNDSD_1XX(True
/*mem*/, &src
, &dst
);
2471 printf("m (rm=%u) roundsd_1XX ", rm
);
2475 printf(" %10f %10f", vals
[i
], *(double*)(&dst
[0]));
/* restore the default rounding mode and check that it took effect */
2480 rm
= get_sse_roundingmode();
2482 set_sse_roundingmode(0);
2483 rm
= get_sse_roundingmode();
2484 assert(rm
== 0); // 0 == RN == default
2488 /* ------------ ROUNDSS ------------ */
/* Helper: executes "roundss $0" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2490 void do_ROUNDSS_000 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2493 __asm__
__volatile__(
2494 "movupd (%1), %%xmm11" "\n\t"
2495 "roundss $0, (%0), %%xmm11" "\n\t"
2496 "movupd %%xmm11, (%1)" "\n"
2498 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2502 __asm__
__volatile__(
2503 "movupd (%1), %%xmm11" "\n\t"
2504 "movupd (%0), %%xmm2" "\n\t"
2505 "roundss $0, %%xmm2, %%xmm11" "\n\t"
2506 "movupd %%xmm11, (%1)" "\n"
2508 : /*IN*/ "r"(src
), "r"(dst
)
2509 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundss $1" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2514 void do_ROUNDSS_001 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2517 __asm__
__volatile__(
2518 "movupd (%1), %%xmm11" "\n\t"
2519 "roundss $1, (%0), %%xmm11" "\n\t"
2520 "movupd %%xmm11, (%1)" "\n"
2522 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2526 __asm__
__volatile__(
2527 "movupd (%1), %%xmm11" "\n\t"
2528 "movupd (%0), %%xmm2" "\n\t"
2529 "roundss $1, %%xmm2, %%xmm11" "\n\t"
2530 "movupd %%xmm11, (%1)" "\n"
2532 : /*IN*/ "r"(src
), "r"(dst
)
2533 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundss $2" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2538 void do_ROUNDSS_010 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2541 __asm__
__volatile__(
2542 "movupd (%1), %%xmm11" "\n\t"
2543 "roundss $2, (%0), %%xmm11" "\n\t"
2544 "movupd %%xmm11, (%1)" "\n"
2546 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2550 __asm__
__volatile__(
2551 "movupd (%1), %%xmm11" "\n\t"
2552 "movupd (%0), %%xmm2" "\n\t"
2553 "roundss $2, %%xmm2, %%xmm11" "\n\t"
2554 "movupd %%xmm11, (%1)" "\n"
2556 : /*IN*/ "r"(src
), "r"(dst
)
2557 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundss $3" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2562 void do_ROUNDSS_011 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2565 __asm__
__volatile__(
2566 "movupd (%1), %%xmm11" "\n\t"
2567 "roundss $3, (%0), %%xmm11" "\n\t"
2568 "movupd %%xmm11, (%1)" "\n"
2570 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2574 __asm__
__volatile__(
2575 "movupd (%1), %%xmm11" "\n\t"
2576 "movupd (%0), %%xmm2" "\n\t"
2577 "roundss $3, %%xmm2, %%xmm11" "\n\t"
2578 "movupd %%xmm11, (%1)" "\n"
2580 : /*IN*/ "r"(src
), "r"(dst
)
2581 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundss $4" with *src as source and *dst as
   destination.  This helper is driven by test_ROUNDSS_w_mxcsr_rounding,
   so the $4 immediate presumably defers rounding control to MXCSR --
   confirm against the instruction reference.  The first asm statement
   uses the memory-operand form ((%0) == src); the second first loads src
   into xmm2 and uses the register form.  `mem` presumably selects between
   them -- the branch is not visible in this listing. */
2586 void do_ROUNDSS_1XX ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2589 __asm__
__volatile__(
2590 "movupd (%1), %%xmm11" "\n\t"
2591 "roundss $4, (%0), %%xmm11" "\n\t"
2592 "movupd %%xmm11, (%1)" "\n"
2594 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2598 __asm__
__volatile__(
2599 "movupd (%1), %%xmm11" "\n\t"
2600 "movupd (%0), %%xmm2" "\n\t"
2601 "roundss $4, %%xmm2, %%xmm11" "\n\t"
2602 "movupd %%xmm11, (%1)" "\n"
2604 : /*IN*/ "r"(src
), "r"(dst
)
2605 : /*TRASH*/ "xmm11","xmm2"
/* Drives do_ROUNDSS_000/001/010/011 over every entry of vals (which
   includes +/-Inf, +/-NaN and values just either side of +/-0.5).  For
   each entry and each helper, the low 4 bytes of src are set from
   vals[i], the helper is called once with mem=False ("r" output lines)
   and once with mem=True ("m" output lines), and the input plus the low
   float of dst (widened to double) are printed.
   NOTE(review): the result is read with *(float*)(&dst[0]); if dst is a
   V128 byte array this cast-based read is a strict-aliasing/alignment
   hazard, and a memcpy into a local float would be safer -- confirm. */
2610 void test_ROUNDSS_w_immediate_rounding ( void )
2616 vals
[i
++] = mkPosInf();
2617 vals
[i
++] = mkNegInf();
2618 vals
[i
++] = mkPosNan();
2619 vals
[i
++] = mkNegNan();
2624 vals
[i
++] = -0.50001;
2625 vals
[i
++] = -0.49999;
2630 vals
[i
++] = 0.49999;
2631 vals
[i
++] = 0.50001;
2638 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
/* immediate $0, register then memory form */
2643 memcpy(&src
[0], &vals
[i
], 4);
2644 do_ROUNDSS_000(False
/*reg*/, &src
, &dst
);
2645 printf("r roundss_000 ");
2649 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
2654 memcpy(&src
[0], &vals
[i
], 4);
2655 do_ROUNDSS_000(True
/*mem*/, &src
, &dst
);
2656 printf("m roundss_000 ");
2660 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
/* immediate $1, register then memory form */
2666 memcpy(&src
[0], &vals
[i
], 4);
2667 do_ROUNDSS_001(False
/*reg*/, &src
, &dst
);
2668 printf("r roundss_001 ");
2672 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
2677 memcpy(&src
[0], &vals
[i
], 4);
2678 do_ROUNDSS_001(True
/*mem*/, &src
, &dst
);
2679 printf("m roundss_001 ");
2683 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
/* immediate $2, register then memory form */
2689 memcpy(&src
[0], &vals
[i
], 4);
2690 do_ROUNDSS_010(False
/*reg*/, &src
, &dst
);
2691 printf("r roundss_010 ");
2695 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
2700 memcpy(&src
[0], &vals
[i
], 4);
2701 do_ROUNDSS_010(True
/*mem*/, &src
, &dst
);
2702 printf("m roundss_010 ");
2706 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
/* immediate $3, register then memory form */
2712 memcpy(&src
[0], &vals
[i
], 4);
2713 do_ROUNDSS_011(False
/*reg*/, &src
, &dst
);
2714 printf("r roundss_011 ");
2718 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
2723 memcpy(&src
[0], &vals
[i
], 4);
2724 do_ROUNDSS_011(True
/*mem*/, &src
, &dst
);
2725 printf("m roundss_011 ");
2729 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
/* Drives do_ROUNDSS_1XX over every entry of vals under each SSE rounding
   mode rm = 0..3 (set with set_sse_roundingmode).  The mode is asserted
   to be 0 on entry, and it is reset to 0 and re-asserted at the end.  For
   each (value, mode) pair the helper is called with mem=False ("r" lines)
   and mem=True ("m" lines), and the input plus the low float of dst
   (widened to double) are printed.
   NOTE(review): *(float*)(&dst[0]) is a cast-based read; if dst is a
   V128 byte array a memcpy into a float would avoid the aliasing
   hazard -- confirm. */
2734 void test_ROUNDSS_w_mxcsr_rounding ( void )
2741 vals
[i
++] = mkPosInf();
2742 vals
[i
++] = mkNegInf();
2743 vals
[i
++] = mkPosNan();
2744 vals
[i
++] = mkNegNan();
2749 vals
[i
++] = -0.50001;
2750 vals
[i
++] = -0.49999;
2755 vals
[i
++] = 0.49999;
2756 vals
[i
++] = 0.50001;
2763 rm
= get_sse_roundingmode();
2764 assert(rm
== 0); // 0 == RN == default
2766 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
2769 for (rm
= 0; rm
<= 3; rm
++) {
2770 set_sse_roundingmode(rm
);
2774 memcpy(&src
[0], &vals
[i
], 4);
2775 do_ROUNDSS_1XX(False
/*reg*/, &src
, &dst
);
2776 printf("r (rm=%u) roundss_1XX ", rm
);
2780 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
2785 memcpy(&src
[0], &vals
[i
], 4);
2786 do_ROUNDSS_1XX(True
/*mem*/, &src
, &dst
);
2787 printf("m (rm=%u) roundss_1XX ", rm
);
2791 printf(" %10f %10f", (double)vals
[i
], (double)*(float*)(&dst
[0]));
/* restore the default rounding mode and check that it took effect */
2796 rm
= get_sse_roundingmode();
2798 set_sse_roundingmode(0);
2799 rm
= get_sse_roundingmode();
2800 assert(rm
== 0); // 0 == RN == default
2803 /* ------------ ROUNDPD ------------ */
/* Helper: executes "roundpd $0" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2805 void do_ROUNDPD_000 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2808 __asm__
__volatile__(
2809 "movupd (%1), %%xmm11" "\n\t"
2810 "roundpd $0, (%0), %%xmm11" "\n\t"
2811 "movupd %%xmm11, (%1)" "\n"
2813 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2817 __asm__
__volatile__(
2818 "movupd (%1), %%xmm11" "\n\t"
2819 "movupd (%0), %%xmm2" "\n\t"
2820 "roundpd $0, %%xmm2, %%xmm11" "\n\t"
2821 "movupd %%xmm11, (%1)" "\n"
2823 : /*IN*/ "r"(src
), "r"(dst
)
2824 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundpd $1" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2829 void do_ROUNDPD_001 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2832 __asm__
__volatile__(
2833 "movupd (%1), %%xmm11" "\n\t"
2834 "roundpd $1, (%0), %%xmm11" "\n\t"
2835 "movupd %%xmm11, (%1)" "\n"
2837 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2841 __asm__
__volatile__(
2842 "movupd (%1), %%xmm11" "\n\t"
2843 "movupd (%0), %%xmm2" "\n\t"
2844 "roundpd $1, %%xmm2, %%xmm11" "\n\t"
2845 "movupd %%xmm11, (%1)" "\n"
2847 : /*IN*/ "r"(src
), "r"(dst
)
2848 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundpd $2" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2853 void do_ROUNDPD_010 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2856 __asm__
__volatile__(
2857 "movupd (%1), %%xmm11" "\n\t"
2858 "roundpd $2, (%0), %%xmm11" "\n\t"
2859 "movupd %%xmm11, (%1)" "\n"
2861 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2865 __asm__
__volatile__(
2866 "movupd (%1), %%xmm11" "\n\t"
2867 "movupd (%0), %%xmm2" "\n\t"
2868 "roundpd $2, %%xmm2, %%xmm11" "\n\t"
2869 "movupd %%xmm11, (%1)" "\n"
2871 : /*IN*/ "r"(src
), "r"(dst
)
2872 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundpd $3" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
2877 void do_ROUNDPD_011 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2880 __asm__
__volatile__(
2881 "movupd (%1), %%xmm11" "\n\t"
2882 "roundpd $3, (%0), %%xmm11" "\n\t"
2883 "movupd %%xmm11, (%1)" "\n"
2885 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2889 __asm__
__volatile__(
2890 "movupd (%1), %%xmm11" "\n\t"
2891 "movupd (%0), %%xmm2" "\n\t"
2892 "roundpd $3, %%xmm2, %%xmm11" "\n\t"
2893 "movupd %%xmm11, (%1)" "\n"
2895 : /*IN*/ "r"(src
), "r"(dst
)
2896 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundpd $4" with *src as source and *dst as
   destination.  This helper is driven by test_ROUNDPD_w_mxcsr_rounding,
   so the $4 immediate presumably defers rounding control to MXCSR --
   confirm against the instruction reference.  The first asm statement
   uses the memory-operand form ((%0) == src); the second first loads src
   into xmm2 and uses the register form.  `mem` presumably selects between
   them -- the branch is not visible in this listing. */
2901 void do_ROUNDPD_1XX ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
2904 __asm__
__volatile__(
2905 "movupd (%1), %%xmm11" "\n\t"
2906 "roundpd $4, (%0), %%xmm11" "\n\t"
2907 "movupd %%xmm11, (%1)" "\n"
2909 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
2913 __asm__
__volatile__(
2914 "movupd (%1), %%xmm11" "\n\t"
2915 "movupd (%0), %%xmm2" "\n\t"
2916 "roundpd $4, %%xmm2, %%xmm11" "\n\t"
2917 "movupd %%xmm11, (%1)" "\n"
2919 : /*IN*/ "r"(src
), "r"(dst
)
2920 : /*TRASH*/ "xmm11","xmm2"
/* Drives do_ROUNDPD_000/001/010/011 over every entry of vals.  For each
   index i, the low 8 bytes of src take vals[i] and the high 8 bytes take
   vals[(i+11)%22], so both double lanes get a test value.  Each helper
   is called with mem=False ("r" lines) and mem=True ("m" lines), and both
   input/result pairs are printed.
   NOTE(review): the hard-coded 22 presumably equals the element count of
   vals -- confirm.  The results are read with *(double*)(&dst[..]); if
   dst is a V128 byte array a memcpy into a double would avoid the
   strict-aliasing hazard -- confirm. */
2925 void test_ROUNDPD_w_immediate_rounding ( void )
2931 vals
[i
++] = mkPosInf();
2932 vals
[i
++] = mkNegInf();
2933 vals
[i
++] = mkPosNan();
2934 vals
[i
++] = mkNegNan();
2939 vals
[i
++] = -0.50001;
2940 vals
[i
++] = -0.49999;
2945 vals
[i
++] = 0.49999;
2946 vals
[i
++] = 0.50001;
2953 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
/* immediate $0, register then memory form */
2958 memcpy(&src
[0], &vals
[i
], 8);
2959 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
2960 do_ROUNDPD_000(False
/*reg*/, &src
, &dst
);
2961 printf("r roundpd_000 ");
2965 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
2966 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
2971 memcpy(&src
[0], &vals
[i
], 8);
2972 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
2973 do_ROUNDPD_000(True
/*mem*/, &src
, &dst
);
2974 printf("m roundpd_000 ");
2978 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
2979 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
/* immediate $1, register then memory form */
2985 memcpy(&src
[0], &vals
[i
], 8);
2986 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
2987 do_ROUNDPD_001(False
/*reg*/, &src
, &dst
);
2988 printf("r roundpd_001 ");
2992 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
2993 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
2998 memcpy(&src
[0], &vals
[i
], 8);
2999 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3000 do_ROUNDPD_001(True
/*mem*/, &src
, &dst
);
3001 printf("m roundpd_001 ");
3005 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3006 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
/* immediate $2, register then memory form */
3012 memcpy(&src
[0], &vals
[i
], 8);
3013 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3014 do_ROUNDPD_010(False
/*reg*/, &src
, &dst
);
3015 printf("r roundpd_010 ");
3019 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3020 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
3025 memcpy(&src
[0], &vals
[i
], 8);
3026 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3027 do_ROUNDPD_010(True
/*mem*/, &src
, &dst
);
3028 printf("m roundpd_010 ");
3032 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3033 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
/* immediate $3, register then memory form */
3039 memcpy(&src
[0], &vals
[i
], 8);
3040 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3041 do_ROUNDPD_011(False
/*reg*/, &src
, &dst
);
3042 printf("r roundpd_011 ");
3046 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3047 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
3052 memcpy(&src
[0], &vals
[i
], 8);
3053 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3054 do_ROUNDPD_011(True
/*mem*/, &src
, &dst
);
3055 printf("m roundpd_011 ");
3059 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3060 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
/* Drives do_ROUNDPD_1XX over every entry of vals under each SSE rounding
   mode rm = 0..3 (set with set_sse_roundingmode).  The mode is asserted
   to be 0 on entry, and it is reset to 0 and re-asserted at the end.  The
   low lane of src takes vals[i] and the high lane vals[(i+11)%22]; the
   helper is called with mem=False ("r" lines) and mem=True ("m" lines)
   and both input/result pairs are printed.
   NOTE(review): the hard-coded 22 presumably equals the element count of
   vals, and the *(double*)(&dst[..]) reads are cast-based -- confirm
   both. */
3065 void test_ROUNDPD_w_mxcsr_rounding ( void )
3072 vals
[i
++] = mkPosInf();
3073 vals
[i
++] = mkNegInf();
3074 vals
[i
++] = mkPosNan();
3075 vals
[i
++] = mkNegNan();
3080 vals
[i
++] = -0.50001;
3081 vals
[i
++] = -0.49999;
3086 vals
[i
++] = 0.49999;
3087 vals
[i
++] = 0.50001;
3094 rm
= get_sse_roundingmode();
3095 assert(rm
== 0); // 0 == RN == default
3097 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
3100 for (rm
= 0; rm
<= 3; rm
++) {
3101 set_sse_roundingmode(rm
);
3105 memcpy(&src
[0], &vals
[i
], 8);
3106 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3107 do_ROUNDPD_1XX(False
/*reg*/, &src
, &dst
);
3108 printf("r (rm=%u) roundpd_1XX ", rm
);
3112 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3113 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
3118 memcpy(&src
[0], &vals
[i
], 8);
3119 memcpy(&src
[8], &vals
[(i
+11)%22], 8);
3120 do_ROUNDPD_1XX(True
/*mem*/, &src
, &dst
);
3121 printf("m (rm=%u) roundpd_1XX ", rm
);
3125 printf(" %10f -> %10f", vals
[i
], *(double*)(&dst
[0]));
3126 printf(" %10f -> %10f", vals
[(i
+11)%22], *(double*)(&dst
[8]));
/* restore the default rounding mode and check that it took effect */
3131 rm
= get_sse_roundingmode();
3133 set_sse_roundingmode(0);
3134 rm
= get_sse_roundingmode();
3135 assert(rm
== 0); // 0 == RN == default
3138 /* ------------ ROUNDPS ------------ */
/* Helper: executes "roundps $0" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
3140 void do_ROUNDPS_000 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
3143 __asm__
__volatile__(
3144 "movupd (%1), %%xmm11" "\n\t"
3145 "roundps $0, (%0), %%xmm11" "\n\t"
3146 "movupd %%xmm11, (%1)" "\n"
3148 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
3152 __asm__
__volatile__(
3153 "movupd (%1), %%xmm11" "\n\t"
3154 "movupd (%0), %%xmm2" "\n\t"
3155 "roundps $0, %%xmm2, %%xmm11" "\n\t"
3156 "movupd %%xmm11, (%1)" "\n"
3158 : /*IN*/ "r"(src
), "r"(dst
)
3159 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundps $1" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
3164 void do_ROUNDPS_001 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
3167 __asm__
__volatile__(
3168 "movupd (%1), %%xmm11" "\n\t"
3169 "roundps $1, (%0), %%xmm11" "\n\t"
3170 "movupd %%xmm11, (%1)" "\n"
3172 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
3176 __asm__
__volatile__(
3177 "movupd (%1), %%xmm11" "\n\t"
3178 "movupd (%0), %%xmm2" "\n\t"
3179 "roundps $1, %%xmm2, %%xmm11" "\n\t"
3180 "movupd %%xmm11, (%1)" "\n"
3182 : /*IN*/ "r"(src
), "r"(dst
)
3183 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundps $2" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
3188 void do_ROUNDPS_010 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
3191 __asm__
__volatile__(
3192 "movupd (%1), %%xmm11" "\n\t"
3193 "roundps $2, (%0), %%xmm11" "\n\t"
3194 "movupd %%xmm11, (%1)" "\n"
3196 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
3200 __asm__
__volatile__(
3201 "movupd (%1), %%xmm11" "\n\t"
3202 "movupd (%0), %%xmm2" "\n\t"
3203 "roundps $2, %%xmm2, %%xmm11" "\n\t"
3204 "movupd %%xmm11, (%1)" "\n"
3206 : /*IN*/ "r"(src
), "r"(dst
)
3207 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundps $3" with *src as source and *dst as
   destination.  Each asm statement loads the 16 bytes at dst into xmm11,
   applies the instruction, and stores xmm11 back to dst.  The first
   statement uses the memory-operand form ((%0) == src); the second first
   loads src into xmm2 and uses the register form.  `mem` presumably
   selects between them -- the branch is not visible in this listing. */
3212 void do_ROUNDPS_011 ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
3215 __asm__
__volatile__(
3216 "movupd (%1), %%xmm11" "\n\t"
3217 "roundps $3, (%0), %%xmm11" "\n\t"
3218 "movupd %%xmm11, (%1)" "\n"
3220 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
3224 __asm__
__volatile__(
3225 "movupd (%1), %%xmm11" "\n\t"
3226 "movupd (%0), %%xmm2" "\n\t"
3227 "roundps $3, %%xmm2, %%xmm11" "\n\t"
3228 "movupd %%xmm11, (%1)" "\n"
3230 : /*IN*/ "r"(src
), "r"(dst
)
3231 : /*TRASH*/ "xmm11","xmm2"
/* Helper: executes "roundps $4" with *src as source and *dst as
   destination.  This helper is driven by test_ROUNDPS_w_mxcsr_rounding,
   so the $4 immediate presumably defers rounding control to MXCSR --
   confirm against the instruction reference.  The first asm statement
   uses the memory-operand form ((%0) == src); the second first loads src
   into xmm2 and uses the register form.  `mem` presumably selects between
   them -- the branch is not visible in this listing. */
3236 void do_ROUNDPS_1XX ( Bool mem
, V128
* src
, /*OUT*/V128
* dst
)
3239 __asm__
__volatile__(
3240 "movupd (%1), %%xmm11" "\n\t"
3241 "roundps $4, (%0), %%xmm11" "\n\t"
3242 "movupd %%xmm11, (%1)" "\n"
3244 : /*IN*/ "r"(src
), "r"(dst
)
/* register form: src is staged in xmm2 first */
3248 __asm__
__volatile__(
3249 "movupd (%1), %%xmm11" "\n\t"
3250 "movupd (%0), %%xmm2" "\n\t"
3251 "roundps $4, %%xmm2, %%xmm11" "\n\t"
3252 "movupd %%xmm11, (%1)" "\n"
3254 : /*IN*/ "r"(src
), "r"(dst
)
3255 : /*TRASH*/ "xmm11","xmm2"
/* Drives do_ROUNDPS_000/001/010/011 over every entry of vals.  For each
   index i, the four 4-byte lanes of src (offsets 0, 4, 8, 12) are filled
   from vals[i], vals[(i+5)%22], vals[(i+11)%22] and vals[(i+17)%22].
   Each helper is called with mem=False ("r" lines) and mem=True ("m"
   lines), and all four input:result pairs are printed.
   NOTE(review): the hard-coded 22 presumably equals the element count of
   vals -- confirm.  The results are read with *(float*)(&dst[..]); if dst
   is a V128 byte array a memcpy into a float would avoid the
   strict-aliasing hazard -- confirm. */
3260 void test_ROUNDPS_w_immediate_rounding ( void )
3266 vals
[i
++] = mkPosInf();
3267 vals
[i
++] = mkNegInf();
3268 vals
[i
++] = mkPosNan();
3269 vals
[i
++] = mkNegNan();
3274 vals
[i
++] = -0.50001;
3275 vals
[i
++] = -0.49999;
3280 vals
[i
++] = 0.49999;
3281 vals
[i
++] = 0.50001;
3288 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
/* immediate $0, register then memory form */
3293 memcpy(&src
[0], &vals
[i
], 4);
3294 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3295 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3296 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3297 do_ROUNDPS_000(False
/*reg*/, &src
, &dst
);
3298 printf("r roundps_000 ");
3302 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3303 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3304 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3305 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
3310 memcpy(&src
[0], &vals
[i
], 4);
3311 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3312 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3313 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3314 do_ROUNDPS_000(True
/*mem*/, &src
, &dst
);
3315 printf("m roundps_000 ");
3319 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3320 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3321 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3322 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
/* immediate $1, register then memory form */
3328 memcpy(&src
[0], &vals
[i
], 4);
3329 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3330 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3331 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3332 do_ROUNDPS_001(False
/*reg*/, &src
, &dst
);
3333 printf("r roundps_001 ");
3337 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3338 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3339 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3340 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
3345 memcpy(&src
[0], &vals
[i
], 4);
3346 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3347 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3348 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3349 do_ROUNDPS_001(True
/*mem*/, &src
, &dst
);
3350 printf("m roundps_001 ");
3354 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3355 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3356 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3357 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
/* immediate $2, register then memory form */
3363 memcpy(&src
[0], &vals
[i
], 4);
3364 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3365 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3366 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3367 do_ROUNDPS_010(False
/*reg*/, &src
, &dst
);
3368 printf("r roundps_010 ");
3372 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3373 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3374 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3375 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
3380 memcpy(&src
[0], &vals
[i
], 4);
3381 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3382 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3383 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3384 do_ROUNDPS_010(True
/*mem*/, &src
, &dst
);
3385 printf("m roundps_010 ");
3389 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3390 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3391 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3392 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
/* immediate $3, register then memory form */
3398 memcpy(&src
[0], &vals
[i
], 4);
3399 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3400 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3401 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3402 do_ROUNDPS_011(False
/*reg*/, &src
, &dst
);
3403 printf("r roundps_011 ");
3407 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3408 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3409 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3410 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
3415 memcpy(&src
[0], &vals
[i
], 4);
3416 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3417 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3418 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3419 do_ROUNDPS_011(True
/*mem*/, &src
, &dst
);
3420 printf("m roundps_011 ");
3424 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3425 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3426 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3427 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
/* Drives do_ROUNDPS_1XX over every entry of vals under each SSE rounding
   mode rm = 0..3 (set with set_sse_roundingmode).  The mode is asserted
   to be 0 on entry, and it is reset to 0 and re-asserted at the end.  The
   four 4-byte lanes of src are filled from vals[i], vals[(i+5)%22],
   vals[(i+11)%22] and vals[(i+17)%22]; the helper is called with
   mem=False ("r" lines) and mem=True ("m" lines) and all four
   input:result pairs are printed.
   NOTE(review): the hard-coded 22 presumably equals the element count of
   vals, and the *(float*)(&dst[..]) reads are cast-based -- confirm
   both. */
3432 void test_ROUNDPS_w_mxcsr_rounding ( void )
3439 vals
[i
++] = mkPosInf();
3440 vals
[i
++] = mkNegInf();
3441 vals
[i
++] = mkPosNan();
3442 vals
[i
++] = mkNegNan();
3447 vals
[i
++] = -0.50001;
3448 vals
[i
++] = -0.49999;
3453 vals
[i
++] = 0.49999;
3454 vals
[i
++] = 0.50001;
3461 rm
= get_sse_roundingmode();
3462 assert(rm
== 0); // 0 == RN == default
3464 for (i
= 0; i
< sizeof(vals
)/sizeof(vals
[0]); i
++) {
3467 for (rm
= 0; rm
<= 3; rm
++) {
3468 set_sse_roundingmode(rm
);
3472 memcpy(&src
[0], &vals
[i
], 4);
3473 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3474 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3475 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3476 do_ROUNDPS_1XX(False
/*reg*/, &src
, &dst
);
3477 printf("r (rm=%u) roundps_1XX ", rm
);
3481 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3482 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3483 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3484 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
3489 memcpy(&src
[0], &vals
[i
], 4);
3490 memcpy(&src
[4], &vals
[(i
+5)%22], 4);
3491 memcpy(&src
[8], &vals
[(i
+11)%22], 4);
3492 memcpy(&src
[12], &vals
[(i
+17)%22], 4);
3493 do_ROUNDPS_1XX(True
/*mem*/, &src
, &dst
);
3494 printf("m (rm=%u) roundps_1XX ", rm
);
3498 printf(" %9f:%9f", vals
[i
], (double)*(float*)(&dst
[0]));
3499 printf(" %9f:%9f", vals
[(i
+5)%22], (double)*(float*)(&dst
[4]));
3500 printf(" %9f:%9f", vals
[(i
+11)%22], (double)*(float*)(&dst
[8]));
3501 printf(" %9f:%9f", vals
[(i
+17)%22], (double)*(float*)(&dst
[12]));
/* restore the default rounding mode and check that it took effect */
3506 rm
= get_sse_roundingmode();
3508 set_sse_roundingmode(0);
3509 rm
= get_sse_roundingmode();
3510 assert(rm
== 0); // 0 == RN == default
3513 /* ------------ PTEST ------------ */
/* Exercise PTEST on every ordered pair drawn from eight fixed 128-bit
   patterns (8 x 8 = 64 cases) and print the resulting flag bits.

   For each pair, spec[i] is copied to block[0] and spec[j] to block[1];
   block[0] is loaded into %xmm2 and block[1] (at byte offset 16 from the
   same base pointer) is used as PTEST's memory operand.  Both operands
   and the masked flags value are then printed. */
3515 void test_PTEST ( void )
3517 const Int ntests
= 8;
/* Patterns are built as (high 64 bits, low 64 bits): all-zeroes,
   all-ones, and variants of each that differ only in bit 0 of the high
   and/or low half. */
3519 do64HLtoV128( &spec
[0], 0x0000000000000000ULL
, 0x0000000000000000ULL
);
3520 do64HLtoV128( &spec
[1], 0x0000000000000000ULL
, 0x0000000000000001ULL
);
3521 do64HLtoV128( &spec
[2], 0x0000000000000001ULL
, 0x0000000000000000ULL
);
3522 do64HLtoV128( &spec
[3], 0x0000000000000001ULL
, 0x0000000000000001ULL
);
3523 do64HLtoV128( &spec
[4], 0xffffffffffffffffULL
, 0xffffffffffffffffULL
);
3524 do64HLtoV128( &spec
[5], 0xffffffffffffffffULL
, 0xfffffffffffffffeULL
);
3525 do64HLtoV128( &spec
[6], 0xfffffffffffffffeULL
, 0xffffffffffffffffULL
);
3526 do64HLtoV128( &spec
[7], 0xfffffffffffffffeULL
, 0xfffffffffffffffeULL
);
3530 for (i
= 0; i
< ntests
; i
++) {
3531 for (j
= 0; j
< ntests
; j
++) {
3532 memcpy(&block
[0], &spec
[i
], 16);
3533 memcpy(&block
[1], &spec
[j
], 16);
/* %rsp is lowered by 256 for the duration of the sequence and restored
   afterwards -- presumably to step clear of the stack red zone before
   the flags are captured; TODO confirm.  `flags` is the sole output
   operand; the block base pointer is the sole input. */
3534 __asm__
__volatile__(
3535 "subq $256, %%rsp" "\n\t"
3536 "movupd 0(%1), %%xmm2" "\n\t"
3537 "ptest 16(%1), %%xmm2" "\n\t"
3540 "addq $256, %%rsp" "\n\t"
3541 : /*out*/"=r"(flags
) : /*in*/ "r"(&block
[0]) :
3542 "xmm2", "memory", "cc"
3545 showV128(&block
[0]);
3547 showV128(&block
[1]);
/* 0x8D5 keeps only the OF, SF, ZF, AF, PF and CF positions of the
   x86 flags word. */
3548 printf(" -> eflags %04x\n", (UInt
)flags
& 0x8D5);
3553 /* ------------ PBLENDVB ------------ */
3555 void do_PBLENDVB ( Bool mem
, V128
* xmm0
, V128
* src
, /*MOD*/V128
* dst
)
3558 __asm__
__volatile__(
3559 "movupd (%2), %%xmm0" "\n\t"
3560 "movupd (%1), %%xmm11" "\n\t"
3561 "pblendvb (%0), %%xmm11" "\n\t"
3562 "movupd %%xmm11, (%1)" "\n"
3564 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3565 : /*TRASH*/ "xmm11","xmm0"
3568 __asm__
__volatile__(
3569 "movupd (%2), %%xmm0" "\n\t"
3570 "movupd (%1), %%xmm11" "\n\t"
3571 "movupd (%0), %%xmm2" "\n\t"
3572 "pblendvb %%xmm2, %%xmm11" "\n\t"
3573 "movupd %%xmm11, (%1)" "\n"
3575 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3576 : /*TRASH*/ "xmm11","xmm2","xmm0"
/* Exercise do_PBLENDVB for ten rounds, each round running the register
   form and then the memory form on the same inputs.

   xmm0 / src / dst are the working operands handed to do_PBLENDVB (dst
   is modified by the call); t_xmm0 / t_src / t_dst hold the round's
   pristine inputs.  The working copies are refreshed from the t_*
   copies before each of the two calls so both forms start from
   identical data.  Output lines are tagged "r" or "m" accordingly. */
3581 void test_PBLENDVB ( void )
3583 V128 xmm0
, src
, dst
, t_xmm0
, t_src
, t_dst
;
3585 for (i
= 0; i
< 10; i
++) {
/* Register-operand form. */
3590 memcpy(&xmm0
, &t_xmm0
, 16);
3591 memcpy(&src
, &t_src
, 16);
3592 memcpy(&dst
, &t_dst
, 16);
3593 do_PBLENDVB(False
/*reg*/, &xmm0
, &src
, &dst
);
3594 printf("r pblendvb ");
/* Memory-operand form, restarted from the same pristine inputs. */
3604 memcpy(&xmm0
, &t_xmm0
, 16);
3605 memcpy(&src
, &t_src
, 16);
3606 memcpy(&dst
, &t_dst
, 16);
3607 do_PBLENDVB(True
/*mem*/, &xmm0
, &src
, &dst
);
3608 printf("m pblendvb ");
3620 /* ------------ BLENDVPD ------------ */
3622 void do_BLENDVPD ( Bool mem
, V128
* xmm0
, V128
* src
, /*MOD*/V128
* dst
)
3625 __asm__
__volatile__(
3626 "movupd (%2), %%xmm0" "\n\t"
3627 "movupd (%1), %%xmm11" "\n\t"
3628 "blendvpd (%0), %%xmm11" "\n\t"
3629 "movupd %%xmm11, (%1)" "\n"
3631 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3632 : /*TRASH*/ "xmm11","xmm0"
3635 __asm__
__volatile__(
3636 "movupd (%2), %%xmm0" "\n\t"
3637 "movupd (%1), %%xmm11" "\n\t"
3638 "movupd (%0), %%xmm2" "\n\t"
3639 "blendvpd %%xmm2, %%xmm11" "\n\t"
3640 "movupd %%xmm11, (%1)" "\n"
3642 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3643 : /*TRASH*/ "xmm11","xmm2","xmm0"
/* Exercise do_BLENDVPD for ten rounds, each round running the register
   form and then the memory form on the same inputs.

   xmm0 / src / dst are the working operands handed to do_BLENDVPD (dst
   is modified by the call); t_xmm0 / t_src / t_dst hold the round's
   pristine inputs.  The working copies are refreshed from the t_*
   copies before each of the two calls so both forms start from
   identical data.  Output lines are tagged "r" or "m" accordingly. */
3648 void test_BLENDVPD ( void )
3650 V128 xmm0
, src
, dst
, t_xmm0
, t_src
, t_dst
;
3652 for (i
= 0; i
< 10; i
++) {
/* Register-operand form. */
3657 memcpy(&xmm0
, &t_xmm0
, 16);
3658 memcpy(&src
, &t_src
, 16);
3659 memcpy(&dst
, &t_dst
, 16);
3660 do_BLENDVPD(False
/*reg*/, &xmm0
, &src
, &dst
);
3661 printf("r blendvpd ");
/* Memory-operand form, restarted from the same pristine inputs. */
3671 memcpy(&xmm0
, &t_xmm0
, 16);
3672 memcpy(&src
, &t_src
, 16);
3673 memcpy(&dst
, &t_dst
, 16);
3674 do_BLENDVPD(True
/*mem*/, &xmm0
, &src
, &dst
);
3675 printf("m blendvpd ");
3687 /* ------------ BLENDVPS ------------ */
3689 void do_BLENDVPS ( Bool mem
, V128
* xmm0
, V128
* src
, /*MOD*/V128
* dst
)
3692 __asm__
__volatile__(
3693 "movupd (%2), %%xmm0" "\n\t"
3694 "movupd (%1), %%xmm11" "\n\t"
3695 "blendvps (%0), %%xmm11" "\n\t"
3696 "movupd %%xmm11, (%1)" "\n"
3698 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3699 : /*TRASH*/ "xmm11","xmm0"
3702 __asm__
__volatile__(
3703 "movupd (%2), %%xmm0" "\n\t"
3704 "movupd (%1), %%xmm11" "\n\t"
3705 "movupd (%0), %%xmm2" "\n\t"
3706 "blendvps %%xmm2, %%xmm11" "\n\t"
3707 "movupd %%xmm11, (%1)" "\n"
3709 : /*IN*/ "r"(src
), "r"(dst
), "r"(xmm0
)
3710 : /*TRASH*/ "xmm11","xmm2","xmm0"
/* Exercise do_BLENDVPS for ten rounds, each round running the register
   form and then the memory form on the same inputs.

   xmm0 / src / dst are the working operands handed to do_BLENDVPS (dst
   is modified by the call); t_xmm0 / t_src / t_dst hold the round's
   pristine inputs.  The working copies are refreshed from the t_*
   copies before each of the two calls so both forms start from
   identical data.  Output lines are tagged "r" or "m" accordingly. */
3715 void test_BLENDVPS ( void )
3717 V128 xmm0
, src
, dst
, t_xmm0
, t_src
, t_dst
;
3719 for (i
= 0; i
< 10; i
++) {
/* Register-operand form. */
3724 memcpy(&xmm0
, &t_xmm0
, 16);
3725 memcpy(&src
, &t_src
, 16);
3726 memcpy(&dst
, &t_dst
, 16);
3727 do_BLENDVPS(False
/*reg*/, &xmm0
, &src
, &dst
);
3728 printf("r blendvps ");
/* Memory-operand form, restarted from the same pristine inputs. */
3738 memcpy(&xmm0
, &t_xmm0
, 16);
3739 memcpy(&src
, &t_src
, 16);
3740 memcpy(&dst
, &t_dst
, 16);
3741 do_BLENDVPS(True
/*mem*/, &xmm0
, &src
, &dst
);
3742 printf("m blendvps ");
/* Exercise MOVNTDQA ten times by invoking DO_m_r with the instruction
   name and the src / dst operands.
   NOTE(review): DO_m_r is presumably the file's memory-source,
   register-destination test helper -- confirm against its definition. */
3754 void test_MOVNTDQA ( void )
3758 for (i
= 0; i
< 10; i
++) {
3760 /* make sure the load actually happens */
3762 DO_m_r("movntdqa", src
, dst
);
3766 /* ------------ main ------------ */
3768 int main ( int argc
, char** argv
)
3771 // ------ SSE 4.1 ------
3772 test_BLENDPD(); // done Apr.01.2010
3773 test_BLENDPS(); // done Apr.02.2010
3778 test_DPPD(); // done Apr.08.2010
3779 test_DPPS(); // done Apr.09.2010
3781 test_INSERTPS(); // done Apr.01.2010
3783 test_PEXTRB(); // done Apr.15.2010
3784 test_PEXTRD(); // done Apr.14.2010
3785 test_PEXTRQ(); // done Apr.14.2010
3786 test_PEXTRW(); // done Apr.14.2010
3787 test_PINSRQ(); // done Apr.16.2010
3788 test_PINSRD(); // todo
3789 test_PINSRW(); /* Umm, this is SSE2, not SSE4. Right? */
3790 test_PINSRB(); // todo
3792 test_PMAXSD(); // done Apr.09.2010
3793 test_PMAXUD(); // done Apr.16.2010
3796 test_PMINSD(); // done Apr.09.2010
3799 test_PMOVSXBW(); // done Apr.02.2010
3800 test_PMOVSXBD(); // done Mar.30.2010
3801 test_PMOVSXBQ(); // done Mar.30.2010
3802 test_PMOVSXWD(); // done Mar.31.2010
3803 test_PMOVSXWQ(); // done Mar.31.2010
3804 test_PMOVSXDQ(); // done Mar.31.2010
3805 test_PMOVZXBW(); // done Mar.28.2010
3806 test_PMOVZXBD(); // done Mar.29.2010
3807 test_PMOVZXBQ(); // done Mar.29.2010
3808 test_PMOVZXWD(); // done Mar.28.2010
3809 test_PMOVZXWQ(); // done Mar.29.2010
3810 test_PMOVZXDQ(); // done Mar.29.2010
3817 test_ROUNDSD_w_immediate_rounding();
3818 test_ROUNDSS_w_immediate_rounding();
3819 test_ROUNDPD_w_immediate_rounding();
3820 test_ROUNDPS_w_immediate_rounding();
3821 test_ROUNDSD_w_mxcsr_rounding();
3822 test_ROUNDSS_w_mxcsr_rounding();
3823 test_ROUNDPD_w_mxcsr_rounding();
3824 test_ROUNDPS_w_mxcsr_rounding();
3825 // ------ SSE 4.2 ------
3831 test_MOVNTDQA(); /* not sure whether this is 4.1 or 4.2 */