1 /* Copyright (C) 2011 IBM
3 Author: Maynard Johnson <maynardj@us.ibm.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>.
18 The GNU General Public License is contained in the file COPYING.
27 #include <unistd.h> // getopt
34 typedef uint32_t HWord_t
;
36 typedef uint64_t HWord_t
;
37 #endif /* __powerpc64__ */
39 typedef unsigned char Bool
;
43 #ifdef VGP_ppc64le_linux
49 register HWord_t r14
__asm__ ("r14");
50 register HWord_t r15
__asm__ ("r15");
51 register HWord_t r16
__asm__ ("r16");
52 register HWord_t r17
__asm__ ("r17");
53 register double f14
__asm__ ("fr14");
54 register double f15
__asm__ ("fr15");
55 register double f16
__asm__ ("fr16");
56 register double f17
__asm__ ("fr17");
58 static volatile unsigned int div_flags
, div_xer
;
/* Clobber list naming all eight condition-register fields, cr0 through cr7. */
60 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
/* Write _arg into the condition register with "mtcr"; every CR field is
 * declared clobbered through ALLCR.
 * NOTE: the expansion already ends with ';', unlike GET_CR/GET_XER below. */
62 #define SET_CR(_arg) \
63 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );
/* Write _arg into XER with "mtxer"; "xer" is declared clobbered.
 * NOTE: the expansion already ends with ';'. */
65 #define SET_XER(_arg) \
66 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
/* Read the condition register into _lval with "mfcr".
 * No trailing ';' in the expansion -- the caller supplies it. */
68 #define GET_CR(_lval) \
69 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )
/* Read XER into _lval with "mfxer".
 * No trailing ';' in the expansion -- the caller supplies it. */
71 #define GET_XER(_lval) \
72 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
/* Capture CR into _lval_cr and then XER into _lval_xer, in that order.
 * Wrapped in do { } while (0) so it behaves as a single statement. */
74 #define GET_CR_XER(_lval_cr,_lval_xer) \
75 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
80 #define SET_XER_ZERO \
83 #define SET_CR_XER_ZERO \
84 do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
86 #define SET_FPSCR_ZERO \
87 do { double _d = 0.0; \
88 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
92 typedef void (*test_func_t
)(void);
93 typedef struct test_table test_table_t
;
95 /* Defines for the instruction groups; use a bit field to identify them */
96 #define SCALAR_DIV_INST 0x0001
97 #define OTHER_INST 0x0002
99 /* These functions below that construct a table of floating point
100 * values were lifted from none/tests/ppc32/jm-insns.c.
103 #if defined (DEBUG_ARGS_BUILD)
104 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
106 #define AB_DPRINTF(fmt, args...) do { } while (0)
109 static inline void register_farg (void *farg
,
110 int s
, uint16_t _exp
, uint64_t mant
)
114 tmp
= ((uint64_t)s
<< 63) | ((uint64_t)_exp
<< 52) | mant
;
115 *(uint64_t *)farg
= tmp
;
116 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
117 s
, _exp
, mant
, *(uint64_t *)farg
, *(double *)farg
);
120 static inline void register_sp_farg (void *farg
,
121 int s
, uint16_t _exp
, uint32_t mant
)
124 tmp
= ((uint32_t)s
<< 31) | ((uint32_t)_exp
<< 23) | mant
;
125 *(uint32_t *)farg
= tmp
;
128 typedef struct fp_test_args
{
134 fp_test_args_t fp_cmp_tests
[] = {
202 fp_test_args_t two_arg_fp_tests
[] = {
270 static int nb_special_fargs
;
271 static double * spec_fargs
;
272 static float * spec_sp_fargs
;
274 static void build_special_fargs_table(void)
277 Entry Sign Exp fraction Special value
278 0 0 3fd 0x8000000000000ULL Positive finite number
279 1 0 404 0xf000000000000ULL ...
280 2 0 001 0x8000000b77501ULL ...
281 3 0 7fe 0x800000000051bULL ...
282 4 0 012 0x3214569900000ULL ...
283 5 0 000 0x0000000000000ULL +0.0 (+zero)
284 6 1 000 0x0000000000000ULL -0.0 (-zero)
285 7 0 7ff 0x0000000000000ULL +infinity
286 8 1 7ff 0x0000000000000ULL -infinity
287 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN
288 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN
289 11 0 7ff 0x8000000000000ULL +QNaN
290 12 1 7ff 0x8000000000000ULL -QNaN
291 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction)
292 14 1 40d 0x0650f5a07b353ULL Negative finite number
293 15 0 412 0x32585a9900000ULL A couple more positive finite numbers
294 16 0 413 0x82511a2000000ULL ...
306 spec_fargs
= malloc( 17 * sizeof(double) );
307 spec_sp_fargs
= malloc( 17 * sizeof(float) );
312 mant
= 0x8000000000000ULL
;
313 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
318 mant
= 0xf000000000000ULL
;
319 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
321 /* None of the ftdiv tests succeed.
322 * FRA = value #0; FRB = value #1
324 * fl_flag || fg_flag || fe_flag = 100
327 /*************************************************
330 *************************************************/
332 /* fe_flag <- 1 if FRA is a NaN
333 * FRA = value #9; FRB = value #1
334 * e_a = 1024; e_b = 5
335 * fl_flag || fg_flag || fe_flag = 101
338 /* fe_flag <- 1 if FRB is a NaN
339 * FRA = value #1; FRB = value #12
340 * e_a = 5; e_b = 1024
341 * fl_flag || fg_flag || fe_flag = 101
344 /* fe_flag <- 1 if e_b <= -1022
345 * FRA = value #0; FRB = value #2
346 * e_a = -2; e_b = -1022
347 * fl_flag || fg_flag || fe_flag = 101
353 mant
= 0x8000000b77501ULL
;
354 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
356 /* fe_flag <- 1 if e_b >= 1021
357 * FRA = value #1; FRB = value #3
358 * e_a = 5; e_b = 1023
359 * fl_flag || fg_flag || fe_flag = 101
364 mant
= 0x800000000051bULL
;
365 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
367 /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
368 * Let FRA = value #3 and FRB be value #0.
369 * e_a = 1023; e_b = -2
370 * fl_flag || fg_flag || fe_flag = 101
373 /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
374 * Let FRA = value #0 above and FRB be value #3 above
375 * e_a = -2; e_b = 1023
376 * fl_flag || fg_flag || fe_flag = 101
379 /* fe_flag <- 1 if FRA != 0 && e_a <= -970
380 * Let FRA = value #4 and FRB be value #0
381 * e_a = -1005; e_b = -2
382 * fl_flag || fg_flag || fe_flag = 101
387 mant
= 0x3214569900000ULL
;
388 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
390 /*************************************************
393 *************************************************/
394 /* fg_flag <- 1 if FRA is an Infinity
395 * NOTE: FRA = Inf also sets fe_flag
396 * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
398 * Let FRA be value #7 and FRB be value #1
399 * e_a = 1024; e_b = 5
400 * fl_flag || fg_flag || fe_flag = 111
403 * Let FRA be value #8 and FRB be value #1
404 * e_a = 1024; e_b = 5
405 * fl_flag || fg_flag || fe_flag = 111
409 /* fg_flag <- 1 if FRB is an Infinity
410 * NOTE: FRB = Inf also sets fe_flag
411 * Let FRA be value #1 and FRB be value #7
412 * e_a = 5; e_b = 1024
413 * fl_flag || fg_flag || fe_flag = 111
416 /* fg_flag <- 1 if FRB is denormalized
417 * NOTE: e_b < -1022 ==> fe_flag <- 1
418 * Let FRA be value #0 and FRB be value #13
419 * e_a = -2; e_b = -1023
420 * fl_flag || fg_flag || fe_flag = 111
423 /* fg_flag <- 1 if FRB is +zero
424 * NOTE: FRB = zero also sets fe_flag
425 * Let FRA = val #5; FRB = val #5
426 * e_a = -1023; e_b = -1023
427 * fl_flag || fg_flag || fe_flag = 111
430 /* fg_flag <- 1 if FRB is -zero
431 * NOTE: FRB = zero also sets fe_flag
432 * Let FRA = val #5; FRB = val #6
433 * e_a = -1023; e_b = -1023
434 * fl_flag || fg_flag || fe_flag = 111
438 /* +0.0 : 0 0x000 0x0000000000000 */
442 mant
= 0x0000000000000ULL
;
443 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
445 /* -0.0 : 1 0x000 0x0000000000000 */
449 mant
= 0x0000000000000ULL
;
450 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
452 /* +infinity : 0 0x7FF 0x0000000000000 */
456 mant
= 0x0000000000000ULL
;
457 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
459 /* -infinity : 1 0x7FF 0x0000000000000 */
463 mant
= 0x0000000000000ULL
;
464 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
467 * This comment applies to values #9 and #10 below:
468 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
469 * so we can't just copy the double-precision value to the corresponding slot in the
470 * single-precision array (i.e., in the loop at the end of this function). Instead, we
471 * have to manually set the bits using register_sp_farg().
474 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */
478 mant
= 0x7FFFFFFFFFFFFULL
;
479 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
482 register_sp_farg(&spec_sp_fargs
[i
-1], s
, _exp
, mant_sp
);
484 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */
488 mant
= 0x7FFFFFFFFFFFFULL
;
489 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
492 register_sp_farg(&spec_sp_fargs
[i
-1], s
, _exp
, mant_sp
);
494 /* +QNaN : 0 0x7FF 0x8000000000000 */
498 mant
= 0x8000000000000ULL
;
499 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
501 /* -QNaN : 1 0x7FF 0x8000000000000 */
505 mant
= 0x8000000000000ULL
;
506 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
508 /* denormalized value */
512 mant
= 0x8340000078000ULL
;
513 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
515 /* Negative finite number */
519 mant
= 0x0650f5a07b353ULL
;
520 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
522 /* A couple positive finite numbers ... */
526 mant
= 0x32585a9900000ULL
;
527 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
532 mant
= 0x82511a2000000ULL
;
533 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
535 nb_special_fargs
= i
;
536 for (j
= 0; j
< i
; j
++) {
537 if (!(j
== 9 || j
== 10))
538 spec_sp_fargs
[j
] = spec_fargs
[j
];
545 test_func_t test_category
;
547 unsigned int test_group
;
556 VX_SCALAR_FP_NMSUB
= 0,
557 // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE
558 VX_VECTOR_FP_MULT_AND_OP2
= 10,
559 // and before this line
565 typedef struct vx_fp_test
567 test_func_t test_func
;
569 fp_test_args_t
* targs
;
571 precision_type_t precision
;
572 vx_fp_test_type type
;
576 static vector
unsigned int vec_out
, vec_inA
, vec_inB
, vec_inC
;
579 static void test_xvcmpeqdp(void)
582 __asm__
__volatile__ ("xvcmpeqdp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
584 __asm__
__volatile__ ("xvcmpeqdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
587 static void test_xvcmpgedp(void)
590 __asm__
__volatile__ ("xvcmpgedp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
592 __asm__
__volatile__ ("xvcmpgedp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
595 static void test_xvcmpgtdp(void)
598 __asm__
__volatile__ ("xvcmpgtdp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
600 __asm__
__volatile__ ("xvcmpgtdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
603 static void test_xvcmpeqsp(void)
606 __asm__
__volatile__ ("xvcmpeqsp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
608 __asm__
__volatile__ ("xvcmpeqsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
611 static void test_xvcmpgesp(void)
614 __asm__
__volatile__ ("xvcmpgesp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
616 __asm__
__volatile__ ("xvcmpgesp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
619 static void test_xvcmpgtsp(void)
622 __asm__
__volatile__ ("xvcmpgtsp. %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
624 __asm__
__volatile__ ("xvcmpgtsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
629 static void test_xsnmsub(void)
632 __asm__
__volatile__ ("xsnmsubadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
634 __asm__
__volatile__ ("xsnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
637 static void test_xvmadd(void)
641 __asm__
__volatile__ ("xvmaddadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
643 __asm__
__volatile__ ("xvmaddasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
646 __asm__
__volatile__ ("xvmaddmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
648 __asm__
__volatile__ ("xvmaddmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
651 static void test_xvnmadd(void)
655 __asm__
__volatile__ ("xvnmaddadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
657 __asm__
__volatile__ ("xvnmaddasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
660 __asm__
__volatile__ ("xvnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
662 __asm__
__volatile__ ("xvnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
665 static void test_xvnmsub(void)
669 __asm__
__volatile__ ("xvnmsubadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
671 __asm__
__volatile__ ("xvnmsubasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
674 __asm__
__volatile__ ("xvnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
676 __asm__
__volatile__ ("xvnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
679 static void test_xvmsub(void)
683 __asm__
__volatile__ ("xvmsubadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
685 __asm__
__volatile__ ("xvmsubasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
688 __asm__
__volatile__ ("xvmsubmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
690 __asm__
__volatile__ ("xvmsubmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
693 static void test_xssqrtdp(void)
695 __asm__
__volatile__ ("xssqrtdp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
698 static void test_xsrdpim(void)
700 __asm__
__volatile__ ("xsrdpim %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
703 static void test_xsrdpip(void)
705 __asm__
__volatile__ ("xsrdpip %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
708 static void test_xstdivdp(void)
710 __asm__
__volatile__ ("xstdivdp 6, %x0, %x1" : : "wa" (vec_inA
), "wa" (vec_inB
));
713 static void test_xsmaxdp(void)
715 __asm__
__volatile__ ("xsmaxdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
718 static void test_xsmindp(void)
720 __asm__
__volatile__ ("xsmindp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
723 static void test_xvadddp(void)
725 __asm__
__volatile__ ("xvadddp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
728 static void test_xvaddsp(void)
730 __asm__
__volatile__ ("xvaddsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
733 static void test_xvdivdp(void)
735 __asm__
__volatile__ ("xvdivdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
738 static void test_xvdivsp(void)
740 __asm__
__volatile__ ("xvdivsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
743 static void test_xvmuldp(void)
745 __asm__
__volatile__ ("xvmuldp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
748 static void test_xvmulsp(void)
750 __asm__
__volatile__ ("xvmulsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
753 static void test_xvsubdp(void)
755 __asm__
__volatile__ ("xvsubdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
758 static void test_xvmaxdp(void)
760 __asm__
__volatile__ ("xvmaxdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
763 static void test_xvmindp(void)
765 __asm__
__volatile__ ("xvmindp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
768 static void test_xvmaxsp(void)
770 __asm__
__volatile__ ("xvmaxsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
773 static void test_xvminsp(void)
775 __asm__
__volatile__ ("xvminsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
778 static void test_xvsubsp(void)
780 __asm__
__volatile__ ("xvsubsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
783 static void test_xvresp(void)
785 __asm__
__volatile__ ("xvresp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
788 static void test_xxsel(void)
790 unsigned long long * dst
;
791 unsigned long long xa
[] = { 0xa12bc37de56f9708ULL
, 0x3894c1fddeadbeefULL
};
792 unsigned long long xb
[] = { 0xfedc432124681235ULL
, 0xf1e2d3c4e0057708ULL
};
793 unsigned long long xc
[] = { 0xffffffff01020304ULL
, 0x128934bd00000000ULL
};
795 memcpy(&vec_inA
, xa
, 16);
796 memcpy(&vec_inB
, xb
, 16);
797 memcpy(&vec_inC
, xc
, 16);
800 __asm__
__volatile__ ("xxsel %x0, %x1, %x2, %x3" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
),"wa" (vec_inC
));
801 dst
= (unsigned long long *) &vec_out
;
802 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa
[0], xb
[0], xc
[0], *dst
);
804 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa
[1], xb
[1], xc
[1], *dst
);
808 static void test_xxspltw(void)
811 unsigned long long * dst
= NULL
;
812 unsigned int xb
[] = { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708};
814 void * vecB_ptr
= &vec_inB
;
816 for (i
= 3; i
>=0; i
--) {
817 memcpy(vecB_ptr
, &xb
[i
], 4);
821 for (i
= 0; i
< 4; i
++) {
822 memcpy(vecB_ptr
, &xb
[i
], 4);
827 for (uim
= 0; uim
< 4; uim
++) {
830 __asm__
__volatile__ ("xxspltw %x0, %x1, 0" : "=wa" (vec_out
): "wa" (vec_inB
));
833 __asm__
__volatile__ ("xxspltw %x0, %x1, 1" : "=wa" (vec_out
): "wa" (vec_inB
));
836 __asm__
__volatile__ ("xxspltw %x0, %x1, 2" : "=wa" (vec_out
): "wa" (vec_inB
));
839 __asm__
__volatile__ ("xxspltw %x0, %x1, 3" : "=wa" (vec_out
): "wa" (vec_inB
));
842 dst
= (unsigned long long *) &vec_out
;
843 printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx", xb
[0], xb
[1],
844 xb
[2], xb
[3], uim
, *dst
);
846 printf("%016llx\n", *dst
);
851 static void test_xscvdpsxws(void)
853 __asm__
__volatile__ ("xscvdpsxws %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
856 static void test_xscvdpuxds(void)
858 __asm__
__volatile__ ("xscvdpuxds %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
861 static void test_xvcpsgndp(void)
863 __asm__
__volatile__ ("xvcpsgndp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
866 static void test_xvcpsgnsp(void)
868 __asm__
__volatile__ ("xvcpsgnsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
871 static void test_xvcvdpsxws(void)
873 __asm__
__volatile__ ("xvcvdpsxws %x0, %x1 " : "=wa" (vec_out
): "wa" (vec_inB
));
876 static void test_xvcvspsxws(void)
878 __asm__
__volatile__ ("xvcvspsxws %x0, %x1 " : "=wa" (vec_out
): "wa" (vec_inB
));
882 vx_vector_one_fp_arg_tests
[] = {
883 { &test_xvresp
, "xvresp", NULL
, 16, SINGLE_TEST
, VX_BASIC_CMP
, "1/x"},
884 { &test_xvcvdpsxws
, "xvcvdpsxws", NULL
, 16, DOUBLE_TEST
, VX_CONV_WORD
, "conv"},
885 { &test_xvcvspsxws
, "xvcvspsxws", NULL
, 16, SINGLE_TEST
, VX_CONV_WORD
, "conv"},
886 { NULL
, NULL
, NULL
, 0 , 0, 0, NULL
}
890 vx_vector_fp_tests
[] = {
891 { &test_xvcmpeqdp
, "xvcmpeqdp", fp_cmp_tests
, 64, DOUBLE_TEST
, VX_BASIC_CMP
, "eq"},
892 { &test_xvcmpgedp
, "xvcmpgedp", fp_cmp_tests
, 64, DOUBLE_TEST
, VX_BASIC_CMP
, "ge"},
893 { &test_xvcmpgtdp
, "xvcmpgtdp", fp_cmp_tests
, 64, DOUBLE_TEST
, VX_BASIC_CMP
, "gt"},
894 { &test_xvcmpeqsp
, "xvcmpeqsp", fp_cmp_tests
, 64, SINGLE_TEST
, VX_BASIC_CMP
, "eq"},
895 { &test_xvcmpgesp
, "xvcmpgesp", fp_cmp_tests
, 64, SINGLE_TEST
, VX_BASIC_CMP
, "ge"},
896 { &test_xvcmpgtsp
, "xvcmpgtsp", fp_cmp_tests
, 64, SINGLE_TEST
, VX_BASIC_CMP
, "gt"},
897 { &test_xvadddp
, "xvadddp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "+" },
898 { &test_xvaddsp
, "xvaddsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "+" },
899 { &test_xvdivdp
, "xvdivdp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "/" },
900 { &test_xvdivsp
, "xvdivsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "/" },
901 { &test_xvmuldp
, "xvmuldp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "*" },
902 { &test_xvmulsp
, "xvmulsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "*" },
903 { &test_xvsubdp
, "xvsubdp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "-" },
904 { &test_xvsubsp
, "xvsubsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "-" },
905 { &test_xvmaxdp
, "xvmaxdp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "@max@" },
906 { &test_xvmindp
, "xvmindp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "@min@" },
907 { &test_xvmaxsp
, "xvmaxsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "@max@" },
908 { &test_xvminsp
, "xvminsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "@min@" },
909 { &test_xvcpsgndp
, "xvcpsgndp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, "+-cp"},
910 { &test_xvcpsgnsp
, "xvcpsgnsp", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_DEFAULT
, "+-cp"},
911 { NULL
, NULL
, NULL
, 0 , 0, 0, NULL
}
916 vx_aORm_fp_tests
[] = {
917 { &test_xsnmsub
, "xsnmsub", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_SCALAR_FP_NMSUB
, "!*-"},
918 { &test_xvmadd
, "xvmadd", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "*+"},
919 { &test_xvmadd
, "xvmadd", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "*+"},
920 { &test_xvnmadd
, "xvnmadd", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "!*+"},
921 { &test_xvnmadd
, "xvnmadd", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "!*+"},
922 { &test_xvmsub
, "xvmsub", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "*-"},
923 { &test_xvmsub
, "xvmsub", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "*-"},
924 { &test_xvnmsub
, "xvnmsub", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "!*-"},
925 { &test_xvnmsub
, "xvnmsub", two_arg_fp_tests
, 64, SINGLE_TEST
, VX_VECTOR_FP_MULT_AND_OP2
, "!*-"},
926 { NULL
, NULL
, NULL
, 0, 0, 0, NULL
}
930 vx_simple_scalar_fp_tests
[] = {
931 { &test_xssqrtdp
, "xssqrtdp", NULL
, 17, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
932 { &test_xsrdpim
, "xsrdpim", NULL
, 17, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
933 { &test_xsrdpip
, "xsrdpip", NULL
, 17, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
934 { &test_xstdivdp
, "xstdivdp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
935 { &test_xsmaxdp
, "xsmaxdp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
936 { &test_xsmindp
, "xsmindp", two_arg_fp_tests
, 64, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
937 { &test_xscvdpsxws
, "xscvdpsxws", NULL
, 17, DOUBLE_TEST
, VX_CONV_WORD
, NULL
},
938 { &test_xscvdpuxds
, "xscvdpuxds", NULL
, 17, DOUBLE_TEST
, VX_DEFAULT
, NULL
},
939 { NULL
, NULL
, NULL
, 0, 0, 0, NULL
}
944 static void test_bpermd(void)
946 /* NOTE: Bit number is '0 . . . 63'
948 * Permuted bits 0-7 are generated as follows:
949 * index = (r14)8*i:8*i+7
950 * perm[i] = (r15)index
952 * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB
953 * byte of r14, 0x1b(27/base 10). This identifies bit 27 of r15, which is '1'.
954 * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
955 * So the results of the first two iterations of i are:
959 r15
= 0xa12bc37de56f9708ULL
;
960 r14
= 0x1b2c31f030000001ULL
;
961 __asm__
__volatile__ ("bpermd %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
962 printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14
,
963 (unsigned long long)r15
, (unsigned long long)r17
);
974 /* Possible divde type combinations are:
981 static void test_divde(void)
983 int divde_type
= DIV_BASE
;
985 divde_type
|= DIV_OE
;
987 divde_type
|= DIV_DOT
;
989 switch (divde_type
) {
992 __asm__
__volatile__ ("divde %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
993 GET_CR_XER(div_flags
, div_xer
);
997 __asm__
__volatile__ ("divdeo %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
998 GET_CR_XER(div_flags
, div_xer
);
1002 __asm__
__volatile__ ("divde. %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1003 GET_CR_XER(div_flags
, div_xer
);
1007 __asm__
__volatile__ ("divdeo. %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1008 GET_CR_XER(div_flags
, div_xer
);
1011 fprintf(stderr
, "Invalid divde type. Exiting\n");
1017 static void test_divweu(void)
1019 int divweu_type
= DIV_BASE
;
1021 divweu_type
|= DIV_OE
;
1023 divweu_type
|= DIV_DOT
;
1025 switch (divweu_type
) {
1028 __asm__
__volatile__ ("divweu %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1029 GET_CR_XER(div_flags
, div_xer
);
1033 __asm__
__volatile__ ("divweuo %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1034 GET_CR_XER(div_flags
, div_xer
);
1038 __asm__
__volatile__ ("divweu. %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1039 GET_CR_XER(div_flags
, div_xer
);
1043 __asm__
__volatile__ ("divweuo. %0, %1, %2" : "=r" (r17
) : "r" (r14
),"r" (r15
));
1044 GET_CR_XER(div_flags
, div_xer
);
1047 fprintf(stderr
, "Invalid divweu type. Exiting\n");
1052 static void test_fctiduz(void)
1055 __asm__
__volatile__ ("fctiduz. %0, %1" : "=d" (f17
) : "d" (f14
));
1057 __asm__
__volatile__ ("fctiduz %0, %1" : "=d" (f17
) : "d" (f14
));
1060 static void test_fctidu(void)
1063 __asm__
__volatile__ ("fctidu. %0, %1" : "=d" (f17
) : "d" (f14
));
1065 __asm__
__volatile__ ("fctidu %0, %1" : "=d" (f17
) : "d" (f14
));
1068 static void test_fctiwuz(void)
1071 __asm__
__volatile__ ("fctiwuz. %0, %1" : "=d" (f17
) : "d" (f14
));
1073 __asm__
__volatile__ ("fctiwuz %0, %1" : "=d" (f17
) : "d" (f14
));
1076 static void test_fctiwu(void)
1079 __asm__
__volatile__ ("fctiwu. %0, %1" : "=d" (f17
) : "d" (f14
));
1081 __asm__
__volatile__ ("fctiwu %0, %1" : "=d" (f17
) : "d" (f14
));
1084 typedef struct simple_test
{
1085 test_func_t test_func
;
1087 precision_type_t precision
;
1090 static simple_test_t fct_tests
[] = {
1091 { &test_fctiduz
, "fctiduz", DOUBLE_TEST
},
1092 { &test_fctidu
, "fctidu", DOUBLE_TEST
},
1093 { &test_fctiwuz
, "fctiwuz", SINGLE_TEST
},
1094 { &test_fctiwu
, "fctiwu", SINGLE_TEST
},
1098 static void setup_sp_fp_args(fp_test_args_t
* targs
, Bool swap_inputs
)
1100 int a_idx
, b_idx
, i
;
1102 void * vec_src
= swap_inputs
? &vec_out
: &vec_inB
;
1104 for (i
= 0; i
< 4; i
++) {
1105 a_idx
= targs
->fra_idx
;
1106 b_idx
= targs
->frb_idx
;
1107 inA
= (void *)&spec_sp_fargs
[a_idx
];
1108 inB
= (void *)&spec_sp_fargs
[b_idx
];
1109 // copy single precision FP into vector element i
1110 memcpy(((void *)&vec_inA
) + (i
* 4), inA
, 4);
1111 memcpy(vec_src
+ (i
* 4), inB
, 4);
1116 static void setup_dp_fp_args(fp_test_args_t
* targs
, Bool swap_inputs
)
1118 int a_idx
, b_idx
, i
;
1120 void * vec_src
= swap_inputs
? (void *)&vec_out
: (void *)&vec_inB
;
1122 for (i
= 0; i
< 2; i
++) {
1123 a_idx
= targs
->fra_idx
;
1124 b_idx
= targs
->frb_idx
;
1125 inA
= (void *)&spec_fargs
[a_idx
];
1126 inB
= (void *)&spec_fargs
[b_idx
];
1127 // copy double precision FP into vector element i
1128 memcpy(((void *)&vec_inA
) + (i
* 8), inA
, 8);
1129 memcpy(vec_src
+ (i
* 8), inB
, 8);
1134 #define VX_NOT_CMP_OP 0xffffffff
1135 static void print_vector_fp_result(unsigned int cc
, vx_fp_test_t
* test_group
, int i
)
1137 int a_idx
, b_idx
, k
;
1138 char * name
= malloc(20);
1139 int dp
= test_group
->precision
== DOUBLE_TEST
? 1 : 0;
1140 int loops
= dp
? 2 : 4;
1141 fp_test_args_t
* targs
= &test_group
->targs
[i
];
1142 unsigned long long * frA_dp
, * frB_dp
, * dst_dp
;
1143 unsigned int * frA_sp
, *frB_sp
, * dst_sp
;
1144 strcpy(name
, test_group
->name
);
1145 printf("#%d: %s%s ", dp
? i
/2 : i
/4, name
, (do_dot
? "." : ""));
1146 for (k
= 0; k
< loops
; k
++) {
1147 a_idx
= targs
->fra_idx
;
1148 b_idx
= targs
->frb_idx
;
1152 frA_dp
= (unsigned long long *)&spec_fargs
[a_idx
];
1153 frB_dp
= (unsigned long long *)&spec_fargs
[b_idx
];
1154 printf("%016llx %s %016llx", *frA_dp
, test_group
->op
, *frB_dp
);
1156 frA_sp
= (unsigned int *)&spec_sp_fargs
[a_idx
];
1157 frB_sp
= (unsigned int *)&spec_sp_fargs
[b_idx
];
1158 printf("%08x %s %08x", *frA_sp
, test_group
->op
, *frB_sp
);
1162 if (cc
!= VX_NOT_CMP_OP
)
1163 printf(" ? cc=%x", cc
);
1166 dst_dp
= (unsigned long long *) &vec_out
;
1167 printf(" => %016llx %016llx\n", dst_dp
[0], dst_dp
[1]);
1169 dst_sp
= (unsigned int *) &vec_out
;
1170 printf(" => %08x %08x %08x %08x\n", dst_sp
[0], dst_sp
[1], dst_sp
[2], dst_sp
[3]);
1176 static void print_vx_aORm_fp_result(unsigned long long * XT_arg
, unsigned long long * XB_arg
,
1177 vx_fp_test_t
* test_group
, int i
)
1180 char * name
= malloc(20);
1181 int dp
= test_group
->precision
== DOUBLE_TEST
? 1 : 0;
1182 int loops
= dp
? 2 : 4;
1183 fp_test_args_t
* targs
= &test_group
->targs
[i
];
1184 unsigned long long frA_dp
, * dst_dp
;
1185 unsigned int frA_sp
, * dst_sp
;
1187 strcpy(name
, test_group
->name
);
1190 strcat(name
, "adp");
1192 strcat(name
, "asp");
1195 strcat(name
, "mdp");
1197 strcat(name
, "msp");
1199 printf("#%d: %s ", dp
? i
/2 : i
/4, name
);
1200 for (k
= 0; k
< loops
; k
++) {
1201 a_idx
= targs
->fra_idx
;
1205 frA_dp
= *((unsigned long long *)&spec_fargs
[a_idx
]);
1206 printf("%s(%016llx,%016llx,%016llx)", test_group
->op
, XT_arg
[k
], frA_dp
, XB_arg
[k
]);
1208 unsigned int * xt_sp
= (unsigned int *)XT_arg
;
1209 unsigned int * xb_sp
= (unsigned int *)XB_arg
;
1210 frA_sp
= *((unsigned int *)&spec_sp_fargs
[a_idx
]);
1211 printf("%s(%08x,%08x,%08x)", test_group
->op
, xt_sp
[k
], frA_sp
, xb_sp
[k
]);
1217 dst_dp
= (unsigned long long *) &vec_out
;
1218 printf(" => %016llx %016llx\n", dst_dp
[0], dst_dp
[1]);
1220 dst_sp
= (unsigned int *) &vec_out
;
1221 printf(" => %08x %08x %08x %08x\n", dst_sp
[0], dst_sp
[1], dst_sp
[2], dst_sp
[3]);
1226 /* This function currently only supports double precision input arguments. */
1227 static void test_vx_simple_scalar_fp_ops(void)
1232 build_special_fargs_table();
1233 while ((func
= vx_simple_scalar_fp_tests
[k
].test_func
)) {
1234 unsigned long long * frap
, * frbp
, * dst
;
1237 vx_fp_test_t test_group
= vx_simple_scalar_fp_tests
[k
];
1238 Bool convToWord
= (test_group
.type
== VX_CONV_WORD
);
1239 if (test_group
.precision
!= DOUBLE_TEST
) {
1240 fprintf(stderr
, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1243 pv
= (unsigned int *)&vec_out
;
1245 for (idx
= 0; idx
< 4; idx
++, pv
++)
1248 /* If num_tests is exactly equal to nb_special_fargs, this implies the
1249 * instruction being tested only requires one floating point argument
1252 if (test_group
.num_tests
== nb_special_fargs
&& !test_group
.targs
) {
1253 void * inB
, * vec_void_ptr
= (void *)&vec_inB
;
1257 for (i
= 0; i
< nb_special_fargs
; i
++) {
1258 inB
= (void *)&spec_fargs
[i
];
1259 frbp
= (unsigned long long *)&spec_fargs
[i
];
1260 memcpy(vec_void_ptr
, inB
, 8);
1262 dst
= (unsigned long long *) &vec_out
;
1265 printf("#%d: %s %016llx => %016llx\n", i
, test_group
.name
, *frbp
,
1266 convToWord
? (*dst
& 0x00000000ffffffffULL
) : *dst
);
1269 void * inA
, * inB
, * vecA_void_ptr
, * vecB_void_ptr
;
1270 unsigned int condreg
, flags
;
1271 int isTdiv
= (strstr(test_group
.name
, "xstdivdp") != NULL
) ? 1 : 0;
1274 vecA_void_ptr
= (void *)&vec_inA
+ 8;
1275 vecB_void_ptr
= (void *)&vec_inB
+ 8;
1277 vecA_void_ptr
= (void *)&vec_inA
;
1278 vecB_void_ptr
= (void *)&vec_inB
;
1280 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1281 fp_test_args_t aTest
= test_group
.targs
[i
];
1282 inA
= (void *)&spec_fargs
[aTest
.fra_idx
];
1283 inB
= (void *)&spec_fargs
[aTest
.frb_idx
];
1284 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
1285 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
1286 // Only need to copy one doubleword into each vector's element 0
1287 memcpy(vecA_void_ptr
, inA
, 8);
1288 memcpy(vecB_void_ptr
, inB
, 8);
1294 condreg
= (flags
& 0x000000f0) >> 4;
1295 printf("#%d: %s %016llx,%016llx => cr %x\n", i
, test_group
.name
, *frap
, *frbp
, condreg
);
1297 dst
= (unsigned long long *) &vec_out
;
1300 printf("#%d: %s %016llx,%016llx => %016llx\n", i
, test_group
.name
,
1301 *frap
, *frbp
, *dst
);
// Runs every entry of vx_aORm_fp_tests (the table ends at a null test_func).
// Scalar groups (type below VX_VECTOR_FP_MULT_AND_OP2) print one line per test
// under the group name with an adp suffix and, further down, an mdp suffix;
// vector groups (type below VX_BASIC_CMP) are reported through
// print_vx_aORm_fp_result().
// NOTE(review): this listing omits several lines of the function (braces,
// some declarations, the statement that executes the instruction), so the
// exact control flow between the visible statements should be confirmed
// against the complete file.
1310 static void test_vx_aORm_fp_ops(void)
1312 /* These ops need a third src argument, which is stored in element 0 of
1313 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
1314 * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
1315 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test
1316 * data (input args, result) contain only two inputs, so I arbitrarily
1317 * choose some spec_fargs elements for the third source argument.
1318 * Note that that by using the same input data for a given pair of
1319 * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
1320 * arguments), the expected result should be the same.
// Scratch buffer for the printed name of scalar tests: the group name plus a
// three character suffix appended with strcat below.
// NOTE(review): the malloc result is not checked in the visible lines and the
// 20 byte size assumes short group names -- confirm.
1325 char * test_name
= (char *)malloc(20);
// Presumably fills spec_fargs and spec_sp_fargs; the helper is defined elsewhere.
1329 build_special_fargs_table();
1330 while ((func
= vx_aORm_fp_tests
[k
].test_func
)) {
1332 Bool repeat
= False
;
1333 Bool scalar
= False
;
1334 unsigned long long * frap
, * frbp
, * dst
;
// Work on a by-value copy of the current table entry.
1335 vx_fp_test_t test_group
= vx_aORm_fp_tests
[k
];
1336 vx_fp_test_type test_type
= test_group
.type
;
1337 do_dp
= test_group
.precision
== DOUBLE_TEST
? True
: False
;
// Scalar test types sort below VX_VECTOR_FP_MULT_AND_OP2.
1340 if (test_type
< VX_VECTOR_FP_MULT_AND_OP2
) {
1342 strcpy(test_name
, test_group
.name
);
1346 // Only support double precision scalar ops in this function
1348 strcat(test_name
, "adp");
1350 fprintf(stderr
, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1355 } else if (test_type
< VX_BASIC_CMP
) {
1356 // Then it must be a VX_VECTOR_xxx type
// Vector ops consume several test entries per iteration: 2 doubles or 4 floats.
1357 stride
= do_dp
? 2 : 4;
1359 // No need to work up the testcase name here, since that will be done in
1360 // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
1365 printf("ERROR: Invalid VX FP test type %d\n", test_type
);
1370 for (i
= 0; i
< test_group
.num_tests
; i
+=stride
) {
// Snapshots of the XT and XB vectors taken before the result is read back.
1373 unsigned long long vsr_XT
[2];
1374 unsigned long long vsr_XB
[2];
1375 fp_test_args_t aTest
= test_group
.targs
[i
];
// Choose stride consecutive spec_fargs indices for the extra source operand;
// the modulo keeps every fp_idx[m] below nb_special_fargs.
1376 for (m
= 0; m
< stride
; m
++)
1377 fp_idx
[m
] = i
% (nb_special_fargs
- stride
) + m
;
1379 /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
1380 * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
1381 * inputs as described above:
// VECTOR_ADDR gives the address the scalar doubleword is copied to: 8 bytes
// into the vector when VGP_ppc64le_linux is defined, the vector start in the
// other definition (the preprocessor line separating the two is not visible).
1386 #ifdef VGP_ppc64le_linux
1387 #define VECTOR_ADDR(_v) ((void *)&_v) + 8
1389 #define VECTOR_ADDR(_v) ((void *)&_v)
1391 // For scalar op, only need to copy one doubleword into each vector's element 0
1392 inA
= (void *)&spec_fargs
[aTest
.fra_idx
];
1393 inB
= (void *)&spec_fargs
[aTest
.frb_idx
];
1394 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
1395 memcpy(VECTOR_ADDR(vec_inA
), inA
, 8);
// First arrangement: frb goes to vec_out and the extra operand to vec_inB.
1397 memcpy(VECTOR_ADDR(vec_out
), inB
, 8); // src2
1398 memcpy(VECTOR_ADDR(vec_inB
), &spec_fargs
[fp_idx
[0]], 8); //src3
1399 frbp
= (unsigned long long *)&spec_fargs
[fp_idx
[0]];
// Second arrangement: frb goes to vec_inB and the extra operand to vec_out.
1401 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
1402 memcpy(VECTOR_ADDR(vec_inB
), inB
, 8); // src2
1403 memcpy(VECTOR_ADDR(vec_out
), &spec_fargs
[fp_idx
[0]], 8); //src3
// Keep the XT input for printing; vec_out is read again as the result below.
1405 memcpy(vsr_XT
, VECTOR_ADDR(vec_out
), 8);
// Vector op: fill each element of the extra source vector (vec_inB when repeat
// is set, vec_out otherwise) from the special argument tables.
1407 int j
, loops
= do_dp
? 2 : 4;
1408 size_t len
= do_dp
? 8 : 4;
1409 void * vec_src
= repeat
? (void *)&vec_inB
: (void *)&vec_out
;
1410 for (j
= 0; j
< loops
; j
++) {
1412 memcpy(vec_src
+ (j
* len
), &spec_fargs
[fp_idx
[j
]], len
);
1414 memcpy(vec_src
+ (j
* len
), &spec_sp_fargs
[fp_idx
[j
]], len
);
1417 setup_dp_fp_args(&test_group
.targs
[i
], repeat
);
1419 setup_sp_fp_args(&test_group
.targs
[i
], repeat
);
// Capture both full 16 byte input vectors for the vector result printer.
1421 memcpy(vsr_XT
, &vec_out
, 16);
1422 memcpy(vsr_XB
, &vec_inB
, 16);
// The result is read back from vec_out.
1426 dst
= (unsigned long long *) &vec_out
;
1429 if (test_type
< VX_VECTOR_FP_MULT_AND_OP2
)
1430 printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i
,
1431 test_name
, test_group
.op
, vsr_XT
[0], *frap
, *frbp
, *dst
);
1433 print_vx_aORm_fp_result(vsr_XT
, vsr_XB
, &test_group
, i
);
// Scalar groups rebuild the printed name with the mdp suffix here.
1439 if (test_type
< VX_VECTOR_FP_MULT_AND_OP2
) {
1440 strcpy(test_name
, test_group
.name
);
1441 strcat(test_name
, "mdp");
// Runs every entry of vx_vector_one_fp_arg_tests (the table ends at a null
// test_func).  Each iteration packs 2 doubles from spec_fargs or 4 floats from
// spec_sp_fargs into vec_inB and prints every input next to the matching
// element of vec_out.  In the single precision path the visible code compares
// the result with 1/input within a tolerance and selects PASS; the condition
// guarding that check and the alternative string are on lines not shown here.
// NOTE(review): braces, some declarations and the statement that executes the
// instruction are missing from this listing.
1452 static void test_vx_vector_one_fp_arg(void)
1457 build_special_fargs_table();
1459 while ((func
= vx_vector_one_fp_arg_tests
[k
].test_func
)) {
1461 vx_fp_test_t test_group
= vx_vector_one_fp_arg_tests
[k
];
// convToWord: only the low 32 bits of each doubleword result are printed.
1462 Bool convToWord
= (test_group
.type
== VX_CONV_WORD
);
1463 Bool dp
= (test_group
.precision
== DOUBLE_TEST
) ? True
: False
;
// True when the group name contains xvresp.
1464 Bool xvrespTest
= (strstr(test_group
.name
, "xvresp") != NULL
) ? True
: False
;
// Number of table entries consumed per iteration: 2 doubles or 4 floats.
1465 int stride
= dp
? 2 : 4;
1467 for (i
= 0; i
< test_group
.num_tests
; i
+=stride
) {
// Walk the four 32 bit words of vec_out; the loop body is not visible here.
1471 pv
= (unsigned int *)&vec_out
;
1473 for (idx
= 0; idx
< 4; idx
++, pv
++)
1478 unsigned long long * frB_dp
, *dst_dp
;
1479 for (j
= 0; j
< 2; j
++) {
1480 inB
= (void *)&spec_fargs
[i
+ j
];
1481 // copy double precision FP into vector element j
1482 memcpy(((void *)&vec_inB
) + (j
* 8), inB
, 8);
1484 // execute test insn
1486 dst_dp
= (unsigned long long *) &vec_out
;
// The printed test number counts vectors, not individual elements.
1487 printf("#%d: %s ", i
/2, test_group
.name
);
1488 for (j
= 0; j
< 2; j
++) {
1491 frB_dp
= (unsigned long long *)&spec_fargs
[i
+ j
];
1492 printf("%s(%016llx)", test_group
.op
, *frB_dp
);
1493 printf(" = %016llx", convToWord
? (dst_dp
[j
] & 0x00000000ffffffffULL
) : dst_dp
[j
]);
1498 unsigned int * frB_sp
, * dst_sp
;
1500 for (j
= 0; j
< 4; j
++) {
1501 inB
= (void *)&spec_sp_fargs
[i
+ j
];
1502 // copy single precision FP into vector element j
1503 memcpy(((void *)&vec_inB
) + (j
* 4), inB
, 4);
1505 // execute test insn
1507 dst_sp
= (unsigned int *) &vec_out
;
1509 printf("#%d: %s ", i
/4, test_group
.name
);
1510 for (j
= 0; j
< 4; j
++) {
1513 frB_sp
= (unsigned int *)&spec_sp_fargs
[i
+ j
];
1514 printf("%s(%08x)", test_group
.op
, *frB_sp
);
// Tolerance check: the result passes when it equals 1/input, when both are
// NaN, or when it differs from 1/input by at most the magnitude of input/256.
1516 float calc_diff
= fabs(spec_sp_fargs
[i
+ j
]/256);
1518 memcpy(&sp_res
, &dst_sp
[j
], 4);
1519 float div_result
= 1/spec_sp_fargs
[i
+ j
];
1520 float real_diff
= fabs(sp_res
- div_result
);
1522 ( ( sp_res
== div_result
)
1523 || ( isnan(sp_res
) && isnan(div_result
) )
1524 || ( real_diff
<= calc_diff
) ) ? "PASS"
1527 printf(" = %08x", dst_sp
[j
]);
// Runs every entry of vx_vector_fp_tests (the table ends at a null test_func).
// Arguments for each step are loaded by setup_dp_fp_args or setup_sp_fp_args
// depending on the group precision, and the outcome is reported through
// print_vector_fp_result().
// NOTE(review): braces, some declarations (k, func, flags) and the statement
// that executes the instruction are missing from this listing.
1539 /* This function assumes the instruction being tested requires two args. */
1540 static void test_vx_vector_fp_ops(void)
1545 build_special_fargs_table();
1547 while ((func
= vx_vector_fp_tests
[k
].test_func
)) {
// repeat starts at 1 and is tested again for compare groups at the end of the
// visible body; what happens inside that branch is not shown.
1548 int idx
, i
, repeat
= 1;
1549 vx_fp_test_t test_group
= vx_vector_fp_tests
[k
];
// Number of table entries consumed per iteration: 2 doubles or 4 floats.
1550 int stride
= test_group
.precision
== DOUBLE_TEST
? 2 : 4;
1554 for (i
= 0; i
< test_group
.num_tests
; i
+=stride
) {
1555 unsigned int * pv
, condreg
;
1558 pv
= (unsigned int *)&vec_out
;
1559 if (test_group
.precision
== DOUBLE_TEST
)
1560 setup_dp_fp_args(&test_group
.targs
[i
], False
);
1562 setup_sp_fp_args(&test_group
.targs
[i
], False
);
// Walk the four 32 bit words of vec_out; the loop body is not visible here.
1565 for (idx
= 0; idx
< 4; idx
++, pv
++)
1568 // execute test insn
// Compare groups report bits 4..7 of flags as the condition value; all other
// groups pass the VX_NOT_CMP_OP marker to the printer instead.
1573 if (test_group
.type
== VX_BASIC_CMP
) {
1574 condreg
= (flags
& 0x000000f0) >> 4;
1576 condreg
= VX_NOT_CMP_OP
;
1578 print_vector_fp_result(condreg
, &test_group
, i
);
1581 if (repeat
&& test_group
.type
== VX_BASIC_CMP
) {
// Input pairs for the divide-extension tests.  test_div_extensions() loads
// column 0 of a row into r14 and column 1 into r15, and prints them as the
// two sides of the division.
// NOTE(review): only some rows of each table are visible in this listing.
1592 // The div doubleword test data
1593 signed long long div_dw_tdata
[13][2] = {
1599 { 0x8000000000000000ULL
, 0xa },
1602 { 0x1234fedc, 0x8000a873 },
1603 { 0xabcd87651234fedcULL
, 0xa123b893 },
1604 { 0x123456789abdcULL
, 0 },
// Number of rows (operand pairs) in div_dw_tdata.
1608 #define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2)
1610 // The div word test data
1611 unsigned int div_w_tdata
[6][2] = {
1614 { 0x7abc1234, 0xf0000000 },
// Number of rows (operand pairs) in div_w_tdata.
1619 #define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2)
// Descriptor for one divide-extension test group.
// NOTE(review): the name and num_tests members used by test_div_extensions()
// and the closing typedef line are not visible in this listing.
1621 typedef struct div_ext_test
// Function for the group; a null value terminates the div_tests table.
1623 test_func_t test_func
;
// DIV_BASE or DIV_OE in the div_tests table.
1626 div_type_t div_type
;
// DOUBLE_TEST selects div_dw_tdata, anything else div_w_tdata.
1627 precision_type_t precision
;
// Table of divide-extension test groups walked by test_div_extensions().
// Each row is: test function, printed name, number of operand pairs, divide
// variant, operand width.  The doubleword rows are only built when
// __powerpc64__ is defined, and the all-null row ends the table.
// NOTE(review): the preprocessor line closing the conditional is not visible.
1630 static div_ext_test_t div_tests
[] = {
1631 #ifdef __powerpc64__
1632 { &test_divde
, "divde", dw_tdata_len
, DIV_BASE
, DOUBLE_TEST
},
1633 { &test_divde
, "divdeo", dw_tdata_len
, DIV_OE
, DOUBLE_TEST
},
1635 { &test_divweu
, "divweu", w_tdata_len
, DIV_BASE
, SINGLE_TEST
},
1636 { &test_divweu
, "divweuo", w_tdata_len
, DIV_OE
, SINGLE_TEST
},
1637 { NULL
, NULL
, 0, 0, 0 }
// Runs every group in div_tests (the table ends at a null test_func).  For each
// operand pair it loads r14 and r15 from the matching data table, then prints
// the operands, the value of r17, the top four bits of div_flags and div_xer.
// NOTE(review): braces, declarations (i, k, func), the action taken for DIV_OE
// groups and the statement that executes the instruction are missing from
// this listing.
1640 static void test_div_extensions(void)
1646 while ((func
= div_tests
[k
].test_func
)) {
1648 div_ext_test_t test_group
= div_tests
[k
];
1652 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1653 unsigned int condreg
;
1655 if (test_group
.div_type
== DIV_OE
)
// Operands come from the doubleword table for DOUBLE_TEST groups and from the
// word table otherwise.
1660 if (test_group
.precision
== DOUBLE_TEST
) {
1661 r14
= div_dw_tdata
[i
][0];
1662 r15
= div_dw_tdata
[i
][1];
1664 r14
= div_w_tdata
[i
][0];
1665 r15
= div_w_tdata
[i
][1];
1667 // execute test insn
// Keep only the most significant four bits of div_flags (presumably the first
// condition register field saved by the test function -- confirm).
1669 condreg
= (div_flags
& 0xf0000000) >> 28;
1670 printf("#%d: %s%s: ", i
, test_group
.name
, do_dot
? "." : "");
1671 if (test_group
.precision
== DOUBLE_TEST
) {
1672 printf("0x%016llx / 0x%016llx = 0x%016llx;",
1673 div_dw_tdata
[i
][0], div_dw_tdata
[i
][1], (signed long long) r17
);
1675 printf("0x%08x / 0x%08x = 0x%08x;",
1676 div_w_tdata
[i
][0], div_w_tdata
[i
][1], (unsigned int) r17
);
1678 printf(" CR=%x; XER=%x\n", condreg
, div_xer
);
// Runs every group in fct_tests (the table ends at a null test_func).  Each
// value of spec_fargs is placed in f14 and one line is printed showing the
// input bits, the input as a double, and the bits of result.
// NOTE(review): braces, declarations (i, k, func, result) and the statement
// that executes the instruction are missing from this listing.
1692 static void test_fct_ops(void)
1698 while ((func
= fct_tests
[k
].test_func
)) {
1700 simple_test_t test_group
= fct_tests
[k
];
1704 for (i
= 0; i
< nb_special_fargs
; i
++) {
// Mask that keeps the low 32 bits of the result for SINGLE_TEST groups.
1706 #define SINGLE_MASK 0x00000000FFFFFFFFULL
1708 f14
= spec_fargs
[i
];
1709 // execute test insn
1713 printf("#%d: %s%s: ", i
, test_group
.name
, do_dot
? "." : "");
// The doubles are reinterpreted as 64 bit integers through pointer casts so
// that their raw bit patterns can be printed in hex.
1714 printf("0x%016llx (%e) ==> 0x%016llx\n",
1715 *((unsigned long long *)(&spec_fargs
[i
])), spec_fargs
[i
],
1716 test_group
.precision
== SINGLE_TEST
? (SINGLE_MASK
&
1717 *((unsigned long long *)(&result
))) :
1718 *((unsigned long long *)(&result
)));
// Exercises the stdbrx instruction (built only when __powerpc64__ is defined):
// prints val, issues stdbrx with r17 as the source register and r14 holding
// the address of store, then prints store.
// NOTE(review): the line that loads r17 is not visible in this listing.
// NOTE(review): the asm statement lists no output operand or memory clobber
// for store, so the compiler is not told that store is written -- confirm
// this is safe at the optimisation level used for the tests.
1731 #ifdef __powerpc64__
1732 void test_stdbrx(void)
1734 unsigned long long store
, val
= 0xdeadbacf12345678ULL
;
1735 printf("stdbrx: 0x%llx ==> ", val
);
// r14 carries the destination address into the asm statement.
1737 r14
= (HWord_t
)&store
;
1738 __asm__
__volatile__ ("stdbrx %0, 0, %1" : : "r"(r17
), "r"(r14
));
1739 printf("0x%llx\n", store
);
// Entries of the master test table that main() walks as all_tests.  Each row
// is: test function, description printed before the test runs, and the group
// bit (OTHER_INST or SCALAR_DIV_INST) that main() matches against the mask
// built from the command line.
// NOTE(review): the line that opens the table, the function pointers of
// several rows, the terminating row and the lines closing the two
// __powerpc64__ conditionals are not visible in this listing.
1747 { &test_vx_vector_one_fp_arg
,
1748 "Test VSX vector single arg instructions", OTHER_INST
},
1749 { &test_vx_vector_fp_ops
,
1750 "Test VSX floating point compare and basic arithmetic instructions", OTHER_INST
},
1751 #ifdef __powerpc64__
1753 "Test bit permute double", OTHER_INST
},
1756 "Test xxsel instruction", OTHER_INST
},
1758 "Test xxspltw instruction", OTHER_INST
},
// The only row visible here that belongs to the SCALAR_DIV_INST group.
1759 { &test_div_extensions
,
1760 "Test div extensions", SCALAR_DIV_INST
},
1762 "Test floating point convert [word | doubleword] unsigned, with round toward zero", OTHER_INST
},
1763 #ifdef __powerpc64__
1765 "Test stdbrx instruction", OTHER_INST
},
1767 { &test_vx_aORm_fp_ops
,
1768 "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p", OTHER_INST
},
1769 { &test_vx_simple_scalar_fp_ops
,
1770 "Test scalar floating point arithmetic instructions", OTHER_INST
},
// Help text for the command line options d, o, A and h, the same letters
// main() passes to getopt.  The call that prints the text is not visible in
// this listing.
// NOTE(review): the text names the program test_isa_3_0 and marks three
// options as the default -- confirm both against the built test and the
// option handling in main().
1775 static void usage (void)
1778 "Usage: test_isa_3_0 [OPTIONS]\n"
1779 "\t-d: test scalar division instructions (default)\n"
1780 "\t-o: test non scalar division instructions (default)\n"
1781 "\t-A: test all instructions (default)\n"
1782 "\t-h: display this help and exit\n"
1786 int main(int argc
, char *argv
[])
1794 unsigned int test_run_mask
= 0;
1796 /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER. These
1797 * bits are set on various arithmetic instructions. This means this
1798 * test generates different XER output for pre ISA 3.0 versus ISA 3.0
1799 * hardware. The tests have been grouped so that the tests that generate
1800 * different results are in one test and the rest are in a different test.
1801 * This minimizes the size of the result expect files for the two cases.
1804 while ((c
= getopt(argc
, argv
, "doAh")) != -1) {
1807 test_run_mask
|= SCALAR_DIV_INST
;
1810 test_run_mask
|= OTHER_INST
;
1813 test_run_mask
= 0xFFFF;
1821 fprintf(stderr
, "Unknown argument: '%c'\n", c
);
1826 while ((func
= all_tests
[i
].test_category
)) {
1827 aTest
= all_tests
[i
];
1828 if(test_run_mask
& aTest
.test_group
) {
1829 /* Test group specified on command line */
1831 printf( "%s\n", aTest
.name
);
1839 free(spec_sp_fargs
);