/* Copyright (C) 2011 IBM

   Author: Maynard Johnson <maynardj@us.ibm.com>

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
/* Headers below (other than unistd.h) are assumed from the later uses of
 * printf/fprintf, malloc/free, memcpy/strcpy/strstr, sqrt/fabs/fpclassify,
 * the fixed-width integer types, and the AltiVec/VSX vector types. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <altivec.h>
#include <unistd.h>    // getopt
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is used below to handle the ppc64le layout (reconstruction assumed). */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif

typedef unsigned char Bool;
#define True  1
#define False 0
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");

static volatile unsigned int div_flags, div_xer;

/* Flags selecting the 'o' (OE) and '.' (record) instruction forms;
 * these declarations are assumed from their uses below. */
static Bool do_OE = False, do_dot = False;
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );

#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
/* SET_CR_ZERO and the SET_XER(0) body are assumed; they are required by
 * SET_CR_XER_ZERO below. */
#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
typedef void (*test_func_t)(void);
typedef struct test_table test_table_t;

/* Defines for the instruction groups, use bit field to identify */
#define SCALAR_DIV_INST 0x0001
#define OTHER_INST      0x0002
/* These functions below that construct a table of floating point
 * values were lifted from none/tests/ppc32/jm-insns.c.
 */

#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t tmp;

   tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   *(uint64_t *)farg = tmp;
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
}
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t tmp;

   tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   *(uint32_t *)farg = tmp;
}
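/* Illustrative worked example (not from the original source): packing the
 * IEEE-754 fields for +infinity with register_farg() means s = 0, _exp = 0x7ff,
 * mant = 0, giving (0ULL << 63) | (0x7ffULL << 52) | 0 = 0x7FF0000000000000.
 * The single-precision helper packs (s << 31) | (_exp << 23) | mant the same
 * way, so +infinity would be (0 << 31) | (0xff << 23) | 0 = 0x7F800000.
 */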
typedef struct fp_test_args {
   /* Fields assumed from the ->fra_idx / ->frb_idx accesses below. */
   int fra_idx;
   int frb_idx;
} fp_test_args_t;

fp_test_args_t two_arg_fp_tests[] = {
static int nb_special_fargs;
static double * spec_fargs;
static float * spec_sp_fargs;
static void build_special_fargs_table(void)
{
   /*
      Entry  Sign Exp   fraction                  Special value
       0       0   3fd   0x8000000000000ULL       Positive finite number
       1       0   404   0xf000000000000ULL       ...
       2       0   001   0x8000000b77501ULL       ...
       3       0   7fe   0x800000000051bULL       ...
       4       0   012   0x3214569900000ULL       ...
       5       0   000   0x0000000000000ULL       +0.0 (+zero)
       6       1   000   0x0000000000000ULL       -0.0 (-zero)
       7       0   7ff   0x0000000000000ULL       +infinity
       8       1   7ff   0x0000000000000ULL       -infinity
       9       0   7ff   0x7FFFFFFFFFFFFULL       +SNaN
       10      1   7ff   0x7FFFFFFFFFFFFULL       -SNaN
       11      0   7ff   0x8000000000000ULL       +QNaN
       12      1   7ff   0x8000000000000ULL       -QNaN
       13      1   000   0x8340000078000ULL       Denormalized val (zero exp and non-zero fraction)
       14      1   40d   0x0650f5a07b353ULL       Negative finite number
       15      0   412   0x32585a9900000ULL       A few more positive finite numbers
       16      0   413   0x82511a2000000ULL       ...
       17      . . .
       18      . . .
       19      . . .
   */

   /* Working variables assumed from their uses below. */
   uint64_t mant;
   uint32_t mant_sp;
   uint16_t _exp;
   int s, i = 0, j;

   spec_fargs = malloc( 20 * sizeof(double) );
   spec_sp_fargs = malloc( 20 * sizeof(float) );
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0xf000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x8000000b77501ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x800000000051bULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x3214569900000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* +0.0      : 0 0x000 0x0000000000000 */
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -0.0      : 1 0x000 0x0000000000000 */
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* +infinity : 0 0x7FF 0x0000000000000 */
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -infinity : 1 0x7FF 0x0000000000000 */
   mant = 0x0000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /*
    * This comment applies to values #9 and #10 below:
    * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
    * so we can't just copy the double-precision value to the corresponding slot in the
    * single-precision array (i.e., in the loop at the end of this function).  Instead, we
    * have to manually set the bits using register_sp_farg().
    */

   /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
   mant = 0x7FFFFFFFFFFFFULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);
   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);

   /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
   mant = 0x7FFFFFFFFFFFFULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);
   register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);

   /* +QNaN     : 0 0x7FF 0x8000000000000 */
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* -QNaN     : 1 0x7FF 0x8000000000000 */
   mant = 0x8000000000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* denormalized value */
   mant = 0x8340000078000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* Negative finite number */
   mant = 0x0650f5a07b353ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   /* A few positive finite numbers ... */
   mant = 0x32585a9900000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x82511a2000000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x12ef5a9300000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x14bf5d2300000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   mant = 0x76bf982440000ULL;
   register_farg(&spec_fargs[i++], s, _exp, mant);

   nb_special_fargs = i;
   for (j = 0; j < i; j++) {
      if (!(j == 9 || j == 10))
         spec_sp_fargs[j] = spec_fargs[j];
   }
}
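/* Background note (illustrative, not from the original source): when a
 * signalling NaN is "quieted" the processor sets the most-significant
 * fraction bit, so the double SNaN 0x7FF7FFFFFFFFFFFF becomes the QNaN
 * 0x7FFFFFFFFFFFFFFF.  That is why entries #9 and #10 get hand-built
 * single-precision patterns above instead of being produced by the final
 * copy loop.
 */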
/* Fields of struct test_table assumed from the all_tests[] usage in main(). */
struct test_table {
   test_func_t test_category;
   char *name;
   unsigned int test_group;
};
/* Type of input for floating point operations. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD,
   /* Remaining enumerators inferred from their uses below. */
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_ESTIMATE,
   VX_DEFAULT
} vx_fp_test_type;
static vector unsigned int vec_out, vec_inA, vec_inB;
/* This function is for checking the reciprocal and reciprocal square root
 * estimate instructions.
 */
Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
{
   /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
    * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
    * does an actual reciprocal calculation versus estimation, so the answer we get back from
    * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
    * precision) and the estimate may still be within expected tolerances.  On top of that,
    * we can't count on these estimates always being the same across implementations.
    * For example, with the fre[s] instruction (which should be correct to within one part
    * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
    * one implementation could return 1.0111_1111_0000 and another implementation could return
    * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
    * single bit in common.
    *
    * The upshot is we can't validate the VEX output for these instructions by comparing against
    * stored bit patterns.  We must check that the result is within expected tolerances.
    */

   /* A mask to be used for validation as a last resort.
    * Only use 12 bits of precision for reasons discussed above.
    */
#define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
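   /* Restating the tolerance rule implemented below (illustrative): with
    * reference value div_result = 1.0/x (or 1.0/sqrt(x) for the rsqrte forms),
    * a result r passes when
    *     |r - div_result| <= |div_result| / 16384.0
    * i.e. the error relative to the exact result is at most one part in 2^14,
    * matching the architected accuracy of the estimate instructions.
    */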
   Bool dp_test = type == DOUBLE_TEST;
   double src_dp, res_dp;
   float src_sp, res_sp;
   Bool result = False;   /* overall pass/fail; declaration assumed from uses below */

#define SRC (dp_test ? src_dp : src_sp)
#define RES (dp_test ? res_dp : res_sp)

   Bool src_is_negative = False;
   Bool res_is_negative = False;
   unsigned long long * dst_dp = NULL;
   unsigned int * dst_sp = NULL;
   if (dp_test) {
      unsigned long long * src_dp_ull;
      dst_dp = (unsigned long long *) &vec_out;
      src_dp = spec_fargs[idx];
      src_dp_ull = (unsigned long long *) &src_dp;
      src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
      res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
      memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
   } else {
      unsigned int * src_sp_uint;
      dst_sp = (unsigned int *) &vec_out;
      src_sp = spec_sp_fargs[idx];
      src_sp_uint = (unsigned int *) &src_sp;
      src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
      res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
      memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
   }
   // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
   if (fpclassify(SRC) == FP_ZERO)
      return isinf(RES);   /* 1/(+-0) is +-inf; body assumed for an elided line */

   if (!src_is_negative && isinf(SRC))
      return !res_is_negative && (fpclassify(RES) == FP_ZERO);

   if (src_is_negative && isinf(SRC))
      return res_is_negative && (fpclassify(RES) == FP_ZERO);
   if (dp_test) {
      double recip_divisor;
      double div_result;
      double calc_diff_tmp;
      double calc_diff, real_diff;

      if (is_rsqrte)
         recip_divisor = sqrt(src_dp);
      else
         recip_divisor = src_dp;

      div_result = 1.0/recip_divisor;
      calc_diff_tmp = recip_divisor * 16384.0;
      if (isnormal(calc_diff_tmp)) {
         calc_diff = fabs(1.0/calc_diff_tmp);
         real_diff = fabs(res_dp - div_result);
         result = ( ( res_dp == div_result )
                    || ( real_diff <= calc_diff ) );
      } else {
         /* Unable to compute theoretical difference, so we fall back to masking out
          * the least significant bits and comparing against the computed value.
          */
         unsigned long long * div_result_dp = (unsigned long long *) &div_result;
         result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP)
                  == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
      }
      /* For debug use . . .
         unsigned long long * dv = &div_result;
         unsigned long long * rd = &real_diff;
         unsigned long long * cd = &calc_diff;
         printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
                *dv, *rd, *cd);
      */
   } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
      float div_result, calc_diff_tmp, calc_diff, real_diff;
      float recip_divisor = sqrt(src_sp);

      div_result = 1.0/recip_divisor;
      calc_diff_tmp = recip_divisor * 16384.0;
      if (isnormal(calc_diff_tmp)) {
         calc_diff = fabsf(1.0/calc_diff_tmp);
         real_diff = fabsf(res_sp - div_result);
         result = ( ( res_sp == div_result )
                    || ( real_diff <= calc_diff ) );
      } else {
         /* Unable to compute theoretical difference, so we fall back to masking out
          * the least significant bits and comparing against the computed value.
          */
         unsigned int * div_result_sp = (unsigned int *) &div_result;
         result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP)
                  == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
      }
      /* For debug use . . .
         unsigned long long * dv = &div_result;
         unsigned long long * rd = &real_diff;
         unsigned long long * cd = &calc_diff;
         printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
                *dv, *rd, *cd);
      */
   }
   return result;
}
typedef struct vx_fp_test {
   test_func_t test_func;
   /* name, num_tests and op fields assumed from the table initializers below. */
   char * name;
   fp_test_args_t * targs;
   int num_tests;
   precision_type_t precision;
   vx_fp_test_type type;
   char * op;
} vx_fp_test_t;
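/* Note on the inline-asm templates below (my understanding of the GCC PowerPC
 * back end, not taken from this file): the "wa" constraint asks for any VSX
 * register, and the "%x0"/"%x1" operand modifiers print the full 6-bit VSX
 * register number, which is what the xv and xs mnemonics expect.
 */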
static void test_xvredp(void)
{
   __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xsredp(void)
{
   __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrsqrtedp(void)
{
   __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xsrsqrtedp(void)
{
   __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrsqrtesp(void)
{
   __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xstsqrtdp(void)
{
   __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
}

static void test_xvtsqrtdp(void)
{
   __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
}

static void test_xvtsqrtsp(void)
{
   __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
}

static void test_xvsqrtdp(void)
{
   __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvsqrtsp(void)
{
   __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvtdivdp(void)
{
   __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
}

static void test_xvtdivsp(void)
{
   __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
}
static void test_xscvdpsp(void)
{
   __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xscvdpuxws(void)
{
   __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xscvspdp(void)
{
   __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvdpsp(void)
{
   __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvdpuxds(void)
{
   __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvdpuxws(void)
{
   __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvspdp(void)
{
   __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvspsxds(void)
{
   __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvspuxds(void)
{
   __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvdpsxds(void)
{
   __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvspuxws(void)
{
   __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvsxddp(void)
{
   __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvuxddp(void)
{
   __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvsxdsp(void)
{
   __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvuxdsp(void)
{
   __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvsxwdp(void)
{
   __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvuxwdp(void)
{
   __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvsxwsp(void)
{
   __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvcvuxwsp(void)
{
   __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}
static void test_xsrdpic(void)
{
   __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xsrdpiz(void)
{
   __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xsrdpi(void)
{
   __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvabsdp(void)
{
   __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvnabsdp(void)
{
   __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvnegdp(void)
{
   __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvnegsp(void)
{
   __asm__ __volatile__ ("xvnegsp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvabssp(void)
{
   __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvnabssp(void)
{
   __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrdpi(void)
{
   __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrdpic(void)
{
   __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrdpim(void)
{
   __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrdpip(void)
{
   __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrdpiz(void)
{
   __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrspi(void)
{
   __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrspic(void)
{
   __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrspim(void)
{
   __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrspip(void)
{
   __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}

static void test_xvrspiz(void)
{
   __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out) : "wa" (vec_inB));
}
static vx_fp_test_t   /* element type assumed; matches the vx_fp_test_t initializers */
vsx_one_fp_arg_tests[] = {
   { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
   { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
   { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
   { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
   { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
   { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
   { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
   { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
   { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
   { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
   { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
   { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
   { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
   { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
   { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
   { &test_xvnegsp, "xvnegsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "neg"},
   { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
   { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
   { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
   { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
   { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
   { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
   { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
   { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
   { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
   { NULL, NULL, NULL, 0, 0, 0, NULL }
};
static vx_fp_test_t   /* element type assumed, as above */
vx_tdivORtsqrt_tests[] = {
   { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
   { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
   { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
   { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
   { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
   { NULL, NULL, NULL, 0, 0, 0, NULL }
};
static unsigned long long doubleWord[] = { 0,
                                           0xffffffff00000000LL,
                                           0x00000000ffffffffLL,
                                           0xffffffffffffffffLL,
                                           0x89abcde123456789LL,
                                           0x0102030405060708LL,
                                           0x00000000a0b1c2d3LL,

static unsigned int singleWord[] = {0,
typedef struct vx_intToFp_test {
   test_func_t test_func;
   /* name, targs and num_tests fields assumed from the initializers below. */
   char * name;
   void * targs;
   int num_tests;
   precision_type_t precision;
   vx_fp_test_type type;
} vx_intToFp_test_t;

static vx_intToFp_test_t
intToFp_tests[] = {
   { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
   { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
   { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
   { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
   { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
   { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
   { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
   { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
   { NULL, NULL, NULL, 0, 0 }
};
/* Possible divde type combinations are: base, OE ('o' suffix), dot ('.' suffix),
 * and OE+dot. */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;

   /* The do_OE/do_dot guards, the case labels, and the exit on error are
    * assumed reconstructions; only the four asm variants and the flag reads
    * are taken verbatim. */
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case DIV_BASE:
         __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_OE:
         __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_DOT:
         __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case (DIV_OE | DIV_DOT):
         __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
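/* Reminder of the semantics being exercised (not from the original file):
 * divdeu is "Divide Doubleword Extended Unsigned": the dividend is (RA)
 * concatenated with 64 zero bits, i.e. RA * 2^64, divided by RB.  That is why
 * the result printer in test_div_extensions() shows the first operand as
 * "0x<RA>0000000000000000 / 0x<RB>".  divwe/divweo are the 32-bit analogues,
 * with the dividend extended by 32 zero bits.
 */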
static void test_divwe(void)
{
   int divwe_type = DIV_BASE;

   /* As in test_divdeu(), the guards and case labels are assumed. */
   if (do_OE)
      divwe_type |= DIV_OE;
   if (do_dot)
      divwe_type |= DIV_DOT;

   switch (divwe_type) {
      case DIV_BASE:
         __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_OE:
         __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_DOT:
         __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case (DIV_OE | DIV_DOT):
         __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divwe type. Exiting\n");
         exit(1);
   }
}
typedef struct simple_test {
   test_func_t test_func;
   char * name;   /* field assumed */
   precision_type_t precision;
} simple_test_t;
static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   int a_idx, b_idx, i;
   void * inA, * inB;
   void * vec_src = swap_inputs ? &vec_out : &vec_inB;

   for (i = 0; i < 4; i++) {
      a_idx = targs->fra_idx;
      b_idx = targs->frb_idx;
      inA = (void *)&spec_sp_fargs[a_idx];
      inB = (void *)&spec_sp_fargs[b_idx];
      // copy single precision FP into vector element i
      memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
      memcpy(vec_src + (i * 4), inB, 4);
      targs++;   /* advance to the next operand pair (assumed) */
   }
}
static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
{
   int a_idx, b_idx, i;
   void * inA, * inB;
   void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;

   for (i = 0; i < 2; i++) {
      a_idx = targs->fra_idx;
      b_idx = targs->frb_idx;
      inA = (void *)&spec_fargs[a_idx];
      inB = (void *)&spec_fargs[b_idx];
      // copy double precision FP into vector element i
      memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
      memcpy(vec_src + (i * 8), inB, 8);
      targs++;   /* advance to the next operand pair (assumed) */
   }
}
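/* Layout sketch (illustrative): the two helpers above fill the 128-bit VSX
 * inputs element by element --
 *    setup_dp_fp_args: | double 0 (bytes 0-7) | double 1 (bytes 8-15) |
 *    setup_sp_fp_args: | float 0 | float 1 | float 2 | float 3 |
 * The pointer arithmetic on the void* destinations relies on the GNU C
 * extension that treats sizeof(void) as 1 for address computations.
 */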
#define VX_NOT_CMP_OP 0xffffffff

static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
{
   int a_idx, b_idx, k;
   char * name = malloc(20);
   int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
   int loops = dp ? 2 : 4;
   fp_test_args_t * targs = &test_group->targs[i];
   unsigned long long * frA_dp, * frB_dp, * dst_dp;
   unsigned int * frA_sp, *frB_sp, * dst_sp;

   strcpy(name, test_group->name);
   printf("#%d: %s%s ", dp ? i/2 : i/4, name, (do_dot ? "." : ""));
   for (k = 0; k < loops; k++) {
      a_idx = targs->fra_idx;
      b_idx = targs->frb_idx;
      if (dp) {
         frA_dp = (unsigned long long *)&spec_fargs[a_idx];
         frB_dp = (unsigned long long *)&spec_fargs[b_idx];
         printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
      } else {
         frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
         frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
         printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
      }
      targs++;   /* assumed, as in the setup helpers */
   }

   if (cc != VX_NOT_CMP_OP)
      printf(" ? cc=%x", cc);

   if (print_vec_out) {
      if (dp) {
         dst_dp = (unsigned long long *) &vec_out;
         printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
      } else {
         dst_sp = (unsigned int *) &vec_out;
         printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
      }
   } else {
      printf("\n");
   }
   free(name);
}
static void test_vsx_one_fp_arg(void)
{
   /* Loop/control variables assumed from their uses below. */
   test_func_t func;
   int k = 0, i, j, idx;
   unsigned int * pv;

   build_special_fargs_table();

   while ((func = vsx_one_fp_arg_tests[k].test_func)) {
      vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
      Bool estimate = (test_group.type == VX_ESTIMATE);
      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
      Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
      Bool sparse_sp = False;
      int stride = dp ? 2 : 4;
      int loops = is_scalar ? 1 : stride;
      stride = is_scalar ? 1 : stride;

      /* For conversions of single to double, the 128-bit input register is sparsely populated:
       *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
       *                     or
       *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
       *
       * For the vector op case, we need to adjust stride from '4' to '2', since
       * we'll only be loading two values per loop into the input register.
       */
      if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
         stride = 2;
         loops = 2;          /* adjustment assumed from the comment above */
         sparse_sp = True;
      }

      for (i = 0; i < test_group.num_tests; i+=stride) {
         void * inB, * vecB_void_ptr = (void *)&vec_inB;

         /* Clear the result vector (fill value assumed). */
         pv = (unsigned int *)&vec_out;
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            unsigned long long * frB_dp, *dst_dp;
            for (j = 0; j < loops; j++) {
               inB = (void *)&spec_fargs[i + j];
               // copy double precision FP into vector element i
               if (isLE && is_scalar)
                  vecB_void_ptr += 8;           /* offset assumed for the LE scalar case */
               memcpy(vecB_void_ptr + (j * 8), inB, 8);
            }
            // execute test insn
            (*func)();
            dst_dp = (unsigned long long *) &vec_out;
            if (isLE && is_scalar)
               dst_dp++;                        /* assumed; result is in the other doubleword */
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               frB_dp = (unsigned long long *)&spec_fargs[i + j];
               printf("%s(%016llx)", test_group.op, *frB_dp);
               if (estimate) {
                  Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1 : j);
                  printf(" ==> %s)", res ? "PASS" : "FAIL");
                  /* For debugging . . .
                     printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
                   */
               } else {
                  vx_fp_test_type type = test_group.type;
                  switch (type) {
                     case VX_SCALAR_CONV_TO_WORD:
                        printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
                        break;
                     case VX_CONV_TO_SINGLE:
                        printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
                        break;
                     default:  // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
                        printf(" = %016llx", dst_dp[j]);
                        break;
                  }
               }
            }
         } else {
            unsigned int * frB_sp, * dst_sp = NULL;
            unsigned long long * dst_dp = NULL;

            for (j = 0; j < loops; j++) {
               inB = (void *)&spec_sp_fargs[i + j];
               // copy single precision FP into vector element i
               if (sparse_sp) {
                  if (isLE)
                     memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
                  else
                     memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
               } else {
                  if (isLE && is_scalar)
                     vecB_void_ptr += 12;
                  memcpy(vecB_void_ptr + (j * 4), inB, 4);
               }
            }
            // execute test insn
            (*func)();
            if (test_group.type == VX_CONV_TO_DOUBLE) {
               dst_dp = (unsigned long long *) &vec_out;
               if (isLE && is_scalar)
                  dst_dp++;                     /* assumed, as in the dp case */
            } else {
               dst_sp = (unsigned int *) &vec_out;
               if (isLE && is_scalar)
                  dst_sp += 3;                  /* assumed; matches the output_vec_idx of 3 below */
            }
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
               printf("%s(%08x)", test_group.op, *frB_sp);
               if (estimate) {
                  Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
                  printf(" ==> %s)", res ? "PASS" : "FAIL");
               } else {
                  if (test_group.type == VX_CONV_TO_DOUBLE)
                     printf(" = %016llx", dst_dp[j]);
                  else
                     /* Special case: Current VEX implementation for fsqrts (single precision)
                      * uses the same implementation as that used for double precision fsqrt.
                      * However, I've found that for xvsqrtsp, the result from that implementation
                      * may be off by the two LSBs.  Generally, even this small inaccuracy can cause the
                      * output to appear very different if you end up with a carry.  But for the given
                      * inputs in this testcase, we can simply mask out these bits.
                      */
                     printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
               }
            }
         }
         printf("\n");
      }
      k++;
   }
}
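/* Illustrative note on the xvsqrtsp masking above (example values are mine):
 * ANDing a single-precision result with 0xfffffffc simply ignores the two
 * least-significant fraction bits, so e.g. 0x3f5a6e31 and 0x3f5a6e32 are
 * reported identically.  This hides the small difference described in the
 * comment without affecting the exponent or sign.
 */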
static void test_int_to_fp_convert(void)
{
   /* Loop/control variables assumed from their uses below. */
   test_func_t func;
   int k = 0, i, j, idx;
   unsigned int * pv;
   void * inB;

   while ((func = intToFp_tests[k].test_func)) {
      vx_intToFp_test_t test_group = intToFp_tests[k];
      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
      Bool sparse_sp = False;
      int stride = dp ? 2 : 4;
      int loops = stride;

      /* For conversions of single to double, the 128-bit input register is sparsely populated:
       *    |___ int___|_Unused_|___int___|__Unused__|   // for vector op
       *
       * We need to adjust stride from '4' to '2', since we'll only be loading
       * two values per loop into the input register.
       */
      if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
         stride = 2;
         loops = 2;          /* adjustment assumed from the comment above */
         sparse_sp = True;
      }

      for (i = 0; i < test_group.num_tests; i+=stride) {
         /* Clear the result vector (fill value assumed). */
         pv = (unsigned int *)&vec_out;
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            unsigned long long *dst_dw, * targs = test_group.targs;
            for (j = 0; j < loops; j++) {
               inB = (void *)&targs[i + j];
               // copy doubleword into vector element i
               memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
            }
            // execute test insn
            (*func)();
            dst_dw = (unsigned long long *) &vec_out;
            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               printf("conv(%016llx)", targs[i + j]);

               if (test_group.type == VX_CONV_TO_SINGLE)
                  printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
               else
                  printf(" = %016llx", dst_dw[j]);
            }
         } else {
            unsigned int * dst_sp = NULL;
            unsigned int * targs = test_group.targs;
            unsigned long long * dst_dp = NULL;
            void * vecB_void_ptr = (void *)&vec_inB;

            for (j = 0; j < loops; j++) {
               inB = (void *)&targs[i + j];
               // copy single word into vector element i
               if (sparse_sp) {
                  if (isLE)
                     memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
                  else
                     memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
               } else {
                  memcpy(vecB_void_ptr + (j * 4), inB, 4);
               }
            }
            // execute test insn
            (*func)();
            if (test_group.type == VX_CONV_TO_DOUBLE)
               dst_dp = (unsigned long long *) &vec_out;
            else
               dst_sp = (unsigned int *) &vec_out;

            printf("#%d: %s ", i/stride, test_group.name);
            for (j = 0; j < loops; j++) {
               printf("conv(%08x)", targs[i + j]);
               if (test_group.type == VX_CONV_TO_DOUBLE)
                  printf(" = %016llx", dst_dp[j]);
               else
                  printf(" = %08x", dst_sp[j]);
            }
         }
         printf("\n");
      }
      k++;
   }
}
// The div doubleword test data
signed long long div_dw_tdata[13][2] = {
                                          { 0x8000000000000000ULL, 0xa },
                                          { 0x1234fedc, 0x8000a873 },
                                          { 0xabcd87651234fedcULL, 0xa123b893 },
                                          { 0x123456789abdcULL, 0 },

#define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2)

// The div word test data
unsigned int div_w_tdata[6][2] = {
                                    { 0x7abc1234, 0xf0000000 },

#define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2)
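/* How the _len macros work: sizeof(div_dw_tdata) is the size of the whole
 * 13x2 array in bytes, dividing by sizeof(signed long long) gives the number
 * of elements (26), and dividing by 2 gives the number of {dividend, divisor}
 * rows (13).  The same arithmetic applies to w_tdata_len.
 */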
typedef struct div_ext_test {
   test_func_t test_func;
   /* name and num_tests fields assumed from the initializers below. */
   char * name;
   int num_tests;
   div_type_t div_type;
   precision_type_t precision;
} div_ext_test_t;

static div_ext_test_t div_tests[] = {
#ifdef __powerpc64__
   { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
   { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
#endif
   { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
   { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
   { NULL, NULL, 0, 0, 0 }
};
static void test_div_extensions(void)
{
   /* Loop/control variables assumed from their uses below. */
   test_func_t func;
   int k = 0, i;

   while ((func = div_tests[k].test_func)) {
      div_ext_test_t test_group = div_tests[k];

      for (i = 0; i < test_group.num_tests; i++) {
         unsigned int condreg;

         if (test_group.div_type == DIV_OE)
            do_OE = True;          /* assumed; selects the 'o' form in the test functions */
         else
            do_OE = False;

         if (test_group.precision == DOUBLE_TEST) {
            r14 = div_dw_tdata[i][0];
            r15 = div_dw_tdata[i][1];
         } else {
            r14 = div_w_tdata[i][0];
            r15 = div_w_tdata[i][1];
         }
         // execute test insn
         (*func)();
         condreg = (div_flags & 0xf0000000) >> 28;
         printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
         if (test_group.precision == DOUBLE_TEST) {
            printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
                   div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
         } else {
            printf("0x%08x00000000 / 0x%08x = 0x%08x;",
                   div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
         }
         printf(" CR=%x; XER=%x\n", condreg, div_xer);
      }
      k++;
   }
}
static void test_vx_tdivORtsqrt(void)
{
   /* Loop/control variables assumed from their uses below. */
   test_func_t func;
   int k = 0, i, j, idx;
   unsigned int * pv;
   unsigned int flags, crx;

   build_special_fargs_table();

   while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
      vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
      Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
      Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
      Bool two_args = test_group.targs ? True : False;
      int stride = dp ? 2 : 4;
      int loops = is_scalar ? 1 : stride;
      stride = is_scalar ? 1 : stride;

      for (i = 0; i < test_group.num_tests; i+=stride) {
         void * inB, * vecB_void_ptr = (void *)&vec_inB;

         /* Clear the result vector (fill value assumed). */
         pv = (unsigned int *)&vec_out;
         for (idx = 0; idx < 4; idx++, pv++)
            *pv = 0;

         if (dp) {
            unsigned long long * frB_dp;
            if (two_args) {
               setup_dp_fp_args(&test_group.targs[i], False);
            } else {
               for (j = 0; j < loops; j++) {
                  inB = (void *)&spec_fargs[i + j];
                  // copy double precision FP into vector element i
                  if (isLE && is_scalar)
                     vecB_void_ptr += 8;        /* offset assumed for the LE scalar case */
                  memcpy(vecB_void_ptr + (j * 8), inB, 8);
               }
            }
            // execute test insn
            // Must do set/get of CRs immediately before/after calling the asm func
            // to avoid CRs being modified by other instructions.
            SET_FPSCR_ZERO;                     /* set/execute/get sequence assumed */
            SET_CR_XER_ZERO;
            (*func)();
            GET_CR(flags);
            // assumes using CR1
            crx = (flags & 0x0f000000) >> 24;
            if (two_args) {
               print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
            } else {
               printf("#%d: %s ", i/stride, test_group.name);
               for (j = 0; j < loops; j++) {
                  frB_dp = (unsigned long long *)&spec_fargs[i + j];
                  printf("%s(%016llx)", test_group.op, *frB_dp);
               }
               printf( " ? %x (CRx)\n", crx);
            }
         } else {
            unsigned int * frB_sp;
            if (two_args) {
               setup_sp_fp_args(&test_group.targs[i], False);
            } else {
               for (j = 0; j < loops; j++) {
                  inB = (void *)&spec_sp_fargs[i + j];
                  // copy single precision FP into vector element i
                  memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
               }
            }
            // execute test insn
            SET_FPSCR_ZERO;                     /* set/execute/get sequence assumed */
            SET_CR_XER_ZERO;
            (*func)();
            GET_CR(flags);
            crx = (flags & 0x0f000000) >> 24;
            if (two_args) {
               print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
            } else {
               printf("#%d: %s ", i/stride, test_group.name);
               for (j = 0; j < loops; j++) {
                  frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
                  printf("%s(%08x)", test_group.op, *frB_sp);
               }
               printf( " ? %x (CRx)\n", crx);
            }
         }
      }
      k++;
   }
}
static void test_ftsqrt(void)
{
   int i;
   unsigned int flags, crx;
   unsigned long long * frbp;

   build_special_fargs_table();

   for (i = 0; i < nb_special_fargs; i++) {
      f14 = spec_fargs[i];
      frbp = (unsigned long long *)&spec_fargs[i];
      SET_FPSCR_ZERO;                           /* set/execute/get sequence assumed, as above */
      SET_CR_XER_ZERO;
      __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
      GET_CR(flags);
      crx = (flags & 0x0f000000) >> 24;
      printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
   }
}
/* Function name assumed; this is the popcntw test driven from all_tests[]. */
static void test_popcntw(void)
{
#ifdef __powerpc64__
   unsigned long long res;
   unsigned long long src = 0x9182736405504536ULL;
   r14 = src;            /* assumed: load the source into the asm input register */
   __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res) : "r" (r14));
   printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
#else
   unsigned int res;
   unsigned int src = 0x9182730E;
   r14 = src;            /* assumed */
   __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res) : "r" (r14));
   printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
#endif
}
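/* popcntw (Population Count Words) counts the one-bits in each 32-bit word of
 * the source register and places each count in the corresponding word of the
 * destination, so a 64-bit result holds two independent 32-bit counts.
 */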
/* Opener, terminator, and the &test_ftsqrt / &test_popcntw pointers are assumed. */
static test_table_t all_tests[] = {
   { &test_vsx_one_fp_arg,
     "Test VSX vector and scalar single argument instructions", OTHER_INST },
   { &test_int_to_fp_convert,
     "Test VSX vector integer to float conversion instructions", OTHER_INST },
   { &test_div_extensions,
     "Test div extensions", SCALAR_DIV_INST },
   { &test_ftsqrt,
     "Test ftsqrt instruction", OTHER_INST },
   { &test_vx_tdivORtsqrt,
     "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
   { &test_popcntw,
     "Test popcntw instruction", OTHER_INST },
   { NULL, NULL, 0 }
};
static void usage (void)
{
   fprintf(stderr,
      "Usage: test_isa_3_0 [OPTIONS]\n"
      "\t-d: test scalar division instructions (default)\n"
      "\t-o: test non scalar division instructions (default)\n"
      "\t-A: test all instructions (default)\n"
      "\t-h: display this help and exit\n"
      );
}
int main(int argc, char **argv)
{
   /* Declarations assumed from their uses below. */
   test_table_t aTest;
   test_func_t func;
   int c, i = 0;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set on various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {                  /* case labels assumed from the "doAh" optstring */
         case 'd':
            test_run_mask |= SCALAR_DIV_INST;
            break;
         case 'o':
            test_run_mask |= OTHER_INST;
            break;
         case 'A':
            test_run_mask = 0xFFFF;
            break;
         case 'h':
            usage();
            return 0;
         default:
            fprintf(stderr, "Unknown argument: '%c'\n", c);
      }
   }

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */
         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }

   free(spec_fargs);
   free(spec_sp_fargs);

   return 0;
}