1 /* Copyright (C) 2013 IBM
3 Authors: Carl Love <carll@us.ibm.com>
4 Maynard Johnson <maynardj@us.ibm.com>
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>.
19 The GNU General Public License is contained in the file COPYING.
21 This program is based heavily on the test_isa_2_06_part*.c source files.
36 typedef uint32_t HWord_t
;
38 typedef uint64_t HWord_t
;
39 #endif /* __powerpc64__ */
41 #ifdef VGP_ppc64le_linux
/* Global register variables: each declaration ties a file-scope variable to
 * the machine register named in the __asm__ string -- r14..r17 as HWord_t and
 * fr14..fr17 as double.  Elsewhere in this file the load/store drivers assign
 * the base address to r14 and the offset to r15 before invoking the test
 * instruction, which reads them as its "b"/"r" operands. */
47 register HWord_t r14
__asm__ ("r14");
48 register HWord_t r15
__asm__ ("r15");
49 register HWord_t r16
__asm__ ("r16");
50 register HWord_t r17
__asm__ ("r17");
51 register double f14
__asm__ ("fr14");
52 register double f15
__asm__ ("fr15");
53 register double f16
__asm__ ("fr16");
54 register double f17
__asm__ ("fr17");
56 static volatile unsigned int cond_reg
;
/* Comma-separated list of the eight condition-register field names
 * (cr0..cr7), written so it can be dropped into an inline-asm clobber list;
 * SET_CR uses it that way. */
61 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
/* Execute `mtcr` with _arg as the single input operand ("b" constraint) and
 * declare every field in ALLCR as clobbered.
 * Note: the expansion already ends in a semicolon, unlike GET_CR/GET_XER. */
63 #define SET_CR(_arg) \
64 __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );
/* Execute `mtxer` with _arg as the single input operand ("b" constraint) and
 * declare "xer" as clobbered.
 * Note: the expansion already ends in a semicolon, unlike GET_CR/GET_XER. */
66 #define SET_XER(_arg) \
67 __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
/* Execute `mfcr` with _lval as the output operand ("=b"), so _lval must be
 * an assignable lvalue.  The expansion has no trailing semicolon; the caller
 * supplies it. */
69 #define GET_CR(_lval) \
70 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )
/* Execute `mfxer` with _lval as the output operand ("=b"), so _lval must be
 * an assignable lvalue.  The expansion has no trailing semicolon; the caller
 * supplies it. */
72 #define GET_XER(_lval) \
73 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
/* Read both registers in one statement: GET_CR into _lval_cr, then GET_XER
 * into _lval_xer.  Wrapped in do { } while (0) so it behaves as a single
 * statement after the caller's semicolon. */
75 #define GET_CR_XER(_lval_cr,_lval_xer) \
76 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
81 #define SET_XER_ZERO \
84 #define SET_CR_XER_ZERO \
85 do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
87 #define SET_FPSCR_ZERO \
88 do { double _d = 0.0; \
89 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
92 typedef unsigned char Bool
;
95 /* These functions below that construct a table of floating point
96 * values were lifted from none/tests/ppc32/jm-insns.c.
99 #if defined (DEBUG_ARGS_BUILD)
100 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
102 #define AB_DPRINTF(fmt, args...) do { } while (0)
105 static inline void register_farg (void *farg
,
106 int s
, uint16_t _exp
, uint64_t mant
)
110 tmp
= ((uint64_t)s
<< 63) | ((uint64_t)_exp
<< 52) | mant
;
111 *(uint64_t *)farg
= tmp
;
112 AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
113 s
, _exp
, mant
, *(uint64_t *)farg
, *(double *)farg
);
116 static inline void register_sp_farg (void *farg
,
117 int s
, uint16_t _exp
, uint32_t mant
)
120 tmp
= ((uint32_t)s
<< 31) | ((uint32_t)_exp
<< 23) | mant
;
121 *(uint32_t *)farg
= tmp
;
125 typedef struct fp_test_args
{
/* Tables of special floating-point inputs shared by the test drivers.
 * build_special_fargs_table() allocates spec_fargs (20 doubles) and
 * spec_sp_fargs (20 floats) with malloc, fills them, and stores the number
 * of entries in nb_special_fargs.
 * NOTE(review): several drivers in this file each call
 * build_special_fargs_table(), and every call assigns fresh malloc results
 * to these pointers -- confirm whether the earlier buffers are meant to be
 * released. */
130 static int nb_special_fargs
;
131 static double * spec_fargs
;
132 static float * spec_sp_fargs
;
134 static void build_special_fargs_table(void)
138 * Sign goes from zero to one (1 bit)
139 * Exponent goes from 0 to ((1 << 11) - 1) (11 bits)
140 * Mantissa goes from 0 to ((1 << 52) - 1) (52 bits)
142 * +0.0 : 0 0x000 0x0000000000000 => 0x0000000000000000
143 * -0.0 : 1 0x000 0x0000000000000 => 0x8000000000000000
144 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
145 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
146 * +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
147 * -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
148 * +QNaN : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
149 * -QNaN : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
156 * +0.0 : 0 0x00 0x000000 => 0x00000000
157 * -0.0 : 1 0x00 0x000000 => 0x80000000
158 * +infinity : 0 0xFF 0x000000 => 0x7F800000
159 * -infinity : 1 0xFF 0x000000 => 0xFF800000
160 * +SNaN : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
161 * -SNaN : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
162 * +QNaN : 0 0xFF 0x400000 => 0x7FC00000
163 * -QNaN : 1 0xFF 0x400000 => 0xFFC00000
175 spec_fargs
= malloc( 20 * sizeof(double) );
176 spec_sp_fargs
= malloc( 20 * sizeof(float) );
181 mant
= 0x8000000000000ULL
;
182 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
187 mant
= 0xf000000000000ULL
;
188 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
193 mant
= 0x8000000b77501ULL
;
194 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
199 mant
= 0x800000000051bULL
;
200 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
205 mant
= 0x3214569900000ULL
;
206 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
209 /* +0.0 : 0 0x000 0x0000000000000 */
213 mant
= 0x0000000000000ULL
;
214 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
216 /* -0.0 : 1 0x000 0x0000000000000 */
220 mant
= 0x0000000000000ULL
;
221 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
223 /* +infinity : 0 0x7FF 0x0000000000000 */
227 mant
= 0x0000000000000ULL
;
228 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
230 /* -infinity : 1 0x7FF 0x0000000000000 */
234 mant
= 0x0000000000000ULL
;
235 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
238 * This comment applies to values #9 and #10 below:
239 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
240 * so we can't just copy the double-precision value to the corresponding slot in the
241 * single-precision array (i.e., in the loop at the end of this function). Instead, we
242 * have to manually set the bits using register_sp_farg().
245 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */
249 mant
= 0x7FFFFFFFFFFFFULL
;
250 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
253 register_sp_farg(&spec_sp_fargs
[i
-1], s
, _exp
, mant_sp
);
255 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */
259 mant
= 0x7FFFFFFFFFFFFULL
;
260 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
263 register_sp_farg(&spec_sp_fargs
[i
-1], s
, _exp
, mant_sp
);
265 /* +QNaN : 0 0x7FF 0x8000000000000 */
269 mant
= 0x8000000000000ULL
;
270 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
272 /* -QNaN : 1 0x7FF 0x8000000000000 */
276 mant
= 0x8000000000000ULL
;
277 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
279 /* denormalized value */
283 mant
= 0x8340000078000ULL
;
284 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
286 /* Negative finite number */
290 mant
= 0x0650f5a07b353ULL
;
291 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
293 /* A few positive finite numbers ... */
297 mant
= 0x32585a9900000ULL
;
298 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
303 mant
= 0x82511a2000000ULL
;
304 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
309 mant
= 0x12ef5a9300000ULL
;
310 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
315 mant
= 0x14bf5d2300000ULL
;
316 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
321 mant
= 0x76bf982440000ULL
;
322 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
325 nb_special_fargs
= i
;
326 for (j
= 0; j
< i
; j
++) {
327 if (!(j
== 9 || j
== 10))
328 spec_sp_fargs
[j
] = spec_fargs
[j
];
332 static unsigned int vstg
[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
336 static unsigned int viargs
[] __attribute__ ((aligned (16))) = { 0x80000001,
/* NUM_VIARGS_INTS: number of elements in the viargs[] array.
 * NUM_VIARGS_VECS: that count divided by 4 (integer division), i.e. the
 * number of complete 4-word groups in viargs[]. */
345 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
346 #define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4)
349 static unsigned long long vdargs
[] __attribute__ ((aligned (16))) = {
350 0x0102030405060708ULL
,
351 0x090A0B0C0E0D0E0FULL
,
352 0xF1F2F3F4F5F6F7F8ULL
,
353 0xF9FAFBFCFEFDFEFFULL
/* NUM_VDARGS_INTS: number of elements in the vdargs[] array.
 * NUM_VDARGS_VECS: that count divided by 2 (integer division), i.e. the
 * number of complete 2-doubleword groups in vdargs[]. */
355 #define NUM_VDARGS_INTS (sizeof vdargs/sizeof vdargs[0])
356 #define NUM_VDARGS_VECS (NUM_VDARGS_INTS/2)
/* Pointer to a test routine taking no arguments and returning nothing.
 * The test tables in this file store their instruction wrappers and
 * category drivers through this type. */
358 typedef void (*test_func_t
)(void);
362 test_func_t test_category
;
369 SINGLE_TEST_SINGLE_RES
,
371 DOUBLE_TEST_SINGLE_RES
/* Evaluates to nonzero when x equals SINGLE_TEST or DOUBLE_TEST.
 * NOTE(review): x is expanded twice and without parentheses, so pass only a
 * simple, side-effect-free expression. */
373 #define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))
376 VX_FP_SMAS
, // multiply add single precision result
377 VX_FP_SMSS
, // multiply sub single precision result
378 VX_FP_SNMAS
, // negative multiply add single precision result
379 VX_FP_SNMSS
, // negative multiply sub single precision result
385 VX_SCALAR_CONV_TO_WORD
,
386 VX_SCALAR_SP_TO_VECTOR_SP
,
409 test_func_t test_func
;
411 fp_test_args_t
* targs
;
413 vx_fp_test_type test_type
;
418 test_func_t test_func
;
420 precision_type_t precision
;
428 test_func_t test_func
;
430 fp_test_args_t
* targs
;
432 precision_type_t precision
;
433 vx_fp_test_type test_type
;
439 test_func_t test_func
;
446 test_func_t test_func
;
450 struct vsx_logic_test
452 test_func_t test_func
;
/* Short aliases for the test-descriptor struct types; the test tables and
 * driver functions in this file are declared in terms of these names.
 * Note that xs_conv_test_t is an alias for struct simple_test. */
457 typedef struct vsx_logic_test logic_test_t
;
458 typedef struct ldst_test ldst_test_t
;
459 typedef struct simple_test xs_conv_test_t
;
460 typedef struct vx_fp_test1 vx_fp_test_basic_t
;
461 typedef struct vx_fp_test2 vx_fp_test2_t
;
462 typedef struct test_table test_table_t
;
/* File-scope vectors used as the operands of every inline-asm test wrapper
 * below: vec_inA and vec_inB are passed as "wa" inputs and vec_out is the
 * "=wa" (or "+wa") output.  The drivers fill the inputs with memcpy before a
 * test and read vec_out back through casted pointers afterwards. */
465 static vector
unsigned int vec_out
, vec_inA
, vec_inB
;
467 static void test_xscvdpspn(void)
469 __asm__
__volatile__ ("xscvdpspn %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
472 static void test_xscvspdpn(void)
474 __asm__
__volatile__ ("xscvspdpn %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
478 static void test_xsmadds(void)
481 __asm__
__volatile__ ("xsmaddasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
483 __asm__
__volatile__ ("xsmaddmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
486 static void test_xsmsubs(void)
489 __asm__
__volatile__ ("xsmsubasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
491 __asm__
__volatile__ ("xsmsubmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
494 static void test_xscvsxdsp (void)
496 __asm__
__volatile__ ("xscvsxdsp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
499 static void test_xscvuxdsp (void)
501 __asm__
__volatile__ ("xscvuxdsp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
504 static void test_xsnmadds(void)
507 __asm__
__volatile__ ("xsnmaddasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
509 __asm__
__volatile__ ("xsnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
512 static void test_xsnmsubs(void)
515 __asm__
__volatile__ ("xsnmsubasp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
517 __asm__
__volatile__ ("xsnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
520 static void test_stxsspx(void)
522 __asm__
__volatile__ ("stxsspx %x0, %1, %2" : : "wa" (vec_inA
), "b" (r14
),"r" (r15
));
525 static void test_stxsiwx(void)
527 __asm__
__volatile__ ("stxsiwx %x0, %1, %2" : : "wa" (vec_inA
), "b" (r14
),"r" (r15
));
530 static void test_lxsiwax(void)
532 __asm__
__volatile__ ("lxsiwax %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
535 static void test_lxsiwzx(void)
537 __asm__
__volatile__ ("lxsiwzx %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
540 static void test_lxsspx(void)
542 __asm__
__volatile__ ("lxsspx %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
545 static void test_xssqrtsp(void)
547 __asm__
__volatile__ ("xssqrtsp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
550 static void test_xsrsqrtesp(void)
552 __asm__
__volatile__ ("xsrsqrtesp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
555 /* Three argument instructions */
556 static void test_xxleqv(void)
558 __asm__
__volatile__ ("xxleqv %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
561 static void test_xxlorc(void)
563 __asm__
__volatile__ ("xxlorc %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
566 static void test_xxlnand(void)
568 __asm__
__volatile__ ("xxlnand %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
571 static void test_xsaddsp(void)
573 __asm__
__volatile__ ("xsaddsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
), "wa" (vec_inB
));
576 static void test_xssubsp(void)
578 __asm__
__volatile__ ("xssubsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
), "wa" (vec_inB
));
581 static void test_xsdivsp(void)
583 __asm__
__volatile__ ("xsdivsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
), "wa" (vec_inB
));
586 static void test_xsmulsp(void)
588 __asm__
__volatile__ ("xsmulsp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
591 static void test_xsresp(void)
593 __asm__
__volatile__ ("xsresp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
595 static void test_xsrsp(void)
597 __asm__
__volatile__ ("xsrsp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
600 fp_test_args_t vx_math_tests
[] = {
731 // These are all double precision inputs with double word outputs (mostly converted to single precision)
732 static vx_fp_test_basic_t vx_fp_tests
[] = {
733 { &test_xsmadds
, "xsmadd", vx_math_tests
, 64, VX_FP_SMAS
},
734 { &test_xsmsubs
, "xsmsub", vx_math_tests
, 64, VX_FP_SMSS
},
735 { &test_xsmulsp
, "xsmulsp", vx_math_tests
, 64, VX_FP_OTHER
},
736 { &test_xsdivsp
, "xsdivsp", vx_math_tests
, 64, VX_FP_OTHER
},
737 { &test_xsnmadds
, "xsnmadd", vx_math_tests
, 64, VX_FP_SNMAS
},
738 { &test_xsnmsubs
, "xsnmsub", vx_math_tests
, 64, VX_FP_SNMSS
},
739 { NULL
, NULL
, NULL
, 0, 0 }
743 vsx_one_fp_arg_tests
[] = {
744 { &test_xscvdpspn
, "xscvdpspn", NULL
, 20, DOUBLE_TEST_SINGLE_RES
, VX_SCALAR_SP_TO_VECTOR_SP
, "conv"},
745 { &test_xscvspdpn
, "xscvspdpn", NULL
, 20, SINGLE_TEST
, VX_DEFAULT
, "conv"},
746 { &test_xsresp
, "xsresp", NULL
, 20, DOUBLE_TEST
, VX_ESTIMATE
, "1/x"},
747 { &test_xsrsp
, "xsrsp", NULL
, 20, DOUBLE_TEST
, VX_DEFAULT
, "round"},
748 { &test_xsrsqrtesp
, "xsrsqrtesp", NULL
, 20, DOUBLE_TEST
, VX_ESTIMATE
, "1/sqrt"},
749 { &test_xssqrtsp
, "xssqrtsp", NULL
, 20, DOUBLE_TEST
, VX_DEFAULT
, "sqrt"},
750 { NULL
, NULL
, NULL
, 0, 0, 0, NULL
}
753 // These are all double precision inputs with double word outputs (mostly converted to single precision)
754 static vx_fp_test_basic_t
755 vx_simple_scalar_fp_tests
[] = {
756 { &test_xssubsp
, "xssubsp", vx_math_tests
, 64, VX_DEFAULT
},
757 { &test_xsaddsp
, "xsaddsp", vx_math_tests
, 64, VX_DEFAULT
},
758 { NULL
, NULL
, NULL
, 0 , 0}
763 { &test_stxsspx
, "stxsspx", DOUBLE_TEST_SINGLE_RES
, vstg
, 0, VSX_STORE
},
764 { &test_stxsiwx
, "stxsiwx", SINGLE_TEST_SINGLE_RES
, vstg
, 4, VSX_STORE
},
765 { &test_lxsiwax
, "lxsiwax", SINGLE_TEST
, viargs
, 0, VSX_LOAD
},
766 { &test_lxsiwzx
, "lxsiwzx", SINGLE_TEST
, viargs
, 4, VSX_LOAD
},
767 { &test_lxsspx
, "lxsspx", SINGLE_TEST
, NULL
, 0, VSX_LOAD
},
768 { NULL
, NULL
, 0, NULL
, 0, 0 } };
770 static xs_conv_test_t
772 { &test_xscvsxdsp
, "xscvsxdsp"},
773 { &test_xscvuxdsp
, "xscvuxdsp"},
779 { &test_xxleqv
, "xxleqv", VSX_EQV
},
780 { &test_xxlorc
, "xxlorc", VSX_ORC
},
781 { &test_xxlnand
, "xxlnand", VSX_NAND
},
785 Bool
check_reciprocal_estimate(Bool is_rsqrte
, int idx
, int output_vec_idx
)
788 * This function has been verified only with the xsresp and xsrsqrtesp instructions.
790 * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
791 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
792 * does an actual reciprocal calculation versus estimation, so the answer we get back from
793 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
794 * precision) and the estimate may still be within expected tolerances. On top of that,
795 * we can't count on these estimates always being the same across implementations.
796 * For example, with the fre[s] instruction (which should be correct to within one part
797 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
798 * one implementation could return 1.0111_1111_0000 and another implementation could return
799 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
800 * single bit in common.
802 * The upshot is we can't validate the VEX output for these instructions by comparing against
803 * stored bit patterns. We must check that the result is within expected tolerances.
806 /* A mask to be used for validation as a last resort.
807 * Only use 12 bits of precision for reasons discussed above.
809 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
813 double src_dp
, res_dp
;
816 double recip_divisor
;
821 Bool src_is_negative
= False
;
822 Bool res_is_negative
= False
;
823 unsigned long long * dst_dp
= NULL
;
824 unsigned long long * src_dp_ull
;
825 dst_dp
= (unsigned long long *) &vec_out
;
826 src_dp
= spec_fargs
[idx
];
827 src_dp_ull
= (unsigned long long *) &src_dp
;
828 src_is_negative
= (*src_dp_ull
& 0x8000000000000000ULL
) ? True
: False
;
829 res_is_negative
= (dst_dp
[output_vec_idx
] & 0x8000000000000000ULL
) ? True
: False
;
830 memcpy(&res_dp
, &dst_dp
[output_vec_idx
], 8);
833 // Below are common rules
835 return isnan(res_dp
);
836 if (fpclassify(src_dp
) == FP_ZERO
)
837 return isinf(res_dp
);
838 if (!src_is_negative
&& isinf(src_dp
))
839 return !res_is_negative
&& (fpclassify(res_dp
) == FP_ZERO
);
842 return isnan(res_dp
);
844 if (src_is_negative
&& isinf(src_dp
))
845 return res_is_negative
&& (fpclassify(res_dp
) == FP_ZERO
);
849 recip_divisor
= sqrt(src_dp
);
851 recip_divisor
= src_dp
;
853 /* The instructions handled by this function take a double precision
854 * input, perform a reciprocal estimate in double-precision, round
855 * the result to single precision and store into the destination
856 * register in double precision format. So, to check the result
857 * for accuracy, we use float (single precision) values.
859 div_result
= 1.0/recip_divisor
;
860 calc_diff_tmp
= recip_divisor
* 16384.0;
861 if (isnormal(calc_diff_tmp
)) {
862 calc_diff
= fabs(1.0/calc_diff_tmp
);
863 real_diff
= fabs((float)res_dp
- div_result
);
864 result
= ( ( res_dp
== div_result
)
865 || ( real_diff
<= calc_diff
) );
867 unsigned int * dv
= (unsigned int *)&div_result
;
868 unsigned int * rd
= (unsigned int *)&real_diff
;
869 unsigned int * cd
= (unsigned int *)&calc_diff
;
870 printf("\n\t {computed div_result: %08x; real_diff: %08x; calc_diff: %08x}\n",
875 /* Unable to compute theoretical difference, so we fall back to masking out
878 unsigned int * div_result_sp
= (unsigned int *)&div_result
;
879 float res_sp
= (float)res_dp
;
880 unsigned int * dst_sp
= (unsigned int *)&res_sp
;
882 unsigned int * calc_diff_tmp_sp
= (unsigned int *)&calc_diff_tmp
;
883 printf("Unable to compute theoretical difference, so we fall back to masking\n");
884 printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
885 *calc_diff_tmp_sp
, *div_result_sp
, *dst_sp
);
887 result
= (*dst_sp
& VSX_RECIP_ESTIMATE_MASK_SP
) == (*div_result_sp
& VSX_RECIP_ESTIMATE_MASK_SP
);
892 static void test_vx_fp_ops(void)
897 char * test_name
= (char *)malloc(20);
898 void * vecA_void_ptr
, * vecB_void_ptr
, * vecOut_void_ptr
;
901 vecA_void_ptr
= (void *)&vec_inA
+ 8;
902 vecB_void_ptr
= (void *)&vec_inB
+ 8;
903 vecOut_void_ptr
= (void *)&vec_out
+ 8;
905 vecA_void_ptr
= (void *)&vec_inA
;
906 vecB_void_ptr
= (void *)&vec_inB
;
907 vecOut_void_ptr
= (void *)&vec_out
;
911 build_special_fargs_table();
912 while ((func
= vx_fp_tests
[k
].test_func
)) {
914 unsigned long long * frap
, * frbp
, * dst
;
915 vx_fp_test_basic_t test_group
= vx_fp_tests
[k
];
916 vx_fp_test_type test_type
= test_group
.test_type
;
923 if (test_type
== VX_FP_SMAS
)
924 strcpy(test_name
, "xsmadd");
925 else if (test_type
== VX_FP_SMSS
)
926 strcpy(test_name
, "xsmsub");
927 else if (test_type
== VX_FP_SNMAS
)
928 strcpy(test_name
, "xsnmadd");
930 strcpy(test_name
, "xsnmsub");
934 strcat(test_name
, "asp");
939 strcpy(test_name
, test_group
.name
);
942 printf("ERROR: Invalid VX FP test type %d\n", test_type
);
947 for (i
= 0; i
< test_group
.num_tests
; i
++) {
948 unsigned int * inA
, * inB
, * pv
;
950 fp_test_args_t aTest
= test_group
.targs
[i
];
951 inA
= (unsigned int *)&spec_fargs
[aTest
.fra_idx
];
952 inB
= (unsigned int *)&spec_fargs
[aTest
.frb_idx
];
953 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
954 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
956 unsigned long long vsr_XT
;
957 pv
= (unsigned int *)&vec_out
;
959 // Only need to copy one doubleword into each vector's element 0
960 memcpy(vecA_void_ptr
, inA
, 8);
961 memcpy(vecB_void_ptr
, inB
, 8);
964 for (idx
= 0; idx
< 4; idx
++, pv
++)
967 if (test_type
!= VX_FP_OTHER
) {
968 /* Then we need a third src argument, which is stored in element 0 of
969 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>msp cases, VSX[XT] holds
970 * src3 and VSX[XB] holds src2; for the xs<ZZZ>asp cases, VSX[XT] holds
971 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test
972 * data (input args, result) contains only two inputs, so I arbitrarily
973 * use spec_fargs elements 4 and 14 (alternating) for the third source
974 * argument. We can use the same input data for a given pair of
975 * asp/msp-type instructions by swapping the src2 and src3 arguments; thus
976 * the expected result should be the same.
985 /* We're on the first time through of one of the VX_FP_SMx
986 * test types, meaning we're testing a xs<ZZZ>asp case, thus
987 * we have to swap inputs as described above:
991 memcpy(vecOut_void_ptr
, inB
, 8); // src2
992 memcpy(vecB_void_ptr
, &spec_fargs
[extra_arg_idx
], 8); //src3
993 frbp
= (unsigned long long *)&spec_fargs
[extra_arg_idx
];
995 // Don't need to init src2, as it's done before the switch()
996 memcpy(vecOut_void_ptr
, &spec_fargs
[extra_arg_idx
], 8); //src3
998 memcpy(&vsr_XT
, vecOut_void_ptr
, 8);
1002 dst
= (unsigned long long *) &vec_out
;
1006 if (test_type
== VX_FP_OTHER
)
1007 printf("#%d: %s %016llx %016llx = %016llx\n", i
, test_name
,
1008 *frap
, *frbp
, *dst
);
1010 printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i
,
1011 test_name
, vsr_XT
, *frap
, *frbp
, *dst
);
1016 // Debug code. Keep this block commented out except when debugging.
1017 double result, expected;
1018 memcpy(&result, dst, 8);
1019 memcpy(&expected, &aTest.dp_bin_result, 8);
1020 printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1021 spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1029 strcat(test_name
, "UNKNOWN");
1030 switch (test_type
) {
1035 if (test_type
== VX_FP_SMAS
)
1036 strcpy(test_name
, "xsmadd");
1037 else if (test_type
== VX_FP_SMSS
)
1038 strcpy(test_name
, "xsmsub");
1039 else if (test_type
== VX_FP_SNMAS
)
1040 strcpy(test_name
, "xsnmadd");
1042 strcpy(test_name
, "xsnmsub");
1045 strcat(test_name
, "msp");
1059 static void test_vsx_one_fp_arg(void)
1063 void * vecB_void_ptr
;
1066 build_special_fargs_table();
1068 while ((func
= vsx_one_fp_arg_tests
[k
].test_func
)) {
1070 unsigned long long *dst_dp
;
1071 unsigned int * dst_sp
;
1072 vx_fp_test2_t test_group
= vsx_one_fp_arg_tests
[k
];
1073 /* size of source operands */
1074 Bool dp
= ((test_group
.precision
== DOUBLE_TEST
) ||
1075 (test_group
.precision
== DOUBLE_TEST_SINGLE_RES
)) ? True
: False
;
1076 /* size of result */
1077 Bool dp_res
= IS_DP_RESULT(test_group
.precision
);
1078 Bool is_sqrt
= (strstr(test_group
.name
, "sqrt")) ? True
: False
;
1080 vecB_void_ptr
= (void *)&vec_inB
;
1082 vecB_void_ptr
+= dp
? 8 : 12;
1085 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1089 pv
= (unsigned int *)&vec_out
;
1091 for (idx
= 0; idx
< 4; idx
++, pv
++)
1096 unsigned long long * frB_dp
;
1098 vec_out_idx
= dp_res
? 1 : 3;
1102 if (test_group
.test_type
== VX_SCALAR_SP_TO_VECTOR_SP
) {
1103 /* Take a single-precision value stored in double word element 0
1104 * of src in double-precision format and convert to single-
1105 * precision and store in word element 0 of dst.
1107 double input
= spec_sp_fargs
[i
];
1108 memcpy(vecB_void_ptr
, (void *)&input
, 8);
1110 inB
= (void *)&spec_fargs
[i
];
1111 // copy double precision FP into input vector element 0
1112 memcpy(vecB_void_ptr
, inB
, 8);
1115 // execute test insn
1118 dst_dp
= (unsigned long long *) &vec_out
;
1120 dst_sp
= (unsigned int *) &vec_out
;
1122 printf("#%d: %s ", i
, test_group
.name
);
1123 frB_dp
= (unsigned long long *)&spec_fargs
[i
];
1124 printf("%s(%016llx)", test_group
.op
, *frB_dp
);
1125 if (test_group
.test_type
== VX_ESTIMATE
)
1128 res
= check_reciprocal_estimate(is_sqrt
, i
, vec_out_idx
);
1129 printf(" ==> %s)", res
? "PASS" : "FAIL");
1130 } else if (dp_res
) {
1131 printf(" = %016llx", dst_dp
[vec_out_idx
]);
1133 printf(" = %08x", dst_sp
[vec_out_idx
]);
1137 } else { // single precision test type
1140 vec_out_idx
= dp_res
? 1 : 3;
1143 // Clear input vector
1144 pv
= (unsigned int *)&vec_inB
;
1145 for (idx
= 0; idx
< 4; idx
++, pv
++)
1147 inB
= (void *)&spec_sp_fargs
[i
];
1148 // copy single precision FP into input vector element i
1149 memcpy(vecB_void_ptr
, inB
, 4);
1150 // execute test insn
1153 dst_dp
= (unsigned long long *) &vec_out
;
1155 dst_sp
= (unsigned int *) &vec_out
;
1157 printf("#%d: %s ", i
, test_group
.name
);
1158 printf("%s(%08x)", test_group
.op
, *((unsigned int *)&spec_sp_fargs
[i
]));
1160 printf(" = %016llx", dst_dp
[vec_out_idx
]);
1162 printf(" = %08x", dst_sp
[vec_out_idx
]);
1172 /* This function currently only supports two double precision input arguments. */
1173 static void test_vsx_two_fp_arg(void)
1177 void * vecA_void_ptr
, * vecB_void_ptr
;
1180 vecA_void_ptr
= (void *)&vec_inA
+ 8;
1181 vecB_void_ptr
= (void *)&vec_inB
+ 8;
1183 vecA_void_ptr
= (void *)&vec_inA
;
1184 vecB_void_ptr
= (void *)&vec_inB
;
1187 build_special_fargs_table();
1188 while ((func
= vx_simple_scalar_fp_tests
[k
].test_func
)) {
1189 unsigned long long * frap
, * frbp
, * dst
;
1192 vx_fp_test_basic_t test_group
= vx_simple_scalar_fp_tests
[k
];
1193 pv
= (unsigned int *)&vec_out
;
1195 for (idx
= 0; idx
< 4; idx
++, pv
++)
1200 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1201 fp_test_args_t aTest
= test_group
.targs
[i
];
1202 inA
= (void *)&spec_fargs
[aTest
.fra_idx
];
1203 inB
= (void *)&spec_fargs
[aTest
.frb_idx
];
1204 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
1205 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
1206 // Only need to copy one doubleword into each vector's element 0
1207 memcpy(vecA_void_ptr
, inA
, 8);
1208 memcpy(vecB_void_ptr
, inB
, 8);
1210 dst
= (unsigned long long *) &vec_out
;
1213 printf("#%d: %s %016llx,%016llx => %016llx\n", i
, test_group
.name
,
1214 *frap
, *frbp
, *dst
);
1221 /* This function handles the following cases:
1222 * 1) Single precision value stored in double-precision
1223 * floating-point format in doubleword element 0 of src VSX register
1224 * 2) Integer word value stored in word element 1 of src VSX register
1226 static void _do_store_test (ldst_test_t storeTest
)
1229 unsigned int *dst32
;
1230 unsigned int i
, idx
;
1231 unsigned int * pv
= (unsigned int *) storeTest
.base_addr
;
1232 void * vecA_void_ptr
;
1235 if (storeTest
.precision
== SINGLE_TEST_SINGLE_RES
)
1236 vecA_void_ptr
= (void *)&vec_inA
+ 8;
1238 if (storeTest
.precision
== SINGLE_TEST_SINGLE_RES
)
1239 vecA_void_ptr
= (void *)&vec_inA
+ 4;
1241 vecA_void_ptr
= (void *)&vec_inA
;
1244 func
= storeTest
.test_func
;
1245 r14
= (HWord_t
) storeTest
.base_addr
;
1246 r15
= (HWord_t
) storeTest
.offset
;
1248 /* test some of the pre-defined single precision values */
1249 for (i
= 0; i
< nb_special_fargs
; i
+=3) {
1250 // clear out storage destination
1251 for (idx
= 0; idx
< 4; idx
++)
1254 printf( "%s:", storeTest
.name
);
1255 if (storeTest
.precision
== SINGLE_TEST_SINGLE_RES
)
1257 unsigned int * arg_ptr
= (unsigned int *)&spec_sp_fargs
[i
];
1258 memcpy(vecA_void_ptr
, arg_ptr
, sizeof(unsigned int));
1259 printf(" %08x ==> ", *arg_ptr
);
1261 unsigned long long * dp
;
1262 double input
= spec_sp_fargs
[i
];
1263 dp
= (unsigned long long *)&input
;
1264 memcpy(vecA_void_ptr
, dp
, sizeof(unsigned long long));
1265 printf(" %016llx ==> ", *dp
);
1268 // execute test insn
1270 dst32
= (unsigned int*)(storeTest
.base_addr
);
1271 dst32
+= (storeTest
.offset
/sizeof(int));
1272 printf( "%08x\n", *dst32
);
1278 static void _do_load_test(ldst_test_t loadTest
)
1282 unsigned long long * dst_dp
;
1284 func
= loadTest
.test_func
;
1285 r15
= (HWord_t
) loadTest
.offset
;
1287 if (loadTest
.base_addr
== NULL
) {
1288 /* Test lxsspx: source is single precision value, so let's */
1289 /* test some of the pre-defined single precision values. */
1290 int num_loops
= (loadTest
.offset
== 0) ? nb_special_fargs
: (nb_special_fargs
- (loadTest
.offset
/sizeof(int)));
1291 for (i
= 0; i
< num_loops
; i
+=3) {
1292 unsigned int * sp
= (unsigned int *)&spec_sp_fargs
[i
+ (loadTest
.offset
/sizeof(int))];
1293 printf( "%s:", loadTest
.name
);
1294 printf(" %08x ==> ", *sp
);
1295 r14
= (HWord_t
)&spec_sp_fargs
[i
];
1297 // execute test insn
1299 dst_dp
= (unsigned long long *) &vec_out
;
1302 printf("%016llx\n", *dst_dp
);
1305 // source is an integer word
1306 int num_loops
= (loadTest
.offset
== 0) ? NUM_VIARGS_INTS
: (NUM_VIARGS_INTS
- (loadTest
.offset
/sizeof(int)));
1307 for (i
= 0; i
< num_loops
; i
++) {
1308 printf( "%s:", loadTest
.name
);
1309 r14
= (HWord_t
)&viargs
[i
];
1310 printf(" %08x ==> ", viargs
[i
+ (loadTest
.offset
/sizeof(int))]);
1312 // execute test insn
1314 dst_dp
= (unsigned long long *) &vec_out
;
1317 printf("%016llx\n", *dst_dp
);
1323 static void test_ldst(void)
1327 while (ldst_tests
[k
].test_func
) {
1328 if (ldst_tests
[k
].type
== VSX_STORE
)
1329 _do_store_test(ldst_tests
[k
]);
1331 _do_load_test(ldst_tests
[k
]);
1338 static void test_xs_conv_ops(void)
1343 void * vecB_void_ptr
;
1346 vecB_void_ptr
= (void *)&vec_inB
+ 8;
1348 vecB_void_ptr
= (void *)&vec_inB
;
1350 build_special_fargs_table();
1351 while ((func
= xs_conv_tests
[k
].test_func
)) {
1353 unsigned long long * dst
;
1354 xs_conv_test_t test_group
= xs_conv_tests
[k
];
1355 for (i
= 0; i
< NUM_VDARGS_INTS
; i
++) {
1356 unsigned long long * inB
, * pv
;
1358 inB
= (unsigned long long *)&vdargs
[i
];
1359 memcpy(vecB_void_ptr
, inB
, 8);
1360 pv
= (unsigned long long *)&vec_out
;
1362 for (idx
= 0; idx
< 2; idx
++, pv
++)
1365 dst
= (unsigned long long *) &vec_out
;
1368 printf("#%d: %s %016llx => %016llx\n", i
, test_group
.name
, vdargs
[i
], *dst
);
1377 static void test_vsx_logic(void)
1384 while ((func
= logic_tests
[k
].test_func
)) {
1387 unsigned int * inA
, * inB
, * dst
;
1389 aTest
= logic_tests
[k
];
1390 for (i
= 0; i
<= NUM_VIARGS_VECS
; i
+=4) {
1391 pv
= (unsigned int *)&vec_out
;
1394 memcpy(&vec_inA
, inA
, sizeof(vector
unsigned int));
1395 memcpy(&vec_inB
, inB
, sizeof(vector
unsigned int));
1397 for (idx
= 0; idx
< 4; idx
++, pv
++)
1400 // execute test insn
1402 dst
= (unsigned int*) &vec_out
;
1404 printf( "#%d: %10s ", k
, aTest
.name
);
1405 printf( " (%08x %08x %08x %08x, ", inA
[0], inA
[1], inA
[2], inA
[3]);
1406 printf( " %08x %08x %08x %08x)", inB
[0], inB
[1], inB
[2], inB
[3]);
1407 printf(" ==> %08x %08x %08x %08x\n", dst
[0], dst
[1], dst
[2], dst
[3]);
1415 //----------------------------------------------------------
1417 static test_table_t all_tests
[] = {
1419 "Test VSX floating point instructions"},
1420 { &test_vsx_one_fp_arg
,
1421 "Test VSX vector and scalar single argument instructions"} ,
1423 "Test VSX logic instructions" },
1424 { &test_xs_conv_ops
,
1425 "Test VSX scalar integer conversion instructions" },
1427 "Test VSX load/store dp to sp instructions" },
1428 { &test_vsx_two_fp_arg
,
1429 "Test VSX vector and scalar two argument instructions"} ,
1435 int main(int argc
, char *argv
[])
1443 while ((func
= all_tests
[i
].test_category
)) {
1444 aTest
= all_tests
[i
];
1445 printf( "%s\n", aTest
.name
);
1450 printf("NO ISA 2.07 SUPPORT\n");