1 /* Copyright (C) 2011 IBM
3 Author: Maynard Johnson <maynardj@us.ibm.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>.
18 The GNU General Public License is contained in the file COPYING.
32 typedef uint32_t HWord_t
;
34 typedef uint64_t HWord_t
;
35 #endif /* __powerpc64__ */
37 #ifdef VGP_ppc64le_linux
43 register HWord_t r14
__asm__ ("r14");
44 register HWord_t r15
__asm__ ("r15");
45 register HWord_t r16
__asm__ ("r16");
46 register HWord_t r17
__asm__ ("r17");
47 register double f14
__asm__ ("fr14");
48 register double f15
__asm__ ("fr15");
49 register double f16
__asm__ ("fr16");
50 register double f17
__asm__ ("fr17");
52 static volatile unsigned int cond_reg
;
54 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
/* Load the condition register from _arg with "mtcr"; every CR field named in
 * ALLCR is declared clobbered.  The expansion deliberately carries no
 * trailing semicolon (matching GET_CR/GET_XER): the caller writes it, which
 * keeps the macro usable as the body of an unbraced if/else. */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR )
/* Load XER from _arg with "mtxer"; "xer" is declared clobbered.  The
 * expansion deliberately carries no trailing semicolon (matching
 * GET_CR/GET_XER): the caller writes it, which keeps the macro usable as the
 * body of an unbraced if/else. */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" )
62 #define GET_CR(_lval) \
63 __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )
65 #define GET_XER(_lval) \
66 __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
68 #define GET_CR_XER(_lval_cr,_lval_xer) \
69 do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
74 #define SET_XER_ZERO \
77 #define SET_CR_XER_ZERO \
78 do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
80 #define SET_FPSCR_ZERO \
81 do { double _d = 0.0; \
82 __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
86 typedef void (*test_func_t
)(void);
87 typedef struct ldst_test ldst_test_t
;
88 typedef struct vsx_logic_test logic_test_t
;
89 typedef struct xs_conv_test xs_conv_test_t
;
90 typedef struct p7_fp_test fp_test_t
;
91 typedef struct vx_fp_test vx_fp_test_t
;
92 typedef struct vsx_move_test move_test_t
;
93 typedef struct vsx_permute_test permute_test_t
;
94 typedef struct test_table test_table_t
;
96 static double *fargs
= NULL
;
99 /* These functions below that construct a table of floating point
100 * values were lifted from none/tests/ppc32/jm-insns.c.
/* Debug tracing for the argument-table builders: prints to stderr only when
 * DEBUG_ARGS_BUILD is defined; otherwise expands to an empty statement and
 * its arguments are not evaluated. */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit floating point bit pattern from its fields and store it
 * at farg.
 *
 *   farg  destination; 8 bytes are written (callers pass a double slot)
 *   s     sign, placed at bit 63
 *   _exp  exponent, placed starting at bit 52 (not masked here)
 *   mant  fraction, OR-ed into the low bits (not masked here)
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Copy the bytes instead of storing through a uint64_t* cast: the
    * destination is read back as a double, and the cast would break the
    * strict-aliasing rule. */
   memcpy(farg, &bits, sizeof bits);

   /* %llx expects unsigned long long; uint64_t may be unsigned long. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant,
              (unsigned long long)bits, *(double *)farg);
}
120 static void build_fargs_table(void)
123 * Sign goes from zero to one (1 bit)
 * Exponent goes from 0 to ((1 << 11) - 1) (11 bits)
125 * Mantissa goes from 1 to ((1 << 52) - 1) (52 bits)
127 * +0.0 : 0 0x000 0x0000000000000 => 0x0000000000000000
128 * -0.0 : 1 0x000 0x0000000000000 => 0x8000000000000000
129 * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
130 * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
131 * +QNaN : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
132 * -QNaN : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
133 * +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
134 * -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
141 * +0.0 : 0 0x00 0x000000 => 0x00000000
142 * -0.0 : 1 0x00 0x000000 => 0x80000000
143 * +infinity : 0 0xFF 0x000000 => 0x7F800000
144 * -infinity : 1 0xFF 0x000000 => 0xFF800000
145 * +QNaN : 0 0xFF 0x400000 => 0x7FC00000
146 * -QNaN : 1 0xFF 0x400000 => 0xFFC00000
147 * +SNaN : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
148 * -SNaN : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
159 fargs
= malloc( 16 * sizeof(double) );
160 for (s
= 0; s
< 2; s
++) {
161 for (e1
= 0x001;; e1
= ((e1
+ 1) << 13) + 7) {
165 for (mant
= 0x0000000000001ULL
; mant
< (1ULL << 52);
166 /* Add 'random' bits */
167 mant
= ((mant
+ 0x4A6) << 29) + 0x359) {
168 register_farg( &fargs
[i
++], s
, _exp
, mant
);
174 // add a few smaller values to fargs . . .
177 mant
= 0x0000000000b01ULL
;
178 register_farg(&fargs
[i
++], s
, _exp
, mant
);
181 mant
= 0x00000203f0b3dULL
;
182 register_farg(&fargs
[i
++], s
, _exp
, mant
);
184 mant
= 0x00000005a203dULL
;
185 register_farg(&fargs
[i
++], s
, _exp
, mant
);
189 mant
= 0x0000000000b01ULL
;
190 register_farg(&fargs
[i
++], s
, _exp
, mant
);
193 mant
= 0x00000203f0b3dULL
;
194 register_farg(&fargs
[i
++], s
, _exp
, mant
);
200 typedef struct fp_test_args
{
207 fp_test_args_t ftdiv_tests
[] = {
224 fp_test_args_t xscmpX_tests
[] = {
291 fp_test_args_t xsadddp_tests
[] = {
358 fp_test_args_t xsdivdp_tests
[] = {
425 fp_test_args_t xsmaddXdp_tests
[] = {
492 fp_test_args_t xsmsubXdp_tests
[] = {
559 fp_test_args_t xsnmaddXdp_tests
[] = {
626 fp_test_args_t xsmuldp_tests
[] = {
693 fp_test_args_t xssubdp_tests
[] = {
762 static int nb_special_fargs
;
763 static double * spec_fargs
;
765 static void build_special_fargs_table(void)
767 /* The special floating point values created below are for
768 * use in the ftdiv tests for setting the fe_flag and fg_flag,
769 * but they can also be used for other tests (e.g., xscmpudp).
 * Note that fl_flag is always '1' on ppc64 Linux.
773 Entry Sign Exp fraction Special value
774 0 0 3fd 0x8000000000000ULL Positive finite number
775 1 0 404 0xf000000000000ULL ...
776 2 0 001 0x8000000b77501ULL ...
777 3 0 7fe 0x800000000051bULL ...
778 4 0 012 0x3214569900000ULL ...
779 5 0 000 0x0000000000000ULL +0.0 (+zero)
780 6 1 000 0x0000000000000ULL -0.0 (-zero)
781 7 0 7ff 0x0000000000000ULL +infinity
782 8 1 7ff 0x0000000000000ULL -infinity
783 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN
784 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN
785 11 0 7ff 0x8000000000000ULL +QNaN
786 12 1 7ff 0x8000000000000ULL -QNaN
787 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction)
788 14 1 40d 0x0650f5a07b353ULL Negative finite number
799 spec_fargs
= malloc( 16 * sizeof(double) );
804 mant
= 0x8000000000000ULL
;
805 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
810 mant
= 0xf000000000000ULL
;
811 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
813 /* None of the ftdiv tests succeed.
814 * FRA = value #0; FRB = value #1
816 * fl_flag || fg_flag || fe_flag = 100
819 /*************************************************
822 *************************************************/
824 /* fe_flag <- 1 if FRA is a NaN
825 * FRA = value #9; FRB = value #1
826 * e_a = 1024; e_b = 5
827 * fl_flag || fg_flag || fe_flag = 101
830 /* fe_flag <- 1 if FRB is a NaN
831 * FRA = value #1; FRB = value #12
832 * e_a = 5; e_b = 1024
833 * fl_flag || fg_flag || fe_flag = 101
836 /* fe_flag <- 1 if e_b <= -1022
837 * FRA = value #0; FRB = value #2
838 * e_a = -2; e_b = -1022
839 * fl_flag || fg_flag || fe_flag = 101
845 mant
= 0x8000000b77501ULL
;
846 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
848 /* fe_flag <- 1 if e_b >= 1021
849 * FRA = value #1; FRB = value #3
850 * e_a = 5; e_b = 1023
851 * fl_flag || fg_flag || fe_flag = 101
856 mant
= 0x800000000051bULL
;
857 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
859 /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
860 * Let FRA = value #3 and FRB be value #0.
861 * e_a = 1023; e_b = -2
862 * fl_flag || fg_flag || fe_flag = 101
865 /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
866 * Let FRA = value #0 above and FRB be value #3 above
867 * e_a = -2; e_b = 1023
868 * fl_flag || fg_flag || fe_flag = 101
871 /* fe_flag <- 1 if FRA != 0 && e_a <= -970
872 * Let FRA = value #4 and FRB be value #0
873 * e_a = -1005; e_b = -2
874 * fl_flag || fg_flag || fe_flag = 101
879 mant
= 0x3214569900000ULL
;
880 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
882 /*************************************************
885 *************************************************/
886 /* fg_flag <- 1 if FRA is an Infinity
887 * NOTE: FRA = Inf also sets fe_flag
888 * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
890 * Let FRA be value #7 and FRB be value #1
891 * e_a = 1024; e_b = 5
892 * fl_flag || fg_flag || fe_flag = 111
895 * Let FRA be value #8 and FRB be value #1
896 * e_a = 1024; e_b = 5
897 * fl_flag || fg_flag || fe_flag = 111
901 /* fg_flag <- 1 if FRB is an Infinity
902 * NOTE: FRB = Inf also sets fe_flag
903 * Let FRA be value #1 and FRB be value #7
904 * e_a = 5; e_b = 1024
905 * fl_flag || fg_flag || fe_flag = 111
908 /* fg_flag <- 1 if FRB is denormalized
909 * NOTE: e_b < -1022 ==> fe_flag <- 1
910 * Let FRA be value #0 and FRB be value #13
911 * e_a = -2; e_b = -1023
912 * fl_flag || fg_flag || fe_flag = 111
915 /* fg_flag <- 1 if FRB is +zero
 * NOTE: FRB = zero also sets fe_flag
 * Let FRA = val #5; FRB = val #5
 * e_a = -1023; e_b = -1023
919 * fl_flag || fg_flag || fe_flag = 111
922 /* fg_flag <- 1 if FRB is -zero
 * NOTE: FRB = zero also sets fe_flag
 * Let FRA = val #5; FRB = val #6
 * e_a = -1023; e_b = -1023
926 * fl_flag || fg_flag || fe_flag = 111
930 /* +0.0 : 0 0x000 0x0000000000000 */
934 mant
= 0x0000000000000ULL
;
935 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
937 /* -0.0 : 1 0x000 0x0000000000000 */
941 mant
= 0x0000000000000ULL
;
942 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
944 /* +infinity : 0 0x7FF 0x0000000000000 */
948 mant
= 0x0000000000000ULL
;
949 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
951 /* -infinity : 1 0x7FF 0x0000000000000 */
955 mant
= 0x0000000000000ULL
;
956 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
958 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */
962 mant
= 0x7FFFFFFFFFFFFULL
;
963 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
965 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */
969 mant
= 0x7FFFFFFFFFFFFULL
;
970 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
972 /* +QNaN : 0 0x7FF 0x8000000000000 */
976 mant
= 0x8000000000000ULL
;
977 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
979 /* -QNaN : 1 0x7FF 0x8000000000000 */
983 mant
= 0x8000000000000ULL
;
984 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
986 /* denormalized value */
990 mant
= 0x8340000078000ULL
;
991 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
993 /* Negative finite number */
997 mant
= 0x0650f5a07b353ULL
;
998 register_farg(&spec_fargs
[i
++], s
, _exp
, mant
);
1000 nb_special_fargs
= i
;
1006 test_func_t test_category
;
1012 test_func_t test_func
;
1014 int single
; // 1=single precision result; 0=double precision result
1027 test_func_t test_func
;
1029 fp_test_args_t
* targs
;
1031 vx_fp_test_type test_type
;
1036 test_func_t test_func
;
1049 test_func_t test_func
;
1053 int num_words_to_process
;
1065 struct vsx_logic_test
1067 test_func_t test_func
;
1072 struct vsx_move_test
1074 test_func_t test_func
;
1078 struct vsx_permute_test
1080 test_func_t test_func
;
1086 static vector
unsigned int vec_out
, vec_inA
, vec_inB
;
1088 static void test_lxsdx(void)
1090 __asm__
__volatile__ ("lxsdx %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
1096 __asm__
__volatile__ ("lxvd2x %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
1099 static void test_lxvdsx(void)
1101 __asm__
__volatile__ ("lxvdsx %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
1104 static void test_lxvw4x(void)
1106 __asm__
__volatile__ ("lxvw4x %x0, %1, %2" : "=wa" (vec_out
): "b" (r14
),"r" (r15
));
1109 static void test_stxsdx(void)
1111 __asm__
__volatile__ ("stxsdx %x0, %1, %2" : : "wa" (vec_inA
), "b" (r14
),"r" (r15
));
1114 static void test_stxvd2x(void)
1116 __asm__
__volatile__ ("stxvd2x %x0, %1, %2" : : "wa" (vec_inA
), "b" (r14
),"r" (r15
));
1119 static void test_stxvw4x(void)
1121 __asm__
__volatile__ ("stxvw4x %x0, %1, %2" : : "wa" (vec_inA
), "b" (r14
),"r" (r15
));
1124 static void test_xxlxor(void)
1126 __asm__
__volatile__ ("xxlxor %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1129 static void test_xxlor(void)
1131 __asm__
__volatile__ ("xxlor %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1134 static void test_xxlnor(void)
1136 __asm__
__volatile__ ("xxlnor %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1139 static void test_xxland(void)
1141 __asm__
__volatile__ ("xxland %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1144 static void test_xxlandc(void)
1146 __asm__
__volatile__ ("xxlandc %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1149 static void test_xxmrghw(void)
1151 __asm__
__volatile__ ("xxmrghw %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1154 static void test_xxmrglw(void)
1156 __asm__
__volatile__ ("xxmrglw %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1159 static void test_xxpermdi_00(void)
1161 __asm__
__volatile__ ("xxpermdi %x0, %x1, %x2, 0x0" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1164 static void test_xxpermdi_01(void)
1166 __asm__
__volatile__ ("xxpermdi %x0, %x1, %x2, 0x1" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1169 static void test_xxpermdi_10(void)
1171 __asm__
__volatile__ ("xxpermdi %x0, %x1, %x2, 0x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1174 static void test_xxpermdi_11(void)
1176 __asm__
__volatile__ ("xxpermdi %x0, %x1, %x2, 0x3" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1179 static void test_xxsldwi_0(void)
1181 __asm__
__volatile__ ("xxsldwi %x0, %x1, %x2, 0" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1184 static void test_xxsldwi_1(void)
1186 __asm__
__volatile__ ("xxsldwi %x0, %x1, %x2, 1" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1189 static void test_xxsldwi_2(void)
1191 __asm__
__volatile__ ("xxsldwi %x0, %x1, %x2, 2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1194 static void test_xxsldwi_3(void)
1196 __asm__
__volatile__ ("xxsldwi %x0, %x1, %x2, 3" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1199 static void test_fcfids (void)
1201 __asm__
__volatile__ ("fcfids %0, %1" : "=f" (f17
): "d" (f14
));
1204 static void test_fcfidus (void)
1206 __asm__
__volatile__ ("fcfidus %0, %1" : "=f" (f17
): "d" (f14
));
1209 static void test_fcfidu (void)
1211 __asm__
__volatile__ ("fcfidu %0, %1" : "=f" (f17
): "d" (f14
));
1214 static void test_xsabsdp (void)
1216 __asm__
__volatile__ ("xsabsdp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1219 static void test_xscpsgndp (void)
1221 __asm__
__volatile__ ("xscpsgndp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1224 static void test_xsnabsdp (void)
1226 __asm__
__volatile__ ("xsnabsdp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1229 static void test_xsnegdp (void)
1231 __asm__
__volatile__ ("xsnegdp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1234 static int do_cmpudp
;
1235 static void test_xscmp (void)
1238 __asm__
__volatile__ ("xscmpudp cr1, %x0, %x1" : : "wa" (vec_inA
),"wa" (vec_inB
));
1240 __asm__
__volatile__ ("xscmpodp cr1, %x0, %x1" : : "wa" (vec_inA
),"wa" (vec_inB
));
1243 static void test_xsadddp(void)
1245 __asm__
__volatile__ ("xsadddp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1248 static void test_xsdivdp(void)
1250 __asm__
__volatile__ ("xsdivdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1254 static void test_xsmadd(void)
1257 __asm__
__volatile__ ("xsmaddadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1259 __asm__
__volatile__ ("xsmaddmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1262 static void test_xsmsub(void)
1265 __asm__
__volatile__ ("xsmsubadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1267 __asm__
__volatile__ ("xsmsubmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1270 static void test_xsnmadd(void)
1273 __asm__
__volatile__ ("xsnmaddadp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1275 __asm__
__volatile__ ("xsnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1278 static void test_xsmuldp(void)
1280 __asm__
__volatile__ ("xsmuldp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1283 static void test_xssubdp(void)
1285 __asm__
__volatile__ ("xssubdp %x0, %x1, %x2" : "=wa" (vec_out
): "wa" (vec_inA
),"wa" (vec_inB
));
1288 static void test_xscvdpsxds (void)
1290 __asm__
__volatile__ ("xscvdpsxds %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1293 static void test_xscvsxddp (void)
1295 __asm__
__volatile__ ("xscvsxddp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1298 static void test_xscvuxddp (void)
1300 __asm__
__volatile__ ("xscvuxddp %x0, %x1" : "=wa" (vec_out
): "wa" (vec_inB
));
1303 static unsigned int vstg
[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
1306 #define NUM_VSTG_INTS (sizeof vstg/sizeof vstg[0])
1307 #define NUM_VSTG_VECS (NUM_VSTG_INTS/4)
1309 static unsigned int viargs
[] __attribute__ ((aligned (16))) = { 0x01234567,
1321 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
1322 #define NUM_VIARGS_VECS (NUM_VIARGS_INTS/4)
1324 static ldst_test_t ldst_tests
[] = { { &test_lxsdx
, "lxsdx", viargs
, 0, 2, VSX_LOAD
},
1325 { &test_lxsdx
, "lxsdx", viargs
, 4, 2, VSX_LOAD
},
1326 { &test_lxvd2x
, "lxvd2x", viargs
, 0, 4, VSX_LOAD
},
1327 { &test_lxvd2x
, "lxvd2x", viargs
, 4, 4, VSX_LOAD
},
1328 { &test_lxvdsx
, "lxvdsx", viargs
, 0, 4, VSX_LOAD_SPLAT
},
1329 { &test_lxvdsx
, "lxvdsx", viargs
, 4, 4, VSX_LOAD_SPLAT
},
1330 { &test_lxvw4x
, "lxvw4x", viargs
, 0, 4, VSX_LOAD
},
1331 { &test_lxvw4x
, "lxvw4x", viargs
, 4, 4, VSX_LOAD
},
1332 { &test_stxsdx
, "stxsdx", vstg
, 0, 2, VSX_STORE
},
1333 { &test_stxsdx
, "stxsdx", vstg
, 4, 2, VSX_STORE
},
1334 { &test_stxvd2x
, "stxvd2x", vstg
, 0, 4, VSX_STORE
},
1335 { &test_stxvd2x
, "stxvd2x", vstg
, 4, 4, VSX_STORE
},
1336 { &test_stxvw4x
, "stxvw4x", vstg
, 0, 4, VSX_STORE
},
1337 { &test_stxvw4x
, "stxvw4x", vstg
, 4, 4, VSX_STORE
},
1338 { NULL
, NULL
, NULL
, 0, 0, 0 } };
1340 static logic_test_t logic_tests
[] = { { &test_xxlxor
, "xxlxor", VSX_XOR
},
1341 { &test_xxlor
, "xxlor", VSX_OR
} ,
1342 { &test_xxlnor
, "xxlnor", VSX_NOR
},
1343 { &test_xxland
, "xxland", VSX_AND
},
1344 { &test_xxlandc
, "xxlandc", VSX_ANDC
},
1347 static move_test_t move_tests
[] = { { &test_xsabsdp
, "xsabsdp" },
1348 { &test_xscpsgndp
, "xscpsgndp" },
1349 { &test_xsnabsdp
, "xsnabsdp" },
1350 { &test_xsnegdp
, "xsnegdp" },
1355 static permute_test_t permute_tests
[] =
1357 { &test_xxmrghw
, "xxmrghw",
1358 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1359 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1361 { &test_xxmrghw
, "xxmrghw",
1362 { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff }, /* XA input */
1363 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XB input */
1365 { &test_xxmrglw
, "xxmrglw",
1366 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1367 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1369 { &test_xxmrglw
, "xxmrglw",
1370 { 0x00112233, 0x44556677, 0x8899aabb, 0xccddeeff}, /* XA input */
1371 { 0x11111111, 0x22222222, 0x33333333, 0x44444444}, /* XB input */
1373 { &test_xxpermdi_00
, "xxpermdi DM=00",
1374 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1375 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1377 { &test_xxpermdi_01
, "xxpermdi DM=01",
1378 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1379 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1381 { &test_xxpermdi_10
, "xxpermdi DM=10",
1382 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1383 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1385 { &test_xxpermdi_11
, "xxpermdi DM=11",
1386 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1387 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1389 { &test_xxsldwi_0
, "xxsldwi SHW=0",
1390 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1391 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1393 { &test_xxsldwi_1
, "xxsldwi SHW=1",
1394 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1395 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1397 { &test_xxsldwi_2
, "xxsldwi SHW=2",
1398 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1399 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1401 { &test_xxsldwi_3
, "xxsldwi SHW=3",
1402 { 0x11111111, 0x22222222, 0x33333333, 0x44444444 }, /* XA input */
1403 { 0x55555555, 0x66666666, 0x77777777, 0x88888888 }, /* XB input */
1408 static fp_test_t fp_tests
[] = { { &test_fcfids
, "fcfids", 1 },
1409 { &test_fcfidus
, "fcfidus", 1 },
1410 { &test_fcfidu
, "fcfidu", 1 },
1415 static vx_fp_test_t vx_fp_tests
[] = {
1416 { &test_xscmp
, "xscmp", xscmpX_tests
, 64, VX_FP_CMP
},
1417 { &test_xsadddp
, "xsadddp", xsadddp_tests
, 64, VX_FP_OTHER
},
1418 { &test_xsdivdp
, "xsdivdp", xsdivdp_tests
, 64, VX_FP_OTHER
},
1419 { &test_xsmadd
, "xsmadd", xsmaddXdp_tests
, 64, VX_FP_SMA
},
1420 { &test_xsmsub
, "xsmsub", xsmsubXdp_tests
, 64, VX_FP_SMS
},
1421 { &test_xsnmadd
, "xsnmadd", xsnmaddXdp_tests
, 64, VX_FP_SNMA
},
1422 { & test_xsmuldp
, "xsmuldp", xsmuldp_tests
, 64, VX_FP_OTHER
},
1423 { & test_xssubdp
, "xssubdp", xssubdp_tests
, 64, VX_FP_OTHER
},
1424 { NULL
, NULL
, NULL
, 0, 0 }
1427 static xs_conv_test_t xs_conv_tests
[] = {
1428 { &test_xscvdpsxds
, "xscvdpsxds", 15},
1429 { &test_xscvsxddp
, "xscvsxddp", 15},
1430 { &test_xscvuxddp
, "xscvuxddp", 15},
1434 #ifdef __powerpc64__
1435 static void test_ldbrx(void)
1439 unsigned char * byteIn
, * byteOut
;
1440 r14
= (HWord_t
)viargs
;
1441 // Just try the instruction an arbitrary number of times at different r15 offsets.
1442 for (i
= 0; i
< 3; i
++) {
1446 __asm__
__volatile__ ("ldbrx %0, %1, %2" : "=r" (reg_out
): "b" (r14
),"r" (r15
));
1447 byteIn
= ((unsigned char *)(r14
+ r15
));
1448 byteOut
= (unsigned char *)®_out
;
1451 for (k
= 0; k
< 8; k
++) {
1452 printf( " %02x", (byteIn
[k
]));
1454 printf(" (reverse) =>");
1455 for (j
= 0; j
< 8; j
++) {
1456 printf( " %02x", (byteOut
[j
]));
1467 unsigned long long src
= 0x9182736405504536ULL
;
1469 __asm__
__volatile__ ("popcntd %0, %1" : "=r" (res
): "r" (r14
));
1470 printf("popcntd: 0x%llx => %d\n", src
, (int)res
);
1481 r14
= (HWord_t
)viargs
;
1482 // Just try the instruction an arbitrary number of times at different r15 offsets.
1483 for (i
= 0; i
< 3; i
++) {
1486 __asm__
__volatile__ ("lfiwzx %0, %1, %2" : "=d" (reg_out
): "b" (r14
),"r" (r15
));
1487 src
= ((unsigned int *)(r14
+ r15
));
1488 printf("lfiwzx: %u => %llu.00\n", *src
, (unsigned long long)reg_out
);
1494 static void test_vx_fp_ops(void)
1499 char * test_name
= (char *)malloc(20);
1502 build_special_fargs_table();
1503 while ((func
= vx_fp_tests
[k
].test_func
)) {
1504 int i
, condreg
, repeat
= 0;
1506 unsigned long long * frap
, * frbp
, * dst
;
1507 vx_fp_test_t test_group
= vx_fp_tests
[k
];
1508 vx_fp_test_type test_type
= test_group
.test_type
;
1510 switch (test_type
) {
1512 strcpy(test_name
, "xscmp");
1515 strcat(test_name
, "udp");
1522 if (test_type
== VX_FP_SMA
)
1523 strcpy(test_name
, "xsmadd");
1524 else if (test_type
== VX_FP_SMS
)
1525 strcpy(test_name
, "xsmsub");
1527 strcpy(test_name
, "xsnmadd");
1530 strcat(test_name
, "adp");
1535 strcpy(test_name
, test_group
.name
);
1538 printf("ERROR: Invalid VX FP test type %d\n", test_type
);
1543 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1544 unsigned int * inA
, * inB
, * pv
;
1545 double * dpA
= (double *)&vec_inA
;
1546 double * dpB
= (double *)&vec_inB
;
1547 double * dpT
= (double *)&vec_out
;
1549 fp_test_args_t aTest
= test_group
.targs
[i
];
1550 inA
= (unsigned int *)&spec_fargs
[aTest
.fra_idx
];
1551 inB
= (unsigned int *)&spec_fargs
[aTest
.frb_idx
];
1552 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
1553 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
1554 // Only need to copy one doubleword into each vector's element 0
1556 // With LE, vector element 0 is the second doubleword from the left
1562 memcpy(dpA
, inA
, 8);
1563 memcpy(dpB
, inB
, 8);
1565 switch (test_type
) {
1571 condreg
= (flags
& 0x0f000000) >> 24;
1572 printf("#%d: %s %016llx <=> %016llx ? %x (CRx)\n", i
, test_name
, *frap
, *frbp
, condreg
);
1573 // printf("\tFRA: %e; FRB: %e\n", spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx]);
1574 if ( condreg
!= aTest
.cr_flags
) {
1575 printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest
.cr_flags
, condreg
);
1584 unsigned long long vsr_XT
;
1585 pv
= (unsigned int *)&vec_out
;
1587 for (idx
= 0; idx
< 4; idx
++, pv
++)
1590 if (test_type
!= VX_FP_OTHER
) {
1591 /* Then we need a third src argument, which is stored in element 0 of
1592 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>mdp cases, VSX[XT] holds
1593 * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
1594 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test
1595 * data (input args) contain only two inputs, so I arbitrarily
1596 * use spec_fargs elements 4 and 14 (alternating) for the third source
1597 * argument. We can use the same input data for a given pair of
1598 * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
1599 * the expected result should be the same.
1608 /* We're on the first time through of one of the VX_FP_SMx
1609 * test types, meaning we're testing a xs<ZZZ>adp case, thus we
1610 * have to swap inputs as described above:
1616 memcpy(dpT
, inB
, 8); // src2
1617 memcpy(dpB
, &spec_fargs
[extra_arg_idx
], 8); //src3
1618 frbp
= (unsigned long long *)&spec_fargs
[extra_arg_idx
];
1620 // Don't need to init src2, as it's done before the switch()
1623 memcpy(dpT
, &spec_fargs
[extra_arg_idx
], 8); //src3
1625 memcpy(&vsr_XT
, dpT
, 8);
1629 dst
= (unsigned long long *) &vec_out
;
1632 if (test_type
== VX_FP_OTHER
)
1633 printf("#%d: %s %016llx %016llx = %016llx\n", i
, test_name
, *frap
, *frbp
, *dst
);
1635 printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i
,
1636 test_name
, vsr_XT
, *frap
, *frbp
, *dst
);
1640 // Debug code. Keep this block commented out except when debugging.
1641 double result, expected;
1642 memcpy(&result, dst, 8);
1643 memcpy(&expected, &aTest.dp_bin_result, 8);
1644 printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1645 spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1659 switch (test_type
) {
1661 strcpy(test_name
, "xscmp");
1662 strcat(test_name
, "odp");
1668 if (test_type
== VX_FP_SMA
)
1669 strcpy(test_name
, "xsmadd");
1670 else if (test_type
== VX_FP_SMS
)
1671 strcpy(test_name
, "xsmsub");
1673 strcpy(test_name
, "xsnmadd");
1674 strcat(test_name
, "mdp");
1688 static void test_xs_conv_ops(void)
1693 double * dpB
= (double *)&vec_inB
;
1699 build_special_fargs_table();
1700 while ((func
= xs_conv_tests
[k
].test_func
)) {
1702 unsigned long long * frbp
, * dst
;
1703 xs_conv_test_t test_group
= xs_conv_tests
[k
];
1704 for (i
= 0; i
< test_group
.num_tests
; i
++) {
1705 unsigned int * inB
, * pv
;
1707 inB
= (unsigned int *)&spec_fargs
[i
];
1708 frbp
= (unsigned long long *)&spec_fargs
[i
];
1710 memcpy(dpB
, inB
, 8);
1711 pv
= (unsigned int *)&vec_out
;
1713 for (idx
= 0; idx
< 4; idx
++, pv
++)
1716 dst
= (unsigned long long *) &vec_out
;
1719 printf("#%d: %s %016llx => %016llx\n", i
, test_group
.name
, *frbp
, *dst
);
1728 static void do_load_test(ldst_test_t loadTest
)
1731 unsigned int *src
, *dst
;
1732 int splat
= loadTest
.type
== VSX_LOAD_SPLAT
? 1: 0;
1736 func
= loadTest
.test_func
;
1737 for (i
= 0, r14
= (HWord_t
) loadTest
.base_addr
; i
< NUM_VIARGS_VECS
; i
++) {
1742 unsigned int * pv
= (unsigned int *)&vec_out
;
1745 for (idx
= 0; idx
< 4; idx
++, pv
+=idx
)
1751 // execute test insn
1754 src
= (unsigned int*) (((unsigned char *)r14
) + j
);
1755 dst
= (unsigned int*) &vec_out
;
1757 printf( "%s:", loadTest
.name
);
1758 for (m
= 0; m
< loadTest
.num_words_to_process
; m
++) {
1759 printf( " %08x", src
[splat
? m
% 2 : m
]);
1763 k
= loadTest
.num_words_to_process
;
1765 if (loadTest
.num_words_to_process
== 2) {
1771 for (; m
< k
; m
++) {
1772 printf( " %08x", dst
[m
]);
1775 if (j
== 0 && loadTest
.offset
) {
1777 j
+= loadTest
.offset
;
1785 do_store_test ( ldst_test_t storeTest
)
1788 unsigned int *src
, *dst
;
1791 func
= storeTest
.test_func
;
1792 r14
= (HWord_t
) storeTest
.base_addr
;
1793 r15
= (HWord_t
) storeTest
.offset
;
1794 unsigned int * pv
= (unsigned int *) storeTest
.base_addr
;
1796 // clear out storage destination
1797 for (idx
= 0; idx
< 4; idx
++, pv
+= idx
)
1800 memcpy(&vec_inA
, &viargs
[0], sizeof(vector
unsigned char));
1802 // execute test insn
1805 dst
= (unsigned int*) (((unsigned char *) r14
) + storeTest
.offset
);
1807 printf( "%s:", storeTest
.name
);
1808 for (m
= 0; m
< storeTest
.num_words_to_process
; m
++) {
1809 printf( " %08x", src
[m
] );
1812 for (m
= 0; m
< storeTest
.num_words_to_process
; m
++) {
1813 printf( " %08x", dst
[m
] );
1819 static void test_ldst(void)
1823 while (ldst_tests
[k
].test_func
) {
1824 if (ldst_tests
[k
].type
== VSX_STORE
)
1825 do_store_test(ldst_tests
[k
]);
1827 do_load_test(ldst_tests
[k
]);
1833 static void test_ftdiv(void)
1835 int i
, num_tests
, crx
;
1837 unsigned long long * frap
, * frbp
;
1838 build_special_fargs_table();
1840 num_tests
= sizeof ftdiv_tests
/sizeof ftdiv_tests
[0];
1842 for (i
= 0; i
< num_tests
; i
++) {
1843 fp_test_args_t aTest
= ftdiv_tests
[i
];
1844 f14
= spec_fargs
[aTest
.fra_idx
];
1845 f15
= spec_fargs
[aTest
.frb_idx
];
1846 frap
= (unsigned long long *)&spec_fargs
[aTest
.fra_idx
];
1847 frbp
= (unsigned long long *)&spec_fargs
[aTest
.frb_idx
];
1850 __asm__
__volatile__ ("ftdiv cr1, %0, %1" : : "d" (f14
), "d" (f15
));
1852 crx
= (flags
& 0x0f000000) >> 24;
1853 printf( "ftdiv: %016llx <=> %016llx ? %x (CRx)\n", *frap
, *frbp
, crx
);
1854 // printf("\tFRA: %e; FRB: %e\n", f14, f15);
1855 if ( crx
!= aTest
.cr_flags
) {
1856 printf("Error: Expected CR flags 0x%x; actual flags: 0x%x\n", aTest
.cr_flags
, crx
);
1863 static void test_p7_fpops ( void )
1868 build_fargs_table();
1869 while ((func
= fp_tests
[k
].test_func
)) {
1872 unsigned long long u0
;
// fcfids  - 64-bit signed integer converted to inf precise fp value, rounded to SP. (32)
// fcfidus - 64-bit unsigned integer converted to inf precise fp value, rounded to SP. (32)
// fcfidu  - 64-bit unsigned integer converted to inf precise fp value, rounded to DP. (64)
1878 (strcmp(fp_tests
[k
].name
, "fcfids")==0) ||
1879 (strcmp(fp_tests
[k
].name
, "fcfidus")==0) );
1881 for (i
= 0; i
< nb_fargs
; i
++) {
1882 u0
= *(unsigned long long *) (&fargs
[i
]);
1887 printf( "%s %016llx => (raw sp) %08x)",
1888 fp_tests
[k
].name
, u0
, *((unsigned int *)&res
));
1891 printf( "%s %016llx => (raw sp) %016llx)",
1892 fp_tests
[k
].name
, u0
, *(unsigned long long *)(&resd
));
1902 static void test_vsx_logic(void)
1909 while ((func
= logic_tests
[k
].test_func
)) {
1912 unsigned int * inA
, * inB
, * dst
;
1915 aTest
= logic_tests
[k
];
1916 for (i
= 0; i
<= (NUM_VIARGS_INTS
- (NUM_VIARGS_VECS
* sizeof(int))); i
++, startA
++) {
1917 startB
= startA
+ 4;
1918 pv
= (unsigned int *)&vec_out
;
1919 inA
= &viargs
[startA
];
1920 inB
= &viargs
[startB
];
1921 memcpy(&vec_inA
, inA
, sizeof(vector
unsigned char));
1922 memcpy(&vec_inB
, inB
, sizeof(vector
unsigned char));
1924 for (idx
= 0; idx
< 4; idx
++, pv
++)
1927 // execute test insn
1929 dst
= (unsigned int*) &vec_out
;
1931 printf( "%s:", aTest
.name
);
1932 printf( " %08x %08x %08x %08x %s", inA
[0], inA
[1], inA
[2], inA
[3], aTest
.name
);
1933 printf( " %08x %08x %08x %08x", inB
[0], inB
[1], inB
[2], inB
[3]);
1934 printf(" => %08x %08x %08x %08x\n", dst
[0], dst
[1], dst
[2], dst
[3]);
1942 static vector
unsigned long long vec_args
[] __attribute__ ((aligned (16))) =
1944 { 0x0123456789abcdefULL
, 0x0011223344556677ULL
},
1945 { 0x8899aabb19293942ULL
, 0xa1a2a3a4b1b2b3b4ULL
},
1946 { 0xc1c2c3c4d1d2d3d4ULL
, 0x7a6b5d3efc032778ULL
}
1948 #define NUM_VEC_ARGS_LONGS (sizeof vec_args/sizeof vec_args[0])
1950 static void test_move_ops (void)
1957 while ((func
= move_tests
[k
].test_func
)) {
1960 unsigned long long * inA
, * inB
, * dst
;
1961 int use_vecA
= (strcmp(move_tests
[k
].name
, "xscpsgndp") == 0);
1964 aTest
= move_tests
[k
];
1965 for (startB
= 0; startB
< NUM_VEC_ARGS_LONGS
; startB
++) {
1966 inB
= (unsigned long long *)&vec_args
[startB
];
1967 memcpy(&vec_inB
, inB
, sizeof(vector
unsigned char));
1973 inA
= (unsigned long long *)&vec_args
[startA
];
1974 memcpy(&vec_inA
, inA
, sizeof(vector
unsigned char));
1977 pv
= (unsigned int *)&vec_out
;
1979 for (idx
= 0; idx
< 4; idx
++, pv
++)
1982 // execute test insn
1984 dst
= (unsigned long long *) &vec_out
;
1990 printf( "%s:", aTest
.name
);
1992 printf( " X[A]: %016llx ", *inA
);
1993 printf( " X[B]: %016llx", *inB
);
1994 printf(" => %016llx\n", *dst
);
1996 if (use_vecA
&& startA
< NUM_VEC_ARGS_LONGS
)
2004 static void test_permute_ops (void)
2006 permute_test_t
*aTest
;
2007 unsigned int *dst
= (unsigned int *) &vec_out
;
2009 for (aTest
= &(permute_tests
[0]); aTest
->test_func
!= NULL
; aTest
++)
2011 /* Grab test input and clear output vector. */
2012 memcpy(&vec_inA
, aTest
->xa
, sizeof(vec_inA
));
2013 memcpy(&vec_inB
, aTest
->xb
, sizeof(vec_inB
));
2014 memset(dst
, 0, sizeof(vec_out
));
2016 /* execute test insn */
2019 printf( "%s:\n", aTest
->name
);
2020 printf( " XA[%08x,%08x,%08x,%08x]\n",
2021 aTest
->xa
[0], aTest
->xa
[1], aTest
->xa
[2], aTest
->xa
[3]);
2022 printf( " XB[%08x,%08x,%08x,%08x]\n",
2023 aTest
->xb
[0], aTest
->xb
[1], aTest
->xb
[2], aTest
->xb
[3]);
2024 printf( " => XT[%08x,%08x,%08x,%08x]\n",
2025 dst
[0], dst
[1], dst
[2], dst
[3]);
2031 static test_table_t all_tests
[] = { { &test_ldst
,
2032 "Test VSX load/store instructions" },
2034 "Test VSX logic instructions" },
2035 #ifdef __powerpc64__
2037 "Test ldbrx instruction" },
2039 "Test popcntd instruction" },
2042 "Test lfiwzx instruction" },
2044 "Test P7 floating point convert instructions"},
2046 "Test ftdiv instruction" },
2048 "Test VSX move instructions"},
2049 { &test_permute_ops
,
2050 "Test VSX permute instructions"},
2052 "Test VSX floating point instructions"},
2053 { &test_xs_conv_ops
,
2054 "Test VSX scalar integer conversion instructions" },
2059 int main(int argc
, char *argv
[])
2067 while ((func
= all_tests
[i
].test_category
)) {
2068 aTest
= all_tests
[i
];
2069 printf( "%s\n", aTest
.name
);