drd/tests/swapcontext: Improve the portability of this test further
[valgrind.git] / none / tests / ppc32 / test_isa_2_06_part2.c
blob4733f8e2d9c71455a9fce5b9bf5d1e18f567a5c9
1 /* Copyright (C) 2011 IBM
3 Author: Maynard Johnson <maynardj@us.ibm.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>.
18 The GNU General Public License is contained in the file COPYING.
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <malloc.h>
26 #include <math.h>
27 #include <unistd.h> // getopt
29 #ifdef HAS_VSX
31 #include <altivec.h>
/* HWord_t is an unsigned integer as wide as a general-purpose register:
 * 64 bits when building for powerpc64, 32 bits otherwise. */
#ifdef __powerpc64__
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* Minimal boolean type used throughout this test. */
typedef unsigned char Bool;
#define True 1
#define False 0

/* isLE is 1 only when the build defines VGP_ppc64le_linux. */
#if defined(VGP_ppc64le_linux)
#define isLE 1
#else
#define isLE 0
#endif
49 register HWord_t r14 __asm__ ("r14");
50 register HWord_t r15 __asm__ ("r15");
51 register HWord_t r16 __asm__ ("r16");
52 register HWord_t r17 __asm__ ("r17");
53 register double f14 __asm__ ("fr14");
54 register double f15 __asm__ ("fr15");
55 register double f16 __asm__ ("fr16");
56 register double f17 __asm__ ("fr17");
58 static volatile unsigned int div_flags, div_xer;
/* Clobber list naming every condition-register field. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Helpers to write/read the CR and XER special registers.
 *
 * None of these macros ends in a semicolon: the caller supplies it, so
 * each expands to exactly one statement and is safe in an unbraced
 * if/else.  (SET_CR and SET_XER used to carry their own ';', unlike
 * GET_CR/GET_XER.)
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR )

#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" )

#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR first, then XER, into the two given lvalues. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

#define SET_CR_ZERO \
      SET_CR(0)

#define SET_XER_ZERO \
      SET_XER(0)

/* Clear CR, then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Clear the FPSCR by moving 0.0 into all eight of its fields. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
/* Signature shared by every test routine: no arguments, no result. */
typedef void (*test_func_t)(void);

/* struct test_table is defined further down in this file. */
typedef struct test_table test_table_t;

/* Instruction groups; each group is one bit so groups can be OR-ed. */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST         0x0002
/* The functions below, which construct a table of floating point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
/* Debug tracing: prints to stderr when DEBUG_ARGS_BUILD is defined and
 * expands to an empty statement otherwise.  Written with the standard
 * C99 variadic form so it does not rely on GNU named variadic macros. */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(...) do { } while (0)
#endif

/* Assemble a 64-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must have room for 8 bytes (a double)
 *   s     sign bit (0 or 1), placed in bit 63
 *   _exp  biased exponent, placed starting at bit 52
 *   mant  fraction bits, OR-ed into the low bits unchanged
 *
 * The pattern is stored with memcpy rather than through a casted
 * uint64_t pointer, so the destination double is never accessed via an
 * incompatible lvalue type.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* uint64_t is cast so the arguments match %llx on every ABI. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
/* Assemble a 32-bit floating point bit pattern and store it at farg.
 *
 *   farg  destination; must have room for 4 bytes (a float)
 *   s     sign bit (0 or 1), placed in bit 31
 *   _exp  biased exponent, placed starting at bit 23
 *   mant  fraction bits, OR-ed into the low bits unchanged
 *
 * The store uses memcpy so the destination float is never written
 * through an incompatible uint32_t lvalue.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   const uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &bits, sizeof bits);
}
/* One operand pair for a floating point test.  Both members are indexes
 * into the special-value tables (spec_fargs / spec_sp_fargs). */
typedef struct fp_test_args {
   int fra_idx;
   int frb_idx;
} fp_test_args_t;


/* Operand pairs for the vector compare tests: 64 entries, laid out four
 * per row. */
fp_test_args_t fp_cmp_tests[] = {
   { 8,  8}, { 8, 14}, { 8,  6}, { 8,  5},
   { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   {14,  8}, {14, 14}, {14,  6}, {14,  5},
   {14,  4}, {14,  7}, {14,  9}, {14, 11},
   { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5},
   { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5},
   { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5},
   { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5},
   { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   {10,  8}, {10, 14}, {10,  6}, {10,  5},
   {10,  4}, {10,  7}, {10,  9}, {10, 10},
   {12,  8}, {12, 14}, {12,  6}, {12,  5},
   { 1,  1}, { 2,  2}, { 3,  3}, { 4,  4}
};
202 fp_test_args_t two_arg_fp_tests[] = {
203 {8, 8},
204 {8, 14},
205 {15, 16},
206 {8, 5},
207 {8, 4},
208 {8, 7},
209 {8, 9},
210 {8, 11},
211 {14, 8},
212 {14, 14},
213 {14, 6},
214 {14, 5},
215 {14, 4},
216 {14, 7},
217 {14, 9},
218 {14, 11},
219 {6, 8},
220 {6, 14},
221 {6, 6},
222 {6, 5},
223 {6, 4},
224 {6, 7},
225 {6, 9},
226 {6, 11},
227 {5, 8},
228 {5, 14},
229 {5, 6},
230 {5, 5},
231 {5, 4},
232 {5, 7},
233 {5, 9},
234 {5, 11},
235 {4, 8},
236 {4, 14},
237 {4, 6},
238 {4, 5},
239 {4, 1},
240 {4, 7},
241 {4, 9},
242 {4, 11},
243 {7, 8},
244 {7, 14},
245 {7, 6},
246 {7, 5},
247 {7, 4},
248 {7, 7},
249 {7, 9},
250 {7, 11},
251 {10, 8},
252 {10, 14},
253 {12, 6},
254 {12, 5},
255 {10, 4},
256 {10, 7},
257 {10, 9},
258 {10, 11},
259 {12, 8 },
260 {12, 14},
261 {12, 6},
262 {15, 16},
263 {15, 16},
264 {9, 11},
265 {11, 11},
266 {11, 12}
/* Number of entries in the special-value tables below. */
static int nb_special_fargs;
/* Special double-precision operands, indexed by the "#n" entry number. */
static double * spec_fargs;
/* Single-precision counterparts of spec_fargs, same indexing. */
static float * spec_sp_fargs;

/* Allocate and fill spec_fargs / spec_sp_fargs and set nb_special_fargs.
 *
 * Does nothing if spec_fargs has already been built.  If either
 * allocation fails, an error is printed to stderr and the program exits
 * with status 1 (the tables used to be written through unchecked).
 *
 * The values were picked to drive the ftdiv-style flag tests described
 * below (flags are listed as fl_flag || fg_flag || fe_flag):
 *
 *   None of the ftdiv tests succeed:
 *      FRA = #0; FRB = #1;  e_a = -2; e_b = 5          => 100
 *
 *   fe_flag tests
 *      fe_flag <- 1 if FRA is a NaN
 *         FRA = #9; FRB = #1;  e_a = 1024; e_b = 5     => 101
 *      fe_flag <- 1 if FRB is a NaN
 *         FRA = #1; FRB = #12; e_a = 5; e_b = 1024     => 101
 *      fe_flag <- 1 if e_b <= -1022
 *         FRA = #0; FRB = #2;  e_a = -2; e_b = -1022   => 101
 *      fe_flag <- 1 if e_b >= 1021
 *         FRA = #1; FRB = #3;  e_a = 5; e_b = 1023     => 101
 *      fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
 *         FRA = #3; FRB = #0;  e_a = 1023; e_b = -2    => 101
 *      fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
 *         FRA = #0; FRB = #3;  e_a = -2; e_b = 1023    => 101
 *      fe_flag <- 1 if FRA != 0 && e_a <= -970
 *         FRA = #4; FRB = #0;  e_a = -1005; e_b = -2   => 101
 *
 *   fg_flag tests
 *      fg_flag <- 1 if FRA is an Infinity
 *         NOTE: FRA = Inf also sets fe_flag
 *         Test 1: FRA = #7; FRB = #1; e_a = 1024; e_b = 5   => 111
 *         Test 2: FRA = #8; FRB = #1; e_a = 1024; e_b = 5   => 111
 *      fg_flag <- 1 if FRB is an Infinity
 *         NOTE: FRB = Inf also sets fe_flag
 *         FRA = #1; FRB = #7;  e_a = 5; e_b = 1024     => 111
 *      fg_flag <- 1 if FRB is denormalized
 *         NOTE: e_b < -1022 ==> fe_flag <- 1
 *         FRA = #0; FRB = #13; e_a = -2; e_b = -1023   => 111
 *      fg_flag <- 1 if FRB is +zero
 *         NOTE: FRA = Inf also sets fe_flag
 *         FRA = #5; FRB = #5;  e_a = -1023; e_b = -1023 => 111
 *      fg_flag <- 1 if FRB is -zero
 *         NOTE: FRA = Inf also sets fe_flag
 *         FRA = #5; FRB = #6;  e_a = -1023; e_b = -1023 => 111
 *
 *   NOTE(review): the two zero cases use FRA = #5 (+0.0), so their
 *   "FRA = Inf" notes look copied from the Infinity case -- confirm.
 */
static void build_special_fargs_table(void)
{
   /* Sign, biased exponent and fraction of each double-precision entry. */
   static const struct {
      int      s;
      uint16_t exp;
      uint64_t mant;
   } values[] = {
      /* #0  */ { 0, 0x3fd, 0x8000000000000ULL }, /* Positive finite number */
      /* #1  */ { 0, 0x404, 0xf000000000000ULL }, /* ...                    */
      /* #2  */ { 0, 0x001, 0x8000000b77501ULL }, /* ...                    */
      /* #3  */ { 0, 0x7fe, 0x800000000051bULL }, /* ...                    */
      /* #4  */ { 0, 0x012, 0x3214569900000ULL }, /* ...                    */
      /* #5  */ { 0, 0x000, 0x0000000000000ULL }, /* +0.0 (+zero)           */
      /* #6  */ { 1, 0x000, 0x0000000000000ULL }, /* -0.0 (-zero)           */
      /* #7  */ { 0, 0x7ff, 0x0000000000000ULL }, /* +infinity              */
      /* #8  */ { 1, 0x7ff, 0x0000000000000ULL }, /* -infinity              */
      /* #9  */ { 0, 0x7ff, 0x7FFFFFFFFFFFFULL }, /* +SNaN                  */
      /* #10 */ { 1, 0x7ff, 0x7FFFFFFFFFFFFULL }, /* -SNaN                  */
      /* #11 */ { 0, 0x7ff, 0x8000000000000ULL }, /* +QNaN                  */
      /* #12 */ { 1, 0x7ff, 0x8000000000000ULL }, /* -QNaN                  */
      /* #13 */ { 1, 0x000, 0x8340000078000ULL }, /* Denormalized val (zero
                                                     exp, non-zero fraction) */
      /* #14 */ { 1, 0x40d, 0x0650f5a07b353ULL }, /* Negative finite number */
      /* #15 */ { 0, 0x412, 0x32585a9900000ULL }, /* A couple more positive */
      /* #16 */ { 0, 0x413, 0x82511a2000000ULL }, /* finite numbers         */
   };
   enum { NB_VALUES = sizeof values / sizeof values[0] };
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc( NB_VALUES * sizeof *spec_fargs );
   spec_sp_fargs = malloc( NB_VALUES * sizeof *spec_sp_fargs );
   if (spec_fargs == NULL || spec_sp_fargs == NULL) {
      fprintf(stderr, "build_special_fargs_table: out of memory\n");
      exit(1);
   }

   for (i = 0; i < NB_VALUES; i++) {
      register_farg(&spec_fargs[i], values[i].s, values[i].exp, values[i].mant);

      if (i == 9 || i == 10) {
         /* When src is a SNaN, it's converted to a QNaN first before
          * rounding to single-precision, so the double value cannot
          * simply be copied to the single-precision slot.  Set the
          * single-precision SNaN bits by hand instead. */
         register_sp_farg(&spec_sp_fargs[i], values[i].s, 0xff, 0x3FFFFF);
      } else {
         spec_sp_fargs[i] = spec_fargs[i];
      }
   }

   nb_special_fargs = NB_VALUES;
}
543 struct test_table
545 test_func_t test_category;
546 char * name;
547 unsigned int test_group;
/* Operand precision a test works on. */
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Kind of VSX floating point test. */
typedef enum {
   VX_SCALAR_FP_NMSUB = 0,
   /* All vector-type ops should be added after this line ... */
   VX_VECTOR_FP_MULT_AND_OP2 = 10,
   /* ... and before this line. */
   VX_BASIC_CMP = 30,
   VX_CONV_WORD = 31,
   VX_DEFAULT = 32
} vx_fp_test_type;
565 typedef struct vx_fp_test
567 test_func_t test_func;
568 const char * name;
569 fp_test_args_t * targs;
570 int num_tests;
571 precision_type_t precision;
572 vx_fp_test_type type;
573 const char * op;
574 } vx_fp_test_t;
/* Vector operands/result shared by all VSX test wrappers. */
static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC;

/* When set, wrappers that have one run the record ("dot") form. */
static Bool do_dot;

/* Define test_<insn>(): execute the vector compare <insn> on
 * vec_inA/vec_inB, leaving the result in vec_out.  With do_dot set, the
 * record form "<insn>." is used; that form also writes CR field 6, so it
 * is declared as a clobber (it was previously undeclared).
 */
#define DEFINE_VX_CMP_TEST(insn)                                         \
static void test_##insn(void)                                            \
{                                                                        \
   if (do_dot)                                                           \
      __asm__ __volatile__ (#insn ". %x0, %x1, %x2"                      \
                            : "=wa" (vec_out)                            \
                            : "wa" (vec_inA), "wa" (vec_inB)             \
                            : "cr6");                                    \
   else                                                                  \
      __asm__ __volatile__ (#insn " %x0, %x1, %x2"                       \
                            : "=wa" (vec_out)                            \
                            : "wa" (vec_inA), "wa" (vec_inB));           \
}

DEFINE_VX_CMP_TEST(xvcmpeqdp)
DEFINE_VX_CMP_TEST(xvcmpgedp)
DEFINE_VX_CMP_TEST(xvcmpgtdp)
DEFINE_VX_CMP_TEST(xvcmpeqsp)
DEFINE_VX_CMP_TEST(xvcmpgesp)
DEFINE_VX_CMP_TEST(xvcmpgtsp)

#undef DEFINE_VX_CMP_TEST
627 static Bool do_aXp;
628 static Bool do_dp;
629 static void test_xsnmsub(void)
631 if (do_aXp)
632 __asm__ __volatile__ ("xsnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
633 else
634 __asm__ __volatile__ ("xsnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
637 static void test_xvmadd(void)
639 if (do_aXp)
640 if (do_dp)
641 __asm__ __volatile__ ("xvmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
642 else
643 __asm__ __volatile__ ("xvmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
644 else
645 if (do_dp)
646 __asm__ __volatile__ ("xvmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
647 else
648 __asm__ __volatile__ ("xvmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
651 static void test_xvnmadd(void)
653 if (do_aXp)
654 if (do_dp)
655 __asm__ __volatile__ ("xvnmaddadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
656 else
657 __asm__ __volatile__ ("xvnmaddasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
658 else
659 if (do_dp)
660 __asm__ __volatile__ ("xvnmaddmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
661 else
662 __asm__ __volatile__ ("xvnmaddmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
665 static void test_xvnmsub(void)
667 if (do_aXp)
668 if (do_dp)
669 __asm__ __volatile__ ("xvnmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
670 else
671 __asm__ __volatile__ ("xvnmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
672 else
673 if (do_dp)
674 __asm__ __volatile__ ("xvnmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
675 else
676 __asm__ __volatile__ ("xvnmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
679 static void test_xvmsub(void)
681 if (do_aXp)
682 if (do_dp)
683 __asm__ __volatile__ ("xvmsubadp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
684 else
685 __asm__ __volatile__ ("xvmsubasp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
686 else
687 if (do_dp)
688 __asm__ __volatile__ ("xvmsubmdp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
689 else
690 __asm__ __volatile__ ("xvmsubmsp %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
693 static void test_xssqrtdp(void)
695 __asm__ __volatile__ ("xssqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
698 static void test_xsrdpim(void)
700 __asm__ __volatile__ ("xsrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
703 static void test_xsrdpip(void)
705 __asm__ __volatile__ ("xsrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
708 static void test_xstdivdp(void)
710 __asm__ __volatile__ ("xstdivdp 6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
713 static void test_xsmaxdp(void)
715 __asm__ __volatile__ ("xsmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
718 static void test_xsmindp(void)
720 __asm__ __volatile__ ("xsmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
723 static void test_xvadddp(void)
725 __asm__ __volatile__ ("xvadddp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
728 static void test_xvaddsp(void)
730 __asm__ __volatile__ ("xvaddsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
733 static void test_xvdivdp(void)
735 __asm__ __volatile__ ("xvdivdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
738 static void test_xvdivsp(void)
740 __asm__ __volatile__ ("xvdivsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
743 static void test_xvmuldp(void)
745 __asm__ __volatile__ ("xvmuldp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
748 static void test_xvmulsp(void)
750 __asm__ __volatile__ ("xvmulsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
753 static void test_xvsubdp(void)
755 __asm__ __volatile__ ("xvsubdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
758 static void test_xvmaxdp(void)
760 __asm__ __volatile__ ("xvmaxdp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
763 static void test_xvmindp(void)
765 __asm__ __volatile__ ("xvmindp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
768 static void test_xvmaxsp(void)
770 __asm__ __volatile__ ("xvmaxsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
773 static void test_xvminsp(void)
775 __asm__ __volatile__ ("xvminsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
778 static void test_xvsubsp(void)
780 __asm__ __volatile__ ("xvsubsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
783 static void test_xvresp(void)
785 __asm__ __volatile__ ("xvresp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
788 static void test_xxsel(void)
790 unsigned long long * dst;
791 unsigned long long xa[] = { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL};
792 unsigned long long xb[] = { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
793 unsigned long long xc[] = { 0xffffffff01020304ULL, 0x128934bd00000000ULL};
795 memcpy(&vec_inA, xa, 16);
796 memcpy(&vec_inB, xb, 16);
797 memcpy(&vec_inC, xc, 16);
800 __asm__ __volatile__ ("xxsel %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC));
801 dst = (unsigned long long *) &vec_out;
802 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst);
803 dst++;
804 printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst);
805 printf("\n");
808 static void test_xxspltw(void)
810 int uim;
811 unsigned long long * dst = NULL;
812 unsigned int xb[] = { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708};
813 int i;
814 void * vecB_ptr = &vec_inB;
815 if (isLE) {
816 for (i = 3; i >=0; i--) {
817 memcpy(vecB_ptr, &xb[i], 4);
818 vecB_ptr+=4;
820 } else {
821 for (i = 0; i < 4; i++) {
822 memcpy(vecB_ptr, &xb[i], 4);
823 vecB_ptr+=4;
827 for (uim = 0; uim < 4; uim++) {
828 switch (uim) {
829 case 0:
830 __asm__ __volatile__ ("xxspltw %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB));
831 break;
832 case 1:
833 __asm__ __volatile__ ("xxspltw %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB));
834 break;
835 case 2:
836 __asm__ __volatile__ ("xxspltw %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB));
837 break;
838 case 3:
839 __asm__ __volatile__ ("xxspltw %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB));
840 break;
842 dst = (unsigned long long *) &vec_out;
843 printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx", xb[0], xb[1],
844 xb[2], xb[3], uim, *dst);
845 dst++;
846 printf("%016llx\n", *dst);
848 printf("\n");
851 static void test_xscvdpsxws(void)
853 __asm__ __volatile__ ("xscvdpsxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
856 static void test_xscvdpuxds(void)
858 __asm__ __volatile__ ("xscvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
861 static void test_xvcpsgndp(void)
863 __asm__ __volatile__ ("xvcpsgndp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
866 static void test_xvcpsgnsp(void)
868 __asm__ __volatile__ ("xvcpsgnsp %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
871 static void test_xvcvdpsxws(void)
873 __asm__ __volatile__ ("xvcvdpsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
876 static void test_xvcvspsxws(void)
878 __asm__ __volatile__ ("xvcvspsxws %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
881 static vx_fp_test_t
882 vx_vector_one_fp_arg_tests[] = {
883 { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"},
884 { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"},
885 { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"},
886 { NULL, NULL, NULL, 0 , 0, 0, NULL}
889 static vx_fp_test_t
890 vx_vector_fp_tests[] = {
891 { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"},
892 { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"},
893 { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"},
894 { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"},
895 { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"},
896 { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"},
897 { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" },
898 { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" },
899 { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" },
900 { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" },
901 { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" },
902 { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" },
903 { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" },
904 { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" },
905 { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" },
906 { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" },
907 { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" },
908 { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" },
909 { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"},
910 { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"},
911 { NULL, NULL, NULL, 0 , 0, 0, NULL}
915 static vx_fp_test_t
916 vx_aORm_fp_tests[] = {
917 { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"},
918 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
919 { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
920 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
921 { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
922 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
923 { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
924 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
925 { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
926 { NULL, NULL, NULL, 0, 0, 0, NULL }
929 static vx_fp_test_t
930 vx_simple_scalar_fp_tests[] = {
931 { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
932 { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
933 { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
934 { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
935 { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
936 { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
937 { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL},
938 { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
939 { NULL, NULL, NULL, 0, 0, 0, NULL }
#ifdef __powerpc64__
/* Run bpermd on one fixed operand pair (r14, r15) and print both
 * operands and the result from r17. */
static void test_bpermd(void)
{
   /* NOTE: Bit number is '0 . . . 63'
    *
    * Permuted bits are generated bit 0 -7 as follows:
    *    index = (r14)8*i:8*i+7
    *    perm[i] = (r15)index
    *
    * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the
    * MSB byte of r14, 0x1b(27/base 10).  This identifies bit 27 of r15,
    * which is '1'.
    * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
    * So the result of the first two iterations of i are:
    *   perm = 0b01xxxxxx
    */
   r14 = 0x1b2c31f030000001ULL;
   r15 = 0xa12bc37de56f9708ULL;

   __asm__ __volatile__ ("bpermd %0, %1, %2"
                         : "=r" (r17)
                         : "r" (r14), "r" (r15));

   printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n",
          (unsigned long long)r14,
          (unsigned long long)r15,
          (unsigned long long)r17);
   printf("\n");
}
#endif
968 static Bool do_OE;
969 typedef enum {
970 DIV_BASE = 1,
971 DIV_OE = 2,
972 DIV_DOT = 4,
973 } div_type_t;
974 /* Possible divde type combinations are:
975 * - base
976 * - base+dot
977 * - base+OE
978 * - base+OE+dot
980 #ifdef __powerpc64__
981 static void test_divde(void)
983 int divde_type = DIV_BASE;
984 if (do_OE)
985 divde_type |= DIV_OE;
986 if (do_dot)
987 divde_type |= DIV_DOT;
989 switch (divde_type) {
990 case 1:
991 SET_CR_XER_ZERO;
992 __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
993 GET_CR_XER(div_flags, div_xer);
994 break;
995 case 3:
996 SET_CR_XER_ZERO;
997 __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
998 GET_CR_XER(div_flags, div_xer);
999 break;
1000 case 5:
1001 SET_CR_XER_ZERO;
1002 __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1003 GET_CR_XER(div_flags, div_xer);
1004 break;
1005 case 7:
1006 SET_CR_XER_ZERO;
1007 __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1008 GET_CR_XER(div_flags, div_xer);
1009 break;
1010 default:
1011 fprintf(stderr, "Invalid divde type. Exiting\n");
1012 exit(1);
1015 #endif
1017 static void test_divweu(void)
1019 int divweu_type = DIV_BASE;
1020 if (do_OE)
1021 divweu_type |= DIV_OE;
1022 if (do_dot)
1023 divweu_type |= DIV_DOT;
1025 switch (divweu_type) {
1026 case 1:
1027 SET_CR_XER_ZERO;
1028 __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1029 GET_CR_XER(div_flags, div_xer);
1030 break;
1031 case 3:
1032 SET_CR_XER_ZERO;
1033 __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1034 GET_CR_XER(div_flags, div_xer);
1035 break;
1036 case 5:
1037 SET_CR_XER_ZERO;
1038 __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1039 GET_CR_XER(div_flags, div_xer);
1040 break;
1041 case 7:
1042 SET_CR_XER_ZERO;
1043 __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1044 GET_CR_XER(div_flags, div_xer);
1045 break;
1046 default:
1047 fprintf(stderr, "Invalid divweu type. Exiting\n");
1048 exit(1);
1052 static void test_fctiduz(void)
1054 if (do_dot)
1055 __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14));
1056 else
1057 __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14));
1060 static void test_fctidu(void)
1062 if (do_dot)
1063 __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14));
1064 else
1065 __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14));
1068 static void test_fctiwuz(void)
1070 if (do_dot)
1071 __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14));
1072 else
1073 __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14));
1076 static void test_fctiwu(void)
1078 if (do_dot)
1079 __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14));
1080 else
1081 __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14));
1084 typedef struct simple_test {
1085 test_func_t test_func;
1086 char * name;
1087 precision_type_t precision;
1088 } simple_test_t;
1090 static simple_test_t fct_tests[] = {
1091 { &test_fctiduz, "fctiduz", DOUBLE_TEST },
1092 { &test_fctidu, "fctidu", DOUBLE_TEST },
1093 { &test_fctiwuz, "fctiwuz", SINGLE_TEST },
1094 { &test_fctiwu, "fctiwu", SINGLE_TEST },
1095 { NULL, NULL }
1098 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1100 int a_idx, b_idx, i;
1101 void * inA, * inB;
1102 void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1104 for (i = 0; i < 4; i++) {
1105 a_idx = targs->fra_idx;
1106 b_idx = targs->frb_idx;
1107 inA = (void *)&spec_sp_fargs[a_idx];
1108 inB = (void *)&spec_sp_fargs[b_idx];
1109 // copy single precision FP into vector element i
1110 memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1111 memcpy(vec_src + (i * 4), inB, 4);
1112 targs++;
1116 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1118 int a_idx, b_idx, i;
1119 void * inA, * inB;
1120 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1122 for (i = 0; i < 2; i++) {
1123 a_idx = targs->fra_idx;
1124 b_idx = targs->frb_idx;
1125 inA = (void *)&spec_fargs[a_idx];
1126 inB = (void *)&spec_fargs[b_idx];
1127 // copy double precision FP into vector element i
1128 memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1129 memcpy(vec_src + (i * 8), inB, 8);
1130 targs++;
1134 #define VX_NOT_CMP_OP 0xffffffff
1135 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i)
1137 int a_idx, b_idx, k;
1138 char * name = malloc(20);
1139 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1140 int loops = dp ? 2 : 4;
1141 fp_test_args_t * targs = &test_group->targs[i];
1142 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1143 unsigned int * frA_sp, *frB_sp, * dst_sp;
1144 strcpy(name, test_group->name);
1145 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1146 for (k = 0; k < loops; k++) {
1147 a_idx = targs->fra_idx;
1148 b_idx = targs->frb_idx;
1149 if (k)
1150 printf(" AND ");
1151 if (dp) {
1152 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1153 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1154 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1155 } else {
1156 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1157 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1158 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1160 targs++;
1162 if (cc != VX_NOT_CMP_OP)
1163 printf(" ? cc=%x", cc);
1165 if (dp) {
1166 dst_dp = (unsigned long long *) &vec_out;
1167 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1168 } else {
1169 dst_sp = (unsigned int *) &vec_out;
1170 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1172 free(name);
1176 static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg,
1177 vx_fp_test_t * test_group, int i)
1179 int a_idx, k;
1180 char * name = malloc(20);
1181 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1182 int loops = dp ? 2 : 4;
1183 fp_test_args_t * targs = &test_group->targs[i];
1184 unsigned long long frA_dp, * dst_dp;
1185 unsigned int frA_sp, * dst_sp;
1187 strcpy(name, test_group->name);
1188 if (do_aXp)
1189 if (dp)
1190 strcat(name, "adp");
1191 else
1192 strcat(name, "asp");
1193 else
1194 if (dp)
1195 strcat(name, "mdp");
1196 else
1197 strcat(name, "msp");
1199 printf("#%d: %s ", dp? i/2 : i/4, name);
1200 for (k = 0; k < loops; k++) {
1201 a_idx = targs->fra_idx;
1202 if (k)
1203 printf(" AND ");
1204 if (dp) {
1205 frA_dp = *((unsigned long long *)&spec_fargs[a_idx]);
1206 printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]);
1207 } else {
1208 unsigned int * xt_sp = (unsigned int *)XT_arg;
1209 unsigned int * xb_sp = (unsigned int *)XB_arg;
1210 frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]);
1211 printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]);
1213 targs++;
1216 if (dp) {
1217 dst_dp = (unsigned long long *) &vec_out;
1218 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1219 } else {
1220 dst_sp = (unsigned int *) &vec_out;
1221 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1223 free(name);
1226 /* This function currently only supports double precision input arguments. */
1227 static void test_vx_simple_scalar_fp_ops(void)
1229 test_func_t func;
1230 int k = 0;
1232 build_special_fargs_table();
1233 while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1234 unsigned long long * frap, * frbp, * dst;
1235 unsigned int * pv;
1236 int idx;
1237 vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k];
1238 Bool convToWord = (test_group.type == VX_CONV_WORD);
1239 if (test_group.precision != DOUBLE_TEST) {
1240 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1241 exit(1);
1243 pv = (unsigned int *)&vec_out;
1244 // clear vec_out
1245 for (idx = 0; idx < 4; idx++, pv++)
1246 *pv = 0;
1248 /* If num_tests is exactly equal to nb_special_fargs, this implies the
1249 * instruction being tested only requires one floating point argument
1250 * (e.g. xssqrtdp).
1252 if (test_group.num_tests == nb_special_fargs && !test_group.targs) {
1253 void * inB, * vec_void_ptr = (void *)&vec_inB;
1254 int i;
1255 if (isLE)
1256 vec_void_ptr += 8;
1257 for (i = 0; i < nb_special_fargs; i++) {
1258 inB = (void *)&spec_fargs[i];
1259 frbp = (unsigned long long *)&spec_fargs[i];
1260 memcpy(vec_void_ptr, inB, 8);
1261 (*func)();
1262 dst = (unsigned long long *) &vec_out;
1263 if (isLE)
1264 dst++;
1265 printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp,
1266 convToWord ? (*dst & 0x00000000ffffffffULL) : *dst);
1268 } else {
1269 void * inA, * inB, * vecA_void_ptr, * vecB_void_ptr;
1270 unsigned int condreg, flags;
1271 int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0;
1272 int i;
1273 if (isLE) {
1274 vecA_void_ptr = (void *)&vec_inA + 8;
1275 vecB_void_ptr = (void *)&vec_inB + 8;
1276 } else {
1277 vecA_void_ptr = (void *)&vec_inA;
1278 vecB_void_ptr = (void *)&vec_inB;
1280 for (i = 0; i < test_group.num_tests; i++) {
1281 fp_test_args_t aTest = test_group.targs[i];
1282 inA = (void *)&spec_fargs[aTest.fra_idx];
1283 inB = (void *)&spec_fargs[aTest.frb_idx];
1284 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1285 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1286 // Only need to copy one doubleword into each vector's element 0
1287 memcpy(vecA_void_ptr, inA, 8);
1288 memcpy(vecB_void_ptr, inB, 8);
1289 SET_FPSCR_ZERO;
1290 SET_CR_XER_ZERO;
1291 (*func)();
1292 GET_CR(flags);
1293 if (isTdiv) {
1294 condreg = (flags & 0x000000f0) >> 4;
1295 printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg);
1296 } else {
1297 dst = (unsigned long long *) &vec_out;
1298 if (isLE)
1299 dst++;
1300 printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1301 *frap, *frbp, *dst);
1305 printf( "\n" );
1306 k++;
1310 static void test_vx_aORm_fp_ops(void)
1312 /* These ops need a third src argument, which is stored in element 0 of
1313 * VSX[XT] -- i.e., vec_out. For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
1314 * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
1315 * src2 and VSX[XB] holds src3. The fp_test_args_t that holds the test
1316 * data (input args, result) contain only two inputs, so I arbitrarily
1317 * choose some spec_fargs elements for the third source argument.
1318 * Note that that by using the same input data for a given pair of
1319 * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
1320 * arguments), the expected result should be the same.
1323 test_func_t func;
1324 int k;
1325 char * test_name = (char *)malloc(20);
1326 k = 0;
1327 do_dot = False;
1329 build_special_fargs_table();
1330 while ((func = vx_aORm_fp_tests[k].test_func)) {
1331 int i, stride;
1332 Bool repeat = False;
1333 Bool scalar = False;
1334 unsigned long long * frap, * frbp, * dst;
1335 vx_fp_test_t test_group = vx_aORm_fp_tests[k];
1336 vx_fp_test_type test_type = test_group.type;
1337 do_dp = test_group.precision == DOUBLE_TEST ? True : False;
1338 frap = frbp = NULL;
1340 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1341 scalar = True;
1342 strcpy(test_name, test_group.name);
1343 if (!repeat) {
1344 repeat = 1;
1345 stride = 1;
1346 // Only support double precision scalar ops in this function
1347 if (do_dp) {
1348 strcat(test_name, "adp");
1349 } else {
1350 fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1351 exit(1);
1353 do_aXp = True;
1355 } else if (test_type < VX_BASIC_CMP) {
1356 // Then it must be a VX_VECTOR_xxx type
1357 stride = do_dp ? 2 : 4;
1358 if (!repeat) {
1359 // No need to work up the testcase name here, since that will be done in
1360 // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
1361 repeat = 1;
1362 do_aXp = True;
1364 } else {
1365 printf("ERROR: Invalid VX FP test type %d\n", test_type);
1366 exit(1);
1369 again:
1370 for (i = 0; i < test_group.num_tests; i+=stride) {
1371 void * inA, * inB;
1372 int m, fp_idx[4];
1373 unsigned long long vsr_XT[2];
1374 unsigned long long vsr_XB[2];
1375 fp_test_args_t aTest = test_group.targs[i];
1376 for (m = 0; m < stride; m++)
1377 fp_idx[m] = i % (nb_special_fargs - stride) + m;
1379 /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
1380 * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
1381 * inputs as described above:
1382 * src2 <= VSX[XT]
1383 * src3 <= VSX[XB]
1385 if (scalar) {
1386 #ifdef VGP_ppc64le_linux
1387 #define VECTOR_ADDR(_v) ((void *)&_v) + 8
1388 #else
1389 #define VECTOR_ADDR(_v) ((void *)&_v)
1390 #endif
1391 // For scalar op, only need to copy one doubleword into each vector's element 0
1392 inA = (void *)&spec_fargs[aTest.fra_idx];
1393 inB = (void *)&spec_fargs[aTest.frb_idx];
1394 frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1395 memcpy(VECTOR_ADDR(vec_inA), inA, 8);
1396 if (repeat) {
1397 memcpy(VECTOR_ADDR(vec_out), inB, 8); // src2
1398 memcpy(VECTOR_ADDR(vec_inB), &spec_fargs[fp_idx[0]], 8); //src3
1399 frbp = (unsigned long long *)&spec_fargs[fp_idx[0]];
1400 } else {
1401 frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1402 memcpy(VECTOR_ADDR(vec_inB), inB, 8); // src2
1403 memcpy(VECTOR_ADDR(vec_out), &spec_fargs[fp_idx[0]], 8); //src3
1405 memcpy(vsr_XT, VECTOR_ADDR(vec_out), 8);
1406 } else {
1407 int j, loops = do_dp ? 2 : 4;
1408 size_t len = do_dp ? 8 : 4;
1409 void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out;
1410 for (j = 0; j < loops; j++) {
1411 if (do_dp)
1412 memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len);
1413 else
1414 memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len);
1416 if (do_dp)
1417 setup_dp_fp_args(&test_group.targs[i], repeat);
1418 else
1419 setup_sp_fp_args(&test_group.targs[i], repeat);
1421 memcpy(vsr_XT, &vec_out, 16);
1422 memcpy(vsr_XB, &vec_inB, 16);
1425 (*func)();
1426 dst = (unsigned long long *) &vec_out;
1427 if (isLE)
1428 dst++;
1429 if (test_type < VX_VECTOR_FP_MULT_AND_OP2)
1430 printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i,
1431 test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst );
1432 else
1433 print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i);
1435 printf( "\n" );
1437 if (repeat) {
1438 repeat = 0;
1439 if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1440 strcpy(test_name, test_group.name);
1441 strcat(test_name, "mdp");
1443 do_aXp = False;
1444 goto again;
1446 k++;
1448 printf( "\n" );
1449 free(test_name);
1452 static void test_vx_vector_one_fp_arg(void)
1454 test_func_t func;
1455 int k;
1456 k = 0;
1457 build_special_fargs_table();
1459 while ((func = vx_vector_one_fp_arg_tests[k].test_func)) {
1460 int idx, i;
1461 vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k];
1462 Bool convToWord = (test_group.type == VX_CONV_WORD);
1463 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1464 Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False;
1465 int stride = dp ? 2 : 4;
1467 for (i = 0; i < test_group.num_tests; i+=stride) {
1468 unsigned int * pv;
1469 void * inB;
1471 pv = (unsigned int *)&vec_out;
1472 // clear vec_out
1473 for (idx = 0; idx < 4; idx++, pv++)
1474 *pv = 0;
1476 if (dp) {
1477 int j;
1478 unsigned long long * frB_dp, *dst_dp;
1479 for (j = 0; j < 2; j++) {
1480 inB = (void *)&spec_fargs[i + j];
1481 // copy double precision FP into vector element i
1482 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1484 // execute test insn
1485 (*func)();
1486 dst_dp = (unsigned long long *) &vec_out;
1487 printf("#%d: %s ", i/2, test_group.name);
1488 for (j = 0; j < 2; j++) {
1489 if (j)
1490 printf("; ");
1491 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1492 printf("%s(%016llx)", test_group.op, *frB_dp);
1493 printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]);
1495 printf("\n");
1496 } else {
1497 int j;
1498 unsigned int * frB_sp, * dst_sp;
1500 for (j = 0; j < 4; j++) {
1501 inB = (void *)&spec_sp_fargs[i + j];
1502 // copy single precision FP into vector element i
1503 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1505 // execute test insn
1506 (*func)();
1507 dst_sp = (unsigned int *) &vec_out;
1508 // print result
1509 printf("#%d: %s ", i/4, test_group.name);
1510 for (j = 0; j < 4; j++) {
1511 if (j)
1512 printf("; ");
1513 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1514 printf("%s(%08x)", test_group.op, *frB_sp);
1515 if (xvrespTest) {
1516 float calc_diff = fabs(spec_sp_fargs[i + j]/256);
1517 float sp_res;
1518 memcpy(&sp_res, &dst_sp[j], 4);
1519 float div_result = 1/spec_sp_fargs[i + j];
1520 float real_diff = fabs(sp_res - div_result);
1521 printf( " ==> %s",
1522 ( ( sp_res == div_result )
1523 || ( isnan(sp_res) && isnan(div_result) )
1524 || ( real_diff <= calc_diff ) ) ? "PASS"
1525 : "FAIL");
1526 } else {
1527 printf(" = %08x", dst_sp[j]);
1530 printf("\n");
1533 k++;
1534 printf( "\n" );
1539 /* This function assumes the instruction being tested requires two args. */
1540 static void test_vx_vector_fp_ops(void)
1542 test_func_t func;
1543 int k;
1544 k = 0;
1545 build_special_fargs_table();
1547 while ((func = vx_vector_fp_tests[k].test_func)) {
1548 int idx, i, repeat = 1;
1549 vx_fp_test_t test_group = vx_vector_fp_tests[k];
1550 int stride = test_group.precision == DOUBLE_TEST ? 2 : 4;
1551 do_dot = False;
1553 again:
1554 for (i = 0; i < test_group.num_tests; i+=stride) {
1555 unsigned int * pv, condreg;
1556 unsigned int flags;
1558 pv = (unsigned int *)&vec_out;
1559 if (test_group.precision == DOUBLE_TEST)
1560 setup_dp_fp_args(&test_group.targs[i], False);
1561 else
1562 setup_sp_fp_args(&test_group.targs[i], False);
1564 // clear vec_out
1565 for (idx = 0; idx < 4; idx++, pv++)
1566 *pv = 0;
1568 // execute test insn
1569 SET_FPSCR_ZERO;
1570 SET_CR_XER_ZERO;
1571 (*func)();
1572 GET_CR(flags);
1573 if (test_group.type == VX_BASIC_CMP) {
1574 condreg = (flags & 0x000000f0) >> 4;
1575 } else {
1576 condreg = VX_NOT_CMP_OP;
1578 print_vector_fp_result(condreg, &test_group, i);
1580 printf("\n");
1581 if (repeat && test_group.type == VX_BASIC_CMP) {
1582 repeat = 0;
1583 do_dot = True;
1584 goto again;
1586 k++;
1587 printf( "\n" );
// The div doubleword test data: one { dividend, divisor } pair per row
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of rows in div_dw_tdata
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
// The div word test data: one { dividend, divisor } pair per row
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
// Number of rows in div_w_tdata
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1621 typedef struct div_ext_test
1623 test_func_t test_func;
1624 const char *name;
1625 int num_tests;
1626 div_type_t div_type;
1627 precision_type_t precision;
1628 } div_ext_test_t;
1630 static div_ext_test_t div_tests[] = {
1631 #ifdef __powerpc64__
1632 { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1633 { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1634 #endif
1635 { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST },
1636 { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST },
1637 { NULL, NULL, 0, 0, 0 }
1640 static void test_div_extensions(void)
1642 test_func_t func;
1643 int k;
1644 k = 0;
1646 while ((func = div_tests[k].test_func)) {
1647 int i, repeat = 1;
1648 div_ext_test_t test_group = div_tests[k];
1649 do_dot = False;
1651 again:
1652 for (i = 0; i < test_group.num_tests; i++) {
1653 unsigned int condreg;
1655 if (test_group.div_type == DIV_OE)
1656 do_OE = True;
1657 else
1658 do_OE = False;
1660 if (test_group.precision == DOUBLE_TEST) {
1661 r14 = div_dw_tdata[i][0];
1662 r15 = div_dw_tdata[i][1];
1663 } else {
1664 r14 = div_w_tdata[i][0];
1665 r15 = div_w_tdata[i][1];
1667 // execute test insn
1668 (*func)();
1669 condreg = (div_flags & 0xf0000000) >> 28;
1670 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1671 if (test_group.precision == DOUBLE_TEST) {
1672 printf("0x%016llx / 0x%016llx = 0x%016llx;",
1673 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1674 } else {
1675 printf("0x%08x / 0x%08x = 0x%08x;",
1676 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1678 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1680 printf("\n");
1681 if (repeat) {
1682 repeat = 0;
1683 do_dot = True;
1684 goto again;
1686 k++;
1687 printf( "\n" );
1692 static void test_fct_ops(void)
1694 test_func_t func;
1695 int k;
1696 k = 0;
1698 while ((func = fct_tests[k].test_func)) {
1699 int i, repeat = 1;
1700 simple_test_t test_group = fct_tests[k];
1701 do_dot = False;
1703 again:
1704 for (i = 0; i < nb_special_fargs; i++) {
1705 double result;
1706 #define SINGLE_MASK 0x00000000FFFFFFFFULL
1708 f14 = spec_fargs[i];
1709 // execute test insn
1710 SET_FPSCR_ZERO;
1711 (*func)();
1712 result = f17;
1713 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1714 printf("0x%016llx (%e) ==> 0x%016llx\n",
1715 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i],
1716 test_group.precision == SINGLE_TEST ? (SINGLE_MASK &
1717 *((unsigned long long *)(&result))) :
1718 *((unsigned long long *)(&result)));
1720 printf("\n");
1721 if (repeat) {
1722 repeat = 0;
1723 do_dot = True;
1724 goto again;
1726 k++;
1727 printf( "\n" );
#ifdef __powerpc64__
/* Store a known doubleword with stdbrx and print the value before and
 * after, so the stored result can be compared with the original.
 */
void test_stdbrx(void)
{
   /* Initialised so the compiler never sees a read of an indeterminate
    * value: the only write to 'store' happens inside the asm.
    */
   unsigned long long store = 0, val = 0xdeadbacf12345678ULL;
   printf("stdbrx: 0x%llx ==> ", val);
   r17 = (HWord_t)val;
   r14 = (HWord_t)&store;
   /* The "memory" clobber tells the compiler that the asm writes memory
    * (through the address in r14), so 'store' is reloaded afterwards.
    */
   __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14) : "memory");
   printf("0x%llx\n", store);
   printf( "\n" );
}
#endif
1744 static test_table_t
1745 all_tests[] =
1747 { &test_vx_vector_one_fp_arg,
1748 "Test VSX vector single arg instructions", OTHER_INST },
1749 { &test_vx_vector_fp_ops,
1750 "Test VSX floating point compare and basic arithmetic instructions", OTHER_INST },
1751 #ifdef __powerpc64__
1752 { &test_bpermd,
1753 "Test bit permute double", OTHER_INST },
1754 #endif
1755 { &test_xxsel,
1756 "Test xxsel instruction", OTHER_INST },
1757 { &test_xxspltw,
1758 "Test xxspltw instruction", OTHER_INST },
1759 { &test_div_extensions,
1760 "Test div extensions", SCALAR_DIV_INST },
1761 { &test_fct_ops,
1762 "Test floating point convert [word | doubleword] unsigned, with round toward zero", OTHER_INST },
1763 #ifdef __powerpc64__
1764 { &test_stdbrx,
1765 "Test stdbrx instruction", OTHER_INST },
1766 #endif
1767 { &test_vx_aORm_fp_ops,
1768 "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p", OTHER_INST },
1769 { &test_vx_simple_scalar_fp_ops,
1770 "Test scalar floating point arithmetic instructions", OTHER_INST },
1771 { NULL, NULL }
1773 #endif // HAS_VSX
/* Print the command-line help to stderr.
 *
 * No option is marked as a default: main() starts with an empty test mask,
 * so nothing runs unless -d, -o or -A is given.
 */
static void usage (void)
{
   fprintf(stderr,
           "Usage: test_isa_2_06_part2 [OPTIONS]\n"
           "\t-d: test scalar division instructions\n"
           "\t-o: test non scalar division instructions\n"
           "\t-A: test all instructions\n"
           "\t-h: display this help and exit\n"
           "At least one of -d, -o or -A is needed to run any test.\n"
           );
}
/* Entry point.  Builds a mask of test groups from the command line
 * (-d, -o, -A), then runs every entry of all_tests whose group is in the
 * mask.  Returns 0 on success or after -h, and 1 on an unrecognised option.
 * Without HAS_VSX the program does nothing and returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int i = 0;
   int c;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set on various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' for an unrecognised option; the offending
          * character itself is in optopt.
          */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];
      if(test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }
   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}