Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / none / tests / ppc32 / test_isa_2_06_part3.c
blobdd0ad7ea91213e6b1028d9f5c0fca9c333c589a0
1 /* Copyright (C) 2011 IBM
3 Author: Maynard Johnson <maynardj@us.ibm.com>
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>.
18 The GNU General Public License is contained in the file COPYING.
21 #include <stdio.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <malloc.h>
26 #include <math.h>
27 #include <unistd.h> // getopt
29 #ifdef HAS_VSX
31 #include <altivec.h>
/* HWord_t is 64 bits wide when __powerpc64__ is defined, 32 bits otherwise. */
#if defined(__powerpc64__)
typedef uint64_t HWord_t;
#else
typedef uint32_t HWord_t;
#endif /* __powerpc64__ */

/* isLE is 1 only when VGP_ppc64le_linux is defined, and 0 otherwise. */
#if !defined(VGP_ppc64le_linux)
#define isLE 0
#else
#define isLE 1
#endif

/* Minimal boolean type and its two values, as used by this test. */
typedef unsigned char Bool;
#define False 0
#define True 1
48 register HWord_t r14 __asm__ ("r14");
49 register HWord_t r15 __asm__ ("r15");
50 register HWord_t r16 __asm__ ("r16");
51 register HWord_t r17 __asm__ ("r17");
52 register double f14 __asm__ ("fr14");
53 register double f15 __asm__ ("fr15");
54 register double f16 __asm__ ("fr16");
55 register double f17 __asm__ ("fr17");
57 static volatile unsigned int div_flags, div_xer;
/* Clobber list naming all eight condition-register fields. */
#define ALLCR "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7"

/* Move `val` into the condition register.  Note that the expansion already
 * ends with a semicolon. */
#define SET_CR(val) \
   __asm__ __volatile__ ("mtcr %0" : : "b"(val) : ALLCR );

/* Move `val` into XER.  Note that the expansion already ends with a
 * semicolon. */
#define SET_XER(val) \
   __asm__ __volatile__ ("mtxer %0" : : "b"(val) : "xer" );

/* Read the condition register into the lvalue `dst`. */
#define GET_CR(dst) \
   __asm__ __volatile__ ("mfcr %0" : "=b"(dst) )

/* Read XER into the lvalue `dst`. */
#define GET_XER(dst) \
   __asm__ __volatile__ ("mfxer %0" : "=b"(dst) )

/* Read CR, then XER, into the two given lvalues. */
#define GET_CR_XER(cr_dst, xer_dst) \
   do { GET_CR(cr_dst); GET_XER(xer_dst); } while (0)

#define SET_CR_ZERO \
   SET_CR(0)

#define SET_XER_ZERO \
   SET_XER(0)

/* Zero CR, then XER. */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Execute "mtfsf 0xFF" with a 0.0 operand held in a floating-point register. */
#define SET_FPSCR_ZERO \
   do { double fpscr_zero_val = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(fpscr_zero_val) ); \
   } while (0)
typedef struct test_table test_table_t;

/* Every test routine has this shape: no arguments, no return value. */
typedef void (*test_func_t)(void);

/* Defines for the instruction groups, use bit field to identify */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST         0x0002
/* These functions below that construct a table of floating point
 * values were lifted from none/tests/ppc32/jm-insns.c.
 */

/* Debug trace to stderr; expands to nothing unless DEBUG_ARGS_BUILD is
 * defined. */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
#else
#define AB_DPRINTF(fmt, args...) do { } while (0)
#endif

/* Assemble a 64-bit pattern from a sign bit (bit 63), an exponent field
 * (starting at bit 52) and a mantissa (OR-ed into the low bits), and store
 * it in the 8 bytes that `farg` points to.
 *
 * farg - destination; must have room for 8 bytes.
 * s    - sign, shifted to bit 63.
 * _exp - exponent field, shifted to bit 52.
 * mant - mantissa bits, OR-ed in unchanged.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   const uint64_t bits =
      ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;

   /* Copy the bytes rather than storing through a casted pointer, so the
    * destination is never written through an incompatible lvalue type. */
   memcpy(farg, &bits, sizeof bits);

#if defined (DEBUG_ARGS_BUILD)
   {
      double as_double;

      memcpy(&as_double, &bits, sizeof as_double);
      /* Cast so the arguments match the %llx conversions on every ABI. */
      AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
                 s, _exp, (unsigned long long)mant,
                 (unsigned long long)bits, as_double);
   }
#endif
}
/* Assemble a 32-bit pattern from a sign bit (bit 31), an exponent field
 * (starting at bit 23) and a mantissa (OR-ed into the low bits), and store
 * it in the 4 bytes that `farg` points to.
 *
 * farg - destination; must have room for 4 bytes.
 * s    - sign, shifted to bit 31.
 * _exp - exponent field, shifted to bit 23.
 * mant - mantissa bits, OR-ed in unchanged.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   const uint32_t bits =
      ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   /* Copy the bytes rather than storing through a casted pointer, so the
    * destination is never written through an incompatible lvalue type. */
   memcpy(farg, &bits, sizeof bits);
}
/* A pair of indices selecting the two source operands of one test case. */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first operand  */
   int frb_idx;   /* index of the second operand */
} fp_test_args_t;


/* Operand-index pairs for the two-argument tests, eight pairs per row. */
fp_test_args_t two_arg_fp_tests[] = {
   { 8,  8}, { 8, 14}, {15, 16}, { 8,  5}, { 8,  4}, { 8,  7}, { 8,  9}, { 8, 11},
   {14,  8}, {14, 14}, {14,  6}, {14,  5}, {14,  4}, {14,  7}, {14,  9}, {14, 11},
   { 6,  8}, { 6, 14}, { 6,  6}, { 6,  5}, { 6,  4}, { 6,  7}, { 6,  9}, { 6, 11},
   { 5,  8}, { 5, 14}, { 5,  6}, { 5,  5}, { 5,  4}, { 5,  7}, { 5,  9}, { 5, 11},
   { 4,  8}, { 4, 14}, { 4,  6}, { 4,  5}, { 4,  1}, { 4,  7}, { 4,  9}, { 4, 11},
   { 7,  8}, { 7, 14}, { 7,  6}, { 7,  5}, { 7,  4}, { 7,  7}, { 7,  9}, { 7, 11},
   {10,  8}, {10, 14}, {12,  6}, {12,  5}, {10,  4}, {10,  7}, {10,  9}, {10, 11},
   {12,  8}, {12, 14}, {12,  6}, {15, 16}, {15, 16}, { 9, 11}, {11, 11}, {11, 12},
   {16, 18}, {17, 16}, {19, 19}, {19, 18}
};
static int nb_special_fargs;      /* number of entries in the two tables below */
static double * spec_fargs;       /* double-precision special values           */
static float * spec_sp_fargs;     /* single-precision counterparts             */

/* Allocate and fill spec_fargs and spec_sp_fargs, and set nb_special_fargs.
 * Does nothing if spec_fargs has already been built.  Prints a message to
 * stderr and exits with status 1 if either table cannot be allocated.
 */
static void build_special_fargs_table(void)
{
   /*
     Entry  Sign Exp   fraction                  Special value
      0      0   3fd   0x8000000000000ULL         Positive finite number
      1      0   404   0xf000000000000ULL         ...
      2      0   001   0x8000000b77501ULL         ...
      3      0   7fe   0x800000000051bULL         ...
      4      0   012   0x3214569900000ULL         ...
      5      0   000   0x0000000000000ULL         +0.0 (+zero)
      6      1   000   0x0000000000000ULL         -0.0 (-zero)
      7      0   7ff   0x0000000000000ULL         +infinity
      8      1   7ff   0x0000000000000ULL         -infinity
      9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
      10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
      11     0   7ff   0x8000000000000ULL         +QNaN
      12     1   7ff   0x8000000000000ULL         -QNaN
      13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
      14     1   40d   0x0650f5a07b353ULL         Negative finite number
      15     0   412   0x32585a9900000ULL         A few more positive finite numbers
      16     0   413   0x82511a2000000ULL         ...
      17     0   403   0x12ef5a9300000ULL         ...
      18     0   405   0x14bf5d2300000ULL         ...
      19     0   409   0x76bf982440000ULL         ...
   */
   static const struct {
      int      sign;
      uint16_t exp;
      uint64_t mant;
   } dp_patterns[] = {
      { 0, 0x3fd, 0x8000000000000ULL },   // #0
      { 0, 0x404, 0xf000000000000ULL },   // #1
      { 0, 0x001, 0x8000000b77501ULL },   // #2
      { 0, 0x7fe, 0x800000000051bULL },   // #3
      { 0, 0x012, 0x3214569900000ULL },   // #4
      /* Special values */
      { 0, 0x000, 0x0000000000000ULL },   // #5  +0.0
      { 1, 0x000, 0x0000000000000ULL },   // #6  -0.0
      { 0, 0x7FF, 0x0000000000000ULL },   // #7  +infinity
      { 1, 0x7FF, 0x0000000000000ULL },   // #8  -infinity
      { 0, 0x7FF, 0x7FFFFFFFFFFFFULL },   // #9  +SNaN
      { 1, 0x7FF, 0x7FFFFFFFFFFFFULL },   // #10 -SNaN
      { 0, 0x7FF, 0x8000000000000ULL },   // #11 +QNaN
      { 1, 0x7FF, 0x8000000000000ULL },   // #12 -QNaN
      { 1, 0x000, 0x8340000078000ULL },   // #13 denormalized value
      { 1, 0x40d, 0x0650f5a07b353ULL },   // #14 negative finite number
      /* A few positive finite numbers ... */
      { 0, 0x412, 0x32585a9900000ULL },   // #15
      { 0, 0x413, 0x82511a2000000ULL },   // #16
      { 0, 0x403, 0x12ef5a9300000ULL },   // #17
      { 0, 0x405, 0x14bf5d2300000ULL },   // #18
      { 0, 0x409, 0x76bf982440000ULL },   // #19
   };
   enum { POS_SNAN_IDX = 9, NEG_SNAN_IDX = 10 };
   const int count = (int)(sizeof dp_patterns / sizeof dp_patterns[0]);
   int i;

   if (spec_fargs)
      return;

   spec_fargs = malloc(count * sizeof *spec_fargs);
   spec_sp_fargs = malloc(count * sizeof *spec_sp_fargs);
   if (!spec_fargs || !spec_sp_fargs) {
      fprintf(stderr, "Unable to allocate the special FP value tables. Exiting\n");
      exit(1);
   }

   for (i = 0; i < count; i++) {
      register_farg(&spec_fargs[i], dp_patterns[i].sign,
                    dp_patterns[i].exp, dp_patterns[i].mant);

      if (i == POS_SNAN_IDX || i == NEG_SNAN_IDX) {
         /*
          * When src is a SNaN, it's converted to a QNaN first before rounding
          * to single-precision, so we can't just copy the double-precision
          * value to the corresponding slot in the single-precision array.
          * Instead, we have to manually set the bits using register_sp_farg().
          */
         register_sp_farg(&spec_sp_fargs[i], dp_patterns[i].sign, 0xff, 0x3FFFFF);
      } else {
         spec_sp_fargs[i] = spec_fargs[i];
      }
   }

   nb_special_fargs = count;
}
403 struct test_table
405 test_func_t test_category;
406 char * name;
407 unsigned int test_group;
/* Type of input for floating point operations.*/
typedef enum {
   SINGLE_TEST = 0,
   DOUBLE_TEST = 1
} precision_type_t;

/* Kind of VSX test; the test drivers use it to decide how a result is
 * checked and printed. */
typedef enum {
   VX_SCALAR_CONV_TO_WORD = 0,
   VX_CONV_TO_SINGLE      = 1,
   VX_CONV_TO_DOUBLE      = 2,
   VX_ESTIMATE            = 3,
   VX_DEFAULT             = 4
} vx_fp_test_type;
424 static vector unsigned int vec_out, vec_inA, vec_inB;
426 /* This function is for checking the reciprocal and reciprocal square root
427 * estimate instructions.
429 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
431 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
432 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
433 * does an actual reciprocal calculation versus estimation, so the answer we get back from
434 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
435 * precision) and the estimate may still be within expected tolerances. On top of that,
436 * we can't count on these estimates always being the same across implementations.
437 * For example, with the fre[s] instruction (which should be correct to within one part
438 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
439 * one implementation could return 1.0111_1111_0000 and another implementation could return
440 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
441 * single bit in common.
443 * The upshot is we can't validate the VEX output for these instructions by comparing against
444 * stored bit patterns. We must check that the result is within expected tolerances.
448 /* A mask to be used for validation as a last resort.
449 * Only use 12 bits of precision for reasons discussed above.
451 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
452 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
454 Bool result = False;
455 Bool dp_test = type == DOUBLE_TEST;
456 double src_dp, res_dp;
457 float src_sp, res_sp;
458 src_dp = res_dp = 0;
459 src_sp = res_sp = 0;
460 #define SRC (dp_test ? src_dp : src_sp)
461 #define RES (dp_test ? res_dp : res_sp)
462 Bool src_is_negative = False;
463 Bool res_is_negative = False;
464 unsigned long long * dst_dp = NULL;
465 unsigned int * dst_sp = NULL;
466 if (dp_test) {
467 unsigned long long * src_dp_ull;
468 dst_dp = (unsigned long long *) &vec_out;
469 src_dp = spec_fargs[idx];
470 src_dp_ull = (unsigned long long *) &src_dp;
471 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
472 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
473 memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
474 } else {
475 unsigned int * src_sp_uint;
476 dst_sp = (unsigned int *) &vec_out;
477 src_sp = spec_sp_fargs[idx];
478 src_sp_uint = (unsigned int *) &src_sp;
479 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
480 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
481 memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
484 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
485 if (isnan(SRC))
486 return isnan(RES);
487 if (fpclassify(SRC) == FP_ZERO)
488 return isinf(RES);
489 if (!src_is_negative && isinf(SRC))
490 return !res_is_negative && (fpclassify(RES) == FP_ZERO);
491 if (is_rsqrte) {
492 if (src_is_negative)
493 return isnan(RES);
494 } else {
495 if (src_is_negative && isinf(SRC))
496 return res_is_negative && (fpclassify(RES) == FP_ZERO);
498 if (dp_test) {
499 double calc_diff;
500 double real_diff;
501 double recip_divisor;
502 double div_result;
503 double calc_diff_tmp;
505 if (is_rsqrte)
506 recip_divisor = sqrt(src_dp);
507 else
508 recip_divisor = src_dp;
510 div_result = 1.0/recip_divisor;
511 calc_diff_tmp = recip_divisor * 16384.0;
512 if (isnormal(calc_diff_tmp)) {
513 calc_diff = fabs(1.0/calc_diff_tmp);
514 real_diff = fabs(res_dp - div_result);
515 result = ( ( res_dp == div_result )
516 || ( real_diff <= calc_diff ) );
517 } else {
518 /* Unable to compute theoretical difference, so we fall back to masking out
519 * un-precise bits.
521 unsigned long long * div_result_dp = (unsigned long long *) &div_result;
522 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
524 /* For debug use . . .
525 if (!result) {
526 unsigned long long * dv = &div_result;
527 unsigned long long * rd = &real_diff;
528 unsigned long long * cd = &calc_diff;
529 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
530 *dv, *rd, *cd);
533 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
534 float calc_diff;
535 float real_diff;
536 float div_result;
537 float calc_diff_tmp;
538 float recip_divisor = sqrt(src_sp);
540 div_result = 1.0/recip_divisor;
541 calc_diff_tmp = recip_divisor * 16384.0;
542 if (isnormal(calc_diff_tmp)) {
543 calc_diff = fabsf(1.0/calc_diff_tmp);
544 real_diff = fabsf(res_sp - div_result);
545 result = ( ( res_sp == div_result )
546 || ( real_diff <= calc_diff ) );
547 } else {
548 /* Unable to compute theoretical difference, so we fall back to masking out
549 * un-precise bits.
551 unsigned int * div_result_sp = (unsigned int *) &div_result;
552 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
554 /* For debug use . . .
555 if (!result) {
556 unsigned long long * dv = &div_result;
557 unsigned long long * rd = &real_diff;
558 unsigned long long * cd = &calc_diff;
559 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
560 *dv, *rd, *cd);
564 return result;
567 typedef struct vx_fp_test
569 test_func_t test_func;
570 const char * name;
571 fp_test_args_t * targs;
572 int num_tests;
573 precision_type_t precision;
574 vx_fp_test_type type;
575 const char * op;
576 } vx_fp_test_t;
579 static Bool do_dot;
581 static void test_xvredp(void)
583 __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
586 static void test_xsredp(void)
588 __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
591 static void test_xvrsqrtedp(void)
593 __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
596 static void test_xsrsqrtedp(void)
598 __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
601 static void test_xvrsqrtesp(void)
603 __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
606 static void test_xstsqrtdp(void)
608 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
611 static void test_xvtsqrtdp(void)
613 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
616 static void test_xvtsqrtsp(void)
618 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
621 static void test_xvsqrtdp(void)
623 __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
626 static void test_xvsqrtsp(void)
628 __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
631 static void test_xvtdivdp(void)
633 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
636 static void test_xvtdivsp(void)
638 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
641 static void test_xscvdpsp(void)
643 __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
646 static void test_xscvdpuxws(void)
648 __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
651 static void test_xscvspdp(void)
653 __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
656 static void test_xvcvdpsp(void)
658 __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
661 static void test_xvcvdpuxds(void)
663 __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
666 static void test_xvcvdpuxws(void)
668 __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
671 static void test_xvcvspdp(void)
673 __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
676 static void test_xvcvspsxds(void)
678 __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
681 static void test_xvcvspuxds(void)
683 __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
686 static void test_xvcvdpsxds(void)
688 __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
691 static void test_xvcvspuxws(void)
693 __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
696 static void test_xvcvsxddp(void)
698 __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
701 static void test_xvcvuxddp(void)
703 __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
706 static void test_xvcvsxdsp(void)
708 __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
711 static void test_xvcvuxdsp(void)
713 __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
716 static void test_xvcvsxwdp(void)
718 __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
721 static void test_xvcvuxwdp(void)
723 __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
726 static void test_xvcvsxwsp(void)
728 __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
731 static void test_xvcvuxwsp(void)
733 __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
736 static void test_xsrdpic(void)
738 __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
741 static void test_xsrdpiz(void)
743 __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
746 static void test_xsrdpi(void)
748 __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
751 static void test_xvabsdp(void)
753 __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
756 static void test_xvnabsdp(void)
758 __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
761 static void test_xvnegdp(void)
763 __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
766 static void test_xvnegsp(void)
768 __asm__ __volatile__ ("xvnegsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
771 static void test_xvabssp(void)
773 __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
776 static void test_xvnabssp(void)
778 __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
781 static void test_xvrdpi(void)
783 __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
786 static void test_xvrdpic(void)
788 __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
791 static void test_xvrdpim(void)
793 __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
796 static void test_xvrdpip(void)
798 __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
801 static void test_xvrdpiz(void)
803 __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
806 static void test_xvrspi(void)
808 __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
811 static void test_xvrspic(void)
813 __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
816 static void test_xvrspim(void)
818 __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
821 static void test_xvrspip(void)
823 __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
826 static void test_xvrspiz(void)
828 __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
831 static vx_fp_test_t
832 vsx_one_fp_arg_tests[] = {
833 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
834 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
835 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
836 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
837 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
838 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
839 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
840 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
841 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
842 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
843 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
844 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
845 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
846 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
847 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
848 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
849 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
850 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
851 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
852 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
853 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
854 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
855 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
856 { &test_xvnegsp, "xvnegsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "neg"},
857 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
858 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
859 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
860 { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
861 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
862 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
863 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
864 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
865 { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
866 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
867 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
868 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
869 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
870 { NULL, NULL, NULL, 0, 0, 0, NULL}
873 static vx_fp_test_t
874 vx_tdivORtsqrt_tests[] = {
875 { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
876 { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
877 { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
878 { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
879 { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
880 { NULL, NULL, NULL, 0 , 0, 0, NULL}
/* 64-bit integer inputs for the doubleword int-to-FP conversion tests. */
static unsigned long long doubleWord[] = {
   0x0000000000000000ULL,
   0xffffffff00000000ULL,
   0x00000000ffffffffULL,
   0xffffffffffffffffULL,
   0x89abcde123456789ULL,
   0x0102030405060708ULL,
   0x00000000a0b1c2d3ULL,
   0x1111222233334444ULL
};

/* 32-bit integer inputs for the word int-to-FP conversion tests. */
static unsigned int singleWord[] = {
   0x00000000u,
   0xffff0000u,
   0x0000ffffu,
   0xffffffffu,
   0x89a73522u,
   0x01020304u,
   0x0000abcdu,
   0x11223344u
};
903 typedef struct vx_intToFp_test
905 test_func_t test_func;
906 const char * name;
907 void * targs;
908 int num_tests;
909 precision_type_t precision;
910 vx_fp_test_type type;
911 } vx_intToFp_test_t;
913 static vx_intToFp_test_t
914 intToFp_tests[] = {
915 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
916 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
917 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
918 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
919 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
920 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
921 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
922 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
923 { NULL, NULL, NULL, 0, 0 }
926 static Bool do_OE;
927 typedef enum {
928 DIV_BASE = 1,
929 DIV_OE = 2,
930 DIV_DOT = 4,
931 } div_type_t;
932 /* Possible divde type combinations are:
933 * - base
934 * - base+dot
935 * - base+OE
936 * - base+OE+dot
#ifdef __powerpc64__
/* Execute one form of divdeu with r14 and r15 as inputs and r17 as output.
 * do_OE and do_dot select the "o" and "." variants.  CR and XER are zeroed
 * immediately before the instruction and captured into div_flags / div_xer
 * immediately after it.
 */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   /* The zeroing stays inside each case: code emitted for the switch itself
    * may modify CR, so it must not run between SET_CR_XER_ZERO and the
    * instruction under test.  The "." forms declare cr0 clobbered and the
    * "o" forms declare xer clobbered, since they update those registers. */
   switch (divdeu_type) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2"
                               : "=r" (r17) : "r" (r14), "r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2"
                               : "=r" (r17) : "r" (r14), "r" (r15) : "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2"
                               : "=r" (r17) : "r" (r14), "r" (r15) : "cr0");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2"
                               : "=r" (r17) : "r" (r14), "r" (r15) : "cr0", "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
975 static void test_divwe(void)
977 int divwe_type = DIV_BASE;
978 if (do_OE)
979 divwe_type |= DIV_OE;
980 if (do_dot)
981 divwe_type |= DIV_DOT;
983 switch (divwe_type) {
984 case 1:
985 SET_CR_XER_ZERO;
986 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
987 GET_CR_XER(div_flags, div_xer);
988 break;
989 case 3:
990 SET_CR_XER_ZERO;
991 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
992 GET_CR_XER(div_flags, div_xer);
993 break;
994 case 5:
995 SET_CR_XER_ZERO;
996 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
997 GET_CR_XER(div_flags, div_xer);
998 break;
999 case 7:
1000 SET_CR_XER_ZERO;
1001 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1002 GET_CR_XER(div_flags, div_xer);
1003 break;
1004 default:
1005 fprintf(stderr, "Invalid divweu type. Exiting\n");
1006 exit(1);
1011 typedef struct simple_test {
1012 test_func_t test_func;
1013 char * name;
1014 precision_type_t precision;
1015 } simple_test_t;
1018 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1020 int a_idx, b_idx, i;
1021 void * inA, * inB;
1022 void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1024 for (i = 0; i < 4; i++) {
1025 a_idx = targs->fra_idx;
1026 b_idx = targs->frb_idx;
1027 inA = (void *)&spec_sp_fargs[a_idx];
1028 inB = (void *)&spec_sp_fargs[b_idx];
1029 // copy single precision FP into vector element i
1030 memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1031 memcpy(vec_src + (i * 4), inB, 4);
1032 targs++;
1036 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1038 int a_idx, b_idx, i;
1039 void * inA, * inB;
1040 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1042 for (i = 0; i < 2; i++) {
1043 a_idx = targs->fra_idx;
1044 b_idx = targs->frb_idx;
1045 inA = (void *)&spec_fargs[a_idx];
1046 inB = (void *)&spec_fargs[b_idx];
1047 // copy double precision FP into vector element i
1048 memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1049 memcpy(vec_src + (i * 8), inB, 8);
1050 targs++;
1054 #define VX_NOT_CMP_OP 0xffffffff
1055 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1057 int a_idx, b_idx, k;
1058 char * name = malloc(20);
1059 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1060 int loops = dp ? 2 : 4;
1061 fp_test_args_t * targs = &test_group->targs[i];
1062 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1063 unsigned int * frA_sp, *frB_sp, * dst_sp;
1064 strcpy(name, test_group->name);
1065 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1066 for (k = 0; k < loops; k++) {
1067 a_idx = targs->fra_idx;
1068 b_idx = targs->frb_idx;
1069 if (k)
1070 printf(" AND ");
1071 if (dp) {
1072 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1073 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1074 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1075 } else {
1076 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1077 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1078 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1080 targs++;
1082 if (cc != VX_NOT_CMP_OP)
1083 printf(" ? cc=%x", cc);
1085 if (print_vec_out) {
1086 if (dp) {
1087 dst_dp = (unsigned long long *) &vec_out;
1088 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1089 } else {
1090 dst_sp = (unsigned int *) &vec_out;
1091 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1093 } else {
1094 printf("\n");
1096 free(name);
1101 static void test_vsx_one_fp_arg(void)
1103 test_func_t func;
1104 int k;
1105 k = 0;
1106 build_special_fargs_table();
1108 while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1109 int idx, i;
1110 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1111 Bool estimate = (test_group.type == VX_ESTIMATE);
1112 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1113 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1114 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1115 Bool sparse_sp = False;
1116 int stride = dp ? 2 : 4;
1117 int loops = is_scalar ? 1 : stride;
1118 stride = is_scalar ? 1: stride;
1120 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1121 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op
1122 * or
1123 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op
1125 * For the vector op case, we need to adjust stride from '4' to '2', since
1126 * we'll only be loading two values per loop into the input register.
1128 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1129 sparse_sp = True;
1130 stride = 2;
1133 for (i = 0; i < test_group.num_tests; i+=stride) {
1134 unsigned int * pv;
1135 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1137 pv = (unsigned int *)&vec_out;
1138 // clear vec_out
1139 for (idx = 0; idx < 4; idx++, pv++)
1140 *pv = 0;
1142 if (dp) {
1143 int j;
1144 unsigned long long * frB_dp, *dst_dp;
1145 for (j = 0; j < loops; j++) {
1146 inB = (void *)&spec_fargs[i + j];
1147 // copy double precision FP into vector element i
1148 if (isLE && is_scalar)
1149 vecB_void_ptr += 8;
1150 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1152 // execute test insn
1153 (*func)();
1154 dst_dp = (unsigned long long *) &vec_out;
1155 if (isLE && is_scalar)
1156 dst_dp++;
1157 printf("#%d: %s ", i/stride, test_group.name);
1158 for (j = 0; j < loops; j++) {
1159 if (j)
1160 printf("; ");
1161 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1162 printf("%s(%016llx)", test_group.op, *frB_dp);
1163 if (estimate) {
1164 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1165 printf(" ==> %s)", res ? "PASS" : "FAIL");
1166 /* For debugging . . .
1167 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1169 } else {
1170 vx_fp_test_type type = test_group.type;
1171 switch (type) {
1172 case VX_SCALAR_CONV_TO_WORD:
1173 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1174 break;
1175 case VX_CONV_TO_SINGLE:
1176 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1177 break;
1178 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1179 printf(" = %016llx", dst_dp[j]);
1183 printf("\n");
1184 } else {
1185 int j;
1186 unsigned int * frB_sp, * dst_sp = NULL;
1187 unsigned long long * dst_dp = NULL;
1188 if (sparse_sp)
1189 loops = 2;
1190 for (j = 0; j < loops; j++) {
1191 inB = (void *)&spec_sp_fargs[i + j];
1192 // copy single precision FP into vector element i
1193 if (sparse_sp) {
1194 if (isLE)
1195 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1196 else
1197 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1198 } else {
1199 if (isLE && is_scalar)
1200 vecB_void_ptr += 12;
1201 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1204 // execute test insn
1205 (*func)();
1206 if (test_group.type == VX_CONV_TO_DOUBLE) {
1207 dst_dp = (unsigned long long *) &vec_out;
1208 if (isLE && is_scalar)
1209 dst_dp++;
1210 } else {
1211 dst_sp = (unsigned int *) &vec_out;
1212 if (isLE && is_scalar)
1213 dst_sp += 3;
1215 // print result
1216 printf("#%d: %s ", i/stride, test_group.name);
1217 for (j = 0; j < loops; j++) {
1218 if (j)
1219 printf("; ");
1220 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1221 printf("%s(%08x)", test_group.op, *frB_sp);
1222 if (estimate) {
1223 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1224 printf(" ==> %s)", res ? "PASS" : "FAIL");
1225 } else {
1226 if (test_group.type == VX_CONV_TO_DOUBLE)
1227 printf(" = %016llx", dst_dp[j]);
1228 else
1229 /* Special case: Current VEX implementation for fsqrts (single precision)
1230 * uses the same implementation as that used for double precision fsqrt.
1231 * However, I've found that for xvsqrtsp, the result from that implementation
1232 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the
1233 * output to appear very different if you end up with a carry. But for the given
1234 * inputs in this testcase, we can simply mask out these bits.
1236 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1239 printf("\n");
1242 k++;
1243 printf( "\n" );
1247 static void test_int_to_fp_convert(void)
1249 test_func_t func;
1250 int k;
1251 k = 0;
1253 while ((func = intToFp_tests[k].test_func)) {
1254 int idx, i;
1255 vx_intToFp_test_t test_group = intToFp_tests[k];
1256 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1257 Bool sparse_sp = False;
1258 int stride = dp ? 2 : 4;
1259 int loops = stride;
1261 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1262 * |___ int___|_Unused_|___int___|__Unused__| // for vector op
1263 * or
1264 * We need to adjust stride from '4' to '2', since we'll only be loading
1265 * two values per loop into the input register.
1267 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1268 sparse_sp = True;
1269 stride = 2;
1272 for (i = 0; i < test_group.num_tests; i+=stride) {
1273 unsigned int * pv;
1274 void * inB;
1276 pv = (unsigned int *)&vec_out;
1277 // clear vec_out
1278 for (idx = 0; idx < 4; idx++, pv++)
1279 *pv = 0;
1281 if (dp) {
1282 int j;
1283 unsigned long long *dst_dw, * targs = test_group.targs;
1284 for (j = 0; j < loops; j++) {
1285 inB = (void *)&targs[i + j];
1286 // copy doubleword into vector element i
1287 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1289 // execute test insn
1290 (*func)();
1291 dst_dw = (unsigned long long *) &vec_out;
1292 printf("#%d: %s ", i/stride, test_group.name);
1293 for (j = 0; j < loops; j++) {
1294 if (j)
1295 printf("; ");
1296 printf("conv(%016llx)", targs[i + j]);
1298 if (test_group.type == VX_CONV_TO_SINGLE)
1299 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1300 else
1301 printf(" = %016llx", dst_dw[j]);
1303 printf("\n");
1304 } else {
1305 int j;
1306 unsigned int * dst_sp = NULL;
1307 unsigned int * targs = test_group.targs;
1308 unsigned long long * dst_dp = NULL;
1309 void * vecB_void_ptr = (void *)&vec_inB;
1310 if (sparse_sp)
1311 loops = 2;
1312 for (j = 0; j < loops; j++) {
1313 inB = (void *)&targs[i + j];
1314 // copy single word into vector element i
1315 if (sparse_sp) {
1316 if (isLE)
1317 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1318 else
1319 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1320 } else {
1321 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1324 // execute test insn
1325 (*func)();
1326 if (test_group.type == VX_CONV_TO_DOUBLE)
1327 dst_dp = (unsigned long long *) &vec_out;
1328 else
1329 dst_sp = (unsigned int *) &vec_out;
1330 // print result
1331 printf("#%d: %s ", i/stride, test_group.name);
1332 for (j = 0; j < loops; j++) {
1333 if (j)
1334 printf("; ");
1335 printf("conv(%08x)", targs[i + j]);
1336 if (test_group.type == VX_CONV_TO_DOUBLE)
1337 printf(" = %016llx", dst_dp[j]);
1338 else
1339 printf(" = %08x", dst_sp[j]);
1341 printf("\n");
1344 k++;
1345 printf( "\n" );
// Operand pairs for the doubleword divide tests; test_div_extensions loads
// column 0 into r14 and column 1 into r15.
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of operand pairs (rows) in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
// Operand pairs for the word divide tests; test_div_extensions loads
// column 0 into r14 and column 1 into r15.
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
// Number of operand pairs (rows) in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1380 typedef struct div_ext_test
1382 test_func_t test_func;
1383 const char *name;
1384 int num_tests;
1385 div_type_t div_type;
1386 precision_type_t precision;
1387 } div_ext_test_t;
1389 static div_ext_test_t div_tests[] = {
1390 #ifdef __powerpc64__
1391 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1392 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1393 #endif
1394 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1395 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1396 { NULL, NULL, 0, 0, 0 }
1399 static void test_div_extensions(void)
1401 test_func_t func;
1402 int k;
1403 k = 0;
1405 while ((func = div_tests[k].test_func)) {
1406 int i, repeat = 1;
1407 div_ext_test_t test_group = div_tests[k];
1408 do_dot = False;
1410 again:
1411 for (i = 0; i < test_group.num_tests; i++) {
1412 unsigned int condreg;
1414 if (test_group.div_type == DIV_OE)
1415 do_OE = True;
1416 else
1417 do_OE = False;
1419 if (test_group.precision == DOUBLE_TEST) {
1420 r14 = div_dw_tdata[i][0];
1421 r15 = div_dw_tdata[i][1];
1422 } else {
1423 r14 = div_w_tdata[i][0];
1424 r15 = div_w_tdata[i][1];
1426 // execute test insn
1427 (*func)();
1428 condreg = (div_flags & 0xf0000000) >> 28;
1429 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1430 if (test_group.precision == DOUBLE_TEST) {
1431 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1432 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1433 } else {
1434 printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1435 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1437 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1439 printf("\n");
1440 if (repeat) {
1441 repeat = 0;
1442 do_dot = True;
1443 goto again;
1445 k++;
1446 printf( "\n" );
1451 static void test_vx_tdivORtsqrt(void)
1453 test_func_t func;
1454 int k, crx;
1455 unsigned int flags;
1456 k = 0;
1457 do_dot = False;
1458 build_special_fargs_table();
1460 while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1461 int idx, i;
1462 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1463 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1464 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1465 Bool two_args = test_group.targs ? True : False;
1466 int stride = dp ? 2 : 4;
1467 int loops = is_scalar ? 1 : stride;
1468 stride = is_scalar ? 1: stride;
1470 for (i = 0; i < test_group.num_tests; i+=stride) {
1471 unsigned int * pv;
1472 void * inB, * vecB_void_ptr = (void *)&vec_inB;
1474 pv = (unsigned int *)&vec_out;
1475 // clear vec_out
1476 for (idx = 0; idx < 4; idx++, pv++)
1477 *pv = 0;
1479 if (dp) {
1480 int j;
1481 unsigned long long * frB_dp;
1482 if (two_args) {
1483 setup_dp_fp_args(&test_group.targs[i], False);
1484 } else {
1485 for (j = 0; j < loops; j++) {
1486 inB = (void *)&spec_fargs[i + j];
1487 // copy double precision FP into vector element i
1488 if (isLE && is_scalar)
1489 vecB_void_ptr += 8;
1490 memcpy(vecB_void_ptr + (j * 8), inB, 8);
1493 // execute test insn
1494 // Must do set/get of CRs immediately before/after calling the asm func
1495 // to avoid CRs being modified by other instructions.
1496 SET_FPSCR_ZERO;
1497 SET_CR_XER_ZERO;
1498 (*func)();
1499 GET_CR(flags);
1500 // assumes using CR1
1501 crx = (flags & 0x0f000000) >> 24;
1502 if (two_args) {
1503 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1504 } else {
1505 printf("#%d: %s ", i/stride, test_group.name);
1506 for (j = 0; j < loops; j++) {
1507 if (j)
1508 printf("; ");
1509 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1510 printf("%s(%016llx)", test_group.op, *frB_dp);
1512 printf( " ? %x (CRx)\n", crx);
1514 } else {
1515 int j;
1516 unsigned int * frB_sp;
1517 if (two_args) {
1518 setup_sp_fp_args(&test_group.targs[i], False);
1519 } else {
1520 for (j = 0; j < loops; j++) {
1521 inB = (void *)&spec_sp_fargs[i + j];
1522 // copy single precision FP into vector element i
1523 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1526 // execute test insn
1527 SET_FPSCR_ZERO;
1528 SET_CR_XER_ZERO;
1529 (*func)();
1530 GET_CR(flags);
1531 crx = (flags & 0x0f000000) >> 24;
1532 // print result
1533 if (two_args) {
1534 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1535 } else {
1536 printf("#%d: %s ", i/stride, test_group.name);
1537 for (j = 0; j < loops; j++) {
1538 if (j)
1539 printf("; ");
1540 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1541 printf("%s(%08x)", test_group.op, *frB_sp);
1543 printf( " ? %x (CRx)\n", crx);
1547 k++;
1548 printf( "\n" );
1553 static void test_ftsqrt(void)
1555 int i, crx;
1556 unsigned int flags;
1557 unsigned long long * frbp;
1558 build_special_fargs_table();
1561 for (i = 0; i < nb_special_fargs; i++) {
1562 f14 = spec_fargs[i];
1563 frbp = (unsigned long long *)&spec_fargs[i];
1564 SET_FPSCR_ZERO;
1565 SET_CR_XER_ZERO;
1566 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
1567 GET_CR(flags);
1568 crx = (flags & 0x0f000000) >> 24;
1569 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1571 printf( "\n" );
1574 static void
1575 test_popcntw(void)
1577 #ifdef __powerpc64__
1578 uint64_t res;
1579 unsigned long long src = 0x9182736405504536ULL;
1580 r14 = src;
1581 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1582 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1583 #else
1584 uint32_t res;
1585 unsigned int src = 0x9182730E;
1586 r14 = src;
1587 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1588 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1589 #endif
1590 printf( "\n" );
1594 static test_table_t
1595 all_tests[] =
1598 { &test_vsx_one_fp_arg,
1599 "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
1600 { &test_int_to_fp_convert,
1601 "Test VSX vector integer to float conversion instructions", OTHER_INST },
1602 { &test_div_extensions,
1603 "Test div extensions", SCALAR_DIV_INST },
1604 { &test_ftsqrt,
1605 "Test ftsqrt instruction", OTHER_INST },
1606 { &test_vx_tdivORtsqrt,
1607 "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
1608 { &test_popcntw,
1609 "Test popcntw instruction", OTHER_INST },
1610 { NULL, NULL }
1612 #endif // HAS_VSX
/* Print the command line help to stderr. */
static void usage (void)
{
   /* The program name used to read "test_isa_3_0", a leftover from another
    * test; it now names this test. */
   fprintf(stderr,
           "Usage: test_isa_2_06_part3 [OPTIONS]\n"
           "\t-d: test scalar division instructions (default)\n"
           "\t-o: test non scalar division instructions (default)\n"
           "\t-A: test all instructions (default)\n"
           "\t-h: display this help and exit\n"
           );
}
/* Entry point: select test groups from the command line and run them.
 *
 *   -d  run the scalar division group (SCALAR_DIV_INST)
 *   -o  run the remaining tests (OTHER_INST)
 *   -A  run everything
 *   -h  print the help text and exit with status 0
 *
 * When no group is selected, every test is run, as the help text promises.
 * Returns 1 on an unrecognised option and 0 otherwise.  Without HAS_VSX the
 * program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i = 0;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set by various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' for an unrecognised option and stores the
          * offending character in optopt, so report that instead of c. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   /* No group requested: run everything.  Previously the mask stayed 0 and
    * the program silently ran no tests at all. */
   if (test_run_mask == 0)
      test_run_mask = 0xFFFF;

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS _VSX

   return 0;
}