none/tests/ppc32/test_isa_2_06_part3.c

   1 /*  Copyright (C) 2011 IBM
   2
   3  Author: Maynard Johnson <maynardj@us.ibm.com>
   4
   5  This program is free software; you can redistribute it and/or
   6  modify it under the terms of the GNU General Public License as
   7  published by the Free Software Foundation; either version 2 of the
   8  License, or (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful, but
  11  WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, see <http://www.gnu.org/licenses/>.
  17
  18  The GNU General Public License is contained in the file COPYING.
  19  */
  20
  21 #include <stdio.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <malloc.h>
  26 #include <math.h>
  27 #include <unistd.h>    // getopt
  28
  29 #ifdef HAS_VSX
  30
  31 #include <altivec.h>
  32
  33 #ifndef __powerpc64__
  34 typedef uint32_t HWord_t;
  35 #else
  36 typedef uint64_t HWord_t;
  37 #endif /* __powerpc64__ */
  38
  39 #ifdef VGP_ppc64le_linux
  40 #define isLE 1
  41 #else
  42 #define isLE 0
  43 #endif
  44
  45 typedef unsigned char Bool;
  46 #define True 1
  47 #define False 0
  48 register HWord_t r14 __asm__ ("r14");
  49 register HWord_t r15 __asm__ ("r15");
  50 register HWord_t r16 __asm__ ("r16");
  51 register HWord_t r17 __asm__ ("r17");
  52 register double f14 __asm__ ("fr14");
  53 register double f15 __asm__ ("fr15");
  54 register double f16 __asm__ ("fr16");
  55 register double f17 __asm__ ("fr17");
  56
  57 static volatile unsigned int div_flags, div_xer;
  58
  59 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
  60
  61 #define SET_CR(_arg) \
  62       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
  63
  64 #define SET_XER(_arg) \
  65       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
  66
  67 #define GET_CR(_lval) \
  68       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
  69
  70 #define GET_XER(_lval) \
  71       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
  72
  73 #define GET_CR_XER(_lval_cr,_lval_xer) \
  74    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
  75
  76 #define SET_CR_ZERO \
  77       SET_CR(0)
  78
  79 #define SET_XER_ZERO \
  80       SET_XER(0)
  81
  82 #define SET_CR_XER_ZERO \
  83    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
  84
  85 #define SET_FPSCR_ZERO \
  86    do { double _d = 0.0; \
  87         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
  88    } while (0)
  89
  90
  91 typedef void (*test_func_t)(void);
  92 typedef struct test_table test_table_t;
  93
  94 /* Defines for the instructiion groups, use bit field to identify */
  95 #define SCALAR_DIV_INST    0x0001
  96 #define OTHER_INST  0x0002
  97
/* The functions below that construct a table of floating point
 * values were lifted from none/tests/ppc32/jm-insns.c.
 */
 101
 102 #if defined (DEBUG_ARGS_BUILD)
 103 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
 104 #else
 105 #define AB_DPRINTF(fmt, args...) do { } while (0)
 106 #endif
 107
 108 static inline void register_farg (void *farg,
 109                                   int s, uint16_t _exp, uint64_t mant)
 110 {
 111    uint64_t tmp;
 112
 113    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
 114    *(uint64_t *)farg = tmp;
 115    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
 116               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
 117 }
 118
 119 static inline void register_sp_farg (void *farg,
 120                                      int s, uint16_t _exp, uint32_t mant)
 121 {
 122    uint32_t tmp;
 123    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
 124    *(uint32_t *)farg = tmp;
 125 }
 126
 127
 128 typedef struct fp_test_args {
 129    int fra_idx;
 130    int frb_idx;
 131 } fp_test_args_t;
 132
 133
 134 fp_test_args_t two_arg_fp_tests[] = {
 135                                      {8, 8},
 136                                      {8, 14},
 137                                      {15, 16},
 138                                      {8, 5},
 139                                      {8, 4},
 140                                      {8, 7},
 141                                      {8, 9},
 142                                      {8, 11},
 143                                      {14, 8},
 144                                      {14, 14},
 145                                      {14, 6},
 146                                      {14, 5},
 147                                      {14, 4},
 148                                      {14, 7},
 149                                      {14, 9},
 150                                      {14, 11},
 151                                      {6, 8},
 152                                      {6, 14},
 153                                      {6, 6},
 154                                      {6, 5},
 155                                      {6, 4},
 156                                      {6, 7},
 157                                      {6, 9},
 158                                      {6, 11},
 159                                      {5, 8},
 160                                      {5, 14},
 161                                      {5, 6},
 162                                      {5, 5},
 163                                      {5, 4},
 164                                      {5, 7},
 165                                      {5, 9},
 166                                      {5, 11},
 167                                      {4, 8},
 168                                      {4, 14},
 169                                      {4, 6},
 170                                      {4, 5},
 171                                      {4, 1},
 172                                      {4, 7},
 173                                      {4, 9},
 174                                      {4, 11},
 175                                      {7, 8},
 176                                      {7, 14},
 177                                      {7, 6},
 178                                      {7, 5},
 179                                      {7, 4},
 180                                      {7, 7},
 181                                      {7, 9},
 182                                      {7, 11},
 183                                      {10, 8},
 184                                      {10, 14},
 185                                      {12, 6},
 186                                      {12, 5},
 187                                      {10, 4},
 188                                      {10, 7},
 189                                      {10, 9},
 190                                      {10, 11},
 191                                      {12, 8 },
 192                                      {12, 14},
 193                                      {12, 6},
 194                                      {15, 16},
 195                                      {15, 16},
 196                                      {9, 11},
 197                                      {11, 11},
 198                                      {11, 12},
 199                                      {16, 18},
 200                                      {17, 16},
 201                                      {19, 19},
 202                                      {19, 18}
 203 };
 204
 205
 206 static int nb_special_fargs;
 207 static double * spec_fargs;
 208 static float * spec_sp_fargs;
 209
 210 static void build_special_fargs_table(void)
 211 {
 212 /*
 213   Entry  Sign Exp   fraction                  Special value
 214    0      0   3fd   0x8000000000000ULL         Positive finite number
 215    1      0   404   0xf000000000000ULL         ...
 216    2      0   001   0x8000000b77501ULL         ...
 217    3      0   7fe   0x800000000051bULL         ...
 218    4      0   012   0x3214569900000ULL         ...
 219    5      0   000   0x0000000000000ULL         +0.0 (+zero)
 220    6      1   000   0x0000000000000ULL         -0.0 (-zero)
 221    7      0   7ff   0x0000000000000ULL         +infinity
 222    8      1   7ff   0x0000000000000ULL         -infinity
 223    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
 224    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
 225    11     0   7ff   0x8000000000000ULL         +QNaN
 226    12     1   7ff   0x8000000000000ULL         -QNaN
 227    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
 228    14     1   40d   0x0650f5a07b353ULL         Negative finite number
 229    15     0   412   0x32585a9900000ULL         A few more positive finite numbers
 230    16     0   413   0x82511a2000000ULL         ...
 231    17  . . . . . . . . . . . . . . . . . . . . . . .
 232    18  . . . . . . . . . . . . . . . . . . . . . . .
 233    19  . . . . . . . . . . . . . . . . . . . . . . .
 234 */
 235
 236    uint64_t mant;
 237    uint32_t mant_sp;
 238    uint16_t _exp;
 239    int s;
 240    int j, i = 0;
 241
 242    if (spec_fargs)
 243       return;
 244
 245    spec_fargs = malloc( 20 * sizeof(double) );
 246    spec_sp_fargs = malloc( 20 * sizeof(float) );
 247
 248    // #0
 249    s = 0;
 250    _exp = 0x3fd;
 251    mant = 0x8000000000000ULL;
 252    register_farg(&spec_fargs[i++], s, _exp, mant);
 253
 254    // #1
 255    s = 0;
 256    _exp = 0x404;
 257    mant = 0xf000000000000ULL;
 258    register_farg(&spec_fargs[i++], s, _exp, mant);
 259
 260    // #2
 261    s = 0;
 262    _exp = 0x001;
 263    mant = 0x8000000b77501ULL;
 264    register_farg(&spec_fargs[i++], s, _exp, mant);
 265
 266    // #3
 267    s = 0;
 268    _exp = 0x7fe;
 269    mant = 0x800000000051bULL;
 270    register_farg(&spec_fargs[i++], s, _exp, mant);
 271
 272    // #4
 273    s = 0;
 274    _exp = 0x012;
 275    mant = 0x3214569900000ULL;
 276    register_farg(&spec_fargs[i++], s, _exp, mant);
 277
 278
 279    /* Special values */
 280    /* +0.0      : 0 0x000 0x0000000000000 */
 281    // #5
 282    s = 0;
 283    _exp = 0x000;
 284    mant = 0x0000000000000ULL;
 285    register_farg(&spec_fargs[i++], s, _exp, mant);
 286
 287    /* -0.0      : 1 0x000 0x0000000000000 */
 288    // #6
 289    s = 1;
 290    _exp = 0x000;
 291    mant = 0x0000000000000ULL;
 292    register_farg(&spec_fargs[i++], s, _exp, mant);
 293
 294    /* +infinity : 0 0x7FF 0x0000000000000  */
 295    // #7
 296    s = 0;
 297    _exp = 0x7FF;
 298    mant = 0x0000000000000ULL;
 299    register_farg(&spec_fargs[i++], s, _exp, mant);
 300
 301    /* -infinity : 1 0x7FF 0x0000000000000 */
 302    // #8
 303    s = 1;
 304    _exp = 0x7FF;
 305    mant = 0x0000000000000ULL;
 306    register_farg(&spec_fargs[i++], s, _exp, mant);
 307
 308    /*
 309     * This comment applies to values #9 and #10 below:
 310     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
 311     * so we can't just copy the double-precision value to the corresponding slot in the
 312     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
 313     * have to manually set the bits using register_sp_farg().
 314     */
 315
 316    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
 317    // #9
 318    s = 0;
 319    _exp = 0x7FF;
 320    mant = 0x7FFFFFFFFFFFFULL;
 321    register_farg(&spec_fargs[i++], s, _exp, mant);
 322    _exp = 0xff;
 323    mant_sp = 0x3FFFFF;
 324    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 325
 326    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
 327    // #10
 328    s = 1;
 329    _exp = 0x7FF;
 330    mant = 0x7FFFFFFFFFFFFULL;
 331    register_farg(&spec_fargs[i++], s, _exp, mant);
 332    _exp = 0xff;
 333    mant_sp = 0x3FFFFF;
 334    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 335
 336    /* +QNaN     : 0 0x7FF 0x8000000000000 */
 337    // #11
 338    s = 0;
 339    _exp = 0x7FF;
 340    mant = 0x8000000000000ULL;
 341    register_farg(&spec_fargs[i++], s, _exp, mant);
 342
 343    /* -QNaN     : 1 0x7FF 0x8000000000000 */
 344    // #12
 345    s = 1;
 346    _exp = 0x7FF;
 347    mant = 0x8000000000000ULL;
 348    register_farg(&spec_fargs[i++], s, _exp, mant);
 349
 350    /* denormalized value */
 351    // #13
 352    s = 1;
 353    _exp = 0x000;
 354    mant = 0x8340000078000ULL;
 355    register_farg(&spec_fargs[i++], s, _exp, mant);
 356
 357    /* Negative finite number */
 358    // #14
 359    s = 1;
 360    _exp = 0x40d;
 361    mant = 0x0650f5a07b353ULL;
 362    register_farg(&spec_fargs[i++], s, _exp, mant);
 363
 364    /* A few positive finite numbers ... */
 365    // #15
 366    s = 0;
 367    _exp = 0x412;
 368    mant = 0x32585a9900000ULL;
 369    register_farg(&spec_fargs[i++], s, _exp, mant);
 370
 371    // #16
 372    s = 0;
 373    _exp = 0x413;
 374    mant = 0x82511a2000000ULL;
 375    register_farg(&spec_fargs[i++], s, _exp, mant);
 376
 377    // #17
 378    s = 0;
 379    _exp = 0x403;
 380    mant = 0x12ef5a9300000ULL;
 381    register_farg(&spec_fargs[i++], s, _exp, mant);
 382
 383    // #18
 384    s = 0;
 385    _exp = 0x405;
 386    mant = 0x14bf5d2300000ULL;
 387    register_farg(&spec_fargs[i++], s, _exp, mant);
 388
 389    // #19
 390    s = 0;
 391    _exp = 0x409;
 392    mant = 0x76bf982440000ULL;
 393    register_farg(&spec_fargs[i++], s, _exp, mant);
 394
 395    nb_special_fargs = i;
 396    for (j = 0; j < i; j++) {
 397       if (!(j == 9 || j == 10))
 398          spec_sp_fargs[j] = spec_fargs[j];
 399    }
 400 }
 401
 402
 403 struct test_table
 404 {
 405    test_func_t test_category;
 406    char * name;
 407    unsigned int test_group;
 408 };
 409
 410 /*  Type of input for floating point operations.*/
 411 typedef enum {
 412    SINGLE_TEST,
 413    DOUBLE_TEST
 414 } precision_type_t;
 415
 416 typedef enum {
 417    VX_SCALAR_CONV_TO_WORD,
 418    VX_CONV_TO_SINGLE,
 419    VX_CONV_TO_DOUBLE,
 420    VX_ESTIMATE,
 421    VX_DEFAULT
 422 } vx_fp_test_type;
 423
 424 static vector unsigned int vec_out, vec_inA, vec_inB;
 425
 426 /* This function is for checking the reciprocal and reciprocal square root
 427  * estimate instructions.
 428  */
 429 Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
 430 {
 431    /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
 432     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
 433     * does an actual reciprocal calculation versus estimation, so the answer we get back from
 434     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
 435     * precision) and the estimate may still be within expected tolerances.  On top of that,
 436     * we can't count on these estimates always being the same across implementations.
 437     * For example, with the fre[s] instruction (which should be correct to within one part
 438     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
 439     * one implementation could return 1.0111_1111_0000 and another implementation could return
 440     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
 441     * single bit in common.
 442     *
 443     * The upshot is we can't validate the VEX output for these instructions by comparing against
 444     * stored bit patterns.  We must check that the result is within expected tolerances.
 445     */
 446
 447
 448    /* A mask to be used for validation as a last resort.
 449     * Only use 12 bits of precision for reasons discussed above.
 450     */
 451 #define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
 452 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
 453
 454    Bool result = False;
 455    Bool dp_test = type == DOUBLE_TEST;
 456    double src_dp, res_dp;
 457    float src_sp, res_sp;
 458    src_dp = res_dp = 0;
 459    src_sp = res_sp = 0;
 460 #define SRC (dp_test ? src_dp : src_sp)
 461 #define RES (dp_test ? res_dp : res_sp)
 462    Bool src_is_negative = False;
 463    Bool res_is_negative = False;
 464    unsigned long long * dst_dp = NULL;
 465    unsigned int * dst_sp = NULL;
 466    if (dp_test) {
 467       unsigned long long * src_dp_ull;
 468       dst_dp = (unsigned long long *) &vec_out;
 469       src_dp = spec_fargs[idx];
 470       src_dp_ull = (unsigned long long *) &src_dp;
 471       src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
 472       res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
 473       memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
 474    } else {
 475       unsigned int * src_sp_uint;
 476       dst_sp = (unsigned int *) &vec_out;
 477       src_sp = spec_sp_fargs[idx];
 478       src_sp_uint = (unsigned int *) &src_sp;
 479       src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
 480       res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
 481       memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
 482    }
 483
 484    // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
 485    if (isnan(SRC))
 486       return isnan(RES);
 487    if (fpclassify(SRC) == FP_ZERO)
 488       return isinf(RES);
 489    if (!src_is_negative && isinf(SRC))
 490       return !res_is_negative && (fpclassify(RES) == FP_ZERO);
 491    if (is_rsqrte) {
 492       if (src_is_negative)
 493          return isnan(RES);
 494    } else {
 495       if (src_is_negative && isinf(SRC))
 496          return res_is_negative && (fpclassify(RES) == FP_ZERO);
 497    }
 498    if (dp_test) {
 499       double calc_diff;
 500       double real_diff;
 501       double recip_divisor;
 502       double div_result;
 503       double calc_diff_tmp;
 504
 505       if (is_rsqrte)
 506          recip_divisor = sqrt(src_dp);
 507       else
 508          recip_divisor = src_dp;
 509
 510       div_result = 1.0/recip_divisor;
 511       calc_diff_tmp = recip_divisor * 16384.0;
 512       if (isnormal(calc_diff_tmp)) {
 513          calc_diff = fabs(1.0/calc_diff_tmp);
 514          real_diff = fabs(res_dp - div_result);
 515          result = ( ( res_dp == div_result )
 516                   || ( real_diff <= calc_diff ) );
 517       } else {
 518          /* Unable to compute theoretical difference, so we fall back to masking out
 519           * un-precise bits.
 520           */
 521          unsigned long long * div_result_dp = (unsigned long long *) &div_result;
 522          result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
 523       }
 524       /* For debug use . . .
 525          if (!result) {
 526              unsigned long long * dv = &div_result;
 527              unsigned long long * rd = &real_diff;
 528              unsigned long long * cd = &calc_diff;
 529              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
 530        *dv, *rd, *cd);
 531           }
 532        */
 533    } else {  // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
 534       float calc_diff;
 535       float real_diff;
 536       float div_result;
 537       float calc_diff_tmp;
 538       float recip_divisor = sqrt(src_sp);
 539
 540       div_result = 1.0/recip_divisor;
 541       calc_diff_tmp = recip_divisor * 16384.0;
 542       if (isnormal(calc_diff_tmp)) {
 543          calc_diff = fabsf(1.0/calc_diff_tmp);
 544          real_diff = fabsf(res_sp - div_result);
 545          result = ( ( res_sp == div_result )
 546                   || ( real_diff <= calc_diff ) );
 547       } else {
 548          /* Unable to compute theoretical difference, so we fall back to masking out
 549           * un-precise bits.
 550           */
 551          unsigned int * div_result_sp = (unsigned int *) &div_result;
 552          result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
 553       }
 554       /* For debug use . . .
 555          if (!result) {
 556              unsigned long long * dv = &div_result;
 557              unsigned long long * rd = &real_diff;
 558              unsigned long long * cd = &calc_diff;
 559              printf("\n\t {actual div_result: %016llx; real_diff:  %016llx; calc_diff:  %016llx}\n",
 560        *dv, *rd, *cd);
 561           }
 562        */
 563    }
 564    return result;
 565 }
 566
 567 typedef struct vx_fp_test
 568 {
 569    test_func_t test_func;
 570    const char * name;
 571    fp_test_args_t * targs;
 572    int num_tests;
 573    precision_type_t precision;
 574    vx_fp_test_type type;
 575    const char * op;
 576 } vx_fp_test_t;
 577
 578
 579 static Bool do_dot;
 580
 581 static void test_xvredp(void)
 582 {
 583    __asm__ __volatile__ ("xvredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 584 }
 585
 586 static void test_xsredp(void)
 587 {
 588    __asm__ __volatile__ ("xsredp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 589 }
 590
 591 static void test_xvrsqrtedp(void)
 592 {
 593    __asm__ __volatile__ ("xvrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 594 }
 595
 596 static void test_xsrsqrtedp(void)
 597 {
 598    __asm__ __volatile__ ("xsrsqrtedp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 599 }
 600
 601 static void test_xvrsqrtesp(void)
 602 {
 603    __asm__ __volatile__ ("xvrsqrtesp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 604 }
 605
 606 static void test_xstsqrtdp(void)
 607 {
 608    __asm__ __volatile__ ("xstsqrtdp   cr1, %x0" : : "wa" (vec_inB));
 609 }
 610
 611 static void test_xvtsqrtdp(void)
 612 {
 613    __asm__ __volatile__ ("xvtsqrtdp   cr1, %x0" : : "wa" (vec_inB));
 614 }
 615
 616 static void test_xvtsqrtsp(void)
 617 {
 618    __asm__ __volatile__ ("xvtsqrtsp   cr1, %x0" : : "wa" (vec_inB));
 619 }
 620
 621 static void test_xvsqrtdp(void)
 622 {
 623    __asm__ __volatile__ ("xvsqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 624 }
 625
 626 static void test_xvsqrtsp(void)
 627 {
 628    __asm__ __volatile__ ("xvsqrtsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 629 }
 630
 631 static void test_xvtdivdp(void)
 632 {
 633    __asm__ __volatile__ ("xvtdivdp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
 634 }
 635
 636 static void test_xvtdivsp(void)
 637 {
 638    __asm__ __volatile__ ("xvtdivsp   cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
 639 }
 640
 641 static void test_xscvdpsp(void)
 642 {
 643    __asm__ __volatile__ ("xscvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 644 }
 645
 646 static void test_xscvdpuxws(void)
 647 {
 648    __asm__ __volatile__ ("xscvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 649 }
 650
 651 static void test_xscvspdp(void)
 652 {
 653    __asm__ __volatile__ ("xscvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 654 }
 655
 656 static void test_xvcvdpsp(void)
 657 {
 658    __asm__ __volatile__ ("xvcvdpsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 659 }
 660
 661 static void test_xvcvdpuxds(void)
 662 {
 663    __asm__ __volatile__ ("xvcvdpuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 664 }
 665
 666 static void test_xvcvdpuxws(void)
 667 {
 668    __asm__ __volatile__ ("xvcvdpuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 669 }
 670
 671 static void test_xvcvspdp(void)
 672 {
 673    __asm__ __volatile__ ("xvcvspdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 674 }
 675
 676 static void test_xvcvspsxds(void)
 677 {
 678    __asm__ __volatile__ ("xvcvspsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 679 }
 680
 681 static void test_xvcvspuxds(void)
 682 {
 683    __asm__ __volatile__ ("xvcvspuxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 684 }
 685
 686 static void test_xvcvdpsxds(void)
 687 {
 688    __asm__ __volatile__ ("xvcvdpsxds   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 689 }
 690
 691 static void test_xvcvspuxws(void)
 692 {
 693    __asm__ __volatile__ ("xvcvspuxws   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 694 }
 695
 696 static void test_xvcvsxddp(void)
 697 {
 698    __asm__ __volatile__ ("xvcvsxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 699 }
 700
 701 static void test_xvcvuxddp(void)
 702 {
 703    __asm__ __volatile__ ("xvcvuxddp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 704 }
 705
 706 static void test_xvcvsxdsp(void)
 707 {
 708    __asm__ __volatile__ ("xvcvsxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 709 }
 710
 711 static void test_xvcvuxdsp(void)
 712 {
 713    __asm__ __volatile__ ("xvcvuxdsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 714 }
 715
 716 static void test_xvcvsxwdp(void)
 717 {
 718    __asm__ __volatile__ ("xvcvsxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 719 }
 720
 721 static void test_xvcvuxwdp(void)
 722 {
 723    __asm__ __volatile__ ("xvcvuxwdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 724 }
 725
 726 static void test_xvcvsxwsp(void)
 727 {
 728    __asm__ __volatile__ ("xvcvsxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 729 }
 730
 731 static void test_xvcvuxwsp(void)
 732 {
 733    __asm__ __volatile__ ("xvcvuxwsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 734 }
 735
 736 static void test_xsrdpic(void)
 737 {
 738    __asm__ __volatile__ ("xsrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 739 }
 740
 741 static void test_xsrdpiz(void)
 742 {
 743    __asm__ __volatile__ ("xsrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 744 }
 745
 746 static void test_xsrdpi(void)
 747 {
 748    __asm__ __volatile__ ("xsrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 749 }
 750
 751 static void test_xvabsdp(void)
 752 {
 753    __asm__ __volatile__ ("xvabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 754 }
 755
 756 static void test_xvnabsdp(void)
 757 {
 758    __asm__ __volatile__ ("xvnabsdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 759 }
 760
 761 static void test_xvnegdp(void)
 762 {
 763    __asm__ __volatile__ ("xvnegdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 764 }
 765
 766 static void test_xvnegsp(void)
 767 {
 768    __asm__ __volatile__ ("xvnegsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 769 }
 770
 771 static void test_xvabssp(void)
 772 {
 773    __asm__ __volatile__ ("xvabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 774 }
 775
 776 static void test_xvnabssp(void)
 777 {
 778    __asm__ __volatile__ ("xvnabssp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 779 }
 780
 781 static void test_xvrdpi(void)
 782 {
 783    __asm__ __volatile__ ("xvrdpi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 784 }
 785
 786 static void test_xvrdpic(void)
 787 {
 788    __asm__ __volatile__ ("xvrdpic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 789 }
 790
 791 static void test_xvrdpim(void)
 792 {
 793    __asm__ __volatile__ ("xvrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 794 }
 795
 796 static void test_xvrdpip(void)
 797 {
 798    __asm__ __volatile__ ("xvrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 799 }
 800
 801 static void test_xvrdpiz(void)
 802 {
 803    __asm__ __volatile__ ("xvrdpiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 804 }
 805
 806 static void test_xvrspi(void)
 807 {
 808    __asm__ __volatile__ ("xvrspi   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 809 }
 810
 811 static void test_xvrspic(void)
 812 {
 813    __asm__ __volatile__ ("xvrspic   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 814 }
 815
 816 static void test_xvrspim(void)
 817 {
 818    __asm__ __volatile__ ("xvrspim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 819 }
 820
 821 static void test_xvrspip(void)
 822 {
 823    __asm__ __volatile__ ("xvrspip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 824 }
 825
 826 static void test_xvrspiz(void)
 827 {
 828    __asm__ __volatile__ ("xvrspiz   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 829 }
 830
 831 static vx_fp_test_t
 832 vsx_one_fp_arg_tests[] = {
 833                                 { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
 834                                 { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
 835                                 { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
 836                                 { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
 837                                 { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
 838                                 { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
 839                                 { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
 840                                 { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
 841                                 { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
 842                                 { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 843                                 { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
 844                                 { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 845                                 { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
 846                                 { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 847                                 { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 848                                 { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 849                                 { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
 850                                 { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
 851                                 { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 852                                 { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 853                                 { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 854                                 { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
 855                                 { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
 856                                 { &test_xvnegsp, "xvnegsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "neg"},
 857                                 { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
 858                                 { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
 859                                 { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
 860                                 { &test_xvrdpi,  "xvrdpi",  NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 861                                 { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 862                                 { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 863                                 { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 864                                 { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
 865                                 { &test_xvrspi,  "xvrspi",  NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
 866                                 { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
 867                                 { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
 868                                 { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
 869                                 { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
 870                                 { NULL, NULL, NULL, 0, 0, 0, NULL}
 871 };
 872
 873 static vx_fp_test_t
 874 vx_tdivORtsqrt_tests[] = {
 875                           { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
 876                           { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
 877                           { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
 878                           { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
 879                           { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
 880                           { NULL, NULL, NULL, 0 , 0, 0, NULL}
 881 };
 882
 883 static unsigned long long doubleWord[] = { 0,
 884                                   0xffffffff00000000LL,
 885                                   0x00000000ffffffffLL,
 886                                   0xffffffffffffffffLL,
 887                                   0x89abcde123456789LL,
 888                                   0x0102030405060708LL,
 889                                   0x00000000a0b1c2d3LL,
 890                                   0x1111222233334444LL
 891 };
 892
 893 static unsigned int singleWord[] = {0,
 894                                   0xffff0000,
 895                                   0x0000ffff,
 896                                   0xffffffff,
 897                                   0x89a73522,
 898                                   0x01020304,
 899                                   0x0000abcd,
 900                                   0x11223344
 901 };
 902
 903 typedef struct vx_intToFp_test
 904 {
 905    test_func_t test_func;
 906    const char * name;
 907    void * targs;
 908    int num_tests;
 909    precision_type_t precision;
 910    vx_fp_test_type type;
 911 } vx_intToFp_test_t;
 912
 913 static vx_intToFp_test_t
 914 intToFp_tests[] = {
 915                    { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
 916                    { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
 917                    { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
 918                    { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
 919                    { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
 920                    { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
 921                    { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
 922                    { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
 923                    { NULL, NULL, NULL, 0, 0 }
 924 };
 925
 926 static Bool do_OE;
 927 typedef enum {
 928    DIV_BASE = 1,
 929    DIV_OE = 2,
 930    DIV_DOT = 4,
 931 } div_type_t;
 932 /* Possible divde type combinations are:
 933  *   - base
 934  *   - base+dot
 935  *   - base+OE
 936  *   - base+OE+dot
 937  */
 938 #ifdef __powerpc64__
 939 static void test_divdeu(void)
 940 {
 941    int divdeu_type = DIV_BASE;
 942    if (do_OE)
 943       divdeu_type |= DIV_OE;
 944    if (do_dot)
 945       divdeu_type |= DIV_DOT;
 946
 947    switch (divdeu_type) {
 948       case 1:
 949         SET_CR_XER_ZERO;
 950          __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 951          GET_CR_XER(div_flags, div_xer);
 952          break;
 953       case 3:
 954         SET_CR_XER_ZERO;
 955          __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 956          GET_CR_XER(div_flags, div_xer);
 957          break;
 958       case 5:
 959         SET_CR_XER_ZERO;
 960          __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 961          GET_CR_XER(div_flags, div_xer);
 962          break;
 963       case 7:
 964         SET_CR_XER_ZERO;
 965          __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 966          GET_CR_XER(div_flags, div_xer);
 967          break;
 968       default:
 969          fprintf(stderr, "Invalid divdeu type. Exiting\n");
 970          exit(1);
 971    }
 972 }
 973 #endif
 974
 975 static void test_divwe(void)
 976 {
 977    int divwe_type = DIV_BASE;
 978    if (do_OE)
 979       divwe_type |= DIV_OE;
 980    if (do_dot)
 981       divwe_type |= DIV_DOT;
 982
 983    switch (divwe_type) {
 984       case 1:
 985         SET_CR_XER_ZERO;
 986          __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 987          GET_CR_XER(div_flags, div_xer);
 988          break;
 989       case 3:
 990         SET_CR_XER_ZERO;
 991          __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 992          GET_CR_XER(div_flags, div_xer);
 993          break;
 994       case 5:
 995         SET_CR_XER_ZERO;
 996          __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 997          GET_CR_XER(div_flags, div_xer);
 998          break;
 999       case 7:
1000         SET_CR_XER_ZERO;
1001          __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1002          GET_CR_XER(div_flags, div_xer);
1003          break;
1004       default:
1005          fprintf(stderr, "Invalid divweu type. Exiting\n");
1006          exit(1);
1007    }
1008 }
1009
1010
1011 typedef struct simple_test {
1012    test_func_t test_func;
1013    char * name;
1014    precision_type_t precision;
1015 } simple_test_t;
1016
1017
1018 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1019 {
1020    int a_idx, b_idx, i;
1021    void * inA, * inB;
1022    void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1023
1024    for (i = 0; i < 4; i++) {
1025       a_idx = targs->fra_idx;
1026       b_idx = targs->frb_idx;
1027       inA = (void *)&spec_sp_fargs[a_idx];
1028       inB = (void *)&spec_sp_fargs[b_idx];
1029       // copy single precision FP  into vector element i
1030       memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1031       memcpy(vec_src + (i * 4), inB, 4);
1032       targs++;
1033    }
1034 }
1035
1036 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1037 {
1038    int a_idx, b_idx, i;
1039    void * inA, * inB;
1040    void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1041
1042    for (i = 0; i < 2; i++) {
1043       a_idx = targs->fra_idx;
1044       b_idx = targs->frb_idx;
1045       inA = (void *)&spec_fargs[a_idx];
1046       inB = (void *)&spec_fargs[b_idx];
1047       // copy double precision FP  into vector element i
1048       memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1049       memcpy(vec_src + (i * 8), inB, 8);
1050       targs++;
1051    }
1052 }
1053
1054 #define VX_NOT_CMP_OP 0xffffffff
1055 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1056 {
1057    int a_idx, b_idx, k;
1058    char * name = malloc(20);
1059    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1060    int loops = dp ? 2 : 4;
1061    fp_test_args_t * targs = &test_group->targs[i];
1062    unsigned long long * frA_dp, * frB_dp, * dst_dp;
1063    unsigned int * frA_sp, *frB_sp, * dst_sp;
1064    strcpy(name, test_group->name);
1065    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1066    for (k = 0; k < loops; k++) {
1067       a_idx = targs->fra_idx;
1068       b_idx = targs->frb_idx;
1069       if (k)
1070          printf(" AND ");
1071       if (dp) {
1072          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1073          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1074          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1075       } else {
1076          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1077          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1078          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1079       }
1080       targs++;
1081    }
1082    if (cc != VX_NOT_CMP_OP)
1083       printf(" ? cc=%x", cc);
1084
1085    if (print_vec_out) {
1086       if (dp) {
1087          dst_dp = (unsigned long long *) &vec_out;
1088          printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1089       } else {
1090          dst_sp = (unsigned int *) &vec_out;
1091          printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1092       }
1093    } else {
1094       printf("\n");
1095    }
1096    free(name);
1097 }
1098
1099
1100
1101 static void test_vsx_one_fp_arg(void)
1102 {
1103    test_func_t func;
1104    int k;
1105    k = 0;
1106    build_special_fargs_table();
1107
1108    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1109       int idx, i;
1110       vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1111       Bool estimate = (test_group.type == VX_ESTIMATE);
1112       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1113       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1114       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1115       Bool sparse_sp = False;
1116       int stride = dp ? 2 : 4;
1117       int loops = is_scalar ? 1 : stride;
1118       stride = is_scalar ? 1: stride;
1119
1120       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1121        *    |___ SP___|_Unused_|___SP___|__Unused__|   // for vector op
1122        *                     or
1123        *    |___ SP___|_Unused_|_Unused_|__Unused__|   // for scalar op
1124        *
1125        * For the vector op case, we need to adjust stride from '4' to '2', since
1126        * we'll only be loading two values per loop into the input register.
1127        */
1128       if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1129          sparse_sp = True;
1130          stride = 2;
1131       }
1132
1133       for (i = 0; i < test_group.num_tests; i+=stride) {
1134          unsigned int * pv;
1135          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1136
1137          pv = (unsigned int *)&vec_out;
1138          // clear vec_out
1139          for (idx = 0; idx < 4; idx++, pv++)
1140             *pv = 0;
1141
1142          if (dp) {
1143             int j;
1144             unsigned long long * frB_dp, *dst_dp;
1145             for (j = 0; j < loops; j++) {
1146                inB = (void *)&spec_fargs[i + j];
1147                // copy double precision FP into vector element i
1148                if (isLE && is_scalar)
1149                   vecB_void_ptr += 8;
1150                memcpy(vecB_void_ptr + (j * 8), inB, 8);
1151             }
1152             // execute test insn
1153             (*func)();
1154             dst_dp = (unsigned long long *) &vec_out;
1155             if (isLE && is_scalar)
1156                dst_dp++;
1157             printf("#%d: %s ", i/stride, test_group.name);
1158             for (j = 0; j < loops; j++) {
1159                if (j)
1160                   printf("; ");
1161                frB_dp = (unsigned long long *)&spec_fargs[i + j];
1162                printf("%s(%016llx)", test_group.op, *frB_dp);
1163                if (estimate) {
1164                   Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
1165                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1166                   /* For debugging . . .
1167                    printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1168                    */
1169                } else {
1170                   vx_fp_test_type type = test_group.type;
1171                   switch (type) {
1172                      case VX_SCALAR_CONV_TO_WORD:
1173                         printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1174                         break;
1175                      case VX_CONV_TO_SINGLE:
1176                         printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1177                         break;
1178                      default:  // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1179                         printf(" = %016llx", dst_dp[j]);
1180                   }
1181                }
1182             }
1183             printf("\n");
1184          } else {
1185             int j;
1186             unsigned int * frB_sp, * dst_sp = NULL;
1187             unsigned long long * dst_dp = NULL;
1188             if (sparse_sp)
1189                loops = 2;
1190             for (j = 0; j < loops; j++) {
1191                inB = (void *)&spec_sp_fargs[i + j];
1192                // copy single precision FP into vector element i
1193                if (sparse_sp) {
1194                   if (isLE)
1195                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1196                   else
1197                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1198                } else {
1199                   if (isLE && is_scalar)
1200                      vecB_void_ptr += 12;
1201                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1202                }
1203             }
1204             // execute test insn
1205             (*func)();
1206             if (test_group.type == VX_CONV_TO_DOUBLE) {
1207                dst_dp = (unsigned long long *) &vec_out;
1208                if (isLE && is_scalar)
1209                   dst_dp++;
1210             } else {
1211                dst_sp = (unsigned int *) &vec_out;
1212                if (isLE && is_scalar)
1213                   dst_sp += 3;
1214             }
1215             // print result
1216             printf("#%d: %s ", i/stride, test_group.name);
1217             for (j = 0; j < loops; j++) {
1218                if (j)
1219                   printf("; ");
1220                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1221                printf("%s(%08x)", test_group.op, *frB_sp);
1222                if (estimate) {
1223                   Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
1224                   printf(" ==> %s)", res ? "PASS" : "FAIL");
1225                } else {
1226                   if (test_group.type == VX_CONV_TO_DOUBLE)
1227                         printf(" = %016llx", dst_dp[j]);
1228                   else
1229                   /* Special case: Current VEX implementation for fsqrts (single precision)
1230                    * uses the same implementation as that used for double precision fsqrt.
1231                    * However, I've found that for xvsqrtsp, the result from that implementation
1232                    * may be off by the two LSBs.  Generally, even this small inaccuracy can cause the
1233                    * output to appear very different if you end up with a carry.  But for the given
1234                    * inputs in this testcase, we can simply mask out these bits.
1235                    */
1236                      printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1237                }
1238             }
1239             printf("\n");
1240          }
1241       }
1242       k++;
1243       printf( "\n" );
1244    }
1245 }
1246
1247 static void test_int_to_fp_convert(void)
1248 {
1249    test_func_t func;
1250    int k;
1251    k = 0;
1252
1253    while ((func = intToFp_tests[k].test_func)) {
1254       int idx, i;
1255       vx_intToFp_test_t test_group = intToFp_tests[k];
1256       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1257       Bool sparse_sp = False;
1258       int stride = dp ? 2 : 4;
1259       int loops = stride;
1260
1261       /* For conversions of single to double, the 128-bit input register is sparsely populated:
1262        *    |___ int___|_Unused_|___int___|__Unused__|   // for vector op
1263        *                     or
1264        * We need to adjust stride from '4' to '2', since we'll only be loading
1265        * two values per loop into the input register.
1266        */
1267       if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1268          sparse_sp = True;
1269          stride = 2;
1270       }
1271
1272       for (i = 0; i < test_group.num_tests; i+=stride) {
1273          unsigned int * pv;
1274          void * inB;
1275
1276          pv = (unsigned int *)&vec_out;
1277          // clear vec_out
1278          for (idx = 0; idx < 4; idx++, pv++)
1279             *pv = 0;
1280
1281          if (dp) {
1282             int j;
1283             unsigned long long  *dst_dw, * targs = test_group.targs;
1284             for (j = 0; j < loops; j++) {
1285                inB = (void *)&targs[i + j];
1286                // copy doubleword into vector element i
1287                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1288             }
1289             // execute test insn
1290             (*func)();
1291             dst_dw = (unsigned long long *) &vec_out;
1292             printf("#%d: %s ", i/stride, test_group.name);
1293             for (j = 0; j < loops; j++) {
1294                if (j)
1295                   printf("; ");
1296                printf("conv(%016llx)", targs[i + j]);
1297
1298                if (test_group.type == VX_CONV_TO_SINGLE)
1299                   printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1300                else
1301                   printf(" = %016llx", dst_dw[j]);
1302             }
1303             printf("\n");
1304          } else {
1305             int j;
1306             unsigned int * dst_sp = NULL;
1307             unsigned int * targs = test_group.targs;
1308             unsigned long long * dst_dp = NULL;
1309             void * vecB_void_ptr = (void *)&vec_inB;
1310             if (sparse_sp)
1311                loops = 2;
1312             for (j = 0; j < loops; j++) {
1313                inB = (void *)&targs[i + j];
1314                // copy single word into vector element i
1315                if (sparse_sp) {
1316                   if (isLE)
1317                      memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1318                   else
1319                      memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1320                } else {
1321                   memcpy(vecB_void_ptr + (j * 4), inB, 4);
1322                }
1323             }
1324             // execute test insn
1325             (*func)();
1326             if (test_group.type == VX_CONV_TO_DOUBLE)
1327                dst_dp = (unsigned long long *) &vec_out;
1328             else
1329                dst_sp = (unsigned int *) &vec_out;
1330             // print result
1331             printf("#%d: %s ", i/stride, test_group.name);
1332             for (j = 0; j < loops; j++) {
1333                if (j)
1334                   printf("; ");
1335                printf("conv(%08x)", targs[i + j]);
1336                if (test_group.type == VX_CONV_TO_DOUBLE)
1337                   printf(" = %016llx", dst_dp[j]);
1338                else
1339                   printf(" = %08x", dst_sp[j]);
1340             }
1341             printf("\n");
1342          }
1343       }
1344       k++;
1345       printf( "\n" );
1346    }
1347 }
1348
1349
1350
/* Doubleword operand pairs for the divide-extended tests.  Column 0 is
 * loaded into r14 and column 1 into r15 by test_div_extensions().
 */
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
/* Number of operand pairs (rows) in div_dw_tdata. */
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1368
/* Word operand pairs for the divide-extended tests.  Column 0 is loaded
 * into r14 and column 1 into r15 by test_div_extensions().
 */
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
/* Number of operand pairs (rows) in div_w_tdata. */
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1379
1380 typedef struct div_ext_test
1381 {
1382    test_func_t test_func;
1383    const char *name;
1384    int num_tests;
1385    div_type_t div_type;
1386    precision_type_t precision;
1387 } div_ext_test_t;
1388
1389 static div_ext_test_t div_tests[] = {
1390 #ifdef __powerpc64__
1391                                    { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1392                                    { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1393 #endif
1394                                    { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1395                                    { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1396                                    { NULL, NULL, 0, 0, 0 }
1397 };
1398
1399 static void test_div_extensions(void)
1400 {
1401    test_func_t func;
1402    int k;
1403    k = 0;
1404
1405    while ((func = div_tests[k].test_func)) {
1406       int i, repeat = 1;
1407       div_ext_test_t test_group = div_tests[k];
1408       do_dot = False;
1409
1410 again:
1411       for (i = 0; i < test_group.num_tests; i++) {
1412          unsigned int condreg;
1413
1414          if (test_group.div_type == DIV_OE)
1415             do_OE = True;
1416          else
1417             do_OE = False;
1418
1419          if (test_group.precision == DOUBLE_TEST) {
1420             r14 = div_dw_tdata[i][0];
1421             r15 = div_dw_tdata[i][1];
1422          } else {
1423             r14 = div_w_tdata[i][0];
1424             r15 = div_w_tdata[i][1];
1425          }
1426          // execute test insn
1427          (*func)();
1428          condreg = (div_flags & 0xf0000000) >> 28;
1429          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1430          if (test_group.precision == DOUBLE_TEST) {
1431             printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1432                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1433          } else {
1434             printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1435                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1436          }
1437          printf(" CR=%x; XER=%x\n", condreg, div_xer);
1438       }
1439       printf("\n");
1440       if (repeat) {
1441          repeat = 0;
1442          do_dot = True;
1443          goto again;
1444       }
1445       k++;
1446       printf( "\n" );
1447    }
1448 }
1449
1450
1451 static void test_vx_tdivORtsqrt(void)
1452 {
1453    test_func_t func;
1454    int k, crx;
1455    unsigned int flags;
1456    k = 0;
1457    do_dot = False;
1458    build_special_fargs_table();
1459
1460    while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1461       int idx, i;
1462       vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1463       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1464       Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1465       Bool two_args = test_group.targs ?  True : False;
1466       int stride = dp ? 2 : 4;
1467       int loops = is_scalar ? 1 : stride;
1468       stride = is_scalar ? 1: stride;
1469
1470       for (i = 0; i < test_group.num_tests; i+=stride) {
1471          unsigned int * pv;
1472          void * inB, * vecB_void_ptr = (void *)&vec_inB;
1473
1474          pv = (unsigned int *)&vec_out;
1475          // clear vec_out
1476          for (idx = 0; idx < 4; idx++, pv++)
1477             *pv = 0;
1478
1479          if (dp) {
1480             int j;
1481             unsigned long long * frB_dp;
1482             if (two_args) {
1483                setup_dp_fp_args(&test_group.targs[i], False);
1484             } else {
1485                for (j = 0; j < loops; j++) {
1486                   inB = (void *)&spec_fargs[i + j];
1487                   // copy double precision FP into vector element i
1488                   if (isLE && is_scalar)
1489                      vecB_void_ptr += 8;
1490                   memcpy(vecB_void_ptr + (j * 8), inB, 8);
1491                }
1492             }
1493             // execute test insn
1494             // Must do set/get of CRs immediately before/after calling the asm func
1495             // to avoid CRs being modified by other instructions.
1496             SET_FPSCR_ZERO;
1497             SET_CR_XER_ZERO;
1498             (*func)();
1499             GET_CR(flags);
1500             // assumes using CR1
1501             crx = (flags & 0x0f000000) >> 24;
1502             if (two_args) {
1503                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1504             } else {
1505                printf("#%d: %s ", i/stride, test_group.name);
1506                for (j = 0; j < loops; j++) {
1507                   if (j)
1508                      printf("; ");
1509                   frB_dp = (unsigned long long *)&spec_fargs[i + j];
1510                   printf("%s(%016llx)", test_group.op, *frB_dp);
1511                }
1512                printf( " ? %x (CRx)\n", crx);
1513             }
1514          } else {
1515             int j;
1516             unsigned int * frB_sp;
1517             if (two_args) {
1518                setup_sp_fp_args(&test_group.targs[i], False);
1519             } else {
1520                for (j = 0; j < loops; j++) {
1521                   inB = (void *)&spec_sp_fargs[i + j];
1522                   // copy single precision FP into vector element i
1523                   memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1524                }
1525             }
1526             // execute test insn
1527             SET_FPSCR_ZERO;
1528             SET_CR_XER_ZERO;
1529             (*func)();
1530             GET_CR(flags);
1531             crx = (flags & 0x0f000000) >> 24;
1532             // print result
1533             if (two_args) {
1534                print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1535             } else {
1536                printf("#%d: %s ", i/stride, test_group.name);
1537                for (j = 0; j < loops; j++) {
1538                   if (j)
1539                      printf("; ");
1540                   frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1541                   printf("%s(%08x)", test_group.op, *frB_sp);
1542                }
1543                printf( " ? %x (CRx)\n", crx);
1544             }
1545          }
1546       }
1547       k++;
1548       printf( "\n" );
1549    }
1550 }
1551
1552
1553 static void test_ftsqrt(void)
1554 {
1555    int i, crx;
1556    unsigned int flags;
1557    unsigned long long * frbp;
1558    build_special_fargs_table();
1559
1560
1561    for (i = 0; i < nb_special_fargs; i++) {
1562       f14 = spec_fargs[i];
1563       frbp = (unsigned long long *)&spec_fargs[i];
1564       SET_FPSCR_ZERO;
1565       SET_CR_XER_ZERO;
1566       __asm__ __volatile__ ("ftsqrt           cr1, %0" : : "d" (f14));
1567       GET_CR(flags);
1568       crx = (flags & 0x0f000000) >> 24;
1569       printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1570    }
1571    printf( "\n" );
1572 }
1573
1574 static void
1575 test_popcntw(void)
1576 {
1577 #ifdef __powerpc64__
1578    uint64_t res;
1579    unsigned long long src = 0x9182736405504536ULL;
1580    r14 = src;
1581    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1582    printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1583 #else
1584    uint32_t res;
1585    unsigned int src = 0x9182730E;
1586    r14 = src;
1587    __asm__ __volatile__ ("popcntw          %0, %1" : "=r" (res): "r" (r14));
1588    printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1589 #endif
1590    printf( "\n" );
1591 }
1592
1593
1594 static test_table_t
1595          all_tests[] =
1596 {
1597
1598                     { &test_vsx_one_fp_arg,
1599                       "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
1600                     { &test_int_to_fp_convert,
1601                       "Test VSX vector integer to float conversion instructions", OTHER_INST },
1602                     { &test_div_extensions,
1603                       "Test div extensions", SCALAR_DIV_INST },
1604                     { &test_ftsqrt,
1605                       "Test ftsqrt instruction", OTHER_INST },
1606                     { &test_vx_tdivORtsqrt,
1607                       "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
1608                     { &test_popcntw,
1609                       "Test popcntw instruction", OTHER_INST },
1610                     { NULL, NULL }
1611 };
1612 #endif // HAS_VSX
1613
/* Print the command line help to stderr.
 *
 * The text names this program (test_isa_2_06_part3) and does not mark any
 * option as a default: main() starts with an empty selection mask, so no
 * test runs unless -d, -o or -A is given.
 */
static void usage (void)
{
  fprintf(stderr,
          "Usage: test_isa_2_06_part3 [OPTIONS]\n"
          "\t-d: test scalar division instructions\n"
          "\t-o: test non scalar division instructions\n"
          "\t-A: test all instructions\n"
          "\t-h: display this help and exit\n"
          "If none of -d, -o or -A is given, no tests are run.\n"
          );
}
1624
/* Program entry point.
 *
 * Options:
 *   -d  select the scalar division tests (SCALAR_DIV_INST)
 *   -o  select all other tests (OTHER_INST)
 *   -A  select every test
 *   -h  print usage and exit with status 0
 *
 * Every entry of all_tests[] whose group is selected is run in table order,
 * preceded by its heading.  An unrecognised option prints the usage text
 * plus the offending option character and exits with status 1.  Without
 * HAS_VSX the program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER. These
    * bits are set by various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the expected-result files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' for an unrecognised option; the character
          * actually seen on the command line is in optopt. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   for (i = 0; (func = all_tests[i].test_category) != NULL; i++) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */
         printf( "%s\n", aTest.name );
         (*func)();
      }
   }

   /* free(NULL) is a no-op, so no guard is needed if a table was never built */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}