none/tests/ppc32/test_isa_2_06_part2.c

   1 /*  Copyright (C) 2011 IBM
   2
   3  Author: Maynard Johnson <maynardj@us.ibm.com>
   4
   5  This program is free software; you can redistribute it and/or
   6  modify it under the terms of the GNU General Public License as
   7  published by the Free Software Foundation; either version 2 of the
   8  License, or (at your option) any later version.
   9
  10  This program is distributed in the hope that it will be useful, but
  11  WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with this program; if not, see <http://www.gnu.org/licenses/>.
  17
  18  The GNU General Public License is contained in the file COPYING.
  19  */
  20
  21 #include <stdio.h>
  22 #include <stdint.h>
  23 #include <stdlib.h>
  24 #include <string.h>
  25 #include <malloc.h>
  26 #include <math.h>
  27 #include <unistd.h>    // getopt
  28
  29 #ifdef HAS_VSX
  30
  31 #include <altivec.h>
  32
  33 #ifndef __powerpc64__
  34 typedef uint32_t HWord_t;
  35 #else
  36 typedef uint64_t HWord_t;
  37 #endif /* __powerpc64__ */
  38
  39 typedef unsigned char Bool;
  40 #define True 1
  41 #define False 0
  42
  43 #ifdef VGP_ppc64le_linux
  44 #define isLE 1
  45 #else
  46 #define isLE 0
  47 #endif
  48
  49 register HWord_t r14 __asm__ ("r14");
  50 register HWord_t r15 __asm__ ("r15");
  51 register HWord_t r16 __asm__ ("r16");
  52 register HWord_t r17 __asm__ ("r17");
  53 register double f14 __asm__ ("fr14");
  54 register double f15 __asm__ ("fr15");
  55 register double f16 __asm__ ("fr16");
  56 register double f17 __asm__ ("fr17");
  57
  58 static volatile unsigned int div_flags, div_xer;
  59
  60 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
  61
  62 #define SET_CR(_arg) \
  63       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
  64
  65 #define SET_XER(_arg) \
  66       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
  67
  68 #define GET_CR(_lval) \
  69       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
  70
  71 #define GET_XER(_lval) \
  72       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
  73
  74 #define GET_CR_XER(_lval_cr,_lval_xer) \
  75    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
  76
  77 #define SET_CR_ZERO \
  78       SET_CR(0)
  79
  80 #define SET_XER_ZERO \
  81       SET_XER(0)
  82
  83 #define SET_CR_XER_ZERO \
  84    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
  85
  86 #define SET_FPSCR_ZERO \
  87    do { double _d = 0.0; \
  88         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
  89    } while (0)
  90
  91
  92 typedef void (*test_func_t)(void);
  93 typedef struct test_table test_table_t;
  94
  95 /* Defines for the instructiion groups, use bit field to identify */
  96 #define SCALAR_DIV_INST    0x0001
  97 #define OTHER_INST  0x0002
  98
/* The functions below, which construct a table of floating-point
 * values, were lifted from none/tests/ppc32/jm-insns.c.
 */
 102
 103 #if defined (DEBUG_ARGS_BUILD)
 104 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
 105 #else
 106 #define AB_DPRINTF(fmt, args...) do { } while (0)
 107 #endif
 108
 109 static inline void register_farg (void *farg,
 110                                   int s, uint16_t _exp, uint64_t mant)
 111 {
 112    uint64_t tmp;
 113
 114    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
 115    *(uint64_t *)farg = tmp;
 116    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
 117               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
 118 }
 119
 120 static inline void register_sp_farg (void *farg,
 121                                      int s, uint16_t _exp, uint32_t mant)
 122 {
 123    uint32_t tmp;
 124    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
 125    *(uint32_t *)farg = tmp;
 126 }
 127
 128 typedef struct fp_test_args {
 129    int fra_idx;
 130    int frb_idx;
 131 } fp_test_args_t;
 132
 133
 134 fp_test_args_t fp_cmp_tests[] = {
 135                                    {8, 8},
 136                                    {8, 14},
 137                                    {8, 6},
 138                                    {8, 5},
 139                                    {8, 4},
 140                                    {8, 7},
 141                                    {8, 9},
 142                                    {8, 11},
 143                                    {14, 8},
 144                                    {14, 14},
 145                                    {14, 6},
 146                                    {14, 5},
 147                                    {14, 4},
 148                                    {14, 7},
 149                                    {14, 9},
 150                                    {14, 11},
 151                                    {6, 8},
 152                                    {6, 14},
 153                                    {6, 6},
 154                                    {6, 5},
 155                                    {6, 4},
 156                                    {6, 7},
 157                                    {6, 9},
 158                                    {6, 11},
 159                                    {5, 8},
 160                                    {5, 14},
 161                                    {5, 6},
 162                                    {5, 5},
 163                                    {5, 4},
 164                                    {5, 7},
 165                                    {5, 9},
 166                                    {5, 11},
 167                                    {4, 8},
 168                                    {4, 14},
 169                                    {4, 6},
 170                                    {4, 5},
 171                                    {4, 1},
 172                                    {4, 7},
 173                                    {4, 9},
 174                                    {4, 11},
 175                                    {7, 8},
 176                                    {7, 14},
 177                                    {7, 6},
 178                                    {7, 5},
 179                                    {7, 4},
 180                                    {7, 7},
 181                                    {7, 9},
 182                                    {7, 11},
 183                                    {10, 8},
 184                                    {10, 14},
 185                                    {10, 6},
 186                                    {10, 5},
 187                                    {10, 4},
 188                                    {10, 7},
 189                                    {10, 9},
 190                                    {10, 10},
 191                                    {12, 8},
 192                                    {12, 14},
 193                                    {12, 6},
 194                                    {12, 5},
 195                                    {1, 1},
 196                                    {2, 2},
 197                                    {3, 3},
 198                                    {4, 4},
 199 };
 200
 201
 202 fp_test_args_t two_arg_fp_tests[] = {
 203                                      {8, 8},
 204                                      {8, 14},
 205                                      {15, 16},
 206                                      {8, 5},
 207                                      {8, 4},
 208                                      {8, 7},
 209                                      {8, 9},
 210                                      {8, 11},
 211                                      {14, 8},
 212                                      {14, 14},
 213                                      {14, 6},
 214                                      {14, 5},
 215                                      {14, 4},
 216                                      {14, 7},
 217                                      {14, 9},
 218                                      {14, 11},
 219                                      {6, 8},
 220                                      {6, 14},
 221                                      {6, 6},
 222                                      {6, 5},
 223                                      {6, 4},
 224                                      {6, 7},
 225                                      {6, 9},
 226                                      {6, 11},
 227                                      {5, 8},
 228                                      {5, 14},
 229                                      {5, 6},
 230                                      {5, 5},
 231                                      {5, 4},
 232                                      {5, 7},
 233                                      {5, 9},
 234                                      {5, 11},
 235                                      {4, 8},
 236                                      {4, 14},
 237                                      {4, 6},
 238                                      {4, 5},
 239                                      {4, 1},
 240                                      {4, 7},
 241                                      {4, 9},
 242                                      {4, 11},
 243                                      {7, 8},
 244                                      {7, 14},
 245                                      {7, 6},
 246                                      {7, 5},
 247                                      {7, 4},
 248                                      {7, 7},
 249                                      {7, 9},
 250                                      {7, 11},
 251                                      {10, 8},
 252                                      {10, 14},
 253                                      {12, 6},
 254                                      {12, 5},
 255                                      {10, 4},
 256                                      {10, 7},
 257                                      {10, 9},
 258                                      {10, 11},
 259                                      {12, 8 },
 260                                      {12, 14},
 261                                      {12, 6},
 262                                      {15, 16},
 263                                      {15, 16},
 264                                      {9, 11},
 265                                      {11, 11},
 266                                      {11, 12}
 267 };
 268
 269
 270 static int nb_special_fargs;
 271 static double * spec_fargs;
 272 static float * spec_sp_fargs;
 273
 274 static void build_special_fargs_table(void)
 275 {
 276 /*
 277   Entry  Sign Exp   fraction                  Special value
 278    0      0   3fd   0x8000000000000ULL         Positive finite number
 279    1      0   404   0xf000000000000ULL         ...
 280    2      0   001   0x8000000b77501ULL         ...
 281    3      0   7fe   0x800000000051bULL         ...
 282    4      0   012   0x3214569900000ULL         ...
 283    5      0   000   0x0000000000000ULL         +0.0 (+zero)
 284    6      1   000   0x0000000000000ULL         -0.0 (-zero)
 285    7      0   7ff   0x0000000000000ULL         +infinity
 286    8      1   7ff   0x0000000000000ULL         -infinity
 287    9      0   7ff   0x7FFFFFFFFFFFFULL         +SNaN
 288    10     1   7ff   0x7FFFFFFFFFFFFULL         -SNaN
 289    11     0   7ff   0x8000000000000ULL         +QNaN
 290    12     1   7ff   0x8000000000000ULL         -QNaN
 291    13     1   000   0x8340000078000ULL         Denormalized val (zero exp and non-zero fraction)
 292    14     1   40d   0x0650f5a07b353ULL         Negative finite number
 293    15     0   412   0x32585a9900000ULL         A couple more positive finite numbers
 294    16     0   413   0x82511a2000000ULL         ...
 295 */
 296
 297    uint64_t mant;
 298    uint32_t mant_sp;
 299    uint16_t _exp;
 300    int s;
 301    int j, i = 0;
 302
 303    if (spec_fargs)
 304       return;
 305
 306    spec_fargs = malloc( 17 * sizeof(double) );
 307    spec_sp_fargs = malloc( 17 * sizeof(float) );
 308
 309    // #0
 310    s = 0;
 311    _exp = 0x3fd;
 312    mant = 0x8000000000000ULL;
 313    register_farg(&spec_fargs[i++], s, _exp, mant);
 314
 315    // #1
 316    s = 0;
 317    _exp = 0x404;
 318    mant = 0xf000000000000ULL;
 319    register_farg(&spec_fargs[i++], s, _exp, mant);
 320
 321    /* None of the ftdiv tests succeed.
 322     * FRA = value #0; FRB = value #1
 323     * ea_ = -2; e_b = 5
 324     * fl_flag || fg_flag || fe_flag = 100
 325     */
 326
 327    /*************************************************
 328     *     fe_flag tests
 329     *
 330     *************************************************/
 331
 332    /* fe_flag <- 1 if FRA is a NaN
 333     * FRA = value #9; FRB = value #1
 334     * e_a = 1024; e_b = 5
 335     * fl_flag || fg_flag || fe_flag = 101
 336     */
 337
 338    /* fe_flag <- 1 if FRB is a NaN
 339     * FRA = value #1; FRB = value #12
 340     * e_a = 5; e_b = 1024
 341     * fl_flag || fg_flag || fe_flag = 101
 342     */
 343
 344    /* fe_flag <- 1 if e_b <= -1022
 345     * FRA = value #0; FRB = value #2
 346     * e_a = -2; e_b = -1022
 347     * fl_flag || fg_flag || fe_flag = 101
 348     *
 349     */
 350    // #2
 351    s = 0;
 352    _exp = 0x001;
 353    mant = 0x8000000b77501ULL;
 354    register_farg(&spec_fargs[i++], s, _exp, mant);
 355
 356    /* fe_flag <- 1 if e_b >= 1021
 357     * FRA = value #1; FRB = value #3
 358     * e_a = 5; e_b = 1023
 359     * fl_flag || fg_flag || fe_flag = 101
 360     */
 361    // #3
 362    s = 0;
 363    _exp = 0x7fe;
 364    mant = 0x800000000051bULL;
 365    register_farg(&spec_fargs[i++], s, _exp, mant);
 366
 367    /* fe_flag <- 1 if FRA != 0 && e_a - e_b >= 1023
 368     * Let FRA = value #3 and FRB be value #0.
 369     * e_a = 1023; e_b = -2
 370     * fl_flag || fg_flag || fe_flag = 101
 371     */
 372
 373    /* fe_flag <- 1 if FRA != 0 && e_a - e_b <= -1023
 374     * Let FRA = value #0 above and FRB be value #3 above
 375     * e_a = -2; e_b = 1023
 376     * fl_flag || fg_flag || fe_flag = 101
 377     */
 378
 379    /* fe_flag <- 1 if FRA != 0 && e_a <= -970
 380     * Let FRA = value #4 and FRB be value #0
 381     * e_a = -1005; e_b = -2
 382     * fl_flag || fg_flag || fe_flag = 101
 383    */
 384    // #4
 385    s = 0;
 386    _exp = 0x012;
 387    mant = 0x3214569900000ULL;
 388    register_farg(&spec_fargs[i++], s, _exp, mant);
 389
 390    /*************************************************
 391     *     fg_flag tests
 392     *
 393     *************************************************/
 394    /* fg_flag <- 1 if FRA is an Infinity
 395     * NOTE: FRA = Inf also sets fe_flag
 396     * Do two tests, using values #7 and #8 (+/- Inf) for FRA.
 397     * Test 1:
 398     *   Let FRA be value #7 and FRB be value #1
 399     *   e_a = 1024; e_b = 5
 400     *   fl_flag || fg_flag || fe_flag = 111
 401     *
 402     * Test 2:
 403     *   Let FRA be value #8 and FRB be value #1
 404     *   e_a = 1024; e_b = 5
 405     *   fl_flag || fg_flag || fe_flag = 111
 406     *
 407     */
 408
 409    /* fg_flag <- 1 if FRB is an Infinity
 410     * NOTE: FRB = Inf also sets fe_flag
 411     * Let FRA be value #1 and FRB be value #7
 412     * e_a = 5; e_b = 1024
 413     * fl_flag || fg_flag || fe_flag = 111
 414     */
 415
 416    /* fg_flag <- 1 if FRB is denormalized
 417     * NOTE: e_b < -1022 ==> fe_flag <- 1
 418     * Let FRA be value #0 and FRB be value #13
 419     * e_a = -2; e_b = -1023
 420     * fl_flag || fg_flag || fe_flag = 111
 421     */
 422
 423    /* fg_flag <- 1 if FRB is +zero
 424     * NOTE: FRA = Inf also sets fe_flag
 425     * Let FRA = val #5; FRB = val #5
 426     * ea_ = -1023; e_b = -1023
 427     * fl_flag || fg_flag || fe_flag = 111
 428     */
 429
 430    /* fg_flag <- 1 if FRB is -zero
 431     * NOTE: FRA = Inf also sets fe_flag
 432     * Let FRA = val #5; FRB = val #6
 433     * ea_ = -1023; e_b = -1023
 434     * fl_flag || fg_flag || fe_flag = 111
 435     */
 436
 437    /* Special values */
 438    /* +0.0      : 0 0x000 0x0000000000000 */
 439    // #5
 440    s = 0;
 441    _exp = 0x000;
 442    mant = 0x0000000000000ULL;
 443    register_farg(&spec_fargs[i++], s, _exp, mant);
 444
 445    /* -0.0      : 1 0x000 0x0000000000000 */
 446    // #6
 447    s = 1;
 448    _exp = 0x000;
 449    mant = 0x0000000000000ULL;
 450    register_farg(&spec_fargs[i++], s, _exp, mant);
 451
 452    /* +infinity : 0 0x7FF 0x0000000000000  */
 453    // #7
 454    s = 0;
 455    _exp = 0x7FF;
 456    mant = 0x0000000000000ULL;
 457    register_farg(&spec_fargs[i++], s, _exp, mant);
 458
 459    /* -infinity : 1 0x7FF 0x0000000000000 */
 460    // #8
 461    s = 1;
 462    _exp = 0x7FF;
 463    mant = 0x0000000000000ULL;
 464    register_farg(&spec_fargs[i++], s, _exp, mant);
 465
 466    /*
 467     * This comment applies to values #9 and #10 below:
 468     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
 469     * so we can't just copy the double-precision value to the corresponding slot in the
 470     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
 471     * have to manually set the bits using register_sp_farg().
 472     */
 473
 474    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
 475    // #9
 476    s = 0;
 477    _exp = 0x7FF;
 478    mant = 0x7FFFFFFFFFFFFULL;
 479    register_farg(&spec_fargs[i++], s, _exp, mant);
 480    _exp = 0xff;
 481    mant_sp = 0x3FFFFF;
 482    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 483
 484    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
 485    // #10
 486    s = 1;
 487    _exp = 0x7FF;
 488    mant = 0x7FFFFFFFFFFFFULL;
 489    register_farg(&spec_fargs[i++], s, _exp, mant);
 490    _exp = 0xff;
 491    mant_sp = 0x3FFFFF;
 492    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 493
 494    /* +QNaN     : 0 0x7FF 0x8000000000000 */
 495    // #11
 496    s = 0;
 497    _exp = 0x7FF;
 498    mant = 0x8000000000000ULL;
 499    register_farg(&spec_fargs[i++], s, _exp, mant);
 500
 501    /* -QNaN     : 1 0x7FF 0x8000000000000 */
 502    // #12
 503    s = 1;
 504    _exp = 0x7FF;
 505    mant = 0x8000000000000ULL;
 506    register_farg(&spec_fargs[i++], s, _exp, mant);
 507
 508    /* denormalized value */
 509    // #13
 510    s = 1;
 511    _exp = 0x000;
 512    mant = 0x8340000078000ULL;
 513    register_farg(&spec_fargs[i++], s, _exp, mant);
 514
 515    /* Negative finite number */
 516    // #14
 517    s = 1;
 518    _exp = 0x40d;
 519    mant = 0x0650f5a07b353ULL;
 520    register_farg(&spec_fargs[i++], s, _exp, mant);
 521
 522    /* A couple positive finite numbers ... */
 523    // #15
 524    s = 0;
 525    _exp = 0x412;
 526    mant = 0x32585a9900000ULL;
 527    register_farg(&spec_fargs[i++], s, _exp, mant);
 528
 529    // #16
 530    s = 0;
 531    _exp = 0x413;
 532    mant = 0x82511a2000000ULL;
 533    register_farg(&spec_fargs[i++], s, _exp, mant);
 534
 535    nb_special_fargs = i;
 536    for (j = 0; j < i; j++) {
 537       if (!(j == 9 || j == 10))
 538          spec_sp_fargs[j] = spec_fargs[j];
 539    }
 540 }
 541
 542
 543 struct test_table
 544 {
 545    test_func_t test_category;
 546    char * name;
 547    unsigned int test_group;
 548 };
 549
 550 typedef enum {
 551    SINGLE_TEST,
 552    DOUBLE_TEST
 553 } precision_type_t;
 554
 555 typedef enum {
 556    VX_SCALAR_FP_NMSUB = 0,
 557    // ALL VECTOR-TYPE OPS SHOULD BE ADDED AFTER THIS LINE
 558    VX_VECTOR_FP_MULT_AND_OP2 = 10,
 559    // and before this line
 560    VX_BASIC_CMP = 30,
 561    VX_CONV_WORD,
 562    VX_DEFAULT
 563 } vx_fp_test_type;
 564
 565 typedef struct vx_fp_test
 566 {
 567    test_func_t test_func;
 568    const char * name;
 569    fp_test_args_t * targs;
 570    int num_tests;
 571    precision_type_t precision;
 572    vx_fp_test_type type;
 573    const char * op;
 574 } vx_fp_test_t;
 575
 576 static vector unsigned int vec_out, vec_inA, vec_inB, vec_inC;
 577
 578 static Bool do_dot;
 579 static void test_xvcmpeqdp(void)
 580 {
 581    if (do_dot)
 582       __asm__ __volatile__ ("xvcmpeqdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 583    else
 584       __asm__ __volatile__ ("xvcmpeqdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 585 }
 586
 587 static void test_xvcmpgedp(void)
 588 {
 589    if (do_dot)
 590       __asm__ __volatile__ ("xvcmpgedp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 591    else
 592       __asm__ __volatile__ ("xvcmpgedp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 593 }
 594
 595 static void test_xvcmpgtdp(void)
 596 {
 597    if (do_dot)
 598       __asm__ __volatile__ ("xvcmpgtdp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 599    else
 600       __asm__ __volatile__ ("xvcmpgtdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 601 }
 602
 603 static void test_xvcmpeqsp(void)
 604 {
 605    if (do_dot)
 606       __asm__ __volatile__ ("xvcmpeqsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 607    else
 608       __asm__ __volatile__ ("xvcmpeqsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 609 }
 610
 611 static void test_xvcmpgesp(void)
 612 {
 613    if (do_dot)
 614       __asm__ __volatile__ ("xvcmpgesp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 615    else
 616       __asm__ __volatile__ ("xvcmpgesp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 617 }
 618
 619 static void test_xvcmpgtsp(void)
 620 {
 621    if (do_dot)
 622       __asm__ __volatile__ ("xvcmpgtsp.          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 623    else
 624       __asm__ __volatile__ ("xvcmpgtsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 625 }
 626
 627 static Bool do_aXp;
 628 static Bool do_dp;
 629 static void test_xsnmsub(void)
 630 {
 631    if (do_aXp)
 632       __asm__ __volatile__ ("xsnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 633    else
 634       __asm__ __volatile__ ("xsnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 635 }
 636
 637 static void test_xvmadd(void)
 638 {
 639    if (do_aXp)
 640       if (do_dp)
 641          __asm__ __volatile__ ("xvmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 642       else
 643          __asm__ __volatile__ ("xvmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 644    else
 645       if (do_dp)
 646          __asm__ __volatile__ ("xvmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 647       else
 648          __asm__ __volatile__ ("xvmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 649 }
 650
 651 static void test_xvnmadd(void)
 652 {
 653    if (do_aXp)
 654       if (do_dp)
 655          __asm__ __volatile__ ("xvnmaddadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 656       else
 657          __asm__ __volatile__ ("xvnmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 658    else
 659       if (do_dp)
 660          __asm__ __volatile__ ("xvnmaddmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 661       else
 662          __asm__ __volatile__ ("xvnmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 663 }
 664
 665 static void test_xvnmsub(void)
 666 {
 667    if (do_aXp)
 668       if (do_dp)
 669          __asm__ __volatile__ ("xvnmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 670       else
 671          __asm__ __volatile__ ("xvnmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 672    else
 673       if (do_dp)
 674          __asm__ __volatile__ ("xvnmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 675       else
 676          __asm__ __volatile__ ("xvnmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 677 }
 678
 679 static void test_xvmsub(void)
 680 {
 681    if (do_aXp)
 682       if (do_dp)
 683          __asm__ __volatile__ ("xvmsubadp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 684       else
 685          __asm__ __volatile__ ("xvmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 686    else
 687       if (do_dp)
 688          __asm__ __volatile__ ("xvmsubmdp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 689       else
 690          __asm__ __volatile__ ("xvmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 691 }
 692
 693 static void test_xssqrtdp(void)
 694 {
 695    __asm__ __volatile__ ("xssqrtdp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 696 }
 697
 698 static void test_xsrdpim(void)
 699 {
 700    __asm__ __volatile__ ("xsrdpim   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 701 }
 702
 703 static void test_xsrdpip(void)
 704 {
 705    __asm__ __volatile__ ("xsrdpip   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 706 }
 707
 708 static void test_xstdivdp(void)
 709 {
 710    __asm__ __volatile__ ("xstdivdp   6, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
 711 }
 712
 713 static void test_xsmaxdp(void)
 714 {
 715    __asm__ __volatile__ ("xsmaxdp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 716 }
 717
 718 static void test_xsmindp(void)
 719 {
 720    __asm__ __volatile__ ("xsmindp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 721 }
 722
 723 static void test_xvadddp(void)
 724 {
 725    __asm__ __volatile__ ("xvadddp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 726 }
 727
 728 static void test_xvaddsp(void)
 729 {
 730    __asm__ __volatile__ ("xvaddsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 731 }
 732
 733 static void test_xvdivdp(void)
 734 {
 735    __asm__ __volatile__ ("xvdivdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 736 }
 737
 738 static void test_xvdivsp(void)
 739 {
 740    __asm__ __volatile__ ("xvdivsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 741 }
 742
 743 static void test_xvmuldp(void)
 744 {
 745    __asm__ __volatile__ ("xvmuldp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 746 }
 747
 748 static void test_xvmulsp(void)
 749 {
 750    __asm__ __volatile__ ("xvmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 751 }
 752
 753 static void test_xvsubdp(void)
 754 {
 755    __asm__ __volatile__ ("xvsubdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 756 }
 757
 758 static void test_xvmaxdp(void)
 759 {
 760    __asm__ __volatile__ ("xvmaxdp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 761 }
 762
 763 static void test_xvmindp(void)
 764 {
 765    __asm__ __volatile__ ("xvmindp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 766 }
 767
 768 static void test_xvmaxsp(void)
 769 {
 770    __asm__ __volatile__ ("xvmaxsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 771 }
 772
 773 static void test_xvminsp(void)
 774 {
 775    __asm__ __volatile__ ("xvminsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 776 }
 777
 778 static void test_xvsubsp(void)
 779 {
 780    __asm__ __volatile__ ("xvsubsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 781 }
 782
 783 static void test_xvresp(void)
 784 {
 785    __asm__ __volatile__ ("xvresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 786 }
 787
 788 static void test_xxsel(void)
 789 {
 790    unsigned long long * dst;
 791    unsigned long long xa[] =  { 0xa12bc37de56f9708ULL, 0x3894c1fddeadbeefULL};
 792    unsigned long long xb[] =  { 0xfedc432124681235ULL, 0xf1e2d3c4e0057708ULL};
 793    unsigned long long xc[] =  { 0xffffffff01020304ULL, 0x128934bd00000000ULL};
 794
 795    memcpy(&vec_inA, xa, 16);
 796    memcpy(&vec_inB, xb, 16);
 797    memcpy(&vec_inC, xc, 16);
 798
 799
 800    __asm__ __volatile__ ("xxsel   %x0, %x1, %x2, %x3" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB),"wa" (vec_inC));
 801    dst = (unsigned long long *) &vec_out;
 802    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[0], xb[0], xc[0], *dst);
 803    dst++;
 804    printf("xxsel %016llx,%016llx,%016llx => %016llx\n", xa[1], xb[1], xc[1], *dst);
 805    printf("\n");
 806 }
 807
 808 static void test_xxspltw(void)
 809 {
 810    int uim;
 811    unsigned long long * dst = NULL;
 812    unsigned int xb[] =  { 0xfedc4321, 0x24681235, 0xf1e2d3c4, 0xe0057708};
 813    int i;
 814    void * vecB_ptr = &vec_inB;
 815    if (isLE) {
 816       for (i = 3; i >=0; i--) {
 817          memcpy(vecB_ptr, &xb[i], 4);
 818          vecB_ptr+=4;
 819       }
 820    } else {
 821       for (i = 0; i < 4; i++) {
 822          memcpy(vecB_ptr, &xb[i], 4);
 823          vecB_ptr+=4;
 824       }
 825    }
 826
 827    for (uim = 0; uim < 4; uim++) {
 828       switch (uim) {
 829          case 0:
 830             __asm__ __volatile__ ("xxspltw   %x0, %x1, 0" : "=wa" (vec_out): "wa" (vec_inB));
 831             break;
 832          case 1:
 833             __asm__ __volatile__ ("xxspltw   %x0, %x1, 1" : "=wa" (vec_out): "wa" (vec_inB));
 834             break;
 835          case 2:
 836             __asm__ __volatile__ ("xxspltw   %x0, %x1, 2" : "=wa" (vec_out): "wa" (vec_inB));
 837             break;
 838          case 3:
 839             __asm__ __volatile__ ("xxspltw   %x0, %x1, 3" : "=wa" (vec_out): "wa" (vec_inB));
 840             break;
 841       }
 842       dst = (unsigned long long *) &vec_out;
 843       printf("xxspltw 0x%08x%08x%08x%08x %d=> 0x%016llx",  xb[0], xb[1],
 844              xb[2], xb[3], uim, *dst);
 845       dst++;
 846       printf("%016llx\n", *dst);
 847    }
 848    printf("\n");
 849 }
 850
 851 static void test_xscvdpsxws(void)
 852 {
 853    __asm__ __volatile__ ("xscvdpsxws  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 854 }
 855
 856 static void test_xscvdpuxds(void)
 857 {
 858    __asm__ __volatile__ ("xscvdpuxds  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 859 }
 860
 861 static void test_xvcpsgndp(void)
 862 {
 863    __asm__ __volatile__  ("xvcpsgndp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 864 }
 865
 866 static void test_xvcpsgnsp(void)
 867 {
 868    __asm__ __volatile__  ("xvcpsgnsp  %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 869 }
 870
 871 static void test_xvcvdpsxws(void)
 872 {
 873    __asm__ __volatile__ ("xvcvdpsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
 874 }
 875
 876 static void test_xvcvspsxws(void)
 877 {
 878    __asm__ __volatile__ ("xvcvspsxws  %x0, %x1 " : "=wa" (vec_out): "wa" (vec_inB));
 879 }
 880
 881 static vx_fp_test_t
 882 vx_vector_one_fp_arg_tests[] = {
 883                                 { &test_xvresp, "xvresp", NULL, 16, SINGLE_TEST, VX_BASIC_CMP, "1/x"},
 884                                 { &test_xvcvdpsxws, "xvcvdpsxws", NULL, 16, DOUBLE_TEST, VX_CONV_WORD, "conv"},
 885                                 { &test_xvcvspsxws, "xvcvspsxws", NULL, 16, SINGLE_TEST, VX_CONV_WORD, "conv"},
 886                                 { NULL, NULL, NULL, 0 , 0, 0, NULL}
 887 };
 888
 889 static vx_fp_test_t
 890 vx_vector_fp_tests[] = {
 891                         { &test_xvcmpeqdp, "xvcmpeqdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "eq"},
 892                         { &test_xvcmpgedp, "xvcmpgedp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "ge"},
 893                         { &test_xvcmpgtdp, "xvcmpgtdp", fp_cmp_tests, 64, DOUBLE_TEST, VX_BASIC_CMP, "gt"},
 894                         { &test_xvcmpeqsp, "xvcmpeqsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "eq"},
 895                         { &test_xvcmpgesp, "xvcmpgesp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "ge"},
 896                         { &test_xvcmpgtsp, "xvcmpgtsp", fp_cmp_tests, 64, SINGLE_TEST, VX_BASIC_CMP, "gt"},
 897                         { &test_xvadddp, "xvadddp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+" },
 898                         { &test_xvaddsp, "xvaddsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+" },
 899                         { &test_xvdivdp, "xvdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "/" },
 900                         { &test_xvdivsp, "xvdivsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "/" },
 901                         { &test_xvmuldp, "xvmuldp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "*" },
 902                         { &test_xvmulsp, "xvmulsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "*" },
 903                         { &test_xvsubdp, "xvsubdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "-" },
 904                         { &test_xvsubsp, "xvsubsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "-" },
 905                         { &test_xvmaxdp, "xvmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@max@" },
 906                         { &test_xvmindp, "xvmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "@min@" },
 907                         { &test_xvmaxsp, "xvmaxsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@max@" },
 908                         { &test_xvminsp, "xvminsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "@min@" },
 909                         { &test_xvcpsgndp, "xvcpsgndp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, "+-cp"},
 910                         { &test_xvcpsgnsp, "xvcpsgnsp", two_arg_fp_tests, 64, SINGLE_TEST, VX_DEFAULT, "+-cp"},
 911                         { NULL, NULL, NULL, 0 , 0, 0, NULL}
 912 };
 913
 914
 915 static vx_fp_test_t
 916 vx_aORm_fp_tests[] = {
 917                        { &test_xsnmsub, "xsnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_SCALAR_FP_NMSUB, "!*-"},
 918                        { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
 919                        { &test_xvmadd, "xvmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*+"},
 920                        { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
 921                        { &test_xvnmadd, "xvnmadd", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*+"},
 922                        { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
 923                        { &test_xvmsub, "xvmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "*-"},
 924                        { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, DOUBLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
 925                        { &test_xvnmsub, "xvnmsub", two_arg_fp_tests, 64, SINGLE_TEST, VX_VECTOR_FP_MULT_AND_OP2, "!*-"},
 926                        { NULL, NULL, NULL, 0, 0, 0,  NULL }
 927 };
 928
 929 static vx_fp_test_t
 930 vx_simple_scalar_fp_tests[] = {
 931                                { &test_xssqrtdp, "xssqrtdp", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
 932                                { &test_xsrdpim, "xsrdpim", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
 933                                { &test_xsrdpip, "xsrdpip", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
 934                                { &test_xstdivdp, "xstdivdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
 935                                { &test_xsmaxdp, "xsmaxdp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
 936                                { &test_xsmindp, "xsmindp", two_arg_fp_tests, 64, DOUBLE_TEST, VX_DEFAULT, NULL},
 937                                { &test_xscvdpsxws, "xscvdpsxws", NULL, 17, DOUBLE_TEST, VX_CONV_WORD, NULL},
 938                                { &test_xscvdpuxds, "xscvdpuxds", NULL, 17, DOUBLE_TEST, VX_DEFAULT, NULL},
 939                                { NULL, NULL, NULL, 0, 0, 0, NULL }
 940 };
 941
 942
 943 #ifdef __powerpc64__
 944 static void test_bpermd(void)
 945 {
 946    /* NOTE: Bit number is '0 . . . 63'
 947     *
 948     * Permuted bits are generated bit 0 -7 as follows:
 949     *    index = (r14)8*i:8*i+7
 950     *    perm[i] = (r15)index
 951     *
 952     * So, for i = 0, index is (r14)8*0:8*0+7, or (r14)0:7, which is the MSB
 953     * byte of r14, 0x1b(27/base 10).  This identifies bit 27 of r15, which is '1'.
 954     * For i = 1, index is 0x2c, identifying bit 44 of r15, which is '1'.
 955     * So the result of the first two iterations of i are:
 956     *   perm = 0b01xxxxxx
 957     *
 958     */
 959    r15 = 0xa12bc37de56f9708ULL;
 960    r14 = 0x1b2c31f030000001ULL;
 961    __asm__ __volatile__ ("bpermd %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 962    printf("bpermd: 0x%016llx : 0x%016llx => 0x%llx\n", (unsigned long long)r14,
 963           (unsigned long long)r15, (unsigned long long)r17);
 964    printf("\n");
 965 }
 966 #endif
 967
 968 static Bool do_OE;
 969 typedef enum {
 970    DIV_BASE = 1,
 971    DIV_OE = 2,
 972    DIV_DOT = 4,
 973 } div_type_t;
 974 /* Possible divde type combinations are:
 975  *   - base
 976  *   - base+dot
 977  *   - base+OE
 978  *   - base+OE+dot
 979  */
 980 #ifdef __powerpc64__
 981 static void test_divde(void)
 982 {
 983    int divde_type = DIV_BASE;
 984    if (do_OE)
 985       divde_type |= DIV_OE;
 986    if (do_dot)
 987       divde_type |= DIV_DOT;
 988
 989    switch (divde_type) {
 990       case 1:
 991         SET_CR_XER_ZERO;
 992          __asm__ __volatile__ ("divde %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 993          GET_CR_XER(div_flags, div_xer);
 994          break;
 995       case 3:
 996         SET_CR_XER_ZERO;
 997          __asm__ __volatile__ ("divdeo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
 998          GET_CR_XER(div_flags, div_xer);
 999          break;
1000       case 5:
1001         SET_CR_XER_ZERO;
1002          __asm__ __volatile__ ("divde. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1003          GET_CR_XER(div_flags, div_xer);
1004          break;
1005       case 7:
1006         SET_CR_XER_ZERO;
1007          __asm__ __volatile__ ("divdeo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1008          GET_CR_XER(div_flags, div_xer);
1009          break;
1010       default:
1011          fprintf(stderr, "Invalid divde type. Exiting\n");
1012          exit(1);
1013    }
1014 }
1015 #endif
1016
1017 static void test_divweu(void)
1018 {
1019    int divweu_type = DIV_BASE;
1020    if (do_OE)
1021       divweu_type |= DIV_OE;
1022    if (do_dot)
1023       divweu_type |= DIV_DOT;
1024
1025    switch (divweu_type) {
1026       case 1:
1027         SET_CR_XER_ZERO;
1028          __asm__ __volatile__ ("divweu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1029          GET_CR_XER(div_flags, div_xer);
1030          break;
1031       case 3:
1032         SET_CR_XER_ZERO;
1033          __asm__ __volatile__ ("divweuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1034          GET_CR_XER(div_flags, div_xer);
1035          break;
1036       case 5:
1037         SET_CR_XER_ZERO;
1038          __asm__ __volatile__ ("divweu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1039          GET_CR_XER(div_flags, div_xer);
1040          break;
1041       case 7:
1042         SET_CR_XER_ZERO;
1043          __asm__ __volatile__ ("divweuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
1044          GET_CR_XER(div_flags, div_xer);
1045          break;
1046       default:
1047          fprintf(stderr, "Invalid divweu type. Exiting\n");
1048          exit(1);
1049    }
1050 }
1051
1052 static void test_fctiduz(void)
1053 {
1054    if (do_dot)
1055       __asm__ __volatile__ ("fctiduz. %0, %1" : "=d" (f17) : "d" (f14));
1056    else
1057       __asm__ __volatile__ ("fctiduz %0, %1" : "=d" (f17) : "d" (f14));
1058 }
1059
1060 static void test_fctidu(void)
1061 {
1062    if (do_dot)
1063       __asm__ __volatile__ ("fctidu. %0, %1" : "=d" (f17) : "d" (f14));
1064    else
1065       __asm__ __volatile__ ("fctidu %0, %1" : "=d" (f17) : "d" (f14));
1066 }
1067
1068 static void test_fctiwuz(void)
1069 {
1070    if (do_dot)
1071       __asm__ __volatile__ ("fctiwuz. %0, %1" : "=d" (f17) : "d" (f14));
1072    else
1073       __asm__ __volatile__ ("fctiwuz %0, %1" : "=d" (f17) : "d" (f14));
1074 }
1075
1076 static void test_fctiwu(void)
1077 {
1078    if (do_dot)
1079       __asm__ __volatile__ ("fctiwu. %0, %1" : "=d" (f17) : "d" (f14));
1080    else
1081       __asm__ __volatile__ ("fctiwu %0, %1" : "=d" (f17) : "d" (f14));
1082 }
1083
1084 typedef struct simple_test {
1085    test_func_t test_func;
1086    char * name;
1087    precision_type_t precision;
1088 } simple_test_t;
1089
1090 static simple_test_t fct_tests[] = {
1091                                     { &test_fctiduz, "fctiduz", DOUBLE_TEST },
1092                                     { &test_fctidu, "fctidu", DOUBLE_TEST },
1093                                     { &test_fctiwuz, "fctiwuz", SINGLE_TEST },
1094                                     { &test_fctiwu, "fctiwu", SINGLE_TEST },
1095                                    { NULL, NULL }
1096 };
1097
1098 static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1099 {
1100    int a_idx, b_idx, i;
1101    void * inA, * inB;
1102    void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1103
1104    for (i = 0; i < 4; i++) {
1105       a_idx = targs->fra_idx;
1106       b_idx = targs->frb_idx;
1107       inA = (void *)&spec_sp_fargs[a_idx];
1108       inB = (void *)&spec_sp_fargs[b_idx];
1109       // copy single precision FP  into vector element i
1110       memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1111       memcpy(vec_src + (i * 4), inB, 4);
1112       targs++;
1113    }
1114 }
1115
1116 static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1117 {
1118    int a_idx, b_idx, i;
1119    void * inA, * inB;
1120    void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1121
1122    for (i = 0; i < 2; i++) {
1123       a_idx = targs->fra_idx;
1124       b_idx = targs->frb_idx;
1125       inA = (void *)&spec_fargs[a_idx];
1126       inB = (void *)&spec_fargs[b_idx];
1127       // copy double precision FP  into vector element i
1128       memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1129       memcpy(vec_src + (i * 8), inB, 8);
1130       targs++;
1131    }
1132 }
1133
1134 #define VX_NOT_CMP_OP 0xffffffff
1135 static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i)
1136 {
1137    int a_idx, b_idx, k;
1138    char * name = malloc(20);
1139    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1140    int loops = dp ? 2 : 4;
1141    fp_test_args_t * targs = &test_group->targs[i];
1142    unsigned long long * frA_dp, * frB_dp, * dst_dp;
1143    unsigned int * frA_sp, *frB_sp, * dst_sp;
1144    strcpy(name, test_group->name);
1145    printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1146    for (k = 0; k < loops; k++) {
1147       a_idx = targs->fra_idx;
1148       b_idx = targs->frb_idx;
1149       if (k)
1150          printf(" AND ");
1151       if (dp) {
1152          frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1153          frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1154          printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1155       } else {
1156          frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1157          frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1158          printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1159       }
1160       targs++;
1161    }
1162    if (cc != VX_NOT_CMP_OP)
1163       printf(" ? cc=%x", cc);
1164
1165    if (dp) {
1166       dst_dp = (unsigned long long *) &vec_out;
1167       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1168    } else {
1169       dst_sp = (unsigned int *) &vec_out;
1170       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1171    }
1172    free(name);
1173 }
1174
1175
1176 static void print_vx_aORm_fp_result(unsigned long long * XT_arg, unsigned long long * XB_arg,
1177                                     vx_fp_test_t * test_group, int i)
1178 {
1179    int a_idx, k;
1180    char * name = malloc(20);
1181    int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1182    int loops = dp ? 2 : 4;
1183    fp_test_args_t * targs = &test_group->targs[i];
1184    unsigned long long frA_dp, * dst_dp;
1185    unsigned int frA_sp, * dst_sp;
1186
1187    strcpy(name, test_group->name);
1188    if (do_aXp)
1189       if (dp)
1190          strcat(name, "adp");
1191       else
1192          strcat(name, "asp");
1193    else
1194       if (dp)
1195          strcat(name, "mdp");
1196       else
1197          strcat(name, "msp");
1198
1199    printf("#%d: %s ", dp? i/2 : i/4, name);
1200    for (k = 0; k < loops; k++) {
1201       a_idx = targs->fra_idx;
1202       if (k)
1203          printf(" AND ");
1204       if (dp) {
1205          frA_dp = *((unsigned long long *)&spec_fargs[a_idx]);
1206          printf("%s(%016llx,%016llx,%016llx)", test_group->op, XT_arg[k], frA_dp, XB_arg[k]);
1207       } else {
1208          unsigned int * xt_sp = (unsigned int *)XT_arg;
1209          unsigned int * xb_sp = (unsigned int *)XB_arg;
1210          frA_sp = *((unsigned int *)&spec_sp_fargs[a_idx]);
1211          printf("%s(%08x,%08x,%08x)", test_group->op, xt_sp[k], frA_sp, xb_sp[k]);
1212       }
1213       targs++;
1214    }
1215
1216    if (dp) {
1217       dst_dp = (unsigned long long *) &vec_out;
1218       printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1219    } else {
1220       dst_sp = (unsigned int *) &vec_out;
1221       printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1222    }
1223    free(name);
1224 }
1225
1226 /* This function currently only supports double precision input arguments. */
1227 static void test_vx_simple_scalar_fp_ops(void)
1228 {
1229    test_func_t func;
1230    int k = 0;
1231
1232    build_special_fargs_table();
1233    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1234       unsigned long long * frap, * frbp, * dst;
1235       unsigned int * pv;
1236       int idx;
1237       vx_fp_test_t test_group = vx_simple_scalar_fp_tests[k];
1238       Bool convToWord = (test_group.type == VX_CONV_WORD);
1239       if (test_group.precision != DOUBLE_TEST) {
1240          fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1241          exit(1);
1242       }
1243       pv = (unsigned int *)&vec_out;
1244       // clear vec_out
1245       for (idx = 0; idx < 4; idx++, pv++)
1246          *pv = 0;
1247
1248       /* If num_tests is exactly equal to nb_special_fargs, this implies the
1249        * instruction being tested only requires one floating point argument
1250        * (e.g. xssqrtdp).
1251        */
1252       if (test_group.num_tests == nb_special_fargs && !test_group.targs) {
1253          void * inB, * vec_void_ptr = (void *)&vec_inB;
1254          int i;
1255          if (isLE)
1256             vec_void_ptr += 8;
1257          for (i = 0; i < nb_special_fargs; i++) {
1258             inB = (void *)&spec_fargs[i];
1259             frbp = (unsigned long long *)&spec_fargs[i];
1260             memcpy(vec_void_ptr, inB, 8);
1261             (*func)();
1262             dst = (unsigned long long *) &vec_out;
1263             if (isLE)
1264                dst++;
1265             printf("#%d: %s %016llx => %016llx\n", i, test_group.name, *frbp,
1266                    convToWord ? (*dst & 0x00000000ffffffffULL) : *dst);
1267          }
1268       } else {
1269          void * inA, * inB, * vecA_void_ptr, * vecB_void_ptr;
1270          unsigned int condreg, flags;
1271          int isTdiv = (strstr(test_group.name, "xstdivdp") != NULL) ? 1 : 0;
1272          int i;
1273          if (isLE) {
1274             vecA_void_ptr = (void *)&vec_inA + 8;
1275             vecB_void_ptr = (void *)&vec_inB + 8;
1276          } else {
1277             vecA_void_ptr = (void *)&vec_inA;
1278             vecB_void_ptr = (void *)&vec_inB;
1279          }
1280          for (i = 0; i < test_group.num_tests; i++) {
1281             fp_test_args_t aTest = test_group.targs[i];
1282             inA = (void *)&spec_fargs[aTest.fra_idx];
1283             inB = (void *)&spec_fargs[aTest.frb_idx];
1284             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1285             frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1286             // Only need to copy one doubleword into each vector's element 0
1287             memcpy(vecA_void_ptr, inA, 8);
1288             memcpy(vecB_void_ptr, inB, 8);
1289             SET_FPSCR_ZERO;
1290             SET_CR_XER_ZERO;
1291             (*func)();
1292             GET_CR(flags);
1293             if (isTdiv) {
1294                condreg = (flags & 0x000000f0) >> 4;
1295                printf("#%d: %s %016llx,%016llx => cr %x\n", i, test_group.name, *frap, *frbp, condreg);
1296             } else {
1297                dst = (unsigned long long *) &vec_out;
1298                if (isLE)
1299                   dst++;
1300                printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1301                       *frap, *frbp, *dst);
1302             }
1303          }
1304       }
1305       printf( "\n" );
1306       k++;
1307    }
1308 }
1309
1310 static void test_vx_aORm_fp_ops(void)
1311 {
1312    /* These ops need a third src argument, which is stored in element 0 of
1313     * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>m{d|s}p cases, VSX[XT] holds
1314     * src3 and VSX[XB] holds src2; for the xs<ZZZ>a{d|s}p cases, VSX[XT] holds
1315     * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
1316     * data (input args, result) contain only two inputs, so I arbitrarily
1317     * choose some spec_fargs elements for the third source argument.
1318     * Note that that by using the same input data for a given pair of
1319     * a{d|s}p/m{d|s}p-type instructions (by swapping the src2 and src3
1320     * arguments), the expected result should be the same.
1321     */
1322
1323    test_func_t func;
1324    int k;
1325    char * test_name = (char *)malloc(20);
1326    k = 0;
1327    do_dot = False;
1328
1329    build_special_fargs_table();
1330    while ((func = vx_aORm_fp_tests[k].test_func)) {
1331       int i, stride;
1332       Bool repeat = False;
1333       Bool scalar = False;
1334       unsigned long long * frap, * frbp, * dst;
1335       vx_fp_test_t test_group = vx_aORm_fp_tests[k];
1336       vx_fp_test_type test_type = test_group.type;
1337       do_dp = test_group.precision == DOUBLE_TEST ? True : False;
1338       frap = frbp = NULL;
1339
1340       if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1341             scalar = True;
1342             strcpy(test_name, test_group.name);
1343             if (!repeat) {
1344                repeat = 1;
1345                stride = 1;
1346                // Only support double precision scalar ops in this function
1347                if (do_dp) {
1348                   strcat(test_name, "adp");
1349                } else {
1350                   fprintf(stderr, "Unsupported single precision for scalar op in test_vx_aORm_fp_ops\n");
1351                   exit(1);
1352                }
1353                do_aXp = True;
1354             }
1355       } else if (test_type < VX_BASIC_CMP) {
1356          // Then it must be a VX_VECTOR_xxx type
1357             stride = do_dp ? 2 : 4;
1358             if (!repeat) {
1359                // No need to work up the testcase name here, since that will be done in
1360                // the print_vx_aORm_fp_result() function we'll call for vector-type ops.
1361                repeat = 1;
1362                do_aXp = True;
1363             }
1364       } else {
1365             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
1366             exit(1);
1367       }
1368
1369 again:
1370       for (i = 0; i < test_group.num_tests; i+=stride) {
1371          void  * inA, * inB;
1372          int m, fp_idx[4];
1373          unsigned long long vsr_XT[2];
1374          unsigned long long vsr_XB[2];
1375          fp_test_args_t aTest = test_group.targs[i];
1376          for (m = 0; m < stride; m++)
1377             fp_idx[m] = i % (nb_special_fargs - stride) + m;
1378
1379          /* When repeat == True, we're on the first time through of one of the VX_FP_SMx
1380           * test types, meaning we're testing a xs<ZZZ>adp case, thus we have to swap
1381           * inputs as described above:
1382           *    src2 <= VSX[XT]
1383           *    src3 <= VSX[XB]
1384           */
1385          if (scalar) {
1386 #ifdef VGP_ppc64le_linux
1387 #define VECTOR_ADDR(_v) ((void *)&_v) + 8
1388 #else
1389 #define VECTOR_ADDR(_v) ((void *)&_v)
1390 #endif
1391             // For scalar op, only need to copy one doubleword into each vector's element 0
1392             inA = (void *)&spec_fargs[aTest.fra_idx];
1393             inB = (void *)&spec_fargs[aTest.frb_idx];
1394             frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1395             memcpy(VECTOR_ADDR(vec_inA), inA, 8);
1396             if (repeat) {
1397                memcpy(VECTOR_ADDR(vec_out), inB, 8);  // src2
1398                memcpy(VECTOR_ADDR(vec_inB), &spec_fargs[fp_idx[0]], 8);  //src3
1399                frbp = (unsigned long long *)&spec_fargs[fp_idx[0]];
1400             } else {
1401                frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1402                memcpy(VECTOR_ADDR(vec_inB), inB, 8);  // src2
1403                memcpy(VECTOR_ADDR(vec_out), &spec_fargs[fp_idx[0]], 8);  //src3
1404             }
1405             memcpy(vsr_XT, VECTOR_ADDR(vec_out), 8);
1406          } else {
1407             int j, loops = do_dp ? 2 : 4;
1408             size_t len = do_dp ? 8 : 4;
1409             void * vec_src = repeat ? (void *)&vec_inB : (void *)&vec_out;
1410             for (j = 0; j < loops; j++) {
1411                if (do_dp)
1412                   memcpy(vec_src + (j * len), &spec_fargs[fp_idx[j]], len);
1413                else
1414                   memcpy(vec_src + (j * len), &spec_sp_fargs[fp_idx[j]], len);
1415             }
1416             if (do_dp)
1417                setup_dp_fp_args(&test_group.targs[i], repeat);
1418             else
1419                setup_sp_fp_args(&test_group.targs[i], repeat);
1420
1421             memcpy(vsr_XT, &vec_out, 16);
1422             memcpy(vsr_XB, &vec_inB, 16);
1423          }
1424
1425          (*func)();
1426          dst = (unsigned long long *) &vec_out;
1427          if (isLE)
1428             dst++;
1429          if (test_type < VX_VECTOR_FP_MULT_AND_OP2)
1430             printf( "#%d: %s %s(%016llx,%016llx,%016llx) = %016llx\n", i,
1431                     test_name, test_group.op, vsr_XT[0], *frap, *frbp, *dst );
1432          else
1433             print_vx_aORm_fp_result(vsr_XT, vsr_XB, &test_group, i);
1434       }
1435       printf( "\n" );
1436
1437       if (repeat) {
1438          repeat = 0;
1439          if (test_type < VX_VECTOR_FP_MULT_AND_OP2) {
1440                strcpy(test_name, test_group.name);
1441                strcat(test_name, "mdp");
1442          }
1443          do_aXp = False;
1444          goto again;
1445       }
1446       k++;
1447    }
1448    printf( "\n" );
1449    free(test_name);
1450 }
1451
1452 static void test_vx_vector_one_fp_arg(void)
1453 {
1454    test_func_t func;
1455    int k;
1456    k = 0;
1457    build_special_fargs_table();
1458
1459    while ((func = vx_vector_one_fp_arg_tests[k].test_func)) {
1460       int idx, i;
1461       vx_fp_test_t test_group = vx_vector_one_fp_arg_tests[k];
1462       Bool convToWord = (test_group.type == VX_CONV_WORD);
1463       Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1464       Bool xvrespTest = (strstr(test_group.name , "xvresp") != NULL) ? True: False;
1465       int stride = dp ? 2 : 4;
1466
1467       for (i = 0; i < test_group.num_tests; i+=stride) {
1468          unsigned int * pv;
1469          void * inB;
1470
1471          pv = (unsigned int *)&vec_out;
1472          // clear vec_out
1473          for (idx = 0; idx < 4; idx++, pv++)
1474             *pv = 0;
1475
1476          if (dp) {
1477             int j;
1478             unsigned long long * frB_dp, *dst_dp;
1479             for (j = 0; j < 2; j++) {
1480                inB = (void *)&spec_fargs[i + j];
1481                // copy double precision FP into vector element i
1482                memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1483             }
1484             // execute test insn
1485             (*func)();
1486             dst_dp = (unsigned long long *) &vec_out;
1487             printf("#%d: %s ", i/2, test_group.name);
1488             for (j = 0; j < 2; j++) {
1489                if (j)
1490                   printf("; ");
1491                frB_dp = (unsigned long long *)&spec_fargs[i + j];
1492                printf("%s(%016llx)", test_group.op, *frB_dp);
1493                printf(" = %016llx", convToWord ? (dst_dp[j] & 0x00000000ffffffffULL) : dst_dp[j]);
1494             }
1495             printf("\n");
1496          } else {
1497             int j;
1498             unsigned int * frB_sp, * dst_sp;
1499
1500             for (j = 0; j < 4; j++) {
1501                inB = (void *)&spec_sp_fargs[i + j];
1502                // copy single precision FP into vector element i
1503                memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1504             }
1505             // execute test insn
1506             (*func)();
1507             dst_sp = (unsigned int *) &vec_out;
1508             // print result
1509             printf("#%d: %s ", i/4, test_group.name);
1510             for (j = 0; j < 4; j++) {
1511                if (j)
1512                   printf("; ");
1513                frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1514                printf("%s(%08x)", test_group.op, *frB_sp);
1515                if (xvrespTest) {
1516                   float calc_diff = fabs(spec_sp_fargs[i + j]/256);
1517                   float sp_res;
1518                   memcpy(&sp_res, &dst_sp[j], 4);
1519                   float div_result = 1/spec_sp_fargs[i + j];
1520                   float real_diff = fabs(sp_res - div_result);
1521                   printf( " ==> %s",
1522                           ( ( sp_res == div_result )
1523                                    || ( isnan(sp_res) && isnan(div_result) )
1524                                    || ( real_diff <= calc_diff ) ) ? "PASS"
1525                                                                      : "FAIL");
1526                } else {
1527                   printf(" = %08x", dst_sp[j]);
1528                }
1529             }
1530             printf("\n");
1531          }
1532       }
1533       k++;
1534       printf( "\n" );
1535    }
1536
1537 }
1538
1539 /* This function assumes the instruction being tested requires two args. */
1540 static void test_vx_vector_fp_ops(void)
1541 {
1542    test_func_t func;
1543    int k;
1544    k = 0;
1545    build_special_fargs_table();
1546
1547    while ((func = vx_vector_fp_tests[k].test_func)) {
1548       int idx, i, repeat = 1;
1549       vx_fp_test_t test_group = vx_vector_fp_tests[k];
1550       int stride = test_group.precision == DOUBLE_TEST ? 2 : 4;
1551       do_dot = False;
1552
1553 again:
1554       for (i = 0; i < test_group.num_tests; i+=stride) {
1555          unsigned int * pv, condreg;
1556          unsigned int flags;
1557
1558          pv = (unsigned int *)&vec_out;
1559          if (test_group.precision == DOUBLE_TEST)
1560             setup_dp_fp_args(&test_group.targs[i], False);
1561          else
1562             setup_sp_fp_args(&test_group.targs[i], False);
1563
1564          // clear vec_out
1565          for (idx = 0; idx < 4; idx++, pv++)
1566             *pv = 0;
1567
1568          // execute test insn
1569          SET_FPSCR_ZERO;
1570          SET_CR_XER_ZERO;
1571          (*func)();
1572          GET_CR(flags);
1573          if (test_group.type == VX_BASIC_CMP) {
1574             condreg = (flags & 0x000000f0) >> 4;
1575          } else {
1576             condreg = VX_NOT_CMP_OP;
1577          }
1578          print_vector_fp_result(condreg, &test_group, i);
1579       }
1580       printf("\n");
1581       if (repeat && test_group.type == VX_BASIC_CMP) {
1582          repeat = 0;
1583          do_dot = True;
1584          goto again;
1585       }
1586       k++;
1587       printf( "\n" );
1588    }
1589 }
1590
1591
// The div doubleword test data: one { first operand, second operand } pair per row
signed long long div_dw_tdata[13][2] = {
   { 4,                     -4         },
   { 4,                     -3         },
   { 4,                     4          },
   { 4,                     -5         },
   { 3,                     8          },
   { 0x8000000000000000ULL, 0xa        },
   { 0x50c,                 -1         },
   { 0x50c,                 -4096      },
   { 0x1234fedc,            0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   { 0x123456789abdcULL,    0          },
   { 0,                     2          },
   { 0x77,                  0xa3499    }
};
// Number of operand pairs (rows) in div_dw_tdata
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))
1609
// The div word test data: one { first operand, second operand } pair per row
unsigned int div_w_tdata[6][2] = {
   { 0,          2          },
   { 2,          0          },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234, 5          },
   { 77,         66         },
   { 5,          0xfabc1234 },
};
// Number of operand pairs (rows) in div_w_tdata
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1620
1621 typedef struct div_ext_test
1622 {
1623    test_func_t test_func;
1624    const char *name;
1625    int num_tests;
1626    div_type_t div_type;
1627    precision_type_t precision;
1628 } div_ext_test_t;
1629
1630 static div_ext_test_t div_tests[] = {
1631 #ifdef __powerpc64__
1632                                    { &test_divde, "divde", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1633                                    { &test_divde, "divdeo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1634 #endif
1635                                    { &test_divweu, "divweu", w_tdata_len, DIV_BASE, SINGLE_TEST },
1636                                    { &test_divweu, "divweuo", w_tdata_len, DIV_OE, SINGLE_TEST },
1637                                    { NULL, NULL, 0, 0, 0 }
1638 };
1639
1640 static void test_div_extensions(void)
1641 {
1642    test_func_t func;
1643    int k;
1644    k = 0;
1645
1646    while ((func = div_tests[k].test_func)) {
1647       int i, repeat = 1;
1648       div_ext_test_t test_group = div_tests[k];
1649       do_dot = False;
1650
1651 again:
1652       for (i = 0; i < test_group.num_tests; i++) {
1653          unsigned int condreg;
1654
1655          if (test_group.div_type == DIV_OE)
1656             do_OE = True;
1657          else
1658             do_OE = False;
1659
1660          if (test_group.precision == DOUBLE_TEST) {
1661             r14 = div_dw_tdata[i][0];
1662             r15 = div_dw_tdata[i][1];
1663          } else {
1664             r14 = div_w_tdata[i][0];
1665             r15 = div_w_tdata[i][1];
1666          }
1667          // execute test insn
1668          (*func)();
1669          condreg = (div_flags & 0xf0000000) >> 28;
1670          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1671          if (test_group.precision == DOUBLE_TEST) {
1672             printf("0x%016llx / 0x%016llx = 0x%016llx;",
1673                    div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1674          } else {
1675             printf("0x%08x / 0x%08x = 0x%08x;",
1676                    div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1677          }
1678          printf(" CR=%x; XER=%x\n", condreg, div_xer);
1679       }
1680       printf("\n");
1681       if (repeat) {
1682          repeat = 0;
1683          do_dot = True;
1684          goto again;
1685       }
1686       k++;
1687       printf( "\n" );
1688    }
1689
1690 }
1691
1692 static void test_fct_ops(void)
1693 {
1694    test_func_t func;
1695    int k;
1696    k = 0;
1697
1698    while ((func = fct_tests[k].test_func)) {
1699       int i, repeat = 1;
1700       simple_test_t test_group = fct_tests[k];
1701       do_dot = False;
1702
1703 again:
1704       for (i = 0; i < nb_special_fargs; i++) {
1705          double result;
1706 #define SINGLE_MASK 0x00000000FFFFFFFFULL
1707
1708          f14 = spec_fargs[i];
1709          // execute test insn
1710          SET_FPSCR_ZERO;
1711          (*func)();
1712          result = f17;
1713          printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1714          printf("0x%016llx (%e) ==> 0x%016llx\n",
1715                 *((unsigned long long *)(&spec_fargs[i])), spec_fargs[i],
1716                 test_group.precision == SINGLE_TEST ? (SINGLE_MASK &
1717                          *((unsigned long long *)(&result))) :
1718                          *((unsigned long long *)(&result)));
1719       }
1720       printf("\n");
1721       if (repeat) {
1722          repeat = 0;
1723          do_dot = True;
1724          goto again;
1725       }
1726       k++;
1727       printf( "\n" );
1728    }
1729 }
1730
#ifdef __powerpc64__
/* Execute stdbrx once: store the doubleword held in r17 to the local
 * 'store' (addressed through r14) and print the value before and after.
 */
void test_stdbrx(void)
{
   unsigned long long store, val = 0xdeadbacf12345678ULL;
   printf("stdbrx: 0x%llx ==> ", val);
   r17 = (HWord_t)val;
   r14 = (HWord_t)&store;
   // The asm writes to memory through the address in r14.  Without the
   // "memory" clobber the compiler has no way to know 'store' was written
   // and may treat the read below as a read of an uninitialised variable.
   __asm__ __volatile__ ("stdbrx %0, 0, %1" : : "r"(r17), "r"(r14) : "memory");
   printf("0x%llx\n", store);
   printf( "\n" );
}
#endif
1743
1744 static test_table_t
1745          all_tests[] =
1746 {
1747                     { &test_vx_vector_one_fp_arg,
1748                       "Test VSX vector single arg instructions", OTHER_INST },
1749                     { &test_vx_vector_fp_ops,
1750                       "Test VSX floating point compare and basic arithmetic instructions", OTHER_INST },
1751 #ifdef __powerpc64__
1752                      { &test_bpermd,
1753                        "Test bit permute double", OTHER_INST },
1754 #endif
1755                      { &test_xxsel,
1756                          "Test xxsel instruction", OTHER_INST },
1757                      { &test_xxspltw,
1758                          "Test xxspltw instruction", OTHER_INST },
1759                      { &test_div_extensions,
1760                        "Test div extensions", SCALAR_DIV_INST },
1761                      { &test_fct_ops,
1762                        "Test floating point convert [word | doubleword] unsigned, with round toward zero", OTHER_INST },
1763 #ifdef __powerpc64__
1764                      { &test_stdbrx,
1765                       "Test stdbrx instruction", OTHER_INST },
1766 #endif
1767                      { &test_vx_aORm_fp_ops,
1768                        "Test floating point arithmetic instructions -- with a{d|s}p or m{d|s}p", OTHER_INST },
1769                      { &test_vx_simple_scalar_fp_ops,
1770                       "Test scalar floating point arithmetic instructions", OTHER_INST },
1771                      { NULL, NULL }
1772 };
1773 #endif // HAS_VSX
1774
/* Print the command-line help text to stderr. */
static void usage (void)
{
  /* The program name must match this test (test_isa_2_06_part2), not the
   * ISA 3.0 test the text was originally written for. */
  fprintf(stderr,
          "Usage: test_isa_2_06_part2 [OPTIONS]\n"
          "\t-d: test scalar division instructions (default)\n"
          "\t-o: test non scalar division instructions (default)\n"
          "\t-A: test all instructions (default)\n"
          "\t-h: display this help and exit\n"
          );
}
1785
/* Program entry point.
 *
 * Parses the -d / -o / -A / -h options into a mask of test groups, then
 * runs every entry of all_tests whose group is selected, printing its
 * banner first.  Returns 0 on success (including -h) and 1 when an
 * unrecognised option is given.  Without HAS_VSX the program does nothing
 * and returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int i = 0;
   int c;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER. These
    * bits are set on various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         /* getopt() returns '?' for an unrecognised option; the offending
          * character itself is reported in optopt. */
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   /* No group selected on the command line: run everything, which is what
    * the usage text documents as the default. */
   if (test_run_mask == 0)
      test_run_mask = 0xFFFF;

   while ((func = all_tests[i].test_category)) {
      aTest = all_tests[i];
      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
      i++;
   }

   /* free(NULL) is a no-op, so no guards are needed. */
   free(spec_fargs);
   free(spec_sp_fargs);

#endif // HAS_VSX

   return 0;
}