none/tests/ppc64/test_isa_2_07_part2.c

   1 /*  Copyright (C) 2013 IBM
   2
   3  Authors: Carl Love  <carll@us.ibm.com>
   4           Maynard Johnson <maynardj@us.ibm.com>
   5
   6  This program is free software; you can redistribute it and/or
   7  modify it under the terms of the GNU General Public License as
   8  published by the Free Software Foundation; either version 2 of the
   9  License, or (at your option) any later version.
  10
  11  This program is distributed in the hope that it will be useful, but
  12  WITHOUT ANY WARRANTY; without even the implied warranty of
  13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  General Public License for more details.
  15
  16  You should have received a copy of the GNU General Public License
  17  along with this program; if not, see <http://www.gnu.org/licenses/>.
  18
  19  The GNU General Public License is contained in the file COPYING.
  20
  21  This program is based heavily on the test_isa_2_06_part*.c source files.
  22  */
  23
  24 #include <stdio.h>
  25
  26 #ifdef HAS_ISA_2_07
  27
  28 #include <stdint.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <malloc.h>
  32 #include <altivec.h>
  33 #include <math.h>
  34
  35 #ifndef __powerpc64__
  36 typedef uint32_t HWord_t;
  37 #else
  38 typedef uint64_t HWord_t;
  39 #endif /* __powerpc64__ */
  40
  41 #ifdef VGP_ppc64le_linux
  42 #define isLE 1
  43 #else
  44 #define isLE 0
  45 #endif
  46
  47 register HWord_t r14 __asm__ ("r14");
  48 register HWord_t r15 __asm__ ("r15");
  49 register HWord_t r16 __asm__ ("r16");
  50 register HWord_t r17 __asm__ ("r17");
  51 register double f14 __asm__ ("fr14");
  52 register double f15 __asm__ ("fr15");
  53 register double f16 __asm__ ("fr16");
  54 register double f17 __asm__ ("fr17");
  55
  56 static volatile unsigned int cond_reg;
  57
  58 #define True  1
  59 #define False 0
  60
  61 #define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
  62
  63 #define SET_CR(_arg) \
  64       __asm__ __volatile__ ("mtcr  %0" : : "b"(_arg) : ALLCR );
  65
  66 #define SET_XER(_arg) \
  67       __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
  68
  69 #define GET_CR(_lval) \
  70       __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )
  71
  72 #define GET_XER(_lval) \
  73       __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
  74
  75 #define GET_CR_XER(_lval_cr,_lval_xer) \
  76    do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
  77
  78 #define SET_CR_ZERO \
  79       SET_CR(0)
  80
  81 #define SET_XER_ZERO \
  82       SET_XER(0)
  83
  84 #define SET_CR_XER_ZERO \
  85    do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
  86
  87 #define SET_FPSCR_ZERO \
  88    do { double _d = 0.0; \
  89         __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
  90    } while (0)
  91
  92 typedef unsigned char Bool;
  93
  94
  95 /* These functions below that construct a table of floating point
  96  * values were lifted from none/tests/ppc32/jm-insns.c.
  97  */
  98
  99 #if defined (DEBUG_ARGS_BUILD)
 100 #define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
 101 #else
 102 #define AB_DPRINTF(fmt, args...) do { } while (0)
 103 #endif
 104
 105 static inline void register_farg (void *farg,
 106                                   int s, uint16_t _exp, uint64_t mant)
 107 {
 108    uint64_t tmp;
 109
 110    tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
 111    *(uint64_t *)farg = tmp;
 112    AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
 113               s, _exp, mant, *(uint64_t *)farg, *(double *)farg);
 114 }
 115
 116 static inline void register_sp_farg (void *farg,
 117                                      int s, uint16_t _exp, uint32_t mant)
 118 {
 119    uint32_t tmp;
 120    tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
 121    *(uint32_t *)farg = tmp;
 122 }
 123
 124
 125 typedef struct fp_test_args {
 126    int fra_idx;
 127    int frb_idx;
 128 } fp_test_args_t;
 129
 130 static int nb_special_fargs;
 131 static double * spec_fargs;
 132 static float * spec_sp_fargs;
 133
 134 static void build_special_fargs_table(void)
 135 {
 136    /*
 137     * Double precision:
 138     * Sign goes from zero to one               (1 bit)
 139     * Exponent goes from 0 to ((1 << 12) - 1)  (11 bits)
 140     * Mantissa goes from 1 to ((1 << 52) - 1)  (52 bits)
 141     * + special values:
 142     * +0.0      : 0 0x000 0x0000000000000 => 0x0000000000000000
 143     * -0.0      : 1 0x000 0x0000000000000 => 0x8000000000000000
 144     * +infinity : 0 0x7FF 0x0000000000000 => 0x7FF0000000000000
 145     * -infinity : 1 0x7FF 0x0000000000000 => 0xFFF0000000000000
 146     * +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF => 0x7FF7FFFFFFFFFFFF
 147     * -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF => 0xFFF7FFFFFFFFFFFF
 148     * +QNaN     : 0 0x7FF 0x8000000000000 => 0x7FF8000000000000
 149     * -QNaN     : 1 0x7FF 0x8000000000000 => 0xFFF8000000000000
 150     * (8 values)
 151     *
 152     * Single precision
 153     * Sign:     1 bit
 154     * Exponent: 8 bits
 155     * Mantissa: 23 bits
 156     * +0.0      : 0 0x00 0x000000 => 0x00000000
 157     * -0.0      : 1 0x00 0x000000 => 0x80000000
 158     * +infinity : 0 0xFF 0x000000 => 0x7F800000
 159     * -infinity : 1 0xFF 0x000000 => 0xFF800000
 160     * +SNaN     : 0 0xFF 0x3FFFFF => 0x7FBFFFFF
 161     * -SNaN     : 1 0xFF 0x3FFFFF => 0xFFBFFFFF
 162     * +QNaN     : 0 0xFF 0x400000 => 0x7FC00000
 163     * -QNaN     : 1 0xFF 0x400000 => 0xFFC00000
 164    */
 165
 166    uint64_t mant;
 167    uint32_t mant_sp;
 168    uint16_t _exp;
 169    int s;
 170    int j, i = 0;
 171
 172    if (spec_fargs)
 173       return;
 174
 175    spec_fargs = malloc( 20 * sizeof(double) );
 176    spec_sp_fargs = malloc( 20 * sizeof(float) );
 177
 178    // #0
 179    s = 0;
 180    _exp = 0x3fd;
 181    mant = 0x8000000000000ULL;
 182    register_farg(&spec_fargs[i++], s, _exp, mant);
 183
 184    // #1
 185    s = 0;
 186    _exp = 0x404;
 187    mant = 0xf000000000000ULL;
 188    register_farg(&spec_fargs[i++], s, _exp, mant);
 189
 190    // #2
 191    s = 0;
 192    _exp = 0x001;
 193    mant = 0x8000000b77501ULL;
 194    register_farg(&spec_fargs[i++], s, _exp, mant);
 195
 196    // #3
 197    s = 0;
 198    _exp = 0x7fe;
 199    mant = 0x800000000051bULL;
 200    register_farg(&spec_fargs[i++], s, _exp, mant);
 201
 202    // #4
 203    s = 0;
 204    _exp = 0x012;
 205    mant = 0x3214569900000ULL;
 206    register_farg(&spec_fargs[i++], s, _exp, mant);
 207
 208    /* Special values */
 209    /* +0.0      : 0 0x000 0x0000000000000 */
 210    // #5
 211    s = 0;
 212    _exp = 0x000;
 213    mant = 0x0000000000000ULL;
 214    register_farg(&spec_fargs[i++], s, _exp, mant);
 215
 216    /* -0.0      : 1 0x000 0x0000000000000 */
 217    // #6
 218    s = 1;
 219    _exp = 0x000;
 220    mant = 0x0000000000000ULL;
 221    register_farg(&spec_fargs[i++], s, _exp, mant);
 222
 223    /* +infinity : 0 0x7FF 0x0000000000000  */
 224    // #7
 225    s = 0;
 226    _exp = 0x7FF;
 227    mant = 0x0000000000000ULL;
 228    register_farg(&spec_fargs[i++], s, _exp, mant);
 229
 230    /* -infinity : 1 0x7FF 0x0000000000000 */
 231    // #8
 232    s = 1;
 233    _exp = 0x7FF;
 234    mant = 0x0000000000000ULL;
 235    register_farg(&spec_fargs[i++], s, _exp, mant);
 236
 237    /*
 238     * This comment applies to values #9 and #10 below:
 239     * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
 240     * so we can't just copy the double-precision value to the corresponding slot in the
 241     * single-precision array (i.e., in the loop at the end of this function).  Instead, we
 242     * have to manually set the bits using register_sp_farg().
 243     */
 244
 245    /* +SNaN     : 0 0x7FF 0x7FFFFFFFFFFFF */
 246    // #9
 247    s = 0;
 248    _exp = 0x7FF;
 249    mant = 0x7FFFFFFFFFFFFULL;
 250    register_farg(&spec_fargs[i++], s, _exp, mant);
 251    _exp = 0xff;
 252    mant_sp = 0x3FFFFF;
 253    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 254
 255    /* -SNaN     : 1 0x7FF 0x7FFFFFFFFFFFF */
 256    // #10
 257    s = 1;
 258    _exp = 0x7FF;
 259    mant = 0x7FFFFFFFFFFFFULL;
 260    register_farg(&spec_fargs[i++], s, _exp, mant);
 261    _exp = 0xff;
 262    mant_sp = 0x3FFFFF;
 263    register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
 264
 265    /* +QNaN     : 0 0x7FF 0x8000000000000 */
 266    // #11
 267    s = 0;
 268    _exp = 0x7FF;
 269    mant = 0x8000000000000ULL;
 270    register_farg(&spec_fargs[i++], s, _exp, mant);
 271
 272    /* -QNaN     : 1 0x7FF 0x8000000000000 */
 273    // #12
 274    s = 1;
 275    _exp = 0x7FF;
 276    mant = 0x8000000000000ULL;
 277    register_farg(&spec_fargs[i++], s, _exp, mant);
 278
 279    /* denormalized value */
 280    // #13
 281    s = 1;
 282    _exp = 0x000;
 283    mant = 0x8340000078000ULL;
 284    register_farg(&spec_fargs[i++], s, _exp, mant);
 285
 286    /* Negative finite number */
 287    // #14
 288    s = 1;
 289    _exp = 0x40d;
 290    mant = 0x0650f5a07b353ULL;
 291    register_farg(&spec_fargs[i++], s, _exp, mant);
 292
 293    /* A few positive finite numbers ... */
 294    // #15
 295    s = 0;
 296    _exp = 0x412;
 297    mant = 0x32585a9900000ULL;
 298    register_farg(&spec_fargs[i++], s, _exp, mant);
 299
 300    // #16
 301    s = 0;
 302    _exp = 0x413;
 303    mant = 0x82511a2000000ULL;
 304    register_farg(&spec_fargs[i++], s, _exp, mant);
 305
 306    // #17
 307    s = 0;
 308    _exp = 0x403;
 309    mant = 0x12ef5a9300000ULL;
 310    register_farg(&spec_fargs[i++], s, _exp, mant);
 311
 312    // #18
 313    s = 0;
 314    _exp = 0x405;
 315    mant = 0x14bf5d2300000ULL;
 316    register_farg(&spec_fargs[i++], s, _exp, mant);
 317
 318    // #19
 319    s = 0;
 320    _exp = 0x409;
 321    mant = 0x76bf982440000ULL;
 322    register_farg(&spec_fargs[i++], s, _exp, mant);
 323
 324
 325    nb_special_fargs = i;
 326    for (j = 0; j < i; j++) {
 327       if (!(j == 9 || j == 10))
 328          spec_sp_fargs[j] = spec_fargs[j];
 329    }
 330 }
 331
 332 static unsigned int vstg[] __attribute__ ((aligned (16))) = { 0, 0, 0,0,
 333                                                               0, 0, 0, 0 };
 334
 335
 336 static unsigned int viargs[] __attribute__ ((aligned (16))) = { 0x80000001,
 337                                                                 0x89abcdef,
 338                                                                 0x00112233,
 339                                                                 0x74556677,
 340                                                                 0x00001abb,
 341                                                                 0x00000001,
 342                                                                 0x31929394,
 343                                                                 0xa1a2a3a4,
 344 };
 345 #define NUM_VIARGS_INTS (sizeof viargs/sizeof viargs[0])
 346 #define NUM_VIARGS_VECS  (NUM_VIARGS_INTS/4)
 347
 348
 349 static unsigned long long vdargs[] __attribute__ ((aligned (16))) = {
 350                                                                      0x0102030405060708ULL,
 351                                                                      0x090A0B0C0E0D0E0FULL,
 352                                                                      0xF1F2F3F4F5F6F7F8ULL,
 353                                                                      0xF9FAFBFCFEFDFEFFULL
 354 };
 355 #define NUM_VDARGS_INTS (sizeof vdargs/sizeof vdargs[0])
 356 #define NUM_VDARGS_VECS  (NUM_VDARGS_INTS/2)
 357
 358 typedef void (*test_func_t)(void);
 359
 360 struct test_table
 361 {
 362    test_func_t test_category;
 363    char * name;
 364 };
 365
 366
 367 typedef enum {
 368    SINGLE_TEST,
 369    SINGLE_TEST_SINGLE_RES,
 370    DOUBLE_TEST,
 371    DOUBLE_TEST_SINGLE_RES
 372 } precision_type_t;
 373 #define IS_DP_RESULT(x) ((x == SINGLE_TEST) || (x == DOUBLE_TEST))
 374
 375 typedef enum {
 376    VX_FP_SMAS,   // multiply add single precision result
 377    VX_FP_SMSS,   // multiply sub single precision result
 378    VX_FP_SNMAS,  // negative multiply add single precision result
 379    VX_FP_SNMSS,  // negative multiply sub single precision result
 380    VX_FP_OTHER,
 381    VX_CONV_WORD,
 382    VX_ESTIMATE,
 383    VX_CONV_TO_SINGLE,
 384    VX_CONV_TO_DOUBLE,
 385    VX_SCALAR_CONV_TO_WORD,
 386    VX_SCALAR_SP_TO_VECTOR_SP,
 387    VX_DEFAULT
 388 } vx_fp_test_type;
 389
 390 typedef enum {
 391    VSX_LOAD = 1,
 392    VSX_LOAD_SPLAT,
 393    VSX_STORE,
 394 } vsx_ldst_type;
 395
 396 typedef enum {
 397    VSX_AND = 1,
 398    VSX_NAND,
 399    VSX_ANDC,
 400    VSX_OR,
 401    VSX_ORC,
 402    VSX_NOR,
 403    VSX_XOR,
 404    VSX_EQV,
 405 } vsx_log_op;
 406
 407 struct vx_fp_test1
 408 {
 409    test_func_t test_func;
 410    const char *name;
 411    fp_test_args_t * targs;
 412    int num_tests;
 413     vx_fp_test_type test_type;
 414  };
 415
 416 struct ldst_test
 417 {
 418    test_func_t test_func;
 419    const char *name;
 420    precision_type_t precision;
 421    void * base_addr;
 422    uint32_t offset;
 423    vsx_ldst_type type;
 424 };
 425
 426 struct vx_fp_test2
 427 {
 428    test_func_t test_func;
 429    const char *name;
 430    fp_test_args_t * targs;
 431    int num_tests;
 432    precision_type_t precision;
 433    vx_fp_test_type test_type;
 434    const char * op;
 435 };
 436
 437 struct xs_conv_test
 438 {
 439    test_func_t test_func;
 440    const char *name;
 441    int num_tests;
 442 };
 443
 444 struct simple_test
 445 {
 446    test_func_t test_func;
 447    const char *name;
 448 };
 449
 450 struct vsx_logic_test
 451 {
 452    test_func_t test_func;
 453    const char *name;
 454    vsx_log_op op;
 455 };
 456
 457 typedef struct vsx_logic_test logic_test_t;
 458 typedef struct ldst_test ldst_test_t;
 459 typedef struct simple_test xs_conv_test_t;
 460 typedef struct vx_fp_test1 vx_fp_test_basic_t;
 461 typedef struct vx_fp_test2 vx_fp_test2_t;
 462 typedef struct test_table test_table_t;
 463
 464
 465 static vector unsigned int vec_out, vec_inA, vec_inB;
 466
 467 static void test_xscvdpspn(void)
 468 {
 469    __asm__ __volatile__ ("xscvdpspn   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 470 }
 471
 472 static void test_xscvspdpn(void)
 473 {
 474    __asm__ __volatile__ ("xscvspdpn  %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 475 }
 476
 477 static int do_asp;
 478 static void test_xsmadds(void)
 479 {
 480    if (do_asp)
 481       __asm__ __volatile__ ("xsmaddasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 482    else
 483       __asm__ __volatile__ ("xsmaddmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 484 }
 485
 486 static void test_xsmsubs(void)
 487 {
 488    if (do_asp)
 489       __asm__ __volatile__ ("xsmsubasp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 490    else
 491       __asm__ __volatile__ ("xsmsubmsp          %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 492 }
 493
 494 static void test_xscvsxdsp (void)
 495 {
 496    __asm__ __volatile__ ("xscvsxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 497 }
 498
 499 static void test_xscvuxdsp (void)
 500 {
 501    __asm__ __volatile__ ("xscvuxdsp          %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 502 }
 503
 504 static void test_xsnmadds(void)
 505 {
 506    if (do_asp)
 507       __asm__ __volatile__ ("xsnmaddasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 508    else
 509       __asm__ __volatile__ ("xsnmaddmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 510 }
 511
 512 static void test_xsnmsubs(void)
 513 {
 514    if (do_asp)
 515       __asm__ __volatile__ ("xsnmsubasp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 516    else
 517       __asm__ __volatile__ ("xsnmsubmsp        %x0, %x1, %x2" : "+wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 518 }
 519
 520 static void test_stxsspx(void)
 521 {
 522    __asm__ __volatile__ ("stxsspx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
 523 }
 524
 525 static void test_stxsiwx(void)
 526 {
 527    __asm__ __volatile__ ("stxsiwx          %x0, %1, %2" : : "wa" (vec_inA), "b" (r14),"r" (r15));
 528 }
 529
 530 static void test_lxsiwax(void)
 531 {
 532    __asm__ __volatile__ ("lxsiwax          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 533 }
 534
 535 static void test_lxsiwzx(void)
 536 {
 537    __asm__ __volatile__ ("lxsiwzx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 538 }
 539
 540 static void test_lxsspx(void)
 541 {
 542    __asm__ __volatile__ ("lxsspx          %x0, %1, %2" : "=wa" (vec_out): "b" (r14),"r" (r15));
 543 }
 544
 545 static void test_xssqrtsp(void)
 546 {
 547    __asm__ __volatile__ ("xssqrtsp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 548 }
 549
 550 static void test_xsrsqrtesp(void)
 551 {
 552    __asm__ __volatile__ ("xsrsqrtesp         %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 553 }
 554
  555 /* Three argument instructions */
 556 static void test_xxleqv(void)
 557 {
 558    __asm__ __volatile__ ("xxleqv          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 559 }
 560
 561 static void test_xxlorc(void)
 562 {
 563    __asm__ __volatile__ ("xxlorc          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 564 }
 565
 566 static void test_xxlnand(void)
 567 {
 568    __asm__ __volatile__ ("xxlnand         %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 569 }
 570
 571 static void test_xsaddsp(void)
 572 {
 573   __asm__ __volatile__ ("xsaddsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 574 }
 575
 576 static void test_xssubsp(void)
 577 {
 578   __asm__ __volatile__ ("xssubsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 579 }
 580
 581 static void test_xsdivsp(void)
 582 {
 583   __asm__ __volatile__ ("xsdivsp   %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA), "wa" (vec_inB));
 584 }
 585
 586 static void test_xsmulsp(void)
 587 {
 588    __asm__ __volatile__ ("xsmulsp          %x0, %x1, %x2" : "=wa" (vec_out): "wa" (vec_inA),"wa" (vec_inB));
 589 }
 590
 591 static void test_xsresp(void)
 592 {
 593    __asm__ __volatile__ ("xsresp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 594 }
 595 static void test_xsrsp(void)
 596 {
 597    __asm__ __volatile__ ("xsrsp   %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
 598 }
 599
 600 fp_test_args_t vx_math_tests[] = {
 601                                   {8, 8},
 602                                   {8, 14},
 603                                   {8, 6},
 604                                   {8, 5},
 605                                   {8, 4},
 606                                   {8, 7},
 607                                   {8, 9},
 608                                   {8, 11},
 609                                   {14, 8},
 610                                   {14, 14},
 611                                   {14, 6},
 612                                   {14, 5},
 613                                   {14, 4},
 614                                   {14, 7},
 615                                   {14, 9},
 616                                   {14, 11},
 617                                   {6, 8},
 618                                   {6, 14},
 619                                   {6, 6},
 620                                   {6, 5},
 621                                   {6, 4},
 622                                   {6, 7},
 623                                   {6, 9},
 624                                   {6, 11},
 625                                   {5, 8},
 626                                   {5, 14},
 627                                   {5, 6},
 628                                   {5, 5},
 629                                   {5, 4},
 630                                   {5, 7},
 631                                   {5, 9},
 632                                   {5, 11},
 633                                   {4, 8},
 634                                   {4, 14},
 635                                   {4, 6},
 636                                   {4, 5},
 637                                   {4, 1},
 638                                   {4, 7},
 639                                   {4, 9},
 640                                   {4, 11},
 641                                   {7, 8},
 642                                   {7, 14},
 643                                   {7, 6},
 644                                   {7, 5},
 645                                   {7, 4},
 646                                   {7, 7},
 647                                   {7, 9},
 648                                   {7, 11},
 649                                   {10, 8},
 650                                   {10, 14},
 651                                   {10, 6},
 652                                   {10, 5},
 653                                   {10, 4},
 654                                   {10, 7},
 655                                   {10, 9},
 656                                   {10, 11},
 657                                   {12, 8},
 658                                   {12, 14},
 659                                   {12, 6},
 660                                   {12, 5},
 661                                   {12, 4},
 662                                   {12, 7},
 663                                   {12, 9},
 664                                   {12, 11},
 665                                   {8, 8},
 666                                   {8, 14},
 667                                   {8, 6},
 668                                   {8, 5},
 669                                   {8, 4},
 670                                   {8, 7},
 671                                   {8, 9},
 672                                   {8, 11},
 673                                   {14, 8},
 674                                   {14, 14},
 675                                   {14, 6},
 676                                   {14, 5},
 677                                   {14, 4},
 678                                   {14, 7},
 679                                   {14, 9},
 680                                   {14, 11},
 681                                   {6, 8},
 682                                   {6, 14},
 683                                   {6, 6},
 684                                   {6, 5},
 685                                   {6, 4},
 686                                   {6, 7},
 687                                   {6, 9},
 688                                   {6, 11},
 689                                   {5, 8},
 690                                   {5, 14},
 691                                   {5, 6},
 692                                   {5, 5},
 693                                   {5, 4},
 694                                   {5, 7},
 695                                   {5, 9},
 696                                   {5, 11},
 697                                   {4, 8},
 698                                   {4, 14},
 699                                   {4, 6},
 700                                   {4, 5},
 701                                   {4, 1},
 702                                   {4, 7},
 703                                   {4, 9},
 704                                   {4, 11},
 705                                   {7, 8},
 706                                   {7, 14},
 707                                   {7, 6},
 708                                   {7, 5},
 709                                   {7, 4},
 710                                   {7, 7},
 711                                   {7, 9},
 712                                   {7, 11},
 713                                   {10, 8},
 714                                   {10, 14},
 715                                   {10, 6},
 716                                   {10, 5},
 717                                   {10, 4},
 718                                   {10, 7},
 719                                   {10, 9},
 720                                   {10, 11},
 721                                   {12, 8},
 722                                   {12, 14},
 723                                   {12, 6},
 724                                   {12, 5},
 725                                   {12, 4},
 726                                   {12, 7},
 727                                   {12, 9},
 728                                   {12, 11}
 729 };
 730
 731 // These are all double precision inputs with double word outputs (mostly converted to single precision)
 732 static vx_fp_test_basic_t vx_fp_tests[] = {
 733                                      { &test_xsmadds, "xsmadd", vx_math_tests, 64, VX_FP_SMAS},
 734                                      { &test_xsmsubs, "xsmsub", vx_math_tests, 64, VX_FP_SMSS},
 735                                      { &test_xsmulsp, "xsmulsp", vx_math_tests, 64, VX_FP_OTHER},
 736                                      { &test_xsdivsp, "xsdivsp", vx_math_tests, 64, VX_FP_OTHER},
 737                                      { &test_xsnmadds, "xsnmadd", vx_math_tests, 64, VX_FP_SNMAS},
 738                                      { &test_xsnmsubs, "xsnmsub", vx_math_tests, 64, VX_FP_SNMSS},
 739                                      { NULL, NULL, NULL, 0, 0 }
 740 };
 741
 742 static vx_fp_test2_t
 743 vsx_one_fp_arg_tests[] = {
 744                           { &test_xscvdpspn, "xscvdpspn", NULL, 20, DOUBLE_TEST_SINGLE_RES, VX_SCALAR_SP_TO_VECTOR_SP, "conv"},
 745                           { &test_xscvspdpn, "xscvspdpn", NULL, 20, SINGLE_TEST, VX_DEFAULT, "conv"},
 746                           { &test_xsresp,    "xsresp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
 747                           { &test_xsrsp,     "xsrsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "round"},
 748                           { &test_xsrsqrtesp, "xsrsqrtesp", NULL, 20, DOUBLE_TEST, VX_ESTIMATE, "1/sqrt"},
 749                           { &test_xssqrtsp, "xssqrtsp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
 750                           { NULL, NULL, NULL, 0, 0, 0, NULL}
 751 };
 752
 753 // These are all double precision inputs with double word outputs (mostly converted to single precision)
 754 static vx_fp_test_basic_t
 755 vx_simple_scalar_fp_tests[] = {
 756                           { &test_xssubsp, "xssubsp", vx_math_tests, 64, VX_DEFAULT},
 757                           { &test_xsaddsp, "xsaddsp", vx_math_tests, 64, VX_DEFAULT},
 758                           { NULL, NULL, NULL, 0 , 0}
 759 };
 760
 761 static ldst_test_t
 762 ldst_tests[] = {
 763                     { &test_stxsspx, "stxsspx", DOUBLE_TEST_SINGLE_RES, vstg, 0, VSX_STORE },
 764                     { &test_stxsiwx, "stxsiwx", SINGLE_TEST_SINGLE_RES, vstg, 4, VSX_STORE },
 765                     { &test_lxsiwax, "lxsiwax", SINGLE_TEST, viargs, 0, VSX_LOAD },
 766                     { &test_lxsiwzx, "lxsiwzx", SINGLE_TEST, viargs, 4, VSX_LOAD },
 767                     { &test_lxsspx,  "lxsspx",  SINGLE_TEST, NULL, 0, VSX_LOAD },
 768                     { NULL, NULL, 0, NULL, 0, 0 } };
 769
 770 static xs_conv_test_t
 771 xs_conv_tests[] = {
 772                    { &test_xscvsxdsp, "xscvsxdsp"},
 773                    { &test_xscvuxdsp, "xscvuxdsp"},
 774                    { NULL, NULL}
 775 };
 776
 777 static logic_test_t
 778 logic_tests[] = {
 779                  { &test_xxleqv,  "xxleqv", VSX_EQV },
 780                  { &test_xxlorc,  "xxlorc", VSX_ORC },
 781                  { &test_xxlnand, "xxlnand", VSX_NAND },
 782                  { NULL, NULL, 0}
 783 };
 784
 785 Bool check_reciprocal_estimate(Bool is_rsqrte, int idx, int output_vec_idx)
 786 {
 787    /* NOTE:
 788     * This function has been verified only with the xsresp and xsrsqrtes instructions.
 789     *
 790     * Technically, the number of bits of precision for xsresp and xsrsqrtesp is
 791     * 14 bits (14 = log2 16384).  However, the VEX emulation of these instructions
 792     * does an actual reciprocal calculation versus estimation, so the answer we get back from
 793     * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
 794     * precision) and the estimate may still be within expected tolerances.  On top of that,
 795     * we can't count on these estimates always being the same across implementations.
 796     * For example, with the fre[s] instruction (which should be correct to within one part
 797     * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
 798     * one implementation could return 1.0111_1111_0000 and another implementation could return
 799     * 1.1000_0000_0000.  Both estimates meet the 1/256 accuracy requirement, but share only a
 800     * single bit in common.
 801     *
 802     * The upshot is we can't validate the VEX output for these instructions by comparing against
 803     * stored bit patterns.  We must check that the result is within expected tolerances.
 804     */
 805
 806    /* A mask to be used for validation as a last resort.
 807     * Only use 12 bits of precision for reasons discussed above.
 808     */
 809 #define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFF8000
 810
 811
 812    Bool result = False;
 813    double src_dp, res_dp;
 814    float calc_diff = 0;
 815    float real_diff = 0;
 816    double recip_divisor;
 817    float div_result;
 818    float calc_diff_tmp;
 819
 820    src_dp = res_dp = 0;
 821    Bool src_is_negative = False;
 822    Bool res_is_negative = False;
 823    unsigned long long * dst_dp = NULL;
 824    unsigned long long * src_dp_ull;
 825    dst_dp = (unsigned long long *) &vec_out;
 826    src_dp = spec_fargs[idx];
 827    src_dp_ull = (unsigned long long *) &src_dp;
 828    src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
 829    res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
 830    memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
 831
 832
 833    // Below are common rules
 834    if (isnan(src_dp))
 835       return isnan(res_dp);
 836    if (fpclassify(src_dp) == FP_ZERO)
 837       return isinf(res_dp);
 838    if (!src_is_negative && isinf(src_dp))
 839       return !res_is_negative && (fpclassify(res_dp) == FP_ZERO);
 840    if (is_rsqrte) {
 841       if (src_is_negative)
 842          return isnan(res_dp);
 843    } else {
 844       if (src_is_negative && isinf(src_dp))
 845          return res_is_negative && (fpclassify(res_dp) == FP_ZERO);
 846    }
 847
 848    if (is_rsqrte)
 849       recip_divisor = sqrt(src_dp);
 850    else
 851       recip_divisor = src_dp;
 852
 853    /* The instructions handled by this function take a double precision
 854     * input, perform a reciprocal estimate in double-precision, round
 855     * the result to single precision and store into the destination
 856     * register in double precision format.  So, to check the result
 857     * for accuracy, we use float (single precision) values.
 858     */
 859    div_result = 1.0/recip_divisor;
 860    calc_diff_tmp = recip_divisor * 16384.0;
 861    if (isnormal(calc_diff_tmp)) {
 862       calc_diff = fabs(1.0/calc_diff_tmp);
 863       real_diff = fabs((float)res_dp - div_result);
 864       result = ( ( res_dp == div_result )
 865                || ( real_diff <= calc_diff ) );
 866 #if FRES_DEBUG
 867       unsigned int * dv = (unsigned int *)&div_result;
 868       unsigned int * rd = (unsigned int *)&real_diff;
 869       unsigned int * cd = (unsigned int *)&calc_diff;
 870       printf("\n\t {computed div_result: %08x; real_diff:  %08x; calc_diff:  %08x}\n",
 871              *dv, *rd, *cd);
 872 #endif
 873
 874    } else {
 875       /* Unable to compute theoretical difference, so we fall back to masking out
 876        * un-precise bits.
 877        */
 878       unsigned int * div_result_sp = (unsigned int *)&div_result;
 879       float res_sp = (float)res_dp;
 880       unsigned int * dst_sp = (unsigned int *)&res_sp;
 881 #if FRES_DEBUG
 882       unsigned int * calc_diff_tmp_sp = (unsigned int *)&calc_diff_tmp;
 883       printf("Unable to compute theoretical difference, so we fall back to masking\n");
 884       printf("\tcalc_diff_tmp: %08x; div_result: %08x; vector result (sp): %08x\n",
 885              *calc_diff_tmp_sp, *div_result_sp, *dst_sp);
 886 #endif
 887       result = (*dst_sp & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
 888    }
 889    return result;
 890 }
 891
 892 static void test_vx_fp_ops(void)
 893 {
 894
 895    test_func_t func;
 896    int k;
 897    char * test_name = (char *)malloc(20);
 898    void  * vecA_void_ptr, * vecB_void_ptr, * vecOut_void_ptr;
 899
 900    if (isLE) {
 901       vecA_void_ptr = (void *)&vec_inA + 8;
 902       vecB_void_ptr = (void *)&vec_inB + 8;
 903       vecOut_void_ptr = (void *)&vec_out + 8;
 904    } else {
 905       vecA_void_ptr = (void *)&vec_inA;
 906       vecB_void_ptr = (void *)&vec_inB;
 907       vecOut_void_ptr = (void *)&vec_out;
 908    }
 909
 910    k = 0;
 911    build_special_fargs_table();
 912    while ((func = vx_fp_tests[k].test_func)) {
 913       int i, repeat = 0;
 914       unsigned long long * frap, * frbp, * dst;
 915       vx_fp_test_basic_t test_group = vx_fp_tests[k];
 916       vx_fp_test_type test_type = test_group.test_type;
 917
 918       switch (test_type) {
 919          case VX_FP_SMAS:
 920          case VX_FP_SMSS:
 921          case VX_FP_SNMAS:
 922          case VX_FP_SNMSS:
 923             if (test_type == VX_FP_SMAS)
 924                strcpy(test_name, "xsmadd");
 925             else if (test_type == VX_FP_SMSS)
 926                strcpy(test_name, "xsmsub");
 927             else if (test_type == VX_FP_SNMAS)
 928                strcpy(test_name, "xsnmadd");
 929             else
 930                strcpy(test_name, "xsnmsub");
 931
 932             if (!repeat) {
 933                repeat = 1;
 934                strcat(test_name, "asp");
 935                do_asp = 1;
 936             }
 937             break;
 938          case VX_FP_OTHER:
 939             strcpy(test_name, test_group.name);
 940             break;
 941          default:
 942             printf("ERROR:  Invalid VX FP test type %d\n", test_type);
 943             exit(1);
 944       }
 945
 946 again:
 947       for (i = 0; i < test_group.num_tests; i++) {
 948          unsigned int * inA, * inB, * pv;
 949
 950          fp_test_args_t aTest = test_group.targs[i];
 951          inA = (unsigned int *)&spec_fargs[aTest.fra_idx];
 952          inB = (unsigned int *)&spec_fargs[aTest.frb_idx];
 953          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
 954          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
 955          int idx;
 956          unsigned long long vsr_XT;
 957          pv = (unsigned int *)&vec_out;
 958
 959          // Only need to copy one doubleword into each vector's element 0
 960          memcpy(vecA_void_ptr, inA, 8);
 961          memcpy(vecB_void_ptr, inB, 8);
 962
 963          // clear vec_out
 964          for (idx = 0; idx < 4; idx++, pv++)
 965             *pv = 0;
 966
 967          if (test_type != VX_FP_OTHER) {
 968             /* Then we need a third src argument, which is stored in element 0 of
 969              * VSX[XT] -- i.e., vec_out.  For the xs<ZZZ>mdp cases, VSX[XT] holds
 970              * src3 and VSX[XB] holds src2; for the xs<ZZZ>adp cases, VSX[XT] holds
 971              * src2 and VSX[XB] holds src3.  The fp_test_args_t that holds the test
 972              * data (input args, result) contain only two inputs, so I arbitrarily
 973              * use spec_fargs elements 4 and 14 (alternating) for the third source
 974              * argument.  We can use the same input data for a given pair of
 975              * adp/mdp-type instructions by swapping the src2 and src3 arguments; thus
 976              * the expected result should be the same.
 977              */
 978             int extra_arg_idx;
 979             if (i % 2)
 980                extra_arg_idx = 4;
 981             else
 982                extra_arg_idx = 14;
 983
 984             if (repeat) {
 985                /* We're on the first time through of one of the VX_FP_SMx
 986                 * test types, meaning we're testing a xs<ZZZ>adp case, thus
 987                 * we have to swap inputs as described above:
 988                 *    src2 <= VSX[XT]
 989                 *    src3 <= VSX[XB]
 990                 */
 991                memcpy(vecOut_void_ptr, inB, 8);  // src2
 992                memcpy(vecB_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
 993                frbp = (unsigned long long *)&spec_fargs[extra_arg_idx];
 994             } else {
 995                // Don't need to init src2, as it's done before the switch()
 996                memcpy(vecOut_void_ptr, &spec_fargs[extra_arg_idx], 8);  //src3
 997             }
 998             memcpy(&vsr_XT, vecOut_void_ptr, 8);
 999          }
1000
1001          (*func)();
1002          dst = (unsigned long long *) &vec_out;
1003          if (isLE)
1004             dst++;
1005
1006          if (test_type == VX_FP_OTHER)
1007             printf("#%d: %s %016llx %016llx = %016llx\n", i, test_name,
1008                    *frap, *frbp, *dst);
1009          else
1010             printf( "#%d: %s %016llx %016llx %016llx = %016llx\n", i,
1011                     test_name, vsr_XT, *frap, *frbp, *dst );
1012
1013       }
1014       /*
1015            {
1016                // Debug code.  Keep this block commented out except when debugging.
1017                double result, expected;
1018                memcpy(&result, dst, 8);
1019                memcpy(&expected, &aTest.dp_bin_result, 8);
1020                printf( "\tFRA + FRB: %e + %e: Expected = %e; Actual = %e\n",
1021                        spec_fargs[aTest.fra_idx], spec_fargs[aTest.frb_idx],
1022                        expected, result );
1023             }
1024        */
1025       printf( "\n" );
1026
1027       if (repeat) {
1028          repeat = 0;
1029          strcat(test_name, "UNKNOWN");
1030          switch (test_type) {
1031             case VX_FP_SMAS:
1032             case VX_FP_SMSS:
1033             case VX_FP_SNMAS:
1034             case VX_FP_SNMSS:
1035                if (test_type == VX_FP_SMAS)
1036                   strcpy(test_name, "xsmadd");
1037                else if (test_type == VX_FP_SMSS)
1038                   strcpy(test_name, "xsmsub");
1039                else if (test_type == VX_FP_SNMAS)
1040                   strcpy(test_name, "xsnmadd");
1041                else
1042                   strcpy(test_name, "xsnmsub");
1043
1044                do_asp = 0;
1045                strcat(test_name, "msp");
1046                break;
1047             default:
1048                break;
1049          }
1050          goto again;
1051       }
1052       k++;
1053    }
1054    printf( "\n" );
1055    free(test_name);
1056 }
1057
1058
1059 static void test_vsx_one_fp_arg(void)
1060 {
1061    test_func_t func;
1062    int k;
1063    void  * vecB_void_ptr;
1064
1065    k = 0;
1066    build_special_fargs_table();
1067
1068    while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1069       int idx, i;
1070       unsigned long long *dst_dp;
1071       unsigned int * dst_sp;
1072       vx_fp_test2_t test_group = vsx_one_fp_arg_tests[k];
1073       /* size of source operands */
1074       Bool dp  = ((test_group.precision == DOUBLE_TEST) ||
1075                   (test_group.precision == DOUBLE_TEST_SINGLE_RES)) ? True : False;
1076       /* size of result */
1077       Bool dp_res = IS_DP_RESULT(test_group.precision);
1078       Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1079
1080       vecB_void_ptr = (void *)&vec_inB;
1081       if (isLE) {
1082          vecB_void_ptr += dp? 8 : 12;
1083       }
1084
1085       for (i = 0; i < test_group.num_tests; i++) {
1086          unsigned int * pv;
1087          void * inB;
1088
1089          pv = (unsigned int *)&vec_out;
1090          // clear vec_out
1091          for (idx = 0; idx < 4; idx++, pv++)
1092             *pv = 0;
1093
1094          if (dp) {
1095             int vec_out_idx;
1096             unsigned long long * frB_dp;
1097             if (isLE)
1098                vec_out_idx = dp_res ? 1 : 3;
1099             else
1100                vec_out_idx = 0;
1101
1102             if (test_group.test_type == VX_SCALAR_SP_TO_VECTOR_SP) {
1103                /* Take a single-precision value stored in double word element 0
1104                 * of src in double-precision format and convert to single-
1105                 * precision and store in word element 0 of dst.
1106                 */
1107                double input = spec_sp_fargs[i];
1108                memcpy(vecB_void_ptr, (void *)&input, 8);
1109             } else {
1110                inB = (void *)&spec_fargs[i];
1111                // copy double precision FP into input vector element 0
1112                memcpy(vecB_void_ptr, inB, 8);
1113             }
1114
1115             // execute test insn
1116             (*func)();
1117             if (dp_res)
1118                dst_dp = (unsigned long long *) &vec_out;
1119             else
1120                dst_sp = (unsigned int *) &vec_out;
1121
1122             printf("#%d: %s ", i, test_group.name);
1123             frB_dp = (unsigned long long *)&spec_fargs[i];
1124             printf("%s(%016llx)", test_group.op, *frB_dp);
1125             if (test_group.test_type == VX_ESTIMATE)
1126             {
1127                Bool res;
1128                res = check_reciprocal_estimate(is_sqrt, i, vec_out_idx);
1129                printf(" ==> %s)", res ? "PASS" : "FAIL");
1130             } else if (dp_res) {
1131                printf(" = %016llx", dst_dp[vec_out_idx]);
1132             } else {
1133                printf(" = %08x", dst_sp[vec_out_idx]);
1134             }
1135
1136             printf("\n");
1137          } else {  // single precision test type
1138             int vec_out_idx;
1139             if (isLE)
1140                vec_out_idx = dp_res ? 1 : 3;
1141             else
1142                vec_out_idx = 0;
1143             // Clear input vector
1144             pv = (unsigned int *)&vec_inB;
1145             for (idx = 0; idx < 4; idx++, pv++)
1146                *pv = 0;
1147             inB = (void *)&spec_sp_fargs[i];
1148             // copy single precision FP into input vector element i
1149             memcpy(vecB_void_ptr, inB, 4);
1150             // execute test insn
1151             (*func)();
1152             if (dp_res)
1153                dst_dp = (unsigned long long *) &vec_out;
1154             else
1155                dst_sp = (unsigned int *) &vec_out;
1156             // print result
1157             printf("#%d: %s ", i, test_group.name);
1158                printf("%s(%08x)", test_group.op, *((unsigned int *)&spec_sp_fargs[i]));
1159                if (dp_res)
1160                      printf(" = %016llx", dst_dp[vec_out_idx]);
1161                else
1162                   printf(" = %08x", dst_sp[vec_out_idx]);
1163
1164             printf("\n");
1165          }
1166       }
1167       k++;
1168       printf( "\n" );
1169    }
1170 }
1171
1172 /* This function currently only supports two double precision input arguments. */
1173 static void test_vsx_two_fp_arg(void)
1174 {
1175    test_func_t func;
1176    int k = 0;
1177    void  * vecA_void_ptr, * vecB_void_ptr;
1178
1179    if (isLE) {
1180       vecA_void_ptr = (void *)&vec_inA + 8;
1181       vecB_void_ptr = (void *)&vec_inB + 8;
1182    } else {
1183       vecA_void_ptr = (void *)&vec_inA;
1184       vecB_void_ptr = (void *)&vec_inB;
1185    }
1186
1187    build_special_fargs_table();
1188    while ((func = vx_simple_scalar_fp_tests[k].test_func)) {
1189       unsigned long long * frap, * frbp, * dst;
1190       unsigned int * pv;
1191       int idx;
1192       vx_fp_test_basic_t test_group = vx_simple_scalar_fp_tests[k];
1193       pv = (unsigned int *)&vec_out;
1194       // clear vec_out
1195       for (idx = 0; idx < 4; idx++, pv++)
1196          *pv = 0;
1197
1198       void * inA, * inB;
1199       int i;
1200       for (i = 0; i < test_group.num_tests; i++) {
1201          fp_test_args_t aTest = test_group.targs[i];
1202          inA = (void *)&spec_fargs[aTest.fra_idx];
1203          inB = (void *)&spec_fargs[aTest.frb_idx];
1204          frap = (unsigned long long *)&spec_fargs[aTest.fra_idx];
1205          frbp = (unsigned long long *)&spec_fargs[aTest.frb_idx];
1206          // Only need to copy one doubleword into each vector's element 0
1207          memcpy(vecA_void_ptr, inA, 8);
1208          memcpy(vecB_void_ptr, inB, 8);
1209          (*func)();
1210          dst = (unsigned long long *) &vec_out;
1211          if (isLE)
1212             dst++;
1213          printf("#%d: %s %016llx,%016llx => %016llx\n", i, test_group.name,
1214                 *frap, *frbp, *dst);
1215       }
1216       printf( "\n" );
1217       k++;
1218    }
1219 }
1220
1221 /* This function handles the following cases:
1222  *   1) Single precision value stored in double-precision
1223  *      floating-point format in doubleword element 0 of src VSX register
1224  *   2) Integer word value stored in word element 1 of src VSX register
1225  */
1226 static void _do_store_test (ldst_test_t storeTest)
1227 {
1228    test_func_t func;
1229    unsigned int *dst32;
1230    unsigned int i, idx;
1231    unsigned int * pv = (unsigned int *) storeTest.base_addr;
1232    void  * vecA_void_ptr;
1233
1234    if (isLE) {
1235       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1236          vecA_void_ptr = (void *)&vec_inA + 8;
1237    } else {
1238       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1239          vecA_void_ptr = (void *)&vec_inA + 4;
1240       else
1241          vecA_void_ptr = (void *)&vec_inA;
1242    }
1243
1244    func = storeTest.test_func;
1245    r14 = (HWord_t) storeTest.base_addr;
1246    r15 = (HWord_t) storeTest.offset;
1247
1248    /* test some of the pre-defined single precision values */
1249    for (i = 0; i < nb_special_fargs; i+=3) {
1250       // clear out storage destination
1251       for (idx = 0; idx < 4; idx++)
1252          *(pv + idx) = 0;
1253
1254       printf( "%s:", storeTest.name );
1255       if (storeTest.precision == SINGLE_TEST_SINGLE_RES)
1256       {
1257          unsigned int * arg_ptr = (unsigned int *)&spec_sp_fargs[i];
1258          memcpy(vecA_void_ptr, arg_ptr, sizeof(unsigned int));
1259          printf(" %08x ==> ", *arg_ptr);
1260       } else {
1261          unsigned long long * dp;
1262          double input = spec_sp_fargs[i];
1263          dp = (unsigned long long *)&input;
1264          memcpy(vecA_void_ptr, dp, sizeof(unsigned long long));
1265          printf(" %016llx ==> ", *dp);
1266       }
1267
1268       // execute test insn
1269       (*func)();
1270       dst32 = (unsigned int*)(storeTest.base_addr);
1271       dst32 += (storeTest.offset/sizeof(int));
1272       printf( "%08x\n", *dst32);
1273    }
1274
1275    printf("\n");
1276 }
1277
1278 static void _do_load_test(ldst_test_t loadTest)
1279 {
1280    test_func_t func;
1281    unsigned int i;
1282    unsigned long long * dst_dp;
1283
1284    func = loadTest.test_func;
1285    r15 = (HWord_t) loadTest.offset;
1286
1287    if (loadTest.base_addr == NULL) {
1288       /* Test lxsspx: source is single precision value, so let's */
1289       /* test some of the pre-defined single precision values. */
1290       int num_loops = (loadTest.offset == 0) ?  nb_special_fargs : (nb_special_fargs - (loadTest.offset/sizeof(int)));
1291       for (i = 0; i < num_loops; i+=3) {
1292          unsigned int * sp = (unsigned int *)&spec_sp_fargs[i + (loadTest.offset/sizeof(int))];
1293          printf( "%s:", loadTest.name );
1294          printf(" %08x ==> ", *sp);
1295          r14 = (HWord_t)&spec_sp_fargs[i];
1296
1297          // execute test insn
1298          (*func)();
1299          dst_dp = (unsigned long long *) &vec_out;
1300          if (isLE)
1301             dst_dp++;
1302          printf("%016llx\n", *dst_dp);
1303       }
1304    } else {
1305       // source is an integer word
1306       int num_loops = (loadTest.offset == 0) ?  NUM_VIARGS_INTS : (NUM_VIARGS_INTS - (loadTest.offset/sizeof(int)));
1307       for (i = 0; i < num_loops; i++) {
1308          printf( "%s:", loadTest.name );
1309          r14 = (HWord_t)&viargs[i];
1310          printf(" %08x ==> ", viargs[i + (loadTest.offset/sizeof(int))]);
1311
1312          // execute test insn
1313          (*func)();
1314          dst_dp = (unsigned long long *) &vec_out;
1315          if (isLE)
1316             dst_dp++;
1317          printf("%016llx\n", *dst_dp);
1318       }
1319    }
1320    printf("\n");
1321 }
1322
1323 static void test_ldst(void)
1324 {
1325    int k = 0;
1326
1327    while (ldst_tests[k].test_func) {
1328       if (ldst_tests[k].type == VSX_STORE)
1329          _do_store_test(ldst_tests[k]);
1330       else {
1331          _do_load_test(ldst_tests[k]);
1332       }
1333       k++;
1334       printf("\n");
1335    }
1336 }
1337
1338 static void test_xs_conv_ops(void)
1339 {
1340
1341    test_func_t func;
1342    int k = 0;
1343    void  * vecB_void_ptr;
1344
1345    if (isLE)
1346       vecB_void_ptr = (void *)&vec_inB + 8;
1347    else
1348       vecB_void_ptr = (void *)&vec_inB;
1349
1350    build_special_fargs_table();
1351    while ((func = xs_conv_tests[k].test_func)) {
1352       int i;
1353       unsigned long long * dst;
1354       xs_conv_test_t test_group = xs_conv_tests[k];
1355       for (i = 0; i < NUM_VDARGS_INTS; i++) {
1356          unsigned long long  * inB, * pv;
1357          int idx;
1358          inB = (unsigned long long *)&vdargs[i];
1359          memcpy(vecB_void_ptr, inB, 8);
1360          pv = (unsigned long long *)&vec_out;
1361          // clear vec_out
1362          for (idx = 0; idx < 2; idx++, pv++)
1363             *pv = 0ULL;
1364          (*func)();
1365          dst = (unsigned long long *) &vec_out;
1366          if (isLE)
1367             dst++;
1368          printf("#%d: %s %016llx => %016llx\n", i, test_group.name, vdargs[i], *dst);
1369       }
1370       k++;
1371       printf("\n");
1372    }
1373    printf( "\n" );
1374 }
1375
1376
1377 static void test_vsx_logic(void)
1378 {
1379    logic_test_t aTest;
1380    test_func_t func;
1381    int k;
1382    k = 0;
1383
1384    while ((func = logic_tests[k].test_func)) {
1385
1386       unsigned int * pv;
1387       unsigned int * inA, * inB, * dst;
1388       int idx, i;
1389       aTest = logic_tests[k];
1390       for (i = 0; i <= NUM_VIARGS_VECS; i+=4) {
1391          pv = (unsigned int *)&vec_out;
1392          inA = &viargs[i];
1393          inB = &viargs[i];
1394          memcpy(&vec_inA, inA, sizeof(vector unsigned int));
1395          memcpy(&vec_inB, inB, sizeof(vector unsigned int));
1396          // clear vec_out
1397          for (idx = 0; idx < 4; idx++, pv++)
1398             *pv = 0;
1399
1400          // execute test insn
1401          (*func)();
1402          dst = (unsigned int*) &vec_out;
1403
1404          printf( "#%d: %10s ", k, aTest.name);
1405          printf( " (%08x %08x %08x %08x, ", inA[0], inA[1], inA[2], inA[3]);
1406          printf( " %08x %08x %08x %08x)", inB[0], inB[1], inB[2], inB[3]);
1407          printf(" ==> %08x %08x %08x %08x\n", dst[0], dst[1], dst[2], dst[3]);
1408       }
1409       k++;
1410    }
1411    printf( "\n" );
1412 }
1413
1414
1415 //----------------------------------------------------------
1416
1417 static test_table_t all_tests[] = {
1418                                      { &test_vx_fp_ops,
1419                                        "Test VSX floating point instructions"},
1420                                      { &test_vsx_one_fp_arg,
1421                                        "Test VSX vector and scalar single argument instructions"} ,
1422                                      { &test_vsx_logic,
1423                                        "Test VSX logic instructions" },
1424                                      { &test_xs_conv_ops,
1425                                        "Test VSX scalar integer conversion instructions" },
1426                                      { &test_ldst,
1427                                        "Test VSX load/store dp to sp instructions" },
1428                                      { &test_vsx_two_fp_arg,
1429                                        "Test VSX vector and scalar two argument instructions"} ,
1430                                      { NULL, NULL }
1431 };
1432
1433 #endif
1434
/* Program entry point.  When built with HAS_ISA_2_07, print the heading of
 * each category in all_tests[] and run it, stopping at the entry whose
 * test_category is NULL; otherwise report that ISA 2.07 is unsupported.
 * Always returns 0.
 */
int main(int argc, char *argv[])
{
#ifdef HAS_ISA_2_07
   int i;

   for (i = 0; all_tests[i].test_category; i++) {
      test_table_t entry = all_tests[i];

      printf( "%s\n", entry.name );
      (*entry.test_category)();
   }
#else
   printf("NO ISA 2.07 SUPPORT\n");
#endif
   return 0;
}