workbench/libs/mesa/src/gallium/auxiliary/rtasm/rtasm_ppc.c

   1 /**************************************************************************
   2  *
   3  * Copyright (C) 2008 Tungsten Graphics, Inc.   All Rights Reserved.
   4  * Copyright (C) 2009 VMware, Inc.  All Rights Reserved.
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  *
  23  **************************************************************************/
  24
  25 /**
  26  * PPC code generation.
  27  * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
  28  * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
  29  *
  30  * Other PPC refs:
  31  * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
  32  * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
  33  * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
  34  *
  35  * \author Brian Paul
  36  */
  37
  38
  39 #include <stdio.h>
  40 #include "util/u_memory.h"
  41 #include "util/u_debug.h"
  42 #include "rtasm_execmem.h"
  43 #include "rtasm_ppc.h"
  44
  45
  46 void
  47 ppc_init_func(struct ppc_function *p)
  48 {
  49    uint i;
  50
  51    memset(p, 0, sizeof(*p));
  52
  53    p->num_inst = 0;
  54    p->max_inst = 100; /* first guess at buffer size */
  55    p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
  56    p->reg_used = 0x0;
  57    p->fp_used = 0x0;
  58    p->vec_used = 0x0;
  59
  60    p->print = FALSE;
  61    p->indent = 0;
  62
  63    /* only allow using gp registers 3..12 for now */
  64    for (i = 0; i < 3; i++)
  65       ppc_reserve_register(p, i);
  66    for (i = 12; i < PPC_NUM_REGS; i++)
  67       ppc_reserve_register(p, i);
  68 }
  69
  70
  71 void
  72 ppc_release_func(struct ppc_function *p)
  73 {
  74    assert(p->num_inst <= p->max_inst);
  75    if (p->store != NULL) {
  76       rtasm_exec_free(p->store);
  77    }
  78    p->store = NULL;
  79 }
  80
  81
  82 uint
  83 ppc_num_instructions(const struct ppc_function *p)
  84 {
  85    return p->num_inst;
  86 }
  87
  88
  89 void (*ppc_get_func(struct ppc_function *p))(void)
  90 {
  91 #if 0
  92    DUMP_END();
  93    if (DISASSEM && p->store)
  94       debug_printf("disassemble %p %p\n", p->store, p->csr);
  95
  96    if (p->store == p->error_overflow)
  97       return (void (*)(void)) NULL;
  98    else
  99 #endif
 100       return (void (*)(void)) pointer_to_func(p->store);
 101 }
 102
 103
 104 void
 105 ppc_dump_func(const struct ppc_function *p)
 106 {
 107    uint i;
 108    for (i = 0; i < p->num_inst; i++) {
 109       debug_printf("%3u: 0x%08x\n", i, (unsigned int)p->store[i]);
 110    }
 111 }
 112
 113
 114 void
 115 ppc_print_code(struct ppc_function *p, boolean enable)
 116 {
 117    p->print = enable;
 118 }
 119
 120
 121 void
 122 ppc_indent(struct ppc_function *p, int spaces)
 123 {
 124    p->indent += spaces;
 125 }
 126
 127
 128 static void
 129 indent(const struct ppc_function *p)
 130 {
 131    int i;
 132    for (i = 0; i < p->indent; i++) {
 133       putchar(' ');
 134    }
 135 }
 136
 137
 138 void
 139 ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
 140 {
 141    if (p->print) {
 142       p->indent += rel_indent;
 143       indent(p);
 144       p->indent -= rel_indent;
 145       printf("# %s\n", s);
 146    }
 147 }
 148
 149
 150 /**
 151  * Mark a register as being unavailable.
 152  */
 153 int
 154 ppc_reserve_register(struct ppc_function *p, int reg)
 155 {
 156    assert(reg < PPC_NUM_REGS);
 157    p->reg_used |= (1 << reg);
 158    return reg;
 159 }
 160
 161
 162 /**
 163  * Allocate a general purpose register.
 164  * \return register index or -1 if none left.
 165  */
 166 int
 167 ppc_allocate_register(struct ppc_function *p)
 168 {
 169    unsigned i;
 170    for (i = 0; i < PPC_NUM_REGS; i++) {
 171       const uint32_t mask = 1 << i;
 172       if ((p->reg_used & mask) == 0) {
 173          p->reg_used |= mask;
 174          return i;
 175       }
 176    }
 177    printf("OUT OF PPC registers!\n");
 178    return -1;
 179 }
 180
 181
 182 /**
 183  * Mark the given general purpose register as "unallocated".
 184  */
 185 void
 186 ppc_release_register(struct ppc_function *p, int reg)
 187 {
 188    assert(reg < PPC_NUM_REGS);
 189    assert(p->reg_used & (1 << reg));
 190    p->reg_used &= ~(1 << reg);
 191 }
 192
 193
 194 /**
 195  * Allocate a floating point register.
 196  * \return register index or -1 if none left.
 197  */
 198 int
 199 ppc_allocate_fp_register(struct ppc_function *p)
 200 {
 201    unsigned i;
 202    for (i = 0; i < PPC_NUM_FP_REGS; i++) {
 203       const uint32_t mask = 1 << i;
 204       if ((p->fp_used & mask) == 0) {
 205          p->fp_used |= mask;
 206          return i;
 207       }
 208    }
 209    printf("OUT OF PPC FP registers!\n");
 210    return -1;
 211 }
 212
 213
 214 /**
 215  * Mark the given floating point register as "unallocated".
 216  */
 217 void
 218 ppc_release_fp_register(struct ppc_function *p, int reg)
 219 {
 220    assert(reg < PPC_NUM_FP_REGS);
 221    assert(p->fp_used & (1 << reg));
 222    p->fp_used &= ~(1 << reg);
 223 }
 224
 225
 226 /**
 227  * Allocate a vector register.
 228  * \return register index or -1 if none left.
 229  */
 230 int
 231 ppc_allocate_vec_register(struct ppc_function *p)
 232 {
 233    unsigned i;
 234    for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
 235       const uint32_t mask = 1 << i;
 236       if ((p->vec_used & mask) == 0) {
 237          p->vec_used |= mask;
 238          return i;
 239       }
 240    }
 241    printf("OUT OF PPC VEC registers!\n");
 242    return -1;
 243 }
 244
 245
 246 /**
 247  * Mark the given vector register as "unallocated".
 248  */
 249 void
 250 ppc_release_vec_register(struct ppc_function *p, int reg)
 251 {
 252    assert(reg < PPC_NUM_VEC_REGS);
 253    assert(p->vec_used & (1 << reg));
 254    p->vec_used &= ~(1 << reg);
 255 }
 256
 257
 258 /**
 259  * Append instruction to instruction buffer.  Grow buffer if out of room.
 260  */
 261 static void
 262 emit_instruction(struct ppc_function *p, uint32_t inst_bits)
 263 {
 264    if (!p->store)
 265       return;  /* out of memory, drop the instruction */
 266
 267    if (p->num_inst == p->max_inst) {
 268       /* allocate larger buffer */
 269       uint32_t *newbuf;
 270       p->max_inst *= 2;  /* 2x larger */
 271       newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
 272       if (newbuf) {
 273          memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
 274       }
 275       rtasm_exec_free(p->store);
 276       p->store = newbuf;
 277       if (!p->store) {
 278          /* out of memory */
 279          p->num_inst = 0;
 280          return;
 281       }
 282    }
 283
 284    p->store[p->num_inst++] = inst_bits;
 285 }
 286
 287
/**
 * Bit-field overlay used by emit_vx() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields so the finished word can be
 * passed to emit_instruction().  The field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm before building for another target.
 */
union vx_inst {
   uint32_t bits;
   struct {
      unsigned op:6;    /* emit_vx() always stores 4 here */
      unsigned vD:5;
      unsigned vA:5;
      unsigned vB:5;
      unsigned op2:11;  /* per-instruction code supplied by the caller */
   } inst;
};
 298
 299 static INLINE void
 300 emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
 301         const char *format, boolean transpose)
 302 {
 303    union vx_inst inst;
 304    inst.inst.op = 4;
 305    inst.inst.vD = vD;
 306    inst.inst.vA = vA;
 307    inst.inst.vB = vB;
 308    inst.inst.op2 = op2;
 309    emit_instruction(p, inst.bits);
 310    if (p->print) {
 311       indent(p);
 312       if (transpose)
 313          printf(format, vD, vB, vA);
 314       else
 315          printf(format, vD, vA, vB);
 316    }
 317 }
 318
 319
/**
 * Bit-field overlay used by emit_vxr() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields; the field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union vxr_inst {
   uint32_t bits;
   struct {
      unsigned op:6;    /* emit_vxr() always stores 4 here */
      unsigned vD:5;
      unsigned vA:5;
      unsigned vB:5;
      unsigned rC:1;    /* emit_vxr() always stores 0 here */
      unsigned op2:10;  /* per-instruction code supplied by the caller */
   } inst;
};
 331
 332 static INLINE void
 333 emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
 334          const char *format)
 335 {
 336    union vxr_inst inst;
 337    inst.inst.op = 4;
 338    inst.inst.vD = vD;
 339    inst.inst.vA = vA;
 340    inst.inst.vB = vB;
 341    inst.inst.rC = 0;
 342    inst.inst.op2 = op2;
 343    emit_instruction(p, inst.bits);
 344    if (p->print) {
 345       indent(p);
 346       printf(format, vD, vA, vB);
 347    }
 348 }
 349
 350
/**
 * Bit-field overlay used by emit_va() to assemble a 32-bit instruction
 * word with four register fields.  `bits` aliases the packed fields; the
 * field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union va_inst {
   uint32_t bits;
   struct {
      unsigned op:6;    /* emit_va() always stores 4 here */
      unsigned vD:5;
      unsigned vA:5;
      unsigned vB:5;
      unsigned vC:5;
      unsigned op2:6;   /* per-instruction code supplied by the caller */
   } inst;
};
 362
 363 static INLINE void
 364 emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
 365         const char *format)
 366 {
 367    union va_inst inst;
 368    inst.inst.op = 4;
 369    inst.inst.vD = vD;
 370    inst.inst.vA = vA;
 371    inst.inst.vB = vB;
 372    inst.inst.vC = vC;
 373    inst.inst.op2 = op2;
 374    emit_instruction(p, inst.bits);
 375    if (p->print) {
 376       indent(p);
 377       printf(format, vD, vA, vB, vC);
 378    }
 379 }
 380
 381
/**
 * Bit-field overlay used by emit_i() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields; the field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union i_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned li:24;
      unsigned aa:1;
      unsigned lk:1;
   } inst;
};
 391
 392 static INLINE void
 393 emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
 394 {
 395    union i_inst inst;
 396    inst.inst.op = op;
 397    inst.inst.li = li;
 398    inst.inst.aa = aa;
 399    inst.inst.lk = lk;
 400    emit_instruction(p, inst.bits);
 401 }
 402
 403
/**
 * Bit-field overlay used by emit_xl() and dump_xl() for a 32-bit
 * instruction word.  `bits` aliases the packed fields; the field widths
 * sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union xl_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned bo:5;
      unsigned bi:5;
      unsigned unused:3;  /* emit_xl() always stores 0 here */
      unsigned bh:2;
      unsigned op2:10;
      unsigned lk:1;
   } inst;
};
 416
 417 static INLINE void
 418 emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
 419         uint op2, uint lk)
 420 {
 421    union xl_inst inst;
 422    inst.inst.op = op;
 423    inst.inst.bo = bo;
 424    inst.inst.bi = bi;
 425    inst.inst.unused = 0x0;
 426    inst.inst.bh = bh;
 427    inst.inst.op2 = op2;
 428    inst.inst.lk = lk;
 429    emit_instruction(p, inst.bits);
 430 }
 431
 432 static INLINE void
 433 dump_xl(const char *name, uint inst)
 434 {
 435    union xl_inst i;
 436
 437    i.bits = inst;
 438    debug_printf("%s = 0x%08x\n", name, inst);
 439    debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
 440    debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
 441    debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
 442    debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
 443    debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
 444    debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
 445    debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
 446 }
 447
 448
/**
 * Bit-field overlay used by emit_x() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields; the field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union x_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned vrs:5;
      unsigned ra:5;
      unsigned rb:5;
      unsigned op2:10;
      unsigned unused:1;  /* emit_x() always stores 0 here */
   } inst;
};
 460
 461 static INLINE void
 462 emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
 463        const char *format)
 464 {
 465    union x_inst inst;
 466    inst.inst.op = op;
 467    inst.inst.vrs = vrs;
 468    inst.inst.ra = ra;
 469    inst.inst.rb = rb;
 470    inst.inst.op2 = op2;
 471    inst.inst.unused = 0x0;
 472    emit_instruction(p, inst.bits);
 473    if (p->print) {
 474       indent(p);
 475       printf(format, vrs, ra, rb);
 476    }
 477 }
 478
 479
/**
 * Bit-field overlay used by emit_d() to assemble a 32-bit instruction
 * word carrying a 16-bit immediate.  `bits` aliases the packed fields; the
 * field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union d_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned rt:5;
      unsigned ra:5;
      unsigned si:16;  /* emit_d() stores the low 16 bits of its int arg */
   } inst;
};
 489
 490 static INLINE void
 491 emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
 492        const char *format, boolean transpose)
 493 {
 494    union d_inst inst;
 495    assert(si >= -32768);
 496    assert(si <= 32767);
 497    inst.inst.op = op;
 498    inst.inst.rt = rt;
 499    inst.inst.ra = ra;
 500    inst.inst.si = (unsigned) (si & 0xffff);
 501    emit_instruction(p, inst.bits);
 502    if (p->print) {
 503       indent(p);
 504       if (transpose)
 505          printf(format, rt, si, ra);
 506       else
 507          printf(format, rt, ra, si);
 508    }
 509 }
 510
 511
/**
 * Bit-field overlay used by emit_a() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields; the field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union a_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned frt:5;
      unsigned fra:5;
      unsigned frb:5;
      unsigned unused:5;  /* emit_a() always stores 0 here */
      unsigned op2:5;
      unsigned rc:1;
   } inst;
};
 524
 525 static INLINE void
 526 emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
 527        uint rc, const char *format)
 528 {
 529    union a_inst inst;
 530    inst.inst.op = op;
 531    inst.inst.frt = frt;
 532    inst.inst.fra = fra;
 533    inst.inst.frb = frb;
 534    inst.inst.unused = 0x0;
 535    inst.inst.op2 = op2;
 536    inst.inst.rc = rc;
 537    emit_instruction(p, inst.bits);
 538    if (p->print) {
 539       indent(p);
 540       printf(format, frt, fra, frb);
 541    }
 542 }
 543
 544
/**
 * Bit-field overlay used by emit_xo() to assemble a 32-bit instruction
 * word.  `bits` aliases the packed fields; the field widths sum to 32.
 * NOTE(review): bit-field placement is decided by the compiler/ABI;
 * presumably this relies on `op` landing in the most significant bits on
 * the PPC target -- confirm.
 */
union xo_inst {
   uint32_t bits;
   struct {
      unsigned op:6;
      unsigned rt:5;
      unsigned ra:5;
      unsigned rb:5;
      unsigned oe:1;
      unsigned op2:9;
      unsigned rc:1;
   } inst;
};
 557
 558 static INLINE void
 559 emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
 560         uint op2, uint rc, const char *format)
 561 {
 562    union xo_inst inst;
 563    inst.inst.op = op;
 564    inst.inst.rt = rt;
 565    inst.inst.ra = ra;
 566    inst.inst.rb = rb;
 567    inst.inst.oe = oe;
 568    inst.inst.op2 = op2;
 569    inst.inst.rc = rc;
 570    emit_instruction(p, inst.bits);
 571    if (p->print) {
 572       indent(p);
 573       printf(format, rt, ra, rb);
 574    }
 575 }
 576
 577
 578
 579
 580
 581 /**
 582  ** float vector arithmetic
 583  **/
 584
 585 /** vector float add */
 586 void
 587 ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 588 {
 589    emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE);
 590 }
 591
 592 /** vector float substract */
 593 void
 594 ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 595 {
 596    emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
 597 }
 598
 599 /** vector float min */
 600 void
 601 ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 602 {
 603    emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
 604 }
 605
 606 /** vector float max */
 607 void
 608 ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
 609 {
 610    emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
 611 }
 612
 613 /** vector float mult add: vD = vA * vB + vC */
 614 void
 615 ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 616 {
 617    /* note arg order */
 618    emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
 619 }
 620
 621 /** vector float negative mult subtract: vD = vA - vB * vC */
 622 void
 623 ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 624 {
 625    /* note arg order */
 626    emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
 627 }
 628
 629 /** vector float compare greater than */
 630 void
 631 ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 632 {
 633    emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u");
 634 }
 635
 636 /** vector float compare greater than or equal to */
 637 void
 638 ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 639 {
 640    emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u");
 641 }
 642
 643 /** vector float compare equal */
 644 void
 645 ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
 646 {
 647    emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u");
 648 }
 649
 650 /** vector float 2^x */
 651 void
 652 ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
 653 {
 654    emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
 655 }
 656
 657 /** vector float log2(x) */
 658 void
 659 ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
 660 {
 661    emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
 662 }
 663
 664 /** vector float reciprocol */
 665 void
 666 ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
 667 {
 668    emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
 669 }
 670
 671 /** vector float reciprocol sqrt estimate */
 672 void
 673 ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
 674 {
 675    emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
 676 }
 677
 678 /** vector float round to negative infinity */
 679 void
 680 ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
 681 {
 682    emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
 683 }
 684
 685 /** vector float round to positive infinity */
 686 void
 687 ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
 688 {
 689    emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
 690 }
 691
 692 /** vector float round to nearest int */
 693 void
 694 ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
 695 {
 696    emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
 697 }
 698
 699 /** vector float round to int toward zero */
 700 void
 701 ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
 702 {
 703    emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
 704 }
 705
 706 /** vector store: store vR at mem[rA+rB] */
 707 void
 708 ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
 709 {
 710    emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
 711 }
 712
 713 /** vector load: vR = mem[rA+rB] */
 714 void
 715 ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
 716 {
 717    emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
 718 }
 719
 720 /** load vector element word: vR = mem_word[ra+rb] */
 721 void
 722 ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
 723 {
 724    emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
 725 }
 726
 727
 728
 729
 730 /**
 731  ** vector bitwise operations
 732  **/
 733
 734 /** vector and */
 735 void
 736 ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
 737 {
 738    emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
 739 }
 740
 741 /** vector and complement */
 742 void
 743 ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
 744 {
 745    emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
 746 }
 747
 748 /** vector or */
 749 void
 750 ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
 751 {
 752    emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
 753 }
 754
 755 /** vector nor */
 756 void
 757 ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
 758 {
 759    emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
 760 }
 761
 762 /** vector xor */
 763 void
 764 ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
 765 {
 766    emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
 767 }
 768
 769 /** Pseudo-instruction: vector move */
 770 void
 771 ppc_vmove(struct ppc_function *p, uint vD, uint vA)
 772 {
 773    boolean print = p->print;
 774    p->print = FALSE;
 775    ppc_vor(p, vD, vA, vA);
 776    if (print) {
 777       indent(p);
 778       printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
 779    }
 780    p->print = print;
 781 }
 782
 783 /** Set vector register to {0,0,0,0} */
 784 void
 785 ppc_vzero(struct ppc_function *p, uint vr)
 786 {
 787    boolean print = p->print;
 788    p->print = FALSE;
 789    ppc_vxor(p, vr, vr, vr);
 790    if (print) {
 791       indent(p);
 792       printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
 793    }
 794    p->print = print;
 795 }
 796
 797
 798
 799
 800 /**
 801  ** Vector shuffle / select / splat / etc
 802  **/
 803
 804 /** vector permute */
 805 void
 806 ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 807 {
 808    emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u");
 809 }
 810
 811 /** vector select */
 812 void
 813 ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
 814 {
 815    emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u");
 816 }
 817
 818 /** vector splat byte */
 819 void
 820 ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
 821 {
 822    emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
 823 }
 824
 825 /** vector splat half word */
 826 void
 827 ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
 828 {
 829    emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
 830 }
 831
 832 /** vector splat word */
 833 void
 834 ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
 835 {
 836    emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
 837 }
 838
 839 /** vector splat signed immediate word */
 840 void
 841 ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
 842 {
 843    assert(imm >= -16);
 844    assert(imm < 15);
 845    emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
 846 }
 847
 848 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
 849 void
 850 ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
 851 {
 852    emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
 853 }
 854
 855
 856
 857
 858 /**
 859  ** integer arithmetic
 860  **/
 861
 862 /** rt = ra + imm */
 863 void
 864 ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
 865 {
 866    emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
 867 }
 868
 869 /** rt = ra + (imm << 16) */
 870 void
 871 ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
 872 {
 873    emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
 874 }
 875
 876 /** rt = ra + rb */
 877 void
 878 ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
 879 {
 880    emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
 881 }
 882
 883 /** rt = ra AND ra */
 884 void
 885 ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
 886 {
 887    emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n");  /* note argument order */
 888 }
 889
 890 /** rt = ra AND imm */
 891 void
 892 ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
 893 {
 894    /* note argument order */
 895    emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
 896 }
 897
 898 /** rt = ra OR ra */
 899 void
 900 ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
 901 {
 902    emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n");  /* note argument order */
 903 }
 904
 905 /** rt = ra OR imm */
 906 void
 907 ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
 908 {
 909    /* note argument order */
 910    emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
 911 }
 912
 913 /** rt = ra XOR ra */
 914 void
 915 ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
 916 {
 917    emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n");  /* note argument order */
 918 }
 919
 920 /** rt = ra XOR imm */
 921 void
 922 ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
 923 {
 924    /* note argument order */
 925    emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
 926 }
 927
 928 /** pseudo instruction: move: rt = ra */
 929 void
 930 ppc_mr(struct ppc_function *p, uint rt, uint ra)
 931 {
 932    ppc_or(p, rt, ra, ra);
 933 }
 934
 935 /** pseudo instruction: load immediate: rt = imm */
 936 void
 937 ppc_li(struct ppc_function *p, uint rt, int imm)
 938 {
 939    boolean print = p->print;
 940    p->print = FALSE;
 941    ppc_addi(p, rt, 0, imm);
 942    if (print) {
 943       indent(p);
 944       printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
 945    }
 946    p->print = print;
 947 }
 948
 949 /** rt = imm << 16 */
 950 void
 951 ppc_lis(struct ppc_function *p, uint rt, int imm)
 952 {
 953    ppc_addis(p, rt, 0, imm);
 954 }
 955
 956 /** rt = imm */
 957 void
 958 ppc_load_int(struct ppc_function *p, uint rt, int imm)
 959 {
 960    ppc_lis(p, rt, (imm >> 16));          /* rt = imm >> 16 */
 961    ppc_ori(p, rt, rt, (imm & 0xffff));   /* rt = rt | (imm & 0xffff) */
 962 }
 963
 964
 965
 966
 967 /**
 968  ** integer load/store
 969  **/
 970
 971 /** store rs at memory[(ra)+d],
 972  * then update ra = (ra)+d
 973  */
 974 void
 975 ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
 976 {
 977    emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
 978 }
 979
 980 /** store rs at memory[(ra)+d] */
 981 void
 982 ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
 983 {
 984    emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
 985 }
 986
 987 /** Load rt = mem[(ra)+d];  then zero set high 32 bits to zero. */
 988 void
 989 ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
 990 {
 991    emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
 992 }
 993
 994
 995
 996 /**
 997  ** Float (non-vector) arithmetic
 998  **/
 999
1000 /** add: frt = fra + frb */
1001 void
1002 ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
1003 {
1004    emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
1005 }
1006
1007 /** sub: frt = fra - frb */
1008 void
1009 ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
1010 {
1011    emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
1012 }
1013
1014 /** convert to int: rt = (int) ra */
1015 void
1016 ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
1017 {
1018    emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
1019 }
1020
1021 /** store frs at mem[(ra)+offset] */
1022 void
1023 ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
1024 {
1025    emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1026 }
1027
1028 /** store frs at mem[(ra)+(rb)] */
1029 void
1030 ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
1031 {
1032    emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n");
1033 }
1034
1035 /** load frt = mem[(ra)+offset] */
1036 void
1037 ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
1038 {
1039    emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1040 }
1041
1042
1043
1044
1045
1046 /**
1047  ** branch instructions
1048  **/
1049
1050 /** BLR: Branch to link register (p. 35) */
1051 void
1052 ppc_blr(struct ppc_function *p)
1053 {
1054    emit_i(p, 18, 0, 0, 1);
1055    if (p->print) {
1056       indent(p);
1057       printf("blr\n");
1058    }
1059 }
1060
1061 /** Branch Conditional to Link Register (p. 36) */
1062 void
1063 ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
1064 {
1065    emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
1066    if (p->print) {
1067       indent(p);
1068       printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
1069    }
1070 }
1071
1072 /** Pseudo instruction: return from subroutine */
1073 void
1074 ppc_return(struct ppc_function *p)
1075 {
1076    ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
1077 }